From 891839b976c75c77f238931123ac472e3284e95d Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 24 May 2018 15:19:06 +0900 Subject: New bytecode implementation of mruby VM. --- mrbgems/mruby-string-ext/src/string.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 4cab49094..142d449f4 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -627,8 +627,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "succ", mrb_str_succ, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "succ!", mrb_str_succ_bang, MRB_ARGS_NONE()); - mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next"), mrb_intern_lit(mrb, "succ")); - mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next!"), mrb_intern_lit(mrb, "succ!")); + mrb_define_alias(mrb, s, "next", "succ"); + mrb_define_alias(mrb, s, "next!", "succ!"); mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "delete_prefix!", mrb_str_del_prefix_bang, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "delete_prefix", mrb_str_del_prefix, MRB_ARGS_REQ(1)); -- cgit v1.2.3 From 58ba38fe1e11828190596b44e0789dd8a0607ff3 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 20 Sep 2018 15:13:13 +0900 Subject: Add `String#tr` and `#tr!` to `mruby-string-ext` gem; fix #4086 This patch is based on `mruby/c` implementation by Hirohito Higashi. We might need to add `#tr_s`, `#squeeze` and `#delete` as well. Adding them should not be too hard using functions we implemented here. --- mrbgems/mruby-string-ext/src/string.c | 261 ++++++++++++++++++++++++++++++++ mrbgems/mruby-string-ext/test/string.rb | 13 ++ 2 files changed, 274 insertions(+) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 142d449f4..9d318cb1a 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -235,6 +235,265 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self) return mrb_false_value(); } +/* + #tr Pattern syntax + + ::= ()* | '^' ()* + ::= | + ::= ()+ + ::= '-' +*/ +struct tr_pattern { + uint8_t type; // 1:in-order, 2:range + mrb_bool flag_reverse; + int16_t n; + struct tr_pattern *next; + char ch[]; +}; + +static void +tr_pattern_free(mrb_state *mrb, struct tr_pattern *pat) +{ + while (pat) { + struct tr_pattern *p = pat->next; + mrb_free(mrb, pat); + pat = p; + } +} + +static struct tr_pattern* +tr_parse_pattern(mrb_state *mrb, const mrb_value v_pattern, mrb_bool flag_reverse_enable) +{ + const char *pattern = RSTRING_PTR(v_pattern); + int pattern_length = RSTRING_LEN(v_pattern); + mrb_bool flag_reverse = FALSE; + struct tr_pattern *ret = NULL; + struct tr_pattern *pat1; + int i = 0; + + if(flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') { + flag_reverse = TRUE; + i++; + } + + while (i < pattern_length) { + /* is range pattern ? */ + if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') { + pat1 = (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern) + 2); + if (pat1 == NULL && ret) { + nomem: + tr_pattern_free(mrb, ret); + mrb_exc_raise(mrb, mrb_obj_value(mrb->nomem_err)); + return NULL; /* not reached */ + } + pat1->type = 2; + pat1->flag_reverse = flag_reverse; + pat1->n = pattern[i+2] - pattern[i] + 1; + pat1->next = NULL; + pat1->ch[0] = pattern[i]; + pat1->ch[1] = pattern[i+2]; + i += 3; + } + else { + /* in order pattern. */ + int start_pos = i++; + int len; + + while (i < pattern_length) { + if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') + break; + i++; + } + + len = i - start_pos; + pat1 = (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern) + len); + if (pat1 == NULL && ret) { + goto nomem; + } + pat1->type = 1; + pat1->flag_reverse = flag_reverse; + pat1->n = len; + pat1->next = NULL; + memcpy(pat1->ch, &pattern[start_pos], len); + } + + if (ret == NULL) { + ret = pat1; + } + else { + struct tr_pattern *p = ret; + while (p->next != NULL) { + p = p->next; + } + p->next = pat1; + } + } + + return ret; +} + +static mrb_int +tr_find_character(const struct tr_pattern *pat, int ch) +{ + mrb_int ret = -1; + mrb_int n_sum = 0; + mrb_int flag_reverse = pat ? pat->flag_reverse : 0; + + while (pat != NULL) { + if (pat->type == 1) { /* pat->type == 1 in-order */ + int i; + for (i = 0; i < pat->n; i++) { + if (pat->ch[i] == ch) ret = n_sum + i; + } + } + else { /* pat->type == 2 range */ + if (pat->ch[0] <= ch && ch <= pat->ch[1]) + ret = n_sum + ch - pat->ch[0]; + } + n_sum += pat->n; + pat = pat->next; + } + + if (flag_reverse) { + return (ret < 0) ? MRB_INT_MAX : -1; + } + return ret; +} + +static mrb_int +tr_get_character(const struct tr_pattern *pat, mrb_int n_th) +{ + mrb_int n_sum = 0; + while (pat != NULL) { + if (n_th < (n_sum + pat->n)) { + mrb_int i = (n_th - n_sum); + return (pat->type == 1) ? pat->ch[i] :pat->ch[0] + i; + } + if (pat->next == NULL) { + return (pat->type == 1) ? pat->ch[pat->n - 1] : pat->ch[1]; + } + n_sum += pat->n; + pat = pat->next; + } + + return -1; +} + +static mrb_bool +str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2) +{ + struct tr_pattern *pat; + struct tr_pattern *rep; + char *s; + mrb_int len; + mrb_int i; + mrb_bool flag_changed = FALSE; + + mrb_str_modify(mrb, mrb_str_ptr(str)); + pat = tr_parse_pattern(mrb, p1, TRUE); + rep = tr_parse_pattern(mrb, p2, FALSE); + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + + for (i = 0; i < len; i++) { + mrb_int n = tr_find_character(pat, s[i]); + + if (n >= 0) { + flag_changed = TRUE; + if (rep == NULL) { + memmove(s + i, s + i + 1, len - i); + len--; + i--; + } + else { + mrb_int c = tr_get_character(rep, n); + + if (c < 0 || c > 0x80) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%S) out of range", + mrb_fixnum_value((mrb_int)c)); + } + s[i] = c; + } + } + } + + tr_pattern_free(mrb, pat); + if (rep) tr_pattern_free(mrb, rep); + + RSTR_SET_LEN(RSTRING(str), len); + RSTRING_PTR(str)[len] = 0; + + return flag_changed; +} + +/* + * call-seq: + * str.tr(from_str, to_str) => new_str + * + * Returns a copy of str with the characters in from_str replaced by the + * corresponding characters in to_str. If to_str is shorter than from_str, + * it is padded with its last character in order to maintain the + * correspondence. + * + * "hello".tr('el', 'ip') #=> "hippo" + * "hello".tr('aeiou', '*') #=> "h*ll*" + * "hello".tr('aeiou', 'AA*') #=> "hAll*" + * + * Both strings may use the c1-c2 notation to denote ranges of characters, + * and from_str may start with a ^, which denotes all characters except + * those listed. + * + * "hello".tr('a-y', 'b-z') #=> "ifmmp" + * "hello".tr('^aeiou', '*') #=> "*e**o" + * + * The backslash character \ can be used to escape ^ or - and is otherwise + * ignored unless it appears at the end of a range or the end of the + * from_str or to_str: + * + * + * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld" + * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld" + * + * "hello\r\nworld".tr("\r", "") #=> "hello\nworld" + * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold" + * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld" + * + * "X['\\b']".tr("X\\", "") #=> "['b']" + * "X['\\b']".tr("X-\\]", "") #=> "'b'" + * + * Note: conversion is effective only in ASCII region. + */ +static mrb_value +mrb_str_tr(mrb_state *mrb, mrb_value str) +{ + mrb_value dup; + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + dup = mrb_str_dup(mrb, str); + str_tr(mrb, dup, p1, p2); + return dup; +} + +/* + * call-seq: + * str.tr!(from_str, to_str) -> str or nil + * + * Translates str in place, using the same rules as String#tr. + * Returns str, or nil if no changes were made. + */ +static mrb_value +mrb_str_tr_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + if (str_tr(mrb, str, p1, p2)) { + return str; + } + return mrb_nil_value(); +} + static mrb_value mrb_str_hex(mrb_state *mrb, mrb_value self) { @@ -620,6 +879,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "concat", mrb_str_concat_m, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "<<", mrb_str_concat_m, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "tr", mrb_str_tr, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "hex", mrb_str_hex, MRB_ARGS_NONE()); diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index b6146fb90..6b8a89c4d 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -147,6 +147,19 @@ assert('String#casecmp') do assert_equal 0, "abcdef".casecmp(o) end +assert('String#tr') do + assert_equal "ABC", "abc".tr('a-z', 'A-Z') + assert_equal "hippo", "hello".tr('el', 'ip') + assert_equal "Ruby", "Lisp".tr("Lisp", "Ruby") + assert_equal "*e**o", "hello".tr('^aeiou', '*') +end + +assert('String#tr!') do + s = "abcdefghijklmnopqR" + assert_equal "ab12222hijklmnopqR", s.tr!("cdefg", "12") + assert_equal "ab12222hijklmnopqR", s +end + assert('String#start_with?') do assert_true "hello".start_with?("heaven", "hell") assert_true !"hello".start_with?("heaven", "paradise") -- cgit v1.2.3 From c0ae8a96a1fb658b21428bee174602e9373eef3b Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 20 Sep 2018 16:46:42 +0900 Subject: Add `String#tr_s` and `String#tr_s!`; ref #4086 --- mrbgems/mruby-string-ext/src/string.c | 55 ++++++++++++++++++++++++++++++--- mrbgems/mruby-string-ext/test/string.rb | 13 ++++++++ 2 files changed, 64 insertions(+), 4 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 9d318cb1a..e41dde448 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -380,7 +380,7 @@ tr_get_character(const struct tr_pattern *pat, mrb_int n_th) } static mrb_bool -str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2) +str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze) { struct tr_pattern *pat; struct tr_pattern *rep; @@ -388,6 +388,7 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2) mrb_int len; mrb_int i; mrb_bool flag_changed = FALSE; + mrb_int lastch = -1; mrb_str_modify(mrb, mrb_str_ptr(str)); pat = tr_parse_pattern(mrb, p1, TRUE); @@ -401,6 +402,7 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2) if (n >= 0) { flag_changed = TRUE; if (rep == NULL) { + compact: memmove(s + i, s + i + 1, len - i); len--; i--; @@ -408,11 +410,12 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2) else { mrb_int c = tr_get_character(rep, n); + if (squeeze && c == lastch) goto compact; if (c < 0 || c > 0x80) { mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%S) out of range", mrb_fixnum_value((mrb_int)c)); } - s[i] = c; + lastch = s[i] = c; } } } @@ -471,7 +474,7 @@ mrb_str_tr(mrb_state *mrb, mrb_value str) mrb_get_args(mrb, "SS", &p1, &p2); dup = mrb_str_dup(mrb, str); - str_tr(mrb, dup, p1, p2); + str_tr(mrb, dup, p1, p2, FALSE); return dup; } @@ -488,7 +491,49 @@ mrb_str_tr_bang(mrb_state *mrb, mrb_value str) mrb_value p1, p2; mrb_get_args(mrb, "SS", &p1, &p2); - if (str_tr(mrb, str, p1, p2)) { + if (str_tr(mrb, str, p1, p2, FALSE)) { + return str; + } + return mrb_nil_value(); +} + +/* + * call-seq: + * str.tr_s(from_str, to_str) -> new_str + * + * Processes a copy of str as described under String#tr, then removes + * duplicate characters in regions that were affected by the translation. + * + * "hello".tr_s('l', 'r') #=> "hero" + * "hello".tr_s('el', '*') #=> "h*o" + * "hello".tr_s('el', 'hx') #=> "hhxo" + */ +static mrb_value +mrb_str_tr_s(mrb_state *mrb, mrb_value str) +{ + mrb_value dup; + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + dup = mrb_str_dup(mrb, str); + str_tr(mrb, dup, p1, p2, TRUE); + return dup; +} + +/* + * call-seq: + * str.tr_s!(from_str, to_str) -> str or nil + * + * Performs String#tr_s processing on str in place, returning + * str, or nil if no changes were made. + */ +static mrb_value +mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + if (str_tr(mrb, str, p1, p2, TRUE)) { return str; } return mrb_nil_value(); @@ -881,6 +926,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "<<", mrb_str_concat_m, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "tr", mrb_str_tr, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "hex", mrb_str_hex, MRB_ARGS_NONE()); diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 6b8a89c4d..d50a2b3b4 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -160,6 +160,19 @@ assert('String#tr!') do assert_equal "ab12222hijklmnopqR", s end +assert('String#tr_s') do + assert_equal "hero", "hello".tr_s('l', 'r') + assert_equal "h*o", "hello".tr_s('el', '*') + assert_equal "hhxo", "hello".tr_s('el', 'hx') +end + +assert('String#tr_s!') do + s = "hello" + assert_equal "hero", s.tr_s!('l', 'r') + assert_equal "hero", s + assert_nil s.tr_s!('l', 'r') +end + assert('String#start_with?') do assert_true "hello".start_with?("heaven", "hell") assert_true !"hello".start_with?("heaven", "paradise") -- cgit v1.2.3 From 68523b4ec4a271134aae34d744582a974558c962 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 20 Sep 2018 17:14:15 +0900 Subject: Add `String#squeeze` and `#squeeze!`; ref #4086 mruby restriction: `String#squeeze` can take more than 1 pattern arguments in CRuby, in that case, the intersection of patterns will be used to match. But in mruby, it doesn't take multiple patterns. --- mrbgems/mruby-string-ext/src/string.c | 103 ++++++++++++++++++++++++++++++-- mrbgems/mruby-string-ext/test/string.rb | 12 ++++ 2 files changed, 111 insertions(+), 4 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index e41dde448..ddc4505fc 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -262,12 +262,11 @@ tr_pattern_free(mrb_state *mrb, struct tr_pattern *pat) } static struct tr_pattern* -tr_parse_pattern(mrb_state *mrb, const mrb_value v_pattern, mrb_bool flag_reverse_enable) +tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_pattern, mrb_bool flag_reverse_enable) { const char *pattern = RSTRING_PTR(v_pattern); int pattern_length = RSTRING_LEN(v_pattern); mrb_bool flag_reverse = FALSE; - struct tr_pattern *ret = NULL; struct tr_pattern *pat1; int i = 0; @@ -391,8 +390,8 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squee mrb_int lastch = -1; mrb_str_modify(mrb, mrb_str_ptr(str)); - pat = tr_parse_pattern(mrb, p1, TRUE); - rep = tr_parse_pattern(mrb, p2, FALSE); + pat = tr_parse_pattern(mrb, NULL, p1, TRUE); + rep = tr_parse_pattern(mrb, NULL, p2, FALSE); s = RSTRING_PTR(str); len = RSTRING_LEN(str); @@ -539,6 +538,100 @@ mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str) return mrb_nil_value(); } +static mrb_bool +str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) +{ + struct tr_pattern *pat = NULL; + mrb_int i; + char *s; + mrb_int len; + mrb_bool flag_changed = FALSE; + mrb_int lastch = -1; + + mrb_str_modify(mrb, mrb_str_ptr(str)); + if (!mrb_nil_p(v_pat)) { + pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); + } + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + + if (pat) { + for (i = 0; i < len; i++) { + mrb_int n = tr_find_character(pat, s[i]); + + if (n >= 0 && s[i] == lastch) { + flag_changed = TRUE; + memmove(s + i, s + i + 1, len - i); + len--; + i--; + } + lastch = s[i]; + } + } + else { + for (i = 0; i < len; i++) { + if (s[i] == lastch) { + flag_changed = TRUE; + memmove(s + i, s + i + 1, len - i); + len--; + i--; + } + lastch = s[i]; + } + } + tr_pattern_free(mrb, pat); + + RSTR_SET_LEN(RSTRING(str), len); + RSTRING_PTR(str)[len] = 0; + + return flag_changed; +} + +/* + * call-seq: + * str.squeeze([other_str]) -> new_str + * + * Builds a set of characters from the other_str + * parameter(s) using the procedure described for String#count. Returns a + * new string where runs of the same character that occur in this set are + * replaced by a single character. If no arguments are given, all runs of + * identical characters are replaced by a single character. + * + * "yellow moon".squeeze #=> "yelow mon" + * " now is the".squeeze(" ") #=> " now is the" + * "putters shoot balls".squeeze("m-z") #=> "puters shot balls" + */ +static mrb_value +mrb_str_squeeze(mrb_state *mrb, mrb_value str) +{ + mrb_value pat = mrb_nil_value(); + mrb_value dup; + + mrb_get_args(mrb, "|S", &pat); + dup = mrb_str_dup(mrb, str); + str_squeeze(mrb, dup, pat); + return dup; +} + +/* + * call-seq: + * str.squeeze!([other_str]) -> str or nil + * + * Squeezes str in place, returning either str, or nil if no + * changes were made. + */ +static mrb_value +mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value pat = mrb_nil_value(); + + mrb_get_args(mrb, "|S", &pat); + if (str_squeeze(mrb, str, pat)) { + return str; + } + return mrb_nil_value(); +} + static mrb_value mrb_str_hex(mrb_state *mrb, mrb_value self) { @@ -928,6 +1021,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "squeeze", mrb_str_squeeze, MRB_ARGS_OPT(1)); + mrb_define_method(mrb, s, "squeeze!", mrb_str_squeeze_bang, MRB_ARGS_OPT(1)); mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "hex", mrb_str_hex, MRB_ARGS_NONE()); diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index d50a2b3b4..fd6f83e71 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -173,6 +173,18 @@ assert('String#tr_s!') do assert_nil s.tr_s!('l', 'r') end +assert('String#squeeze') do + assert_equal "yelow mon", "yellow moon".squeeze + assert_equal " now is the", " now is the".squeeze(" ") + assert_equal "puters shot balls", "putters shoot balls".squeeze("m-z") +end + +assert('String#squeeze!') do + s = " now is the" + assert_equal " now is the", s.squeeze!(" ") + assert_equal " now is the", s +end + assert('String#start_with?') do assert_true "hello".start_with?("heaven", "hell") assert_true !"hello".start_with?("heaven", "paradise") -- cgit v1.2.3 From 58f7f2361a39ae288c4233ca434e1dbd37f127d0 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 20 Sep 2018 17:25:07 +0900 Subject: Implement `String#count`; ref #4086 mruby restriction: In mruby, `String#count` does not take multiple pattern arguments, but only one pattern. --- mrbgems/mruby-string-ext/src/string.c | 34 +++++++++++++++++++++++++++++++++ mrbgems/mruby-string-ext/test/string.rb | 9 +++++++++ 2 files changed, 43 insertions(+) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index ddc4505fc..d42a5d488 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -632,6 +632,39 @@ mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str) return mrb_nil_value(); } +/* + * call_seq: + * str.count([other_str]) -> integer + * + * Each other_str parameter defines a set of characters to count. The + * intersection of these sets defines the characters to count in str. Any + * other_str that starts with a caret ^ is negated. The sequence c1-c2 + * means all characters between c1 and c2. The backslash character \ can + * be used to escape ^ or - and is otherwise ignored unless it appears at + * the end of a sequence or the end of a other_str. + */ +static mrb_value +mrb_str_count(mrb_state *mrb, mrb_value str) +{ + mrb_value v_pat = mrb_nil_value(); + struct tr_pattern *pat = NULL; + mrb_int i; + char *s; + mrb_int len; + mrb_int count = 0; + + mrb_get_args(mrb, "S", &v_pat); + pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + for (i = 0; i < len; i++) { + mrb_int n = tr_find_character(pat, s[i]); + + if (n >= 0) count++; + } + return mrb_fixnum_value(count); +} + static mrb_value mrb_str_hex(mrb_state *mrb, mrb_value self) { @@ -1017,6 +1050,7 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "concat", mrb_str_concat_m, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "<<", mrb_str_concat_m, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "count", mrb_str_count, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "tr", mrb_str_tr, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2)); diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index fd6f83e71..3e9ab5b1b 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -147,6 +147,15 @@ assert('String#casecmp') do assert_equal 0, "abcdef".casecmp(o) end +assert('String#count') do + s = "abccdeff123" + assert_equal 1, s.count("a") + assert_equal 2, s.count("ab") + assert_equal 9, s.count("^c") + assert_equal 8, s.count("a-z") + assert_equal 4, s.count("a0-9") +end + assert('String#tr') do assert_equal "ABC", "abc".tr('a-z', 'A-Z') assert_equal "hippo", "hello".tr('el', 'ip') -- cgit v1.2.3 From 346f154ece3bd68b63dfee4b4c4d9a20c0eee063 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 20 Sep 2018 18:05:26 +0900 Subject: Implement `String#delete` and `#delete!`; ref #4086 mruby restriction: In mruby, `String#delete` only takes single pattern argument. --- mrbgems/mruby-string-ext/src/string.c | 58 +++++++++++++++++++++++++++++++++ mrbgems/mruby-string-ext/test/string.rb | 14 ++++++++ 2 files changed, 72 insertions(+) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index d42a5d488..a91b483e7 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -632,6 +632,62 @@ mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str) return mrb_nil_value(); } +static mrb_bool +str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) +{ + struct tr_pattern *pat = NULL; + mrb_int i; + char *s; + mrb_int len; + mrb_bool flag_changed = FALSE; + + mrb_str_modify(mrb, mrb_str_ptr(str)); + pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + + for (i = 0; i < len; i++) { + mrb_int n = tr_find_character(pat, s[i]); + + if (n >= 0) { + flag_changed = TRUE; + memmove(s + i, s + i + 1, len - i); + len--; + i--; + } + } + tr_pattern_free(mrb, pat); + + RSTR_SET_LEN(RSTRING(str), len); + RSTRING_PTR(str)[len] = 0; + + return flag_changed; +} + +static mrb_value +mrb_str_delete(mrb_state *mrb, mrb_value str) +{ + mrb_value pat; + mrb_value dup; + + mrb_get_args(mrb, "S", &pat); + dup = mrb_str_dup(mrb, str); + str_delete(mrb, dup, pat); + return dup; +} + +static mrb_value +mrb_str_delete_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value pat; + + mrb_get_args(mrb, "S", &pat); + if (str_delete(mrb, str, pat)) { + return str; + } + return mrb_nil_value(); +} + /* * call_seq: * str.count([other_str]) -> integer @@ -1057,6 +1113,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "squeeze", mrb_str_squeeze, MRB_ARGS_OPT(1)); mrb_define_method(mrb, s, "squeeze!", mrb_str_squeeze_bang, MRB_ARGS_OPT(1)); + mrb_define_method(mrb, s, "delete", mrb_str_delete, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "delete!", mrb_str_delete_bang, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "hex", mrb_str_hex, MRB_ARGS_NONE()); diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 3e9ab5b1b..36a253989 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -194,6 +194,20 @@ assert('String#squeeze!') do assert_equal " now is the", s end +assert('String#delete') do + assert_equal "he", "hello".delete("lo") + assert_equal "hll", "hello".delete("aeiou") + assert_equal "ll", "hello".delete("^l") + assert_equal "ho", "hello".delete("ej-m") +end + +assert('String#delete!') do + s = "hello" + assert_equal "he", s.delete!("lo") + assert_equal "he", s + assert_nil s.delete!("lz") +end + assert('String#start_with?') do assert_true "hello".start_with?("heaven", "hell") assert_true !"hello".start_with?("heaven", "paradise") -- cgit v1.2.3 From 7b04fcd092006b6e78cd63619fb7ae972f8e0c5d Mon Sep 17 00:00:00 2001 From: Clayton Smith Date: Mon, 24 Sep 2018 18:16:12 -0400 Subject: Fix comparisons in str_squeeze. --- mrbgems/mruby-string-ext/src/string.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index a91b483e7..8d79c80e2 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -543,7 +543,7 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) { struct tr_pattern *pat = NULL; mrb_int i; - char *s; + unsigned char *s; mrb_int len; mrb_bool flag_changed = FALSE; mrb_int lastch = -1; @@ -552,7 +552,7 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) if (!mrb_nil_p(v_pat)) { pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); } - s = RSTRING_PTR(str); + s = (unsigned char *)RSTRING_PTR(str); len = RSTRING_LEN(str); if (pat) { -- cgit v1.2.3 From 67897195deb7d7a3cc0927b0f585416153e49448 Mon Sep 17 00:00:00 2001 From: Takeshi Watanabe Date: Tue, 25 Sep 2018 11:48:59 +0900 Subject: Fix leak in `mrb_str_count` --- mrbgems/mruby-string-ext/src/string.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 8d79c80e2..b66ada1d2 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -718,6 +718,7 @@ mrb_str_count(mrb_state *mrb, mrb_value str) if (n >= 0) count++; } + tr_pattern_free(mrb, pat); return mrb_fixnum_value(count); } -- cgit v1.2.3 From fb49a9431482d7f60d065af68c5c0e028b130c41 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Sun, 23 Sep 2018 21:10:25 +0900 Subject: Rename `tr_pattern_free()` to `tr_free_pattern()`. --- mrbgems/mruby-string-ext/src/string.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index b66ada1d2..3faa3d428 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -252,7 +252,7 @@ struct tr_pattern { }; static void -tr_pattern_free(mrb_state *mrb, struct tr_pattern *pat) +tr_free_pattern(mrb_state *mrb, struct tr_pattern *pat) { while (pat) { struct tr_pattern *p = pat->next; @@ -281,7 +281,7 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte pat1 = (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern) + 2); if (pat1 == NULL && ret) { nomem: - tr_pattern_free(mrb, ret); + tr_free_pattern(mrb, ret); mrb_exc_raise(mrb, mrb_obj_value(mrb->nomem_err)); return NULL; /* not reached */ } @@ -419,8 +419,8 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squee } } - tr_pattern_free(mrb, pat); - if (rep) tr_pattern_free(mrb, rep); + tr_free_pattern(mrb, pat); + if (rep) tr_free_pattern(mrb, rep); RSTR_SET_LEN(RSTRING(str), len); RSTRING_PTR(str)[len] = 0; @@ -579,7 +579,7 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) lastch = s[i]; } } - tr_pattern_free(mrb, pat); + tr_free_pattern(mrb, pat); RSTR_SET_LEN(RSTRING(str), len); RSTRING_PTR(str)[len] = 0; @@ -656,7 +656,7 @@ str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) i--; } } - tr_pattern_free(mrb, pat); + tr_free_pattern(mrb, pat); RSTR_SET_LEN(RSTRING(str), len); RSTRING_PTR(str)[len] = 0; @@ -718,7 +718,7 @@ mrb_str_count(mrb_state *mrb, mrb_value str) if (n >= 0) count++; } - tr_pattern_free(mrb, pat); + tr_free_pattern(mrb, pat); return mrb_fixnum_value(count); } -- cgit v1.2.3 From 9e3cbaaacd01aa75622252fe42d35a47014da1fd Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 26 Sep 2018 12:16:07 +0900 Subject: Avoid using `memmove()` for performance; fix #4130 --- mrbgems/mruby-string-ext/src/string.c | 61 ++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 30 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 3faa3d428..aedff7266 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -386,6 +386,7 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squee char *s; mrb_int len; mrb_int i; + mrb_int j; mrb_bool flag_changed = FALSE; mrb_int lastch = -1; @@ -395,21 +396,22 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squee s = RSTRING_PTR(str); len = RSTRING_LEN(str); - for (i = 0; i < len; i++) { + for (i=j=0; ij) s[j] = s[i]; if (n >= 0) { flag_changed = TRUE; if (rep == NULL) { - compact: - memmove(s + i, s + i + 1, len - i); - len--; - i--; + j--; } else { mrb_int c = tr_get_character(rep, n); - if (squeeze && c == lastch) goto compact; + if (squeeze && c == lastch) { + j--; + continue; + } if (c < 0 || c > 0x80) { mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%S) out of range", mrb_fixnum_value((mrb_int)c)); @@ -422,9 +424,10 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squee tr_free_pattern(mrb, pat); if (rep) tr_free_pattern(mrb, rep); - RSTR_SET_LEN(RSTRING(str), len); - RSTRING_PTR(str)[len] = 0; - + if (flag_changed) { + RSTR_SET_LEN(RSTRING(str), j); + RSTRING_PTR(str)[j] = 0; + } return flag_changed; } @@ -542,7 +545,7 @@ static mrb_bool str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) { struct tr_pattern *pat = NULL; - mrb_int i; + mrb_int i, j; unsigned char *s; mrb_int len; mrb_bool flag_changed = FALSE; @@ -556,34 +559,33 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) len = RSTRING_LEN(str); if (pat) { - for (i = 0; i < len; i++) { + for (i=j=0; ij) s[j] = s[i]; if (n >= 0 && s[i] == lastch) { flag_changed = TRUE; - memmove(s + i, s + i + 1, len - i); - len--; - i--; + j--; } lastch = s[i]; } } else { - for (i = 0; i < len; i++) { + for (i=j=0; ij) s[j] = s[i]; if (s[i] == lastch) { flag_changed = TRUE; - memmove(s + i, s + i + 1, len - i); - len--; - i--; + j--; } lastch = s[i]; } } tr_free_pattern(mrb, pat); - RSTR_SET_LEN(RSTRING(str), len); - RSTRING_PTR(str)[len] = 0; - + if (flag_changed) { + RSTR_SET_LEN(RSTRING(str), j); + RSTRING_PTR(str)[j] = 0; + } return flag_changed; } @@ -636,7 +638,7 @@ static mrb_bool str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) { struct tr_pattern *pat = NULL; - mrb_int i; + mrb_int i, j; char *s; mrb_int len; mrb_bool flag_changed = FALSE; @@ -646,21 +648,20 @@ str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) s = RSTRING_PTR(str); len = RSTRING_LEN(str); - for (i = 0; i < len; i++) { + for (i=j=0; ij) s[j] = s[i]; if (n >= 0) { flag_changed = TRUE; - memmove(s + i, s + i + 1, len - i); - len--; - i--; + j--; } } tr_free_pattern(mrb, pat); - - RSTR_SET_LEN(RSTRING(str), len); - RSTRING_PTR(str)[len] = 0; - + if (flag_changed) { + RSTR_SET_LEN(RSTRING(str), j); + RSTRING_PTR(str)[j] = 0; + } return flag_changed; } -- cgit v1.2.3 From 01c2f59e14d8483ab1dc9dc8b06d3a42f0ecc88e Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 26 Sep 2018 12:32:33 +0900 Subject: Revert "Fix comparisons in str_squeeze." This reverts commit 7b04fcd092006b6e78cd63619fb7ae972f8e0c5d. The issue was addressed by 9e3cbaa. No longer needed. --- mrbgems/mruby-string-ext/src/string.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index aedff7266..ea8d77249 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -546,7 +546,7 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) { struct tr_pattern *pat = NULL; mrb_int i, j; - unsigned char *s; + char *s; mrb_int len; mrb_bool flag_changed = FALSE; mrb_int lastch = -1; @@ -555,7 +555,7 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) if (!mrb_nil_p(v_pat)) { pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); } - s = (unsigned char *)RSTRING_PTR(str); + s = RSTRING_PTR(str); len = RSTRING_LEN(str); if (pat) { -- cgit v1.2.3 From e6841abad69df4d20e359e0b1c2eb190696ec0c4 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 27 Sep 2018 21:43:35 +0900 Subject: Fixed a `String#squeeze` bug in handling `iso-8859-1` strings; ref #4127 --- mrbgems/mruby-string-ext/src/string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index ea8d77249..9e35b18a9 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -573,7 +573,7 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) else { for (i=j=0; ij) s[j] = s[i]; - if (s[i] == lastch) { + if (s[i] >= 0 && s[i] == lastch) { flag_changed = TRUE; j--; } -- cgit v1.2.3 From 52d55e6069a513f664f94947778011120e4974c3 Mon Sep 17 00:00:00 2001 From: take-cheeze Date: Mon, 29 Oct 2018 13:21:36 +0900 Subject: Keep tr_pattern static --- mrbgems/mruby-string-ext/src/string.c | 107 +++++++++++++++++++++------------- 1 file changed, 65 insertions(+), 42 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 9e35b18a9..9b317386e 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -235,6 +235,12 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self) return mrb_false_value(); } +enum tr_pattern_type { + TR_UNINITIALIZED = 0, + TR_IN_ORDER = 1, + TR_RANGE = 2, +}; + /* #tr Pattern syntax @@ -245,18 +251,26 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self) */ struct tr_pattern { uint8_t type; // 1:in-order, 2:range - mrb_bool flag_reverse; - int16_t n; + mrb_bool flag_reverse : 1; + mrb_bool flag_on_stack : 1; + uint16_t n; + union { + uint16_t start_pos; + char ch[2]; + } val; struct tr_pattern *next; - char ch[]; }; -static void +#define STATIC_TR_PATTERN { TR_UNINITIALIZED, FALSE, TRUE, 0, {}, NULL } + +static inline void tr_free_pattern(mrb_state *mrb, struct tr_pattern *pat) { while (pat) { struct tr_pattern *p = pat->next; - mrb_free(mrb, pat); + if (!pat->flag_on_stack) { + mrb_free(mrb, pat); + } pat = p; } } @@ -277,20 +291,24 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte while (i < pattern_length) { /* is range pattern ? */ + mrb_bool const ret_uninit = (ret->type == TR_UNINITIALIZED); + pat1 = ret_uninit + ? ret + : (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern)); if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') { - pat1 = (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern) + 2); if (pat1 == NULL && ret) { nomem: tr_free_pattern(mrb, ret); mrb_exc_raise(mrb, mrb_obj_value(mrb->nomem_err)); return NULL; /* not reached */ } - pat1->type = 2; + pat1->type = TR_RANGE; pat1->flag_reverse = flag_reverse; + pat1->flag_on_stack = ret_uninit; pat1->n = pattern[i+2] - pattern[i] + 1; pat1->next = NULL; - pat1->ch[0] = pattern[i]; - pat1->ch[1] = pattern[i+2]; + pat1->val.ch[0] = pattern[i]; + pat1->val.ch[1] = pattern[i+2]; i += 3; } else { @@ -305,18 +323,18 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte } len = i - start_pos; - pat1 = (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern) + len); if (pat1 == NULL && ret) { goto nomem; } - pat1->type = 1; + pat1->type = TR_IN_ORDER; pat1->flag_reverse = flag_reverse; + pat1->flag_on_stack = ret_uninit; pat1->n = len; pat1->next = NULL; - memcpy(pat1->ch, &pattern[start_pos], len); + pat1->val.start_pos = start_pos; } - if (ret == NULL) { + if (ret == NULL || ret_uninit) { ret = pat1; } else { @@ -331,23 +349,26 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte return ret; } -static mrb_int -tr_find_character(const struct tr_pattern *pat, int ch) +static inline mrb_int +tr_find_character(const struct tr_pattern *pat, const char *pat_str, int ch) { mrb_int ret = -1; mrb_int n_sum = 0; mrb_int flag_reverse = pat ? pat->flag_reverse : 0; while (pat != NULL) { - if (pat->type == 1) { /* pat->type == 1 in-order */ + if (pat->type == TR_IN_ORDER) { int i; for (i = 0; i < pat->n; i++) { - if (pat->ch[i] == ch) ret = n_sum + i; + if (pat_str[pat->val.start_pos + i] == ch) ret = n_sum + i; } } - else { /* pat->type == 2 range */ - if (pat->ch[0] <= ch && ch <= pat->ch[1]) - ret = n_sum + ch - pat->ch[0]; + else if (pat->type == TR_RANGE) { + if (pat->val.ch[0] <= ch && ch <= pat->val.ch[1]) + ret = n_sum + ch - pat->val.ch[0]; + } + else { + mrb_assert(FALSE); // should not reach } n_sum += pat->n; pat = pat->next; @@ -359,17 +380,17 @@ tr_find_character(const struct tr_pattern *pat, int ch) return ret; } -static mrb_int -tr_get_character(const struct tr_pattern *pat, mrb_int n_th) +static inline mrb_int +tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th) { mrb_int n_sum = 0; while (pat != NULL) { if (n_th < (n_sum + pat->n)) { mrb_int i = (n_th - n_sum); - return (pat->type == 1) ? pat->ch[i] :pat->ch[0] + i; + return (pat->type == TR_IN_ORDER) ? pat_str[pat->val.start_pos + i] :pat->val.ch[0] + i; } if (pat->next == NULL) { - return (pat->type == 1) ? pat->ch[pat->n - 1] : pat->ch[1]; + return (pat->type == TR_IN_ORDER) ? pat_str[pat->val.start_pos + pat->n - 1] : pat->val.ch[1]; } n_sum += pat->n; pat = pat->next; @@ -381,23 +402,24 @@ tr_get_character(const struct tr_pattern *pat, mrb_int n_th) static mrb_bool str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze) { - struct tr_pattern *pat; - struct tr_pattern *rep; + struct tr_pattern pat = STATIC_TR_PATTERN; + struct tr_pattern rep_storage = STATIC_TR_PATTERN; char *s; mrb_int len; mrb_int i; mrb_int j; mrb_bool flag_changed = FALSE; mrb_int lastch = -1; + struct tr_pattern *rep; mrb_str_modify(mrb, mrb_str_ptr(str)); - pat = tr_parse_pattern(mrb, NULL, p1, TRUE); - rep = tr_parse_pattern(mrb, NULL, p2, FALSE); + tr_parse_pattern(mrb, &pat, p1, TRUE); + rep = tr_parse_pattern(mrb, &rep_storage, p2, FALSE); s = RSTRING_PTR(str); len = RSTRING_LEN(str); for (i=j=0; ij) s[j] = s[i]; if (n >= 0) { @@ -406,7 +428,7 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squee j--; } else { - mrb_int c = tr_get_character(rep, n); + mrb_int c = tr_get_character(rep, RSTRING_PTR(p2), n); if (squeeze && c == lastch) { j--; @@ -421,8 +443,8 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squee } } - tr_free_pattern(mrb, pat); - if (rep) tr_free_pattern(mrb, rep); + tr_free_pattern(mrb, &pat); + tr_free_pattern(mrb, rep); if (flag_changed) { RSTR_SET_LEN(RSTRING(str), j); @@ -544,6 +566,7 @@ mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str) static mrb_bool str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) { + struct tr_pattern pat_storage = STATIC_TR_PATTERN; struct tr_pattern *pat = NULL; mrb_int i, j; char *s; @@ -553,14 +576,14 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) mrb_str_modify(mrb, mrb_str_ptr(str)); if (!mrb_nil_p(v_pat)) { - pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); + pat = tr_parse_pattern(mrb, &pat_storage, v_pat, TRUE); } s = RSTRING_PTR(str); len = RSTRING_LEN(str); if (pat) { for (i=j=0; ij) s[j] = s[i]; if (n >= 0 && s[i] == lastch) { @@ -637,19 +660,19 @@ mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str) static mrb_bool str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) { - struct tr_pattern *pat = NULL; + struct tr_pattern pat = STATIC_TR_PATTERN; mrb_int i, j; char *s; mrb_int len; mrb_bool flag_changed = FALSE; mrb_str_modify(mrb, mrb_str_ptr(str)); - pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); + tr_parse_pattern(mrb, &pat, v_pat, TRUE); s = RSTRING_PTR(str); len = RSTRING_LEN(str); for (i=j=0; ij) s[j] = s[i]; if (n >= 0) { @@ -657,7 +680,7 @@ str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) j--; } } - tr_free_pattern(mrb, pat); + tr_free_pattern(mrb, &pat); if (flag_changed) { RSTR_SET_LEN(RSTRING(str), j); RSTRING_PTR(str)[j] = 0; @@ -704,22 +727,22 @@ static mrb_value mrb_str_count(mrb_state *mrb, mrb_value str) { mrb_value v_pat = mrb_nil_value(); - struct tr_pattern *pat = NULL; mrb_int i; char *s; mrb_int len; mrb_int count = 0; + struct tr_pattern pat = STATIC_TR_PATTERN; mrb_get_args(mrb, "S", &v_pat); - pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); + tr_parse_pattern(mrb, &pat, v_pat, TRUE); s = RSTRING_PTR(str); len = RSTRING_LEN(str); for (i = 0; i < len; i++) { - mrb_int n = tr_find_character(pat, s[i]); + mrb_int n = tr_find_character(&pat, RSTRING_PTR(v_pat), s[i]); if (n >= 0) count++; } - tr_free_pattern(mrb, pat); + tr_free_pattern(mrb, &pat); return mrb_fixnum_value(count); } -- cgit v1.2.3 From cca5e0977ca4ac54d109e68e256e89b041833c7a Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Tue, 30 Oct 2018 09:01:12 +0900 Subject: VS 2017 C does not understand inline struct initialization; ref #4153 --- mrbgems/mruby-string-ext/src/string.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 9b317386e..9fd84af7b 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -252,7 +252,7 @@ enum tr_pattern_type { struct tr_pattern { uint8_t type; // 1:in-order, 2:range mrb_bool flag_reverse : 1; - mrb_bool flag_on_stack : 1; + mrb_bool flag_on_heap : 1; uint16_t n; union { uint16_t start_pos; @@ -261,14 +261,14 @@ struct tr_pattern { struct tr_pattern *next; }; -#define STATIC_TR_PATTERN { TR_UNINITIALIZED, FALSE, TRUE, 0, {}, NULL } +#define STATIC_TR_PATTERN { 0 } static inline void tr_free_pattern(mrb_state *mrb, struct tr_pattern *pat) { while (pat) { struct tr_pattern *p = pat->next; - if (!pat->flag_on_stack) { + if (pat->flag_on_heap) { mrb_free(mrb, pat); } pat = p; @@ -304,7 +304,7 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte } pat1->type = TR_RANGE; pat1->flag_reverse = flag_reverse; - pat1->flag_on_stack = ret_uninit; + pat1->flag_on_heap = !ret_uninit; pat1->n = pattern[i+2] - pattern[i] + 1; pat1->next = NULL; pat1->val.ch[0] = pattern[i]; @@ -328,7 +328,7 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte } pat1->type = TR_IN_ORDER; pat1->flag_reverse = flag_reverse; - pat1->flag_on_stack = ret_uninit; + pat1->flag_on_heap = !ret_uninit; pat1->n = len; pat1->next = NULL; pat1->val.start_pos = start_pos; -- cgit v1.2.3 From 7ad53273a2c5346557a233d040ad233019c03277 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 1 Nov 2018 22:52:12 +0900 Subject: Silence Appveyor's VC compilation warnings. --- mrbgems/mruby-pack/src/pack.c | 2 +- mrbgems/mruby-sleep/src/mrb_sleep.c | 2 +- mrbgems/mruby-string-ext/src/string.c | 5 +++-- src/vm.c | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-pack/src/pack.c b/mrbgems/mruby-pack/src/pack.c index f970d9339..796ba4d34 100644 --- a/mrbgems/mruby-pack/src/pack.c +++ b/mrbgems/mruby-pack/src/pack.c @@ -64,7 +64,7 @@ static int littleendian = 0; const static unsigned char base64chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; -static signed char base64_dec_tab[128]; +static unsigned char base64_dec_tab[128]; static int diff --git a/mrbgems/mruby-sleep/src/mrb_sleep.c b/mrbgems/mruby-sleep/src/mrb_sleep.c index ce643cf12..0428f29eb 100644 --- a/mrbgems/mruby-sleep/src/mrb_sleep.c +++ b/mrbgems/mruby-sleep/src/mrb_sleep.c @@ -29,7 +29,7 @@ #ifdef _WIN32 #include #define sleep(x) Sleep(x * 1000) - #define usleep(x) Sleep(((x)<1000) ? 1 : ((x)/1000)) + #define usleep(x) Sleep((DWORD)((x)<1000) ? 1 : ((x)/1000)) #else #include #include diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 9fd84af7b..c0105fbd9 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -279,7 +279,7 @@ static struct tr_pattern* tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_pattern, mrb_bool flag_reverse_enable) { const char *pattern = RSTRING_PTR(v_pattern); - int pattern_length = RSTRING_LEN(v_pattern); + mrb_int pattern_length = RSTRING_LEN(v_pattern); mrb_bool flag_reverse = FALSE; struct tr_pattern *pat1; int i = 0; @@ -438,7 +438,8 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squee mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%S) out of range", mrb_fixnum_value((mrb_int)c)); } - lastch = s[i] = c; + lastch = c; + s[i] = (char)c; } } } diff --git a/src/vm.c b/src/vm.c index 28b375c68..8157f6ca0 100644 --- a/src/vm.c +++ b/src/vm.c @@ -186,7 +186,7 @@ stack_extend_alloc(mrb_state *mrb, mrb_int room) if (off > size) size = off; #ifdef MRB_STACK_EXTEND_DOUBLING - if (room <= (size_t)size) + if ((size_t)room <= size) size *= 2; else size += room; -- cgit v1.2.3 From 65b04066a25812fe51e25d028d7cc2ef0b53fbc2 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Fri, 2 Nov 2018 08:54:24 +0900 Subject: Empty pattern string can generate `TR_UNINITIALIZED` pattern; fix #4156 --- mrbgems/mruby-string-ext/src/string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index c0105fbd9..6f574b9db 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -368,7 +368,7 @@ tr_find_character(const struct tr_pattern *pat, const char *pat_str, int ch) ret = n_sum + ch - pat->val.ch[0]; } else { - mrb_assert(FALSE); // should not reach + mrb_assert(pat->type == TR_UNINITIALIZED); } n_sum += pat->n; pat = pat->next; -- cgit v1.2.3 From 8e596956259353ddf3ff0370028a758d9ce435d8 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Fri, 2 Nov 2018 08:55:17 +0900 Subject: Empty pattern string for `String#tr` should remove characters; fix #4157 --- mrbgems/mruby-string-ext/src/string.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 6f574b9db..460c8509e 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -384,13 +384,29 @@ static inline mrb_int tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th) { mrb_int n_sum = 0; + while (pat != NULL) { if (n_th < (n_sum + pat->n)) { mrb_int i = (n_th - n_sum); - return (pat->type == TR_IN_ORDER) ? pat_str[pat->val.start_pos + i] :pat->val.ch[0] + i; + + switch (pat->type) { + case TR_IN_ORDER: + return pat_str[pat->val.start_pos + i]; + case TR_RANGE: + return pat->val.ch[0]+i; + case TR_UNINITIALIZED: + return -1; + } } if (pat->next == NULL) { - return (pat->type == TR_IN_ORDER) ? pat_str[pat->val.start_pos + pat->n - 1] : pat->val.ch[1]; + switch (pat->type) { + case TR_IN_ORDER: + return pat_str[pat->val.start_pos + pat->n - 1]; + case TR_RANGE: + return pat->val.ch[1]; + case TR_UNINITIALIZED: + return -1; + } } n_sum += pat->n; pat = pat->next; @@ -430,11 +446,11 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squee else { mrb_int c = tr_get_character(rep, RSTRING_PTR(p2), n); - if (squeeze && c == lastch) { + if (c < 0 || (squeeze && c == lastch)) { j--; continue; } - if (c < 0 || c > 0x80) { + if (c > 0x80) { mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%S) out of range", mrb_fixnum_value((mrb_int)c)); } -- cgit v1.2.3 From 4550f4e38153c623537e6df53a4fe7c1c063adc0 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 15 Nov 2018 02:03:54 +0900 Subject: Pattern length may overflow `uint16_t`; fixed #4163 The issue is reported by `https://hackerone.com/dgaletic`. --- mrbgems/mruby-string-ext/src/string.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 460c8509e..cfc194906 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -282,7 +282,7 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte mrb_int pattern_length = RSTRING_LEN(v_pattern); mrb_bool flag_reverse = FALSE; struct tr_pattern *pat1; - int i = 0; + mrb_int i = 0; if(flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') { flag_reverse = TRUE; @@ -313,8 +313,8 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte } else { /* in order pattern. */ - int start_pos = i++; - int len; + mrb_int start_pos = i++; + mrb_int len; while (i < pattern_length) { if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') @@ -323,6 +323,9 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte } len = i - start_pos; + if (len > UINT16_MAX) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "tr pattern too long (max 65536)"); + } if (pat1 == NULL && ret) { goto nomem; } -- cgit v1.2.3 From 485955eccc4993df89bdd227cdf9b0e2f6c95c9c Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 15 Nov 2018 04:44:27 +0900 Subject: `String#{squeeze,delete,count}` to use bitmap for matching; ref #4163 --- mrbgems/mruby-string-ext/src/string.c | 79 +++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 12 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index cfc194906..6d661c352 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -418,6 +418,59 @@ tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th return -1; } +static inline void +tr_bitmap_set(uint8_t bitmap[32], uint8_t ch) +{ + uint8_t idx1 = ch / 8; + uint8_t idx2 = ch % 8; + bitmap[idx1] |= (1<flag_reverse : 0; + int i; + + for (i=0; i<32; i++) { + bitmap[i] = 0; + } + while (pat != NULL) { + if (pat->type == TR_IN_ORDER) { + for (i = 0; i < pat->n; i++) { + tr_bitmap_set(bitmap, pattern[pat->val.start_pos + i]); + } + } + else if (pat->type == TR_RANGE) { + for (i = pat->val.ch[0]; i < pat->val.ch[1]; i++) { + tr_bitmap_set(bitmap, i); + } + } + else { + mrb_assert(pat->type == TR_UNINITIALIZED); + } + pat = pat->next; + } + + if (flag_reverse) { + for (i=0; i<32; i++) { + bitmap[i] ^= 0xff; + } + } +} + static mrb_bool str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze) { @@ -593,20 +646,21 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) mrb_int len; mrb_bool flag_changed = FALSE; mrb_int lastch = -1; + uint8_t bitmap[32]; mrb_str_modify(mrb, mrb_str_ptr(str)); if (!mrb_nil_p(v_pat)) { pat = tr_parse_pattern(mrb, &pat_storage, v_pat, TRUE); + tr_compile_pattern(pat, v_pat, bitmap); + tr_free_pattern(mrb, pat); } s = RSTRING_PTR(str); len = RSTRING_LEN(str); if (pat) { for (i=j=0; ij) s[j] = s[i]; - if (n >= 0 && s[i] == lastch) { + if (tr_bitmap_detect(bitmap, s[i]) && s[i] == lastch) { flag_changed = TRUE; j--; } @@ -623,7 +677,6 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) lastch = s[i]; } } - tr_free_pattern(mrb, pat); if (flag_changed) { RSTR_SET_LEN(RSTRING(str), j); @@ -685,22 +738,23 @@ str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) char *s; mrb_int len; mrb_bool flag_changed = FALSE; + uint8_t bitmap[32]; mrb_str_modify(mrb, mrb_str_ptr(str)); tr_parse_pattern(mrb, &pat, v_pat, TRUE); + tr_compile_pattern(&pat, v_pat, bitmap); + tr_free_pattern(mrb, &pat); + s = RSTRING_PTR(str); len = RSTRING_LEN(str); for (i=j=0; ij) s[j] = s[i]; - if (n >= 0) { + if (tr_bitmap_detect(bitmap, s[i])) { flag_changed = TRUE; j--; } } - tr_free_pattern(mrb, &pat); if (flag_changed) { RSTR_SET_LEN(RSTRING(str), j); RSTRING_PTR(str)[j] = 0; @@ -752,17 +806,18 @@ mrb_str_count(mrb_state *mrb, mrb_value str) mrb_int len; mrb_int count = 0; struct tr_pattern pat = STATIC_TR_PATTERN; + uint8_t bitmap[32]; mrb_get_args(mrb, "S", &v_pat); tr_parse_pattern(mrb, &pat, v_pat, TRUE); + tr_compile_pattern(&pat, v_pat, bitmap); + tr_free_pattern(mrb, &pat); + s = RSTRING_PTR(str); len = RSTRING_LEN(str); for (i = 0; i < len; i++) { - mrb_int n = tr_find_character(&pat, RSTRING_PTR(v_pat), s[i]); - - if (n >= 0) count++; + if (tr_bitmap_detect(bitmap, s[i])) count++; } - tr_free_pattern(mrb, &pat); return mrb_fixnum_value(count); } -- cgit v1.2.3 From ff08856fe314faa4d16b4502c0960a3475387846 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 19 Sep 2018 21:51:53 +0900 Subject: Remove implicit conversion using `to_str` method; fix #3854 We have added internal convenience method `__to_str` which does string type check. The issue #3854 was fixed but fundamental flaw of lack of stack depth check along with fibers still remains. Use `MRB_GC_FIXED_ARENA` for workaround. --- include/mruby.h | 5 +--- include/mruby/string.h | 3 +-- mrbgems/mruby-io/src/file.c | 2 +- mrbgems/mruby-kernel-ext/src/kernel.c | 8 ++----- mrbgems/mruby-string-ext/mrblib/string.rb | 6 ++--- mrbgems/mruby-string-ext/src/string.c | 6 ++--- mrbgems/mruby-string-ext/test/string.rb | 11 --------- mrblib/string.rb | 11 ++++----- src/class.c | 11 +++++++-- src/kernel.c | 39 +++++-------------------------- src/object.c | 27 +++++++++++++++++++++ src/string.c | 32 ++----------------------- test/t/string.rb | 13 ----------- 13 files changed, 60 insertions(+), 114 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/include/mruby.h b/include/mruby.h index 33597101a..12df9cd5a 100644 --- a/include/mruby.h +++ b/include/mruby.h @@ -854,10 +854,6 @@ typedef const char *mrb_args_format; /** * Retrieve arguments from mrb_state. * - * When applicable, implicit conversions (such as `to_str`, `to_ary`, `to_hash`) are - * applied to received arguments. - * Used inside a function of mrb_func_t type. - * * @param mrb The current MRuby state. * @param format [mrb_args_format] is a list of format specifiers * @param ... The passing variadic arguments must be a pointer of retrieving type. @@ -1187,6 +1183,7 @@ MRB_API void mrb_gc_unregister(mrb_state *mrb, mrb_value obj); MRB_API mrb_value mrb_to_int(mrb_state *mrb, mrb_value val); #define mrb_int(mrb, val) mrb_fixnum(mrb_to_int(mrb, val)) +MRB_API mrb_value mrb_to_str(mrb_state *mrb, mrb_value val); MRB_API void mrb_check_type(mrb_state *mrb, mrb_value x, enum mrb_vtype t); typedef enum call_type { diff --git a/include/mruby/string.h b/include/mruby/string.h index 481b2fb38..fa1955f48 100644 --- a/include/mruby/string.h +++ b/include/mruby/string.h @@ -311,8 +311,7 @@ MRB_API mrb_value mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb * @param [mrb_value] str Ruby string. * @return [mrb_value] A Ruby string. */ -MRB_API mrb_value mrb_string_type(mrb_state *mrb, mrb_value str); - +MRB_API mrb_value mrb_ensure_string_type(mrb_state *mrb, mrb_value str); MRB_API mrb_value mrb_check_string_type(mrb_state *mrb, mrb_value str); MRB_API mrb_value mrb_str_new_capa(mrb_state *mrb, size_t capa); MRB_API mrb_value mrb_str_buf_new(mrb_state *mrb, size_t capa); diff --git a/mrbgems/mruby-io/src/file.c b/mrbgems/mruby-io/src/file.c index e65741061..c00663481 100644 --- a/mrbgems/mruby-io/src/file.c +++ b/mrbgems/mruby-io/src/file.c @@ -115,7 +115,7 @@ mrb_file_s_unlink(mrb_state *mrb, mrb_value obj) mrb_get_args(mrb, "*", &argv, &argc); for (i = 0; i < argc; i++) { const char *utf8_path; - pathv = mrb_convert_type(mrb, argv[i], MRB_TT_STRING, "String", "to_str"); + pathv = mrb_ensure_string_type(mrb, argv[i]); utf8_path = mrb_string_value_cstr(mrb, &pathv); path = mrb_locale_from_utf8(utf8_path, -1); if (UNLINK(path) < 0) { diff --git a/mrbgems/mruby-kernel-ext/src/kernel.c b/mrbgems/mruby-kernel-ext/src/kernel.c index a60e9a210..bc2656399 100644 --- a/mrbgems/mruby-kernel-ext/src/kernel.c +++ b/mrbgems/mruby-kernel-ext/src/kernel.c @@ -141,8 +141,7 @@ mrb_f_float(mrb_state *mrb, mrb_value self) * String(arg) -> string * * Returns arg as an String. - * - * First tries to call its to_str method, then its to_s method. + * converted using to_s method. * * String(self) #=> "main" * String(self.class) #=> "Object" @@ -154,10 +153,7 @@ mrb_f_string(mrb_state *mrb, mrb_value self) mrb_value arg, tmp; mrb_get_args(mrb, "o", &arg); - tmp = mrb_check_convert_type(mrb, arg, MRB_TT_STRING, "String", "to_str"); - if (mrb_nil_p(tmp)) { - tmp = mrb_check_convert_type(mrb, arg, MRB_TT_STRING, "String", "to_s"); - } + tmp = mrb_convert_type(mrb, arg, MRB_TT_STRING, "String", "to_s"); return tmp; } diff --git a/mrbgems/mruby-string-ext/mrblib/string.rb b/mrbgems/mruby-string-ext/mrblib/string.rb index 27ca30610..9212d83a5 100644 --- a/mrbgems/mruby-string-ext/mrblib/string.rb +++ b/mrbgems/mruby-string-ext/mrblib/string.rb @@ -12,8 +12,8 @@ class String # String.try_convert(/re/) #=> nil # def self.try_convert(obj) - if obj.respond_to?(:to_str) - obj.to_str + if self === obj + obj else nil end @@ -142,7 +142,7 @@ class String # "abcdef".casecmp("ABCDEF") #=> 0 # def casecmp(str) - self.downcase <=> str.to_str.downcase + self.downcase <=> str.__to_str.downcase rescue NoMethodError nil end diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 6d661c352..ba7e3c610 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -163,7 +163,7 @@ mrb_str_concat_m(mrb_state *mrb, mrb_value self) if (mrb_fixnum_p(str)) str = mrb_fixnum_chr(mrb, str); else - str = mrb_string_type(mrb, str); + str = mrb_ensure_string_type(mrb, str); mrb_str_concat(mrb, self, str); return self; } @@ -191,7 +191,7 @@ mrb_str_start_with(mrb_state *mrb, mrb_value self) for (i = 0; i < argc; i++) { size_t len_l, len_r; int ai = mrb_gc_arena_save(mrb); - sub = mrb_string_type(mrb, argv[i]); + sub = mrb_ensure_string_type(mrb, argv[i]); mrb_gc_arena_restore(mrb, ai); len_l = RSTRING_LEN(self); len_r = RSTRING_LEN(sub); @@ -220,7 +220,7 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self) for (i = 0; i < argc; i++) { size_t len_l, len_r; int ai = mrb_gc_arena_save(mrb); - sub = mrb_string_type(mrb, argv[i]); + sub = mrb_ensure_string_type(mrb, argv[i]); mrb_gc_arena_restore(mrb, ai); len_l = RSTRING_LEN(self); len_r = RSTRING_LEN(sub); diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 781506949..4ccdfd6c3 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -114,12 +114,6 @@ assert('String#concat') do assert_equal "Hello World!", "Hello " << "World" << 33 assert_equal "Hello World!", "Hello ".concat("World").concat(33) - o = Object.new - def o.to_str - "to_str" - end - assert_equal "hi to_str", "hi " << o - assert_raise(TypeError) { "".concat(Object.new) } end @@ -128,11 +122,6 @@ assert('String#casecmp') do assert_equal 0, "aBcDeF".casecmp("abcdef") assert_equal(-1, "abcdef".casecmp("abcdefg")) assert_equal 0, "abcdef".casecmp("ABCDEF") - o = Object.new - def o.to_str - "ABCDEF" - end - assert_equal 0, "abcdef".casecmp(o) end assert('String#count') do diff --git a/mrblib/string.rb b/mrblib/string.rb index 07b80b340..397603e9d 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -12,7 +12,7 @@ class String def each_line(rs = "\n", &block) return to_enum(:each_line, rs, &block) unless block return block.call(self) if rs.nil? - rs = rs.to_str + rs = rs.__to_str offset = 0 rs_len = rs.length this = dup @@ -67,7 +67,7 @@ class String block = nil end if !replace.nil? || !block - replace = replace.to_str + replace = replace.__to_str end offset = 0 result = [] @@ -129,12 +129,12 @@ class String end pattern, replace = *args - pattern = pattern.to_str + pattern = pattern.__to_str if args.length == 2 && block block = nil end unless block - replace = replace.to_str + replace = replace.__to_str end result = [] this = dup @@ -245,14 +245,13 @@ class String ## # ISO 15.2.10.5.3 def =~(re) - raise TypeError, "type mismatch: String given" if re.respond_to? :to_str re =~ self end ## # ISO 15.2.10.5.27 def match(re, &block) - if re.respond_to? :to_str + if String === re if Object.const_defined?(:Regexp) r = Regexp.new(re) r.match(self, &block) diff --git a/src/class.c b/src/class.c index 50ab0ea59..90c73104e 100644 --- a/src/class.c +++ b/src/class.c @@ -504,10 +504,17 @@ check_type(mrb_state *mrb, mrb_value val, enum mrb_vtype t, const char *c, const return tmp; } +#define CHECK_TYPE(mrb, val, t, c) do { \ + if (mrb_type(val) != (t)) {\ + mrb_raisef(mrb, E_TYPE_ERROR, "expected %S", mrb_str_new_lit(mrb, c));\ + }\ +} while (0) + static mrb_value to_str(mrb_state *mrb, mrb_value val) { - return check_type(mrb, val, MRB_TT_STRING, "String", "to_str"); + CHECK_TYPE(mrb, val, MRB_TT_STRING, "String"); + return val; } static mrb_value @@ -1972,7 +1979,7 @@ mrb_mod_const_get(mrb_state *mrb, mrb_value mod) } /* const get with class path string */ - path = mrb_string_type(mrb, path); + path = mrb_ensure_string_type(mrb, path); ptr = RSTRING_PTR(path); len = RSTRING_LEN(path); off = 0; diff --git a/src/kernel.c b/src/kernel.c index 195594d6b..ce9cd1d44 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -746,6 +746,7 @@ basic_obj_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym id, int pub) { return mrb_respond_to(mrb, obj, id); } + /* 15.3.1.3.43 */ /* * call-seq: @@ -765,45 +766,16 @@ basic_obj_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym id, int pub) static mrb_value obj_respond_to(mrb_state *mrb, mrb_value self) { - mrb_value mid; mrb_sym id, rtm_id; - mrb_bool priv = FALSE, respond_to_p = TRUE; - - mrb_get_args(mrb, "o|b", &mid, &priv); - - if (mrb_symbol_p(mid)) { - id = mrb_symbol(mid); - } - else { - mrb_value tmp; - if (mrb_string_p(mid)) { - tmp = mrb_check_intern_str(mrb, mid); - } - else { - tmp = mrb_check_string_type(mrb, mid); - if (mrb_nil_p(tmp)) { - tmp = mrb_inspect(mrb, mid); - mrb_raisef(mrb, E_TYPE_ERROR, "%S is not a symbol", tmp); - } - tmp = mrb_check_intern_str(mrb, tmp); - } - if (mrb_nil_p(tmp)) { - respond_to_p = FALSE; - } - else { - id = mrb_symbol(tmp); - } - } - - if (respond_to_p) { - respond_to_p = basic_obj_respond_to(mrb, self, id, !priv); - } + mrb_bool priv = FALSE, respond_to_p; + mrb_get_args(mrb, "n|b", &id, &priv); + respond_to_p = basic_obj_respond_to(mrb, self, id, !priv); if (!respond_to_p) { rtm_id = mrb_intern_lit(mrb, "respond_to_missing?"); if (basic_obj_respond_to(mrb, self, rtm_id, !priv)) { mrb_value args[2], v; - args[0] = mid; + args[0] = mrb_symbol_value(id); args[1] = mrb_bool_value(priv); v = mrb_funcall_argv(mrb, self, rtm_id, 2, args); return mrb_bool_value(mrb_bool(v)); @@ -873,6 +845,7 @@ mrb_init_kernel(mrb_state *mrb) mrb_define_method(mrb, krn, "to_s", mrb_any_to_s, MRB_ARGS_NONE()); /* 15.3.1.3.46 */ mrb_define_method(mrb, krn, "__case_eqq", mrb_obj_ceqq, MRB_ARGS_REQ(1)); /* internal */ mrb_define_method(mrb, krn, "__to_int", mrb_to_int, MRB_ARGS_NONE()); /* internal */ + mrb_define_method(mrb, krn, "__to_str", mrb_to_str, MRB_ARGS_NONE()); /* internal */ mrb_define_method(mrb, krn, "class_defined?", mrb_krn_class_defined, MRB_ARGS_REQ(1)); diff --git a/src/object.c b/src/object.c index ba6fa3947..18ccacfb9 100644 --- a/src/object.c +++ b/src/object.c @@ -579,6 +579,33 @@ mrb_Float(mrb_state *mrb, mrb_value val) } #endif +MRB_API mrb_value +mrb_to_str(mrb_state *mrb, mrb_value val) +{ + if (!mrb_string_p(val)) { + mrb_value type = inspect_type(mrb, val); + mrb_raisef(mrb, E_TYPE_ERROR, "can't convert %S to String", type); + } + return val; +} + +MRB_API mrb_value +mrb_ensure_string_type(mrb_state *mrb, mrb_value str) +{ + if (!mrb_string_p(str)) { + mrb_raisef(mrb, E_TYPE_ERROR, "%S cannot be converted to String", + inspect_type(mrb, str)); + } + return str; +} + +MRB_API mrb_value +mrb_check_string_type(mrb_state *mrb, mrb_value str) +{ + if (!mrb_string_p(str)) return mrb_nil_value(); + return str; +} + MRB_API mrb_value mrb_inspect(mrb_state *mrb, mrb_value obj) { diff --git a/src/string.c b/src/string.c index b7abfb762..b6d4ecef0 100644 --- a/src/string.c +++ b/src/string.c @@ -956,15 +956,7 @@ str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2) MRB_API mrb_bool mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2) { - if (mrb_immediate_p(str2)) return FALSE; - if (!mrb_string_p(str2)) { - if (mrb_nil_p(str2)) return FALSE; - if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_str"))) { - return FALSE; - } - str2 = mrb_funcall(mrb, str2, "to_str", 0); - return mrb_equal(mrb, str2, str1); - } + if (!mrb_string_p(str2)) return FALSE; return str_eql(mrb, str1, str2); } @@ -992,14 +984,8 @@ mrb_str_equal_m(mrb_state *mrb, mrb_value str1) MRB_API mrb_value mrb_str_to_str(mrb_state *mrb, mrb_value str) { - mrb_value s; - if (!mrb_string_p(str)) { - s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); - if (mrb_nil_p(s)) { - s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s"); - } - return s; + return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s"); } return str; } @@ -1714,18 +1700,6 @@ mrb_ptr_to_str(mrb_state *mrb, void *p) return mrb_obj_value(p_str); } -MRB_API mrb_value -mrb_string_type(mrb_state *mrb, mrb_value str) -{ - return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); -} - -MRB_API mrb_value -mrb_check_string_type(mrb_state *mrb, mrb_value str) -{ - return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); -} - /* 15.2.10.5.30 */ /* * call-seq: @@ -2379,7 +2353,6 @@ mrb_str_to_f(mrb_state *mrb, mrb_value self) /* * call-seq: * str.to_s => str - * str.to_str => str * * Returns the receiver. */ @@ -2783,7 +2756,6 @@ mrb_init_string(mrb_state *mrb) #endif mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */ mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */ - mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */ mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */ mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */ diff --git a/test/t/string.rb b/test/t/string.rb index e91b915fe..3a1eced16 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -253,19 +253,6 @@ assert('String#chomp!', '15.2.10.5.10') do assert_equal 'abc', e end -assert('String#chomp! uses the correct length') do - class A - def to_str - $s.replace("AA") - "A" - end - end - - $s = "AAA" - $s.chomp!(A.new) - assert_equal $s, "A" -end - assert('String#chop', '15.2.10.5.11') do a = ''.chop b = 'abc'.chop -- cgit v1.2.3 From 56e0e1934d4ec751d83f9f54ce93ca74b0e21194 Mon Sep 17 00:00:00 2001 From: dearblue Date: Sat, 25 May 2019 12:12:21 +0900 Subject: Name the return value of `mrb_range_beg_len()` --- include/mruby/range.h | 8 +++++++- mrbgems/mruby-array-ext/src/array.c | 2 +- mrbgems/mruby-kernel-ext/src/kernel.c | 2 +- mrbgems/mruby-string-ext/src/string.c | 6 +++--- src/array.c | 8 ++++---- src/range.c | 12 ++++++------ src/string.c | 4 ++-- 7 files changed, 24 insertions(+), 18 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/include/mruby/range.h b/include/mruby/range.h index b5626993a..ee6eb8d1d 100644 --- a/include/mruby/range.h +++ b/include/mruby/range.h @@ -64,7 +64,13 @@ MRB_API struct RRange* mrb_range_ptr(mrb_state *mrb, mrb_value range); */ MRB_API mrb_value mrb_range_new(mrb_state *mrb, mrb_value start, mrb_value end, mrb_bool exclude); -MRB_API mrb_int mrb_range_beg_len(mrb_state *mrb, mrb_value range, mrb_int *begp, mrb_int *lenp, mrb_int len, mrb_bool trunc); +enum mrb_range_beg_len { + MRB_RANGE_TYPE_MISMATCH = 0, /* (failure) not range */ + MRB_RANGE_OK = 1, /* (success) range */ + MRB_RANGE_OUT = 2 /* (failure) out of range */ +}; + +MRB_API enum mrb_range_beg_len mrb_range_beg_len(mrb_state *mrb, mrb_value range, mrb_int *begp, mrb_int *lenp, mrb_int len, mrb_bool trunc); mrb_value mrb_get_values_at(mrb_state *mrb, mrb_value obj, mrb_int olen, mrb_int argc, const mrb_value *argv, mrb_value (*func)(mrb_state*, mrb_value, mrb_int)); void mrb_gc_mark_range(mrb_state *mrb, struct RRange *r); diff --git a/mrbgems/mruby-array-ext/src/array.c b/mrbgems/mruby-array-ext/src/array.c index b6d9c9c80..20c771a97 100644 --- a/mrbgems/mruby-array-ext/src/array.c +++ b/mrbgems/mruby-array-ext/src/array.c @@ -145,7 +145,7 @@ mrb_ary_slice_bang(mrb_state *mrb, mrb_value self) mrb_get_args(mrb, "o|i", &index, &len); switch (mrb_type(index)) { case MRB_TT_RANGE: - if (mrb_range_beg_len(mrb, index, &i, &len, ARY_LEN(a), TRUE) == 1) { + if (mrb_range_beg_len(mrb, index, &i, &len, ARY_LEN(a), TRUE) == MRB_RANGE_OK) { goto delete_pos_len; } else { diff --git a/mrbgems/mruby-kernel-ext/src/kernel.c b/mrbgems/mruby-kernel-ext/src/kernel.c index 9288b0e6f..8e7e03c56 100644 --- a/mrbgems/mruby-kernel-ext/src/kernel.c +++ b/mrbgems/mruby-kernel-ext/src/kernel.c @@ -22,7 +22,7 @@ mrb_f_caller(mrb_state *mrb, mrb_value self) case 1: if (mrb_type(v) == MRB_TT_RANGE) { mrb_int beg, len; - if (mrb_range_beg_len(mrb, v, &beg, &len, bt_len, TRUE) == 1) { + if (mrb_range_beg_len(mrb, v, &beg, &len, bt_len, TRUE) == MRB_RANGE_OK) { lev = beg; n = len; } diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index ba7e3c610..85180516c 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -58,11 +58,11 @@ mrb_str_byteslice(mrb_state *mrb, mrb_value str) len = RSTRING_LEN(str); switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) { - case 0: /* not range */ + case MRB_RANGE_TYPE_MISMATCH: break; - case 1: /* range */ + case MRB_RANGE_OK: return mrb_str_substr(mrb, str, beg, len); - case 2: /* out of range */ + case MRB_RANGE_OUT: mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1); break; } diff --git a/src/array.c b/src/array.c index d4302cb22..bd9b4d358 100644 --- a/src/array.c +++ b/src/array.c @@ -858,7 +858,7 @@ mrb_ary_aget(mrb_state *mrb, mrb_value self) switch (mrb_type(index)) { /* a[n..m] */ case MRB_TT_RANGE: - if (mrb_range_beg_len(mrb, index, &i, &len, ARY_LEN(a), TRUE) == 1) { + if (mrb_range_beg_len(mrb, index, &i, &len, ARY_LEN(a), TRUE) == MRB_RANGE_OK) { return ary_subseq(mrb, a, i, len); } else { @@ -927,13 +927,13 @@ mrb_ary_aset(mrb_state *mrb, mrb_value self) if (mrb_get_args(mrb, "oo|o", &v1, &v2, &v3) == 2) { /* a[n..m] = v */ switch (mrb_range_beg_len(mrb, v1, &i, &len, RARRAY_LEN(self), FALSE)) { - case 0: /* not range */ + case MRB_RANGE_TYPE_MISMATCH: mrb_ary_set(mrb, self, aget_index(mrb, v1), v2); break; - case 1: /* range */ + case MRB_RANGE_OK: mrb_ary_splice(mrb, self, i, len, v2); break; - case 2: /* out of range */ + case MRB_RANGE_OUT: mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", v1); break; } diff --git a/src/range.c b/src/range.c index 21771c8ec..9036ef093 100644 --- a/src/range.c +++ b/src/range.c @@ -352,7 +352,7 @@ mrb_get_values_at(mrb_state *mrb, mrb_value obj, mrb_int olen, mrb_int argc, con if (mrb_fixnum_p(argv[i])) { mrb_ary_push(mrb, result, func(mrb, obj, mrb_fixnum(argv[i]))); } - else if (mrb_range_beg_len(mrb, argv[i], &beg, &len, olen, FALSE) == 1) { + else if (mrb_range_beg_len(mrb, argv[i], &beg, &len, olen, FALSE) == MRB_RANGE_OK) { mrb_int const end = olen < beg + len ? olen : beg + len; for (j = beg; j < end; ++j) { mrb_ary_push(mrb, result, func(mrb, obj, j)); @@ -398,13 +398,13 @@ mrb_range_new(mrb_state *mrb, mrb_value beg, mrb_value end, mrb_bool excl) return mrb_range_value(r); } -MRB_API mrb_int +MRB_API enum mrb_range_beg_len mrb_range_beg_len(mrb_state *mrb, mrb_value range, mrb_int *begp, mrb_int *lenp, mrb_int len, mrb_bool trunc) { mrb_int beg, end; struct RRange *r; - if (mrb_type(range) != MRB_TT_RANGE) return 0; + if (mrb_type(range) != MRB_TT_RANGE) return MRB_RANGE_TYPE_MISMATCH; r = mrb_range_ptr(mrb, range); beg = mrb_int(mrb, RANGE_BEG(r)); @@ -412,11 +412,11 @@ mrb_range_beg_len(mrb_state *mrb, mrb_value range, mrb_int *begp, mrb_int *lenp, if (beg < 0) { beg += len; - if (beg < 0) return 2; + if (beg < 0) return MRB_RANGE_OUT; } if (trunc) { - if (beg > len) return 2; + if (beg > len) return MRB_RANGE_OUT; if (end > len) end = len; } @@ -427,7 +427,7 @@ mrb_range_beg_len(mrb_state *mrb, mrb_value range, mrb_int *begp, mrb_int *lenp, *begp = beg; *lenp = len; - return 1; + return MRB_RANGE_OK; } void diff --git a/src/string.c b/src/string.c index 578e3bdcc..db2d73e32 100644 --- a/src/string.c +++ b/src/string.c @@ -1041,9 +1041,9 @@ num_index: len = RSTRING_CHAR_LEN(str); switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, TRUE)) { - case 1: + case MRB_RANGE_OK: return str_subseq(mrb, str, beg, len); - case 2: + case MRB_RANGE_OUT: return mrb_nil_value(); default: break; -- cgit v1.2.3 From db3e6f6ac66c258cb23ecbbbcaa7dc32bdfe4759 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Wed, 12 Jun 2019 20:45:27 +0900 Subject: Fix typo in `String#setbyte` error message --- mrbgems/mruby-string-ext/src/string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 85180516c..d9ebb7392 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -29,7 +29,7 @@ mrb_str_setbyte(mrb_state *mrb, mrb_value str) len = RSTRING_LEN(str); if (pos < -len || len <= pos) - mrb_raisef(mrb, E_INDEX_ERROR, "index %S is out of array", mrb_fixnum_value(pos)); + mrb_raisef(mrb, E_INDEX_ERROR, "index %S out of string", mrb_fixnum_value(pos)); if (pos < 0) pos += len; -- cgit v1.2.3 From 75df13a97334c162b2cf743c3e37c4933a4b0d1c Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Tue, 25 Jun 2019 22:58:21 +0900 Subject: Fix `String#byteslice` with `MRB_UTF8_STRING` and some edge cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Example: $ bin/mruby -e ' p "あa".byteslice(1) p "bar".byteslice(3) p "bar".byteslice(4..0) ' Before this patch: "a" "" RangeError (4..0 out of range) After this patch (same as Ruby): "\x81" nil nil --- include/mruby/string.h | 3 ++ mrbgems/mruby-string-ext/src/string.c | 58 +++++++++++++-------------------- mrbgems/mruby-string-ext/test/string.rb | 51 +++++++++++++++++++++++++++++ src/string.c | 49 ++++++++++++++-------------- 4 files changed, 102 insertions(+), 59 deletions(-) (limited to 'mrbgems/mruby-string-ext/src') diff --git a/include/mruby/string.h b/include/mruby/string.h index 22445f654..b563541cb 100644 --- a/include/mruby/string.h +++ b/include/mruby/string.h @@ -438,6 +438,9 @@ mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str); #define mrb_str_buf_cat(mrb, str, ptr, len) mrb_str_cat(mrb, str, ptr, len) #define mrb_str_buf_append(mrb, str, str2) mrb_str_cat_str(mrb, str, str2) +mrb_bool mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp); +mrb_value mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len); + #ifdef MRB_UTF8_STRING mrb_int mrb_utf8_len(const char *str, mrb_int byte_len); #endif diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index d9ebb7392..50a4e5582 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -42,44 +42,32 @@ mrb_str_setbyte(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_byteslice(mrb_state *mrb, mrb_value str) { - mrb_value a1; - mrb_int len; - - if (mrb_get_argc(mrb) == 2) { - mrb_int pos; - mrb_get_args(mrb, "ii", &pos, &len); - return mrb_str_substr(mrb, str, pos, len); + mrb_value a1, a2; + mrb_int str_len = RSTRING_LEN(str), beg, len; + mrb_bool empty = TRUE; + + if (mrb_get_args(mrb, "o|o", &a1, &a2) == 2) { + beg = mrb_fixnum(mrb_to_int(mrb, a1)); + len = mrb_fixnum(mrb_to_int(mrb, a2)); + goto subseq; } - mrb_get_args(mrb, "o|i", &a1, &len); - switch (mrb_type(a1)) { - case MRB_TT_RANGE: - { - mrb_int beg; - - len = RSTRING_LEN(str); - switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) { - case MRB_RANGE_TYPE_MISMATCH: - break; - case MRB_RANGE_OK: - return mrb_str_substr(mrb, str, beg, len); - case MRB_RANGE_OUT: - mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1); - break; - } - return mrb_nil_value(); + if (mrb_type(a1) == MRB_TT_RANGE) { + if (mrb_range_beg_len(mrb, a1, &beg, &len, str_len, TRUE) == MRB_RANGE_OK) { + goto subseq; } -#ifndef MRB_WITHOUT_FLOAT - case MRB_TT_FLOAT: - a1 = mrb_fixnum_value((mrb_int)mrb_float(a1)); - /* fall through */ -#endif - case MRB_TT_FIXNUM: - return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1); - default: - mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument"); + return mrb_nil_value(); + } + + beg = mrb_fixnum(mrb_to_int(mrb, a1)); + len = 1; + empty = FALSE; +subseq: + if (mrb_str_beg_len(str_len, &beg, &len) && (empty || len != 0)) { + return mrb_str_byte_subseq(mrb, str, beg, len); + } + else { + return mrb_nil_value(); } - /* not reached */ - return mrb_nil_value(); } /* diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 02777e594..bf633bcef 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -26,10 +26,61 @@ end assert('String#byteslice') do str1 = "hello" + str2 = "\u3042ab" # "\xE3\x81\x82ab" + + assert_equal("h", str1.byteslice(0)) assert_equal("e", str1.byteslice(1)) + assert_equal(nil, str1.byteslice(5)) assert_equal("o", str1.byteslice(-1)) + assert_equal(nil, str1.byteslice(-6)) + assert_equal("\xE3", str2.byteslice(0)) + assert_equal("\x81", str2.byteslice(1)) + assert_equal(nil, str2.byteslice(5)) + assert_equal("b", str2.byteslice(-1)) + assert_equal(nil, str2.byteslice(-6)) + + assert_equal("", str1.byteslice(0, 0)) + assert_equal(str1, str1.byteslice(0, 6)) + assert_equal("el", str1.byteslice(1, 2)) + assert_equal("", str1.byteslice(5, 1)) + assert_equal("o", str1.byteslice(-1, 6)) + assert_equal(nil, str1.byteslice(-6, 1)) + assert_equal(nil, str1.byteslice(0, -1)) + assert_equal("", str2.byteslice(0, 0)) + assert_equal(str2, str2.byteslice(0, 6)) + assert_equal("\x81\x82", str2.byteslice(1, 2)) + assert_equal("", str2.byteslice(5, 1)) + assert_equal("b", str2.byteslice(-1, 6)) + assert_equal(nil, str2.byteslice(-6, 1)) + assert_equal(nil, str2.byteslice(0, -1)) + assert_equal("ell", str1.byteslice(1..3)) assert_equal("el", str1.byteslice(1...3)) + assert_equal("h", str1.byteslice(0..0)) + assert_equal("", str1.byteslice(5..0)) + assert_equal("o", str1.byteslice(4..5)) + assert_equal(nil, str1.byteslice(6..0)) + assert_equal("", str1.byteslice(-1..0)) + assert_equal("llo", str1.byteslice(-3..5)) + assert_equal("\x81\x82a", str2.byteslice(1..3)) + assert_equal("\x81\x82", str2.byteslice(1...3)) + assert_equal("\xE3", str2.byteslice(0..0)) + assert_equal("", str2.byteslice(5..0)) + assert_equal("b", str2.byteslice(4..5)) + assert_equal(nil, str2.byteslice(6..0)) + assert_equal("", str2.byteslice(-1..0)) + assert_equal("\x82ab", str2.byteslice(-3..5)) + + assert_raise(ArgumentError) { str1.byteslice } + assert_raise(ArgumentError) { str1.byteslice(1, 2, 3) } + assert_raise(TypeError) { str1.byteslice("1") } + assert_raise(TypeError) { str1.byteslice("1", 2) } + assert_raise(TypeError) { str1.byteslice(1, "2") } + assert_raise(TypeError) { str1.byteslice(1..2, 3) } + + skip unless Object.const_defined?(:Float) + assert_equal("o", str1.byteslice(4.0)) + assert_equal("\x82ab", str2.byteslice(2.0, 3.0)) end assert('String#dump') do diff --git a/src/string.c b/src/string.c index ed58c484b..f5fb936a6 100644 --- a/src/string.c +++ b/src/string.c @@ -410,8 +410,8 @@ str_make_shared(mrb_state *mrb, struct RString *orig, struct RString *s) } } -static mrb_value -byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +mrb_value +mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { struct RString *orig, *s; @@ -434,32 +434,33 @@ str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) beg = chars2bytes(str, 0, beg); len = chars2bytes(str, beg, len); - return byte_subseq(mrb, str, beg, len); + return mrb_str_byte_subseq(mrb, str, beg, len); } #else -#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len) +#define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len) #endif -static mrb_value -str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +mrb_bool +mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp) { - mrb_int clen = RSTRING_CHAR_LEN(str); - - if (len < 0) return mrb_nil_value(); - if (clen == 0) { - len = 0; + if (str_len < *begp || *lenp < 0) return FALSE; + if (*begp < 0) { + *begp += str_len; + if (*begp < 0) return FALSE; } - if (beg > clen) return mrb_nil_value(); - if (beg < 0) { - beg += clen; - if (beg < 0) return mrb_nil_value(); + if (*lenp > str_len - *begp) + *lenp = str_len - *begp; + if (*lenp <= 0) { + *lenp = 0; } - if (len > clen - beg) - len = clen - beg; - if (len <= 0) { - len = 0; - } - return str_subseq(mrb, str, beg, len); + return TRUE; +} + +static mrb_value +str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +{ + return mrb_str_beg_len(RSTRING_CHAR_LEN(str), &beg, &len) ? + str_subseq(mrb, str, beg, len) : mrb_nil_value(); } MRB_API mrb_int @@ -1917,7 +1918,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) } } else if (ISSPACE(c)) { - mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg)); + mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, beg, end-beg)); mrb_gc_arena_restore(mrb, ai); skip = TRUE; beg = idx; @@ -1942,7 +1943,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) else { end = chars2bytes(str, idx, 1); } - mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end)); + mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, idx, end)); mrb_gc_arena_restore(mrb, ai); idx += end + pat_len; if (lim_p && lim <= ++i) break; @@ -1954,7 +1955,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) tmp = mrb_str_new_empty(mrb, str); } else { - tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); + tmp = mrb_str_byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); } mrb_ary_push(mrb, result, tmp); } -- cgit v1.2.3