From 58ba38fe1e11828190596b44e0789dd8a0607ff3 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 20 Sep 2018 15:13:13 +0900 Subject: Add `String#tr` and `#tr!` to `mruby-string-ext` gem; fix #4086 This patch is based on `mruby/c` implementation by Hirohito Higashi. We might need to add `#tr_s`, `#squeeze` and `#delete` as well. Adding them should not be too hard using functions we implemented here. --- mrbgems/mruby-string-ext/test/string.rb | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index b6146fb90..6b8a89c4d 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -147,6 +147,19 @@ assert('String#casecmp') do assert_equal 0, "abcdef".casecmp(o) end +assert('String#tr') do + assert_equal "ABC", "abc".tr('a-z', 'A-Z') + assert_equal "hippo", "hello".tr('el', 'ip') + assert_equal "Ruby", "Lisp".tr("Lisp", "Ruby") + assert_equal "*e**o", "hello".tr('^aeiou', '*') +end + +assert('String#tr!') do + s = "abcdefghijklmnopqR" + assert_equal "ab12222hijklmnopqR", s.tr!("cdefg", "12") + assert_equal "ab12222hijklmnopqR", s +end + assert('String#start_with?') do assert_true "hello".start_with?("heaven", "hell") assert_true !"hello".start_with?("heaven", "paradise") -- cgit v1.2.3 From c0ae8a96a1fb658b21428bee174602e9373eef3b Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 20 Sep 2018 16:46:42 +0900 Subject: Add `String#tr_s` and `String#tr_s!`; ref #4086 --- mrbgems/mruby-string-ext/src/string.c | 55 ++++++++++++++++++++++++++++++--- mrbgems/mruby-string-ext/test/string.rb | 13 ++++++++ 2 files changed, 64 insertions(+), 4 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 9d318cb1a..e41dde448 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -380,7 +380,7 @@ tr_get_character(const struct tr_pattern *pat, mrb_int n_th) } static mrb_bool -str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2) +str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze) { struct tr_pattern *pat; struct tr_pattern *rep; @@ -388,6 +388,7 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2) mrb_int len; mrb_int i; mrb_bool flag_changed = FALSE; + mrb_int lastch = -1; mrb_str_modify(mrb, mrb_str_ptr(str)); pat = tr_parse_pattern(mrb, p1, TRUE); @@ -401,6 +402,7 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2) if (n >= 0) { flag_changed = TRUE; if (rep == NULL) { + compact: memmove(s + i, s + i + 1, len - i); len--; i--; @@ -408,11 +410,12 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2) else { mrb_int c = tr_get_character(rep, n); + if (squeeze && c == lastch) goto compact; if (c < 0 || c > 0x80) { mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%S) out of range", mrb_fixnum_value((mrb_int)c)); } - s[i] = c; + lastch = s[i] = c; } } } @@ -471,7 +474,7 @@ mrb_str_tr(mrb_state *mrb, mrb_value str) mrb_get_args(mrb, "SS", &p1, &p2); dup = mrb_str_dup(mrb, str); - str_tr(mrb, dup, p1, p2); + str_tr(mrb, dup, p1, p2, FALSE); return dup; } @@ -488,7 +491,49 @@ mrb_str_tr_bang(mrb_state *mrb, mrb_value str) mrb_value p1, p2; mrb_get_args(mrb, "SS", &p1, &p2); - if (str_tr(mrb, str, p1, p2)) { + if (str_tr(mrb, str, p1, p2, FALSE)) { + return str; + } + return mrb_nil_value(); +} + +/* + * call-seq: + * str.tr_s(from_str, to_str) -> new_str + * + * Processes a copy of str as described under String#tr, then removes + * duplicate characters in regions that were affected by the translation. + * + * "hello".tr_s('l', 'r') #=> "hero" + * "hello".tr_s('el', '*') #=> "h*o" + * "hello".tr_s('el', 'hx') #=> "hhxo" + */ +static mrb_value +mrb_str_tr_s(mrb_state *mrb, mrb_value str) +{ + mrb_value dup; + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + dup = mrb_str_dup(mrb, str); + str_tr(mrb, dup, p1, p2, TRUE); + return dup; +} + +/* + * call-seq: + * str.tr_s!(from_str, to_str) -> str or nil + * + * Performs String#tr_s processing on str in place, returning + * str, or nil if no changes were made. + */ +static mrb_value +mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + if (str_tr(mrb, str, p1, p2, TRUE)) { return str; } return mrb_nil_value(); @@ -881,6 +926,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "<<", mrb_str_concat_m, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "tr", mrb_str_tr, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "hex", mrb_str_hex, MRB_ARGS_NONE()); diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 6b8a89c4d..d50a2b3b4 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -160,6 +160,19 @@ assert('String#tr!') do assert_equal "ab12222hijklmnopqR", s end +assert('String#tr_s') do + assert_equal "hero", "hello".tr_s('l', 'r') + assert_equal "h*o", "hello".tr_s('el', '*') + assert_equal "hhxo", "hello".tr_s('el', 'hx') +end + +assert('String#tr_s!') do + s = "hello" + assert_equal "hero", s.tr_s!('l', 'r') + assert_equal "hero", s + assert_nil s.tr_s!('l', 'r') +end + assert('String#start_with?') do assert_true "hello".start_with?("heaven", "hell") assert_true !"hello".start_with?("heaven", "paradise") -- cgit v1.2.3 From 68523b4ec4a271134aae34d744582a974558c962 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 20 Sep 2018 17:14:15 +0900 Subject: Add `String#squeeze` and `#squeeze!`; ref #4086 mruby restriction: `String#squeeze` can take more than 1 pattern arguments in CRuby, in that case, the intersection of patterns will be used to match. But in mruby, it doesn't take multiple patterns. --- mrbgems/mruby-string-ext/src/string.c | 103 ++++++++++++++++++++++++++++++-- mrbgems/mruby-string-ext/test/string.rb | 12 ++++ 2 files changed, 111 insertions(+), 4 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index e41dde448..ddc4505fc 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -262,12 +262,11 @@ tr_pattern_free(mrb_state *mrb, struct tr_pattern *pat) } static struct tr_pattern* -tr_parse_pattern(mrb_state *mrb, const mrb_value v_pattern, mrb_bool flag_reverse_enable) +tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_pattern, mrb_bool flag_reverse_enable) { const char *pattern = RSTRING_PTR(v_pattern); int pattern_length = RSTRING_LEN(v_pattern); mrb_bool flag_reverse = FALSE; - struct tr_pattern *ret = NULL; struct tr_pattern *pat1; int i = 0; @@ -391,8 +390,8 @@ str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squee mrb_int lastch = -1; mrb_str_modify(mrb, mrb_str_ptr(str)); - pat = tr_parse_pattern(mrb, p1, TRUE); - rep = tr_parse_pattern(mrb, p2, FALSE); + pat = tr_parse_pattern(mrb, NULL, p1, TRUE); + rep = tr_parse_pattern(mrb, NULL, p2, FALSE); s = RSTRING_PTR(str); len = RSTRING_LEN(str); @@ -539,6 +538,100 @@ mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str) return mrb_nil_value(); } +static mrb_bool +str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) +{ + struct tr_pattern *pat = NULL; + mrb_int i; + char *s; + mrb_int len; + mrb_bool flag_changed = FALSE; + mrb_int lastch = -1; + + mrb_str_modify(mrb, mrb_str_ptr(str)); + if (!mrb_nil_p(v_pat)) { + pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); + } + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + + if (pat) { + for (i = 0; i < len; i++) { + mrb_int n = tr_find_character(pat, s[i]); + + if (n >= 0 && s[i] == lastch) { + flag_changed = TRUE; + memmove(s + i, s + i + 1, len - i); + len--; + i--; + } + lastch = s[i]; + } + } + else { + for (i = 0; i < len; i++) { + if (s[i] == lastch) { + flag_changed = TRUE; + memmove(s + i, s + i + 1, len - i); + len--; + i--; + } + lastch = s[i]; + } + } + tr_pattern_free(mrb, pat); + + RSTR_SET_LEN(RSTRING(str), len); + RSTRING_PTR(str)[len] = 0; + + return flag_changed; +} + +/* + * call-seq: + * str.squeeze([other_str]) -> new_str + * + * Builds a set of characters from the other_str + * parameter(s) using the procedure described for String#count. Returns a + * new string where runs of the same character that occur in this set are + * replaced by a single character. If no arguments are given, all runs of + * identical characters are replaced by a single character. + * + * "yellow moon".squeeze #=> "yelow mon" + * " now is the".squeeze(" ") #=> " now is the" + * "putters shoot balls".squeeze("m-z") #=> "puters shot balls" + */ +static mrb_value +mrb_str_squeeze(mrb_state *mrb, mrb_value str) +{ + mrb_value pat = mrb_nil_value(); + mrb_value dup; + + mrb_get_args(mrb, "|S", &pat); + dup = mrb_str_dup(mrb, str); + str_squeeze(mrb, dup, pat); + return dup; +} + +/* + * call-seq: + * str.squeeze!([other_str]) -> str or nil + * + * Squeezes str in place, returning either str, or nil if no + * changes were made. + */ +static mrb_value +mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value pat = mrb_nil_value(); + + mrb_get_args(mrb, "|S", &pat); + if (str_squeeze(mrb, str, pat)) { + return str; + } + return mrb_nil_value(); +} + static mrb_value mrb_str_hex(mrb_state *mrb, mrb_value self) { @@ -928,6 +1021,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "squeeze", mrb_str_squeeze, MRB_ARGS_OPT(1)); + mrb_define_method(mrb, s, "squeeze!", mrb_str_squeeze_bang, MRB_ARGS_OPT(1)); mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "hex", mrb_str_hex, MRB_ARGS_NONE()); diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index d50a2b3b4..fd6f83e71 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -173,6 +173,18 @@ assert('String#tr_s!') do assert_nil s.tr_s!('l', 'r') end +assert('String#squeeze') do + assert_equal "yelow mon", "yellow moon".squeeze + assert_equal " now is the", " now is the".squeeze(" ") + assert_equal "puters shot balls", "putters shoot balls".squeeze("m-z") +end + +assert('String#squeeze!') do + s = " now is the" + assert_equal " now is the", s.squeeze!(" ") + assert_equal " now is the", s +end + assert('String#start_with?') do assert_true "hello".start_with?("heaven", "hell") assert_true !"hello".start_with?("heaven", "paradise") -- cgit v1.2.3 From 58f7f2361a39ae288c4233ca434e1dbd37f127d0 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 20 Sep 2018 17:25:07 +0900 Subject: Implement `String#count`; ref #4086 mruby restriction: In mruby, `String#count` does not take multiple pattern arguments, but only one pattern. --- mrbgems/mruby-string-ext/src/string.c | 34 +++++++++++++++++++++++++++++++++ mrbgems/mruby-string-ext/test/string.rb | 9 +++++++++ 2 files changed, 43 insertions(+) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index ddc4505fc..d42a5d488 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -632,6 +632,39 @@ mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str) return mrb_nil_value(); } +/* + * call_seq: + * str.count([other_str]) -> integer + * + * Each other_str parameter defines a set of characters to count. The + * intersection of these sets defines the characters to count in str. Any + * other_str that starts with a caret ^ is negated. The sequence c1-c2 + * means all characters between c1 and c2. The backslash character \ can + * be used to escape ^ or - and is otherwise ignored unless it appears at + * the end of a sequence or the end of a other_str. + */ +static mrb_value +mrb_str_count(mrb_state *mrb, mrb_value str) +{ + mrb_value v_pat = mrb_nil_value(); + struct tr_pattern *pat = NULL; + mrb_int i; + char *s; + mrb_int len; + mrb_int count = 0; + + mrb_get_args(mrb, "S", &v_pat); + pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + for (i = 0; i < len; i++) { + mrb_int n = tr_find_character(pat, s[i]); + + if (n >= 0) count++; + } + return mrb_fixnum_value(count); +} + static mrb_value mrb_str_hex(mrb_state *mrb, mrb_value self) { @@ -1017,6 +1050,7 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "concat", mrb_str_concat_m, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "<<", mrb_str_concat_m, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "count", mrb_str_count, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "tr", mrb_str_tr, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2)); diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index fd6f83e71..3e9ab5b1b 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -147,6 +147,15 @@ assert('String#casecmp') do assert_equal 0, "abcdef".casecmp(o) end +assert('String#count') do + s = "abccdeff123" + assert_equal 1, s.count("a") + assert_equal 2, s.count("ab") + assert_equal 9, s.count("^c") + assert_equal 8, s.count("a-z") + assert_equal 4, s.count("a0-9") +end + assert('String#tr') do assert_equal "ABC", "abc".tr('a-z', 'A-Z') assert_equal "hippo", "hello".tr('el', 'ip') -- cgit v1.2.3 From 346f154ece3bd68b63dfee4b4c4d9a20c0eee063 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 20 Sep 2018 18:05:26 +0900 Subject: Implement `String#delete` and `#delete!`; ref #4086 mruby restriction: In mruby, `String#delete` only takes single pattern argument. --- mrbgems/mruby-string-ext/src/string.c | 58 +++++++++++++++++++++++++++++++++ mrbgems/mruby-string-ext/test/string.rb | 14 ++++++++ 2 files changed, 72 insertions(+) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index d42a5d488..a91b483e7 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -632,6 +632,62 @@ mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str) return mrb_nil_value(); } +static mrb_bool +str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) +{ + struct tr_pattern *pat = NULL; + mrb_int i; + char *s; + mrb_int len; + mrb_bool flag_changed = FALSE; + + mrb_str_modify(mrb, mrb_str_ptr(str)); + pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + + for (i = 0; i < len; i++) { + mrb_int n = tr_find_character(pat, s[i]); + + if (n >= 0) { + flag_changed = TRUE; + memmove(s + i, s + i + 1, len - i); + len--; + i--; + } + } + tr_pattern_free(mrb, pat); + + RSTR_SET_LEN(RSTRING(str), len); + RSTRING_PTR(str)[len] = 0; + + return flag_changed; +} + +static mrb_value +mrb_str_delete(mrb_state *mrb, mrb_value str) +{ + mrb_value pat; + mrb_value dup; + + mrb_get_args(mrb, "S", &pat); + dup = mrb_str_dup(mrb, str); + str_delete(mrb, dup, pat); + return dup; +} + +static mrb_value +mrb_str_delete_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value pat; + + mrb_get_args(mrb, "S", &pat); + if (str_delete(mrb, str, pat)) { + return str; + } + return mrb_nil_value(); +} + /* * call_seq: * str.count([other_str]) -> integer @@ -1057,6 +1113,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "squeeze", mrb_str_squeeze, MRB_ARGS_OPT(1)); mrb_define_method(mrb, s, "squeeze!", mrb_str_squeeze_bang, MRB_ARGS_OPT(1)); + mrb_define_method(mrb, s, "delete", mrb_str_delete, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "delete!", mrb_str_delete_bang, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "hex", mrb_str_hex, MRB_ARGS_NONE()); diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 3e9ab5b1b..36a253989 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -194,6 +194,20 @@ assert('String#squeeze!') do assert_equal " now is the", s end +assert('String#delete') do + assert_equal "he", "hello".delete("lo") + assert_equal "hll", "hello".delete("aeiou") + assert_equal "ll", "hello".delete("^l") + assert_equal "ho", "hello".delete("ej-m") +end + +assert('String#delete!') do + s = "hello" + assert_equal "he", s.delete!("lo") + assert_equal "he", s + assert_nil s.delete!("lz") +end + assert('String#start_with?') do assert_true "hello".start_with?("heaven", "hell") assert_true !"hello".start_with?("heaven", "paradise") -- cgit v1.2.3 From f2084f300ba87ff6e42437a52aebb5e7a6c19355 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Fri, 2 Nov 2018 08:58:21 +0900 Subject: Add tests for empty patterns for `tr` and `count`; #4156 #4157 --- mrbgems/mruby-string-ext/test/string.rb | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 36a253989..f0f8be6b3 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -149,6 +149,7 @@ end assert('String#count') do s = "abccdeff123" + assert_equal 0, s.count("") assert_equal 1, s.count("a") assert_equal 2, s.count("ab") assert_equal 9, s.count("^c") @@ -161,6 +162,7 @@ assert('String#tr') do assert_equal "hippo", "hello".tr('el', 'ip') assert_equal "Ruby", "Lisp".tr("Lisp", "Ruby") assert_equal "*e**o", "hello".tr('^aeiou', '*') + assert_equal "heo", "hello".tr('l', '') end assert('String#tr!') do -- cgit v1.2.3 From afca99a40b8a3415b3a9a0e8fc41c93ddcbb11d8 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 19 Sep 2018 20:53:32 +0900 Subject: Remove implicit conversion using `to_int` method. The ISO standard does not include implicit type conversion using `to_int`. This implicit conversion often causes vulnerability. There will be no more attacks like #4120. In addition, we have added internal convenience method `__to_int` which does type check and conversion (from floats). --- include/mruby.h | 1 - mrbgems/mruby-enum-ext/mrblib/enum.rb | 27 +++++-------- mrbgems/mruby-enumerator/mrblib/enumerator.rb | 8 ++-- mrbgems/mruby-enumerator/test/enumerator.rb | 6 --- mrbgems/mruby-kernel-ext/src/kernel.c | 5 +-- mrbgems/mruby-numeric-ext/src/numeric_ext.c | 17 ++++---- mrbgems/mruby-pack/src/pack.c | 6 ++- mrbgems/mruby-random/src/random.c | 12 +++--- mrbgems/mruby-random/test/random.rb | 12 ------ mrbgems/mruby-range-ext/mrblib/range.rb | 5 +-- mrbgems/mruby-string-ext/test/string.rb | 12 ------ mrblib/array.rb | 2 +- src/kernel.c | 2 + src/numeric.c | 4 -- src/object.c | 57 ++++++++------------------- 15 files changed, 51 insertions(+), 125 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/include/mruby.h b/include/mruby.h index 8eff31746..33597101a 100644 --- a/include/mruby.h +++ b/include/mruby.h @@ -716,7 +716,6 @@ MRB_API mrb_value mrb_notimplement_m(mrb_state*, mrb_value); * @return [mrb_value] The newly duplicated object. */ MRB_API mrb_value mrb_obj_dup(mrb_state *mrb, mrb_value obj); -MRB_API mrb_value mrb_check_to_integer(mrb_state *mrb, mrb_value val, const char *method); /** * Returns true if obj responds to the given method. If the method was defined for that diff --git a/mrbgems/mruby-enum-ext/mrblib/enum.rb b/mrbgems/mruby-enum-ext/mrblib/enum.rb index ba92decee..fedf8b1ae 100644 --- a/mrbgems/mruby-enum-ext/mrblib/enum.rb +++ b/mrbgems/mruby-enum-ext/mrblib/enum.rb @@ -13,10 +13,9 @@ module Enumerable # a.drop(3) #=> [4, 5, 0] def drop(n) - raise TypeError, "no implicit conversion of #{n.class} into Integer" unless n.respond_to?(:to_int) + n = n.__to_int raise ArgumentError, "attempt to drop negative size" if n < 0 - n = n.to_int ary = [] self.each {|*val| n == 0 ? ary << val.__svalue : n -= 1 } ary @@ -57,8 +56,8 @@ module Enumerable # a.take(3) #=> [1, 2, 3] def take(n) - raise TypeError, "no implicit conversion of #{n.class} into Integer" unless n.respond_to?(:to_int) - i = n.to_int + n = n.__to_int + i = n.to_i raise ArgumentError, "attempt to take negative size" if i < 0 ary = [] return ary if i == 0 @@ -113,12 +112,12 @@ module Enumerable # [8, 9, 10] def each_cons(n, &block) - raise TypeError, "no implicit conversion of #{n.class} into Integer" unless n.respond_to?(:to_int) + n = n.__to_int raise ArgumentError, "invalid size" if n <= 0 return to_enum(:each_cons,n) unless block ary = [] - n = n.to_int + n = n.to_i self.each do |*val| ary.shift if ary.size == n ary << val.__svalue @@ -141,12 +140,12 @@ module Enumerable # [10] def each_slice(n, &block) - raise TypeError, "no implicit conversion of #{n.class} into Integer" unless n.respond_to?(:to_int) + n = n.__to_int raise ArgumentError, "invalid slice size" if n <= 0 return to_enum(:each_slice,n) unless block ary = [] - n = n.to_int + n = n.to_i self.each do |*val| ary << val.__svalue if ary.size == n @@ -223,9 +222,7 @@ module Enumerable end return nil when 1 - n = args[0] - raise TypeError, "no implicit conversion of #{n.class} into Integer" unless n.respond_to?(:to_int) - i = n.to_int + i = args[0].__to_int raise ArgumentError, "attempt to take negative size" if i < 0 ary = [] return ary if i == 0 @@ -673,13 +670,7 @@ module Enumerable if nv.nil? n = -1 else - unless nv.respond_to?(:to_int) - raise TypeError, "no implicit conversion of #{nv.class} into Integer" - end - n = nv.to_int - unless n.kind_of?(Integer) - raise TypeError, "no implicit conversion of #{nv.class} into Integer" - end + n = nv.__to_int return nil if n <= 0 end diff --git a/mrbgems/mruby-enumerator/mrblib/enumerator.rb b/mrbgems/mruby-enumerator/mrblib/enumerator.rb index dbc7d3004..6dd971f3a 100644 --- a/mrbgems/mruby-enumerator/mrblib/enumerator.rb +++ b/mrbgems/mruby-enumerator/mrblib/enumerator.rb @@ -157,12 +157,10 @@ class Enumerator def with_index(offset=0, &block) return to_enum :with_index, offset unless block - offset = if offset.nil? - 0 - elsif offset.respond_to?(:to_int) - offset.to_int + if offset.nil? + offset = 0 else - raise TypeError, "no implicit conversion of #{offset.class} into Integer" + offset = offset.__to_int end n = offset - 1 diff --git a/mrbgems/mruby-enumerator/test/enumerator.rb b/mrbgems/mruby-enumerator/test/enumerator.rb index ef4970883..f3bd1bdba 100644 --- a/mrbgems/mruby-enumerator/test/enumerator.rb +++ b/mrbgems/mruby-enumerator/test/enumerator.rb @@ -54,12 +54,6 @@ assert 'Enumerator#with_index' do assert_equal [[[1, 10], 20], [[2, 11], 21], [[3, 12], 22]], a end -assert 'Enumerator#with_index nonnum offset' do - s = Object.new - def s.to_int; 1 end - assert_equal([[1,1],[2,2],[3,3]], @obj.to_enum(:foo, 1, 2, 3).with_index(s).to_a) -end - assert 'Enumerator#with_index string offset' do assert_raise(TypeError){ @obj.to_enum(:foo, 1, 2, 3).with_index('1').to_a } end diff --git a/mrbgems/mruby-kernel-ext/src/kernel.c b/mrbgems/mruby-kernel-ext/src/kernel.c index 32d86376a..a60e9a210 100644 --- a/mrbgems/mruby-kernel-ext/src/kernel.c +++ b/mrbgems/mruby-kernel-ext/src/kernel.c @@ -93,9 +93,8 @@ mrb_f_method(mrb_state *mrb, mrb_value self) * (0, 0b, and 0x) are honored. * In any case, strings should be strictly conformed to numeric * representation. This behavior is different from that of - * String#to_i. Non string values will be converted using - * to_int, and to_i. Passing nil - * raises a TypeError. + * String#to_i. Non string values will be treated as integers. + * Passing nil raises a TypeError. * * Integer(123.999) #=> 123 * Integer("0x1a") #=> 26 diff --git a/mrbgems/mruby-numeric-ext/src/numeric_ext.c b/mrbgems/mruby-numeric-ext/src/numeric_ext.c index 1d6a07769..cd8bbf187 100644 --- a/mrbgems/mruby-numeric-ext/src/numeric_ext.c +++ b/mrbgems/mruby-numeric-ext/src/numeric_ext.c @@ -2,13 +2,10 @@ #include static inline mrb_int -to_int(mrb_value x) +to_int(mrb_state *mrb, mrb_value x) { - double f; - - if (mrb_fixnum_p(x)) return mrb_fixnum(x); - f = mrb_float(x); - return (mrb_int)f; + x = mrb_to_int(mrb, x); + return mrb_fixnum(x); } /* @@ -28,7 +25,7 @@ mrb_int_chr(mrb_state *mrb, mrb_value x) mrb_int chr; char c; - chr = to_int(x); + chr = to_int(mrb, x); if (chr >= (1 << CHAR_BIT)) { mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", x); } @@ -48,8 +45,8 @@ mrb_int_allbits(mrb_state *mrb, mrb_value self) { mrb_int n, m; - n = to_int(self); mrb_get_args(mrb, "i", &m); + n = to_int(mrb, self); return mrb_bool_value((n & m) == m); } @@ -64,8 +61,8 @@ mrb_int_anybits(mrb_state *mrb, mrb_value self) { mrb_int n, m; - n = to_int(self); mrb_get_args(mrb, "i", &m); + n = to_int(mrb, self); return mrb_bool_value((n & m) != 0); } @@ -80,8 +77,8 @@ mrb_int_nobits(mrb_state *mrb, mrb_value self) { mrb_int n, m; - n = to_int(self); mrb_get_args(mrb, "i", &m); + n = to_int(mrb, self); return mrb_bool_value((n & m) == 0); } diff --git a/mrbgems/mruby-pack/src/pack.c b/mrbgems/mruby-pack/src/pack.c index 796ba4d34..5caf7b62b 100644 --- a/mrbgems/mruby-pack/src/pack.c +++ b/mrbgems/mruby-pack/src/pack.c @@ -1124,14 +1124,16 @@ mrb_pack_pack(mrb_state *mrb, mrb_value ary) o = mrb_ary_ref(mrb, ary, aidx); if (type == PACK_TYPE_INTEGER) { o = mrb_to_int(mrb, o); + } #ifndef MRB_WITHOUT_FLOAT - } else if (type == PACK_TYPE_FLOAT) { + else if (type == PACK_TYPE_FLOAT) { if (!mrb_float_p(o)) { mrb_float f = mrb_to_flo(mrb, o); o = mrb_float_value(mrb, f); } + } #endif - } else if (type == PACK_TYPE_STRING) { + else if (type == PACK_TYPE_STRING) { if (!mrb_string_p(o)) { mrb_raisef(mrb, E_TYPE_ERROR, "can't convert %S into String", mrb_class_path(mrb, mrb_obj_class(mrb, o))); } diff --git a/mrbgems/mruby-random/src/random.c b/mrbgems/mruby-random/src/random.c index 5b926a228..68209840a 100644 --- a/mrbgems/mruby-random/src/random.c +++ b/mrbgems/mruby-random/src/random.c @@ -79,12 +79,12 @@ get_opt(mrb_state* mrb) mrb_get_args(mrb, "|o", &arg); if (!mrb_nil_p(arg)) { - arg = mrb_check_convert_type(mrb, arg, MRB_TT_FIXNUM, "Fixnum", "to_int"); - if (mrb_nil_p(arg)) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid argument type"); - } - if (mrb_fixnum(arg) < 0) { - arg = mrb_fixnum_value(0 - mrb_fixnum(arg)); + mrb_int i; + + arg = mrb_to_int(mrb, arg); + i = mrb_fixnum(arg); + if (i < 0) { + arg = mrb_fixnum_value(0 - i); } } return arg; diff --git a/mrbgems/mruby-random/test/random.rb b/mrbgems/mruby-random/test/random.rb index 1c59be3a6..1653ae4a6 100644 --- a/mrbgems/mruby-random/test/random.rb +++ b/mrbgems/mruby-random/test/random.rb @@ -74,15 +74,3 @@ assert('Array#shuffle!(random)') do ary1 != [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] and 10.times { |x| ary1.include? x } and ary1 == ary2 end - -assert('Array#sample checks input length after reading arguments') do - $ary = [1, 2, 3] - class ArrayChange - def to_i - $ary << 4 - 4 - end - end - - assert_equal [1, 2, 3, 4], $ary.sample(ArrayChange.new).sort -end diff --git a/mrbgems/mruby-range-ext/mrblib/range.rb b/mrbgems/mruby-range-ext/mrblib/range.rb index e5d1fb079..de7925ba7 100644 --- a/mrbgems/mruby-range-ext/mrblib/range.rb +++ b/mrbgems/mruby-range-ext/mrblib/range.rb @@ -15,10 +15,7 @@ class Range raise ArgumentError, "wrong number of arguments (given #{args.length}, expected 1)" unless args.length == 1 nv = args[0] - raise TypeError, "no implicit conversion from nil to integer" if nv.nil? - raise TypeError, "no implicit conversion of #{nv.class} into Integer" unless nv.respond_to?(:to_int) - n = nv.to_int - raise TypeError, "no implicit conversion of #{nv.class} into Integer" unless n.kind_of?(Integer) + n = nv.__to_int raise ArgumentError, "negative array size (or size too big)" unless 0 <= n ary = [] each do |i| diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index f0f8be6b3..781506949 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -31,18 +31,6 @@ assert('String#setbyte') do assert_equal("Hello", str1) end -assert("String#setbyte raises IndexError if arg conversion resizes String") do - $s = "01234\n" - class Tmp - def to_i - $s.chomp! '' - 95 - end - end - tmp = Tmp.new - assert_raise(IndexError) { $s.setbyte(5, tmp) } -end - assert('String#byteslice') do str1 = "hello" assert_equal("e", str1.byteslice(1)) diff --git a/mrblib/array.rb b/mrblib/array.rb index 53d880660..a677b2a1f 100644 --- a/mrblib/array.rb +++ b/mrblib/array.rb @@ -66,7 +66,7 @@ class Array # # ISO 15.2.12.5.15 def initialize(size=0, obj=nil, &block) - raise TypeError, "expected Integer for 1st argument" unless size.kind_of? Integral + size = size.__to_int raise ArgumentError, "negative array size" if size < 0 self.clear diff --git a/src/kernel.c b/src/kernel.c index db681d510..195594d6b 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -830,6 +830,7 @@ mrb_obj_ceqq(mrb_state *mrb, mrb_value self) } mrb_value mrb_obj_equal_m(mrb_state *mrb, mrb_value); + void mrb_init_kernel(mrb_state *mrb) { @@ -871,6 +872,7 @@ mrb_init_kernel(mrb_state *mrb) mrb_define_method(mrb, krn, "respond_to?", obj_respond_to, MRB_ARGS_ANY()); /* 15.3.1.3.43 */ mrb_define_method(mrb, krn, "to_s", mrb_any_to_s, MRB_ARGS_NONE()); /* 15.3.1.3.46 */ mrb_define_method(mrb, krn, "__case_eqq", mrb_obj_ceqq, MRB_ARGS_REQ(1)); /* internal */ + mrb_define_method(mrb, krn, "__to_int", mrb_to_int, MRB_ARGS_NONE()); /* internal */ mrb_define_method(mrb, krn, "class_defined?", mrb_krn_class_defined, MRB_ARGS_REQ(1)); diff --git a/src/numeric.c b/src/numeric.c index f7f0318e8..3624831cc 100644 --- a/src/numeric.c +++ b/src/numeric.c @@ -674,7 +674,6 @@ flo_round(mrb_state *mrb, mrb_value num) /* * call-seq: * flt.to_i -> integer - * flt.to_int -> integer * flt.truncate -> integer * * Returns flt truncated to an Integer. @@ -714,7 +713,6 @@ flo_nan_p(mrb_state *mrb, mrb_value num) /* * call-seq: * int.to_i -> integer - * int.to_int -> integer * * As int is already an Integer, all these * methods simply return the receiver. @@ -1513,7 +1511,6 @@ mrb_init_numeric(mrb_state *mrb) MRB_SET_INSTANCE_TT(integer, MRB_TT_FIXNUM); mrb_undef_class_method(mrb, integer, "new"); mrb_define_method(mrb, integer, "to_i", int_to_i, MRB_ARGS_NONE()); /* 15.2.8.3.24 */ - mrb_define_method(mrb, integer, "to_int", int_to_i, MRB_ARGS_NONE()); #ifndef MRB_WITHOUT_FLOAT mrb_define_method(mrb, integer, "ceil", int_to_i, MRB_ARGS_REQ(1)); /* 15.2.8.3.8 (x) */ mrb_define_method(mrb, integer, "floor", int_to_i, MRB_ARGS_REQ(1)); /* 15.2.8.3.10 (x) */ @@ -1565,7 +1562,6 @@ mrb_init_numeric(mrb_state *mrb) mrb_define_method(mrb, fl, "round", flo_round, MRB_ARGS_OPT(1)); /* 15.2.9.3.12 */ mrb_define_method(mrb, fl, "to_f", flo_to_f, MRB_ARGS_NONE()); /* 15.2.9.3.13 */ mrb_define_method(mrb, fl, "to_i", flo_truncate, MRB_ARGS_NONE()); /* 15.2.9.3.14 */ - mrb_define_method(mrb, fl, "to_int", flo_truncate, MRB_ARGS_NONE()); mrb_define_method(mrb, fl, "truncate", flo_truncate, MRB_ARGS_NONE()); /* 15.2.9.3.15 */ mrb_define_method(mrb, fl, "divmod", flo_divmod, MRB_ARGS_REQ(1)); mrb_define_method(mrb, fl, "eql?", flo_eql, MRB_ARGS_REQ(1)); /* 15.2.8.3.16 */ diff --git a/src/object.c b/src/object.c index 8724c5416..ba6fa3947 100644 --- a/src/object.c +++ b/src/object.c @@ -322,19 +322,6 @@ convert_type(mrb_state *mrb, mrb_value val, const char *tname, const char *metho return mrb_funcall_argv(mrb, val, m, 0, 0); } -MRB_API mrb_value -mrb_check_to_integer(mrb_state *mrb, mrb_value val, const char *method) -{ - mrb_value v; - - if (mrb_fixnum_p(val)) return val; - v = convert_type(mrb, val, "Integer", method, FALSE); - if (mrb_nil_p(v) || !mrb_fixnum_p(v)) { - return mrb_nil_value(); - } - return v; -} - MRB_API mrb_value mrb_convert_type(mrb_state *mrb, mrb_value val, enum mrb_vtype type, const char *tname, const char *method) { @@ -505,25 +492,22 @@ mrb_obj_is_kind_of(mrb_state *mrb, mrb_value obj, struct RClass *c) return FALSE; } -static mrb_value -mrb_to_integer(mrb_state *mrb, mrb_value val, const char *method) -{ - mrb_value v; - - if (mrb_fixnum_p(val)) return val; - v = convert_type(mrb, val, "Integer", method, TRUE); - if (!mrb_obj_is_kind_of(mrb, v, mrb->fixnum_class)) { - mrb_value type = inspect_type(mrb, val); - mrb_raisef(mrb, E_TYPE_ERROR, "can't convert %S to Integer (%S#%S gives %S)", - type, type, mrb_str_new_cstr(mrb, method), inspect_type(mrb, v)); - } - return v; -} - MRB_API mrb_value mrb_to_int(mrb_state *mrb, mrb_value val) { - return mrb_to_integer(mrb, val, "to_int"); + + if (!mrb_fixnum_p(val)) { + mrb_value type; + +#ifndef MRB_WITHOUT_FLOAT + if (mrb_float_p(val)) { + return mrb_flo_to_fixnum(mrb, val); + } +#endif + type = inspect_type(mrb, val); + mrb_raisef(mrb, E_TYPE_ERROR, "can't convert %S to Integer", type); + } + return val; } MRB_API mrb_value @@ -533,18 +517,12 @@ mrb_convert_to_integer(mrb_state *mrb, mrb_value val, mrb_int base) if (mrb_nil_p(val)) { if (base != 0) goto arg_error; - mrb_raise(mrb, E_TYPE_ERROR, "can't convert nil into Integer"); + mrb_raise(mrb, E_TYPE_ERROR, "can't convert nil into Integer"); } switch (mrb_type(val)) { #ifndef MRB_WITHOUT_FLOAT case MRB_TT_FLOAT: if (base != 0) goto arg_error; - else { - mrb_float f = mrb_float(val); - if (FIXABLE_FLOAT(f)) { - break; - } - } return mrb_flo_to_fixnum(mrb, val); #endif @@ -568,11 +546,8 @@ mrb_convert_to_integer(mrb_state *mrb, mrb_value val, mrb_int base) arg_error: mrb_raise(mrb, E_ARGUMENT_ERROR, "base specified for non string value"); } - tmp = convert_type(mrb, val, "Integer", "to_int", FALSE); - if (mrb_nil_p(tmp) || !mrb_fixnum_p(tmp)) { - tmp = mrb_to_integer(mrb, val, "to_i"); - } - return tmp; + /* to raise TypeError */ + return mrb_to_int(mrb, val); } MRB_API mrb_value -- cgit v1.2.3 From ff08856fe314faa4d16b4502c0960a3475387846 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 19 Sep 2018 21:51:53 +0900 Subject: Remove implicit conversion using `to_str` method; fix #3854 We have added internal convenience method `__to_str` which does string type check. The issue #3854 was fixed but fundamental flaw of lack of stack depth check along with fibers still remains. Use `MRB_GC_FIXED_ARENA` for workaround. --- include/mruby.h | 5 +--- include/mruby/string.h | 3 +-- mrbgems/mruby-io/src/file.c | 2 +- mrbgems/mruby-kernel-ext/src/kernel.c | 8 ++----- mrbgems/mruby-string-ext/mrblib/string.rb | 6 ++--- mrbgems/mruby-string-ext/src/string.c | 6 ++--- mrbgems/mruby-string-ext/test/string.rb | 11 --------- mrblib/string.rb | 11 ++++----- src/class.c | 11 +++++++-- src/kernel.c | 39 +++++-------------------------- src/object.c | 27 +++++++++++++++++++++ src/string.c | 32 ++----------------------- test/t/string.rb | 13 ----------- 13 files changed, 60 insertions(+), 114 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/include/mruby.h b/include/mruby.h index 33597101a..12df9cd5a 100644 --- a/include/mruby.h +++ b/include/mruby.h @@ -854,10 +854,6 @@ typedef const char *mrb_args_format; /** * Retrieve arguments from mrb_state. * - * When applicable, implicit conversions (such as `to_str`, `to_ary`, `to_hash`) are - * applied to received arguments. - * Used inside a function of mrb_func_t type. - * * @param mrb The current MRuby state. * @param format [mrb_args_format] is a list of format specifiers * @param ... The passing variadic arguments must be a pointer of retrieving type. @@ -1187,6 +1183,7 @@ MRB_API void mrb_gc_unregister(mrb_state *mrb, mrb_value obj); MRB_API mrb_value mrb_to_int(mrb_state *mrb, mrb_value val); #define mrb_int(mrb, val) mrb_fixnum(mrb_to_int(mrb, val)) +MRB_API mrb_value mrb_to_str(mrb_state *mrb, mrb_value val); MRB_API void mrb_check_type(mrb_state *mrb, mrb_value x, enum mrb_vtype t); typedef enum call_type { diff --git a/include/mruby/string.h b/include/mruby/string.h index 481b2fb38..fa1955f48 100644 --- a/include/mruby/string.h +++ b/include/mruby/string.h @@ -311,8 +311,7 @@ MRB_API mrb_value mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb * @param [mrb_value] str Ruby string. * @return [mrb_value] A Ruby string. */ -MRB_API mrb_value mrb_string_type(mrb_state *mrb, mrb_value str); - +MRB_API mrb_value mrb_ensure_string_type(mrb_state *mrb, mrb_value str); MRB_API mrb_value mrb_check_string_type(mrb_state *mrb, mrb_value str); MRB_API mrb_value mrb_str_new_capa(mrb_state *mrb, size_t capa); MRB_API mrb_value mrb_str_buf_new(mrb_state *mrb, size_t capa); diff --git a/mrbgems/mruby-io/src/file.c b/mrbgems/mruby-io/src/file.c index e65741061..c00663481 100644 --- a/mrbgems/mruby-io/src/file.c +++ b/mrbgems/mruby-io/src/file.c @@ -115,7 +115,7 @@ mrb_file_s_unlink(mrb_state *mrb, mrb_value obj) mrb_get_args(mrb, "*", &argv, &argc); for (i = 0; i < argc; i++) { const char *utf8_path; - pathv = mrb_convert_type(mrb, argv[i], MRB_TT_STRING, "String", "to_str"); + pathv = mrb_ensure_string_type(mrb, argv[i]); utf8_path = mrb_string_value_cstr(mrb, &pathv); path = mrb_locale_from_utf8(utf8_path, -1); if (UNLINK(path) < 0) { diff --git a/mrbgems/mruby-kernel-ext/src/kernel.c b/mrbgems/mruby-kernel-ext/src/kernel.c index a60e9a210..bc2656399 100644 --- a/mrbgems/mruby-kernel-ext/src/kernel.c +++ b/mrbgems/mruby-kernel-ext/src/kernel.c @@ -141,8 +141,7 @@ mrb_f_float(mrb_state *mrb, mrb_value self) * String(arg) -> string * * Returns arg as an String. - * - * First tries to call its to_str method, then its to_s method. + * converted using to_s method. * * String(self) #=> "main" * String(self.class) #=> "Object" @@ -154,10 +153,7 @@ mrb_f_string(mrb_state *mrb, mrb_value self) mrb_value arg, tmp; mrb_get_args(mrb, "o", &arg); - tmp = mrb_check_convert_type(mrb, arg, MRB_TT_STRING, "String", "to_str"); - if (mrb_nil_p(tmp)) { - tmp = mrb_check_convert_type(mrb, arg, MRB_TT_STRING, "String", "to_s"); - } + tmp = mrb_convert_type(mrb, arg, MRB_TT_STRING, "String", "to_s"); return tmp; } diff --git a/mrbgems/mruby-string-ext/mrblib/string.rb b/mrbgems/mruby-string-ext/mrblib/string.rb index 27ca30610..9212d83a5 100644 --- a/mrbgems/mruby-string-ext/mrblib/string.rb +++ b/mrbgems/mruby-string-ext/mrblib/string.rb @@ -12,8 +12,8 @@ class String # String.try_convert(/re/) #=> nil # def self.try_convert(obj) - if obj.respond_to?(:to_str) - obj.to_str + if self === obj + obj else nil end @@ -142,7 +142,7 @@ class String # "abcdef".casecmp("ABCDEF") #=> 0 # def casecmp(str) - self.downcase <=> str.to_str.downcase + self.downcase <=> str.__to_str.downcase rescue NoMethodError nil end diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 6d661c352..ba7e3c610 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -163,7 +163,7 @@ mrb_str_concat_m(mrb_state *mrb, mrb_value self) if (mrb_fixnum_p(str)) str = mrb_fixnum_chr(mrb, str); else - str = mrb_string_type(mrb, str); + str = mrb_ensure_string_type(mrb, str); mrb_str_concat(mrb, self, str); return self; } @@ -191,7 +191,7 @@ mrb_str_start_with(mrb_state *mrb, mrb_value self) for (i = 0; i < argc; i++) { size_t len_l, len_r; int ai = mrb_gc_arena_save(mrb); - sub = mrb_string_type(mrb, argv[i]); + sub = mrb_ensure_string_type(mrb, argv[i]); mrb_gc_arena_restore(mrb, ai); len_l = RSTRING_LEN(self); len_r = RSTRING_LEN(sub); @@ -220,7 +220,7 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self) for (i = 0; i < argc; i++) { size_t len_l, len_r; int ai = mrb_gc_arena_save(mrb); - sub = mrb_string_type(mrb, argv[i]); + sub = mrb_ensure_string_type(mrb, argv[i]); mrb_gc_arena_restore(mrb, ai); len_l = RSTRING_LEN(self); len_r = RSTRING_LEN(sub); diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 781506949..4ccdfd6c3 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -114,12 +114,6 @@ assert('String#concat') do assert_equal "Hello World!", "Hello " << "World" << 33 assert_equal "Hello World!", "Hello ".concat("World").concat(33) - o = Object.new - def o.to_str - "to_str" - end - assert_equal "hi to_str", "hi " << o - assert_raise(TypeError) { "".concat(Object.new) } end @@ -128,11 +122,6 @@ assert('String#casecmp') do assert_equal 0, "aBcDeF".casecmp("abcdef") assert_equal(-1, "abcdef".casecmp("abcdefg")) assert_equal 0, "abcdef".casecmp("ABCDEF") - o = Object.new - def o.to_str - "ABCDEF" - end - assert_equal 0, "abcdef".casecmp(o) end assert('String#count') do diff --git a/mrblib/string.rb b/mrblib/string.rb index 07b80b340..397603e9d 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -12,7 +12,7 @@ class String def each_line(rs = "\n", &block) return to_enum(:each_line, rs, &block) unless block return block.call(self) if rs.nil? - rs = rs.to_str + rs = rs.__to_str offset = 0 rs_len = rs.length this = dup @@ -67,7 +67,7 @@ class String block = nil end if !replace.nil? || !block - replace = replace.to_str + replace = replace.__to_str end offset = 0 result = [] @@ -129,12 +129,12 @@ class String end pattern, replace = *args - pattern = pattern.to_str + pattern = pattern.__to_str if args.length == 2 && block block = nil end unless block - replace = replace.to_str + replace = replace.__to_str end result = [] this = dup @@ -245,14 +245,13 @@ class String ## # ISO 15.2.10.5.3 def =~(re) - raise TypeError, "type mismatch: String given" if re.respond_to? :to_str re =~ self end ## # ISO 15.2.10.5.27 def match(re, &block) - if re.respond_to? :to_str + if String === re if Object.const_defined?(:Regexp) r = Regexp.new(re) r.match(self, &block) diff --git a/src/class.c b/src/class.c index 50ab0ea59..90c73104e 100644 --- a/src/class.c +++ b/src/class.c @@ -504,10 +504,17 @@ check_type(mrb_state *mrb, mrb_value val, enum mrb_vtype t, const char *c, const return tmp; } +#define CHECK_TYPE(mrb, val, t, c) do { \ + if (mrb_type(val) != (t)) {\ + mrb_raisef(mrb, E_TYPE_ERROR, "expected %S", mrb_str_new_lit(mrb, c));\ + }\ +} while (0) + static mrb_value to_str(mrb_state *mrb, mrb_value val) { - return check_type(mrb, val, MRB_TT_STRING, "String", "to_str"); + CHECK_TYPE(mrb, val, MRB_TT_STRING, "String"); + return val; } static mrb_value @@ -1972,7 +1979,7 @@ mrb_mod_const_get(mrb_state *mrb, mrb_value mod) } /* const get with class path string */ - path = mrb_string_type(mrb, path); + path = mrb_ensure_string_type(mrb, path); ptr = RSTRING_PTR(path); len = RSTRING_LEN(path); off = 0; diff --git a/src/kernel.c b/src/kernel.c index 195594d6b..ce9cd1d44 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -746,6 +746,7 @@ basic_obj_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym id, int pub) { return mrb_respond_to(mrb, obj, id); } + /* 15.3.1.3.43 */ /* * call-seq: @@ -765,45 +766,16 @@ basic_obj_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym id, int pub) static mrb_value obj_respond_to(mrb_state *mrb, mrb_value self) { - mrb_value mid; mrb_sym id, rtm_id; - mrb_bool priv = FALSE, respond_to_p = TRUE; - - mrb_get_args(mrb, "o|b", &mid, &priv); - - if (mrb_symbol_p(mid)) { - id = mrb_symbol(mid); - } - else { - mrb_value tmp; - if (mrb_string_p(mid)) { - tmp = mrb_check_intern_str(mrb, mid); - } - else { - tmp = mrb_check_string_type(mrb, mid); - if (mrb_nil_p(tmp)) { - tmp = mrb_inspect(mrb, mid); - mrb_raisef(mrb, E_TYPE_ERROR, "%S is not a symbol", tmp); - } - tmp = mrb_check_intern_str(mrb, tmp); - } - if (mrb_nil_p(tmp)) { - respond_to_p = FALSE; - } - else { - id = mrb_symbol(tmp); - } - } - - if (respond_to_p) { - respond_to_p = basic_obj_respond_to(mrb, self, id, !priv); - } + mrb_bool priv = FALSE, respond_to_p; + mrb_get_args(mrb, "n|b", &id, &priv); + respond_to_p = basic_obj_respond_to(mrb, self, id, !priv); if (!respond_to_p) { rtm_id = mrb_intern_lit(mrb, "respond_to_missing?"); if (basic_obj_respond_to(mrb, self, rtm_id, !priv)) { mrb_value args[2], v; - args[0] = mid; + args[0] = mrb_symbol_value(id); args[1] = mrb_bool_value(priv); v = mrb_funcall_argv(mrb, self, rtm_id, 2, args); return mrb_bool_value(mrb_bool(v)); @@ -873,6 +845,7 @@ mrb_init_kernel(mrb_state *mrb) mrb_define_method(mrb, krn, "to_s", mrb_any_to_s, MRB_ARGS_NONE()); /* 15.3.1.3.46 */ mrb_define_method(mrb, krn, "__case_eqq", mrb_obj_ceqq, MRB_ARGS_REQ(1)); /* internal */ mrb_define_method(mrb, krn, "__to_int", mrb_to_int, MRB_ARGS_NONE()); /* internal */ + mrb_define_method(mrb, krn, "__to_str", mrb_to_str, MRB_ARGS_NONE()); /* internal */ mrb_define_method(mrb, krn, "class_defined?", mrb_krn_class_defined, MRB_ARGS_REQ(1)); diff --git a/src/object.c b/src/object.c index ba6fa3947..18ccacfb9 100644 --- a/src/object.c +++ b/src/object.c @@ -579,6 +579,33 @@ mrb_Float(mrb_state *mrb, mrb_value val) } #endif +MRB_API mrb_value +mrb_to_str(mrb_state *mrb, mrb_value val) +{ + if (!mrb_string_p(val)) { + mrb_value type = inspect_type(mrb, val); + mrb_raisef(mrb, E_TYPE_ERROR, "can't convert %S to String", type); + } + return val; +} + +MRB_API mrb_value +mrb_ensure_string_type(mrb_state *mrb, mrb_value str) +{ + if (!mrb_string_p(str)) { + mrb_raisef(mrb, E_TYPE_ERROR, "%S cannot be converted to String", + inspect_type(mrb, str)); + } + return str; +} + +MRB_API mrb_value +mrb_check_string_type(mrb_state *mrb, mrb_value str) +{ + if (!mrb_string_p(str)) return mrb_nil_value(); + return str; +} + MRB_API mrb_value mrb_inspect(mrb_state *mrb, mrb_value obj) { diff --git a/src/string.c b/src/string.c index b7abfb762..b6d4ecef0 100644 --- a/src/string.c +++ b/src/string.c @@ -956,15 +956,7 @@ str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2) MRB_API mrb_bool mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2) { - if (mrb_immediate_p(str2)) return FALSE; - if (!mrb_string_p(str2)) { - if (mrb_nil_p(str2)) return FALSE; - if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_str"))) { - return FALSE; - } - str2 = mrb_funcall(mrb, str2, "to_str", 0); - return mrb_equal(mrb, str2, str1); - } + if (!mrb_string_p(str2)) return FALSE; return str_eql(mrb, str1, str2); } @@ -992,14 +984,8 @@ mrb_str_equal_m(mrb_state *mrb, mrb_value str1) MRB_API mrb_value mrb_str_to_str(mrb_state *mrb, mrb_value str) { - mrb_value s; - if (!mrb_string_p(str)) { - s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); - if (mrb_nil_p(s)) { - s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s"); - } - return s; + return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s"); } return str; } @@ -1714,18 +1700,6 @@ mrb_ptr_to_str(mrb_state *mrb, void *p) return mrb_obj_value(p_str); } -MRB_API mrb_value -mrb_string_type(mrb_state *mrb, mrb_value str) -{ - return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); -} - -MRB_API mrb_value -mrb_check_string_type(mrb_state *mrb, mrb_value str) -{ - return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); -} - /* 15.2.10.5.30 */ /* * call-seq: @@ -2379,7 +2353,6 @@ mrb_str_to_f(mrb_state *mrb, mrb_value self) /* * call-seq: * str.to_s => str - * str.to_str => str * * Returns the receiver. */ @@ -2783,7 +2756,6 @@ mrb_init_string(mrb_state *mrb) #endif mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */ mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */ - mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */ mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */ mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */ diff --git a/test/t/string.rb b/test/t/string.rb index e91b915fe..3a1eced16 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -253,19 +253,6 @@ assert('String#chomp!', '15.2.10.5.10') do assert_equal 'abc', e end -assert('String#chomp! uses the correct length') do - class A - def to_str - $s.replace("AA") - "A" - end - end - - $s = "AAA" - $s.chomp!(A.new) - assert_equal $s, "A" -end - assert('String#chop', '15.2.10.5.11') do a = ''.chop b = 'abc'.chop -- cgit v1.2.3 From b5d43a16a38d522aa1d1d1a889585749a6b8086d Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 19 Sep 2018 21:54:32 +0900 Subject: Removed `String#try_convert` method from `mruby-string-ext` gem. Because `try_convert` method rarely used in production. For mruby users, we have `__to_str` utility method to check string type. --- mrbgems/mruby-string-ext/mrblib/string.rb | 19 ------------------- mrbgems/mruby-string-ext/test/string.rb | 7 ------- 2 files changed, 26 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/mrblib/string.rb b/mrbgems/mruby-string-ext/mrblib/string.rb index 9212d83a5..2a323c858 100644 --- a/mrbgems/mruby-string-ext/mrblib/string.rb +++ b/mrbgems/mruby-string-ext/mrblib/string.rb @@ -1,24 +1,5 @@ class String - ## - # call-seq: - # String.try_convert(obj) -> string or nil - # - # Try to convert obj into a String, using to_str method. - # Returns converted string or nil if obj cannot be converted - # for any reason. - # - # String.try_convert("str") #=> "str" - # String.try_convert(/re/) #=> nil - # - def self.try_convert(obj) - if self === obj - obj - else - nil - end - end - ## # call-seq: # string.clear -> string diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 4ccdfd6c3..7c96ab694 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -4,13 +4,6 @@ UTF8STRING = ("\343\201\202".size == 1) -assert('String.try_convert') do - assert_nil String.try_convert(nil) - assert_nil String.try_convert(:foo) - assert_equal "", String.try_convert("") - assert_equal "1,2,3", String.try_convert("1,2,3") -end - assert('String#getbyte') do str1 = "hello" bytes1 = [104, 101, 108, 108, 111] -- cgit v1.2.3 From e3b339bec6c38cf6ea7d763cba3b81e21e5c7700 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Tue, 12 Mar 2019 21:47:33 +0900 Subject: Fix missing assertions in `mruby-string-ext` test --- mrbgems/mruby-string-ext/test/string.rb | 72 +++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 31 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 7c96ab694..44ca1fde2 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -33,62 +33,72 @@ assert('String#byteslice') do end assert('String#dump') do - ("\1" * 100).dump # should not raise an exception - regress #1210 - "\0".inspect == "\"\\000\"" and - "foo".dump == "\"foo\"" + assert_equal("\"\\x00\"", "\0".dump) + assert_equal("\"foo\"", "foo".dump) + assert_nothing_raised { ("\1" * 100).dump } # regress #1210 end assert('String#strip') do s = " abc " - "".strip == "" and " \t\r\n\f\v".strip == "" and - "\0a\0".strip == "\0a" and - "abc".strip == "abc" and - " abc".strip == "abc" and - "abc ".strip == "abc" and - " abc ".strip == "abc" and - s == " abc " + assert_equal("abc", s.strip) + assert_equal(" abc ", s) + assert_equal("", "".strip) + assert_equal("", " \t\r\n\f\v".strip) + assert_equal("\0a", "\0a\0".strip) + assert_equal("abc", "abc".strip) + assert_equal("abc", " abc".strip) + assert_equal("abc", "abc ".strip) end assert('String#lstrip') do s = " abc " - s.lstrip - "".lstrip == "" and " \t\r\n\f\v".lstrip == "" and - "\0a\0".lstrip == "\0a\0" and - "abc".lstrip == "abc" and - " abc".lstrip == "abc" and - "abc ".lstrip == "abc " and - " abc ".lstrip == "abc " and - s == " abc " + assert_equal("abc ", s.lstrip) + assert_equal(" abc ", s) + assert_equal("", "".lstrip) + assert_equal("", " \t\r\n\f\v".lstrip) + assert_equal("\0a\0", "\0a\0".lstrip) + assert_equal("abc", "abc".lstrip) + assert_equal("abc", " abc".lstrip) + assert_equal("abc ", "abc ".lstrip) end assert('String#rstrip') do s = " abc " - s.rstrip - "".rstrip == "" and " \t\r\n\f\v".rstrip == "" and - "\0a\0".rstrip == "\0a" and - "abc".rstrip == "abc" and - " abc".rstrip == " abc" and - "abc ".rstrip == "abc" and - " abc ".rstrip == " abc" and - s == " abc " + assert_equal(" abc", s.rstrip) + assert_equal(" abc ", s) + assert_equal("", "".rstrip) + assert_equal("", " \t\r\n\f\v".rstrip) + assert_equal("\0a", "\0a\0".rstrip) + assert_equal("abc", "abc".rstrip) + assert_equal(" abc", " abc".rstrip) + assert_equal("abc", "abc ".rstrip) end assert('String#strip!') do s = " abc " t = "abc" - s.strip! == "abc" and s == "abc" and t.strip! == nil + assert_equal("abc", s.strip!) + assert_equal("abc", s) + assert_nil(t.strip!) + assert_equal("abc", t) end assert('String#lstrip!') do s = " abc " t = "abc " - s.lstrip! == "abc " and s == "abc " and t.lstrip! == nil + assert_equal("abc ", s.lstrip!) + assert_equal("abc ", s) + assert_nil(t.lstrip!) + assert_equal("abc ", t) end assert('String#rstrip!') do s = " abc " t = " abc" - s.rstrip! == " abc" and s == " abc" and t.rstrip! == nil + assert_equal(" abc", s.rstrip!) + assert_equal(" abc", s) + assert_nil(t.rstrip!) + assert_equal(" abc", t) end assert('String#swapcase') do @@ -127,7 +137,7 @@ assert('String#count') do assert_equal 4, s.count("a0-9") end -assert('String#tr') do +assert('String#tr') do assert_equal "ABC", "abc".tr('a-z', 'A-Z') assert_equal "hippo", "hello".tr('el', 'ip') assert_equal "Ruby", "Lisp".tr("Lisp", "Ruby") @@ -141,7 +151,7 @@ assert('String#tr!') do assert_equal "ab12222hijklmnopqR", s end -assert('String#tr_s') do +assert('String#tr_s') do assert_equal "hero", "hello".tr_s('l', 'r') assert_equal "h*o", "hello".tr_s('el', '*') assert_equal "hhxo", "hello".tr_s('el', 'hx') -- cgit v1.2.3 From 270131253f62d806ea480ef4793e0b39cd068ee4 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Sat, 27 Apr 2019 12:50:02 +0900 Subject: Remove duplicated `String#each_char` --- mrbgems/mruby-io/test/io.rb | 4 ++-- mrbgems/mruby-method/test/method.rb | 2 +- mrbgems/mruby-string-ext/mrblib/string.rb | 10 +++++++--- mrbgems/mruby-string-ext/test/string.rb | 12 ++++++------ mrblib/string.rb | 12 ------------ 5 files changed, 16 insertions(+), 24 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-io/test/io.rb b/mrbgems/mruby-io/test/io.rb index 44eaca6be..2b3f9cf13 100644 --- a/mrbgems/mruby-io/test/io.rb +++ b/mrbgems/mruby-io/test/io.rb @@ -84,7 +84,7 @@ end assert('IO#getc', '15.2.20.5.8') do io = IO.new(IO.sysopen($mrbtest_io_rfname)) - $mrbtest_io_msg.each_char { |ch| + $mrbtest_io_msg.split("").each { |ch| assert_equal ch, io.getc } assert_equal nil, io.getc @@ -127,7 +127,7 @@ end assert('IO#readchar', '15.2.20.5.15') do # almost same as IO#getc IO.open(IO.sysopen($mrbtest_io_rfname)) do |io| - $mrbtest_io_msg.each_char { |ch| + $mrbtest_io_msg.split("").each { |ch| assert_equal ch, io.readchar } assert_raise(EOFError) do diff --git a/mrbgems/mruby-method/test/method.rb b/mrbgems/mruby-method/test/method.rb index dfddde9cc..0b67d3e61 100644 --- a/mrbgems/mruby-method/test/method.rb +++ b/mrbgems/mruby-method/test/method.rb @@ -21,7 +21,7 @@ class Interpreter } def interpret(string) @ret = "" - string.each_char {|b| Dispatcher[b].bind(self).call } + string.split("").each {|b| Dispatcher[b].bind(self).call } end end diff --git a/mrbgems/mruby-string-ext/mrblib/string.rb b/mrbgems/mruby-string-ext/mrblib/string.rb index 311803ea2..fdaf2f960 100644 --- a/mrbgems/mruby-string-ext/mrblib/string.rb +++ b/mrbgems/mruby-string-ext/mrblib/string.rb @@ -310,11 +310,15 @@ class String end end + ## + # Call the given block for each character of + # +self+. def each_char(&block) return to_enum :each_char unless block - - split('').each do |i| - block.call(i) + pos = 0 + while pos < self.size + block.call(self[pos]) + pos += 1 end self end diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 44ca1fde2..02777e594 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -657,19 +657,19 @@ assert('String#chars(UTF-8)') do end if UTF8STRING assert('String#each_char') do - s = "" + chars = [] "hello!".each_char do |x| - s += x + chars << x end - assert_equal "hello!", s + assert_equal ["h", "e", "l", "l", "o", "!"], chars end assert('String#each_char(UTF-8)') do - s = "" + chars = [] "こんにちは世界!".each_char do |x| - s += x + chars << x end - assert_equal "こんにちは世界!", s + assert_equal ["こ", "ん", "に", "ち", "は", "世", "界", "!"], chars end if UTF8STRING assert('String#codepoints') do diff --git a/mrblib/string.rb b/mrblib/string.rb index c92a9e7be..506f23c83 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -164,18 +164,6 @@ class String self.replace(str) end - ## - # Call the given block for each character of - # +self+. - def each_char(&block) - pos = 0 - while pos < self.size - block.call(self[pos]) - pos += 1 - end - self - end - ## # Call the given block for each byte of +self+. def each_byte(&block) -- cgit v1.2.3 From 75df13a97334c162b2cf743c3e37c4933a4b0d1c Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Tue, 25 Jun 2019 22:58:21 +0900 Subject: Fix `String#byteslice` with `MRB_UTF8_STRING` and some edge cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Example: $ bin/mruby -e ' p "あa".byteslice(1) p "bar".byteslice(3) p "bar".byteslice(4..0) ' Before this patch: "a" "" RangeError (4..0 out of range) After this patch (same as Ruby): "\x81" nil nil --- include/mruby/string.h | 3 ++ mrbgems/mruby-string-ext/src/string.c | 58 +++++++++++++-------------------- mrbgems/mruby-string-ext/test/string.rb | 51 +++++++++++++++++++++++++++++ src/string.c | 49 ++++++++++++++-------------- 4 files changed, 102 insertions(+), 59 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/include/mruby/string.h b/include/mruby/string.h index 22445f654..b563541cb 100644 --- a/include/mruby/string.h +++ b/include/mruby/string.h @@ -438,6 +438,9 @@ mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str); #define mrb_str_buf_cat(mrb, str, ptr, len) mrb_str_cat(mrb, str, ptr, len) #define mrb_str_buf_append(mrb, str, str2) mrb_str_cat_str(mrb, str, str2) +mrb_bool mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp); +mrb_value mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len); + #ifdef MRB_UTF8_STRING mrb_int mrb_utf8_len(const char *str, mrb_int byte_len); #endif diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index d9ebb7392..50a4e5582 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -42,44 +42,32 @@ mrb_str_setbyte(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_byteslice(mrb_state *mrb, mrb_value str) { - mrb_value a1; - mrb_int len; - - if (mrb_get_argc(mrb) == 2) { - mrb_int pos; - mrb_get_args(mrb, "ii", &pos, &len); - return mrb_str_substr(mrb, str, pos, len); + mrb_value a1, a2; + mrb_int str_len = RSTRING_LEN(str), beg, len; + mrb_bool empty = TRUE; + + if (mrb_get_args(mrb, "o|o", &a1, &a2) == 2) { + beg = mrb_fixnum(mrb_to_int(mrb, a1)); + len = mrb_fixnum(mrb_to_int(mrb, a2)); + goto subseq; } - mrb_get_args(mrb, "o|i", &a1, &len); - switch (mrb_type(a1)) { - case MRB_TT_RANGE: - { - mrb_int beg; - - len = RSTRING_LEN(str); - switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) { - case MRB_RANGE_TYPE_MISMATCH: - break; - case MRB_RANGE_OK: - return mrb_str_substr(mrb, str, beg, len); - case MRB_RANGE_OUT: - mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1); - break; - } - return mrb_nil_value(); + if (mrb_type(a1) == MRB_TT_RANGE) { + if (mrb_range_beg_len(mrb, a1, &beg, &len, str_len, TRUE) == MRB_RANGE_OK) { + goto subseq; } -#ifndef MRB_WITHOUT_FLOAT - case MRB_TT_FLOAT: - a1 = mrb_fixnum_value((mrb_int)mrb_float(a1)); - /* fall through */ -#endif - case MRB_TT_FIXNUM: - return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1); - default: - mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument"); + return mrb_nil_value(); + } + + beg = mrb_fixnum(mrb_to_int(mrb, a1)); + len = 1; + empty = FALSE; +subseq: + if (mrb_str_beg_len(str_len, &beg, &len) && (empty || len != 0)) { + return mrb_str_byte_subseq(mrb, str, beg, len); + } + else { + return mrb_nil_value(); } - /* not reached */ - return mrb_nil_value(); } /* diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 02777e594..bf633bcef 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -26,10 +26,61 @@ end assert('String#byteslice') do str1 = "hello" + str2 = "\u3042ab" # "\xE3\x81\x82ab" + + assert_equal("h", str1.byteslice(0)) assert_equal("e", str1.byteslice(1)) + assert_equal(nil, str1.byteslice(5)) assert_equal("o", str1.byteslice(-1)) + assert_equal(nil, str1.byteslice(-6)) + assert_equal("\xE3", str2.byteslice(0)) + assert_equal("\x81", str2.byteslice(1)) + assert_equal(nil, str2.byteslice(5)) + assert_equal("b", str2.byteslice(-1)) + assert_equal(nil, str2.byteslice(-6)) + + assert_equal("", str1.byteslice(0, 0)) + assert_equal(str1, str1.byteslice(0, 6)) + assert_equal("el", str1.byteslice(1, 2)) + assert_equal("", str1.byteslice(5, 1)) + assert_equal("o", str1.byteslice(-1, 6)) + assert_equal(nil, str1.byteslice(-6, 1)) + assert_equal(nil, str1.byteslice(0, -1)) + assert_equal("", str2.byteslice(0, 0)) + assert_equal(str2, str2.byteslice(0, 6)) + assert_equal("\x81\x82", str2.byteslice(1, 2)) + assert_equal("", str2.byteslice(5, 1)) + assert_equal("b", str2.byteslice(-1, 6)) + assert_equal(nil, str2.byteslice(-6, 1)) + assert_equal(nil, str2.byteslice(0, -1)) + assert_equal("ell", str1.byteslice(1..3)) assert_equal("el", str1.byteslice(1...3)) + assert_equal("h", str1.byteslice(0..0)) + assert_equal("", str1.byteslice(5..0)) + assert_equal("o", str1.byteslice(4..5)) + assert_equal(nil, str1.byteslice(6..0)) + assert_equal("", str1.byteslice(-1..0)) + assert_equal("llo", str1.byteslice(-3..5)) + assert_equal("\x81\x82a", str2.byteslice(1..3)) + assert_equal("\x81\x82", str2.byteslice(1...3)) + assert_equal("\xE3", str2.byteslice(0..0)) + assert_equal("", str2.byteslice(5..0)) + assert_equal("b", str2.byteslice(4..5)) + assert_equal(nil, str2.byteslice(6..0)) + assert_equal("", str2.byteslice(-1..0)) + assert_equal("\x82ab", str2.byteslice(-3..5)) + + assert_raise(ArgumentError) { str1.byteslice } + assert_raise(ArgumentError) { str1.byteslice(1, 2, 3) } + assert_raise(TypeError) { str1.byteslice("1") } + assert_raise(TypeError) { str1.byteslice("1", 2) } + assert_raise(TypeError) { str1.byteslice(1, "2") } + assert_raise(TypeError) { str1.byteslice(1..2, 3) } + + skip unless Object.const_defined?(:Float) + assert_equal("o", str1.byteslice(4.0)) + assert_equal("\x82ab", str2.byteslice(2.0, 3.0)) end assert('String#dump') do diff --git a/src/string.c b/src/string.c index ed58c484b..f5fb936a6 100644 --- a/src/string.c +++ b/src/string.c @@ -410,8 +410,8 @@ str_make_shared(mrb_state *mrb, struct RString *orig, struct RString *s) } } -static mrb_value -byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +mrb_value +mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { struct RString *orig, *s; @@ -434,32 +434,33 @@ str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) beg = chars2bytes(str, 0, beg); len = chars2bytes(str, beg, len); - return byte_subseq(mrb, str, beg, len); + return mrb_str_byte_subseq(mrb, str, beg, len); } #else -#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len) +#define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len) #endif -static mrb_value -str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +mrb_bool +mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp) { - mrb_int clen = RSTRING_CHAR_LEN(str); - - if (len < 0) return mrb_nil_value(); - if (clen == 0) { - len = 0; + if (str_len < *begp || *lenp < 0) return FALSE; + if (*begp < 0) { + *begp += str_len; + if (*begp < 0) return FALSE; } - if (beg > clen) return mrb_nil_value(); - if (beg < 0) { - beg += clen; - if (beg < 0) return mrb_nil_value(); + if (*lenp > str_len - *begp) + *lenp = str_len - *begp; + if (*lenp <= 0) { + *lenp = 0; } - if (len > clen - beg) - len = clen - beg; - if (len <= 0) { - len = 0; - } - return str_subseq(mrb, str, beg, len); + return TRUE; +} + +static mrb_value +str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +{ + return mrb_str_beg_len(RSTRING_CHAR_LEN(str), &beg, &len) ? + str_subseq(mrb, str, beg, len) : mrb_nil_value(); } MRB_API mrb_int @@ -1917,7 +1918,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) } } else if (ISSPACE(c)) { - mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg)); + mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, beg, end-beg)); mrb_gc_arena_restore(mrb, ai); skip = TRUE; beg = idx; @@ -1942,7 +1943,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) else { end = chars2bytes(str, idx, 1); } - mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end)); + mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, idx, end)); mrb_gc_arena_restore(mrb, ai); idx += end + pat_len; if (lim_p && lim <= ++i) break; @@ -1954,7 +1955,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) tmp = mrb_str_new_empty(mrb, str); } else { - tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); + tmp = mrb_str_byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); } mrb_ary_push(mrb, result, tmp); } -- cgit v1.2.3 From bc3176da630e3e055d58aa065ff897aec66df280 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Fri, 28 Jun 2019 19:26:29 +0900 Subject: Use `__ENCODING__` in tests It cannot be used for `String#size` test if judging whether or not `MRB_UTF8_STRING` is defined by result of `String#size`. --- mrbgems/mruby-string-ext/test/string.rb | 2 +- mrbgems/mruby-symbol-ext/test/symbol.rb | 2 +- test/t/string.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index bf633bcef..9a324c46d 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -2,7 +2,7 @@ ## # String(Ext) Test -UTF8STRING = ("\343\201\202".size == 1) +UTF8STRING = __ENCODING__ == "UTF-8" assert('String#getbyte') do str1 = "hello" diff --git a/mrbgems/mruby-symbol-ext/test/symbol.rb b/mrbgems/mruby-symbol-ext/test/symbol.rb index 61ecad247..db686e5f4 100644 --- a/mrbgems/mruby-symbol-ext/test/symbol.rb +++ b/mrbgems/mruby-symbol-ext/test/symbol.rb @@ -14,7 +14,7 @@ end assert("Symbol##{n}") do assert_equal 5, :hello.__send__(n) assert_equal 4, :"aA\0b".__send__(n) - if "あ".size == 1 # enable MRB_UTF8_STRING? + if __ENCODING__ == "UTF-8" assert_equal 8, :"こんにちは世界!".__send__(n) assert_equal 4, :"aあ\0b".__send__(n) else diff --git a/test/t/string.rb b/test/t/string.rb index 7ef236dbe..81699f17e 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -2,7 +2,7 @@ ## # String ISO Test -UTF8STRING = ("\343\201\202".size == 1) +UTF8STRING = __ENCODING__ == "UTF-8" assert('String', '15.2.10') do assert_equal Class, String.class -- cgit v1.2.3