From 35be8b252495d92ca811d76996f03c470ee33380 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Tue, 7 Aug 2018 23:01:22 +0900 Subject: Fixed the corner case bug in `String#{gsub!,sub!}`. `"a".sub!("a", "a")` should not return `nil`. --- mrblib/string.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index ee98cfa0c..07b80b340 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -99,7 +99,7 @@ class String raise FrozenError, "can't modify frozen String" if frozen? return to_enum(:gsub!, *args) if args.length == 1 && !block str = self.gsub(*args, &block) - return nil if str == self + return nil unless self.index(args[0]) self.replace(str) end @@ -161,7 +161,7 @@ class String def sub!(*args, &block) raise FrozenError, "can't modify frozen String" if frozen? str = self.sub(*args, &block) - return nil if str == self + return nil unless self.index(args[0]) self.replace(str) end -- cgit v1.2.3 From ff08856fe314faa4d16b4502c0960a3475387846 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 19 Sep 2018 21:51:53 +0900 Subject: Remove implicit conversion using `to_str` method; fix #3854 We have added internal convenience method `__to_str` which does string type check. The issue #3854 was fixed but fundamental flaw of lack of stack depth check along with fibers still remains. Use `MRB_GC_FIXED_ARENA` for workaround. --- include/mruby.h | 5 +--- include/mruby/string.h | 3 +-- mrbgems/mruby-io/src/file.c | 2 +- mrbgems/mruby-kernel-ext/src/kernel.c | 8 ++----- mrbgems/mruby-string-ext/mrblib/string.rb | 6 ++--- mrbgems/mruby-string-ext/src/string.c | 6 ++--- mrbgems/mruby-string-ext/test/string.rb | 11 --------- mrblib/string.rb | 11 ++++----- src/class.c | 11 +++++++-- src/kernel.c | 39 +++++-------------------------- src/object.c | 27 +++++++++++++++++++++ src/string.c | 32 ++----------------------- test/t/string.rb | 13 ----------- 13 files changed, 60 insertions(+), 114 deletions(-) (limited to 'mrblib/string.rb') diff --git a/include/mruby.h b/include/mruby.h index 33597101a..12df9cd5a 100644 --- a/include/mruby.h +++ b/include/mruby.h @@ -854,10 +854,6 @@ typedef const char *mrb_args_format; /** * Retrieve arguments from mrb_state. * - * When applicable, implicit conversions (such as `to_str`, `to_ary`, `to_hash`) are - * applied to received arguments. - * Used inside a function of mrb_func_t type. - * * @param mrb The current MRuby state. * @param format [mrb_args_format] is a list of format specifiers * @param ... The passing variadic arguments must be a pointer of retrieving type. @@ -1187,6 +1183,7 @@ MRB_API void mrb_gc_unregister(mrb_state *mrb, mrb_value obj); MRB_API mrb_value mrb_to_int(mrb_state *mrb, mrb_value val); #define mrb_int(mrb, val) mrb_fixnum(mrb_to_int(mrb, val)) +MRB_API mrb_value mrb_to_str(mrb_state *mrb, mrb_value val); MRB_API void mrb_check_type(mrb_state *mrb, mrb_value x, enum mrb_vtype t); typedef enum call_type { diff --git a/include/mruby/string.h b/include/mruby/string.h index 481b2fb38..fa1955f48 100644 --- a/include/mruby/string.h +++ b/include/mruby/string.h @@ -311,8 +311,7 @@ MRB_API mrb_value mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb * @param [mrb_value] str Ruby string. * @return [mrb_value] A Ruby string. */ -MRB_API mrb_value mrb_string_type(mrb_state *mrb, mrb_value str); - +MRB_API mrb_value mrb_ensure_string_type(mrb_state *mrb, mrb_value str); MRB_API mrb_value mrb_check_string_type(mrb_state *mrb, mrb_value str); MRB_API mrb_value mrb_str_new_capa(mrb_state *mrb, size_t capa); MRB_API mrb_value mrb_str_buf_new(mrb_state *mrb, size_t capa); diff --git a/mrbgems/mruby-io/src/file.c b/mrbgems/mruby-io/src/file.c index e65741061..c00663481 100644 --- a/mrbgems/mruby-io/src/file.c +++ b/mrbgems/mruby-io/src/file.c @@ -115,7 +115,7 @@ mrb_file_s_unlink(mrb_state *mrb, mrb_value obj) mrb_get_args(mrb, "*", &argv, &argc); for (i = 0; i < argc; i++) { const char *utf8_path; - pathv = mrb_convert_type(mrb, argv[i], MRB_TT_STRING, "String", "to_str"); + pathv = mrb_ensure_string_type(mrb, argv[i]); utf8_path = mrb_string_value_cstr(mrb, &pathv); path = mrb_locale_from_utf8(utf8_path, -1); if (UNLINK(path) < 0) { diff --git a/mrbgems/mruby-kernel-ext/src/kernel.c b/mrbgems/mruby-kernel-ext/src/kernel.c index a60e9a210..bc2656399 100644 --- a/mrbgems/mruby-kernel-ext/src/kernel.c +++ b/mrbgems/mruby-kernel-ext/src/kernel.c @@ -141,8 +141,7 @@ mrb_f_float(mrb_state *mrb, mrb_value self) * String(arg) -> string * * Returns arg as an String. - * - * First tries to call its to_str method, then its to_s method. + * converted using to_s method. * * String(self) #=> "main" * String(self.class) #=> "Object" @@ -154,10 +153,7 @@ mrb_f_string(mrb_state *mrb, mrb_value self) mrb_value arg, tmp; mrb_get_args(mrb, "o", &arg); - tmp = mrb_check_convert_type(mrb, arg, MRB_TT_STRING, "String", "to_str"); - if (mrb_nil_p(tmp)) { - tmp = mrb_check_convert_type(mrb, arg, MRB_TT_STRING, "String", "to_s"); - } + tmp = mrb_convert_type(mrb, arg, MRB_TT_STRING, "String", "to_s"); return tmp; } diff --git a/mrbgems/mruby-string-ext/mrblib/string.rb b/mrbgems/mruby-string-ext/mrblib/string.rb index 27ca30610..9212d83a5 100644 --- a/mrbgems/mruby-string-ext/mrblib/string.rb +++ b/mrbgems/mruby-string-ext/mrblib/string.rb @@ -12,8 +12,8 @@ class String # String.try_convert(/re/) #=> nil # def self.try_convert(obj) - if obj.respond_to?(:to_str) - obj.to_str + if self === obj + obj else nil end @@ -142,7 +142,7 @@ class String # "abcdef".casecmp("ABCDEF") #=> 0 # def casecmp(str) - self.downcase <=> str.to_str.downcase + self.downcase <=> str.__to_str.downcase rescue NoMethodError nil end diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 6d661c352..ba7e3c610 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -163,7 +163,7 @@ mrb_str_concat_m(mrb_state *mrb, mrb_value self) if (mrb_fixnum_p(str)) str = mrb_fixnum_chr(mrb, str); else - str = mrb_string_type(mrb, str); + str = mrb_ensure_string_type(mrb, str); mrb_str_concat(mrb, self, str); return self; } @@ -191,7 +191,7 @@ mrb_str_start_with(mrb_state *mrb, mrb_value self) for (i = 0; i < argc; i++) { size_t len_l, len_r; int ai = mrb_gc_arena_save(mrb); - sub = mrb_string_type(mrb, argv[i]); + sub = mrb_ensure_string_type(mrb, argv[i]); mrb_gc_arena_restore(mrb, ai); len_l = RSTRING_LEN(self); len_r = RSTRING_LEN(sub); @@ -220,7 +220,7 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self) for (i = 0; i < argc; i++) { size_t len_l, len_r; int ai = mrb_gc_arena_save(mrb); - sub = mrb_string_type(mrb, argv[i]); + sub = mrb_ensure_string_type(mrb, argv[i]); mrb_gc_arena_restore(mrb, ai); len_l = RSTRING_LEN(self); len_r = RSTRING_LEN(sub); diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 781506949..4ccdfd6c3 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -114,12 +114,6 @@ assert('String#concat') do assert_equal "Hello World!", "Hello " << "World" << 33 assert_equal "Hello World!", "Hello ".concat("World").concat(33) - o = Object.new - def o.to_str - "to_str" - end - assert_equal "hi to_str", "hi " << o - assert_raise(TypeError) { "".concat(Object.new) } end @@ -128,11 +122,6 @@ assert('String#casecmp') do assert_equal 0, "aBcDeF".casecmp("abcdef") assert_equal(-1, "abcdef".casecmp("abcdefg")) assert_equal 0, "abcdef".casecmp("ABCDEF") - o = Object.new - def o.to_str - "ABCDEF" - end - assert_equal 0, "abcdef".casecmp(o) end assert('String#count') do diff --git a/mrblib/string.rb b/mrblib/string.rb index 07b80b340..397603e9d 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -12,7 +12,7 @@ class String def each_line(rs = "\n", &block) return to_enum(:each_line, rs, &block) unless block return block.call(self) if rs.nil? - rs = rs.to_str + rs = rs.__to_str offset = 0 rs_len = rs.length this = dup @@ -67,7 +67,7 @@ class String block = nil end if !replace.nil? || !block - replace = replace.to_str + replace = replace.__to_str end offset = 0 result = [] @@ -129,12 +129,12 @@ class String end pattern, replace = *args - pattern = pattern.to_str + pattern = pattern.__to_str if args.length == 2 && block block = nil end unless block - replace = replace.to_str + replace = replace.__to_str end result = [] this = dup @@ -245,14 +245,13 @@ class String ## # ISO 15.2.10.5.3 def =~(re) - raise TypeError, "type mismatch: String given" if re.respond_to? :to_str re =~ self end ## # ISO 15.2.10.5.27 def match(re, &block) - if re.respond_to? :to_str + if String === re if Object.const_defined?(:Regexp) r = Regexp.new(re) r.match(self, &block) diff --git a/src/class.c b/src/class.c index 50ab0ea59..90c73104e 100644 --- a/src/class.c +++ b/src/class.c @@ -504,10 +504,17 @@ check_type(mrb_state *mrb, mrb_value val, enum mrb_vtype t, const char *c, const return tmp; } +#define CHECK_TYPE(mrb, val, t, c) do { \ + if (mrb_type(val) != (t)) {\ + mrb_raisef(mrb, E_TYPE_ERROR, "expected %S", mrb_str_new_lit(mrb, c));\ + }\ +} while (0) + static mrb_value to_str(mrb_state *mrb, mrb_value val) { - return check_type(mrb, val, MRB_TT_STRING, "String", "to_str"); + CHECK_TYPE(mrb, val, MRB_TT_STRING, "String"); + return val; } static mrb_value @@ -1972,7 +1979,7 @@ mrb_mod_const_get(mrb_state *mrb, mrb_value mod) } /* const get with class path string */ - path = mrb_string_type(mrb, path); + path = mrb_ensure_string_type(mrb, path); ptr = RSTRING_PTR(path); len = RSTRING_LEN(path); off = 0; diff --git a/src/kernel.c b/src/kernel.c index 195594d6b..ce9cd1d44 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -746,6 +746,7 @@ basic_obj_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym id, int pub) { return mrb_respond_to(mrb, obj, id); } + /* 15.3.1.3.43 */ /* * call-seq: @@ -765,45 +766,16 @@ basic_obj_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym id, int pub) static mrb_value obj_respond_to(mrb_state *mrb, mrb_value self) { - mrb_value mid; mrb_sym id, rtm_id; - mrb_bool priv = FALSE, respond_to_p = TRUE; - - mrb_get_args(mrb, "o|b", &mid, &priv); - - if (mrb_symbol_p(mid)) { - id = mrb_symbol(mid); - } - else { - mrb_value tmp; - if (mrb_string_p(mid)) { - tmp = mrb_check_intern_str(mrb, mid); - } - else { - tmp = mrb_check_string_type(mrb, mid); - if (mrb_nil_p(tmp)) { - tmp = mrb_inspect(mrb, mid); - mrb_raisef(mrb, E_TYPE_ERROR, "%S is not a symbol", tmp); - } - tmp = mrb_check_intern_str(mrb, tmp); - } - if (mrb_nil_p(tmp)) { - respond_to_p = FALSE; - } - else { - id = mrb_symbol(tmp); - } - } - - if (respond_to_p) { - respond_to_p = basic_obj_respond_to(mrb, self, id, !priv); - } + mrb_bool priv = FALSE, respond_to_p; + mrb_get_args(mrb, "n|b", &id, &priv); + respond_to_p = basic_obj_respond_to(mrb, self, id, !priv); if (!respond_to_p) { rtm_id = mrb_intern_lit(mrb, "respond_to_missing?"); if (basic_obj_respond_to(mrb, self, rtm_id, !priv)) { mrb_value args[2], v; - args[0] = mid; + args[0] = mrb_symbol_value(id); args[1] = mrb_bool_value(priv); v = mrb_funcall_argv(mrb, self, rtm_id, 2, args); return mrb_bool_value(mrb_bool(v)); @@ -873,6 +845,7 @@ mrb_init_kernel(mrb_state *mrb) mrb_define_method(mrb, krn, "to_s", mrb_any_to_s, MRB_ARGS_NONE()); /* 15.3.1.3.46 */ mrb_define_method(mrb, krn, "__case_eqq", mrb_obj_ceqq, MRB_ARGS_REQ(1)); /* internal */ mrb_define_method(mrb, krn, "__to_int", mrb_to_int, MRB_ARGS_NONE()); /* internal */ + mrb_define_method(mrb, krn, "__to_str", mrb_to_str, MRB_ARGS_NONE()); /* internal */ mrb_define_method(mrb, krn, "class_defined?", mrb_krn_class_defined, MRB_ARGS_REQ(1)); diff --git a/src/object.c b/src/object.c index ba6fa3947..18ccacfb9 100644 --- a/src/object.c +++ b/src/object.c @@ -579,6 +579,33 @@ mrb_Float(mrb_state *mrb, mrb_value val) } #endif +MRB_API mrb_value +mrb_to_str(mrb_state *mrb, mrb_value val) +{ + if (!mrb_string_p(val)) { + mrb_value type = inspect_type(mrb, val); + mrb_raisef(mrb, E_TYPE_ERROR, "can't convert %S to String", type); + } + return val; +} + +MRB_API mrb_value +mrb_ensure_string_type(mrb_state *mrb, mrb_value str) +{ + if (!mrb_string_p(str)) { + mrb_raisef(mrb, E_TYPE_ERROR, "%S cannot be converted to String", + inspect_type(mrb, str)); + } + return str; +} + +MRB_API mrb_value +mrb_check_string_type(mrb_state *mrb, mrb_value str) +{ + if (!mrb_string_p(str)) return mrb_nil_value(); + return str; +} + MRB_API mrb_value mrb_inspect(mrb_state *mrb, mrb_value obj) { diff --git a/src/string.c b/src/string.c index b7abfb762..b6d4ecef0 100644 --- a/src/string.c +++ b/src/string.c @@ -956,15 +956,7 @@ str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2) MRB_API mrb_bool mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2) { - if (mrb_immediate_p(str2)) return FALSE; - if (!mrb_string_p(str2)) { - if (mrb_nil_p(str2)) return FALSE; - if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_str"))) { - return FALSE; - } - str2 = mrb_funcall(mrb, str2, "to_str", 0); - return mrb_equal(mrb, str2, str1); - } + if (!mrb_string_p(str2)) return FALSE; return str_eql(mrb, str1, str2); } @@ -992,14 +984,8 @@ mrb_str_equal_m(mrb_state *mrb, mrb_value str1) MRB_API mrb_value mrb_str_to_str(mrb_state *mrb, mrb_value str) { - mrb_value s; - if (!mrb_string_p(str)) { - s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); - if (mrb_nil_p(s)) { - s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s"); - } - return s; + return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s"); } return str; } @@ -1714,18 +1700,6 @@ mrb_ptr_to_str(mrb_state *mrb, void *p) return mrb_obj_value(p_str); } -MRB_API mrb_value -mrb_string_type(mrb_state *mrb, mrb_value str) -{ - return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); -} - -MRB_API mrb_value -mrb_check_string_type(mrb_state *mrb, mrb_value str) -{ - return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); -} - /* 15.2.10.5.30 */ /* * call-seq: @@ -2379,7 +2353,6 @@ mrb_str_to_f(mrb_state *mrb, mrb_value self) /* * call-seq: * str.to_s => str - * str.to_str => str * * Returns the receiver. */ @@ -2783,7 +2756,6 @@ mrb_init_string(mrb_state *mrb) #endif mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */ mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */ - mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */ mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */ mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */ diff --git a/test/t/string.rb b/test/t/string.rb index e91b915fe..3a1eced16 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -253,19 +253,6 @@ assert('String#chomp!', '15.2.10.5.10') do assert_equal 'abc', e end -assert('String#chomp! uses the correct length') do - class A - def to_str - $s.replace("AA") - "A" - end - end - - $s = "AAA" - $s.chomp!(A.new) - assert_equal $s, "A" -end - assert('String#chop', '15.2.10.5.11') do a = ''.chop b = 'abc'.chop -- cgit v1.2.3 From c022e4643f2b6c84cb3f1ca716c0e7da3f14c8ca Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Mon, 19 Nov 2018 12:01:34 +0900 Subject: Avoid assignments from type checking `String#__to_str`. --- mrblib/string.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index 397603e9d..64e85c5b6 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -12,7 +12,7 @@ class String def each_line(rs = "\n", &block) return to_enum(:each_line, rs, &block) unless block return block.call(self) if rs.nil? - rs = rs.__to_str + rs.__to_str offset = 0 rs_len = rs.length this = dup @@ -67,7 +67,7 @@ class String block = nil end if !replace.nil? || !block - replace = replace.__to_str + replace.__to_str end offset = 0 result = [] @@ -129,12 +129,12 @@ class String end pattern, replace = *args - pattern = pattern.__to_str + pattern.__to_str if args.length == 2 && block block = nil end unless block - replace = replace.__to_str + replace.__to_str end result = [] this = dup -- cgit v1.2.3 From 292bffb32e7434b4725f39924adf0e9f8af5d1cf Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Mon, 8 Apr 2019 20:27:28 +0900 Subject: Avoid infinite loop when no `Regexp` class is available; fix #4363 --- mrblib/string.rb | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index 64e85c5b6..62c925885 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -242,25 +242,27 @@ class String end end + def _regexp(re, mid) + if String === re + if Object.const_defined?(:Regexp) + return Regexp.new(re) + else + raise NotImplementedError, "String##{mid} needs Regexp class" + end + end + re + end + ## # ISO 15.2.10.5.3 def =~(re) - re =~ self + _regexp(re, :=~) =~ self end ## # ISO 15.2.10.5.27 def match(re, &block) - if String === re - if Object.const_defined?(:Regexp) - r = Regexp.new(re) - r.match(self, &block) - else - raise NotImplementedError, "String#match needs Regexp class" - end - else - re.match(self, &block) - end + _regexp(re, :match).match(self, &block) end end -- cgit v1.2.3 From 916045921e629b59264a6f1e00e2347e64ef85cb Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Thu, 18 Apr 2019 19:58:05 +0900 Subject: Remove duplicated `include Comparable` in `mrblib/string.rb` --- mrblib/string.rb | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index 62c925885..9ad8e8e73 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -3,7 +3,9 @@ # # ISO 15.2.10 class String + # ISO 15.2.10.3 include Comparable + ## # Calls the given block for each line # and pass the respective line. @@ -265,12 +267,3 @@ class String _regexp(re, :match).match(self, &block) end end - -## -# String is comparable -# -# ISO 15.2.10.3 -module Comparable; end -class String - include Comparable -end -- cgit v1.2.3 From 4a8b88f7757f71d7d88b89764b533dd5ba59fd44 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Fri, 19 Apr 2019 20:14:23 +0900 Subject: Add type check (conversion) in `String#[]=` Before this patch: 'a'[0] = 1 #=> 1 'a'[:a] = '1' #=> ArgumentError 'a'[:a, 0] = '1' #=> ArgumentError 'a'[0, :a] = '1' #=> ArgumentError 'a'[0, 1] = 1 #=> 1 After this patch / Ruby: 'a'[0] = 1 #=> TypeError 'a'[:a] = '1' #=> TypeError 'a'[:a, 0] = '1' #=> TypeError 'a'[0, :a] = '1' #=> TypeError 'a'[0, 1] = 1 #=> TypeError --- mrblib/string.rb | 11 ++++++----- test/t/string.rb | 7 +++++++ 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index 9ad8e8e73..e9eb2be1d 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -197,12 +197,12 @@ class String def []=(*args) anum = args.size if anum == 2 - pos, value = args + pos, value = args[0], args[1].__to_str case pos when String posnum = self.index(pos) if posnum - b = self[0, posnum.to_i] + b = self[0, posnum] a = self[(posnum + pos.length)..-1] self.replace([b, value, a].join('')) else @@ -217,17 +217,18 @@ class String end return self[head, tail-head]=value else + pos = pos.__to_int pos += self.length if pos < 0 if pos < 0 || pos > self.length raise IndexError, "index #{args[0]} out of string" end - b = self[0, pos.to_i] + b = self[0, pos] a = self[pos + 1..-1] self.replace([b, value, a].join('')) end return value elsif anum == 3 - pos, len, value = args + pos, len, value = args[0].__to_int, args[1].__to_int, args[2].__to_str pos += self.length if pos < 0 if pos < 0 || pos > self.length raise IndexError, "index #{args[0]} out of string" @@ -235,7 +236,7 @@ class String if len < 0 raise IndexError, "negative length #{len}" end - b = self[0, pos.to_i] + b = self[0, pos] a = self[pos + len..-1] self.replace([b, value, a].join('')) return value diff --git a/test/t/string.rb b/test/t/string.rb index cf3702cbe..404cf03e1 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -161,6 +161,9 @@ assert('String#[]=') do assert_equal 'aXc', e end + assert_raise(TypeError) { 'a'[0] = 1 } + assert_raise(TypeError) { 'a'[:a] = '1' } + # length of args is 2 a1 = 'abc' assert_raise(IndexError) do @@ -197,6 +200,10 @@ assert('String#[]=') do assert_raise(IndexError) do b3['XX'] = 'Y' end + + assert_raise(TypeError) { 'a'[:a, 0] = '1' } + assert_raise(TypeError) { 'a'[0, :a] = '1' } + assert_raise(TypeError) { 'a'[0, 1] = 1 } end assert('String#capitalize', '15.2.10.5.7') do -- cgit v1.2.3 From cdb458ed4e07698ecb028bfe397fa273ed454e13 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Sun, 21 Apr 2019 20:34:39 +0900 Subject: Commented out `String#scan` because it is not implemented yet --- mrbgems/mruby-enumerator/mrblib/enumerator.rb | 7 ++++--- mrblib/string.rb | 21 +++++++++------------ test/t/string.rb | 4 +++- 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrbgems/mruby-enumerator/mrblib/enumerator.rb b/mrbgems/mruby-enumerator/mrblib/enumerator.rb index 457687268..89472ef01 100644 --- a/mrbgems/mruby-enumerator/mrblib/enumerator.rb +++ b/mrbgems/mruby-enumerator/mrblib/enumerator.rb @@ -244,9 +244,10 @@ class Enumerator # # === Examples # - # "Hello, world!".scan(/\w+/) #=> ["Hello", "world"] - # "Hello, world!".to_enum(:scan, /\w+/).to_a #=> ["Hello", "world"] - # "Hello, world!".to_enum(:scan).each(/\w+/).to_a #=> ["Hello", "world"] + # Array.new(3) #=> [nil, nil, nil] + # Array.new(3) { |i| i } #=> [0, 1, 2] + # Array.to_enum(:new, 3).to_a #=> [0, 1, 2] + # Array.to_enum(:new).each(3).to_a #=> [0, 1, 2] # # obj = Object.new # diff --git a/mrblib/string.rb b/mrblib/string.rb index e9eb2be1d..c92a9e7be 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -105,18 +105,15 @@ class String self.replace(str) end - ## - # Calls the given block for each match of +pattern+ - # If no block is given return an array with all - # matches of +pattern+. - # - # ISO 15.2.10.5.32 - def scan(reg, &block) - ### *** TODO *** ### - unless Object.const_defined?(:Regexp) - raise NotImplementedError, "scan not available (yet)" - end - end +# ## +# # Calls the given block for each match of +pattern+ +# # If no block is given return an array with all +# # matches of +pattern+. +# # +# # ISO 15.2.10.5.32 +# def scan(pattern, &block) +# # TODO: String#scan is not implemented yet +# end ## # Replace only the first match of +pattern+ with diff --git a/test/t/string.rb b/test/t/string.rb index 404cf03e1..e563db55a 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -509,7 +509,9 @@ assert('String#rindex(UTF-8)', '15.2.10.5.31') do assert_equal nil, str.index("さ") end if UTF8STRING -# 'String#scan', '15.2.10.5.32' will be tested in mrbgems. +# assert('String#scan', '15.2.10.5.32') do +# # Not implemented yet +# end assert('String#size', '15.2.10.5.33') do assert_equal 3, 'abc'.size -- cgit v1.2.3 From 270131253f62d806ea480ef4793e0b39cd068ee4 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Sat, 27 Apr 2019 12:50:02 +0900 Subject: Remove duplicated `String#each_char` --- mrbgems/mruby-io/test/io.rb | 4 ++-- mrbgems/mruby-method/test/method.rb | 2 +- mrbgems/mruby-string-ext/mrblib/string.rb | 10 +++++++--- mrbgems/mruby-string-ext/test/string.rb | 12 ++++++------ mrblib/string.rb | 12 ------------ 5 files changed, 16 insertions(+), 24 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrbgems/mruby-io/test/io.rb b/mrbgems/mruby-io/test/io.rb index 44eaca6be..2b3f9cf13 100644 --- a/mrbgems/mruby-io/test/io.rb +++ b/mrbgems/mruby-io/test/io.rb @@ -84,7 +84,7 @@ end assert('IO#getc', '15.2.20.5.8') do io = IO.new(IO.sysopen($mrbtest_io_rfname)) - $mrbtest_io_msg.each_char { |ch| + $mrbtest_io_msg.split("").each { |ch| assert_equal ch, io.getc } assert_equal nil, io.getc @@ -127,7 +127,7 @@ end assert('IO#readchar', '15.2.20.5.15') do # almost same as IO#getc IO.open(IO.sysopen($mrbtest_io_rfname)) do |io| - $mrbtest_io_msg.each_char { |ch| + $mrbtest_io_msg.split("").each { |ch| assert_equal ch, io.readchar } assert_raise(EOFError) do diff --git a/mrbgems/mruby-method/test/method.rb b/mrbgems/mruby-method/test/method.rb index dfddde9cc..0b67d3e61 100644 --- a/mrbgems/mruby-method/test/method.rb +++ b/mrbgems/mruby-method/test/method.rb @@ -21,7 +21,7 @@ class Interpreter } def interpret(string) @ret = "" - string.each_char {|b| Dispatcher[b].bind(self).call } + string.split("").each {|b| Dispatcher[b].bind(self).call } end end diff --git a/mrbgems/mruby-string-ext/mrblib/string.rb b/mrbgems/mruby-string-ext/mrblib/string.rb index 311803ea2..fdaf2f960 100644 --- a/mrbgems/mruby-string-ext/mrblib/string.rb +++ b/mrbgems/mruby-string-ext/mrblib/string.rb @@ -310,11 +310,15 @@ class String end end + ## + # Call the given block for each character of + # +self+. def each_char(&block) return to_enum :each_char unless block - - split('').each do |i| - block.call(i) + pos = 0 + while pos < self.size + block.call(self[pos]) + pos += 1 end self end diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 44ca1fde2..02777e594 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -657,19 +657,19 @@ assert('String#chars(UTF-8)') do end if UTF8STRING assert('String#each_char') do - s = "" + chars = [] "hello!".each_char do |x| - s += x + chars << x end - assert_equal "hello!", s + assert_equal ["h", "e", "l", "l", "o", "!"], chars end assert('String#each_char(UTF-8)') do - s = "" + chars = [] "こんにちは世界!".each_char do |x| - s += x + chars << x end - assert_equal "こんにちは世界!", s + assert_equal ["こ", "ん", "に", "ち", "は", "世", "界", "!"], chars end if UTF8STRING assert('String#codepoints') do diff --git a/mrblib/string.rb b/mrblib/string.rb index c92a9e7be..506f23c83 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -164,18 +164,6 @@ class String self.replace(str) end - ## - # Call the given block for each character of - # +self+. - def each_char(&block) - pos = 0 - while pos < self.size - block.call(self[pos]) - pos += 1 - end - self - end - ## # Call the given block for each byte of +self+. def each_byte(&block) -- cgit v1.2.3 From fd37bc53deb2d52fe3134838eab002dfb9ac35ab Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 15 May 2019 09:55:21 +0900 Subject: Remove `String#=~` and `String#match` that requires `Regexp`. --- mrblib/string.rb | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index 506f23c83..42d594055 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -230,26 +230,16 @@ class String end end - def _regexp(re, mid) - if String === re - if Object.const_defined?(:Regexp) - return Regexp.new(re) - else - raise NotImplementedError, "String##{mid} needs Regexp class" - end - end - re - end - + # those two methods requires Regexp that is optional in mruby ## # ISO 15.2.10.5.3 - def =~(re) - _regexp(re, :=~) =~ self - end + #def =~(re) + # re =~ self + #end ## # ISO 15.2.10.5.27 - def match(re, &block) - _regexp(re, :match).match(self, &block) - end + #def match(re, &block) + # re.match(self, &block) + #end end -- cgit v1.2.3 From 780342eddb880a1c0a457311b005398ce36d0c9f Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 15 May 2019 14:59:48 +0900 Subject: Add Enumerator support to `String#each_byte`. `String#each_byte` is not defined in ISO Ruby but it is implemented in the core mruby because it's useful. --- mrblib/string.rb | 1 + 1 file changed, 1 insertion(+) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index 42d594055..b0fe4033e 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -167,6 +167,7 @@ class String ## # Call the given block for each byte of +self+. def each_byte(&block) + return to_enum(:each_byte, &block) unless block bytes = self.bytes pos = 0 while pos < bytes.size -- cgit v1.2.3 From 0d452073f46fc46496200db610ce785e514cdb65 Mon Sep 17 00:00:00 2001 From: dearblue Date: Tue, 21 May 2019 22:02:11 +0900 Subject: Replace `String#[]=` method by C implements The purpose is to eliminate string objects that are temporarily created during processing. --- mrblib/string.rb | 54 ----------------- src/string.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 173 insertions(+), 57 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index b0fe4033e..c26cdb1e2 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -177,60 +177,6 @@ class String self end - ## - # Modify +self+ by replacing the content of +self+. - # The portion of the string affected is determined using the same criteria as +String#[]+. - def []=(*args) - anum = args.size - if anum == 2 - pos, value = args[0], args[1].__to_str - case pos - when String - posnum = self.index(pos) - if posnum - b = self[0, posnum] - a = self[(posnum + pos.length)..-1] - self.replace([b, value, a].join('')) - else - raise IndexError, "string not matched" - end - when Range - head = pos.begin - tail = pos.end - tail += self.length if tail < 0 - unless pos.exclude_end? - tail += 1 - end - return self[head, tail-head]=value - else - pos = pos.__to_int - pos += self.length if pos < 0 - if pos < 0 || pos > self.length - raise IndexError, "index #{args[0]} out of string" - end - b = self[0, pos] - a = self[pos + 1..-1] - self.replace([b, value, a].join('')) - end - return value - elsif anum == 3 - pos, len, value = args[0].__to_int, args[1].__to_int, args[2].__to_str - pos += self.length if pos < 0 - if pos < 0 || pos > self.length - raise IndexError, "index #{args[0]} out of string" - end - if len < 0 - raise IndexError, "negative length #{len}" - end - b = self[0, pos] - a = self[pos + len..-1] - self.replace([b, value, a].join('')) - return value - else - raise ArgumentError, "wrong number of arguments (#{anum} for 2..3)" - end - end - # those two methods requires Regexp that is optional in mruby ## # ISO 15.2.10.5.3 diff --git a/src/string.c b/src/string.c index c8a9e61ec..43bf8a841 100644 --- a/src/string.c +++ b/src/string.c @@ -427,13 +427,18 @@ mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) } return mrb_obj_value(s); } + +static void +str_range_to_bytes(mrb_value str, mrb_int *pos, mrb_int *len) +{ + *pos = chars2bytes(str, 0, *pos); + *len = chars2bytes(str, *pos, *len); +} #ifdef MRB_UTF8_STRING static inline mrb_value str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { - beg = chars2bytes(str, 0, beg); - len = chars2bytes(str, beg, len); - + str_range_to_bytes(str, &beg, &len); return mrb_str_byte_subseq(mrb, str, beg, len); } #else @@ -1010,6 +1015,68 @@ mrb_str_dup(mrb_state *mrb, mrb_value str) return str_replace(mrb, dup, s); } +enum str_convert_range { + /* `beg` and `len` are byte unit in `0 ... str.bytesize` */ + STR_BYTE_RANGE_CORRECTED = 1, + + /* `beg` and `len` are char unit in any range */ + STR_CHAR_RANGE = 2, + + /* `beg` and `len` are char unit in `0 ... str.size` */ + STR_CHAR_RANGE_CORRECTED = 3, + + /* `beg` is out of range */ + STR_OUT_OF_RANGE = -1 +}; + +static enum str_convert_range +str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_int *beg, mrb_int *len) +{ + if (!mrb_undef_p(alen)) { + *beg = mrb_int(mrb, indx); + *len = mrb_int(mrb, alen); + return STR_CHAR_RANGE; + } + else { + switch (mrb_type(indx)) { + case MRB_TT_FIXNUM: + *beg = mrb_fixnum(indx); + *len = 1; + return STR_CHAR_RANGE; + + case MRB_TT_STRING: + *beg = str_index_str(mrb, str, indx, 0); + if (*beg < 0) { break; } + *len = RSTRING_LEN(indx); + return STR_BYTE_RANGE_CORRECTED; + + case MRB_TT_RANGE: + goto range_arg; + + default: + indx = mrb_to_int(mrb, indx); + if (mrb_fixnum_p(indx)) { + *beg = mrb_fixnum(indx); + *len = 1; + return STR_CHAR_RANGE; + } +range_arg: + *len = RSTRING_CHAR_LEN(str); + switch (mrb_range_beg_len(mrb, indx, beg, len, *len, TRUE)) { + case MRB_RANGE_OK: + return STR_CHAR_RANGE_CORRECTED; + case MRB_RANGE_OUT: + return STR_OUT_OF_RANGE; + default: + break; + } + + mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum"); + } + } + return STR_OUT_OF_RANGE; +} + static mrb_value mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx) { @@ -1113,6 +1180,108 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str) return mrb_str_aref(mrb, str, a1); } +static mrb_noreturn void +str_out_of_index(mrb_state *mrb, mrb_value index) +{ + mrb_raisef(mrb, E_INDEX_ERROR, "index %S out of string", index); +} + +static mrb_value +str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb_value rep) +{ + const mrb_int shrink_threshold = 256; + struct RString *str = mrb_str_ptr(src); + mrb_int len = RSTR_LEN(str); + mrb_int replen, newlen; + char *strp; + + if (end > len) { end = len; } + + if (pos < 0 || pos > len) { + str_out_of_index(mrb, mrb_fixnum_value(pos)); + } + + replen = (mrb_nil_p(rep) ? 0 : RSTRING_LEN(rep)); + newlen = replen + len - (end - pos); + + if (newlen >= MRB_INT_MAX || newlen < replen /* overflowed */) { + mrb_raise(mrb, E_RUNTIME_ERROR, "string size too big"); + } + + mrb_str_modify(mrb, str); + + if (len < newlen || len - newlen >= shrink_threshold) { + resize_capa(mrb, str, newlen); + } + + strp = RSTR_PTR(str); + + memmove(strp + newlen - (len - end), strp + end, len - end); + if (!mrb_nil_p(rep)) { + memcpy(strp + pos, RSTRING_PTR(rep), replen); + } + RSTR_SET_LEN(str, newlen); + strp[newlen] = '\0'; + + return src; +} + +static void +mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_value replace) +{ + mrb_int beg, len, charlen; + + replace = mrb_to_str(mrb, replace); + + switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) { + case STR_OUT_OF_RANGE: + default: + mrb_raise(mrb, E_INDEX_ERROR, "string not matched"); + case STR_CHAR_RANGE: + if (len < 0) { + mrb_raisef(mrb, E_INDEX_ERROR, "negative length %S", alen); + } + charlen = RSTRING_CHAR_LEN(str); + if (beg < 0) { beg += charlen; } + if (beg < 0 || beg > charlen) { str_out_of_index(mrb, indx); } + /* fall through */ + case STR_CHAR_RANGE_CORRECTED: + str_range_to_bytes(str, &beg, &len); + /* fall through */ + case STR_BYTE_RANGE_CORRECTED: + str_replace_partial(mrb, str, beg, beg + len, replace); + } +} + +/* + * call-seq: + * str[fixnum] = replace + * str[fixnum, fixnum] = replace + * str[range] = replace + * str[regexp] = replace + * str[regexp, fixnum] = replace + * str[other_str] = replace + * + * Modify +self+ by replacing the content of +self+. + * The portion of the string affected is determined using the same criteria as +String#[]+. + */ +static mrb_value +mrb_str_aset_m(mrb_state *mrb, mrb_value str) +{ + mrb_value indx, alen, replace; + + switch (mrb_get_args(mrb, "oo|S!", &indx, &alen, &replace)) { + case 2: + replace = alen; + alen = mrb_undef_value(); + break; + case 3: + break; + } + mrb_str_aset(mrb, str, indx, alen, replace); + return str; +} + /* 15.2.10.5.8 */ /* * call-seq: @@ -2678,6 +2847,7 @@ mrb_init_string(mrb_state *mrb) mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */ mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */ mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */ + mrb_define_method(mrb, s, "[]=", mrb_str_aset_m, MRB_ARGS_ANY()); mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */ mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */ mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */ -- cgit v1.2.3