diff options
| author | Yukihiro "Matz" Matsumoto <[email protected]> | 2019-06-29 21:36:48 +0900 |
|---|---|---|
| committer | GitHub <[email protected]> | 2019-06-29 21:36:48 +0900 |
| commit | ff5ec824fd31bf28e67b0b984d2596a240686cd2 (patch) | |
| tree | 30c65f666f33ba84b0ec66ad71cc759b59e24c9d | |
| parent | 4f4e240024f4e1980a64bc13bcc7fbf518533cbb (diff) | |
| parent | 0ad1cacff30c744fcf07869990cbab7bc51c4c68 (diff) | |
| download | mruby-ff5ec824fd31bf28e67b0b984d2596a240686cd2.tar.gz mruby-ff5ec824fd31bf28e67b0b984d2596a240686cd2.zip | |
Merge pull request #4541 from dearblue/replace-string-aset
Replace `String#[]=` method by C implements
| -rw-r--r-- | mrblib/string.rb | 54 | ||||
| -rw-r--r-- | src/string.c | 228 | ||||
| -rw-r--r-- | test/t/string.rb | 50 |
3 files changed, 236 insertions, 96 deletions
diff --git a/mrblib/string.rb b/mrblib/string.rb index b0fe4033e..c26cdb1e2 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -177,60 +177,6 @@ class String self end - ## - # Modify +self+ by replacing the content of +self+. - # The portion of the string affected is determined using the same criteria as +String#[]+. - def []=(*args) - anum = args.size - if anum == 2 - pos, value = args[0], args[1].__to_str - case pos - when String - posnum = self.index(pos) - if posnum - b = self[0, posnum] - a = self[(posnum + pos.length)..-1] - self.replace([b, value, a].join('')) - else - raise IndexError, "string not matched" - end - when Range - head = pos.begin - tail = pos.end - tail += self.length if tail < 0 - unless pos.exclude_end? - tail += 1 - end - return self[head, tail-head]=value - else - pos = pos.__to_int - pos += self.length if pos < 0 - if pos < 0 || pos > self.length - raise IndexError, "index #{args[0]} out of string" - end - b = self[0, pos] - a = self[pos + 1..-1] - self.replace([b, value, a].join('')) - end - return value - elsif anum == 3 - pos, len, value = args[0].__to_int, args[1].__to_int, args[2].__to_str - pos += self.length if pos < 0 - if pos < 0 || pos > self.length - raise IndexError, "index #{args[0]} out of string" - end - if len < 0 - raise IndexError, "negative length #{len}" - end - b = self[0, pos] - a = self[pos + len..-1] - self.replace([b, value, a].join('')) - return value - else - raise ArgumentError, "wrong number of arguments (#{anum} for 2..3)" - end - end - # those two methods requires Regexp that is optional in mruby ## # ISO 15.2.10.5.3 diff --git a/src/string.c b/src/string.c index c8a9e61ec..6938418fb 100644 --- a/src/string.c +++ b/src/string.c @@ -427,13 +427,18 @@ mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) } return mrb_obj_value(s); } + +static void +str_range_to_bytes(mrb_value str, mrb_int *pos, mrb_int *len) +{ + *pos = chars2bytes(str, 0, *pos); + *len = chars2bytes(str, *pos, *len); +} #ifdef MRB_UTF8_STRING static inline mrb_value str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { - beg = chars2bytes(str, 0, beg); - len = chars2bytes(str, beg, len); - + str_range_to_bytes(str, &beg, &len); return mrb_str_byte_subseq(mrb, str, beg, len); } #else @@ -1010,51 +1015,91 @@ mrb_str_dup(mrb_state *mrb, mrb_value str) return str_replace(mrb, dup, s); } -static mrb_value -mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx) -{ - mrb_int idx; +enum str_convert_range { + /* `beg` and `len` are byte unit in `0 ... str.bytesize` */ + STR_BYTE_RANGE_CORRECTED = 1, - switch (mrb_type(indx)) { - case MRB_TT_FIXNUM: - idx = mrb_fixnum(indx); + /* `beg` and `len` are char unit in any range */ + STR_CHAR_RANGE = 2, -num_index: - str = str_substr(mrb, str, idx, 1); - if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value(); - return str; + /* `beg` and `len` are char unit in `0 ... str.size` */ + STR_CHAR_RANGE_CORRECTED = 3, - case MRB_TT_STRING: - if (str_index_str(mrb, str, indx, 0) != -1) - return mrb_str_dup(mrb, indx); - return mrb_nil_value(); + /* `beg` is out of range */ + STR_OUT_OF_RANGE = -1 +}; + +static enum str_convert_range +str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_int *beg, mrb_int *len) +{ + if (!mrb_undef_p(alen)) { + *beg = mrb_int(mrb, indx); + *len = mrb_int(mrb, alen); + return STR_CHAR_RANGE; + } + else { + switch (mrb_type(indx)) { + case MRB_TT_FIXNUM: + *beg = mrb_fixnum(indx); + *len = 1; + return STR_CHAR_RANGE; - case MRB_TT_RANGE: - goto range_arg; + case MRB_TT_STRING: + *beg = str_index_str(mrb, str, indx, 0); + if (*beg < 0) { break; } + *len = RSTRING_LEN(indx); + return STR_BYTE_RANGE_CORRECTED; - default: - indx = mrb_Integer(mrb, indx); - if (mrb_nil_p(indx)) { - range_arg: - { - mrb_int beg, len; - - len = RSTRING_CHAR_LEN(str); - switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, TRUE)) { + case MRB_TT_RANGE: + goto range_arg; + + default: + indx = mrb_to_int(mrb, indx); + if (mrb_fixnum_p(indx)) { + *beg = mrb_fixnum(indx); + *len = 1; + return STR_CHAR_RANGE; + } +range_arg: + *len = RSTRING_CHAR_LEN(str); + switch (mrb_range_beg_len(mrb, indx, beg, len, *len, TRUE)) { case MRB_RANGE_OK: - return str_subseq(mrb, str, beg, len); + return STR_CHAR_RANGE_CORRECTED; case MRB_RANGE_OUT: - return mrb_nil_value(); + return STR_OUT_OF_RANGE; default: break; - } } + mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum"); + } + } + return STR_OUT_OF_RANGE; +} + +static mrb_value +mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen) +{ + mrb_int beg, len; + + switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) { + case STR_CHAR_RANGE_CORRECTED: + return str_subseq(mrb, str, beg, len); + case STR_CHAR_RANGE: + str = str_substr(mrb, str, beg, len); + if (mrb_undef_p(alen) && !mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value(); + return str; + case STR_BYTE_RANGE_CORRECTED: + if (mrb_string_p(indx)) { + return mrb_str_dup(mrb, indx); } - idx = mrb_fixnum(indx); - goto num_index; + else { + return mrb_str_byte_subseq(mrb, str, beg, len); + } + case STR_OUT_OF_RANGE: + default: + return mrb_nil_value(); } - return mrb_nil_value(); /* not reached */ } /* 15.2.10.5.6 */ @@ -1101,16 +1146,114 @@ static mrb_value mrb_str_aref_m(mrb_state *mrb, mrb_value str) { mrb_value a1, a2; - mrb_int argc; - argc = mrb_get_args(mrb, "o|o", &a1, &a2); - if (argc == 2) { - mrb_int n1, n2; + if (mrb_get_args(mrb, "o|o", &a1, &a2) == 1) { + a2 = mrb_undef_value(); + } - mrb_get_args(mrb, "ii", &n1, &n2); - return str_substr(mrb, str, n1, n2); + return mrb_str_aref(mrb, str, a1, a2); +} + +static mrb_noreturn void +str_out_of_index(mrb_state *mrb, mrb_value index) +{ + mrb_raisef(mrb, E_INDEX_ERROR, "index %S out of string", index); +} + +static mrb_value +str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb_value rep) +{ + const mrb_int shrink_threshold = 256; + struct RString *str = mrb_str_ptr(src); + mrb_int len = RSTR_LEN(str); + mrb_int replen, newlen; + char *strp; + + if (end > len) { end = len; } + + if (pos < 0 || pos > len) { + str_out_of_index(mrb, mrb_fixnum_value(pos)); + } + + replen = (mrb_nil_p(rep) ? 0 : RSTRING_LEN(rep)); + newlen = replen + len - (end - pos); + + if (newlen >= MRB_INT_MAX || newlen < replen /* overflowed */) { + mrb_raise(mrb, E_RUNTIME_ERROR, "string size too big"); + } + + mrb_str_modify(mrb, str); + + if (len < newlen || len - newlen >= shrink_threshold) { + resize_capa(mrb, str, newlen); + } + + strp = RSTR_PTR(str); + + memmove(strp + newlen - (len - end), strp + end, len - end); + if (!mrb_nil_p(rep)) { + memcpy(strp + pos, RSTRING_PTR(rep), replen); } - return mrb_str_aref(mrb, str, a1); + RSTR_SET_LEN(str, newlen); + strp[newlen] = '\0'; + + return src; +} + +static void +mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_value replace) +{ + mrb_int beg, len, charlen; + + replace = mrb_to_str(mrb, replace); + + switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) { + case STR_OUT_OF_RANGE: + default: + mrb_raise(mrb, E_INDEX_ERROR, "string not matched"); + case STR_CHAR_RANGE: + if (len < 0) { + mrb_raisef(mrb, E_INDEX_ERROR, "negative length %S", alen); + } + charlen = RSTRING_CHAR_LEN(str); + if (beg < 0) { beg += charlen; } + if (beg < 0 || beg > charlen) { str_out_of_index(mrb, indx); } + /* fall through */ + case STR_CHAR_RANGE_CORRECTED: + str_range_to_bytes(str, &beg, &len); + /* fall through */ + case STR_BYTE_RANGE_CORRECTED: + str_replace_partial(mrb, str, beg, beg + len, replace); + } +} + +/* + * call-seq: + * str[fixnum] = replace + * str[fixnum, fixnum] = replace + * str[range] = replace + * str[regexp] = replace + * str[regexp, fixnum] = replace + * str[other_str] = replace + * + * Modify +self+ by replacing the content of +self+. + * The portion of the string affected is determined using the same criteria as +String#[]+. + */ +static mrb_value +mrb_str_aset_m(mrb_state *mrb, mrb_value str) +{ + mrb_value indx, alen, replace; + + switch (mrb_get_args(mrb, "oo|S!", &indx, &alen, &replace)) { + case 2: + replace = alen; + alen = mrb_undef_value(); + break; + case 3: + break; + } + mrb_str_aset(mrb, str, indx, alen, replace); + return str; } /* 15.2.10.5.8 */ @@ -2678,6 +2821,7 @@ mrb_init_string(mrb_state *mrb) mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */ mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */ mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */ + mrb_define_method(mrb, s, "[]=", mrb_str_aset_m, MRB_ARGS_ANY()); mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */ mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */ mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */ diff --git a/test/t/string.rb b/test/t/string.rb index 81699f17e..87d60ed72 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -209,6 +209,56 @@ assert('String#[]=') do assert_raise(TypeError) { 'a'[0, 1] = 1 } end +assert('String[]=(UTF-8)') do + a = "➀➁➂➃➄" + a[3] = "⚃" + assert_equal "➀➁➂⚃➄", a + + b = "➀➁➂➃➄" + b[3, 0] = "⛄" + assert_equal "➀➁➂⛄➃➄", b + + c = "➀➁➂➃➄" + c[3, 2] = "⚃⚄" + assert_equal "➀➁➂⚃⚄", c + + d = "➀➁➂➃➄" + d[5] = "⛄" + assert_equal "➀➁➂➃➄⛄", d + + e = "➀➁➂➃➄" + e[5, 0] = "⛄" + assert_equal "➀➁➂➃➄⛄", e + + f = "➀➁➂➃➄" + f[5, 2] = "⛄" + assert_equal "➀➁➂➃➄⛄", f + + g = "➀➁➂➃➄" + assert_raise(IndexError) { g[6] = "⛄" } + + h = "➀➁➂➃➄" + assert_raise(IndexError) { h[6, 0] = "⛄" } + + i = "➀➁➂➃➄" + assert_raise(IndexError) { i[6, 2] = "⛄" } + + j = "➀➁➂➃➄" + j["➃"] = "⚃" + assert_equal "➀➁➂⚃➄", j + + k = "➀➁➂➃➄" + assert_raise(IndexError) { k["⛄"] = "⛇" } + + l = "➀➁➂➃➄" + assert_nothing_raised { l["➂"] = "" } + assert_equal "➀➁➃➄", l + + m = "➀➁➂➃➄" + assert_raise(TypeError) { m["➂"] = nil } + assert_equal "➀➁➂➃➄", m +end if UTF8STRING + assert('String#capitalize', '15.2.10.5.7') do a = 'abc' a.capitalize |
