summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--mrblib/string.rb54
-rw-r--r--src/string.c228
-rw-r--r--test/t/string.rb50
3 files changed, 236 insertions, 96 deletions
diff --git a/mrblib/string.rb b/mrblib/string.rb
index b0fe4033e..c26cdb1e2 100644
--- a/mrblib/string.rb
+++ b/mrblib/string.rb
@@ -177,60 +177,6 @@ class String
self
end
- ##
- # Modify +self+ by replacing the content of +self+.
- # The portion of the string affected is determined using the same criteria as +String#[]+.
- def []=(*args)
- anum = args.size
- if anum == 2
- pos, value = args[0], args[1].__to_str
- case pos
- when String
- posnum = self.index(pos)
- if posnum
- b = self[0, posnum]
- a = self[(posnum + pos.length)..-1]
- self.replace([b, value, a].join(''))
- else
- raise IndexError, "string not matched"
- end
- when Range
- head = pos.begin
- tail = pos.end
- tail += self.length if tail < 0
- unless pos.exclude_end?
- tail += 1
- end
- return self[head, tail-head]=value
- else
- pos = pos.__to_int
- pos += self.length if pos < 0
- if pos < 0 || pos > self.length
- raise IndexError, "index #{args[0]} out of string"
- end
- b = self[0, pos]
- a = self[pos + 1..-1]
- self.replace([b, value, a].join(''))
- end
- return value
- elsif anum == 3
- pos, len, value = args[0].__to_int, args[1].__to_int, args[2].__to_str
- pos += self.length if pos < 0
- if pos < 0 || pos > self.length
- raise IndexError, "index #{args[0]} out of string"
- end
- if len < 0
- raise IndexError, "negative length #{len}"
- end
- b = self[0, pos]
- a = self[pos + len..-1]
- self.replace([b, value, a].join(''))
- return value
- else
- raise ArgumentError, "wrong number of arguments (#{anum} for 2..3)"
- end
- end
-
# those two methods requires Regexp that is optional in mruby
##
# ISO 15.2.10.5.3
diff --git a/src/string.c b/src/string.c
index c8a9e61ec..6938418fb 100644
--- a/src/string.c
+++ b/src/string.c
@@ -427,13 +427,18 @@ mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
}
return mrb_obj_value(s);
}
+
+static void
+str_range_to_bytes(mrb_value str, mrb_int *pos, mrb_int *len)
+{
+ *pos = chars2bytes(str, 0, *pos);
+ *len = chars2bytes(str, *pos, *len);
+}
#ifdef MRB_UTF8_STRING
static inline mrb_value
str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
- beg = chars2bytes(str, 0, beg);
- len = chars2bytes(str, beg, len);
-
+ str_range_to_bytes(str, &beg, &len);
return mrb_str_byte_subseq(mrb, str, beg, len);
}
#else
@@ -1010,51 +1015,91 @@ mrb_str_dup(mrb_state *mrb, mrb_value str)
return str_replace(mrb, dup, s);
}
-static mrb_value
-mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
-{
- mrb_int idx;
+enum str_convert_range {
+ /* `beg` and `len` are byte unit in `0 ... str.bytesize` */
+ STR_BYTE_RANGE_CORRECTED = 1,
- switch (mrb_type(indx)) {
- case MRB_TT_FIXNUM:
- idx = mrb_fixnum(indx);
+ /* `beg` and `len` are char unit in any range */
+ STR_CHAR_RANGE = 2,
-num_index:
- str = str_substr(mrb, str, idx, 1);
- if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
- return str;
+ /* `beg` and `len` are char unit in `0 ... str.size` */
+ STR_CHAR_RANGE_CORRECTED = 3,
- case MRB_TT_STRING:
- if (str_index_str(mrb, str, indx, 0) != -1)
- return mrb_str_dup(mrb, indx);
- return mrb_nil_value();
+ /* `beg` is out of range */
+ STR_OUT_OF_RANGE = -1
+};
+
+static enum str_convert_range
+str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_int *beg, mrb_int *len)
+{
+ if (!mrb_undef_p(alen)) {
+ *beg = mrb_int(mrb, indx);
+ *len = mrb_int(mrb, alen);
+ return STR_CHAR_RANGE;
+ }
+ else {
+ switch (mrb_type(indx)) {
+ case MRB_TT_FIXNUM:
+ *beg = mrb_fixnum(indx);
+ *len = 1;
+ return STR_CHAR_RANGE;
- case MRB_TT_RANGE:
- goto range_arg;
+ case MRB_TT_STRING:
+ *beg = str_index_str(mrb, str, indx, 0);
+ if (*beg < 0) { break; }
+ *len = RSTRING_LEN(indx);
+ return STR_BYTE_RANGE_CORRECTED;
- default:
- indx = mrb_Integer(mrb, indx);
- if (mrb_nil_p(indx)) {
- range_arg:
- {
- mrb_int beg, len;
-
- len = RSTRING_CHAR_LEN(str);
- switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, TRUE)) {
+ case MRB_TT_RANGE:
+ goto range_arg;
+
+ default:
+ indx = mrb_to_int(mrb, indx);
+ if (mrb_fixnum_p(indx)) {
+ *beg = mrb_fixnum(indx);
+ *len = 1;
+ return STR_CHAR_RANGE;
+ }
+range_arg:
+ *len = RSTRING_CHAR_LEN(str);
+ switch (mrb_range_beg_len(mrb, indx, beg, len, *len, TRUE)) {
case MRB_RANGE_OK:
- return str_subseq(mrb, str, beg, len);
+ return STR_CHAR_RANGE_CORRECTED;
case MRB_RANGE_OUT:
- return mrb_nil_value();
+ return STR_OUT_OF_RANGE;
default:
break;
- }
}
+
mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum");
+ }
+ }
+ return STR_OUT_OF_RANGE;
+}
+
+static mrb_value
+mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen)
+{
+ mrb_int beg, len;
+
+ switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) {
+ case STR_CHAR_RANGE_CORRECTED:
+ return str_subseq(mrb, str, beg, len);
+ case STR_CHAR_RANGE:
+ str = str_substr(mrb, str, beg, len);
+ if (mrb_undef_p(alen) && !mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
+ return str;
+ case STR_BYTE_RANGE_CORRECTED:
+ if (mrb_string_p(indx)) {
+ return mrb_str_dup(mrb, indx);
}
- idx = mrb_fixnum(indx);
- goto num_index;
+ else {
+ return mrb_str_byte_subseq(mrb, str, beg, len);
+ }
+ case STR_OUT_OF_RANGE:
+ default:
+ return mrb_nil_value();
}
- return mrb_nil_value(); /* not reached */
}
/* 15.2.10.5.6 */
@@ -1101,16 +1146,114 @@ static mrb_value
mrb_str_aref_m(mrb_state *mrb, mrb_value str)
{
mrb_value a1, a2;
- mrb_int argc;
- argc = mrb_get_args(mrb, "o|o", &a1, &a2);
- if (argc == 2) {
- mrb_int n1, n2;
+ if (mrb_get_args(mrb, "o|o", &a1, &a2) == 1) {
+ a2 = mrb_undef_value();
+ }
- mrb_get_args(mrb, "ii", &n1, &n2);
- return str_substr(mrb, str, n1, n2);
+ return mrb_str_aref(mrb, str, a1, a2);
+}
+
+static mrb_noreturn void
+str_out_of_index(mrb_state *mrb, mrb_value index)
+{
+ mrb_raisef(mrb, E_INDEX_ERROR, "index %S out of string", index);
+}
+
+static mrb_value
+str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb_value rep)
+{
+ const mrb_int shrink_threshold = 256;
+ struct RString *str = mrb_str_ptr(src);
+ mrb_int len = RSTR_LEN(str);
+ mrb_int replen, newlen;
+ char *strp;
+
+ if (end > len) { end = len; }
+
+ if (pos < 0 || pos > len) {
+ str_out_of_index(mrb, mrb_fixnum_value(pos));
+ }
+
+ replen = (mrb_nil_p(rep) ? 0 : RSTRING_LEN(rep));
+ newlen = replen + len - (end - pos);
+
+ if (newlen >= MRB_INT_MAX || newlen < replen /* overflowed */) {
+ mrb_raise(mrb, E_RUNTIME_ERROR, "string size too big");
+ }
+
+ mrb_str_modify(mrb, str);
+
+ if (len < newlen || len - newlen >= shrink_threshold) {
+ resize_capa(mrb, str, newlen);
+ }
+
+ strp = RSTR_PTR(str);
+
+ memmove(strp + newlen - (len - end), strp + end, len - end);
+ if (!mrb_nil_p(rep)) {
+ memcpy(strp + pos, RSTRING_PTR(rep), replen);
}
- return mrb_str_aref(mrb, str, a1);
+ RSTR_SET_LEN(str, newlen);
+ strp[newlen] = '\0';
+
+ return src;
+}
+
+static void
+mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_value replace)
+{
+ mrb_int beg, len, charlen;
+
+ replace = mrb_to_str(mrb, replace);
+
+ switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) {
+ case STR_OUT_OF_RANGE:
+ default:
+ mrb_raise(mrb, E_INDEX_ERROR, "string not matched");
+ case STR_CHAR_RANGE:
+ if (len < 0) {
+ mrb_raisef(mrb, E_INDEX_ERROR, "negative length %S", alen);
+ }
+ charlen = RSTRING_CHAR_LEN(str);
+ if (beg < 0) { beg += charlen; }
+ if (beg < 0 || beg > charlen) { str_out_of_index(mrb, indx); }
+ /* fall through */
+ case STR_CHAR_RANGE_CORRECTED:
+ str_range_to_bytes(str, &beg, &len);
+ /* fall through */
+ case STR_BYTE_RANGE_CORRECTED:
+ str_replace_partial(mrb, str, beg, beg + len, replace);
+ }
+}
+
+/*
+ * call-seq:
+ * str[fixnum] = replace
+ * str[fixnum, fixnum] = replace
+ * str[range] = replace
+ * str[regexp] = replace
+ * str[regexp, fixnum] = replace
+ * str[other_str] = replace
+ *
+ * Modify +self+ by replacing the content of +self+.
+ * The portion of the string affected is determined using the same criteria as +String#[]+.
+ */
+static mrb_value
+mrb_str_aset_m(mrb_state *mrb, mrb_value str)
+{
+ mrb_value indx, alen, replace;
+
+ switch (mrb_get_args(mrb, "oo|S!", &indx, &alen, &replace)) {
+ case 2:
+ replace = alen;
+ alen = mrb_undef_value();
+ break;
+ case 3:
+ break;
+ }
+ mrb_str_aset(mrb, str, indx, alen, replace);
+ return str;
}
/* 15.2.10.5.8 */
@@ -2678,6 +2821,7 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
+ mrb_define_method(mrb, s, "[]=", mrb_str_aset_m, MRB_ARGS_ANY());
mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */
mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
diff --git a/test/t/string.rb b/test/t/string.rb
index 81699f17e..87d60ed72 100644
--- a/test/t/string.rb
+++ b/test/t/string.rb
@@ -209,6 +209,56 @@ assert('String#[]=') do
assert_raise(TypeError) { 'a'[0, 1] = 1 }
end
+assert('String[]=(UTF-8)') do
+ a = "➀➁➂➃➄"
+ a[3] = "⚃"
+ assert_equal "➀➁➂⚃➄", a
+
+ b = "➀➁➂➃➄"
+ b[3, 0] = "⛄"
+ assert_equal "➀➁➂⛄➃➄", b
+
+ c = "➀➁➂➃➄"
+ c[3, 2] = "⚃⚄"
+ assert_equal "➀➁➂⚃⚄", c
+
+ d = "➀➁➂➃➄"
+ d[5] = "⛄"
+ assert_equal "➀➁➂➃➄⛄", d
+
+ e = "➀➁➂➃➄"
+ e[5, 0] = "⛄"
+ assert_equal "➀➁➂➃➄⛄", e
+
+ f = "➀➁➂➃➄"
+ f[5, 2] = "⛄"
+ assert_equal "➀➁➂➃➄⛄", f
+
+ g = "➀➁➂➃➄"
+ assert_raise(IndexError) { g[6] = "⛄" }
+
+ h = "➀➁➂➃➄"
+ assert_raise(IndexError) { h[6, 0] = "⛄" }
+
+ i = "➀➁➂➃➄"
+ assert_raise(IndexError) { i[6, 2] = "⛄" }
+
+ j = "➀➁➂➃➄"
+ j["➃"] = "⚃"
+ assert_equal "➀➁➂⚃➄", j
+
+ k = "➀➁➂➃➄"
+ assert_raise(IndexError) { k["⛄"] = "⛇" }
+
+ l = "➀➁➂➃➄"
+ assert_nothing_raised { l["➂"] = "" }
+ assert_equal "➀➁➃➄", l
+
+ m = "➀➁➂➃➄"
+ assert_raise(TypeError) { m["➂"] = nil }
+ assert_equal "➀➁➂➃➄", m
+end if UTF8STRING
+
assert('String#capitalize', '15.2.10.5.7') do
a = 'abc'
a.capitalize