summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--mrblib/string.rb54
-rw-r--r--src/string.c176
2 files changed, 173 insertions, 57 deletions
diff --git a/mrblib/string.rb b/mrblib/string.rb
index b0fe4033e..c26cdb1e2 100644
--- a/mrblib/string.rb
+++ b/mrblib/string.rb
@@ -177,60 +177,6 @@ class String
self
end
- ##
- # Modify +self+ by replacing the content of +self+.
- # The portion of the string affected is determined using the same criteria as +String#[]+.
- def []=(*args)
- anum = args.size
- if anum == 2
- pos, value = args[0], args[1].__to_str
- case pos
- when String
- posnum = self.index(pos)
- if posnum
- b = self[0, posnum]
- a = self[(posnum + pos.length)..-1]
- self.replace([b, value, a].join(''))
- else
- raise IndexError, "string not matched"
- end
- when Range
- head = pos.begin
- tail = pos.end
- tail += self.length if tail < 0
- unless pos.exclude_end?
- tail += 1
- end
- return self[head, tail-head]=value
- else
- pos = pos.__to_int
- pos += self.length if pos < 0
- if pos < 0 || pos > self.length
- raise IndexError, "index #{args[0]} out of string"
- end
- b = self[0, pos]
- a = self[pos + 1..-1]
- self.replace([b, value, a].join(''))
- end
- return value
- elsif anum == 3
- pos, len, value = args[0].__to_int, args[1].__to_int, args[2].__to_str
- pos += self.length if pos < 0
- if pos < 0 || pos > self.length
- raise IndexError, "index #{args[0]} out of string"
- end
- if len < 0
- raise IndexError, "negative length #{len}"
- end
- b = self[0, pos]
- a = self[pos + len..-1]
- self.replace([b, value, a].join(''))
- return value
- else
- raise ArgumentError, "wrong number of arguments (#{anum} for 2..3)"
- end
- end
-
# those two methods requires Regexp that is optional in mruby
##
# ISO 15.2.10.5.3
diff --git a/src/string.c b/src/string.c
index c8a9e61ec..43bf8a841 100644
--- a/src/string.c
+++ b/src/string.c
@@ -427,13 +427,18 @@ mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
}
return mrb_obj_value(s);
}
+
+static void
+str_range_to_bytes(mrb_value str, mrb_int *pos, mrb_int *len)
+{
+ *pos = chars2bytes(str, 0, *pos);
+ *len = chars2bytes(str, *pos, *len);
+}
#ifdef MRB_UTF8_STRING
static inline mrb_value
str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
- beg = chars2bytes(str, 0, beg);
- len = chars2bytes(str, beg, len);
-
+ str_range_to_bytes(str, &beg, &len);
return mrb_str_byte_subseq(mrb, str, beg, len);
}
#else
@@ -1010,6 +1015,68 @@ mrb_str_dup(mrb_state *mrb, mrb_value str)
return str_replace(mrb, dup, s);
}
+enum str_convert_range {
+ /* `beg` and `len` are byte unit in `0 ... str.bytesize` */
+ STR_BYTE_RANGE_CORRECTED = 1,
+
+ /* `beg` and `len` are char unit in any range */
+ STR_CHAR_RANGE = 2,
+
+ /* `beg` and `len` are char unit in `0 ... str.size` */
+ STR_CHAR_RANGE_CORRECTED = 3,
+
+ /* `beg` is out of range */
+ STR_OUT_OF_RANGE = -1
+};
+
+static enum str_convert_range
+str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_int *beg, mrb_int *len)
+{
+ if (!mrb_undef_p(alen)) {
+ *beg = mrb_int(mrb, indx);
+ *len = mrb_int(mrb, alen);
+ return STR_CHAR_RANGE;
+ }
+ else {
+ switch (mrb_type(indx)) {
+ case MRB_TT_FIXNUM:
+ *beg = mrb_fixnum(indx);
+ *len = 1;
+ return STR_CHAR_RANGE;
+
+ case MRB_TT_STRING:
+ *beg = str_index_str(mrb, str, indx, 0);
+ if (*beg < 0) { break; }
+ *len = RSTRING_LEN(indx);
+ return STR_BYTE_RANGE_CORRECTED;
+
+ case MRB_TT_RANGE:
+ goto range_arg;
+
+ default:
+ indx = mrb_to_int(mrb, indx);
+ if (mrb_fixnum_p(indx)) {
+ *beg = mrb_fixnum(indx);
+ *len = 1;
+ return STR_CHAR_RANGE;
+ }
+range_arg:
+ *len = RSTRING_CHAR_LEN(str);
+ switch (mrb_range_beg_len(mrb, indx, beg, len, *len, TRUE)) {
+ case MRB_RANGE_OK:
+ return STR_CHAR_RANGE_CORRECTED;
+ case MRB_RANGE_OUT:
+ return STR_OUT_OF_RANGE;
+ default:
+ break;
+ }
+
+ mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum");
+ }
+ }
+ return STR_OUT_OF_RANGE;
+}
+
static mrb_value
mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
{
@@ -1113,6 +1180,108 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str)
return mrb_str_aref(mrb, str, a1);
}
+static mrb_noreturn void
+str_out_of_index(mrb_state *mrb, mrb_value index)
+{
+ mrb_raisef(mrb, E_INDEX_ERROR, "index %S out of string", index);
+}
+
+static mrb_value
+str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb_value rep)
+{
+ const mrb_int shrink_threshold = 256;
+ struct RString *str = mrb_str_ptr(src);
+ mrb_int len = RSTR_LEN(str);
+ mrb_int replen, newlen;
+ char *strp;
+
+ if (end > len) { end = len; }
+
+ if (pos < 0 || pos > len) {
+ str_out_of_index(mrb, mrb_fixnum_value(pos));
+ }
+
+ replen = (mrb_nil_p(rep) ? 0 : RSTRING_LEN(rep));
+ newlen = replen + len - (end - pos);
+
+ if (newlen >= MRB_INT_MAX || newlen < replen /* overflowed */) {
+ mrb_raise(mrb, E_RUNTIME_ERROR, "string size too big");
+ }
+
+ mrb_str_modify(mrb, str);
+
+ if (len < newlen || len - newlen >= shrink_threshold) {
+ resize_capa(mrb, str, newlen);
+ }
+
+ strp = RSTR_PTR(str);
+
+ memmove(strp + newlen - (len - end), strp + end, len - end);
+ if (!mrb_nil_p(rep)) {
+ memcpy(strp + pos, RSTRING_PTR(rep), replen);
+ }
+ RSTR_SET_LEN(str, newlen);
+ strp[newlen] = '\0';
+
+ return src;
+}
+
+static void
+mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_value replace)
+{
+ mrb_int beg, len, charlen;
+
+ replace = mrb_to_str(mrb, replace);
+
+ switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) {
+ case STR_OUT_OF_RANGE:
+ default:
+ mrb_raise(mrb, E_INDEX_ERROR, "string not matched");
+ case STR_CHAR_RANGE:
+ if (len < 0) {
+ mrb_raisef(mrb, E_INDEX_ERROR, "negative length %S", alen);
+ }
+ charlen = RSTRING_CHAR_LEN(str);
+ if (beg < 0) { beg += charlen; }
+ if (beg < 0 || beg > charlen) { str_out_of_index(mrb, indx); }
+ /* fall through */
+ case STR_CHAR_RANGE_CORRECTED:
+ str_range_to_bytes(str, &beg, &len);
+ /* fall through */
+ case STR_BYTE_RANGE_CORRECTED:
+ str_replace_partial(mrb, str, beg, beg + len, replace);
+ }
+}
+
+/*
+ * call-seq:
+ * str[fixnum] = replace
+ * str[fixnum, fixnum] = replace
+ * str[range] = replace
+ * str[regexp] = replace
+ * str[regexp, fixnum] = replace
+ * str[other_str] = replace
+ *
+ * Modify +self+ by replacing the content of +self+.
+ * The portion of the string affected is determined using the same criteria as +String#[]+.
+ */
+static mrb_value
+mrb_str_aset_m(mrb_state *mrb, mrb_value str)
+{
+ mrb_value indx, alen, replace;
+
+ switch (mrb_get_args(mrb, "oo|S!", &indx, &alen, &replace)) {
+ case 2:
+ replace = alen;
+ alen = mrb_undef_value();
+ break;
+ case 3:
+ break;
+ }
+ mrb_str_aset(mrb, str, indx, alen, replace);
+ return str;
+}
+
/* 15.2.10.5.8 */
/*
* call-seq:
@@ -2678,6 +2847,7 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
+ mrb_define_method(mrb, s, "[]=", mrb_str_aset_m, MRB_ARGS_ANY());
mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */
mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */