summaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorKOBAYASHI Shuji <[email protected]>2019-06-25 22:58:21 +0900
committerKOBAYASHI Shuji <[email protected]>2019-06-25 23:09:23 +0900
commit75df13a97334c162b2cf743c3e37c4933a4b0d1c (patch)
tree60968fa95933371218ab05d2493bd78d579e8445 /src
parentcb3ee2d0501612f406e2d44b1e6d55b18861b1e1 (diff)
downloadmruby-75df13a97334c162b2cf743c3e37c4933a4b0d1c.tar.gz
mruby-75df13a97334c162b2cf743c3e37c4933a4b0d1c.zip
Fix `String#byteslice` with `MRB_UTF8_STRING` and some edge cases
Example: $ bin/mruby -e ' p "あa".byteslice(1) p "bar".byteslice(3) p "bar".byteslice(4..0) ' Before this patch: "a" "" RangeError (4..0 out of range) After this patch (same as Ruby): "\x81" nil nil
Diffstat (limited to 'src')
-rw-r--r--src/string.c49
1 files changed, 25 insertions, 24 deletions
diff --git a/src/string.c b/src/string.c
index ed58c484b..f5fb936a6 100644
--- a/src/string.c
+++ b/src/string.c
@@ -410,8 +410,8 @@ str_make_shared(mrb_state *mrb, struct RString *orig, struct RString *s)
}
}
-static mrb_value
-byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+mrb_value
+mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
struct RString *orig, *s;
@@ -434,32 +434,33 @@ str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
beg = chars2bytes(str, 0, beg);
len = chars2bytes(str, beg, len);
- return byte_subseq(mrb, str, beg, len);
+ return mrb_str_byte_subseq(mrb, str, beg, len);
}
#else
-#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len)
+#define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len)
#endif
-static mrb_value
-str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+mrb_bool
+mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp)
{
- mrb_int clen = RSTRING_CHAR_LEN(str);
-
- if (len < 0) return mrb_nil_value();
- if (clen == 0) {
- len = 0;
+ if (str_len < *begp || *lenp < 0) return FALSE;
+ if (*begp < 0) {
+ *begp += str_len;
+ if (*begp < 0) return FALSE;
}
- if (beg > clen) return mrb_nil_value();
- if (beg < 0) {
- beg += clen;
- if (beg < 0) return mrb_nil_value();
+ if (*lenp > str_len - *begp)
+ *lenp = str_len - *begp;
+ if (*lenp <= 0) {
+ *lenp = 0;
}
- if (len > clen - beg)
- len = clen - beg;
- if (len <= 0) {
- len = 0;
- }
- return str_subseq(mrb, str, beg, len);
+ return TRUE;
+}
+
+static mrb_value
+str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+{
+ return mrb_str_beg_len(RSTRING_CHAR_LEN(str), &beg, &len) ?
+ str_subseq(mrb, str, beg, len) : mrb_nil_value();
}
MRB_API mrb_int
@@ -1917,7 +1918,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
}
}
else if (ISSPACE(c)) {
- mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg));
+ mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, beg, end-beg));
mrb_gc_arena_restore(mrb, ai);
skip = TRUE;
beg = idx;
@@ -1942,7 +1943,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
else {
end = chars2bytes(str, idx, 1);
}
- mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end));
+ mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, idx, end));
mrb_gc_arena_restore(mrb, ai);
idx += end + pat_len;
if (lim_p && lim <= ++i) break;
@@ -1954,7 +1955,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
tmp = mrb_str_new_empty(mrb, str);
}
else {
- tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
+ tmp = mrb_str_byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
}
mrb_ary_push(mrb, result, tmp);
}