diff options
| author | KOBAYASHI Shuji <[email protected]> | 2019-06-25 22:58:21 +0900 |
|---|---|---|
| committer | KOBAYASHI Shuji <[email protected]> | 2019-06-25 23:09:23 +0900 |
| commit | 75df13a97334c162b2cf743c3e37c4933a4b0d1c (patch) | |
| tree | 60968fa95933371218ab05d2493bd78d579e8445 /src/string.c | |
| parent | cb3ee2d0501612f406e2d44b1e6d55b18861b1e1 (diff) | |
| download | mruby-75df13a97334c162b2cf743c3e37c4933a4b0d1c.tar.gz mruby-75df13a97334c162b2cf743c3e37c4933a4b0d1c.zip | |
Fix `String#byteslice` with `MRB_UTF8_STRING` and some edge cases
Example:
$ bin/mruby -e '
p "あa".byteslice(1)
p "bar".byteslice(3)
p "bar".byteslice(4..0)
'
Before this patch:
"a"
""
RangeError (4..0 out of range)
After this patch (same as Ruby):
"\x81"
nil
nil
Diffstat (limited to 'src/string.c')
| -rw-r--r-- | src/string.c | 49 |
1 files changed, 25 insertions, 24 deletions
diff --git a/src/string.c b/src/string.c index ed58c484b..f5fb936a6 100644 --- a/src/string.c +++ b/src/string.c @@ -410,8 +410,8 @@ str_make_shared(mrb_state *mrb, struct RString *orig, struct RString *s) } } -static mrb_value -byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +mrb_value +mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { struct RString *orig, *s; @@ -434,32 +434,33 @@ str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) beg = chars2bytes(str, 0, beg); len = chars2bytes(str, beg, len); - return byte_subseq(mrb, str, beg, len); + return mrb_str_byte_subseq(mrb, str, beg, len); } #else -#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len) +#define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len) #endif -static mrb_value -str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +mrb_bool +mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp) { - mrb_int clen = RSTRING_CHAR_LEN(str); - - if (len < 0) return mrb_nil_value(); - if (clen == 0) { - len = 0; + if (str_len < *begp || *lenp < 0) return FALSE; + if (*begp < 0) { + *begp += str_len; + if (*begp < 0) return FALSE; } - if (beg > clen) return mrb_nil_value(); - if (beg < 0) { - beg += clen; - if (beg < 0) return mrb_nil_value(); + if (*lenp > str_len - *begp) + *lenp = str_len - *begp; + if (*lenp <= 0) { + *lenp = 0; } - if (len > clen - beg) - len = clen - beg; - if (len <= 0) { - len = 0; - } - return str_subseq(mrb, str, beg, len); + return TRUE; +} + +static mrb_value +str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +{ + return mrb_str_beg_len(RSTRING_CHAR_LEN(str), &beg, &len) ? + str_subseq(mrb, str, beg, len) : mrb_nil_value(); } MRB_API mrb_int @@ -1917,7 +1918,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) } } else if (ISSPACE(c)) { - mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg)); + mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, beg, end-beg)); mrb_gc_arena_restore(mrb, ai); skip = TRUE; beg = idx; @@ -1942,7 +1943,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) else { end = chars2bytes(str, idx, 1); } - mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end)); + mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, idx, end)); mrb_gc_arena_restore(mrb, ai); idx += end + pat_len; if (lim_p && lim <= ++i) break; @@ -1954,7 +1955,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) tmp = mrb_str_new_empty(mrb, str); } else { - tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); + tmp = mrb_str_byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); } mrb_ary_push(mrb, result, tmp); } |
