diff options
| author | Yukihiro "Matz" Matsumoto <[email protected]> | 2019-06-26 12:41:56 +0900 |
|---|---|---|
| committer | GitHub <[email protected]> | 2019-06-26 12:41:56 +0900 |
| commit | 84779f01703c4cfb31c74b34a864d3208a175a77 (patch) | |
| tree | ffb1b18a181a68aed8af886a460a4e58fae5a7db | |
| parent | a5b005b57c486d7058c12971432c65d1cfb09b71 (diff) | |
| parent | 75df13a97334c162b2cf743c3e37c4933a4b0d1c (diff) | |
| download | mruby-84779f01703c4cfb31c74b34a864d3208a175a77.tar.gz mruby-84779f01703c4cfb31c74b34a864d3208a175a77.zip | |
Merge pull request #4532 from shuujii/fix-String-byteslice-with-MRB_UTF8_STRING-and-some-edge-cases
Fix `String#byteslice` with `MRB_UTF8_STRING` and some edge cases
| -rw-r--r-- | include/mruby/string.h | 3 | ||||
| -rw-r--r-- | mrbgems/mruby-string-ext/src/string.c | 58 | ||||
| -rw-r--r-- | mrbgems/mruby-string-ext/test/string.rb | 51 | ||||
| -rw-r--r-- | src/string.c | 49 |
4 files changed, 102 insertions, 59 deletions
diff --git a/include/mruby/string.h b/include/mruby/string.h index 22445f654..b563541cb 100644 --- a/include/mruby/string.h +++ b/include/mruby/string.h @@ -438,6 +438,9 @@ mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str); #define mrb_str_buf_cat(mrb, str, ptr, len) mrb_str_cat(mrb, str, ptr, len) #define mrb_str_buf_append(mrb, str, str2) mrb_str_cat_str(mrb, str, str2) +mrb_bool mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp); +mrb_value mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len); + #ifdef MRB_UTF8_STRING mrb_int mrb_utf8_len(const char *str, mrb_int byte_len); #endif diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index d9ebb7392..50a4e5582 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -42,44 +42,32 @@ mrb_str_setbyte(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_byteslice(mrb_state *mrb, mrb_value str) { - mrb_value a1; - mrb_int len; - - if (mrb_get_argc(mrb) == 2) { - mrb_int pos; - mrb_get_args(mrb, "ii", &pos, &len); - return mrb_str_substr(mrb, str, pos, len); + mrb_value a1, a2; + mrb_int str_len = RSTRING_LEN(str), beg, len; + mrb_bool empty = TRUE; + + if (mrb_get_args(mrb, "o|o", &a1, &a2) == 2) { + beg = mrb_fixnum(mrb_to_int(mrb, a1)); + len = mrb_fixnum(mrb_to_int(mrb, a2)); + goto subseq; } - mrb_get_args(mrb, "o|i", &a1, &len); - switch (mrb_type(a1)) { - case MRB_TT_RANGE: - { - mrb_int beg; - - len = RSTRING_LEN(str); - switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) { - case MRB_RANGE_TYPE_MISMATCH: - break; - case MRB_RANGE_OK: - return mrb_str_substr(mrb, str, beg, len); - case MRB_RANGE_OUT: - mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1); - break; - } - return mrb_nil_value(); + if (mrb_type(a1) == MRB_TT_RANGE) { + if (mrb_range_beg_len(mrb, a1, &beg, &len, str_len, TRUE) == MRB_RANGE_OK) { + goto subseq; } -#ifndef MRB_WITHOUT_FLOAT - case MRB_TT_FLOAT: - a1 = mrb_fixnum_value((mrb_int)mrb_float(a1)); - /* fall through */ -#endif - case MRB_TT_FIXNUM: - return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1); - default: - mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument"); + return mrb_nil_value(); + } + + beg = mrb_fixnum(mrb_to_int(mrb, a1)); + len = 1; + empty = FALSE; +subseq: + if (mrb_str_beg_len(str_len, &beg, &len) && (empty || len != 0)) { + return mrb_str_byte_subseq(mrb, str, beg, len); + } + else { + return mrb_nil_value(); } - /* not reached */ - return mrb_nil_value(); } /* diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 02777e594..bf633bcef 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -26,10 +26,61 @@ end assert('String#byteslice') do str1 = "hello" + str2 = "\u3042ab" # "\xE3\x81\x82ab" + + assert_equal("h", str1.byteslice(0)) assert_equal("e", str1.byteslice(1)) + assert_equal(nil, str1.byteslice(5)) assert_equal("o", str1.byteslice(-1)) + assert_equal(nil, str1.byteslice(-6)) + assert_equal("\xE3", str2.byteslice(0)) + assert_equal("\x81", str2.byteslice(1)) + assert_equal(nil, str2.byteslice(5)) + assert_equal("b", str2.byteslice(-1)) + assert_equal(nil, str2.byteslice(-6)) + + assert_equal("", str1.byteslice(0, 0)) + assert_equal(str1, str1.byteslice(0, 6)) + assert_equal("el", str1.byteslice(1, 2)) + assert_equal("", str1.byteslice(5, 1)) + assert_equal("o", str1.byteslice(-1, 6)) + assert_equal(nil, str1.byteslice(-6, 1)) + assert_equal(nil, str1.byteslice(0, -1)) + assert_equal("", str2.byteslice(0, 0)) + assert_equal(str2, str2.byteslice(0, 6)) + assert_equal("\x81\x82", str2.byteslice(1, 2)) + assert_equal("", str2.byteslice(5, 1)) + assert_equal("b", str2.byteslice(-1, 6)) + assert_equal(nil, str2.byteslice(-6, 1)) + assert_equal(nil, str2.byteslice(0, -1)) + assert_equal("ell", str1.byteslice(1..3)) assert_equal("el", str1.byteslice(1...3)) + assert_equal("h", str1.byteslice(0..0)) + assert_equal("", str1.byteslice(5..0)) + assert_equal("o", str1.byteslice(4..5)) + assert_equal(nil, str1.byteslice(6..0)) + assert_equal("", str1.byteslice(-1..0)) + assert_equal("llo", str1.byteslice(-3..5)) + assert_equal("\x81\x82a", str2.byteslice(1..3)) + assert_equal("\x81\x82", str2.byteslice(1...3)) + assert_equal("\xE3", str2.byteslice(0..0)) + assert_equal("", str2.byteslice(5..0)) + assert_equal("b", str2.byteslice(4..5)) + assert_equal(nil, str2.byteslice(6..0)) + assert_equal("", str2.byteslice(-1..0)) + assert_equal("\x82ab", str2.byteslice(-3..5)) + + assert_raise(ArgumentError) { str1.byteslice } + assert_raise(ArgumentError) { str1.byteslice(1, 2, 3) } + assert_raise(TypeError) { str1.byteslice("1") } + assert_raise(TypeError) { str1.byteslice("1", 2) } + assert_raise(TypeError) { str1.byteslice(1, "2") } + assert_raise(TypeError) { str1.byteslice(1..2, 3) } + + skip unless Object.const_defined?(:Float) + assert_equal("o", str1.byteslice(4.0)) + assert_equal("\x82ab", str2.byteslice(2.0, 3.0)) end assert('String#dump') do diff --git a/src/string.c b/src/string.c index 856e908af..7b58705dd 100644 --- a/src/string.c +++ b/src/string.c @@ -410,8 +410,8 @@ str_make_shared(mrb_state *mrb, struct RString *orig, struct RString *s) } } -static mrb_value -byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +mrb_value +mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { struct RString *orig, *s; @@ -434,32 +434,33 @@ str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) beg = chars2bytes(str, 0, beg); len = chars2bytes(str, beg, len); - return byte_subseq(mrb, str, beg, len); + return mrb_str_byte_subseq(mrb, str, beg, len); } #else -#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len) +#define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len) #endif -static mrb_value -str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +mrb_bool +mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp) { - mrb_int clen = RSTRING_CHAR_LEN(str); - - if (len < 0) return mrb_nil_value(); - if (clen == 0) { - len = 0; + if (str_len < *begp || *lenp < 0) return FALSE; + if (*begp < 0) { + *begp += str_len; + if (*begp < 0) return FALSE; } - if (beg > clen) return mrb_nil_value(); - if (beg < 0) { - beg += clen; - if (beg < 0) return mrb_nil_value(); + if (*lenp > str_len - *begp) + *lenp = str_len - *begp; + if (*lenp <= 0) { + *lenp = 0; } - if (len > clen - beg) - len = clen - beg; - if (len <= 0) { - len = 0; - } - return str_subseq(mrb, str, beg, len); + return TRUE; +} + +static mrb_value +str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +{ + return mrb_str_beg_len(RSTRING_CHAR_LEN(str), &beg, &len) ? + str_subseq(mrb, str, beg, len) : mrb_nil_value(); } MRB_API mrb_int @@ -1919,7 +1920,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) } } else if (ISSPACE(c)) { - mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg)); + mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, beg, end-beg)); mrb_gc_arena_restore(mrb, ai); skip = TRUE; beg = idx; @@ -1944,7 +1945,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) else { end = chars2bytes(str, idx, 1); } - mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end)); + mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, idx, end)); mrb_gc_arena_restore(mrb, ai); idx += end + pat_len; if (lim_p && lim <= ++i) break; @@ -1956,7 +1957,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) tmp = mrb_str_new_empty(mrb, str); } else { - tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); + tmp = mrb_str_byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); } mrb_ary_push(mrb, result, tmp); } |
