summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorYukihiro "Matz" Matsumoto <[email protected]>2019-06-26 12:41:56 +0900
committerGitHub <[email protected]>2019-06-26 12:41:56 +0900
commit84779f01703c4cfb31c74b34a864d3208a175a77 (patch)
treeffb1b18a181a68aed8af886a460a4e58fae5a7db
parenta5b005b57c486d7058c12971432c65d1cfb09b71 (diff)
parent75df13a97334c162b2cf743c3e37c4933a4b0d1c (diff)
downloadmruby-84779f01703c4cfb31c74b34a864d3208a175a77.tar.gz
mruby-84779f01703c4cfb31c74b34a864d3208a175a77.zip
Merge pull request #4532 from shuujii/fix-String-byteslice-with-MRB_UTF8_STRING-and-some-edge-cases
Fix `String#byteslice` with `MRB_UTF8_STRING` and some edge cases
-rw-r--r--include/mruby/string.h3
-rw-r--r--mrbgems/mruby-string-ext/src/string.c58
-rw-r--r--mrbgems/mruby-string-ext/test/string.rb51
-rw-r--r--src/string.c49
4 files changed, 102 insertions, 59 deletions
diff --git a/include/mruby/string.h b/include/mruby/string.h
index 22445f654..b563541cb 100644
--- a/include/mruby/string.h
+++ b/include/mruby/string.h
@@ -438,6 +438,9 @@ mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str);
#define mrb_str_buf_cat(mrb, str, ptr, len) mrb_str_cat(mrb, str, ptr, len)
#define mrb_str_buf_append(mrb, str, str2) mrb_str_cat_str(mrb, str, str2)
+mrb_bool mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp);
+mrb_value mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len);
+
#ifdef MRB_UTF8_STRING
mrb_int mrb_utf8_len(const char *str, mrb_int byte_len);
#endif
diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c
index d9ebb7392..50a4e5582 100644
--- a/mrbgems/mruby-string-ext/src/string.c
+++ b/mrbgems/mruby-string-ext/src/string.c
@@ -42,44 +42,32 @@ mrb_str_setbyte(mrb_state *mrb, mrb_value str)
static mrb_value
mrb_str_byteslice(mrb_state *mrb, mrb_value str)
{
- mrb_value a1;
- mrb_int len;
-
- if (mrb_get_argc(mrb) == 2) {
- mrb_int pos;
- mrb_get_args(mrb, "ii", &pos, &len);
- return mrb_str_substr(mrb, str, pos, len);
+ mrb_value a1, a2;
+ mrb_int str_len = RSTRING_LEN(str), beg, len;
+ mrb_bool empty = TRUE;
+
+ if (mrb_get_args(mrb, "o|o", &a1, &a2) == 2) {
+ beg = mrb_fixnum(mrb_to_int(mrb, a1));
+ len = mrb_fixnum(mrb_to_int(mrb, a2));
+ goto subseq;
}
- mrb_get_args(mrb, "o|i", &a1, &len);
- switch (mrb_type(a1)) {
- case MRB_TT_RANGE:
- {
- mrb_int beg;
-
- len = RSTRING_LEN(str);
- switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) {
- case MRB_RANGE_TYPE_MISMATCH:
- break;
- case MRB_RANGE_OK:
- return mrb_str_substr(mrb, str, beg, len);
- case MRB_RANGE_OUT:
- mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1);
- break;
- }
- return mrb_nil_value();
+ if (mrb_type(a1) == MRB_TT_RANGE) {
+ if (mrb_range_beg_len(mrb, a1, &beg, &len, str_len, TRUE) == MRB_RANGE_OK) {
+ goto subseq;
}
-#ifndef MRB_WITHOUT_FLOAT
- case MRB_TT_FLOAT:
- a1 = mrb_fixnum_value((mrb_int)mrb_float(a1));
- /* fall through */
-#endif
- case MRB_TT_FIXNUM:
- return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1);
- default:
- mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument");
+ return mrb_nil_value();
+ }
+
+ beg = mrb_fixnum(mrb_to_int(mrb, a1));
+ len = 1;
+ empty = FALSE;
+subseq:
+ if (mrb_str_beg_len(str_len, &beg, &len) && (empty || len != 0)) {
+ return mrb_str_byte_subseq(mrb, str, beg, len);
+ }
+ else {
+ return mrb_nil_value();
}
- /* not reached */
- return mrb_nil_value();
}
/*
diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb
index 02777e594..bf633bcef 100644
--- a/mrbgems/mruby-string-ext/test/string.rb
+++ b/mrbgems/mruby-string-ext/test/string.rb
@@ -26,10 +26,61 @@ end
assert('String#byteslice') do
str1 = "hello"
+ str2 = "\u3042ab" # "\xE3\x81\x82ab"
+
+ assert_equal("h", str1.byteslice(0))
assert_equal("e", str1.byteslice(1))
+ assert_equal(nil, str1.byteslice(5))
assert_equal("o", str1.byteslice(-1))
+ assert_equal(nil, str1.byteslice(-6))
+ assert_equal("\xE3", str2.byteslice(0))
+ assert_equal("\x81", str2.byteslice(1))
+ assert_equal(nil, str2.byteslice(5))
+ assert_equal("b", str2.byteslice(-1))
+ assert_equal(nil, str2.byteslice(-6))
+
+ assert_equal("", str1.byteslice(0, 0))
+ assert_equal(str1, str1.byteslice(0, 6))
+ assert_equal("el", str1.byteslice(1, 2))
+ assert_equal("", str1.byteslice(5, 1))
+ assert_equal("o", str1.byteslice(-1, 6))
+ assert_equal(nil, str1.byteslice(-6, 1))
+ assert_equal(nil, str1.byteslice(0, -1))
+ assert_equal("", str2.byteslice(0, 0))
+ assert_equal(str2, str2.byteslice(0, 6))
+ assert_equal("\x81\x82", str2.byteslice(1, 2))
+ assert_equal("", str2.byteslice(5, 1))
+ assert_equal("b", str2.byteslice(-1, 6))
+ assert_equal(nil, str2.byteslice(-6, 1))
+ assert_equal(nil, str2.byteslice(0, -1))
+
assert_equal("ell", str1.byteslice(1..3))
assert_equal("el", str1.byteslice(1...3))
+ assert_equal("h", str1.byteslice(0..0))
+ assert_equal("", str1.byteslice(5..0))
+ assert_equal("o", str1.byteslice(4..5))
+ assert_equal(nil, str1.byteslice(6..0))
+ assert_equal("", str1.byteslice(-1..0))
+ assert_equal("llo", str1.byteslice(-3..5))
+ assert_equal("\x81\x82a", str2.byteslice(1..3))
+ assert_equal("\x81\x82", str2.byteslice(1...3))
+ assert_equal("\xE3", str2.byteslice(0..0))
+ assert_equal("", str2.byteslice(5..0))
+ assert_equal("b", str2.byteslice(4..5))
+ assert_equal(nil, str2.byteslice(6..0))
+ assert_equal("", str2.byteslice(-1..0))
+ assert_equal("\x82ab", str2.byteslice(-3..5))
+
+ assert_raise(ArgumentError) { str1.byteslice }
+ assert_raise(ArgumentError) { str1.byteslice(1, 2, 3) }
+ assert_raise(TypeError) { str1.byteslice("1") }
+ assert_raise(TypeError) { str1.byteslice("1", 2) }
+ assert_raise(TypeError) { str1.byteslice(1, "2") }
+ assert_raise(TypeError) { str1.byteslice(1..2, 3) }
+
+ skip unless Object.const_defined?(:Float)
+ assert_equal("o", str1.byteslice(4.0))
+ assert_equal("\x82ab", str2.byteslice(2.0, 3.0))
end
assert('String#dump') do
diff --git a/src/string.c b/src/string.c
index 856e908af..7b58705dd 100644
--- a/src/string.c
+++ b/src/string.c
@@ -410,8 +410,8 @@ str_make_shared(mrb_state *mrb, struct RString *orig, struct RString *s)
}
}
-static mrb_value
-byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+mrb_value
+mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
struct RString *orig, *s;
@@ -434,32 +434,33 @@ str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
beg = chars2bytes(str, 0, beg);
len = chars2bytes(str, beg, len);
- return byte_subseq(mrb, str, beg, len);
+ return mrb_str_byte_subseq(mrb, str, beg, len);
}
#else
-#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len)
+#define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len)
#endif
-static mrb_value
-str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+mrb_bool
+mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp)
{
- mrb_int clen = RSTRING_CHAR_LEN(str);
-
- if (len < 0) return mrb_nil_value();
- if (clen == 0) {
- len = 0;
+ if (str_len < *begp || *lenp < 0) return FALSE;
+ if (*begp < 0) {
+ *begp += str_len;
+ if (*begp < 0) return FALSE;
}
- if (beg > clen) return mrb_nil_value();
- if (beg < 0) {
- beg += clen;
- if (beg < 0) return mrb_nil_value();
+ if (*lenp > str_len - *begp)
+ *lenp = str_len - *begp;
+ if (*lenp <= 0) {
+ *lenp = 0;
}
- if (len > clen - beg)
- len = clen - beg;
- if (len <= 0) {
- len = 0;
- }
- return str_subseq(mrb, str, beg, len);
+ return TRUE;
+}
+
+static mrb_value
+str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+{
+ return mrb_str_beg_len(RSTRING_CHAR_LEN(str), &beg, &len) ?
+ str_subseq(mrb, str, beg, len) : mrb_nil_value();
}
MRB_API mrb_int
@@ -1919,7 +1920,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
}
}
else if (ISSPACE(c)) {
- mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg));
+ mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, beg, end-beg));
mrb_gc_arena_restore(mrb, ai);
skip = TRUE;
beg = idx;
@@ -1944,7 +1945,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
else {
end = chars2bytes(str, idx, 1);
}
- mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end));
+ mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, idx, end));
mrb_gc_arena_restore(mrb, ai);
idx += end + pat_len;
if (lim_p && lim <= ++i) break;
@@ -1956,7 +1957,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
tmp = mrb_str_new_empty(mrb, str);
}
else {
- tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
+ tmp = mrb_str_byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
}
mrb_ary_push(mrb, result, tmp);
}