summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorYukihiro "Matz" Matsumoto <[email protected]>2016-01-05 18:05:25 +0900
committerYukihiro "Matz" Matsumoto <[email protected]>2016-01-05 18:05:25 +0900
commit9145aed85ef4458846f6412585f86d1ea0eb150f (patch)
treee38598317c7d2ae643c077ab8b11a5549cba7001
parentaec825a64c4618ee16a0243c933556648398bb76 (diff)
downloadmruby-9145aed85ef4458846f6412585f86d1ea0eb150f.tar.gz
mruby-9145aed85ef4458846f6412585f86d1ea0eb150f.zip
Make bytes2chars() conversion fail if the target byte offset is not on a character boundary; ref #3067
This means String#index can still match the first byte of a multi-byte character. This behavior differs from CRuby, but it is a compromise for mruby, which does not have encoding support.
-rw-r--r--src/string.c7
1 files changed, 6 insertions, 1 deletions
diff --git a/src/string.c b/src/string.c
index 0d31a304f..a3f337d4b 100644
--- a/src/string.c
+++ b/src/string.c
@@ -306,17 +306,20 @@ bytes2chars(char *p, mrb_int bi)
mrb_int i, b, n;
for (b=i=0; b<bi; i++) {
- n = utf8len(p, p+bi);
+ n = utf8len_codepage[(unsigned char)*p];
b += n;
p += n;
}
+ if (b != bi) return -1;
return i;
}
+#define BYTES_ALIGN_CHECK(pos) if (pos < 0) return mrb_nil_value();
#else
#define RSTRING_CHAR_LEN(s) RSTRING_LEN(s)
#define chars2bytes(p, off, ci) (ci)
#define bytes2chars(p, bi) (bi)
+#define BYTES_ALIGN_CHECK(pos)
#endif
static inline mrb_int
@@ -1608,6 +1611,7 @@ mrb_str_index(mrb_state *mrb, mrb_value str)
if (pos == -1) return mrb_nil_value();
pos = bytes2chars(RSTRING_PTR(str), pos);
+ BYTES_ALIGN_CHECK(pos);
return mrb_fixnum_value(pos);
}
@@ -1877,6 +1881,7 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str)
pos = str_rindex(mrb, str, sub, pos);
if (pos >= 0) {
pos = bytes2chars(RSTRING_PTR(str), pos);
+ BYTES_ALIGN_CHECK(pos);
return mrb_fixnum_value(pos);
}
break;