diff options
| author | Yukihiro "Matz" Matsumoto <[email protected]> | 2016-01-05 18:05:25 +0900 |
|---|---|---|
| committer | Yukihiro "Matz" Matsumoto <[email protected]> | 2016-01-05 18:05:25 +0900 |
| commit | 9145aed85ef4458846f6412585f86d1ea0eb150f (patch) | |
| tree | e38598317c7d2ae643c077ab8b11a5549cba7001 /src | |
| parent | aec825a64c4618ee16a0243c933556648398bb76 (diff) | |
| download | mruby-9145aed85ef4458846f6412585f86d1ea0eb150f.tar.gz mruby-9145aed85ef4458846f6412585f86d1ea0eb150f.zip | |
Make the bytes2chars() conversion fail if the target byte offset is not on a character boundary; ref #3067
That means String#index only matches at the first byte of a multi-byte character. This behavior is different
from CRuby, but it is a compromise for mruby, which does not have encoding support.
Diffstat (limited to 'src')
| -rw-r--r-- | src/string.c | 7 |
1 files changed, 6 insertions, 1 deletions
diff --git a/src/string.c b/src/string.c index 0d31a304f..a3f337d4b 100644 --- a/src/string.c +++ b/src/string.c @@ -306,17 +306,20 @@ bytes2chars(char *p, mrb_int bi) mrb_int i, b, n; for (b=i=0; b<bi; i++) { - n = utf8len(p, p+bi); + n = utf8len_codepage[(unsigned char)*p]; b += n; p += n; } + if (b != bi) return -1; return i; } +#define BYTES_ALIGN_CHECK(pos) if (pos < 0) return mrb_nil_value(); #else #define RSTRING_CHAR_LEN(s) RSTRING_LEN(s) #define chars2bytes(p, off, ci) (ci) #define bytes2chars(p, bi) (bi) +#define BYTES_ALIGN_CHECK(pos) #endif static inline mrb_int @@ -1608,6 +1611,7 @@ mrb_str_index(mrb_state *mrb, mrb_value str) if (pos == -1) return mrb_nil_value(); pos = bytes2chars(RSTRING_PTR(str), pos); + BYTES_ALIGN_CHECK(pos); return mrb_fixnum_value(pos); } @@ -1877,6 +1881,7 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str) pos = str_rindex(mrb, str, sub, pos); if (pos >= 0) { pos = bytes2chars(RSTRING_PTR(str), pos); + BYTES_ALIGN_CHECK(pos); return mrb_fixnum_value(pos); } break; |
