diff options
| author | KOBAYASHI Shuji <[email protected]> | 2019-02-04 18:11:22 +0900 |
|---|---|---|
| committer | KOBAYASHI Shuji <[email protected]> | 2019-02-04 18:11:22 +0900 |
| commit | 69fd1a592560d321061790c94f93532db93dccb9 (patch) | |
| tree | b7a94cb0998d7cdb6739dc550bf26ad1fe3e6513 /src | |
| parent | 3f7137feaeb3a86df01ae080b4d8826250f4ef29 (diff) | |
| download | mruby-69fd1a592560d321061790c94f93532db93dccb9.tar.gz mruby-69fd1a592560d321061790c94f93532db93dccb9.zip | |
Fix `Symbol#size` for multi-byte characters with `MRB_UTF8_STRING`
Before:
p :あ.size #=> 3
After:
p :あ.size #=> 1
Diffstat (limited to 'src')
| -rw-r--r-- | src/string.c | 35 |
1 files changed, 22 insertions, 13 deletions
```diff
diff --git a/src/string.c b/src/string.c
index 52b869eca..148e2fee2 100644
--- a/src/string.c
+++ b/src/string.c
@@ -238,27 +238,36 @@ utf8len(const char* p, const char* e)
   return len;
 }
 
-static mrb_int
-utf8_strlen(mrb_value str, mrb_int len)
+mrb_int
+mrb_utf8_len(const char *str, mrb_int byte_len)
 {
   mrb_int total = 0;
-  char* p = RSTRING_PTR(str);
-  char* e = p;
-  if (RSTRING(str)->flags & MRB_STR_NO_UTF) {
-    return RSTRING_LEN(str);
-  }
-  e += len < 0 ? RSTRING_LEN(str) : len;
-  while (p<e) {
+  const char *p = str;
+  const char *e = p + byte_len;
+
+  while (p < e) {
     p += utf8len(p, e);
     total++;
   }
-  if (RSTRING_LEN(str) == total) {
-    RSTRING(str)->flags |= MRB_STR_NO_UTF;
-  }
   return total;
 }
 
-#define RSTRING_CHAR_LEN(s) utf8_strlen(s, -1)
+static mrb_int
+utf8_strlen(mrb_value str)
+{
+  mrb_int byte_len = RSTRING_LEN(str);
+
+  if (RSTRING(str)->flags & MRB_STR_NO_UTF) {
+    return byte_len;
+  }
+  else {
+    mrb_int utf8_len = mrb_utf8_len(RSTRING_PTR(str), byte_len);
+    if (byte_len == utf8_len) RSTRING(str)->flags |= MRB_STR_NO_UTF;
+    return utf8_len;
+  }
+}
+
+#define RSTRING_CHAR_LEN(s) utf8_strlen(s)
 
 /* map character index to byte offset index */
 static mrb_int
```
