summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author KOBAYASHI Shuji <[email protected]> 2019-02-04 18:11:22 +0900
committer KOBAYASHI Shuji <[email protected]> 2019-02-04 18:11:22 +0900
commit 69fd1a592560d321061790c94f93532db93dccb9 (patch)
tree b7a94cb0998d7cdb6739dc550bf26ad1fe3e6513 /src
parent 3f7137feaeb3a86df01ae080b4d8826250f4ef29 (diff)
download mruby-69fd1a592560d321061790c94f93532db93dccb9.tar.gz
mruby-69fd1a592560d321061790c94f93532db93dccb9.zip
Fix `Symbol#size` for multi-byte characters with `MRB_UTF8_STRING`
Before: p :あ.size #=> 3
After: p :あ.size #=> 1
Diffstat (limited to 'src')
-rw-r--r-- src/string.c 35
1 files changed, 22 insertions, 13 deletions
diff --git a/src/string.c b/src/string.c
index 52b869eca..148e2fee2 100644
--- a/src/string.c
+++ b/src/string.c
@@ -238,27 +238,36 @@ utf8len(const char* p, const char* e)
return len;
}
-static mrb_int
-utf8_strlen(mrb_value str, mrb_int len)
+mrb_int
+mrb_utf8_len(const char *str, mrb_int byte_len)
{
mrb_int total = 0;
- char* p = RSTRING_PTR(str);
- char* e = p;
- if (RSTRING(str)->flags & MRB_STR_NO_UTF) {
- return RSTRING_LEN(str);
- }
- e += len < 0 ? RSTRING_LEN(str) : len;
- while (p<e) {
+ const char *p = str;
+ const char *e = p + byte_len;
+
+ while (p < e) {
p += utf8len(p, e);
total++;
}
- if (RSTRING_LEN(str) == total) {
- RSTRING(str)->flags |= MRB_STR_NO_UTF;
- }
return total;
}
-#define RSTRING_CHAR_LEN(s) utf8_strlen(s, -1)
+static mrb_int
+utf8_strlen(mrb_value str)
+{
+ mrb_int byte_len = RSTRING_LEN(str);
+
+ if (RSTRING(str)->flags & MRB_STR_NO_UTF) {
+ return byte_len;
+ }
+ else {
+ mrb_int utf8_len = mrb_utf8_len(RSTRING_PTR(str), byte_len);
+ if (byte_len == utf8_len) RSTRING(str)->flags |= MRB_STR_NO_UTF;
+ return utf8_len;
+ }
+}
+
+#define RSTRING_CHAR_LEN(s) utf8_strlen(s)
/* map character index to byte offset index */
static mrb_int