summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Yukihiro "Matz" Matsumoto <[email protected]> 2021-01-09 19:08:50 +0900
committer: Yukihiro "Matz" Matsumoto <[email protected]> 2021-01-09 19:09:57 +0900
commit: f81591ceb6d8d2f82b3b83b9d9faaa06dd796740 (patch)
tree: 98ca8b7ea9b93a985afe7ff897eba546e44605ac
parent: 62e5247300dcdca08fd1023f5bccc23427063e5f (diff)
download: mruby-f81591ceb6d8d2f82b3b83b9d9faaa06dd796740.tar.gz
mruby-f81591ceb6d8d2f82b3b83b9d9faaa06dd796740.zip
Detect invalid first byte of UTF-8 char; fix #5269
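The first byte of a UTF-8 character must not be in the range `80..c1`: `80..bf` are continuation bytes, and `c0`/`c1` could only begin overlong two-byte encodings.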
-rw-r--r-- src/string.c 12
1 file changed, 7 insertions, 5 deletions
diff --git a/src/string.c b/src/string.c
index 946dc8be1..83401b3db 100644
--- a/src/string.c
+++ b/src/string.c
@@ -284,10 +284,12 @@ static const char utf8len_codepage[256] =
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1,
};
+#define utf8_islead(c) ((unsigned char)((c)&0xc0) != 0x80)
+
mrb_int
mrb_utf8len(const char* p, const char* e)
{
@@ -299,7 +301,7 @@ mrb_utf8len(const char* p, const char* e)
if (len == 1) return 1;
if (len > e - p) return 1;
for (i = 1; i < len; ++i)
- if ((p[i] & 0xc0) != 0x80)
+ if (utf8_islead(p[i]))
return 1;
return len;
}
@@ -307,15 +309,15 @@ mrb_utf8len(const char* p, const char* e)
mrb_int
mrb_utf8_strlen(const char *str, mrb_int byte_len)
{
- mrb_int total = 0;
+ mrb_int len = 0;
const char *p = str;
const char *e = p + byte_len;
while (p < e) {
p += mrb_utf8len(p, e);
- total++;
+ len++;
}
- return total;
+ return len;
}
static mrb_int