diff options
| author | Yukihiro "Matz" Matsumoto <[email protected]> | 2021-01-09 19:08:50 +0900 |
|---|---|---|
| committer | Yukihiro "Matz" Matsumoto <[email protected]> | 2021-01-09 19:09:57 +0900 |
| commit | f81591ceb6d8d2f82b3b83b9d9faaa06dd796740 (patch) | |
| tree | 98ca8b7ea9b93a985afe7ff897eba546e44605ac | |
| parent | 62e5247300dcdca08fd1023f5bccc23427063e5f (diff) | |
| download | mruby-f81591ceb6d8d2f82b3b83b9d9faaa06dd796740.tar.gz mruby-f81591ceb6d8d2f82b3b83b9d9faaa06dd796740.zip | |
Detect invalid first byte of UTF-8 char; fix #5269
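The first byte of a UTF-8 character must not be in the range `80..c1`: `80..bf` are continuation bytes, and `c0`/`c1` could only begin overlong two-byte encodings.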
| -rw-r--r-- | src/string.c | 12 |
1 file changed, 7 insertions, 5 deletions
```diff
diff --git a/src/string.c b/src/string.c
index 946dc8be1..83401b3db 100644
--- a/src/string.c
+++ b/src/string.c
@@ -284,10 +284,12 @@ static const char utf8len_codepage[256] =
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+  1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
   3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1,
 };
 
+#define utf8_islead(c) ((unsigned char)((c)&0xc0) != 0x80)
+
 mrb_int
 mrb_utf8len(const char* p, const char* e)
 {
@@ -299,7 +301,7 @@ mrb_utf8len(const char* p, const char* e)
   if (len == 1) return 1;
   if (len > e - p) return 1;
   for (i = 1; i < len; ++i)
-    if ((p[i] & 0xc0) != 0x80)
+    if (utf8_islead(p[i]))
       return 1;
   return len;
 }
@@ -307,15 +309,15 @@ mrb_utf8len(const char* p, const char* e)
 mrb_int
 mrb_utf8_strlen(const char *str, mrb_int byte_len)
 {
-  mrb_int total = 0;
+  mrb_int len = 0;
   const char *p = str;
   const char *e = p + byte_len;
 
   while (p < e) {
     p += mrb_utf8len(p, e);
-    total++;
+    len++;
   }
-  return total;
+  return len;
 }
 
 static mrb_int
```
