diff options
| -rw-r--r-- | include/stc/cstr.h | 2 | ||||
| -rw-r--r-- | include/stc/csview.h | 2 | ||||
| -rw-r--r-- | include/stc/utf8.h | 5 | ||||
| -rw-r--r-- | src/utf8code.c | 19 |
4 files changed, 13 insertions, 15 deletions
diff --git a/include/stc/cstr.h b/include/stc/cstr.h index bfc3b51d..0dabc044 100644 --- a/include/stc/cstr.h +++ b/include/stc/cstr.h @@ -303,7 +303,7 @@ STC_INLINE bool cstr_starts_with_s(cstr s, cstr sub) STC_INLINE bool cstr_istarts_with(cstr s, const char* sub) { csview sv = cstr_sv(&s); size_t len = strlen(sub); - return len <= sv.size && !utf8_icmp_n(cstr_npos, sv.str, sv.size, sub, len); + return len <= sv.size && !utf8_icmp_sv(cstr_npos, sv, c_sv(sub, len)); } diff --git a/include/stc/csview.h b/include/stc/csview.h index 2ebcaabe..cd19aed7 100644 --- a/include/stc/csview.h +++ b/include/stc/csview.h @@ -127,7 +127,7 @@ STC_INLINE int csview_cmp(const csview* x, const csview* y) { return strcmp(x->str, y->str); } STC_INLINE int csview_icmp(const csview* x, const csview* y) - { return utf8_icmp_n(~(size_t)0, x->str, x->size, y->str, y->size); } + { return utf8_icmp_sv(csview_npos, *x, *y); } STC_INLINE bool csview_eq(const csview* x, const csview* y) { return x->size == y->size && !memcmp(x->str, y->str, x->size); } diff --git a/include/stc/utf8.h b/include/stc/utf8.h index 34368737..4b87bc0b 100644 --- a/include/stc/utf8.h +++ b/include/stc/utf8.h @@ -16,8 +16,7 @@ extern uint32_t utf8_casefold(uint32_t c); extern uint32_t utf8_tolower(uint32_t c); extern uint32_t utf8_toupper(uint32_t c); extern bool utf8_valid_n(const char* s, size_t nbytes); -extern int utf8_icmp_n(size_t u8max, const char* s1, size_t n1, - const char* s2, size_t n2); +extern int utf8_icmp_sv(size_t u8max, csview s1, csview s2); extern unsigned utf8_encode(char *out, uint32_t c); extern uint32_t utf8_peek(const char *s, int u8pos); @@ -36,7 +35,7 @@ STC_INLINE uint32_t utf8_decode(utf8_decode_t* d, const uint32_t byte) { /* case-insensitive utf8 string comparison */ STC_INLINE int utf8_icmp(const char* s1, const char* s2) { - return utf8_icmp_n(~(size_t)0, s1, ~(size_t)0, s2, ~(size_t)0); + return utf8_icmp_sv(~(size_t)0, c_sv(s1, ~(size_t)0), c_sv(s2, ~(size_t)0)); } STC_INLINE bool utf8_valid(const char* s) { diff --git a/src/utf8code.c b/src/utf8code.c index 394597e1..28c101fa 100644 --- a/src/utf8code.c +++ b/src/utf8code.c @@ -108,24 +108,23 @@ int utf8_icmp(const char* s1, const char* s2) { for (;;) { do { utf8_decode(&d1, (uint8_t)*s1++); } while (d1.state); do { utf8_decode(&d2, (uint8_t)*s2++); } while (d2.state); - int c = utf8_casefold(d1.codep) - utf8_casefold(d2.codep); - if (c || !*s2) + int32_t c = utf8_casefold(d1.codep) - utf8_casefold(d2.codep); + if (c || !s2[-1]) return c; } } */ -int utf8_icmp_n(size_t u8max, const char* s1, const size_t n1, - const char* s2, const size_t n2) { +int utf8_icmp_sv(size_t u8max, const csview s1, const csview s2) { utf8_decode_t d1 = {.state=0}, d2 = {.state=0}; size_t j1 = 0, j2 = 0; - while (u8max-- && ((j1 < n1) & (j2 < n2))) { - do { utf8_decode(&d1, (uint8_t)s1[j1++]); } while (d1.state); - do { utf8_decode(&d2, (uint8_t)s2[j2++]); } while (d2.state); - int c = utf8_casefold(d1.codep) - utf8_casefold(d2.codep); - if (c || !s2[j2]) + while (u8max-- && ((j1 < s1.size) & (j2 < s2.size))) { + do { utf8_decode(&d1, (uint8_t)s1.str[j1++]); } while (d1.state); + do { utf8_decode(&d2, (uint8_t)s2.str[j2++]); } while (d2.state); + int32_t c = utf8_casefold(d1.codep) - utf8_casefold(d2.codep); + if (c || !s2.str[j2 - 1]) // OK if s1.size / s2.size are npos return c; } - return (j2 < n2) - (j1 < n1); + return (j2 < s2.size) - (j1 < s1.size); } bool utf8_isupper(uint32_t c) { |
