summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- include/stc/cstr.h 2
-rw-r--r-- include/stc/csview.h 2
-rw-r--r-- include/stc/utf8.h 5
-rw-r--r-- src/utf8code.c 19
4 files changed, 13 insertions, 15 deletions
diff --git a/include/stc/cstr.h b/include/stc/cstr.h
index bfc3b51d..0dabc044 100644
--- a/include/stc/cstr.h
+++ b/include/stc/cstr.h
@@ -303,7 +303,7 @@ STC_INLINE bool cstr_starts_with_s(cstr s, cstr sub)
STC_INLINE bool cstr_istarts_with(cstr s, const char* sub) {
csview sv = cstr_sv(&s);
size_t len = strlen(sub);
- return len <= sv.size && !utf8_icmp_n(cstr_npos, sv.str, sv.size, sub, len);
+ return len <= sv.size && !utf8_icmp_sv(cstr_npos, sv, c_sv(sub, len));
}
diff --git a/include/stc/csview.h b/include/stc/csview.h
index 2ebcaabe..cd19aed7 100644
--- a/include/stc/csview.h
+++ b/include/stc/csview.h
@@ -127,7 +127,7 @@ STC_INLINE int csview_cmp(const csview* x, const csview* y)
{ return strcmp(x->str, y->str); }
STC_INLINE int csview_icmp(const csview* x, const csview* y)
- { return utf8_icmp_n(~(size_t)0, x->str, x->size, y->str, y->size); }
+ { return utf8_icmp_sv(csview_npos, *x, *y); }
STC_INLINE bool csview_eq(const csview* x, const csview* y)
{ return x->size == y->size && !memcmp(x->str, y->str, x->size); }
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index 34368737..4b87bc0b 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -16,8 +16,7 @@ extern uint32_t utf8_casefold(uint32_t c);
extern uint32_t utf8_tolower(uint32_t c);
extern uint32_t utf8_toupper(uint32_t c);
extern bool utf8_valid_n(const char* s, size_t nbytes);
-extern int utf8_icmp_n(size_t u8max, const char* s1, size_t n1,
- const char* s2, size_t n2);
+extern int utf8_icmp_sv(size_t u8max, csview s1, csview s2);
extern unsigned utf8_encode(char *out, uint32_t c);
extern uint32_t utf8_peek(const char *s, int u8pos);
@@ -36,7 +35,7 @@ STC_INLINE uint32_t utf8_decode(utf8_decode_t* d, const uint32_t byte) {
/* case-insensitive utf8 string comparison */
STC_INLINE int utf8_icmp(const char* s1, const char* s2) {
- return utf8_icmp_n(~(size_t)0, s1, ~(size_t)0, s2, ~(size_t)0);
+ return utf8_icmp_sv(~(size_t)0, c_sv(s1, ~(size_t)0), c_sv(s2, ~(size_t)0));
}
STC_INLINE bool utf8_valid(const char* s) {
diff --git a/src/utf8code.c b/src/utf8code.c
index 394597e1..28c101fa 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -108,24 +108,23 @@ int utf8_icmp(const char* s1, const char* s2) {
for (;;) {
do { utf8_decode(&d1, (uint8_t)*s1++); } while (d1.state);
do { utf8_decode(&d2, (uint8_t)*s2++); } while (d2.state);
- int c = utf8_casefold(d1.codep) - utf8_casefold(d2.codep);
- if (c || !*s2)
+ int32_t c = utf8_casefold(d1.codep) - utf8_casefold(d2.codep);
+ if (c || !s2[-1])
return c;
}
}
*/
-int utf8_icmp_n(size_t u8max, const char* s1, const size_t n1,
- const char* s2, const size_t n2) {
+int utf8_icmp_sv(size_t u8max, const csview s1, const csview s2) {
utf8_decode_t d1 = {.state=0}, d2 = {.state=0};
size_t j1 = 0, j2 = 0;
- while (u8max-- && ((j1 < n1) & (j2 < n2))) {
- do { utf8_decode(&d1, (uint8_t)s1[j1++]); } while (d1.state);
- do { utf8_decode(&d2, (uint8_t)s2[j2++]); } while (d2.state);
- int c = utf8_casefold(d1.codep) - utf8_casefold(d2.codep);
- if (c || !s2[j2])
+ while (u8max-- && ((j1 < s1.size) & (j2 < s2.size))) {
+ do { utf8_decode(&d1, (uint8_t)s1.str[j1++]); } while (d1.state);
+ do { utf8_decode(&d2, (uint8_t)s2.str[j2++]); } while (d2.state);
+ int32_t c = utf8_casefold(d1.codep) - utf8_casefold(d2.codep);
+ if (c || !s2.str[j2 - 1]) // OK if s1.size / s2.size are npos
return c;
}
- return (j2 < n2) - (j1 < n1);
+ return (j2 < s2.size) - (j1 < s1.size);
}
bool utf8_isupper(uint32_t c) {