diff options
| author | Tyge Lovset <[email protected]> | 2022-05-31 04:54:03 +0200 |
|---|---|---|
| committer | Tyge Lovset <[email protected]> | 2022-05-31 04:54:03 +0200 |
| commit | b09f435cf7fe7a8b4d976b921384c6176381cc7f (patch) | |
| tree | 7df61684e10afcfc433e7d7120faa000f8b0d400 | |
| parent | 2688277a4efe011ffd35f0e3ce859bb110207d8e (diff) | |
| download | STC-modified-b09f435cf7fe7a8b4d976b921384c6176381cc7f.tar.gz STC-modified-b09f435cf7fe7a8b4d976b921384c6176381cc7f.zip | |
Generalized utf8_icmp_n() and added cstr_icmp(), csview_icmp().
| -rw-r--r-- | include/stc/cstr.h | 5 | ||||
| -rw-r--r-- | include/stc/csview.h | 3 | ||||
| -rw-r--r-- | include/stc/utf8.h | 5 | ||||
| -rw-r--r-- | src/utf8code.c | 16 |
4 files changed, 20 insertions, 9 deletions
diff --git a/include/stc/cstr.h b/include/stc/cstr.h
index 273ad70b..7bbd9f3b 100644
--- a/include/stc/cstr.h
+++ b/include/stc/cstr.h
@@ -240,6 +240,11 @@ STC_INLINE char* cstr_expand_uninit(cstr *self, size_t n) {
STC_INLINE int cstr_cmp(const cstr* s1, const cstr* s2)
{ return strcmp(cstr_str(s1), cstr_str(s2)); }
+STC_INLINE int cstr_icmp(const cstr* s1, const cstr* s2) {
+ csview x = cstr_sv(s1), y = cstr_sv(s2);
+ return utf8_icmp_n(~(size_t)0, x.str, x.size, y.str, y.size);
+}
+
STC_INLINE bool cstr_eq(const cstr* s1, const cstr* s2) {
csview x = cstr_sv(s1), y = cstr_sv(s2);
return x.size == y.size && !memcmp(x.str, y.str, x.size);
diff --git a/include/stc/csview.h b/include/stc/csview.h
index eebcb9b7..8508db4f 100644
--- a/include/stc/csview.h
+++ b/include/stc/csview.h
@@ -165,6 +165,9 @@ STC_INLINE bool cstr_ends_with_sv(cstr s, csview sub) {
STC_INLINE int csview_cmp(const csview* x, const csview* y)
{ return strcmp(x->str, y->str); }
+STC_INLINE int csview_icmp(const csview* x, const csview* y)
+ { return utf8_icmp_n(~(size_t)0, x->str, x->size, y->str, y->size); }
+
STC_INLINE bool csview_eq(const csview* x, const csview* y)
{ return x->size == y->size && !memcmp(x->str, y->str, x->size); }
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index 333a0f92..a3647e3c 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -38,7 +38,8 @@ uint32_t utf8_toupper(uint32_t c);
bool utf8_valid(const char* s);
bool utf8_valid_n(const char* s, size_t n);
-int utf8_icmp_n(const char* s1, const char* s2, size_t u8max);
+int utf8_icmp_n(size_t u8max, const char* s1, size_t n1,
+ const char* s2, size_t n2);
/* encode/decode next utf8 codepoint. */
enum { UTF8_OK = 0, UTF8_ERROR = 4 };
@@ -50,7 +51,7 @@ void utf8_decode(utf8_decode_t *d, const uint8_t b);
/* case-insensitive utf8 string comparison */
STC_INLINE int utf8_icmp(const char* s1, const char* s2)
- { return utf8_icmp_n(s1, s2, (size_t)(-1)); }
+ { return utf8_icmp_n(~(size_t)0, s1, ~(size_t)0, s2, ~(size_t)0); }
/* number of characters in the utf8 codepoint from s */
STC_INLINE unsigned utf8_codep_size(const char *s) {
diff --git a/src/utf8code.c b/src/utf8code.c
index cef3efca..2f429541 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -100,17 +100,19 @@ uint32_t utf8_toupper(uint32_t c) {
return c;
}
-int utf8_icmp_n(const char* s1, const char* s2, size_t u8max) {
+int utf8_icmp_n(size_t u8max, const char* s1, const size_t n1,
+ const char* s2, const size_t n2) {
int ret = 0;
utf8_decode_t d1 = {UTF8_OK}, d2 = {UTF8_OK};
- for (; u8max--; s1 += d1.size, s2 += d2.size) {
- utf8_peek(&d1, s1);
- utf8_peek(&d2, s2);
+ size_t j1 = 0, j2 = 0;
+ for (; u8max-- && ((j1 < n1) & (j2 < n2)); j1 += d1.size, j2 += d2.size) {
+ utf8_peek(&d1, s1+j1);
+ utf8_peek(&d2, s2+j2);
ret = utf8_tolower(d1.codep) - utf8_tolower(d2.codep);
- if (ret || !*s2)
- break;
+ if (ret || !s2[j2])
+ return ret;
}
- return ret;
+ return (j2 < n2) - (j1 < n1);
}
bool utf8_isupper(uint32_t c) {
