From 2688277a4efe011ffd35f0e3ce859bb110207d8e Mon Sep 17 00:00:00 2001 From: Tyge Lovset Date: Mon, 30 May 2022 23:15:53 +0200 Subject: Added utf8_icmp_n() case insensitive comparison. --- include/stc/ccommon.h | 8 ++++---- include/stc/utf8.h | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/stc/ccommon.h b/include/stc/ccommon.h index 9c8ba414..2ebcf7c1 100644 --- a/include/stc/ccommon.h +++ b/include/stc/ccommon.h @@ -120,7 +120,7 @@ typedef const char* crawstr; #define _c_ROTL(x, k) (x << (k) | x >> (8*sizeof(x) - (k))) STC_INLINE uint64_t c_fasthash(const void* key, size_t len) { - const uint8_t *x = (const uint8_t*) key; + const uint8_t *x = (const uint8_t*) key; uint64_t u8, h = 1; size_t n = len >> 3; uint32_t u4; while (n--) { @@ -137,16 +137,16 @@ STC_INLINE uint64_t c_fasthash(const void* key, size_t len) { return _c_ROTL(h, 26) ^ h; } -STC_INLINE uint64_t c_strhash(const char *str) +STC_INLINE uint64_t c_strhash(const char *str) { return c_fasthash(str, strlen(str)); } -STC_INLINE char* c_strnstrn(const char *s, const char *needle, +STC_INLINE char* c_strnstrn(const char *s, const char *needle, size_t slen, const size_t nlen) { if (!nlen) return (char *)s; if (nlen > slen) return NULL; slen -= nlen; do { - if (*s == *needle && !memcmp(s, needle, nlen)) + if (*s == *needle && !memcmp(s, needle, nlen)) return (char *)s; ++s; } while (slen--); diff --git a/include/stc/utf8.h b/include/stc/utf8.h index 67722345..333a0f92 100644 --- a/include/stc/utf8.h +++ b/include/stc/utf8.h @@ -35,8 +35,10 @@ bool utf8_isalpha(uint32_t c); bool utf8_isalnum(uint32_t c); uint32_t utf8_tolower(uint32_t c); uint32_t utf8_toupper(uint32_t c); + bool utf8_valid(const char* s); bool utf8_valid_n(const char* s, size_t n); +int utf8_icmp_n(const char* s1, const char* s2, size_t u8max); /* encode/decode next utf8 codepoint. */ enum { UTF8_OK = 0, UTF8_ERROR = 4 }; @@ -46,6 +48,10 @@ void utf8_peek(utf8_decode_t* d, const char *s); unsigned utf8_encode(char *out, uint32_t c); void utf8_decode(utf8_decode_t *d, const uint8_t b); +/* case-insensitive utf8 string comparison */ +STC_INLINE int utf8_icmp(const char* s1, const char* s2) + { return utf8_icmp_n(s1, s2, (size_t)(-1)); } + /* number of characters in the utf8 codepoint from s */ STC_INLINE unsigned utf8_codep_size(const char *s) { unsigned b = (uint8_t)*s; -- cgit v1.2.3