From 379c01f19e186dbd9889bc9dd43d6d867a2a33be Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Fri, 14 Jan 2022 08:16:52 +0100 Subject: Removed cstr iter; => csview iter is now utf8 iter. See utf8.h example. --- include/stc/cstr.h | 6 ------ include/stc/csview.h | 7 ++++--- include/stc/utf8.h | 34 +++++++++++++++++++--------------- 3 files changed, 23 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/stc/cstr.h b/include/stc/cstr.h index 7ba20d10..04592b69 100644 --- a/include/stc/cstr.h +++ b/include/stc/cstr.h @@ -31,7 +31,6 @@ #include typedef struct cstr { char* str; } cstr; -typedef struct cstr_iter { char *ref; } cstr_iter; typedef char cstr_value; #define cstr_npos (SIZE_MAX >> 1) @@ -110,11 +109,6 @@ STC_INLINE void cstr_erase(cstr* self, const size_t pos) STC_INLINE char* cstr_front(cstr* self) { return self->str; } STC_INLINE char* cstr_back(cstr* self) { return self->str + _cstr_rep(self)->size - 1; } -STC_INLINE cstr_iter cstr_begin(cstr* self) - { return c_make(cstr_iter){self->str}; } -STC_INLINE cstr_iter cstr_end(cstr* self) - { return c_make(cstr_iter){self->str + _cstr_rep(self)->size}; } -STC_INLINE void cstr_next(cstr_iter* it) {++it->ref; } STC_INLINE bool cstr_equals(cstr s, const char* str) { return strcmp(s.str, str) == 0; } STC_INLINE bool cstr_equals_s(cstr s1, cstr s2) diff --git a/include/stc/csview.h b/include/stc/csview.h index 58a7f4f1..d8bb567e 100644 --- a/include/stc/csview.h +++ b/include/stc/csview.h @@ -27,7 +27,7 @@ #include "utf8.h" typedef struct csview { const char* str; size_t size; } csview; -typedef struct csview_iter { const char *ref; } csview_iter; +typedef union csview_iter { const char *ref; csview cp; } csview_iter; typedef char csview_value; #define csview_null c_make(csview){"", 0} @@ -69,10 +69,11 @@ STC_INLINE bool csview_ends_with(csview sv, csview sub) { if (sub.size > sv.size) return false; return !memcmp(sv.str + sv.size - sub.size, sub.str, sub.size); } STC_INLINE csview_iter csview_begin(const csview* self) - { return c_make(csview_iter){self->str}; } + { return c_make(csview_iter){.cp = {self->str, utf8_codepoint_size(*self->str)}}; } STC_INLINE csview_iter csview_end(const csview* self) { return c_make(csview_iter){self->str + self->size}; } -STC_INLINE void csview_next(csview_iter* it) { ++it->ref; } +STC_INLINE void csview_next(csview_iter* it) + { it->ref += it->cp.size; it->cp.size = utf8_codepoint_size(*it->ref); } /* utf8 */ STC_INLINE bool csview_valid_utf8(csview sv) diff --git a/include/stc/utf8.h b/include/stc/utf8.h index 9a65906d..a064b906 100644 --- a/include/stc/utf8.h +++ b/include/stc/utf8.h @@ -8,13 +8,18 @@ int main() { c_auto (cstr, s1) { - s1 = cstr_new("hell😀 world"); - cstr_replace_v(&s1, utf8_substr(s1.str, 4, 1), c_sv("x")); + s1 = cstr_new("hell😀 w😀rld"); + cstr_replace_v(&s1, utf8_substr(s1.str, 7, 1), c_sv("x")); printf("%s\n", s1.str); + + csview sv = csview_from_s(s1); + c_foreach (i, csview, sv) + printf(c_PRIsv ",", c_ARGsv(i.cp)); } } // Output: -// Hellx world +// hell😀 wxrld +// h,e,l,l,😀, ,w,x,r,l,d, */ #include "ccommon.h" #include @@ -32,20 +37,21 @@ STC_API const char* utf8_at(const char *s, size_t index); /* decode next utf8 codepoint. */ STC_API uint32_t utf8_decode(uint32_t *state, uint32_t *codep, const uint32_t byte); -STC_INLINE bool utf8_valid(const char* str) - { return utf8_size(str) != SIZE_MAX; } +STC_INLINE size_t utf8_pos(const char* s, size_t index) + { return utf8_at(s, index) - s; } -STC_INLINE uint32_t utf8_peek(const char *s) -{ +STC_INLINE bool utf8_valid(const char* s) + { return utf8_size(s) != SIZE_MAX; } + +STC_INLINE uint32_t utf8_peek(const char *s) { uint32_t state = 0, codepoint; - utf8_decode(&state, &codepoint, (uint8_t)s[0]); + utf8_decode(&state, &codepoint, (uint8_t)*s); return codepoint; } -STC_INLINE int utf8_codepoint_size(char c) -{ +STC_INLINE size_t utf8_codepoint_size(char c) { uint8_t u = (uint8_t)c; - int ret = (u & 0xF0) == 0xE0; + size_t ret = (u & 0xF0) == 0xE0; ret += ret << 1; // 3 ret |= u < 0x80; // 1 ret |= ((0xC1 < u) & (u < 0xE0)) << 1; // 2 @@ -53,9 +59,8 @@ STC_INLINE int utf8_codepoint_size(char c) return ret; } -STC_INLINE const char *utf8_next(const char *s) -{ - const char* t = s + utf8_codepoint_size(s[0]); +STC_INLINE const char *utf8_next(const char *s) { + const char* t = s + utf8_codepoint_size(*s); uintptr_t p = (uintptr_t)t; p &= (uintptr_t) -(*s != 0); @@ -128,7 +133,6 @@ STC_DEF const char* utf8_at(const char *s, size_t index) return s; } - #endif #endif #undef i_opt -- cgit v1.2.3