diff options
| author | Tyge Løvset <[email protected]> | 2022-06-10 11:29:17 +0200 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2022-06-10 11:29:17 +0200 |
| commit | 8883fc8108428878d3d6291ba8981cf6df72499c (patch) | |
| tree | 9fbdc79019501714dc984c1fbd5eb2c7ea979bb4 /include | |
| parent | f1d09dfcc7570e69eb6e9688b736f7b031b22b2d (diff) | |
| download | STC-modified-8883fc8108428878d3d6291ba8981cf6df72499c.tar.gz STC-modified-8883fc8108428878d3d6291ba8981cf6df72499c.zip | |
UTF-8 fixes and improvements. Some API changes.
Diffstat (limited to 'include')
| -rw-r--r-- | include/stc/cstr.h | 17 | ||||
| -rw-r--r-- | include/stc/csview.h | 10 | ||||
| -rw-r--r-- | include/stc/utf8.h | 13 |
3 files changed, 22 insertions, 18 deletions
diff --git a/include/stc/cstr.h b/include/stc/cstr.h index 2dc2ccae..41db1cd3 100644 --- a/include/stc/cstr.h +++ b/include/stc/cstr.h @@ -189,21 +189,24 @@ STC_INLINE size_t cstr_u8size(cstr s) STC_INLINE size_t cstr_u8size_n(cstr s, size_t nbytes) { return utf8_size_n(cstr_str(&s), nbytes); } -STC_INLINE csview cstr_view_at(const cstr* self, size_t u8idx) { +STC_INLINE size_t cstr_bytepos(const cstr* self, size_t u8idx) + { return utf8_pos(cstr_str(self), u8idx); } + +STC_INLINE const char* cstr_at(const cstr* self, size_t u8idx) + { return utf8_at(cstr_str(self), u8idx); } + +STC_INLINE csview cstr_chr(const cstr* self, size_t u8idx) { csview sv = cstr_sv(self); sv.str = utf8_at(sv.str, u8idx); - sv.size = utf8_codep_size(sv.str); + sv.size = utf8_chr_size(sv.str); return sv; } -STC_INLINE const char* cstr_at(const cstr* self, size_t u8idx) - { return utf8_at(cstr_str(self), u8idx); } - // utf8 iterator STC_INLINE cstr_iter cstr_begin(const cstr* self) { const char* str = cstr_str(self); - return c_make(cstr_iter){.chr = {str, utf8_codep_size(str)}}; + return c_make(cstr_iter){.chr = {str, utf8_chr_size(str)}}; } STC_INLINE cstr_iter cstr_end(const cstr* self) { csview sv = cstr_sv(self); @@ -211,7 +214,7 @@ STC_INLINE cstr_iter cstr_end(const cstr* self) { } STC_INLINE void cstr_next(cstr_iter* it) { it->ref += it->chr.size; - it->chr.size = utf8_codep_size(it->ref); + it->chr.size = utf8_chr_size(it->ref); } diff --git a/include/stc/csview.h b/include/stc/csview.h index 6cfd6e82..6d12901b 100644 --- a/include/stc/csview.h +++ b/include/stc/csview.h @@ -81,25 +81,25 @@ STC_INLINE csview csview_slice(csview sv, size_t p1, size_t p2) { /* iterator */ STC_INLINE csview_iter csview_begin(const csview* self) - { return c_make(csview_iter){.chr = {self->str, utf8_codep_size(self->str)}}; } + { return c_make(csview_iter){.chr = {self->str, utf8_chr_size(self->str)}}; } STC_INLINE csview_iter csview_end(const csview* self) { return c_make(csview_iter){self->str + 
self->size}; } STC_INLINE void csview_next(csview_iter* it) - { it->ref += it->chr.size; it->chr.size = utf8_codep_size(it->ref); } + { it->ref += it->chr.size; it->chr.size = utf8_chr_size(it->ref); } /* utf8 */ -STC_INLINE size_t csview_size_u8(csview sv) +STC_INLINE size_t csview_u8size(csview sv) { return utf8_size_n(sv.str, sv.size); } -STC_INLINE csview csview_substr_u8(csview sv, size_t u8pos, size_t u8len) { +STC_INLINE csview csview_u8substr(csview sv, size_t u8pos, size_t u8len) { sv.str = utf8_at(sv.str, u8pos); sv.size = utf8_pos(sv.str, u8len); return sv; } -STC_INLINE bool csview_valid_u8(csview sv) // depends on src/utf8code.c +STC_INLINE bool csview_valid_utf8(csview sv) // depends on src/utf8code.c { return utf8_valid_n(sv.str, sv.size); } diff --git a/include/stc/utf8.h b/include/stc/utf8.h index b7edd2cb..31ea3aa9 100644 --- a/include/stc/utf8.h +++ b/include/stc/utf8.h @@ -37,7 +37,7 @@ uint32_t utf8_casefold(uint32_t c); uint32_t utf8_tolower(uint32_t c); uint32_t utf8_toupper(uint32_t c); bool utf8_valid(const char* s); -bool utf8_valid_n(const char* s, size_t n); +bool utf8_valid_n(const char* s, size_t nbytes); int utf8_icmp_n(size_t u8max, const char* s1, size_t n1, const char* s2, size_t n2); unsigned utf8_encode(char *out, uint32_t c); @@ -60,7 +60,7 @@ STC_INLINE int utf8_icmp(const char* s1, const char* s2) { } /* number of characters in the utf8 codepoint from s */ -STC_INLINE unsigned utf8_codep_size(const char *s) { +STC_INLINE unsigned utf8_chr_size(const char *s) { unsigned b = (uint8_t)*s; if (b < 0x80) return 1; if (b < 0xC2) return 0; @@ -74,14 +74,15 @@ STC_INLINE unsigned utf8_codep_size(const char *s) { STC_INLINE size_t utf8_size(const char *s) { size_t size = 0; while (*s) - size += (*s++ & 0xC0) != 0x80; + size += (*++s & 0xC0) != 0x80; return size; } -STC_INLINE size_t utf8_size_n(const char *s, size_t n) { +STC_INLINE size_t utf8_size_n(const char *s, size_t nbytes) { size_t size = 0; - while ((n-- != 0) & (*s != 0)) - 
size += (*s++ & 0xC0) != 0x80; + while ((nbytes-- != 0) & (*s != 0)) { + size += (*++s & 0xC0) != 0x80; + } return size; } |
