diff options
| author | Tyge Løvset <[email protected]> | 2022-06-10 08:41:01 +0200 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2022-06-10 08:41:01 +0200 |
| commit | f1d09dfcc7570e69eb6e9688b736f7b031b22b2d (patch) | |
| tree | 461d077630b735851ce901ad90e70e4b299ffe98 | |
| parent | d463acdbee5bb3a9509cb8414602f495408583b4 (diff) | |
| download | STC-modified-f1d09dfcc7570e69eb6e9688b736f7b031b22b2d.tar.gz STC-modified-f1d09dfcc7570e69eb6e9688b736f7b031b22b2d.zip | |
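Some changes to utf8 cstr methods: renamed cstr_size_u8/cstr_size_n_u8 to cstr_u8size/cstr_u8size_n and cstr_valid_u8 to cstr_valid_utf8; replaced cstr_at_u8 with cstr_view_at; cstr_at now takes a utf8 index and returns a const char* (the byte-position cstr_at and cstr_pos_u8 are removed); reworked the stepping loop in utf8_peek.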
| -rw-r--r-- | docs/cstr_api.md | 11 |
| -rw-r--r-- | include/stc/cstr.h | 19 |
| -rw-r--r-- | src/utf8code.c | 7 |
3 files changed, 15 insertions, 22 deletions
diff --git a/docs/cstr_api.md b/docs/cstr_api.md index 8dd1b397..cfc807f6 100644 --- a/docs/cstr_api.md +++ b/docs/cstr_api.md @@ -85,11 +85,10 @@ bool cstr_getdelim(cstr *self, int delim, FILE *stream); // does no #### UTF8 methods ```c -size_t cstr_size_u8(cstr s); // number of utf8 codepoints -size_t cstr_size_n_u8(cstr s, size_t nbytes); // utf8 size within n bytes -csview cstr_at(const cstr* self, size_t bytepos); // utf8 codepoints as a csview -csview cstr_at_u8(const cstr* self, size_t u8idx); // utf8 codepoints at utf8 pos -size_t cstr_pos_u8(const cstr* self, size_t u8idx); // byte position at utf8 index +size_t cstr_u8size(cstr s); // number of utf8 codepoints +size_t cstr_u8size_n(cstr s, size_t nbytes); // utf8 size within n bytes +const char* cstr_at(const cstr* self, size_t u8idx); // byte position at utf8 index +csview cstr_view_at(const cstr* self, size_t u8idx); // utf8 codepoint at utf8 pos as csview // iterate utf8 codepoints cstr_iter cstr_begin(const cstr* self); @@ -97,7 +96,7 @@ cstr_iter cstr_end(const cstr* self); void cstr_next(cstr_iter* it); // utf8 functions requires linking with src/utf8code.c symbols: -bool cstr_valid_u8(const cstr* self); // check if str is valid utf8 +bool cstr_valid_utf8(const cstr* self); // check if str is valid utf8 cstr cstr_tolower(const cstr* self); // returns new lowercase utf8 cstr cstr cstr_toupper(const cstr* self); // returns new uppercase utf8 cstr void cstr_lowercase(cstr* self); // transform cstr to lowercase utf8 diff --git a/include/stc/cstr.h b/include/stc/cstr.h index b0fa276f..2dc2ccae 100644 --- a/include/stc/cstr.h +++ b/include/stc/cstr.h @@ -178,33 +178,26 @@ extern void cstr_foldcase(cstr* self); extern void cstr_lowercase(cstr* self); extern void cstr_uppercase(cstr* self); -STC_INLINE bool cstr_valid_u8(const cstr* self) +STC_INLINE bool cstr_valid_utf8(const cstr* self) { return utf8_valid(cstr_str(self)); } // other utf8 -STC_INLINE size_t cstr_size_u8(cstr s) +STC_INLINE size_t 
cstr_u8size(cstr s) { return utf8_size(cstr_str(&s)); } -STC_INLINE size_t cstr_size_n_u8(cstr s, size_t nbytes) +STC_INLINE size_t cstr_u8size_n(cstr s, size_t nbytes) { return utf8_size_n(cstr_str(&s), nbytes); } -STC_INLINE csview cstr_at(const cstr* self, size_t bytepos) { - csview sv = cstr_sv(self); - sv.str += bytepos; - sv.size = utf8_codep_size(sv.str); - return sv; -} - -STC_INLINE csview cstr_at_u8(const cstr* self, size_t u8idx) { +STC_INLINE csview cstr_view_at(const cstr* self, size_t u8idx) { csview sv = cstr_sv(self); sv.str = utf8_at(sv.str, u8idx); sv.size = utf8_codep_size(sv.str); return sv; } -STC_INLINE size_t cstr_pos_u8(const cstr* self, size_t u8idx) - { return utf8_pos(cstr_str(self), u8idx); } +STC_INLINE const char* cstr_at(const cstr* self, size_t u8idx) + { return utf8_at(cstr_str(self), u8idx); } // utf8 iterator diff --git a/src/utf8code.c b/src/utf8code.c index 1c1e4336..a6ecdb65 100644 --- a/src/utf8code.c +++ b/src/utf8code.c @@ -46,10 +46,11 @@ unsigned utf8_encode(char *out, uint32_t c) } uint32_t utf8_peek(const char* s, int pos) { - int inc = 1; - if (pos < 0) pos = -pos, inc = -1; + int inc = -1; + if (pos > 0) + pos = -pos, inc = 1; while (pos) - pos -= (*(s += inc) & 0xC0) != 0x80; + pos += (*(s += inc) & 0xC0) != 0x80; utf8_decode_t d = {.state=0}; do { utf8_decode(&d, (uint8_t)*s++); } while (d.state); return d.codep; |
