diff options
| author | Tyge Løvset <[email protected]> | 2022-09-22 13:04:58 +0200 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2022-09-22 13:04:58 +0200 |
| commit | 4c5f0ba23ba6294e144243555ed3c02a72259692 (patch) | |
| tree | 380b249e1c36b83b65507da5a2af4f843252a474 /include/stc/utf8.h | |
| parent | 61c222312bb1cc03ab987fdbc541039723c9e243 (diff) | |
| download | STC-modified-4c5f0ba23ba6294e144243555ed3c02a72259692.tar.gz STC-modified-4c5f0ba23ba6294e144243555ed3c02a72259692.zip | |
Updated _advance() iter methods. Some have signed offsets.
utf8_peek()/utf8_peek_off() added/changed.
Diffstat (limited to 'include/stc/utf8.h')
| -rw-r--r-- | include/stc/utf8.h | 12 |
1 files changed, 9 insertions, 3 deletions
```diff
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index c20b80cb..7a4ba921 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -17,13 +17,13 @@ extern uint32_t utf8_toupper(uint32_t c);
 extern bool utf8_valid_n(const char* s, size_t nbytes);
 extern int utf8_icmp_sv(csview s1, csview s2);
 extern unsigned utf8_encode(char *out, uint32_t c);
-extern uint32_t utf8_peek(const char *s, int u8pos);
+extern uint32_t utf8_peek_off(const char *s, int offset);
+
+/* following functions uses src/utf8code.c */

 STC_INLINE bool utf8_isupper(uint32_t c) { return utf8_tolower(c) != c; }
 STC_INLINE bool utf8_islower(uint32_t c) { return utf8_toupper(c) != c; }

-/* following functions uses src/utf8code.c */
-
 /* decode next utf8 codepoint. https://bjoern.hoehrmann.de/utf-8/decoder/dfa */
 typedef struct { uint32_t state, codep; } utf8_decode_t;

@@ -35,6 +35,12 @@ STC_INLINE uint32_t utf8_decode(utf8_decode_t* d, const uint32_t byte) {
     return d->state = utf8_dtab[256 + d->state + type];
 }

+STC_INLINE uint32_t utf8_peek(const char* s) {
+    utf8_decode_t d = {.state=0};
+    do { utf8_decode(&d, (uint8_t)*s++); } while (d.state);
+    return d.codep;
+}
+
 /* case-insensitive utf8 string comparison */
 STC_INLINE int utf8_icmp(const char* s1, const char* s2) {
     return utf8_icmp_sv(c_sv(s1, ~(size_t)0), c_sv(s2, ~(size_t)0));
```
