From 4c5f0ba23ba6294e144243555ed3c02a72259692 Mon Sep 17 00:00:00 2001
From: Tyge Løvset
Date: Thu, 22 Sep 2022 13:04:58 +0200
Subject: Updated _advance() iter methods. Some have signed offsets. utf8_peek()/utf8_peek_off() added/changed.

---
 include/stc/utf8.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/stc/utf8.h')

diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index c20b80cb..7a4ba921 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -17,13 +17,13 @@ extern uint32_t utf8_toupper(uint32_t c);
 extern bool utf8_valid_n(const char* s, size_t nbytes);
 extern int utf8_icmp_sv(csview s1, csview s2);
 extern unsigned utf8_encode(char *out, uint32_t c);
-extern uint32_t utf8_peek(const char *s, int u8pos);
+extern uint32_t utf8_peek_off(const char *s, int offset);
+
+/* following functions uses src/utf8code.c */
 
 STC_INLINE bool utf8_isupper(uint32_t c) { return utf8_tolower(c) != c; }
 STC_INLINE bool utf8_islower(uint32_t c) { return utf8_toupper(c) != c; }
 
-/* following functions uses src/utf8code.c */
-
 /* decode next utf8 codepoint. https://bjoern.hoehrmann.de/utf-8/decoder/dfa */
 typedef struct { uint32_t state, codep; } utf8_decode_t;
 
@@ -35,6 +35,12 @@ STC_INLINE uint32_t utf8_decode(utf8_decode_t* d, const uint32_t byte) {
     return d->state = utf8_dtab[256 + d->state + type];
 }
 
+STC_INLINE uint32_t utf8_peek(const char* s) {
+    utf8_decode_t d = {.state=0};
+    do { utf8_decode(&d, (uint8_t)*s++); } while (d.state);
+    return d.codep;
+}
+
 /* case-insensitive utf8 string comparison */
 STC_INLINE int utf8_icmp(const char* s1, const char* s2) {
     return utf8_icmp_sv(c_sv(s1, ~(size_t)0), c_sv(s2, ~(size_t)0));
-- 
cgit v1.2.3