summaryrefslogtreecommitdiffhomepage
path: root/include/stc/utf8.h
diff options
context:
space:
mode:
authorTyge Løvset <[email protected]>2022-09-22 13:04:58 +0200
committerTyge Løvset <[email protected]>2022-09-22 13:04:58 +0200
commit4c5f0ba23ba6294e144243555ed3c02a72259692 (patch)
tree380b249e1c36b83b65507da5a2af4f843252a474 /include/stc/utf8.h
parent61c222312bb1cc03ab987fdbc541039723c9e243 (diff)
downloadSTC-modified-4c5f0ba23ba6294e144243555ed3c02a72259692.tar.gz
STC-modified-4c5f0ba23ba6294e144243555ed3c02a72259692.zip
Updated _advance() iter methods. Some have signed offsets.
utf8_peek()/utf8_peek_off() added/changed.
Diffstat (limited to 'include/stc/utf8.h')
-rw-r--r--include/stc/utf8.h12
1 files changed, 9 insertions, 3 deletions
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index c20b80cb..7a4ba921 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -17,13 +17,13 @@ extern uint32_t utf8_toupper(uint32_t c);
extern bool utf8_valid_n(const char* s, size_t nbytes);
extern int utf8_icmp_sv(csview s1, csview s2);
extern unsigned utf8_encode(char *out, uint32_t c);
-extern uint32_t utf8_peek(const char *s, int u8pos);
+extern uint32_t utf8_peek_off(const char *s, int offset);
+
+/* following functions uses src/utf8code.c */
STC_INLINE bool utf8_isupper(uint32_t c) { return utf8_tolower(c) != c; } /* true when utf8_tolower() maps c to a different codepoint */
STC_INLINE bool utf8_islower(uint32_t c) { return utf8_toupper(c) != c; } /* true when utf8_toupper() maps c to a different codepoint */
-/* following functions uses src/utf8code.c */
-
/* decode next utf8 codepoint. https://bjoern.hoehrmann.de/utf-8/decoder/dfa */
typedef struct { uint32_t state, codep; } utf8_decode_t; /* state: current decoder (DFA) state, presumably 0 once a codepoint is complete -- confirm against utf8_dtab; codep: codepoint value being decoded */
@@ -35,6 +35,12 @@ STC_INLINE uint32_t utf8_decode(utf8_decode_t* d, const uint32_t byte) {
return d->state = utf8_dtab[256 + d->state + type];
}
+STC_INLINE uint32_t utf8_peek(const char* s) { /* decode and return the codepoint that starts at s */
+ utf8_decode_t d = {.state=0}; /* codep is not named, so it is implicitly zero-initialized as well */
+ do { utf8_decode(&d, (uint8_t)*s++); } while (d.state); /* feed one byte at a time until the decoder state is back to 0 */
+ return d.codep;
+} /* NOTE(review): the loop checks neither for '\0' nor for an error state; if utf8_decode() never returns to state 0 on malformed or truncated input, this would read past the end of s -- confirm against utf8_dtab */
+
/* case-insensitive utf8 string comparison */
STC_INLINE int utf8_icmp(const char* s1, const char* s2) {
return utf8_icmp_sv(c_sv(s1, ~(size_t)0), c_sv(s2, ~(size_t)0));