From d463acdbee5bb3a9509cb8414602f495408583b4 Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Thu, 9 Jun 2022 16:59:52 +0200 Subject: Added utf8_peek() --- src/utf8code.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/utf8code.c') diff --git a/src/utf8code.c b/src/utf8code.c index 9613ba95..1c1e4336 100644 --- a/src/utf8code.c +++ b/src/utf8code.c @@ -45,6 +45,16 @@ unsigned utf8_encode(char *out, uint32_t c) return 0; } +uint32_t utf8_peek(const char* s, int pos) { + int inc = 1; + if (pos < 0) pos = -pos, inc = -1; + while (pos) + pos -= (*(s += inc) & 0xC0) != 0x80; + utf8_decode_t d = {.state=0}; + do { utf8_decode(&d, (uint8_t)*s++); } while (d.state); + return d.codep; +} + bool utf8_valid(const char* s) { utf8_decode_t d = {.state=0}; while (*s) -- cgit v1.2.3