1) Removed files/modules that are not relevant, to make the library more focused:

- threads.h/threads.c (external lib)
- coptions.h - will be kept as a gist.
- More will follow (examples, some benchmarks, etc.).

2) Replaced the UTF-8 decoder with Björn Höhrmann's DFA decoder.
author: Tyge Løvset <[email protected]> 2022-05-31 15:04:52 +0200
committer: Tyge Løvset <[email protected]> 2022-05-31 15:04:52 +0200
commit: eb9821bec4a292458499042392924595b3338085 (patch)
tree: 90b6286619deefc2eaf72fce4fc67d5959c84557 /include/stc/utf8.h
parent: 0a92ec2235b5f42e93012be14938bb11e3f3650a (diff)
download: STC-modified-eb9821bec4a292458499042392924595b3338085.tar.gz
STC-modified-eb9821bec4a292458499042392924595b3338085.zip
1 files changed, 10 insertions, 7 deletions
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index e5fa5894..630a7a7c 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -35,19 +35,22 @@ bool        utf8_isalpha(uint32_t c);
 bool        utf8_isalnum(uint32_t c);
 uint32_t    utf8_tolower(uint32_t c);
 uint32_t    utf8_toupper(uint32_t c);
-
 bool        utf8_valid(const char* s);
 bool        utf8_valid_n(const char* s, size_t n);
-
 int         utf8_icmp_n(size_t u8max, const char* s1, size_t n1,
                                       const char* s2, size_t n2);
+unsigned    utf8_encode(char *out, uint32_t c);
+
 /* encode/decode next utf8 codepoint. */
-enum { UTF8_OK = 0, UTF8_ERROR = 4 };
-typedef struct { uint32_t state, codep, size; } utf8_decode_t;
+typedef struct { uint32_t state, codep; } utf8_decode_t;
 
-void        utf8_peek(utf8_decode_t* d, const char *s);
-unsigned    utf8_encode(char *out, uint32_t c);
-void        utf8_decode(utf8_decode_t *d, const uint8_t b);
+STC_INLINE uint32_t utf8_decode(utf8_decode_t* d, const uint32_t byte) {
+    extern const uint8_t utf8_dtab[];
+    const uint32_t type = utf8_dtab[byte];
+    d->codep = d->state ? (byte & 0x3fu) | (d->codep << 6)
+                        : (0xff >> type) & byte;
+    return d->state = utf8_dtab[256 + d->state + type];
+}
 
 /* case-insensitive utf8 string comparison */
 STC_INLINE int utf8_icmp(const char* s1, const char* s2) {
author	Tyge Løvset <[email protected]>	2022-05-31 15:04:52 +0200
committer	Tyge Løvset <[email protected]>	2022-05-31 15:04:52 +0200
commit	eb9821bec4a292458499042392924595b3338085 (patch)
tree	90b6286619deefc2eaf72fce4fc67d5959c84557 /include/stc/utf8.h
parent	0a92ec2235b5f42e93012be14938bb11e3f3650a (diff)
download	STC-modified-eb9821bec4a292458499042392924595b3338085.tar.gz STC-modified-eb9821bec4a292458499042392924595b3338085.zip