From 8c6ba8a3444e4b8640e7fe04f565cef57c850432 Mon Sep 17 00:00:00 2001 From: Tyge Lovset Date: Mon, 2 Jan 2023 09:00:47 +0100 Subject: Added selected unicode character classes. --- src/utf8code.c | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 209 insertions(+), 11 deletions(-) (limited to 'src/utf8code.c') diff --git a/src/utf8code.c b/src/utf8code.c index 5dfb7d30..6fe8515e 100644 --- a/src/utf8code.c +++ b/src/utf8code.c @@ -112,17 +112,29 @@ int utf8_icmp_sv(const csview s1, const csview s2) { return (int)(s1.size - s2.size); } -bool utf8_isspace(uint32_t c) { - static uint16_t t[] = {0x20, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0xA0, - 0x1680, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000}; - for (size_t i=0; i= 0x2000) & (c <= 0x200A); -} +typedef struct { + uint16_t lo; + uint16_t hi; +} URange16; + +typedef struct { + const char *name; + const URange16 *r16; + int nr16; +} UGroup; + +static const UGroup unicode_groups[]; +static const int num_unicode_groups; +static const int Lt_group; -bool utf8_isdigit(uint32_t c) { - return ((c >= '0') & (c <= '9')) || - ((c >= 0xFF10) & (c <= 0xFF19)); +bool utf8_isgroup(int group, uint32_t c) { + for (int j=0; j