From 350bb65a2f68b14ce16a21ea8670cc087e39f4ce Mon Sep 17 00:00:00 2001
From: Tyge Løvset
Date: Thu, 12 Jan 2023 18:26:16 +0100
Subject: docs and utf8 updates.

---
 src/utf8code.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'src/utf8code.c')

diff --git a/src/utf8code.c b/src/utf8code.c
index a8e4c9ab..ecf79880 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -136,7 +136,8 @@ bool utf8_isgroup(int group, uint32_t c) {
 
 bool utf8_isalpha(uint32_t c) {
     static int16_t groups[] = {U8G_Latin, U8G_Nl, U8G_Greek, U8G_Cyrillic,
-                               U8G_Han, U8G_Arabic, U8G_Devanagari};
+                               U8G_Han, U8G_Devanagari, U8G_Arabic};
+    if (c < 128) return isalpha(c) != 0;
     for (unsigned j=0; j < c_ARRAYLEN(groups); ++j)
         if (utf8_isgroup(groups[j], c))
             return true;
@@ -149,6 +150,12 @@ bool utf8_iscased(uint32_t c) {
            utf8_isgroup(U8G_Lt, c);
 }
 
+bool utf8_isword(uint32_t c) {
+    if (c < 128) return (isalnum(c) != 0) | (c == '_');
+    return utf8_isalpha(c) || utf8_isgroup(U8G_Nd, c) ||
+           utf8_isgroup(U8G_Pc, c);
+}
+
 /* The tables below are extracted from the RE2 library */
 static const URange16 Cc_range16[] = { // Control
 
-- 
cgit v1.2.3