summaryrefslogtreecommitdiffhomepage
path: root/src/utf8code.c
diff options
context:
space:
mode:
authorTyge Løvset <[email protected]>2023-01-12 18:26:16 +0100
committerTyge Løvset <[email protected]>2023-01-12 18:26:16 +0100
commit350bb65a2f68b14ce16a21ea8670cc087e39f4ce (patch)
tree4c92b37b05aa5c1a3ccb695466fa75dbd938bf17 /src/utf8code.c
parent891aebc0a971df8e57618c16ed214d982072cbd3 (diff)
downloadSTC-modified-350bb65a2f68b14ce16a21ea8670cc087e39f4ce.tar.gz
STC-modified-350bb65a2f68b14ce16a21ea8670cc087e39f4ce.zip
docs and utf8 updates.
Diffstat (limited to 'src/utf8code.c')
-rw-r--r--src/utf8code.c9
1 files changed, 8 insertions, 1 deletions
diff --git a/src/utf8code.c b/src/utf8code.c
index a8e4c9ab..ecf79880 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -136,7 +136,8 @@ bool utf8_isgroup(int group, uint32_t c) {
bool utf8_isalpha(uint32_t c) {
static int16_t groups[] = {U8G_Latin, U8G_Nl, U8G_Greek, U8G_Cyrillic,
- U8G_Han, U8G_Arabic, U8G_Devanagari};
+ U8G_Han, U8G_Devanagari, U8G_Arabic};
+ if (c < 128) return isalpha(c) != 0;
for (unsigned j=0; j < c_ARRAYLEN(groups); ++j)
if (utf8_isgroup(groups[j], c))
return true;
@@ -149,6 +150,12 @@ bool utf8_iscased(uint32_t c) {
utf8_isgroup(U8G_Lt, c);
}
+bool utf8_isword(uint32_t c) { // true when code point c counts as a "word" character
+ if (c < 128) return (isalnum(c) != 0) | (c == '_'); // below 128: isalnum() or underscore; '|' joins two 0/1 values
+ return utf8_isalpha(c) || utf8_isgroup(U8G_Nd, c) || // 128 and up: utf8_isalpha(), or member of group U8G_Nd,
+ utf8_isgroup(U8G_Pc, c); // or member of group U8G_Pc
+}
+
/* The tables below are extracted from the RE2 library */
static const URange16 Cc_range16[] = { // Control