summary | refs | log | tree | commit | diff | homepage
path: root/src/utf8code.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/utf8code.c')
-rw-r--r-- src/utf8code.c 263
1 files changed, 135 insertions, 128 deletions
diff --git a/src/utf8code.c b/src/utf8code.c
index ecf79880..a892f5fd 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -20,7 +20,7 @@ const uint8_t utf8_dtab[] = {
12,36,12,12,12,12,12,12,12,12,12,12,
};
-unsigned utf8_encode(char *out, uint32_t c)
+int utf8_encode(char *out, uint32_t c)
{
if (c < 0x80U) {
out[0] = (char) c;
@@ -53,7 +53,7 @@ uint32_t utf8_peek_off(const char* s, int pos) {
return utf8_peek(s);
}
-bool utf8_valid_n(const char* s, size_t nbytes) {
+bool utf8_valid_n(const char* s, intptr_t nbytes) {
utf8_decode_t d = {.state=0};
while ((nbytes-- != 0) & (*s != 0))
utf8_decode(&d, (uint8_t)*s++);
@@ -61,7 +61,7 @@ bool utf8_valid_n(const char* s, size_t nbytes) {
}
uint32_t utf8_casefold(uint32_t c) {
- for (size_t i=0; i < casefold_len; ++i) {
+ for (int i=0; i < casefold_len; ++i) {
const struct CaseMapping entry = casemappings[i];
if (c <= entry.c2) {
if (c < entry.c1) return c;
@@ -74,7 +74,7 @@ uint32_t utf8_casefold(uint32_t c) {
}
uint32_t utf8_tolower(uint32_t c) {
- for (size_t i=0; i < sizeof upcase_ind/sizeof *upcase_ind; ++i) {
+ for (int i=0; i < (int)(sizeof upcase_ind/sizeof *upcase_ind); ++i) {
const struct CaseMapping entry = casemappings[upcase_ind[i]];
if (c <= entry.c2) {
if (c < entry.c1) return c;
@@ -87,7 +87,7 @@ uint32_t utf8_tolower(uint32_t c) {
}
uint32_t utf8_toupper(uint32_t c) {
- for (size_t i=0; i < sizeof lowcase_ind/sizeof *lowcase_ind; ++i) {
+ for (int i=0; i < (int)(sizeof lowcase_ind/sizeof *lowcase_ind); ++i) {
const struct CaseMapping entry = casemappings[lowcase_ind[i]];
if (c <= entry.m2) {
int d = entry.m2 - entry.c2;
@@ -101,7 +101,7 @@ uint32_t utf8_toupper(uint32_t c) {
int utf8_icmp_sv(const csview s1, const csview s2) {
utf8_decode_t d1 = {.state=0}, d2 = {.state=0};
- size_t j1 = 0, j2 = 0;
+ intptr_t j1 = 0, j2 = 0;
while ((j1 < s1.size) & (j2 < s2.size)) {
do { utf8_decode(&d1, (uint8_t)s1.str[j1++]); } while (d1.state);
do { utf8_decode(&d2, (uint8_t)s2.str[j2++]); } while (d2.state);
@@ -122,13 +122,18 @@ typedef struct {
int nr16;
} UGroup;
-static const UGroup unicode_groups[U8G_SIZE];
+#ifndef __cplusplus
+static
+#else
+extern
+#endif
+const UGroup _utf8_unicode_groups[U8G_SIZE];
bool utf8_isgroup(int group, uint32_t c) {
- for (int j=0; j<unicode_groups[group].nr16; ++j) {
- if (c < unicode_groups[group].r16[j].lo)
+ for (int j=0; j<_utf8_unicode_groups[group].nr16; ++j) {
+ if (c < _utf8_unicode_groups[group].r16[j].lo)
return false;
- if (c <= unicode_groups[group].r16[j].hi)
+ if (c <= _utf8_unicode_groups[group].r16[j].hi)
return true;
}
return false;
@@ -137,21 +142,21 @@ bool utf8_isgroup(int group, uint32_t c) {
bool utf8_isalpha(uint32_t c) {
static int16_t groups[] = {U8G_Latin, U8G_Nl, U8G_Greek, U8G_Cyrillic,
U8G_Han, U8G_Devanagari, U8G_Arabic};
- if (c < 128) return isalpha(c) != 0;
- for (unsigned j=0; j < c_ARRAYLEN(groups); ++j)
+ if (c < 128) return isalpha((int)c) != 0;
+ for (int j=0; j < c_ARRAYLEN(groups); ++j)
if (utf8_isgroup(groups[j], c))
return true;
return false;
}
bool utf8_iscased(uint32_t c) {
- if (c < 128) return isalpha(c) != 0;
+ if (c < 128) return isalpha((int)c) != 0;
return utf8_islower(c) || utf8_isupper(c) ||
utf8_isgroup(U8G_Lt, c);
}
bool utf8_isword(uint32_t c) {
- if (c < 128) return (isalnum(c) != 0) | (c == '_');
+ if (c < 128) return (isalnum((int)c) != 0) | (c == '_');
return utf8_isalpha(c) || utf8_isgroup(U8G_Nd, c) ||
utf8_isgroup(U8G_Pc, c);
}
@@ -323,139 +328,141 @@ static const URange16 Zs_range16[] = { // Space separator
};
static const URange16 Arabic_range16[] = {
- { 1536, 1540 },
- { 1542, 1547 },
- { 1549, 1562 },
- { 1564, 1566 },
- { 1568, 1599 },
- { 1601, 1610 },
- { 1622, 1647 },
- { 1649, 1756 },
- { 1758, 1791 },
- { 1872, 1919 },
- { 2160, 2190 },
- { 2192, 2193 },
- { 2200, 2273 },
- { 2275, 2303 },
- { 64336, 64450 },
- { 64467, 64829 },
- { 64832, 64911 },
- { 64914, 64967 },
- { 64975, 64975 },
- { 65008, 65023 },
- { 65136, 65140 },
- { 65142, 65276 },
+ { 1536, 1540 },
+ { 1542, 1547 },
+ { 1549, 1562 },
+ { 1564, 1566 },
+ { 1568, 1599 },
+ { 1601, 1610 },
+ { 1622, 1647 },
+ { 1649, 1756 },
+ { 1758, 1791 },
+ { 1872, 1919 },
+ { 2160, 2190 },
+ { 2192, 2193 },
+ { 2200, 2273 },
+ { 2275, 2303 },
+ { 64336, 64450 },
+ { 64467, 64829 },
+ { 64832, 64911 },
+ { 64914, 64967 },
+ { 64975, 64975 },
+ { 65008, 65023 },
+ { 65136, 65140 },
+ { 65142, 65276 },
};
static const URange16 Cyrillic_range16[] = {
- { 1024, 1156 },
- { 1159, 1327 },
- { 7296, 7304 },
- { 7467, 7467 },
- { 7544, 7544 },
- { 11744, 11775 },
- { 42560, 42655 },
- { 65070, 65071 },
+ { 1024, 1156 },
+ { 1159, 1327 },
+ { 7296, 7304 },
+ { 7467, 7467 },
+ { 7544, 7544 },
+ { 11744, 11775 },
+ { 42560, 42655 },
+ { 65070, 65071 },
};
static const URange16 Devanagari_range16[] = {
- { 2304, 2384 },
- { 2389, 2403 },
- { 2406, 2431 },
- { 43232, 43263 },
+ { 2304, 2384 },
+ { 2389, 2403 },
+ { 2406, 2431 },
+ { 43232, 43263 },
};
static const URange16 Greek_range16[] = {
- { 880, 883 },
- { 885, 887 },
- { 890, 893 },
- { 895, 895 },
- { 900, 900 },
- { 902, 902 },
- { 904, 906 },
- { 908, 908 },
- { 910, 929 },
- { 931, 993 },
- { 1008, 1023 },
- { 7462, 7466 },
- { 7517, 7521 },
- { 7526, 7530 },
- { 7615, 7615 },
- { 7936, 7957 },
- { 7960, 7965 },
- { 7968, 8005 },
- { 8008, 8013 },
- { 8016, 8023 },
- { 8025, 8025 },
- { 8027, 8027 },
- { 8029, 8029 },
- { 8031, 8061 },
- { 8064, 8116 },
- { 8118, 8132 },
- { 8134, 8147 },
- { 8150, 8155 },
- { 8157, 8175 },
- { 8178, 8180 },
- { 8182, 8190 },
- { 8486, 8486 },
- { 43877, 43877 },
+ { 880, 883 },
+ { 885, 887 },
+ { 890, 893 },
+ { 895, 895 },
+ { 900, 900 },
+ { 902, 902 },
+ { 904, 906 },
+ { 908, 908 },
+ { 910, 929 },
+ { 931, 993 },
+ { 1008, 1023 },
+ { 7462, 7466 },
+ { 7517, 7521 },
+ { 7526, 7530 },
+ { 7615, 7615 },
+ { 7936, 7957 },
+ { 7960, 7965 },
+ { 7968, 8005 },
+ { 8008, 8013 },
+ { 8016, 8023 },
+ { 8025, 8025 },
+ { 8027, 8027 },
+ { 8029, 8029 },
+ { 8031, 8061 },
+ { 8064, 8116 },
+ { 8118, 8132 },
+ { 8134, 8147 },
+ { 8150, 8155 },
+ { 8157, 8175 },
+ { 8178, 8180 },
+ { 8182, 8190 },
+ { 8486, 8486 },
+ { 43877, 43877 },
};
static const URange16 Han_range16[] = {
- { 11904, 11929 },
- { 11931, 12019 },
- { 12032, 12245 },
- { 12293, 12293 },
- { 12295, 12295 },
- { 12321, 12329 },
- { 12344, 12347 },
- { 13312, 19903 },
- { 19968, 40959 },
- { 63744, 64109 },
- { 64112, 64217 },
+ { 11904, 11929 },
+ { 11931, 12019 },
+ { 12032, 12245 },
+ { 12293, 12293 },
+ { 12295, 12295 },
+ { 12321, 12329 },
+ { 12344, 12347 },
+ { 13312, 19903 },
+ { 19968, 40959 },
+ { 63744, 64109 },
+ { 64112, 64217 },
};
static const URange16 Latin_range16[] = {
- { 65, 90 },
- { 97, 122 },
- { 170, 170 },
- { 186, 186 },
- { 192, 214 },
- { 216, 246 },
- { 248, 696 },
- { 736, 740 },
- { 7424, 7461 },
- { 7468, 7516 },
- { 7522, 7525 },
- { 7531, 7543 },
- { 7545, 7614 },
- { 7680, 7935 },
- { 8305, 8305 },
- { 8319, 8319 },
- { 8336, 8348 },
- { 8490, 8491 },
- { 8498, 8498 },
- { 8526, 8526 },
- { 8544, 8584 },
- { 11360, 11391 },
- { 42786, 42887 },
- { 42891, 42954 },
- { 42960, 42961 },
- { 42963, 42963 },
- { 42965, 42969 },
- { 42994, 43007 },
- { 43824, 43866 },
- { 43868, 43876 },
- { 43878, 43881 },
- { 64256, 64262 },
- { 65313, 65338 },
- { 65345, 65370 },
+ { 65, 90 },
+ { 97, 122 },
+ { 170, 170 },
+ { 186, 186 },
+ { 192, 214 },
+ { 216, 246 },
+ { 248, 696 },
+ { 736, 740 },
+ { 7424, 7461 },
+ { 7468, 7516 },
+ { 7522, 7525 },
+ { 7531, 7543 },
+ { 7545, 7614 },
+ { 7680, 7935 },
+ { 8305, 8305 },
+ { 8319, 8319 },
+ { 8336, 8348 },
+ { 8490, 8491 },
+ { 8498, 8498 },
+ { 8526, 8526 },
+ { 8544, 8584 },
+ { 11360, 11391 },
+ { 42786, 42887 },
+ { 42891, 42954 },
+ { 42960, 42961 },
+ { 42963, 42963 },
+ { 42965, 42969 },
+ { 42994, 43007 },
+ { 43824, 43866 },
+ { 43868, 43876 },
+ { 43878, 43881 },
+ { 64256, 64262 },
+ { 65313, 65338 },
+ { 65345, 65370 },
};
#define UNI_ENTRY(Code) \
{ Code##_range16, sizeof(Code##_range16)/sizeof(URange16) }
-
-static const UGroup unicode_groups[U8G_SIZE] = {
+#ifndef __cplusplus
+static
+#endif
+const UGroup _utf8_unicode_groups[U8G_SIZE] = {
[U8G_Cc] = UNI_ENTRY(Cc),
[U8G_Lt] = UNI_ENTRY(Lt),
[U8G_Nd] = UNI_ENTRY(Nd),