summaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author Tyge Løvset <[email protected]> 2022-02-24 11:58:27 +0100
committer Tyge Løvset <[email protected]> 2022-02-24 11:58:27 +0100
commit e6b6d23abd95870afc632d6694a334e1e18c64f7 (patch)
tree 0a8864a63e6aeb4f004d52049282acfb80557276 /src
parent 9a06a1caececf1ea3136c3ec80e0ecadb6b29f53 (diff)
download STC-modified-e6b6d23abd95870afc632d6694a334e1e18c64f7.tar.gz
STC-modified-e6b6d23abd95870afc632d6694a334e1e18c64f7.zip
Switched to the Python-generated Unicode uppercase/lowercase conversions
Diffstat (limited to 'src')
-rw-r--r-- src/casefold.py 35
-rw-r--r-- src/cregex_utf8.c 1341
2 files changed, 156 insertions, 1220 deletions
diff --git a/src/casefold.py b/src/casefold.py
index de08837e..73d6fcbc 100644
--- a/src/casefold.py
+++ b/src/casefold.py
@@ -140,6 +140,7 @@ if __name__ == "__main__":
print('''#include <stdint.h>
#include <stdio.h>
#include <ctype.h>
+#include <stc/utf8.h>
#include <stdbool.h>''')
cfold = make_casetable()
@@ -190,8 +191,7 @@ bool utf8_isspace(uint32_t c) {
bool utf8_isdigit(uint32_t c) {
return ((c >= '0') & (c <= '9')) ||
- ((c >= 0xFF10) & (c <= 0xFF19)) ||
- ((c >= 0x1D7CE) & (c <= 0x1D7FF));
+ ((c >= 0xFF10) & (c <= 0xFF19));
}
bool utf8_isxdigit(uint32_t c) {
@@ -204,36 +204,17 @@ bool utf8_isxdigit(uint32_t c) {
bool utf8_isalnum(uint32_t c) {
if (c < 128) return isalnum(c) != 0;
- if ((c >= 0xFF10) & (c <= 0xFF19) ||
- ((c >= 0x1D7CE) & (c <= 0x1D7FF))) return true;
+ if ((c >= 0xFF10) & (c <= 0xFF19)) return true;
return utf8_islower(c) || utf8_isupper(c);
}
-
-#ifdef TEST
-size_t utf8_encode(char *out, uint32_t c)
-{
- char* p = out;
- if (c < 0x80U) {
- *p++ = (char) c;
- } else if (c < 0x0800U) {
- *p++ = (char) ((c>>6 & 0x1F) | 0xC0);
- *p++ = (char) ((c & 0x3F) | 0x80);
- } else if (c < 0x010000U) {
- if (c < 0xD800U || c >= 0xE000U) {
- *p++ = (char) ((c>>12 & 0x0F) | 0xE0);
- *p++ = (char) ((c>>6 & 0x3F) | 0x80);
- *p++ = (char) ((c & 0x3F) | 0x80);
- }
- } else if (c < 0x110000U) {
- *p++ = (char) ((c>>18 & 0x07) | 0xF0);
- *p++ = (char) ((c>>12 & 0x3F) | 0x80);
- *p++ = (char) ((c>>6 & 0x3F) | 0x80);
- *p++ = (char) ((c & 0x3F) | 0x80);
- }
- return p - out;
+bool utf8_isalpha(uint32_t c) {
+ if (c < 128) return isalpha(c) != 0;
+ return utf8_islower(c) || utf8_isupper(c);
}
+
+#ifdef TEST
int main()
{
for (int i=0; i < sizeof cfold_low/sizeof *cfold_low; ++i)
diff --git a/src/cregex_utf8.c b/src/cregex_utf8.c
index afdda635..ad4176ba 100644
--- a/src/cregex_utf8.c
+++ b/src/cregex_utf8.c
@@ -1,1193 +1,148 @@
-#include <stdint.h>
-#include <stc/utf8.h>
-
-enum { UPPER = 0, LOWER = 1, HT_SIZE = 1997 };
-// based on unicode CaseFolding.txt
-static const uint16_t cfold[][2] = {
-//{0x00041, 0x00061}, // LATIN CAPITAL LETTER A
-//{0x00042, 0x00062}, // LATIN CAPITAL LETTER B
-//{0x00043, 0x00063}, // LATIN CAPITAL LETTER C
-//{0x00044, 0x00064}, // LATIN CAPITAL LETTER D
-//{0x00045, 0x00065}, // LATIN CAPITAL LETTER E
-//{0x00046, 0x00066}, // LATIN CAPITAL LETTER F
-//{0x00047, 0x00067}, // LATIN CAPITAL LETTER G
-//{0x00048, 0x00068}, // LATIN CAPITAL LETTER H
-//{0x00049, 0x00069}, // LATIN CAPITAL LETTER I
-//{0x0004A, 0x0006A}, // LATIN CAPITAL LETTER J
-//{0x0004B, 0x0006B}, // LATIN CAPITAL LETTER K
-//{0x0004C, 0x0006C}, // LATIN CAPITAL LETTER L
-//{0x0004D, 0x0006D}, // LATIN CAPITAL LETTER M
-//{0x0004E, 0x0006E}, // LATIN CAPITAL LETTER N
-//{0x0004F, 0x0006F}, // LATIN CAPITAL LETTER O
-//{0x00050, 0x00070}, // LATIN CAPITAL LETTER P
-//{0x00051, 0x00071}, // LATIN CAPITAL LETTER Q
-//{0x00052, 0x00072}, // LATIN CAPITAL LETTER R
-//{0x00053, 0x00073}, // LATIN CAPITAL LETTER S
-//{0x00054, 0x00074}, // LATIN CAPITAL LETTER T
-//{0x00055, 0x00075}, // LATIN CAPITAL LETTER U
-//{0x00056, 0x00076}, // LATIN CAPITAL LETTER V
-//{0x00057, 0x00077}, // LATIN CAPITAL LETTER W
-//{0x00058, 0x00078}, // LATIN CAPITAL LETTER X
-//{0x00059, 0x00079}, // LATIN CAPITAL LETTER Y
-//{0x0005A, 0x0007A}, // LATIN CAPITAL LETTER Z
-{0x000B5, 0x003BC}, // MICRO SIGN
-{0x000C0, 0x000E0}, // LATIN CAPITAL LETTER A WITH GRAVE
-{0x000C1, 0x000E1}, // LATIN CAPITAL LETTER A WITH ACUTE
-{0x000C2, 0x000E2}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-{0x000C3, 0x000E3}, // LATIN CAPITAL LETTER A WITH TILDE
-{0x000C4, 0x000E4}, // LATIN CAPITAL LETTER A WITH DIAERESIS
-{0x000C5, 0x000E5}, // LATIN CAPITAL LETTER A WITH RING ABOVE
-{0x000C6, 0x000E6}, // LATIN CAPITAL LETTER AE
-{0x000C7, 0x000E7}, // LATIN CAPITAL LETTER C WITH CEDILLA
-{0x000C8, 0x000E8}, // LATIN CAPITAL LETTER E WITH GRAVE
-{0x000C9, 0x000E9}, // LATIN CAPITAL LETTER E WITH ACUTE
-{0x000CA, 0x000EA}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-{0x000CB, 0x000EB}, // LATIN CAPITAL LETTER E WITH DIAERESIS
-{0x000CC, 0x000EC}, // LATIN CAPITAL LETTER I WITH GRAVE
-{0x000CD, 0x000ED}, // LATIN CAPITAL LETTER I WITH ACUTE
-{0x000CE, 0x000EE}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-{0x000CF, 0x000EF}, // LATIN CAPITAL LETTER I WITH DIAERESIS
-{0x000D0, 0x000F0}, // LATIN CAPITAL LETTER ETH
-{0x000D1, 0x000F1}, // LATIN CAPITAL LETTER N WITH TILDE
-{0x000D2, 0x000F2}, // LATIN CAPITAL LETTER O WITH GRAVE
-{0x000D3, 0x000F3}, // LATIN CAPITAL LETTER O WITH ACUTE
-{0x000D4, 0x000F4}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-{0x000D5, 0x000F5}, // LATIN CAPITAL LETTER O WITH TILDE
-{0x000D6, 0x000F6}, // LATIN CAPITAL LETTER O WITH DIAERESIS
-{0x000D8, 0x000F8}, // LATIN CAPITAL LETTER O WITH STROKE
-{0x000D9, 0x000F9}, // LATIN CAPITAL LETTER U WITH GRAVE
-{0x000DA, 0x000FA}, // LATIN CAPITAL LETTER U WITH ACUTE
-{0x000DB, 0x000FB}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-{0x000DC, 0x000FC}, // LATIN CAPITAL LETTER U WITH DIAERESIS
-{0x000DD, 0x000FD}, // LATIN CAPITAL LETTER Y WITH ACUTE
-{0x000DE, 0x000FE}, // LATIN CAPITAL LETTER THORN
-{0x00100, 0x00101}, // LATIN CAPITAL LETTER A WITH MACRON
-{0x00102, 0x00103}, // LATIN CAPITAL LETTER A WITH BREVE
-{0x00104, 0x00105}, // LATIN CAPITAL LETTER A WITH OGONEK
-{0x00106, 0x00107}, // LATIN CAPITAL LETTER C WITH ACUTE
-{0x00108, 0x00109}, // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
-{0x0010A, 0x0010B}, // LATIN CAPITAL LETTER C WITH DOT ABOVE
-{0x0010C, 0x0010D}, // LATIN CAPITAL LETTER C WITH CARON
-{0x0010E, 0x0010F}, // LATIN CAPITAL LETTER D WITH CARON
-{0x00110, 0x00111}, // LATIN CAPITAL LETTER D WITH STROKE
-{0x00112, 0x00113}, // LATIN CAPITAL LETTER E WITH MACRON
-{0x00114, 0x00115}, // LATIN CAPITAL LETTER E WITH BREVE
-{0x00116, 0x00117}, // LATIN CAPITAL LETTER E WITH DOT ABOVE
-{0x00118, 0x00119}, // LATIN CAPITAL LETTER E WITH OGONEK
-{0x0011A, 0x0011B}, // LATIN CAPITAL LETTER E WITH CARON
-{0x0011C, 0x0011D}, // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
-{0x0011E, 0x0011F}, // LATIN CAPITAL LETTER G WITH BREVE
-{0x00120, 0x00121}, // LATIN CAPITAL LETTER G WITH DOT ABOVE
-{0x00122, 0x00123}, // LATIN CAPITAL LETTER G WITH CEDILLA
-{0x00124, 0x00125}, // LATIN CAPITAL LETTER H WITH CIRCUMFLEX
-{0x00126, 0x00127}, // LATIN CAPITAL LETTER H WITH STROKE
-{0x00128, 0x00129}, // LATIN CAPITAL LETTER I WITH TILDE
-{0x0012A, 0x0012B}, // LATIN CAPITAL LETTER I WITH MACRON
-{0x0012C, 0x0012D}, // LATIN CAPITAL LETTER I WITH BREVE
-{0x0012E, 0x0012F}, // LATIN CAPITAL LETTER I WITH OGONEK
-{0x00132, 0x00133}, // LATIN CAPITAL LIGATURE IJ
-{0x00134, 0x00135}, // LATIN CAPITAL LETTER J WITH CIRCUMFLEX
-{0x00136, 0x00137}, // LATIN CAPITAL LETTER K WITH CEDILLA
-{0x00139, 0x0013A}, // LATIN CAPITAL LETTER L WITH ACUTE
-{0x0013B, 0x0013C}, // LATIN CAPITAL LETTER L WITH CEDILLA
-{0x0013D, 0x0013E}, // LATIN CAPITAL LETTER L WITH CARON
-{0x0013F, 0x00140}, // LATIN CAPITAL LETTER L WITH MIDDLE DOT
-{0x00141, 0x00142}, // LATIN CAPITAL LETTER L WITH STROKE
-{0x00143, 0x00144}, // LATIN CAPITAL LETTER N WITH ACUTE
-{0x00145, 0x00146}, // LATIN CAPITAL LETTER N WITH CEDILLA
-{0x00147, 0x00148}, // LATIN CAPITAL LETTER N WITH CARON
-{0x0014A, 0x0014B}, // LATIN CAPITAL LETTER ENG
-{0x0014C, 0x0014D}, // LATIN CAPITAL LETTER O WITH MACRON
-{0x0014E, 0x0014F}, // LATIN CAPITAL LETTER O WITH BREVE
-{0x00150, 0x00151}, // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
-{0x00152, 0x00153}, // LATIN CAPITAL LIGATURE OE
-{0x00154, 0x00155}, // LATIN CAPITAL LETTER R WITH ACUTE
-{0x00156, 0x00157}, // LATIN CAPITAL LETTER R WITH CEDILLA
-{0x00158, 0x00159}, // LATIN CAPITAL LETTER R WITH CARON
-{0x0015A, 0x0015B}, // LATIN CAPITAL LETTER S WITH ACUTE
-{0x0015C, 0x0015D}, // LATIN CAPITAL LETTER S WITH CIRCUMFLEX
-{0x0015E, 0x0015F}, // LATIN CAPITAL LETTER S WITH CEDILLA
-{0x00160, 0x00161}, // LATIN CAPITAL LETTER S WITH CARON
-{0x00162, 0x00163}, // LATIN CAPITAL LETTER T WITH CEDILLA
-{0x00164, 0x00165}, // LATIN CAPITAL LETTER T WITH CARON
-{0x00166, 0x00167}, // LATIN CAPITAL LETTER T WITH STROKE
-{0x00168, 0x00169}, // LATIN CAPITAL LETTER U WITH TILDE
-{0x0016A, 0x0016B}, // LATIN CAPITAL LETTER U WITH MACRON
-{0x0016C, 0x0016D}, // LATIN CAPITAL LETTER U WITH BREVE
-{0x0016E, 0x0016F}, // LATIN CAPITAL LETTER U WITH RING ABOVE
-{0x00170, 0x00171}, // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
-{0x00172, 0x00173}, // LATIN CAPITAL LETTER U WITH OGONEK
-{0x00174, 0x00175}, // LATIN CAPITAL LETTER W WITH CIRCUMFLEX
-{0x00176, 0x00177}, // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
-{0x00178, 0x000FF}, // LATIN CAPITAL LETTER Y WITH DIAERESIS
-{0x00179, 0x0017A}, // LATIN CAPITAL LETTER Z WITH ACUTE
-{0x0017B, 0x0017C}, // LATIN CAPITAL LETTER Z WITH DOT ABOVE
-{0x0017D, 0x0017E}, // LATIN CAPITAL LETTER Z WITH CARON
-{0x0017F, 0x00073}, // LATIN SMALL LETTER LONG S
-{0x00181, 0x00253}, // LATIN CAPITAL LETTER B WITH HOOK
-{0x00182, 0x00183}, // LATIN CAPITAL LETTER B WITH TOPBAR
-{0x00184, 0x00185}, // LATIN CAPITAL LETTER TONE SIX
-{0x00186, 0x00254}, // LATIN CAPITAL LETTER OPEN O
-{0x00187, 0x00188}, // LATIN CAPITAL LETTER C WITH HOOK
-{0x00189, 0x00256}, // LATIN CAPITAL LETTER AFRICAN D
-{0x0018A, 0x00257}, // LATIN CAPITAL LETTER D WITH HOOK
-{0x0018B, 0x0018C}, // LATIN CAPITAL LETTER D WITH TOPBAR
-{0x0018E, 0x001DD}, // LATIN CAPITAL LETTER REVERSED E
-{0x0018F, 0x00259}, // LATIN CAPITAL LETTER SCHWA
-{0x00190, 0x0025B}, // LATIN CAPITAL LETTER OPEN E
-{0x00191, 0x00192}, // LATIN CAPITAL LETTER F WITH HOOK
-{0x00193, 0x00260}, // LATIN CAPITAL LETTER G WITH HOOK
-{0x00194, 0x00263}, // LATIN CAPITAL LETTER GAMMA
-{0x00196, 0x00269}, // LATIN CAPITAL LETTER IOTA
-{0x00197, 0x00268}, // LATIN CAPITAL LETTER I WITH STROKE
-{0x00198, 0x00199}, // LATIN CAPITAL LETTER K WITH HOOK
-{0x0019C, 0x0026F}, // LATIN CAPITAL LETTER TURNED M
-{0x0019D, 0x00272}, // LATIN CAPITAL LETTER N WITH LEFT HOOK
-{0x0019F, 0x00275}, // LATIN CAPITAL LETTER O WITH MIDDLE TILDE
-{0x001A0, 0x001A1}, // LATIN CAPITAL LETTER O WITH HORN
-{0x001A2, 0x001A3}, // LATIN CAPITAL LETTER OI
-{0x001A4, 0x001A5}, // LATIN CAPITAL LETTER P WITH HOOK
-{0x001A6, 0x00280}, // LATIN LETTER YR
-{0x001A7, 0x001A8}, // LATIN CAPITAL LETTER TONE TWO
-{0x001A9, 0x00283}, // LATIN CAPITAL LETTER ESH
-{0x001AC, 0x001AD}, // LATIN CAPITAL LETTER T WITH HOOK
-{0x001AE, 0x00288}, // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
-{0x001AF, 0x001B0}, // LATIN CAPITAL LETTER U WITH HORN
-{0x001B1, 0x0028A}, // LATIN CAPITAL LETTER UPSILON
-{0x001B2, 0x0028B}, // LATIN CAPITAL LETTER V WITH HOOK
-{0x001B3, 0x001B4}, // LATIN CAPITAL LETTER Y WITH HOOK
-{0x001B5, 0x001B6}, // LATIN CAPITAL LETTER Z WITH STROKE
-{0x001B7, 0x00292}, // LATIN CAPITAL LETTER EZH
-{0x001B8, 0x001B9}, // LATIN CAPITAL LETTER EZH REVERSED
-{0x001BC, 0x001BD}, // LATIN CAPITAL LETTER TONE FIVE
-{0x001C4, 0x001C6}, // LATIN CAPITAL LETTER DZ WITH CARON
-{0x001C5, 0x001C6}, // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
-{0x001C7, 0x001C9}, // LATIN CAPITAL LETTER LJ
-{0x001C8, 0x001C9}, // LATIN CAPITAL LETTER L WITH SMALL LETTER J
-{0x001CA, 0x001CC}, // LATIN CAPITAL LETTER NJ
-{0x001CB, 0x001CC}, // LATIN CAPITAL LETTER N WITH SMALL LETTER J
-{0x001CD, 0x001CE}, // LATIN CAPITAL LETTER A WITH CARON
-{0x001CF, 0x001D0}, // LATIN CAPITAL LETTER I WITH CARON
-{0x001D1, 0x001D2}, // LATIN CAPITAL LETTER O WITH CARON
-{0x001D3, 0x001D4}, // LATIN CAPITAL LETTER U WITH CARON
-{0x001D5, 0x001D6}, // LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
-{0x001D7, 0x001D8}, // LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
-{0x001D9, 0x001DA}, // LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
-{0x001DB, 0x001DC}, // LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
-{0x001DE, 0x001DF}, // LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
-{0x001E0, 0x001E1}, // LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
-{0x001E2, 0x001E3}, // LATIN CAPITAL LETTER AE WITH MACRON
-{0x001E4, 0x001E5}, // LATIN CAPITAL LETTER G WITH STROKE
-{0x001E6, 0x001E7}, // LATIN CAPITAL LETTER G WITH CARON
-{0x001E8, 0x001E9}, // LATIN CAPITAL LETTER K WITH CARON
-{0x001EA, 0x001EB}, // LATIN CAPITAL LETTER O WITH OGONEK
-{0x001EC, 0x001ED}, // LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
-{0x001EE, 0x001EF}, // LATIN CAPITAL LETTER EZH WITH CARON
-{0x001F1, 0x001F3}, // LATIN CAPITAL LETTER DZ
-{0x001F2, 0x001F3}, // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
-{0x001F4, 0x001F5}, // LATIN CAPITAL LETTER G WITH ACUTE
-{0x001F6, 0x00195}, // LATIN CAPITAL LETTER HWAIR
-{0x001F7, 0x001BF}, // LATIN CAPITAL LETTER WYNN
-{0x001F8, 0x001F9}, // LATIN CAPITAL LETTER N WITH GRAVE
-{0x001FA, 0x001FB}, // LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
-{0x001FC, 0x001FD}, // LATIN CAPITAL LETTER AE WITH ACUTE
-{0x001FE, 0x001FF}, // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
-{0x00200, 0x00201}, // LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
-{0x00202, 0x00203}, // LATIN CAPITAL LETTER A WITH INVERTED BREVE
-{0x00204, 0x00205}, // LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
-{0x00206, 0x00207}, // LATIN CAPITAL LETTER E WITH INVERTED BREVE
-{0x00208, 0x00209}, // LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
-{0x0020A, 0x0020B}, // LATIN CAPITAL LETTER I WITH INVERTED BREVE
-{0x0020C, 0x0020D}, // LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
-{0x0020E, 0x0020F}, // LATIN CAPITAL LETTER O WITH INVERTED BREVE
-{0x00210, 0x00211}, // LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
-{0x00212, 0x00213}, // LATIN CAPITAL LETTER R WITH INVERTED BREVE
-{0x00214, 0x00215}, // LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
-{0x00216, 0x00217}, // LATIN CAPITAL LETTER U WITH INVERTED BREVE
-{0x00218, 0x00219}, // LATIN CAPITAL LETTER S WITH COMMA BELOW
-{0x0021A, 0x0021B}, // LATIN CAPITAL LETTER T WITH COMMA BELOW
-{0x0021C, 0x0021D}, // LATIN CAPITAL LETTER YOGH
-{0x0021E, 0x0021F}, // LATIN CAPITAL LETTER H WITH CARON
-{0x00220, 0x0019E}, // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
-{0x00222, 0x00223}, // LATIN CAPITAL LETTER OU
-{0x00224, 0x00225}, // LATIN CAPITAL LETTER Z WITH HOOK
-{0x00226, 0x00227}, // LATIN CAPITAL LETTER A WITH DOT ABOVE
-{0x00228, 0x00229}, // LATIN CAPITAL LETTER E WITH CEDILLA
-{0x0022A, 0x0022B}, // LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
-{0x0022C, 0x0022D}, // LATIN CAPITAL LETTER O WITH TILDE AND MACRON
-{0x0022E, 0x0022F}, // LATIN CAPITAL LETTER O WITH DOT ABOVE
-{0x00230, 0x00231}, // LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
-{0x00232, 0x00233}, // LATIN CAPITAL LETTER Y WITH MACRON
-{0x0023A, 0x02C65}, // LATIN CAPITAL LETTER A WITH STROKE
-{0x0023B, 0x0023C}, // LATIN CAPITAL LETTER C WITH STROKE
-{0x0023D, 0x0019A}, // LATIN CAPITAL LETTER L WITH BAR
-{0x0023E, 0x02C66}, // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
-{0x00241, 0x00242}, // LATIN CAPITAL LETTER GLOTTAL STOP
-{0x00243, 0x00180}, // LATIN CAPITAL LETTER B WITH STROKE
-{0x00244, 0x00289}, // LATIN CAPITAL LETTER U BAR
-{0x00245, 0x0028C}, // LATIN CAPITAL LETTER TURNED V
-{0x00246, 0x00247}, // LATIN CAPITAL LETTER E WITH STROKE
-{0x00248, 0x00249}, // LATIN CAPITAL LETTER J WITH STROKE
-{0x0024A, 0x0024B}, // LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
-{0x0024C, 0x0024D}, // LATIN CAPITAL LETTER R WITH STROKE
-{0x0024E, 0x0024F}, // LATIN CAPITAL LETTER Y WITH STROKE
-{0x00345, 0x003B9}, // COMBINING GREEK YPOGEGRAMMENI
-{0x00370, 0x00371}, // GREEK CAPITAL LETTER HETA
-{0x00372, 0x00373}, // GREEK CAPITAL LETTER ARCHAIC SAMPI
-{0x00376, 0x00377}, // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
-{0x0037F, 0x003F3}, // GREEK CAPITAL LETTER YOT
-{0x00386, 0x003AC}, // GREEK CAPITAL LETTER ALPHA WITH TONOS
-{0x00388, 0x003AD}, // GREEK CAPITAL LETTER EPSILON WITH TONOS
-{0x00389, 0x003AE}, // GREEK CAPITAL LETTER ETA WITH TONOS
-{0x0038A, 0x003AF}, // GREEK CAPITAL LETTER IOTA WITH TONOS
-{0x0038C, 0x003CC}, // GREEK CAPITAL LETTER OMICRON WITH TONOS
-{0x0038E, 0x003CD}, // GREEK CAPITAL LETTER UPSILON WITH TONOS
-{0x0038F, 0x003CE}, // GREEK CAPITAL LETTER OMEGA WITH TONOS
-{0x00391, 0x003B1}, // GREEK CAPITAL LETTER ALPHA
-{0x00392, 0x003B2}, // GREEK CAPITAL LETTER BETA
-{0x00393, 0x003B3}, // GREEK CAPITAL LETTER GAMMA
-{0x00394, 0x003B4}, // GREEK CAPITAL LETTER DELTA
-{0x00395, 0x003B5}, // GREEK CAPITAL LETTER EPSILON
-{0x00396, 0x003B6}, // GREEK CAPITAL LETTER ZETA
-{0x00397, 0x003B7}, // GREEK CAPITAL LETTER ETA
-{0x00398, 0x003B8}, // GREEK CAPITAL LETTER THETA
-{0x00399, 0x003B9}, // GREEK CAPITAL LETTER IOTA
-{0x0039A, 0x003BA}, // GREEK CAPITAL LETTER KAPPA
-{0x0039B, 0x003BB}, // GREEK CAPITAL LETTER LAMDA
-{0x0039C, 0x003BC}, // GREEK CAPITAL LETTER MU
-{0x0039D, 0x003BD}, // GREEK CAPITAL LETTER NU
-{0x0039E, 0x003BE}, // GREEK CAPITAL LETTER XI
-{0x0039F, 0x003BF}, // GREEK CAPITAL LETTER OMICRON
-{0x003A0, 0x003C0}, // GREEK CAPITAL LETTER PI
-{0x003A1, 0x003C1}, // GREEK CAPITAL LETTER RHO
-{0x003A3, 0x003C3}, // GREEK CAPITAL LETTER SIGMA
-{0x003A4, 0x003C4}, // GREEK CAPITAL LETTER TAU
-{0x003A5, 0x003C5}, // GREEK CAPITAL LETTER UPSILON
-{0x003A6, 0x003C6}, // GREEK CAPITAL LETTER PHI
-{0x003A7, 0x003C7}, // GREEK CAPITAL LETTER CHI
-{0x003A8, 0x003C8}, // GREEK CAPITAL LETTER PSI
-{0x003A9, 0x003C9}, // GREEK CAPITAL LETTER OMEGA
-{0x003AA, 0x003CA}, // GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
-{0x003AB, 0x003CB}, // GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
-{0x003C2, 0x003C3}, // GREEK SMALL LETTER FINAL SIGMA
-{0x003CF, 0x003D7}, // GREEK CAPITAL KAI SYMBOL
-{0x003D0, 0x003B2}, // GREEK BETA SYMBOL
-{0x003D1, 0x003B8}, // GREEK THETA SYMBOL
-{0x003D5, 0x003C6}, // GREEK PHI SYMBOL
-{0x003D6, 0x003C0}, // GREEK PI SYMBOL
-{0x003D8, 0x003D9}, // GREEK LETTER ARCHAIC KOPPA
-{0x003DA, 0x003DB}, // GREEK LETTER STIGMA
-{0x003DC, 0x003DD}, // GREEK LETTER DIGAMMA
-{0x003DE, 0x003DF}, // GREEK LETTER KOPPA
-{0x003E0, 0x003E1}, // GREEK LETTER SAMPI
-{0x003F0, 0x003BA}, // GREEK KAPPA SYMBOL
-{0x003F1, 0x003C1}, // GREEK RHO SYMBOL
-{0x003F4, 0x003B8}, // GREEK CAPITAL THETA SYMBOL
-{0x003F5, 0x003B5}, // GREEK LUNATE EPSILON SYMBOL
-{0x003F7, 0x003F8}, // GREEK CAPITAL LETTER SHO
-{0x003F9, 0x003F2}, // GREEK CAPITAL LUNATE SIGMA SYMBOL
-{0x003FA, 0x003FB}, // GREEK CAPITAL LETTER SAN
-{0x003FD, 0x0037B}, // GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
-{0x003FE, 0x0037C}, // GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
-{0x003FF, 0x0037D}, // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
-{0x00400, 0x00450}, // CYRILLIC CAPITAL LETTER IE WITH GRAVE
-{0x00401, 0x00451}, // CYRILLIC CAPITAL LETTER IO
-{0x00402, 0x00452}, // CYRILLIC CAPITAL LETTER DJE
-{0x00403, 0x00453}, // CYRILLIC CAPITAL LETTER GJE
-{0x00404, 0x00454}, // CYRILLIC CAPITAL LETTER UKRAINIAN IE
-{0x00405, 0x00455}, // CYRILLIC CAPITAL LETTER DZE
-{0x00406, 0x00456}, // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
-{0x00407, 0x00457}, // CYRILLIC CAPITAL LETTER YI
-{0x00408, 0x00458}, // CYRILLIC CAPITAL LETTER JE
-{0x00409, 0x00459}, // CYRILLIC CAPITAL LETTER LJE
-{0x0040A, 0x0045A}, // CYRILLIC CAPITAL LETTER NJE
-{0x0040B, 0x0045B}, // CYRILLIC CAPITAL LETTER TSHE
-{0x0040C, 0x0045C}, // CYRILLIC CAPITAL LETTER KJE
-{0x0040D, 0x0045D}, // CYRILLIC CAPITAL LETTER I WITH GRAVE
-{0x0040E, 0x0045E}, // CYRILLIC CAPITAL LETTER SHORT U
-{0x0040F, 0x0045F}, // CYRILLIC CAPITAL LETTER DZHE
-{0x00410, 0x00430}, // CYRILLIC CAPITAL LETTER A
-{0x00411, 0x00431}, // CYRILLIC CAPITAL LETTER BE
-{0x00412, 0x00432}, // CYRILLIC CAPITAL LETTER VE
-{0x00413, 0x00433}, // CYRILLIC CAPITAL LETTER GHE
-{0x00414, 0x00434}, // CYRILLIC CAPITAL LETTER DE
-{0x00415, 0x00435}, // CYRILLIC CAPITAL LETTER IE
-{0x00416, 0x00436}, // CYRILLIC CAPITAL LETTER ZHE
-{0x00417, 0x00437}, // CYRILLIC CAPITAL LETTER ZE
-{0x00418, 0x00438}, // CYRILLIC CAPITAL LETTER I
-{0x00419, 0x00439}, // CYRILLIC CAPITAL LETTER SHORT I
-{0x0041A, 0x0043A}, // CYRILLIC CAPITAL LETTER KA
-{0x0041B, 0x0043B}, // CYRILLIC CAPITAL LETTER EL
-{0x0041C, 0x0043C}, // CYRILLIC CAPITAL LETTER EM
-{0x0041D, 0x0043D}, // CYRILLIC CAPITAL LETTER EN
-{0x0041E, 0x0043E}, // CYRILLIC CAPITAL LETTER O
-{0x0041F, 0x0043F}, // CYRILLIC CAPITAL LETTER PE
-{0x00420, 0x00440}, // CYRILLIC CAPITAL LETTER ER
-{0x00421, 0x00441}, // CYRILLIC CAPITAL LETTER ES
-{0x00422, 0x00442}, // CYRILLIC CAPITAL LETTER TE
-{0x00423, 0x00443}, // CYRILLIC CAPITAL LETTER U
-{0x00424, 0x00444}, // CYRILLIC CAPITAL LETTER EF
-{0x00425, 0x00445}, // CYRILLIC CAPITAL LETTER HA
-{0x00426, 0x00446}, // CYRILLIC CAPITAL LETTER TSE
-{0x00427, 0x00447}, // CYRILLIC CAPITAL LETTER CHE
-{0x00428, 0x00448}, // CYRILLIC CAPITAL LETTER SHA
-{0x00429, 0x00449}, // CYRILLIC CAPITAL LETTER SHCHA
-{0x0042A, 0x0044A}, // CYRILLIC CAPITAL LETTER HARD SIGN
-{0x0042B, 0x0044B}, // CYRILLIC CAPITAL LETTER YERU
-{0x0042C, 0x0044C}, // CYRILLIC CAPITAL LETTER SOFT SIGN
-{0x0042D, 0x0044D}, // CYRILLIC CAPITAL LETTER E
-{0x0042E, 0x0044E}, // CYRILLIC CAPITAL LETTER YU
-{0x0042F, 0x0044F}, // CYRILLIC CAPITAL LETTER YA
-{0x00460, 0x00461}, // CYRILLIC CAPITAL LETTER OMEGA
-{0x00462, 0x00463}, // CYRILLIC CAPITAL LETTER YAT
-{0x00464, 0x00465}, // CYRILLIC CAPITAL LETTER IOTIFIED E
-{0x00466, 0x00467}, // CYRILLIC CAPITAL LETTER LITTLE YUS
-{0x00468, 0x00469}, // CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
-{0x0046A, 0x0046B}, // CYRILLIC CAPITAL LETTER BIG YUS
-{0x0046C, 0x0046D}, // CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
-{0x0046E, 0x0046F}, // CYRILLIC CAPITAL LETTER KSI
-{0x00470, 0x00471}, // CYRILLIC CAPITAL LETTER PSI
-{0x00472, 0x00473}, // CYRILLIC CAPITAL LETTER FITA
-{0x00474, 0x00475}, // CYRILLIC CAPITAL LETTER IZHITSA
-{0x00476, 0x00477}, // CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
-{0x00478, 0x00479}, // CYRILLIC CAPITAL LETTER UK
-{0x0047A, 0x0047B}, // CYRILLIC CAPITAL LETTER ROUND OMEGA
-{0x0047C, 0x0047D}, // CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
-{0x0047E, 0x0047F}, // CYRILLIC CAPITAL LETTER OT
-{0x00480, 0x00481}, // CYRILLIC CAPITAL LETTER KOPPA
-{0x0048A, 0x0048B}, // CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
-{0x0048C, 0x0048D}, // CYRILLIC CAPITAL LETTER SEMISOFT SIGN
-{0x0048E, 0x0048F}, // CYRILLIC CAPITAL LETTER ER WITH TICK
-{0x00490, 0x00491}, // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
-{0x00492, 0x00493}, // CYRILLIC CAPITAL LETTER GHE WITH STROKE
-{0x00494, 0x00495}, // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
-{0x00496, 0x00497}, // CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
-{0x00498, 0x00499}, // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
-{0x0049A, 0x0049B}, // CYRILLIC CAPITAL LETTER KA WITH DESCENDER
-{0x0049C, 0x0049D}, // CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
-{0x0049E, 0x0049F}, // CYRILLIC CAPITAL LETTER KA WITH STROKE
-{0x004A0, 0x004A1}, // CYRILLIC CAPITAL LETTER BASHKIR KA
-{0x004A2, 0x004A3}, // CYRILLIC CAPITAL LETTER EN WITH DESCENDER
-{0x004A4, 0x004A5}, // CYRILLIC CAPITAL LIGATURE EN GHE
-{0x004A6, 0x004A7}, // CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
-{0x004A8, 0x004A9}, // CYRILLIC CAPITAL LETTER ABKHASIAN HA
-{0x004AA, 0x004AB}, // CYRILLIC CAPITAL LETTER ES WITH DESCENDER
-{0x004AC, 0x004AD}, // CYRILLIC CAPITAL LETTER TE WITH DESCENDER
-{0x004AE, 0x004AF}, // CYRILLIC CAPITAL LETTER STRAIGHT U
-{0x004B0, 0x004B1}, // CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
-{0x004B2, 0x004B3}, // CYRILLIC CAPITAL LETTER HA WITH DESCENDER
-{0x004B4, 0x004B5}, // CYRILLIC CAPITAL LIGATURE TE TSE
-{0x004B6, 0x004B7}, // CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
-{0x004B8, 0x004B9}, // CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
-{0x004BA, 0x004BB}, // CYRILLIC CAPITAL LETTER SHHA
-{0x004BC, 0x004BD}, // CYRILLIC CAPITAL LETTER ABKHASIAN CHE
-{0x004BE, 0x004BF}, // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
-{0x004C0, 0x004CF}, // CYRILLIC LETTER PALOCHKA
-{0x004C1, 0x004C2}, // CYRILLIC CAPITAL LETTER ZHE WITH BREVE
-{0x004C3, 0x004C4}, // CYRILLIC CAPITAL LETTER KA WITH HOOK
-{0x004C5, 0x004C6}, // CYRILLIC CAPITAL LETTER EL WITH TAIL
-{0x004C7, 0x004C8}, // CYRILLIC CAPITAL LETTER EN WITH HOOK
-{0x004C9, 0x004CA}, // CYRILLIC CAPITAL LETTER EN WITH TAIL
-{0x004CB, 0x004CC}, // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
-{0x004CD, 0x004CE}, // CYRILLIC CAPITAL LETTER EM WITH TAIL
-{0x004D0, 0x004D1}, // CYRILLIC CAPITAL LETTER A WITH BREVE
-{0x004D2, 0x004D3}, // CYRILLIC CAPITAL LETTER A WITH DIAERESIS
-{0x004D4, 0x004D5}, // CYRILLIC CAPITAL LIGATURE A IE
-{0x004D6, 0x004D7}, // CYRILLIC CAPITAL LETTER IE WITH BREVE
-{0x004D8, 0x004D9}, // CYRILLIC CAPITAL LETTER SCHWA
-{0x004DA, 0x004DB}, // CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
-{0x004DC, 0x004DD}, // CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
-{0x004DE, 0x004DF}, // CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
-{0x004E0, 0x004E1}, // CYRILLIC CAPITAL LETTER ABKHASIAN DZE
-{0x004E2, 0x004E3}, // CYRILLIC CAPITAL LETTER I WITH MACRON
-{0x004E4, 0x004E5}, // CYRILLIC CAPITAL LETTER I WITH DIAERESIS
-{0x004E6, 0x004E7}, // CYRILLIC CAPITAL LETTER O WITH DIAERESIS
-{0x004E8, 0x004E9}, // CYRILLIC CAPITAL LETTER BARRED O
-{0x004EA, 0x004EB}, // CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
-{0x004EC, 0x004ED}, // CYRILLIC CAPITAL LETTER E WITH DIAERESIS
-{0x004EE, 0x004EF}, // CYRILLIC CAPITAL LETTER U WITH MACRON
-{0x004F0, 0x004F1}, // CYRILLIC CAPITAL LETTER U WITH DIAERESIS
-{0x004F2, 0x004F3}, // CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
-{0x004F4, 0x004F5}, // CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
-{0x004F6, 0x004F7}, // CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
-{0x004F8, 0x004F9}, // CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
-{0x004FA, 0x004FB}, // CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
-{0x004FC, 0x004FD}, // CYRILLIC CAPITAL LETTER HA WITH HOOK
-{0x004FE, 0x004FF}, // CYRILLIC CAPITAL LETTER HA WITH STROKE
-{0x00500, 0x00501}, // CYRILLIC CAPITAL LETTER KOMI DE
-{0x00502, 0x00503}, // CYRILLIC CAPITAL LETTER KOMI DJE
-{0x00504, 0x00505}, // CYRILLIC CAPITAL LETTER KOMI ZJE
-{0x00506, 0x00507}, // CYRILLIC CAPITAL LETTER KOMI DZJE
-{0x00508, 0x00509}, // CYRILLIC CAPITAL LETTER KOMI LJE
-{0x0050A, 0x0050B}, // CYRILLIC CAPITAL LETTER KOMI NJE
-{0x0050C, 0x0050D}, // CYRILLIC CAPITAL LETTER KOMI SJE
-{0x0050E, 0x0050F}, // CYRILLIC CAPITAL LETTER KOMI TJE
-{0x00510, 0x00511}, // CYRILLIC CAPITAL LETTER REVERSED ZE
-{0x00512, 0x00513}, // CYRILLIC CAPITAL LETTER EL WITH HOOK
-{0x00514, 0x00515}, // CYRILLIC CAPITAL LETTER LHA
-{0x00516, 0x00517}, // CYRILLIC CAPITAL LETTER RHA
-{0x00518, 0x00519}, // CYRILLIC CAPITAL LETTER YAE
-{0x0051A, 0x0051B}, // CYRILLIC CAPITAL LETTER QA
-{0x0051C, 0x0051D}, // CYRILLIC CAPITAL LETTER WE
-{0x0051E, 0x0051F}, // CYRILLIC CAPITAL LETTER ALEUT KA
-{0x00520, 0x00521}, // CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
-{0x00522, 0x00523}, // CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
-{0x00524, 0x00525}, // CYRILLIC CAPITAL LETTER PE WITH DESCENDER
-{0x00526, 0x00527}, // CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
-{0x00528, 0x00529}, // CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK
-{0x0052A, 0x0052B}, // CYRILLIC CAPITAL LETTER DZZHE
-{0x0052C, 0x0052D}, // CYRILLIC CAPITAL LETTER DCHE
-{0x0052E, 0x0052F}, // CYRILLIC CAPITAL LETTER EL WITH DESCENDER
-{0x01C80, 0x00432}, // CYRILLIC SMALL LETTER ROUNDED VE
-{0x01C81, 0x00434}, // CYRILLIC SMALL LETTER LONG-LEGGED DE
-{0x01C82, 0x0043E}, // CYRILLIC SMALL LETTER NARROW O
-{0x01C83, 0x00441}, // CYRILLIC SMALL LETTER WIDE ES
-{0x01C84, 0x00442}, // CYRILLIC SMALL LETTER TALL TE
-{0x01C85, 0x00442}, // CYRILLIC SMALL LETTER THREE-LEGGED TE
-{0x01C86, 0x0044A}, // CYRILLIC SMALL LETTER TALL HARD SIGN
-{0x01C87, 0x00463}, // CYRILLIC SMALL LETTER TALL YAT
-{0x01C88, 0x0A64B}, // CYRILLIC SMALL LETTER UNBLENDED UK
-{0x01E00, 0x01E01}, // LATIN CAPITAL LETTER A WITH RING BELOW
-{0x01E02, 0x01E03}, // LATIN CAPITAL LETTER B WITH DOT ABOVE
-{0x01E04, 0x01E05}, // LATIN CAPITAL LETTER B WITH DOT BELOW
-{0x01E06, 0x01E07}, // LATIN CAPITAL LETTER B WITH LINE BELOW
-{0x01E08, 0x01E09}, // LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
-{0x01E0A, 0x01E0B}, // LATIN CAPITAL LETTER D WITH DOT ABOVE
-{0x01E0C, 0x01E0D}, // LATIN CAPITAL LETTER D WITH DOT BELOW
-{0x01E0E, 0x01E0F}, // LATIN CAPITAL LETTER D WITH LINE BELOW
-{0x01E10, 0x01E11}, // LATIN CAPITAL LETTER D WITH CEDILLA
-{0x01E12, 0x01E13}, // LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
-{0x01E14, 0x01E15}, // LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
-{0x01E16, 0x01E17}, // LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
-{0x01E18, 0x01E19}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
-{0x01E1A, 0x01E1B}, // LATIN CAPITAL LETTER E WITH TILDE BELOW
-{0x01E1C, 0x01E1D}, // LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
-{0x01E1E, 0x01E1F}, // LATIN CAPITAL LETTER F WITH DOT ABOVE
-{0x01E20, 0x01E21}, // LATIN CAPITAL LETTER G WITH MACRON
-{0x01E22, 0x01E23}, // LATIN CAPITAL LETTER H WITH DOT ABOVE
-{0x01E24, 0x01E25}, // LATIN CAPITAL LETTER H WITH DOT BELOW
-{0x01E26, 0x01E27}, // LATIN CAPITAL LETTER H WITH DIAERESIS
-{0x01E28, 0x01E29}, // LATIN CAPITAL LETTER H WITH CEDILLA
-{0x01E2A, 0x01E2B}, // LATIN CAPITAL LETTER H WITH BREVE BELOW
-{0x01E2C, 0x01E2D}, // LATIN CAPITAL LETTER I WITH TILDE BELOW
-{0x01E2E, 0x01E2F}, // LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
-{0x01E30, 0x01E31}, // LATIN CAPITAL LETTER K WITH ACUTE
-{0x01E32, 0x01E33}, // LATIN CAPITAL LETTER K WITH DOT BELOW
-{0x01E34, 0x01E35}, // LATIN CAPITAL LETTER K WITH LINE BELOW
-{0x01E36, 0x01E37}, // LATIN CAPITAL LETTER L WITH DOT BELOW
-{0x01E38, 0x01E39}, // LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
-{0x01E3A, 0x01E3B}, // LATIN CAPITAL LETTER L WITH LINE BELOW
-{0x01E3C, 0x01E3D}, // LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
-{0x01E3E, 0x01E3F}, // LATIN CAPITAL LETTER M WITH ACUTE
-{0x01E40, 0x01E41}, // LATIN CAPITAL LETTER M WITH DOT ABOVE
-{0x01E42, 0x01E43}, // LATIN CAPITAL LETTER M WITH DOT BELOW
-{0x01E44, 0x01E45}, // LATIN CAPITAL LETTER N WITH DOT ABOVE
-{0x01E46, 0x01E47}, // LATIN CAPITAL LETTER N WITH DOT BELOW
-{0x01E48, 0x01E49}, // LATIN CAPITAL LETTER N WITH LINE BELOW
-{0x01E4A, 0x01E4B}, // LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
-{0x01E4C, 0x01E4D}, // LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
-{0x01E4E, 0x01E4F}, // LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
-{0x01E50, 0x01E51}, // LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
-{0x01E52, 0x01E53}, // LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
-{0x01E54, 0x01E55}, // LATIN CAPITAL LETTER P WITH ACUTE
-{0x01E56, 0x01E57}, // LATIN CAPITAL LETTER P WITH DOT ABOVE
-{0x01E58, 0x01E59}, // LATIN CAPITAL LETTER R WITH DOT ABOVE
-{0x01E5A, 0x01E5B}, // LATIN CAPITAL LETTER R WITH DOT BELOW
-{0x01E5C, 0x01E5D}, // LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
-{0x01E5E, 0x01E5F}, // LATIN CAPITAL LETTER R WITH LINE BELOW
-{0x01E60, 0x01E61}, // LATIN CAPITAL LETTER S WITH DOT ABOVE
-{0x01E62, 0x01E63}, // LATIN CAPITAL LETTER S WITH DOT BELOW
-{0x01E64, 0x01E65}, // LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
-{0x01E66, 0x01E67}, // LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
-{0x01E68, 0x01E69}, // LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
-{0x01E6A, 0x01E6B}, // LATIN CAPITAL LETTER T WITH DOT ABOVE
-{0x01E6C, 0x01E6D}, // LATIN CAPITAL LETTER T WITH DOT BELOW
-{0x01E6E, 0x01E6F}, // LATIN CAPITAL LETTER T WITH LINE BELOW
-{0x01E70, 0x01E71}, // LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
-{0x01E72, 0x01E73}, // LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
-{0x01E74, 0x01E75}, // LATIN CAPITAL LETTER U WITH TILDE BELOW
-{0x01E76, 0x01E77}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
-{0x01E78, 0x01E79}, // LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
-{0x01E7A, 0x01E7B}, // LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
-{0x01E7C, 0x01E7D}, // LATIN CAPITAL LETTER V WITH TILDE
-{0x01E7E, 0x01E7F}, // LATIN CAPITAL LETTER V WITH DOT BELOW
-{0x01E80, 0x01E81}, // LATIN CAPITAL LETTER W WITH GRAVE
-{0x01E82, 0x01E83}, // LATIN CAPITAL LETTER W WITH ACUTE
-{0x01E84, 0x01E85}, // LATIN CAPITAL LETTER W WITH DIAERESIS
-{0x01E86, 0x01E87}, // LATIN CAPITAL LETTER W WITH DOT ABOVE
-{0x01E88, 0x01E89}, // LATIN CAPITAL LETTER W WITH DOT BELOW
-{0x01E8A, 0x01E8B}, // LATIN CAPITAL LETTER X WITH DOT ABOVE
-{0x01E8C, 0x01E8D}, // LATIN CAPITAL LETTER X WITH DIAERESIS
-{0x01E8E, 0x01E8F}, // LATIN CAPITAL LETTER Y WITH DOT ABOVE
-{0x01E90, 0x01E91}, // LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
-{0x01E92, 0x01E93}, // LATIN CAPITAL LETTER Z WITH DOT BELOW
-{0x01E94, 0x01E95}, // LATIN CAPITAL LETTER Z WITH LINE BELOW
-{0x01E9B, 0x01E61}, // LATIN SMALL LETTER LONG S WITH DOT ABOVE
-{0x01E9E, 0x000DF}, // LATIN CAPITAL LETTER SHARP S
-{0x01EA0, 0x01EA1}, // LATIN CAPITAL LETTER A WITH DOT BELOW
-{0x01EA2, 0x01EA3}, // LATIN CAPITAL LETTER A WITH HOOK ABOVE
-{0x01EA4, 0x01EA5}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
-{0x01EA6, 0x01EA7}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
-{0x01EA8, 0x01EA9}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
-{0x01EAA, 0x01EAB}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
-{0x01EAC, 0x01EAD}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
-{0x01EAE, 0x01EAF}, // LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
-{0x01EB0, 0x01EB1}, // LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
-{0x01EB2, 0x01EB3}, // LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
-{0x01EB4, 0x01EB5}, // LATIN CAPITAL LETTER A WITH BREVE AND TILDE
-{0x01EB6, 0x01EB7}, // LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
-{0x01EB8, 0x01EB9}, // LATIN CAPITAL LETTER E WITH DOT BELOW
-{0x01EBA, 0x01EBB}, // LATIN CAPITAL LETTER E WITH HOOK ABOVE
-{0x01EBC, 0x01EBD}, // LATIN CAPITAL LETTER E WITH TILDE
-{0x01EBE, 0x01EBF}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
-{0x01EC0, 0x01EC1}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
-{0x01EC2, 0x01EC3}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
-{0x01EC4, 0x01EC5}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
-{0x01EC6, 0x01EC7}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
-{0x01EC8, 0x01EC9}, // LATIN CAPITAL LETTER I WITH HOOK ABOVE
-{0x01ECA, 0x01ECB}, // LATIN CAPITAL LETTER I WITH DOT BELOW
-{0x01ECC, 0x01ECD}, // LATIN CAPITAL LETTER O WITH DOT BELOW
-{0x01ECE, 0x01ECF}, // LATIN CAPITAL LETTER O WITH HOOK ABOVE
-{0x01ED0, 0x01ED1}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
-{0x01ED2, 0x01ED3}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
-{0x01ED4, 0x01ED5}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
-{0x01ED6, 0x01ED7}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
-{0x01ED8, 0x01ED9}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
-{0x01EDA, 0x01EDB}, // LATIN CAPITAL LETTER O WITH HORN AND ACUTE
-{0x01EDC, 0x01EDD}, // LATIN CAPITAL LETTER O WITH HORN AND GRAVE
-{0x01EDE, 0x01EDF}, // LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
-{0x01EE0, 0x01EE1}, // LATIN CAPITAL LETTER O WITH HORN AND TILDE
-{0x01EE2, 0x01EE3}, // LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
-{0x01EE4, 0x01EE5}, // LATIN CAPITAL LETTER U WITH DOT BELOW
-{0x01EE6, 0x01EE7}, // LATIN CAPITAL LETTER U WITH HOOK ABOVE
-{0x01EE8, 0x01EE9}, // LATIN CAPITAL LETTER U WITH HORN AND ACUTE
-{0x01EEA, 0x01EEB}, // LATIN CAPITAL LETTER U WITH HORN AND GRAVE
-{0x01EEC, 0x01EED}, // LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
-{0x01EEE, 0x01EEF}, // LATIN CAPITAL LETTER U WITH HORN AND TILDE
-{0x01EF0, 0x01EF1}, // LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
-{0x01EF2, 0x01EF3}, // LATIN CAPITAL LETTER Y WITH GRAVE
-{0x01EF4, 0x01EF5}, // LATIN CAPITAL LETTER Y WITH DOT BELOW
-{0x01EF6, 0x01EF7}, // LATIN CAPITAL LETTER Y WITH HOOK ABOVE
-{0x01EF8, 0x01EF9}, // LATIN CAPITAL LETTER Y WITH TILDE
-{0x01EFA, 0x01EFB}, // LATIN CAPITAL LETTER MIDDLE-WELSH LL
-{0x01EFC, 0x01EFD}, // LATIN CAPITAL LETTER MIDDLE-WELSH V
-{0x01EFE, 0x01EFF}, // LATIN CAPITAL LETTER Y WITH LOOP
-{0x01F08, 0x01F00}, // GREEK CAPITAL LETTER ALPHA WITH PSILI
-{0x01F09, 0x01F01}, // GREEK CAPITAL LETTER ALPHA WITH DASIA
-{0x01F0A, 0x01F02}, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
-{0x01F0B, 0x01F03}, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
-{0x01F0C, 0x01F04}, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
-{0x01F0D, 0x01F05}, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
-{0x01F0E, 0x01F06}, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
-{0x01F0F, 0x01F07}, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
-{0x01F18, 0x01F10}, // GREEK CAPITAL LETTER EPSILON WITH PSILI
-{0x01F19, 0x01F11}, // GREEK CAPITAL LETTER EPSILON WITH DASIA
-{0x01F1A, 0x01F12}, // GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
-{0x01F1B, 0x01F13}, // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
-{0x01F1C, 0x01F14}, // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
-{0x01F1D, 0x01F15}, // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
-{0x01F28, 0x01F20}, // GREEK CAPITAL LETTER ETA WITH PSILI
-{0x01F29, 0x01F21}, // GREEK CAPITAL LETTER ETA WITH DASIA
-{0x01F2A, 0x01F22}, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
-{0x01F2B, 0x01F23}, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
-{0x01F2C, 0x01F24}, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
-{0x01F2D, 0x01F25}, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
-{0x01F2E, 0x01F26}, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
-{0x01F2F, 0x01F27}, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
-{0x01F38, 0x01F30}, // GREEK CAPITAL LETTER IOTA WITH PSILI
-{0x01F39, 0x01F31}, // GREEK CAPITAL LETTER IOTA WITH DASIA
-{0x01F3A, 0x01F32}, // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
-{0x01F3B, 0x01F33}, // GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
-{0x01F3C, 0x01F34}, // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
-{0x01F3D, 0x01F35}, // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
-{0x01F3E, 0x01F36}, // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
-{0x01F3F, 0x01F37}, // GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
-{0x01F48, 0x01F40}, // GREEK CAPITAL LETTER OMICRON WITH PSILI
-{0x01F49, 0x01F41}, // GREEK CAPITAL LETTER OMICRON WITH DASIA
-{0x01F4A, 0x01F42}, // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
-{0x01F4B, 0x01F43}, // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
-{0x01F4C, 0x01F44}, // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
-{0x01F4D, 0x01F45}, // GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
-{0x01F59, 0x01F51}, // GREEK CAPITAL LETTER UPSILON WITH DASIA
-{0x01F5B, 0x01F53}, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
-{0x01F5D, 0x01F55}, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
-{0x01F5F, 0x01F57}, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
-{0x01F68, 0x01F60}, // GREEK CAPITAL LETTER OMEGA WITH PSILI
-{0x01F69, 0x01F61}, // GREEK CAPITAL LETTER OMEGA WITH DASIA
-{0x01F6A, 0x01F62}, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
-{0x01F6B, 0x01F63}, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
-{0x01F6C, 0x01F64}, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
-{0x01F6D, 0x01F65}, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
-{0x01F6E, 0x01F66}, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
-{0x01F6F, 0x01F67}, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
-{0x01F88, 0x01F80}, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
-{0x01F89, 0x01F81}, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
-{0x01F8A, 0x01F82}, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
-{0x01F8B, 0x01F83}, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
-{0x01F8C, 0x01F84}, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
-{0x01F8D, 0x01F85}, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
-{0x01F8E, 0x01F86}, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
-{0x01F8F, 0x01F87}, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
-{0x01F98, 0x01F90}, // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
-{0x01F99, 0x01F91}, // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
-{0x01F9A, 0x01F92}, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
-{0x01F9B, 0x01F93}, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
-{0x01F9C, 0x01F94}, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
-{0x01F9D, 0x01F95}, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
-{0x01F9E, 0x01F96}, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
-{0x01F9F, 0x01F97}, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
-{0x01FA8, 0x01FA0}, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
-{0x01FA9, 0x01FA1}, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
-{0x01FAA, 0x01FA2}, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
-{0x01FAB, 0x01FA3}, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
-{0x01FAC, 0x01FA4}, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
-{0x01FAD, 0x01FA5}, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
-{0x01FAE, 0x01FA6}, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
-{0x01FAF, 0x01FA7}, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
-{0x01FB8, 0x01FB0}, // GREEK CAPITAL LETTER ALPHA WITH VRACHY
-{0x01FB9, 0x01FB1}, // GREEK CAPITAL LETTER ALPHA WITH MACRON
-{0x01FBA, 0x01F70}, // GREEK CAPITAL LETTER ALPHA WITH VARIA
-{0x01FBB, 0x01F71}, // GREEK CAPITAL LETTER ALPHA WITH OXIA
-{0x01FBC, 0x01FB3}, // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
-{0x01FBE, 0x003B9}, // GREEK PROSGEGRAMMENI
-{0x01FC8, 0x01F72}, // GREEK CAPITAL LETTER EPSILON WITH VARIA
-{0x01FC9, 0x01F73}, // GREEK CAPITAL LETTER EPSILON WITH OXIA
-{0x01FCA, 0x01F74}, // GREEK CAPITAL LETTER ETA WITH VARIA
-{0x01FCB, 0x01F75}, // GREEK CAPITAL LETTER ETA WITH OXIA
-{0x01FCC, 0x01FC3}, // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
-{0x01FD8, 0x01FD0}, // GREEK CAPITAL LETTER IOTA WITH VRACHY
-{0x01FD9, 0x01FD1}, // GREEK CAPITAL LETTER IOTA WITH MACRON
-{0x01FDA, 0x01F76}, // GREEK CAPITAL LETTER IOTA WITH VARIA
-{0x01FDB, 0x01F77}, // GREEK CAPITAL LETTER IOTA WITH OXIA
-{0x01FE8, 0x01FE0}, // GREEK CAPITAL LETTER UPSILON WITH VRACHY
-{0x01FE9, 0x01FE1}, // GREEK CAPITAL LETTER UPSILON WITH MACRON
-{0x01FEA, 0x01F7A}, // GREEK CAPITAL LETTER UPSILON WITH VARIA
-{0x01FEB, 0x01F7B}, // GREEK CAPITAL LETTER UPSILON WITH OXIA
-{0x01FEC, 0x01FE5}, // GREEK CAPITAL LETTER RHO WITH DASIA
-{0x01FF8, 0x01F78}, // GREEK CAPITAL LETTER OMICRON WITH VARIA
-{0x01FF9, 0x01F79}, // GREEK CAPITAL LETTER OMICRON WITH OXIA
-{0x01FFA, 0x01F7C}, // GREEK CAPITAL LETTER OMEGA WITH VARIA
-{0x01FFB, 0x01F7D}, // GREEK CAPITAL LETTER OMEGA WITH OXIA
-{0x01FFC, 0x01FF3}, // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
-{0x02126, 0x003C9}, // OHM SIGN
-{0x0212A, 0x0006B}, // KELVIN SIGN
-{0x0212B, 0x000E5}, // ANGSTROM SIGN
-{0x02132, 0x0214E}, // TURNED CAPITAL F
-{0x02160, 0x02170}, // ROMAN NUMERAL ONE
-{0x02161, 0x02171}, // ROMAN NUMERAL TWO
-{0x02162, 0x02172}, // ROMAN NUMERAL THREE
-{0x02163, 0x02173}, // ROMAN NUMERAL FOUR
-{0x02164, 0x02174}, // ROMAN NUMERAL FIVE
-{0x02165, 0x02175}, // ROMAN NUMERAL SIX
-{0x02166, 0x02176}, // ROMAN NUMERAL SEVEN
-{0x02167, 0x02177}, // ROMAN NUMERAL EIGHT
-{0x02168, 0x02178}, // ROMAN NUMERAL NINE
-{0x02169, 0x02179}, // ROMAN NUMERAL TEN
-{0x0216A, 0x0217A}, // ROMAN NUMERAL ELEVEN
-{0x0216B, 0x0217B}, // ROMAN NUMERAL TWELVE
-{0x0216C, 0x0217C}, // ROMAN NUMERAL FIFTY
-{0x0216D, 0x0217D}, // ROMAN NUMERAL ONE HUNDRED
-{0x0216E, 0x0217E}, // ROMAN NUMERAL FIVE HUNDRED
-{0x0216F, 0x0217F}, // ROMAN NUMERAL ONE THOUSAND
-{0x02183, 0x02184}, // ROMAN NUMERAL REVERSED ONE HUNDRED
-{0x024B6, 0x024D0}, // CIRCLED LATIN CAPITAL LETTER A
-{0x024B7, 0x024D1}, // CIRCLED LATIN CAPITAL LETTER B
-{0x024B8, 0x024D2}, // CIRCLED LATIN CAPITAL LETTER C
-{0x024B9, 0x024D3}, // CIRCLED LATIN CAPITAL LETTER D
-{0x024BA, 0x024D4}, // CIRCLED LATIN CAPITAL LETTER E
-{0x024BB, 0x024D5}, // CIRCLED LATIN CAPITAL LETTER F
-{0x024BC, 0x024D6}, // CIRCLED LATIN CAPITAL LETTER G
-{0x024BD, 0x024D7}, // CIRCLED LATIN CAPITAL LETTER H
-{0x024BE, 0x024D8}, // CIRCLED LATIN CAPITAL LETTER I
-{0x024BF, 0x024D9}, // CIRCLED LATIN CAPITAL LETTER J
-{0x024C0, 0x024DA}, // CIRCLED LATIN CAPITAL LETTER K
-{0x024C1, 0x024DB}, // CIRCLED LATIN CAPITAL LETTER L
-{0x024C2, 0x024DC}, // CIRCLED LATIN CAPITAL LETTER M
-{0x024C3, 0x024DD}, // CIRCLED LATIN CAPITAL LETTER N
-{0x024C4, 0x024DE}, // CIRCLED LATIN CAPITAL LETTER O
-{0x024C5, 0x024DF}, // CIRCLED LATIN CAPITAL LETTER P
-{0x024C6, 0x024E0}, // CIRCLED LATIN CAPITAL LETTER Q
-{0x024C7, 0x024E1}, // CIRCLED LATIN CAPITAL LETTER R
-{0x024C8, 0x024E2}, // CIRCLED LATIN CAPITAL LETTER S
-{0x024C9, 0x024E3}, // CIRCLED LATIN CAPITAL LETTER T
-{0x024CA, 0x024E4}, // CIRCLED LATIN CAPITAL LETTER U
-{0x024CB, 0x024E5}, // CIRCLED LATIN CAPITAL LETTER V
-{0x024CC, 0x024E6}, // CIRCLED LATIN CAPITAL LETTER W
-{0x024CD, 0x024E7}, // CIRCLED LATIN CAPITAL LETTER X
-{0x024CE, 0x024E8}, // CIRCLED LATIN CAPITAL LETTER Y
-{0x024CF, 0x024E9}, // CIRCLED LATIN CAPITAL LETTER Z
-{0x02C60, 0x02C61}, // LATIN CAPITAL LETTER L WITH DOUBLE BAR
-{0x02C62, 0x0026B}, // LATIN CAPITAL LETTER L WITH MIDDLE TILDE
-{0x02C63, 0x01D7D}, // LATIN CAPITAL LETTER P WITH STROKE
-{0x02C64, 0x0027D}, // LATIN CAPITAL LETTER R WITH TAIL
-{0x02C67, 0x02C68}, // LATIN CAPITAL LETTER H WITH DESCENDER
-{0x02C69, 0x02C6A}, // LATIN CAPITAL LETTER K WITH DESCENDER
-{0x02C6B, 0x02C6C}, // LATIN CAPITAL LETTER Z WITH DESCENDER
-{0x02C6D, 0x00251}, // LATIN CAPITAL LETTER ALPHA
-{0x02C6E, 0x00271}, // LATIN CAPITAL LETTER M WITH HOOK
-{0x02C6F, 0x00250}, // LATIN CAPITAL LETTER TURNED A
-{0x02C70, 0x00252}, // LATIN CAPITAL LETTER TURNED ALPHA
-{0x02C72, 0x02C73}, // LATIN CAPITAL LETTER W WITH HOOK
-{0x02C75, 0x02C76}, // LATIN CAPITAL LETTER HALF H
-{0x02C7E, 0x0023F}, // LATIN CAPITAL LETTER S WITH SWASH TAIL
-{0x02C7F, 0x00240}, // LATIN CAPITAL LETTER Z WITH SWASH TAIL
-{0x0A640, 0x0A641}, // CYRILLIC CAPITAL LETTER ZEMLYA
-{0x0A642, 0x0A643}, // CYRILLIC CAPITAL LETTER DZELO
-{0x0A644, 0x0A645}, // CYRILLIC CAPITAL LETTER REVERSED DZE
-{0x0A646, 0x0A647}, // CYRILLIC CAPITAL LETTER IOTA
-{0x0A648, 0x0A649}, // CYRILLIC CAPITAL LETTER DJERV
-{0x0A64A, 0x0A64B}, // CYRILLIC CAPITAL LETTER MONOGRAPH UK
-{0x0A64C, 0x0A64D}, // CYRILLIC CAPITAL LETTER BROAD OMEGA
-{0x0A64E, 0x0A64F}, // CYRILLIC CAPITAL LETTER NEUTRAL YER
-{0x0A650, 0x0A651}, // CYRILLIC CAPITAL LETTER YERU WITH BACK YER
-{0x0A652, 0x0A653}, // CYRILLIC CAPITAL LETTER IOTIFIED YAT
-{0x0A654, 0x0A655}, // CYRILLIC CAPITAL LETTER REVERSED YU
-{0x0A656, 0x0A657}, // CYRILLIC CAPITAL LETTER IOTIFIED A
-{0x0A658, 0x0A659}, // CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
-{0x0A65A, 0x0A65B}, // CYRILLIC CAPITAL LETTER BLENDED YUS
-{0x0A65C, 0x0A65D}, // CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
-{0x0A65E, 0x0A65F}, // CYRILLIC CAPITAL LETTER YN
-{0x0A660, 0x0A661}, // CYRILLIC CAPITAL LETTER REVERSED TSE
-{0x0A662, 0x0A663}, // CYRILLIC CAPITAL LETTER SOFT DE
-{0x0A664, 0x0A665}, // CYRILLIC CAPITAL LETTER SOFT EL
-{0x0A666, 0x0A667}, // CYRILLIC CAPITAL LETTER SOFT EM
-{0x0A668, 0x0A669}, // CYRILLIC CAPITAL LETTER MONOCULAR O
-{0x0A66A, 0x0A66B}, // CYRILLIC CAPITAL LETTER BINOCULAR O
-{0x0A66C, 0x0A66D}, // CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
-{0x0A680, 0x0A681}, // CYRILLIC CAPITAL LETTER DWE
-{0x0A682, 0x0A683}, // CYRILLIC CAPITAL LETTER DZWE
-{0x0A684, 0x0A685}, // CYRILLIC CAPITAL LETTER ZHWE
-{0x0A686, 0x0A687}, // CYRILLIC CAPITAL LETTER CCHE
-{0x0A688, 0x0A689}, // CYRILLIC CAPITAL LETTER DZZE
-{0x0A68A, 0x0A68B}, // CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
-{0x0A68C, 0x0A68D}, // CYRILLIC CAPITAL LETTER TWE
-{0x0A68E, 0x0A68F}, // CYRILLIC CAPITAL LETTER TSWE
-{0x0A690, 0x0A691}, // CYRILLIC CAPITAL LETTER TSSE
-{0x0A692, 0x0A693}, // CYRILLIC CAPITAL LETTER TCHE
-{0x0A694, 0x0A695}, // CYRILLIC CAPITAL LETTER HWE
-{0x0A696, 0x0A697}, // CYRILLIC CAPITAL LETTER SHWE
-{0x0A698, 0x0A699}, // CYRILLIC CAPITAL LETTER DOUBLE O
-{0x0A69A, 0x0A69B}, // CYRILLIC CAPITAL LETTER CROSSED O
-{0x0A722, 0x0A723}, // LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
-{0x0A724, 0x0A725}, // LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
-{0x0A726, 0x0A727}, // LATIN CAPITAL LETTER HENG
-{0x0A728, 0x0A729}, // LATIN CAPITAL LETTER TZ
-{0x0A72A, 0x0A72B}, // LATIN CAPITAL LETTER TRESILLO
-{0x0A72C, 0x0A72D}, // LATIN CAPITAL LETTER CUATRILLO
-{0x0A72E, 0x0A72F}, // LATIN CAPITAL LETTER CUATRILLO WITH COMMA
-{0x0A732, 0x0A733}, // LATIN CAPITAL LETTER AA
-{0x0A734, 0x0A735}, // LATIN CAPITAL LETTER AO
-{0x0A736, 0x0A737}, // LATIN CAPITAL LETTER AU
-{0x0A738, 0x0A739}, // LATIN CAPITAL LETTER AV
-{0x0A73A, 0x0A73B}, // LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
-{0x0A73C, 0x0A73D}, // LATIN CAPITAL LETTER AY
-{0x0A73E, 0x0A73F}, // LATIN CAPITAL LETTER REVERSED C WITH DOT
-{0x0A740, 0x0A741}, // LATIN CAPITAL LETTER K WITH STROKE
-{0x0A742, 0x0A743}, // LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
-{0x0A744, 0x0A745}, // LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
-{0x0A746, 0x0A747}, // LATIN CAPITAL LETTER BROKEN L
-{0x0A748, 0x0A749}, // LATIN CAPITAL LETTER L WITH HIGH STROKE
-{0x0A74A, 0x0A74B}, // LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
-{0x0A74C, 0x0A74D}, // LATIN CAPITAL LETTER O WITH LOOP
-{0x0A74E, 0x0A74F}, // LATIN CAPITAL LETTER OO
-{0x0A750, 0x0A751}, // LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
-{0x0A752, 0x0A753}, // LATIN CAPITAL LETTER P WITH FLOURISH
-{0x0A754, 0x0A755}, // LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
-{0x0A756, 0x0A757}, // LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
-{0x0A758, 0x0A759}, // LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
-{0x0A75A, 0x0A75B}, // LATIN CAPITAL LETTER R ROTUNDA
-{0x0A75C, 0x0A75D}, // LATIN CAPITAL LETTER RUM ROTUNDA
-{0x0A75E, 0x0A75F}, // LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
-{0x0A760, 0x0A761}, // LATIN CAPITAL LETTER VY
-{0x0A762, 0x0A763}, // LATIN CAPITAL LETTER VISIGOTHIC Z
-{0x0A764, 0x0A765}, // LATIN CAPITAL LETTER THORN WITH STROKE
-{0x0A766, 0x0A767}, // LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
-{0x0A768, 0x0A769}, // LATIN CAPITAL LETTER VEND
-{0x0A76A, 0x0A76B}, // LATIN CAPITAL LETTER ET
-{0x0A76C, 0x0A76D}, // LATIN CAPITAL LETTER IS
-{0x0A76E, 0x0A76F}, // LATIN CAPITAL LETTER CON
-{0x0A779, 0x0A77A}, // LATIN CAPITAL LETTER INSULAR D
-{0x0A77B, 0x0A77C}, // LATIN CAPITAL LETTER INSULAR F
-{0x0A77D, 0x01D79}, // LATIN CAPITAL LETTER INSULAR G
-{0x0A77E, 0x0A77F}, // LATIN CAPITAL LETTER TURNED INSULAR G
-{0x0A780, 0x0A781}, // LATIN CAPITAL LETTER TURNED L
-{0x0A782, 0x0A783}, // LATIN CAPITAL LETTER INSULAR R
-{0x0A784, 0x0A785}, // LATIN CAPITAL LETTER INSULAR S
-{0x0A786, 0x0A787}, // LATIN CAPITAL LETTER INSULAR T
-{0x0A78B, 0x0A78C}, // LATIN CAPITAL LETTER SALTILLO
-{0x0A78D, 0x00265}, // LATIN CAPITAL LETTER TURNED H
-{0x0A790, 0x0A791}, // LATIN CAPITAL LETTER N WITH DESCENDER
-{0x0A792, 0x0A793}, // LATIN CAPITAL LETTER C WITH BAR
-{0x0A796, 0x0A797}, // LATIN CAPITAL LETTER B WITH FLOURISH
-{0x0A798, 0x0A799}, // LATIN CAPITAL LETTER F WITH STROKE
-{0x0A79A, 0x0A79B}, // LATIN CAPITAL LETTER VOLAPUK AE
-{0x0A79C, 0x0A79D}, // LATIN CAPITAL LETTER VOLAPUK OE
-{0x0A79E, 0x0A79F}, // LATIN CAPITAL LETTER VOLAPUK UE
-{0x0A7A0, 0x0A7A1}, // LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
-{0x0A7A2, 0x0A7A3}, // LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
-{0x0A7A4, 0x0A7A5}, // LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
-{0x0A7A6, 0x0A7A7}, // LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
-{0x0A7A8, 0x0A7A9}, // LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
-{0x0A7AA, 0x00266}, // LATIN CAPITAL LETTER H WITH HOOK
-{0x0A7AB, 0x0025C}, // LATIN CAPITAL LETTER REVERSED OPEN E
-{0x0A7AC, 0x00261}, // LATIN CAPITAL LETTER SCRIPT G
-{0x0A7AD, 0x0026C}, // LATIN CAPITAL LETTER L WITH BELT
-{0x0A7AE, 0x0026A}, // LATIN CAPITAL LETTER SMALL CAPITAL I
-{0x0A7B0, 0x0029E}, // LATIN CAPITAL LETTER TURNED K
-{0x0A7B1, 0x00287}, // LATIN CAPITAL LETTER TURNED T
-{0x0A7B2, 0x0029D}, // LATIN CAPITAL LETTER J WITH CROSSED-TAIL
-{0x0A7B3, 0x0AB53}, // LATIN CAPITAL LETTER CHI
-{0x0A7B4, 0x0A7B5}, // LATIN CAPITAL LETTER BETA
-{0x0A7B6, 0x0A7B7}, // LATIN CAPITAL LETTER OMEGA
-{0x0A7B8, 0x0A7B9}, // LATIN CAPITAL LETTER U WITH STROKE
-{0x0A7BA, 0x0A7BB}, // LATIN CAPITAL LETTER GLOTTAL A
-{0x0A7BC, 0x0A7BD}, // LATIN CAPITAL LETTER GLOTTAL I
-{0x0A7BE, 0x0A7BF}, // LATIN CAPITAL LETTER GLOTTAL U
-{0x0A7C0, 0x0A7C1}, // LATIN CAPITAL LETTER OLD POLISH O
-{0x0A7C2, 0x0A7C3}, // LATIN CAPITAL LETTER ANGLICANA W
-{0x0A7C4, 0x0A794}, // LATIN CAPITAL LETTER C WITH PALATAL HOOK
-{0x0A7C5, 0x00282}, // LATIN CAPITAL LETTER S WITH HOOK
-{0x0A7C6, 0x01D8E}, // LATIN CAPITAL LETTER Z WITH PALATAL HOOK
-{0x0A7C7, 0x0A7C8}, // LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
-{0x0A7C9, 0x0A7CA}, // LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
-{0x0A7D0, 0x0A7D1}, // LATIN CAPITAL LETTER CLOSED INSULAR G
-{0x0A7D6, 0x0A7D7}, // LATIN CAPITAL LETTER MIDDLE SCOTS S
-{0x0A7D8, 0x0A7D9}, // LATIN CAPITAL LETTER SIGMOID S
-{0x0A7F5, 0x0A7F6}, // LATIN CAPITAL LETTER REVERSED HALF H
-{0x0FF21, 0x0FF41}, // FULLWIDTH LATIN CAPITAL LETTER A
-{0x0FF22, 0x0FF42}, // FULLWIDTH LATIN CAPITAL LETTER B
-{0x0FF23, 0x0FF43}, // FULLWIDTH LATIN CAPITAL LETTER C
-{0x0FF24, 0x0FF44}, // FULLWIDTH LATIN CAPITAL LETTER D
-{0x0FF25, 0x0FF45}, // FULLWIDTH LATIN CAPITAL LETTER E
-{0x0FF26, 0x0FF46}, // FULLWIDTH LATIN CAPITAL LETTER F
-{0x0FF27, 0x0FF47}, // FULLWIDTH LATIN CAPITAL LETTER G
-{0x0FF28, 0x0FF48}, // FULLWIDTH LATIN CAPITAL LETTER H
-{0x0FF29, 0x0FF49}, // FULLWIDTH LATIN CAPITAL LETTER I
-{0x0FF2A, 0x0FF4A}, // FULLWIDTH LATIN CAPITAL LETTER J
-{0x0FF2B, 0x0FF4B}, // FULLWIDTH LATIN CAPITAL LETTER K
-{0x0FF2C, 0x0FF4C}, // FULLWIDTH LATIN CAPITAL LETTER L
-{0x0FF2D, 0x0FF4D}, // FULLWIDTH LATIN CAPITAL LETTER M
-{0x0FF2E, 0x0FF4E}, // FULLWIDTH LATIN CAPITAL LETTER N
-{0x0FF2F, 0x0FF4F}, // FULLWIDTH LATIN CAPITAL LETTER O
-{0x0FF30, 0x0FF50}, // FULLWIDTH LATIN CAPITAL LETTER P
-{0x0FF31, 0x0FF51}, // FULLWIDTH LATIN CAPITAL LETTER Q
-{0x0FF32, 0x0FF52}, // FULLWIDTH LATIN CAPITAL LETTER R
-{0x0FF33, 0x0FF53}, // FULLWIDTH LATIN CAPITAL LETTER S
-{0x0FF34, 0x0FF54}, // FULLWIDTH LATIN CAPITAL LETTER T
-{0x0FF35, 0x0FF55}, // FULLWIDTH LATIN CAPITAL LETTER U
-{0x0FF36, 0x0FF56}, // FULLWIDTH LATIN CAPITAL LETTER V
-{0x0FF37, 0x0FF57}, // FULLWIDTH LATIN CAPITAL LETTER W
-{0x0FF38, 0x0FF58}, // FULLWIDTH LATIN CAPITAL LETTER X
-{0x0FF39, 0x0FF59}, // FULLWIDTH LATIN CAPITAL LETTER Y
-{0x0FF3A, 0x0FF5A}, // FULLWIDTH LATIN CAPITAL LETTER Z
-};
-
-static short cfold_idx[HT_SIZE] = {
- -1, 233, 863, -1, 751, 1300, -1, 1177, -1, 659, 29, 1088, 255, 567, 1621,
- 1654, 1002, 1535, 493, -1, 285, 916, 410, -1, 804, 185, 1202, 1365, 713, 96,
- 1142, 833, 628, 14, 1413, 1055, 1375, 552, 1631, 339, 969, 433, 828, 877, 250,
- 1251, 1380, 765, 145, 1245, 1297, 673, 1338, 55, 1100, 1448, 595, 1496, 1016, 388,
- 1549, 1638, 1682, 298, 930, 1697, -1, 203, 838, 1214, 1285, 726, -1, 1154, 1427,
- 642, 42, 1069, -1, 580, 1588, -1, 353, 983, 478, 1510, 891, 1396, -1, 1318,
- 779, 159, 1185, 1366, 687, 71, 1462, 1125, 1643, 527, -1, 1030, 1393, 508, 1554,
- 944, 312, 446, -1, -1, 381, 852, -1, 740, -1, 121, 1158, 1441, -1, 7,
- 1083, 1609, 608, 1606, -1, 367, 991, 482, 1524, 274, 905, 1266, -1, 793, 173,
- 1197, 1343, 701, 85, 1137, 1402, 617, 1675, -1, 213, 1044, 530, -1, 958, 328,
- 472, 1487, -1, 237, 866, -1, 754, 134, 1306, 1166, -1, 662, 35, 1094, 1660,
- 573, 1624, 1005, -1, 496, 1538, -1, 205, 919, 1272, 807, 188, -1, 1208, 1371,
- 716, 99, 1149, 1416, 631, 20, 1058, -1, 1577, 558, -1, 342, 972, 439, 1499,
- 880, 253, 1383, -1, 768, 148, 1257, 1303, 676, 61, 1344, 1106, 1451, 601, 1688,
- 393, 1019, -1, -1, 933, 301, -1, -1, -1, 206, 841, 1220, 729, -1, 111,
- 1430, -1, 645, -1, 1072, -1, 586, 1591, -1, 356, -1, 465, 1513, 260, 894,
- 1261, -1, 782, 162, 1191, 1321, 690, 74, 1114, 1280, 533, 1465, 1653, 1033, -1,
- 512, 1558, 947, 317, 452, 1476, -1, 224, 855, 1232, 743, 124, -1, 1444, -1,
- 651, 13, 247, 1089, 614, 1283, 994, 370, 1527, 1612, -1, 277, 908, 409, 796,
- -1, 176, 1203, 1349, 704, 88, 1143, 1405, 620, 1681, -1, 215, 1047, 536, 1569,
- 331, 961, 413, -1, 869, -1, 1240, -1, 757, 137, 1172, 1312, 665, 1322, 41,
- 1101, 1666, 579, 1627, 1008, 379, 499, 1541, 922, 290, -1, -1, 810, 192, -1,
- 1215, -1, 719, 102, 1155, 1419, 634, 26, 1061, 1389, 564, 1580, 1647, 345, 975,
- 445, 501, 257, 883, 1386, 1309, 771, 151, 1263, 1350, 679, 63, 1454, 1502, 607,
- 1557, 1694, 396, 1022, -1, -1, 936, 304, 430, -1, -1, 211, 844, 1226, 732,
- 114, -1, 1159, 1433, 648, 52, 1075, -1, 592, 1594, 984, 359, 471, 1516, -1,
- 264, 897, -1, 785, -1, 165, 1180, 1327, 693, 77, 1120, 1468, 539, 1659, -1,
- 1036, 1561, 514, -1, 320, 950, 458, 1479, 858, 228, 402, -1, 746, 127, 1167,
- 1290, 654, 1447, 19, 1095, -1, 557, 817, 997, 373, 488, 1530, 1616, 280, 911,
- -1, 799, 180, -1, 1209, 1355, 708, 91, 1132, 1408, 623, 4, 1050, 1597, 542,
- 1687, -1, 334, 964, 419, 818, 872, 242, 1246, 1287, 760, 140, 1178, 1328, 668,
- 47, 1374, 1107, 1491, 585, 1630, 1011, 1399, 1544, 1672, 925, 293, 414, -1, 813,
- 196, 1221, -1, 721, 105, -1, 1144, 1422, 637, 32, 1064, -1, 570, 187, 978,
- 348, 1505, 451, 1583, 258, 886, 1252, 1315, 774, 154, 1275, 1356, 682, 66, 1115,
- 1392, 613, 1457, -1, 399, 1025, 511, -1, 306, 939, 436, -1, 847, 313, -1,
- -1, 735, 116, -1, 1436, -1, -1, 58, 1078, 1633, 598, 1598, 986, 362, 477,
- 1519, 1398, 268, 900, -1, 788, 168, -1, 1186, 1333, 696, 80, 1126, 1471, 545,
- 1665, 1039, 209, 520, 1564, -1, 323, 953, 462, 1482, 861, 231, -1, 1296, 749,
- 130, 1173, -1, 657, 25, 1650, 1084, 1619, 563, -1, 1000, -1, 491, 1533, 914,
- 283, 519, -1, 802, 183, 1198, 1361, 711, 1693, 94, 1138, 1411, 626, 10, 1053,
- 217, 548, 1574, -1, 337, 967, 429, 483, 246, 377, 824, 875, 763, 143, 1241,
- 1293, 671, 51, 1334, 1494, 591, 1607, 1636, 385, 1014, 1547, 1678, 928, 296, -1,
- -1, 836, 200, 1227, -1, 724, 108, -1, 1150, 1425, 640, 38, 1067, -1, 576,
- 1586, 981, 351, 457, 489, 1395, 315, 889, 1508, 777, 157, -1, 1181, 1362, 685,
- 69, 1121, 1460, 523, -1, 1028, 1391, 506, 1552, -1, 310, 942, 442, -1, 850,
- 220, -1, -1, 738, 119, 1156, 1439, -1, 3, -1, 1081, -1, 604, 1604, 365,
- 989, 425, 1522, 903, 272, 406, 1262, 791, 171, 1192, 1339, 699, 1474, 83, 1133,
- 1671, 551, -1, 1042, -1, 526, 1567, -1, 326, 956, 468, 1485, 234, 864, 1249,
- 1302, 752, 132, 1179, -1, 660, 31, 1090, 1656, 569, 1622, -1, 1003, -1, 494,
- 1536, 917, 286, 1268, -1, 805, 186, 1204, 1367, 714, 97, 1699, 1145, 1414, 629,
- 16, 1056, 1603, 554, 1281, 970, 340, 435, 507, 830, 251, 878, 1253, 766, 1299,
- 146, 1247, 1340, 674, 57, 1102, 1381, 597, 1449, 1497, 390, 1017, 1550, 1639, 299,
- 931, 422, 985, 839, 1684, 1216, -1, 727, -1, -1, 1428, 643, 44, -1, 1070,
- -1, 582, 1589, 354, -1, 461, 479, 892, 1397, 1511, -1, 780, 160, 1187, 1319,
- 1368, 688, 72, 1127, 1463, 529, 1644, 1031, 189, 509, 1555, 1649, 314, 945, 448,
- -1, 853, -1, 1228, -1, 741, 122, 1442, -1, -1, 9, 1085, 243, 610, 1608,
- -1, 368, 992, 484, 1525, 906, 275, -1, -1, 794, 174, 1199, 1345, 702, 86,
- 1677, 1139, 1403, 618, -1, 1045, 1601, 532, -1, 959, 329, 474, 1488, -1, 238,
- 867, -1, 755, 1308, 135, 1168, -1, 663, 37, 1096, 1662, 575, 1625, -1, 376,
- 1006, 497, 1539, 288, 920, 1274, -1, 808, 190, 1210, -1, 717, 100, 1151, 1417,
- 632, -1, 22, 223, 1059, 560, 1578, 973, 343, 441, 485, 505, 254, 881, 1384,
- 769, 149, 1305, 1269, 1346, 677, 179, 1108, 1452, 603, 1500, 1020, 394, 1690, -1,
- -1, 302, 934, 426, -1, 842, 208, 1222, -1, 730, 112, 1157, 1431, 646, 48,
- 1073, 1379, 1592, 588, -1, -1, -1, 467, 487, 895, 262, 1514, -1, 783, 163,
- 1193, 1323, 691, 75, 1282, 1116, 1466, 535, 1655, 199, 1034, 513, 1559, 948, 318,
- 454, 1477, -1, 357, 856, 1234, 744, 1286, 125, 1162, 1445, 652, 15, 1091, 249,
- 553, 1614, -1, 371, 995, 1528, -1, 278, 909, -1, -1, 797, 177, 1205, 1351,
- 706, 89, 1128, 1406, 621, 1683, -1, 1048, -1, 538, 1570, 962, 332, 415, -1,
- 1372, 240, 870, 1242, 758, 138, 1314, 1174, 1324, 666, 43, 1103, 1668, 581, 821,
- 1009, 380, 1542, 1628, -1, 291, 923, 412, 811, -1, 193, 1217, -1, 705, 103,
- 1420, -1, 635, 28, -1, 225, 1062, 566, 1581, 346, 976, 447, 1503, 884, 1388,
- -1, -1, 772, 152, 1265, 1311, 680, 1352, 64, 1455, 1696, 609, 1640, 1023, 397,
- 500, -1, -1, 305, 937, 432, -1, 212, 845, 1229, -1, 733, 115, 1434, -1,
- 649, 54, 1076, 235, 594, 1595, -1, 360, -1, 473, 1279, 266, 898, 1267, 1517,
- 786, 166, 1182, 1329, 694, 78, 1122, 1469, 1661, 541, -1, 207, 1037, 516, 1562,
- 951, 321, 1480, -1, -1, 229, 859, 1238, 747, 128, 1292, 1169, -1, 655, 21,
- 1097, -1, 559, 1617, 998, 374, 481, 1531, -1, 281, 912, 515, 800, 1277, 181,
- 1211, 1357, 709, 92, 1134, 1409, 624, 6, 1689, 1051, 1572, 544, -1, 335, 965,
- 1492, 820, 873, 244, 1248, 1376, 761, 141, 1233, 1289, 669, 1330, 1674, 1109, -1,
- 587, 823, 1012, 1401, 1545, 1632, -1, 294, 926, 416, 814, 197, 834, 1223, -1,
- 722, 106, 1146, 1423, 638, 34, 1065, 227, 572, 1584, -1, 349, 979, 453, 1506,
- 887, 259, 1254, 1317, 775, 155, 1458, 1358, 683, 67, 1377, 1117, -1, 615, -1,
- 400, 1026, 411, -1, 940, 308, 438, -1, -1, 216, 848, 1237, 736, 117, -1,
- 1163, 1437, -1, 60, 1079, 239, 600, 1600, -1, 363, 1520, 421, -1, 270, 901,
- 404, 1258, 789, 169, 1188, 1335, 697, 81, 1129, 1400, 547, 1472, 1667, 1040, 1599,
- 522, 1565, 324, 954, 464, 1483, 862, 232, -1, -1, 750, 131, 1175, 1298, 658,
- 1652, 27, 1086, -1, 565, 1620, 1001, -1, 492, 1534, -1, 284, 915, -1, 803,
- 184, -1, 1200, 1363, 712, 95, 1140, 1412, 627, 12, 1054, 1605, 550, 1575, 1695,
- 338, 968, 431, 826, 383, 248, 876, 1295, 764, 144, 1243, 1336, 672, 53, 1495,
- 1680, 1637, 593, -1, 386, 1015, 1548, -1, 929, 297, 420, -1, -1, 202, 837,
- 1212, 725, -1, 109, 1152, 1426, 641, 40, 1068, -1, 578, 1587, -1, 352, 982,
- 459, 503, 890, 1509, -1, -1, 778, 158, 1183, 1364, 686, 70, 1123, 1278, 525,
- 1461, 1642, 1029, 1387, 1553, -1, 943, 311, 444, -1, -1, 221, 851, -1, 739,
- 120, -1, 1440, -1, -1, 5, 1082, -1, 606, -1, 990, 366, 480, 1523, -1,
- 273, 904, 407, 792, 172, 1264, 1194, 1341, 700, 84, 1135, 1475, 616, 1635, 1043,
- 1673, 528, -1, -1, 327, 957, 470, 1486, 865, 236, -1, 1304, 753, 133, 1164,
- -1, 661, 33, 1658, 1092, -1, 571, 1623, 1004, -1, 495, 1537, 918, 287, 1270,
- -1, 806, 387, 1206, 1369, 715, -1, 98, 1147, 1415, 630, 18, 1057, -1, 556,
- 1576, -1, 341, 971, 437, 832, 252, 879, 1382, 1301, 767, 147, 1255, 1342, 675,
- 59, 1104, 1450, 599, 1498, 1686, 392, 1018, 1551, -1, 932, 300, 424, -1, -1,
- 204, 840, 1218, 728, 110, -1, 1429, -1, 644, 46, 1071, -1, 584, 1590, -1,
- 355, 463, 1512, -1, 893, 1259, -1, 781, -1, 161, 1189, 1370, 689, 73, 1112,
- 1464, 531, 1645, 1651, 195, 1032, 510, 1556, 316, 946, 450, -1, 854, 222, 1230,
- -1, 742, 123, 1160, 1443, 650, 11, -1, 389, 1087, 612, 1610, 993, 369, 1526,
- -1, 907, 276, -1, 408, 795, 175, 1201, 1347, -1, 703, 87, 1141, 1404, 619,
- 831, 1046, 1679, 534, 1568, -1, 330, 960, 476, 1489, 868, -1, -1, 1310, 756,
- 136, 1170, 1320, 664, 39, 1098, 1664, 577, 1626, -1, 378, 1007, 498, 1540, 921,
- 289, 1276, -1, 809, 191, 1213, -1, 718, 101, -1, 1153, 1418, 633, 24, 1060,
- 1646, 562, 1579, 974, 344, 403, 443, 1239, 256, 882, 1385, 770, 1307, 150, 1271,
- 1348, 678, 62, 1110, 1453, 605, 829, 1501, 395, 1021, 1692, -1, 303, 935, 428,
- -1, 843, 210, 1224, -1, 731, 113, -1, 1432, 647, -1, 50, 1074, -1, 590,
- 1593, -1, 358, 469, 1515, -1, 261, 263, 896, 784, 164, -1, 1195, 1325, 692,
- 76, 1118, 1467, 537, 1657, 1035, 201, 1560, -1, -1, 319, 949, 456, 1478, 857,
- 226, 1236, 1288, 745, 126, 1165, 1446, 653, 17, -1, 391, 1093, 555, 1615, 372,
- 996, 486, 1529, 910, 279, -1, -1, 798, 178, 1207, 1353, 707, 90, 1685, 1130,
- 1407, 622, 2, 1049, 1571, 540, -1, 963, 333, 417, 816, 1490, 241, 871, 1244,
- 1316, 759, 139, 1176, 1326, 667, 45, 1105, 1373, 583, 1629, 1670, 382, 1010, 1543,
- -1, 292, 924, -1, -1, 812, 194, 1219, -1, 720, 104, -1, 1421, 636, -1,
- 30, 1063, -1, 568, 1582, 977, 347, 1, 449, 1390, 885, 1250, 1504, 773, 153,
- 1313, 1273, 1354, 681, 65, 1113, 1456, 611, 1641, 1024, 398, 502, 1698, -1, 938,
- -1, 434, -1, 846, 214, 1231, -1, 734, -1, 1161, 1435, -1, 56, -1, 1077,
- 1596, 596, -1, 361, -1, 475, 1518, 899, 265, 267, -1, 787, 167, 1184, 1331,
- 695, 1470, 79, 1124, 1663, 543, 0, 1038, -1, 518, 1563, -1, 322, 952, 460,
- 1481, 230, 860, -1, 1294, 748, 129, 1171, -1, 656, 23, 1099, 1648, 561, 819,
- 1618, 375, 999, 490, 1532, 282, 913, 517, -1, 801, 182, 1196, 1359, 710, 93,
- 1136, 1410, 1691, 625, 8, 219, 1052, 546, 1573, 966, 336, 427, 822, 1378, 245,
- 874, 1493, 762, 142, 1291, 1235, 1332, 670, 49, 1111, 1611, 589, 825, 827, 384,
- 1013, 1546, 1634, 295, 927, 418, 815, 835, 198, 1225, 1676, 723, 107, 1148, 1424,
- 639, 36, -1, 1066, -1, 574, 1585, 350, 980, 455, 1507, 888, 1394, 1256, -1,
- 776, 156, -1, 1360, 684, 1459, 68, 1119, -1, 521, -1, 1027, 401, 504, -1,
- -1, 307, 309, 440, 941, 218, 849, -1, -1, 737, 118, 1438, -1, -1, 987,
- 1080, 1613, 602, 1602, -1, 364, 988, 423, 1521, 902, 269, 271, 405, 790, 170,
- 1190, 1260, 698, 82, 1284, 1131, 1337, 549, 1473, 1041, 1669, 524, 1566, 955, 325,
- 466, 1484,
-};
-
-static const size_t cfold_len = sizeof cfold / sizeof cfold[0];
-static const uint16_t* cfold_tab = &cfold[0][0];
-
-static inline uint32_t hash(uint32_t key, size_t len) {
- uint64_t x = key*0xc6a4a7935bd1e99d;
- return (uint32_t)((uint32_t)x*len >> 32);
-}
-
-static inline int cfold_lookup(uint32_t codep) {
- int idx, i = hash(codep, HT_SIZE);
- while ((idx = cfold_idx[i]) != -1 && cfold_tab[idx] != codep)
- if (++i == HT_SIZE) i = 0;
- return idx;
-}
-
-static inline int utf8_isupper(uint32_t codep) {
- if (codep < 128) return (codep >= 'A') & (codep <= 'Z');
- return (cfold_lookup(codep) & 1) == 0;
-}
-
-static inline int utf8_islower(uint32_t codep) {
- if (codep < 128) return (codep >= 'a') & (codep <= 'z');
- int idx = cfold_lookup(codep);
- return (idx != -1) & (idx & 1);
-}
-
-static inline uint32_t utf8_toupper(uint32_t codep) {
- if (codep < 128) return toupper(codep);
- int idx = cfold_lookup(codep);
- return (idx == -1) | !(idx & 1) ? codep : cfold_tab[idx - 1];
-}
-
-static inline uint32_t utf8_tolower(uint32_t codep) {
- if (codep < 128) return tolower(codep);
- int idx = cfold_lookup(codep);
- return (idx == -1) | (idx & 1) ? codep : cfold_tab[idx + 1];
-}
-
-static inline int utf8_isalpha(uint32_t codep) {
- if (codep < 128) return isalpha(codep) != 0;
- return cfold_lookup(codep) != -1;
-}
-
-static inline int utf8_isspace(uint32_t codep) {
- static uint16_t t[] = {0x09, 0x0D, 0x20, 0x85, 0xA0, 0x1680,
- 0x2028, 0x2029, 0x202F, 0x205F, 0x3000};
- for (int i=0; i<sizeof t/sizeof *t; ++i)
- if (codep == t[i]) return true;
- return (codep >= 0x2000) & (codep <= 0x200A);
-}
-
-static inline int utf8_isdigit(uint32_t codep) {
- return ((codep >= '0') & (codep <= '9')) ||
- ((codep >= 0xFF10) & (codep <= 0xFF19)) ||
- ((codep >= 0x1D7CE) & (codep <= 0x1D7FF));
-}
-
-static inline int utf8_isxdigit(uint32_t codep) {
- static uint16_t t[] = {0x30, 0x39, 0x41, 0x46, 0x61, 0x66,
- 0xFF10, 0xFF19, 0xFF21, 0xFF26, 0xFF41, 0xFF46};
- for (int i=1; i<sizeof t/sizeof *t; i += 2)
- if (codep <= t[i]) return codep >= t[i - 1];
- return false;
-}
-
-static inline int utf8_isalnum(uint32_t codep) {
- if (codep < 128) return isalnum(codep) != 0;
- if ((codep >= 0xFF10) & (codep <= 0xFF19) ||
- ((codep >= 0x1D7CE) & (codep <= 0x1D7FF))) return true;
- return cfold_lookup(codep) != -1;
-}
-
-// ------------------------------------------------------------
-#if 0
-#include <stdio.h>
-
-int coll = 0, count = 0;
-
-void maketables()
-{
- for (int i=0; i<HT_SIZE; ++i) cfold_idx[i] = -1;
- for (size_t i = 0; i < cfold_len*2; ++i)
- {
- size_t index = hash(cfold_tab[i], HT_SIZE);
- coll += cfold_idx[index] != -1;
-
- while (cfold_idx[index] != -1) {
- if (++index >= HT_SIZE) index = 0;
- ++ count;
- }
- cfold_idx[index] = i;
- }
-}
-
-void printtables()
-{
- printf("static short cfold_idx[%d] = {\n ", HT_SIZE);
- for (int i = 0; i < HT_SIZE; ++i) {
- printf(" %d,", cfold_idx[i]);
- if ((i+1) % 15 == 0) printf("\n ");
- }
- printf("\n};\n");
-}
-
-void printtest()
-{
- for (size_t i=0; i<cfold_len; ++i) {
- printf("tolow %d => %d\n", cfold[i][UPPER], utf8_tolower(cfold[i][UPPER]));
- printf("toupp %d => %d\n", cfold[i][LOWER], utf8_toupper(cfold[i][LOWER]));
- }
-}
-
-
-struct Buf { char str[8]; int len; };
-
-static int pushchar(struct Buf* buf, uint8_t c)
-{
- buf->str[buf->len++] = c;
- return 0;
-}
-
-static int utf8_encode(struct Buf* buf, uint32_t c)
-{
- if (c < 0x80UL) {
- return pushchar(buf, c);
- } else if (c < 0x0800UL) {
- return !((pushchar(buf, (c >> 6 & 0x1F) | 0xC0) == 0) &&
- (pushchar(buf, (c >> 0 & 0x3F) | 0x80) == 0));
- } else if (c < 0x010000UL) {
- if (c >= 0xd800 && c <= 0xdfff) {
- fprintf(stderr, "invalid codepoint %06x", c);
- return -1;
- }
- return !((pushchar(buf, (c >> 12 & 0x0F) | 0xE0) == 0) &&
- (pushchar(buf, (c >> 6 & 0x3F) | 0x80) == 0) &&
- (pushchar(buf, (c >> 0 & 0x3F) | 0x80) == 0));
- } else if (c < 0x110000UL) {
- return !((pushchar(buf, (c >> 18 & 0x07) | 0xF0) == 0) &&
- (pushchar(buf, (c >> 12 & 0x3F) | 0x80) == 0) &&
- (pushchar(buf, (c >> 6 & 0x3F) | 0x80) == 0) &&
- (pushchar(buf, (c >> 0 & 0x3F) | 0x80) == 0));
- } else {
- fprintf(stderr, "unable to encode %06x as UTF-8", c);
- return -1;
- }
-}
-
-void printchars()
-{
- c_forrange (i, int, cfold_len) {
- struct Buf b1 = {{0}}, b2 = {{0}};
- utf8_encode(&b1, cfold[i][UPPER]);
- utf8_encode(&b2, cfold[i][LOWER]);
- printf("%4d: %6u : %s => %s : %d\n", i, cfold[i][UPPER], b1.str, b2.str, cfold[i][LOWER] - cfold[i][UPPER]);
- }
-}
-
-int main()
-{
- //printchars();
- maketables();
- printtables();
- //printtest();
- printf("\ncoll1 %d, probe1: %d\n", coll, count);
- printf("sizes %zu\n", cfold_len*4 + HT_SIZE*2);
- printf("count %zu\n", cfold_len);
- int n = 0;
- for (int i=128; i<256; ++i)
- n += cfold_lookup(i) != -1;
- printf("look: %zu, %d\n", look, n);
-}
-#endif
+#include <stdint.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <stc/utf8.h>
+#include <stdbool.h>
+
+// Case-mapping ranges consumed by utf8_tolower() and utf8_toupper()
+// (generated table; see casefold.py).
+//
+// Each entry describes an inclusive source range [c0, c1] and m1, the value
+// that c1 maps to. The offset d = m1 - c1 applies to the whole range.
+// When d == 1 the range holds interleaved pairs: only code points with the
+// same parity as c1 map to their successor, the others are already mapped.
+//
+// Entries are listed in ascending c0/c1 order; utf8_tolower() stops at the
+// first entry whose c1 is not below the code point being looked up.
+// All fields are 16 bit, so only code points up to 0xFFFF can be described.
+// The table is read-only, hence const.
+static const struct CaseFold { uint16_t c0, c1, m1; } casefold[] = {
+    {65, 90, 122}, {181, 181, 956}, {192, 214, 246}, {216, 222, 254},
+    {256, 302, 303}, {306, 310, 311}, {313, 327, 328}, {330, 374, 375}, {376, 376, 255},
+    {377, 381, 382}, {383, 383, 115}, {385, 385, 595}, {386, 388, 389}, {390, 390, 596},
+    {391, 391, 392}, {393, 394, 599}, {395, 395, 396}, {398, 398, 477}, {399, 399, 601},
+    {400, 400, 603}, {401, 401, 402}, {403, 403, 608}, {404, 404, 611}, {406, 406, 617},
+    {407, 407, 616}, {408, 408, 409}, {412, 412, 623}, {413, 413, 626}, {415, 415, 629},
+    {416, 420, 421}, {422, 422, 640}, {423, 423, 424}, {425, 425, 643}, {428, 428, 429},
+    {430, 430, 648}, {431, 431, 432}, {433, 434, 651}, {435, 437, 438}, {439, 439, 658},
+    {440, 442, 443}, {452, 452, 454}, {453, 453, 454}, {455, 455, 457}, {456, 456, 457},
+    {458, 458, 460}, {459, 475, 476}, {478, 494, 495}, {497, 497, 499}, {498, 500, 501},
+    {502, 502, 405}, {503, 503, 447}, {504, 542, 543}, {544, 544, 414}, {546, 562, 563},
+    {570, 570, 11365}, {571, 571, 572}, {573, 573, 410}, {574, 574, 11366}, {577, 577, 578},
+    {579, 579, 384}, {580, 580, 649}, {581, 581, 652}, {582, 590, 591}, {837, 837, 953},
+    {880, 882, 883}, {886, 886, 887}, {895, 895, 1011}, {902, 902, 940}, {904, 906, 943},
+    {908, 908, 972}, {910, 911, 974}, {913, 929, 961}, {931, 939, 971}, {962, 962, 963},
+    {975, 975, 983}, {976, 976, 946}, {977, 977, 952}, {981, 981, 966}, {982, 982, 960},
+    {984, 1006, 1007}, {1008, 1008, 954}, {1009, 1009, 961}, {1012, 1012, 952}, {1013, 1013, 949},
+    {1015, 1015, 1016}, {1017, 1017, 1010}, {1018, 1018, 1019}, {1021, 1023, 893},
+    {1024, 1039, 1119}, {1040, 1071, 1103}, {1120, 1152, 1153}, {1162, 1214, 1215},
+    {1216, 1216, 1231}, {1217, 1229, 1230}, {1232, 1326, 1327}, {1329, 1366, 1414},
+    {4256, 4293, 11557}, {4295, 4296, 11560}, {5112, 5117, 5109}, {7296, 7296, 1074},
+    {7297, 7297, 1076}, {7298, 7298, 1086}, {7299, 7300, 1090}, {7301, 7301, 1090},
+    {7302, 7302, 1098}, {7303, 7303, 1123}, {7304, 7304, 42571}, {7312, 7354, 4346},
+    {7357, 7359, 4351}, {7680, 7828, 7829}, {7835, 7835, 7777}, {7838, 7838, 223},
+    {7840, 7934, 7935}, {7944, 7951, 7943}, {7960, 7965, 7957}, {7976, 7983, 7975},
+    {7992, 7999, 7991}, {8008, 8013, 8005}, {8025, 8028, 8020}, {8040, 8047, 8039},
+    {8072, 8079, 8071}, {8088, 8095, 8087}, {8104, 8111, 8103}, {8120, 8121, 8113},
+    {8122, 8123, 8049}, {8124, 8124, 8115}, {8126, 8126, 953}, {8136, 8139, 8053},
+    {8140, 8140, 8131}, {8152, 8153, 8145}, {8154, 8155, 8055}, {8168, 8169, 8161},
+    {8170, 8171, 8059}, {8172, 8172, 8165}, {8184, 8185, 8057}, {8186, 8187, 8061},
+    {8188, 8188, 8179}, {8486, 8486, 969}, {8490, 8490, 107}, {8491, 8491, 229},
+    {8498, 8498, 8526}, {8544, 8559, 8575}, {8579, 8579, 8580}, {9398, 9423, 9449},
+    {11264, 11311, 11359}, {11360, 11360, 11361}, {11362, 11362, 619}, {11363, 11363, 7549},
+    {11364, 11364, 637}, {11367, 11371, 11372}, {11373, 11373, 593}, {11374, 11374, 625},
+    {11375, 11375, 592}, {11376, 11376, 594}, {11378, 11380, 11381}, {11390, 11391, 576},
+    {11392, 11490, 11491}, {11499, 11501, 11502}, {11506, 11508, 11509}, {42562, 42604, 42605},
+    {42624, 42650, 42651}, {42786, 42798, 42799}, {42802, 42862, 42863}, {42873, 42875, 42876},
+    {42877, 42877, 7545}, {42878, 42886, 42887}, {42891, 42891, 42892}, {42893, 42893, 613},
+    {42896, 42898, 42899}, {42902, 42920, 42921}, {42922, 42922, 614}, {42923, 42923, 604},
+    {42924, 42924, 609}, {42925, 42925, 620}, {42926, 42926, 618}, {42928, 42928, 670},
+    {42929, 42929, 647}, {42930, 42930, 669}, {42931, 42931, 43859}, {42932, 42946, 42947},
+    {42948, 42948, 42900}, {42949, 42949, 642}, {42950, 42950, 7566}, {42951, 42953, 42954},
+    {42960, 42962, 42963}, {42968, 42970, 42971}, {43888, 43913, 5049}, {65313, 65338, 65370},
+}; // 188
+// Permutation of casefold[] indices, giving the order in which
+// utf8_toupper() visits the entries.
+// NOTE(review): the entries appear to be ordered by ascending first mapped
+// code point, i.e. m1 - (c1 - c0); utf8_toupper()'s early exit depends on
+// such an ordering - confirm against the generator (casefold.py).
+// Indices fit in 8 bits because casefold[] has fewer than 256 entries.
+// The table is read-only, hence const.
+static const uint8_t cfold_low[] = {
+    0, 138, 10, 111, 2, 139, 3, 8, 4, 5, 6, 7, 9, 59, 12, 14, 16, 20, 49, 25,
+    56, 52, 29, 31, 33, 35, 37, 39, 50, 40, 41, 42, 43, 44, 45, 17, 46, 47, 48, 51,
+    53, 55, 155, 58, 62, 152, 150, 153, 11, 13, 15, 18, 19, 171, 21, 172, 22, 167, 170, 24,
+    23, 174, 146, 173, 26, 151, 27, 28, 148, 30, 181, 32, 176, 34, 60, 36, 61, 38, 177, 175,
+    64, 65, 87, 67, 68, 71, 75, 83, 76, 82, 63, 126, 80, 1, 78, 81, 72, 73, 77, 137,
+    69, 70, 74, 79, 85, 66, 84, 86, 89, 99, 100, 101, 102, 103, 104, 88, 90, 105, 91, 93,
+    92, 94, 95, 107, 108, 186, 98, 164, 147, 182, 109, 110, 112, 113, 114, 115, 116, 117, 118, 119,
+    124, 127, 130, 134, 132, 135, 120, 121, 122, 123, 125, 128, 129, 131, 133, 136, 140, 141, 142, 143,
+    144, 145, 54, 57, 149, 154, 156, 157, 158, 96, 97, 159, 106, 160, 161, 162, 163, 165, 166, 168,
+    180, 169, 179, 183, 184, 185, 178, 187,
+};
+
+// Map code point c through the casefold table in the "lower" direction.
+// Returns the mapped code point, or c itself when no entry covers it.
+uint32_t utf8_tolower(uint32_t c) {
+    const size_t count = sizeof casefold / sizeof casefold[0];
+    size_t k = 0;
+
+    // Skip every entry that ends below c; the first remaining one decides.
+    while (k < count && c > casefold[k].c1)
+        ++k;
+    if (k == count || c < casefold[k].c0)
+        return c;
+
+    const int delta = casefold[k].m1 - casefold[k].c1;
+    if (delta != 1)
+        return c + delta;
+    // delta == 1: interleaved pairs, only code points with c1's parity move.
+    return c + ((casefold[k].c1 & 1) == (c & 1));
+}
+
+// Map code point c through the casefold table in the "upper" direction,
+// visiting the entries in the order given by cfold_low.
+// Returns the mapped code point, or c itself when no entry covers it.
+uint32_t utf8_toupper(uint32_t c) {
+    const size_t count = sizeof cfold_low / sizeof cfold_low[0];
+
+    for (size_t k = 0; k < count; ++k) {
+        const struct CaseFold *entry = &casefold[cfold_low[k]];
+        if (c > entry->m1)
+            continue;
+
+        const int delta = entry->m1 - entry->c1;
+        // Below the first mapped value of this entry: nothing to convert.
+        if (c < entry->c0 + delta)
+            return c;
+        if (delta != 1)
+            return c - delta;
+        // delta == 1: interleaved pairs, only code points with m1's parity move.
+        return c - ((entry->m1 & 1) == (c & 1));
+    }
+    return c;
+}
+
+// True when utf8_tolower() would change c.
+bool utf8_isupper(uint32_t c) {
+    const uint32_t lowered = utf8_tolower(c);
+    return lowered != c;
+}
+
+// True when utf8_toupper() would change c.
+bool utf8_islower(uint32_t c) {
+    const uint32_t raised = utf8_toupper(c);
+    return raised != c;
+}
+
+// True for the white-space code points recognised by this module:
+// U+0009..U+000D, U+0020, U+0085, U+00A0, U+1680, U+2000..U+200A,
+// U+2028, U+2029, U+202F, U+205F and U+3000.
+bool utf8_isspace(uint32_t c) {
+    if (c >= 0x2000 && c <= 0x200A)
+        return true;
+
+    switch (c) {
+    case 0x09: case 0x0A: case 0x0B: case 0x0C: case 0x0D:
+    case 0x20: case 0x85: case 0xA0:
+    case 0x1680:
+    case 0x2028: case 0x2029: case 0x202F: case 0x205F:
+    case 0x3000:
+        return true;
+    default:
+        return false;
+    }
+}
+
+// True for ASCII digits '0'..'9' and the range U+FF10..U+FF19.
+bool utf8_isdigit(uint32_t c) {
+    if (c >= '0' && c <= '9')
+        return true;
+    return c >= 0xFF10 && c <= 0xFF19;
+}
+
+// True for hexadecimal digits: 0-9, A-F, a-f and the corresponding
+// ranges U+FF10..U+FF19, U+FF21..U+FF26, U+FF41..U+FF46.
+bool utf8_isxdigit(uint32_t c) {
+    const bool ascii_hex = (c >= 0x30 && c <= 0x39) ||
+                           (c >= 0x41 && c <= 0x46) ||
+                           (c >= 0x61 && c <= 0x66);
+    const bool wide_hex = (c >= 0xFF10 && c <= 0xFF19) ||
+                          (c >= 0xFF21 && c <= 0xFF26) ||
+                          (c >= 0xFF41 && c <= 0xFF46);
+    return ascii_hex || wide_hex;
+}
+
+// True for alphanumeric code points: isalnum() decides below 128; above
+// that, U+FF10..U+FF19 and anything utf8_islower()/utf8_isupper() accept.
+bool utf8_isalnum(uint32_t c) {
+    if (c >= 128) {
+        const bool wide_digit = (c >= 0xFF10) && (c <= 0xFF19);
+        return wide_digit || utf8_islower(c) || utf8_isupper(c);
+    }
+    return isalnum(c) != 0;
+}
+
+// True for alphabetic code points: isalpha() decides below 128; above
+// that, anything utf8_islower() or utf8_isupper() accepts.
+bool utf8_isalpha(uint32_t c) {
+    if (c >= 128)
+        return utf8_islower(c) || utf8_isupper(c);
+    return isalpha(c) != 0;
+}
+
+
+#ifdef TEST
+// Smoke test: for the first code point of every casefold entry, print the
+// code point, its utf8_tolower() result and the utf8_toupper() of that
+// result, both UTF-8 encoded and as numbers.
+int main(void)
+{
+    // Bound the loop by the table that is actually indexed (casefold),
+    // not by cfold_low, so the test stays valid if the sizes ever differ.
+    const size_t count = sizeof casefold / sizeof casefold[0];
+    for (size_t i = 0; i < count; ++i)
+    {
+        char x[3][5]={0};   // zero-filled, so each encoded string is terminated
+        uint32_t a = casefold[i].c0;
+        uint32_t b = utf8_tolower(a);
+        uint32_t c = utf8_toupper(b);
+
+        utf8_encode(x[0], a);
+        utf8_encode(x[1], b);
+        utf8_encode(x[2], c);
+        printf("%s %s %s - %u %u %u\n", x[0], x[1], x[2],
+               (unsigned)a, (unsigned)b, (unsigned)c);
+    }
+    return 0;
+}
+#endif
+