summaryrefslogtreecommitdiffhomepage
path: root/src/utf8tabs.inc
diff options
context:
space:
mode:
authorTyge Lovset <[email protected]>2022-06-07 07:57:10 +0200
committerTyge Lovset <[email protected]>2022-06-07 07:57:10 +0200
commitd5ab884da0c68361a4280783ad46dc357e1adb23 (patch)
treea6dec0de3bd897490ca5427b49157b1215736774 /src/utf8tabs.inc
parentbfcd2e3a6912ac392694c238fce8fc613d2f1525 (diff)
downloadSTC-modified-d5ab884da0c68361a4280783ad46dc357e1adb23.tar.gz
STC-modified-d5ab884da0c68361a4280783ad46dc357e1adb23.zip
Bugfixed / Updated utf8tabs.* and utf8code.c
Diffstat (limited to 'src/utf8tabs.inc')
-rw-r--r--src/utf8tabs.inc275
1 files changed, 213 insertions, 62 deletions
diff --git a/src/utf8tabs.inc b/src/utf8tabs.inc
index 84b69eec..058cfa82 100644
--- a/src/utf8tabs.inc
+++ b/src/utf8tabs.inc
@@ -1,62 +1,213 @@
-#include <stdint.h>
-
-struct CaseFold { uint16_t c0, c1, m1; };
-
-static struct CaseFold casefold[] = {
- {65, 90, 122}, {181, 181, 956}, {192, 214, 246}, {216, 222, 254},
- {256, 302, 303}, {306, 310, 311}, {313, 327, 328}, {330, 374, 375}, {376, 376, 255},
- {377, 381, 382}, {383, 383, 115}, {385, 385, 595}, {386, 388, 389}, {390, 390, 596},
- {391, 391, 392}, {393, 394, 599}, {395, 395, 396}, {398, 398, 477}, {399, 399, 601},
- {400, 400, 603}, {401, 401, 402}, {403, 403, 608}, {404, 404, 611}, {406, 406, 617},
- {407, 407, 616}, {408, 408, 409}, {412, 412, 623}, {413, 413, 626}, {415, 415, 629},
- {416, 420, 421}, {422, 422, 640}, {423, 423, 424}, {425, 425, 643}, {428, 428, 429},
- {430, 430, 648}, {431, 431, 432}, {433, 434, 651}, {435, 437, 438}, {439, 439, 658},
- {440, 442, 443}, {452, 452, 454}, {453, 453, 454}, {455, 455, 457}, {456, 456, 457},
- {458, 458, 460}, {459, 475, 476}, {478, 494, 495}, {497, 497, 499}, {498, 500, 501},
- {502, 502, 405}, {503, 503, 447}, {504, 542, 543}, {544, 544, 414}, {546, 562, 563},
- {570, 570, 11365}, {571, 571, 572}, {573, 573, 410}, {574, 574, 11366}, {577, 577, 578},
- {579, 579, 384}, {580, 580, 649}, {581, 581, 652}, {582, 590, 591}, {837, 837, 953},
- {880, 882, 883}, {886, 886, 887}, {895, 895, 1011}, {902, 902, 940}, {904, 906, 943},
- {908, 908, 972}, {910, 911, 974}, {913, 929, 961}, {931, 939, 971}, {962, 962, 963},
- {975, 975, 983}, {976, 976, 946}, {977, 977, 952}, {981, 981, 966}, {982, 982, 960},
- {984, 1006, 1007}, {1008, 1008, 954}, {1009, 1009, 961}, {1012, 1012, 952}, {1013, 1013, 949},
- {1015, 1015, 1016}, {1017, 1017, 1010}, {1018, 1018, 1019}, {1021, 1023, 893},
- {1024, 1039, 1119}, {1040, 1071, 1103}, {1120, 1152, 1153}, {1162, 1214, 1215},
- {1216, 1216, 1231}, {1217, 1229, 1230}, {1232, 1326, 1327}, {1329, 1366, 1414},
- {4256, 4293, 11557}, {4295, 4296, 11560}, {5112, 5117, 5109}, {7296, 7296, 1074},
- {7297, 7297, 1076}, {7298, 7298, 1086}, {7299, 7300, 1090}, {7301, 7301, 1090},
- {7302, 7302, 1098}, {7303, 7303, 1123}, {7304, 7304, 42571}, {7312, 7354, 4346},
- {7357, 7359, 4351}, {7680, 7828, 7829}, {7835, 7835, 7777}, {7838, 7838, 223},
- {7840, 7934, 7935}, {7944, 7951, 7943}, {7960, 7965, 7957}, {7976, 7983, 7975},
- {7992, 7999, 7991}, {8008, 8013, 8005}, {8025, 8028, 8020}, {8040, 8047, 8039},
- {8072, 8079, 8071}, {8088, 8095, 8087}, {8104, 8111, 8103}, {8120, 8121, 8113},
- {8122, 8123, 8049}, {8124, 8124, 8115}, {8126, 8126, 953}, {8136, 8139, 8053},
- {8140, 8140, 8131}, {8152, 8153, 8145}, {8154, 8155, 8055}, {8168, 8169, 8161},
- {8170, 8171, 8059}, {8172, 8172, 8165}, {8184, 8185, 8057}, {8186, 8187, 8061},
- {8188, 8188, 8179}, {8486, 8486, 969}, {8490, 8490, 107}, {8491, 8491, 229},
- {8498, 8498, 8526}, {8544, 8559, 8575}, {8579, 8579, 8580}, {9398, 9423, 9449},
- {11264, 11311, 11359}, {11360, 11360, 11361}, {11362, 11362, 619}, {11363, 11363, 7549},
- {11364, 11364, 637}, {11367, 11371, 11372}, {11373, 11373, 593}, {11374, 11374, 625},
- {11375, 11375, 592}, {11376, 11376, 594}, {11378, 11380, 11381}, {11390, 11391, 576},
- {11392, 11490, 11491}, {11499, 11501, 11502}, {11506, 11508, 11509}, {42562, 42604, 42605},
- {42624, 42650, 42651}, {42786, 42798, 42799}, {42802, 42862, 42863}, {42873, 42875, 42876},
- {42877, 42877, 7545}, {42878, 42886, 42887}, {42891, 42891, 42892}, {42893, 42893, 613},
- {42896, 42898, 42899}, {42902, 42920, 42921}, {42922, 42922, 614}, {42923, 42923, 604},
- {42924, 42924, 609}, {42925, 42925, 620}, {42926, 42926, 618}, {42928, 42928, 670},
- {42929, 42929, 647}, {42930, 42930, 669}, {42931, 42931, 43859}, {42932, 42946, 42947},
- {42948, 42948, 42900}, {42949, 42949, 642}, {42950, 42950, 7566}, {42951, 42953, 42954},
- {42960, 42962, 42963}, {42968, 42970, 42971}, {43888, 43913, 5049}, {65313, 65338, 65370},
-}; // 188
-
-static uint8_t cfold_low[] = {
- 0, 138, 10, 111, 2, 139, 3, 8, 4, 5, 6, 7, 9, 59, 12, 14, 16, 20, 49, 25,
- 56, 52, 29, 31, 33, 35, 37, 39, 50, 40, 41, 42, 43, 44, 45, 17, 46, 47, 48, 51,
- 53, 55, 155, 58, 62, 152, 150, 153, 11, 13, 15, 18, 19, 171, 21, 172, 22, 167, 170, 24,
- 23, 174, 146, 173, 26, 151, 27, 28, 148, 30, 181, 32, 176, 34, 60, 36, 61, 38, 177, 175,
- 64, 65, 87, 67, 68, 71, 75, 83, 76, 82, 63, 126, 80, 1, 78, 81, 72, 73, 77, 137,
- 69, 70, 74, 79, 85, 66, 84, 86, 89, 99, 100, 101, 102, 103, 104, 88, 90, 105, 91, 93,
- 92, 94, 95, 107, 108, 186, 98, 164, 147, 182, 109, 110, 112, 113, 114, 115, 116, 117, 118, 119,
- 124, 127, 130, 134, 132, 135, 120, 121, 122, 123, 125, 128, 129, 131, 133, 136, 140, 141, 142, 143,
- 144, 145, 54, 57, 149, 154, 156, 157, 158, 96, 97, 159, 106, 160, 161, 162, 163, 165, 166, 168,
- 180, 169, 179, 183, 184, 185, 178, 187,
-};
+#include <stdint.h>
+
+struct CaseMapping { uint16_t c0, c1, m1; };
+
+static struct CaseMapping casemappings[] = {
+ {0x0041, 0x005A, 0x007A}, // A a (26) LATIN CAPITAL LETTER Z
+ {0x00B5, 0x00B5, 0x03BC}, // µ μ ( 1) MICRO SIGN
+ {0x00C0, 0x00D6, 0x00F6}, // À à (23) LATIN CAPITAL LETTER O WITH DIAERESIS
+ {0x00D8, 0x00DE, 0x00FE}, // Ø ø ( 7) LATIN CAPITAL LETTER THORN
+ {0x0100, 0x012E, 0x012F}, // Ā ā (24) LATIN CAPITAL LETTER I WITH OGONEK
+ {0x0132, 0x0136, 0x0137}, // IJ ij ( 3) LATIN CAPITAL LETTER K WITH CEDILLA
+ {0x0139, 0x0147, 0x0148}, // Ĺ ĺ ( 8) LATIN CAPITAL LETTER N WITH CARON
+ {0x014A, 0x0176, 0x0177}, // Ŋ ŋ (23) LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+ {0x0178, 0x0178, 0x00FF}, // Ÿ ÿ ( 1) LATIN CAPITAL LETTER Y WITH DIAERESIS
+ {0x0179, 0x017D, 0x017E}, // Ź ź ( 3) LATIN CAPITAL LETTER Z WITH CARON
+ {0x017F, 0x017F, 0x0073}, // ſ s ( 1) LATIN SMALL LETTER LONG S
+ {0x0181, 0x0181, 0x0253}, // Ɓ ɓ ( 1) LATIN CAPITAL LETTER B WITH HOOK
+ {0x0182, 0x0184, 0x0185}, // Ƃ ƃ ( 2) LATIN CAPITAL LETTER TONE SIX
+ {0x0186, 0x0186, 0x0254}, // Ɔ ɔ ( 1) LATIN CAPITAL LETTER OPEN O
+ {0x0187, 0x0187, 0x0188}, // Ƈ ƈ ( 1) LATIN CAPITAL LETTER C WITH HOOK
+ {0x0189, 0x018A, 0x0257}, // Ɖ ɖ ( 2) LATIN CAPITAL LETTER D WITH HOOK
+ {0x018B, 0x018B, 0x018C}, // Ƌ ƌ ( 1) LATIN CAPITAL LETTER D WITH TOPBAR
+ {0x018E, 0x018E, 0x01DD}, // Ǝ ǝ ( 1) LATIN CAPITAL LETTER REVERSED E
+ {0x018F, 0x018F, 0x0259}, // Ə ə ( 1) LATIN CAPITAL LETTER SCHWA
+ {0x0190, 0x0190, 0x025B}, // Ɛ ɛ ( 1) LATIN CAPITAL LETTER OPEN E
+ {0x0191, 0x0191, 0x0192}, // Ƒ ƒ ( 1) LATIN CAPITAL LETTER F WITH HOOK
+ {0x0193, 0x0193, 0x0260}, // Ɠ ɠ ( 1) LATIN CAPITAL LETTER G WITH HOOK
+ {0x0194, 0x0194, 0x0263}, // Ɣ ɣ ( 1) LATIN CAPITAL LETTER GAMMA
+ {0x0196, 0x0196, 0x0269}, // Ɩ ɩ ( 1) LATIN CAPITAL LETTER IOTA
+ {0x0197, 0x0197, 0x0268}, // Ɨ ɨ ( 1) LATIN CAPITAL LETTER I WITH STROKE
+ {0x0198, 0x0198, 0x0199}, // Ƙ ƙ ( 1) LATIN CAPITAL LETTER K WITH HOOK
+ {0x019C, 0x019C, 0x026F}, // Ɯ ɯ ( 1) LATIN CAPITAL LETTER TURNED M
+ {0x019D, 0x019D, 0x0272}, // Ɲ ɲ ( 1) LATIN CAPITAL LETTER N WITH LEFT HOOK
+ {0x019F, 0x019F, 0x0275}, // Ɵ ɵ ( 1) LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+ {0x01A0, 0x01A4, 0x01A5}, // Ơ ơ ( 3) LATIN CAPITAL LETTER P WITH HOOK
+ {0x01A6, 0x01A6, 0x0280}, // Ʀ ʀ ( 1) LATIN LETTER YR
+ {0x01A7, 0x01A7, 0x01A8}, // Ƨ ƨ ( 1) LATIN CAPITAL LETTER TONE TWO
+ {0x01A9, 0x01A9, 0x0283}, // Ʃ ʃ ( 1) LATIN CAPITAL LETTER ESH
+ {0x01AC, 0x01AC, 0x01AD}, // Ƭ ƭ ( 1) LATIN CAPITAL LETTER T WITH HOOK
+ {0x01AE, 0x01AE, 0x0288}, // Ʈ ʈ ( 1) LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+ {0x01AF, 0x01AF, 0x01B0}, // Ư ư ( 1) LATIN CAPITAL LETTER U WITH HORN
+ {0x01B1, 0x01B2, 0x028B}, // Ʊ ʊ ( 2) LATIN CAPITAL LETTER V WITH HOOK
+ {0x01B3, 0x01B5, 0x01B6}, // Ƴ ƴ ( 2) LATIN CAPITAL LETTER Z WITH STROKE
+ {0x01B7, 0x01B7, 0x0292}, // Ʒ ʒ ( 1) LATIN CAPITAL LETTER EZH
+ {0x01B8, 0x01B8, 0x01B9}, // Ƹ ƹ ( 1) LATIN CAPITAL LETTER EZH REVERSED
+ {0x01BC, 0x01BC, 0x01BD}, // Ƽ ƽ ( 1) LATIN CAPITAL LETTER TONE FIVE
+ {0x01C4, 0x01C4, 0x01C6}, // DŽ dž ( 1) LATIN CAPITAL LETTER DZ WITH CARON
+ {0x01C5, 0x01C5, 0x01C6}, // Dž dž ( 1) LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+ {0x01C7, 0x01C7, 0x01C9}, // LJ lj ( 1) LATIN CAPITAL LETTER LJ
+ {0x01C8, 0x01C8, 0x01C9}, // Lj lj ( 1) LATIN CAPITAL LETTER L WITH SMALL LETTER J
+ {0x01CA, 0x01CA, 0x01CC}, // NJ nj ( 1) LATIN CAPITAL LETTER NJ
+ {0x01CB, 0x01DB, 0x01DC}, // Nj nj ( 9) LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+ {0x01DE, 0x01EE, 0x01EF}, // Ǟ ǟ ( 9) LATIN CAPITAL LETTER EZH WITH CARON
+ {0x01F1, 0x01F1, 0x01F3}, // DZ dz ( 1) LATIN CAPITAL LETTER DZ
+ {0x01F2, 0x01F4, 0x01F5}, // Dz dz ( 2) LATIN CAPITAL LETTER G WITH ACUTE
+ {0x01F6, 0x01F6, 0x0195}, // Ƕ ƕ ( 1) LATIN CAPITAL LETTER HWAIR
+ {0x01F7, 0x01F7, 0x01BF}, // Ƿ ƿ ( 1) LATIN CAPITAL LETTER WYNN
+ {0x01F8, 0x021E, 0x021F}, // Ǹ ǹ (20) LATIN CAPITAL LETTER H WITH CARON
+ {0x0220, 0x0220, 0x019E}, // Ƞ ƞ ( 1) LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+ {0x0222, 0x0232, 0x0233}, // Ȣ ȣ ( 9) LATIN CAPITAL LETTER Y WITH MACRON
+ {0x023A, 0x023A, 0x2C65}, // Ⱥ ⱥ ( 1) LATIN CAPITAL LETTER A WITH STROKE
+ {0x023B, 0x023B, 0x023C}, // Ȼ ȼ ( 1) LATIN CAPITAL LETTER C WITH STROKE
+ {0x023D, 0x023D, 0x019A}, // Ƚ ƚ ( 1) LATIN CAPITAL LETTER L WITH BAR
+ {0x023E, 0x023E, 0x2C66}, // Ⱦ ⱦ ( 1) LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
+ {0x0241, 0x0241, 0x0242}, // Ɂ ɂ ( 1) LATIN CAPITAL LETTER GLOTTAL STOP
+ {0x0243, 0x0243, 0x0180}, // Ƀ ƀ ( 1) LATIN CAPITAL LETTER B WITH STROKE
+ {0x0244, 0x0244, 0x0289}, // Ʉ ʉ ( 1) LATIN CAPITAL LETTER U BAR
+ {0x0245, 0x0245, 0x028C}, // Ʌ ʌ ( 1) LATIN CAPITAL LETTER TURNED V
+ {0x0246, 0x024E, 0x024F}, // Ɇ ɇ ( 5) LATIN CAPITAL LETTER Y WITH STROKE
+ {0x0345, 0x0345, 0x03B9}, // ͅ ι ( 1) COMBINING GREEK YPOGEGRAMMENI
+ {0x0370, 0x0372, 0x0373}, // Ͱ ͱ ( 2) GREEK CAPITAL LETTER ARCHAIC SAMPI
+ {0x0376, 0x0376, 0x0377}, // Ͷ ͷ ( 1) GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
+ {0x037F, 0x037F, 0x03F3}, // Ϳ ϳ ( 1) GREEK CAPITAL LETTER YOT
+ {0x0386, 0x0386, 0x03AC}, // Ά ά ( 1) GREEK CAPITAL LETTER ALPHA WITH TONOS
+ {0x0388, 0x038A, 0x03AF}, // Έ έ ( 3) GREEK CAPITAL LETTER IOTA WITH TONOS
+ {0x038C, 0x038C, 0x03CC}, // Ό ό ( 1) GREEK CAPITAL LETTER OMICRON WITH TONOS
+ {0x038E, 0x038F, 0x03CE}, // Ύ ύ ( 2) GREEK CAPITAL LETTER OMEGA WITH TONOS
+ {0x0391, 0x03A1, 0x03C1}, // Α α (17) GREEK CAPITAL LETTER RHO
+ {0x03A3, 0x03AB, 0x03CB}, // Σ σ ( 9) GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ {0x03C2, 0x03C2, 0x03C3}, // ς σ ( 1) GREEK SMALL LETTER FINAL SIGMA
+ {0x03CF, 0x03CF, 0x03D7}, // Ϗ ϗ ( 1) GREEK CAPITAL KAI SYMBOL
+ {0x03D0, 0x03D0, 0x03B2}, // ϐ β ( 1) GREEK BETA SYMBOL
+ {0x03D1, 0x03D1, 0x03B8}, // ϑ θ ( 1) GREEK THETA SYMBOL
+ {0x03D5, 0x03D5, 0x03C6}, // ϕ φ ( 1) GREEK PHI SYMBOL
+ {0x03D6, 0x03D6, 0x03C0}, // ϖ π ( 1) GREEK PI SYMBOL
+ {0x03D8, 0x03EE, 0x03EF}, // Ϙ ϙ (12) COPTIC CAPITAL LETTER DEI
+ {0x03F0, 0x03F0, 0x03BA}, // ϰ κ ( 1) GREEK KAPPA SYMBOL
+ {0x03F1, 0x03F1, 0x03C1}, // ϱ ρ ( 1) GREEK RHO SYMBOL
+ {0x03F4, 0x03F4, 0x03B8}, // ϴ θ ( 1) GREEK CAPITAL THETA SYMBOL
+ {0x03F5, 0x03F5, 0x03B5}, // ϵ ε ( 1) GREEK LUNATE EPSILON SYMBOL
+ {0x03F7, 0x03F7, 0x03F8}, // Ϸ ϸ ( 1) GREEK CAPITAL LETTER SHO
+ {0x03F9, 0x03F9, 0x03F2}, // Ϲ ϲ ( 1) GREEK CAPITAL LUNATE SIGMA SYMBOL
+ {0x03FA, 0x03FA, 0x03FB}, // Ϻ ϻ ( 1) GREEK CAPITAL LETTER SAN
+ {0x03FD, 0x03FF, 0x037D}, // Ͻ ͻ ( 2) GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
+ {0x0400, 0x040F, 0x045F}, // Ѐ ѐ (16) CYRILLIC CAPITAL LETTER DZHE
+ {0x0410, 0x042F, 0x044F}, // А а (32) CYRILLIC CAPITAL LETTER YA
+ {0x0460, 0x0480, 0x0481}, // Ѡ ѡ (17) CYRILLIC CAPITAL LETTER KOPPA
+ {0x048A, 0x04BE, 0x04BF}, // Ҋ ҋ (27) CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
+ {0x04C0, 0x04C0, 0x04CF}, // Ӏ ӏ ( 1) CYRILLIC LETTER PALOCHKA
+ {0x04C1, 0x04CD, 0x04CE}, // Ӂ ӂ ( 7) CYRILLIC CAPITAL LETTER EM WITH TAIL
+ {0x04D0, 0x052E, 0x052F}, // Ӑ ӑ (48) CYRILLIC CAPITAL LETTER EL WITH DESCENDER
+ {0x0531, 0x0556, 0x0586}, // Ա ա (38) ARMENIAN CAPITAL LETTER FEH
+ {0x10A0, 0x10C5, 0x2D25}, // Ⴀ ⴀ (38) GEORGIAN CAPITAL LETTER HOE
+ {0x10C7, 0x10C7, 0x2D27}, // Ⴧ ⴧ ( 1) GEORGIAN CAPITAL LETTER YN
+ {0x10CD, 0x10CD, 0x2D2D}, // Ⴭ ⴭ ( 1) GEORGIAN CAPITAL LETTER AEN
+ {0x13F8, 0x13FD, 0x13F5}, // ᏸ Ᏸ ( 3) CHEROKEE SMALL LETTER MV
+ {0x1C80, 0x1C80, 0x0432}, // ᲀ в ( 1) CYRILLIC SMALL LETTER ROUNDED VE
+ {0x1C81, 0x1C81, 0x0434}, // ᲁ д ( 1) CYRILLIC SMALL LETTER LONG-LEGGED DE
+ {0x1C82, 0x1C82, 0x043E}, // ᲂ о ( 1) CYRILLIC SMALL LETTER NARROW O
+ {0x1C83, 0x1C84, 0x0442}, // ᲃ с ( 1) CYRILLIC SMALL LETTER TALL TE
+ {0x1C85, 0x1C85, 0x0442}, // ᲅ т ( 1) CYRILLIC SMALL LETTER THREE-LEGGED TE
+ {0x1C86, 0x1C86, 0x044A}, // ᲆ ъ ( 1) CYRILLIC SMALL LETTER TALL HARD SIGN
+ {0x1C87, 0x1C87, 0x0463}, // ᲇ ѣ ( 1) CYRILLIC SMALL LETTER TALL YAT
+ {0x1C88, 0x1C88, 0xA64B}, // ᲈ ꙋ ( 1) CYRILLIC SMALL LETTER UNBLENDED UK
+ {0x1C90, 0x1CBA, 0x10FA}, // Ა ა (22) GEORGIAN MTAVRULI CAPITAL LETTER AIN
+ {0x1CBD, 0x1CBF, 0x10FF}, // Ჽ ჽ ( 2) GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
+ {0x1E00, 0x1E94, 0x1E95}, // Ḁ ḁ (75) LATIN CAPITAL LETTER Z WITH LINE BELOW
+ {0x1E9B, 0x1E9B, 0x1E61}, // ẛ ṡ ( 1) LATIN SMALL LETTER LONG S WITH DOT ABOVE
+ {0x1E9E, 0x1E9E, 0x00DF}, // ẞ ß ( 1) LATIN CAPITAL LETTER SHARP S
+ {0x1EA0, 0x1EFE, 0x1EFF}, // Ạ ạ (48) LATIN CAPITAL LETTER Y WITH LOOP
+ {0x1F08, 0x1F0F, 0x1F07}, // Ἀ ἀ ( 4) GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+ {0x1F18, 0x1F1D, 0x1F15}, // Ἐ ἐ ( 3) GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+ {0x1F28, 0x1F2F, 0x1F27}, // Ἠ ἠ ( 4) GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+ {0x1F38, 0x1F3F, 0x1F37}, // Ἰ ἰ ( 4) GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
+ {0x1F48, 0x1F4D, 0x1F45}, // Ὀ ὀ ( 3) GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+ {0x1F59, 0x1F5F, 0x1F57}, // Ὑ ὑ ( 4) GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
+ {0x1F68, 0x1F6F, 0x1F67}, // Ὠ ὠ ( 4) GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+ {0x1F88, 0x1F8F, 0x1F87}, // ᾈ ᾀ ( 4) GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+ {0x1F98, 0x1F9F, 0x1F97}, // ᾘ ᾐ ( 4) GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+ {0x1FA8, 0x1FAF, 0x1FA7}, // ᾨ ᾠ ( 4) GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+ {0x1FB8, 0x1FB9, 0x1FB1}, // Ᾰ ᾰ ( 1) GREEK CAPITAL LETTER ALPHA WITH MACRON
+ {0x1FBA, 0x1FBB, 0x1F71}, // Ὰ ὰ ( 1) GREEK CAPITAL LETTER ALPHA WITH OXIA
+ {0x1FBC, 0x1FBC, 0x1FB3}, // ᾼ ᾳ ( 1) GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+ {0x1FBE, 0x1FBE, 0x03B9}, // ι ι ( 1) GREEK PROSGEGRAMMENI
+ {0x1FC8, 0x1FCB, 0x1F75}, // Ὲ ὲ ( 2) GREEK CAPITAL LETTER ETA WITH OXIA
+ {0x1FCC, 0x1FCC, 0x1FC3}, // ῌ ῃ ( 1) GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+ {0x1FD8, 0x1FD9, 0x1FD1}, // Ῐ ῐ ( 1) GREEK CAPITAL LETTER IOTA WITH MACRON
+ {0x1FDA, 0x1FDB, 0x1F77}, // Ὶ ὶ ( 1) GREEK CAPITAL LETTER IOTA WITH OXIA
+ {0x1FE8, 0x1FE9, 0x1FE1}, // Ῠ ῠ ( 1) GREEK CAPITAL LETTER UPSILON WITH MACRON
+ {0x1FEA, 0x1FEB, 0x1F7B}, // Ὺ ὺ ( 1) GREEK CAPITAL LETTER UPSILON WITH OXIA
+ {0x1FEC, 0x1FEC, 0x1FE5}, // Ῥ ῥ ( 1) GREEK CAPITAL LETTER RHO WITH DASIA
+ {0x1FF8, 0x1FF9, 0x1F79}, // Ὸ ὸ ( 1) GREEK CAPITAL LETTER OMICRON WITH OXIA
+ {0x1FFA, 0x1FFB, 0x1F7D}, // Ὼ ὼ ( 1) GREEK CAPITAL LETTER OMEGA WITH OXIA
+ {0x1FFC, 0x1FFC, 0x1FF3}, // ῼ ῳ ( 1) GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+ {0x2126, 0x2126, 0x03C9}, // Ω ω ( 1) OHM SIGN
+ {0x212A, 0x212A, 0x006B}, // K k ( 1) KELVIN SIGN
+ {0x212B, 0x212B, 0x00E5}, // Å å ( 1) ANGSTROM SIGN
+ {0x2132, 0x2132, 0x214E}, // Ⅎ ⅎ ( 1) TURNED CAPITAL F
+ {0x2160, 0x216F, 0x217F}, // Ⅰ ⅰ (16) ROMAN NUMERAL ONE THOUSAND
+ {0x2183, 0x2183, 0x2184}, // Ↄ ↄ ( 1) ROMAN NUMERAL REVERSED ONE HUNDRED
+ {0x24B6, 0x24CF, 0x24E9}, // Ⓐ ⓐ (26) CIRCLED LATIN CAPITAL LETTER Z
+ {0x2C00, 0x2C2F, 0x2C5F}, // Ⰰ ⰰ (48) GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI
+ {0x2C60, 0x2C60, 0x2C61}, // Ⱡ ⱡ ( 1) LATIN CAPITAL LETTER L WITH DOUBLE BAR
+ {0x2C62, 0x2C62, 0x026B}, // Ɫ ɫ ( 1) LATIN CAPITAL LETTER L WITH MIDDLE TILDE
+ {0x2C63, 0x2C63, 0x1D7D}, // Ᵽ ᵽ ( 1) LATIN CAPITAL LETTER P WITH STROKE
+ {0x2C64, 0x2C64, 0x027D}, // Ɽ ɽ ( 1) LATIN CAPITAL LETTER R WITH TAIL
+ {0x2C67, 0x2C6B, 0x2C6C}, // Ⱨ ⱨ ( 3) LATIN CAPITAL LETTER Z WITH DESCENDER
+ {0x2C6D, 0x2C6D, 0x0251}, // Ɑ ɑ ( 1) LATIN CAPITAL LETTER ALPHA
+ {0x2C6E, 0x2C6E, 0x0271}, // Ɱ ɱ ( 1) LATIN CAPITAL LETTER M WITH HOOK
+ {0x2C6F, 0x2C6F, 0x0250}, // Ɐ ɐ ( 1) LATIN CAPITAL LETTER TURNED A
+ {0x2C70, 0x2C70, 0x0252}, // Ɒ ɒ ( 1) LATIN CAPITAL LETTER TURNED ALPHA
+ {0x2C72, 0x2C72, 0x2C73}, // Ⱳ ⱳ ( 1) LATIN CAPITAL LETTER W WITH HOOK
+ {0x2C75, 0x2C75, 0x2C76}, // Ⱶ ⱶ ( 1) LATIN CAPITAL LETTER HALF H
+ {0x2C7E, 0x2C7F, 0x0240}, // Ȿ ȿ ( 1) LATIN CAPITAL LETTER Z WITH SWASH TAIL
+ {0x2C80, 0x2CE2, 0x2CE3}, // Ⲁ ⲁ (50) COPTIC CAPITAL LETTER OLD NUBIAN WAU
+ {0x2CEB, 0x2CED, 0x2CEE}, // Ⳬ ⳬ ( 2) COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
+ {0x2CF2, 0x2CF2, 0x2CF3}, // Ⳳ ⳳ ( 1) COPTIC CAPITAL LETTER BOHAIRIC KHEI
+ {0xA640, 0xA66C, 0xA66D}, // Ꙁ ꙁ (23) CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
+ {0xA680, 0xA69A, 0xA69B}, // Ꚁ ꚁ (14) CYRILLIC CAPITAL LETTER CROSSED O
+ {0xA722, 0xA72E, 0xA72F}, // Ꜣ ꜣ ( 7) LATIN CAPITAL LETTER CUATRILLO WITH COMMA
+ {0xA732, 0xA76E, 0xA76F}, // Ꜳ ꜳ (31) LATIN CAPITAL LETTER CON
+ {0xA779, 0xA77B, 0xA77C}, // Ꝺ ꝺ ( 2) LATIN CAPITAL LETTER INSULAR F
+ {0xA77D, 0xA77D, 0x1D79}, // Ᵹ ᵹ ( 1) LATIN CAPITAL LETTER INSULAR G
+ {0xA77E, 0xA786, 0xA787}, // Ꝿ ꝿ ( 5) LATIN CAPITAL LETTER INSULAR T
+ {0xA78B, 0xA78B, 0xA78C}, // Ꞌ ꞌ ( 1) LATIN CAPITAL LETTER SALTILLO
+ {0xA78D, 0xA78D, 0x0265}, // Ɥ ɥ ( 1) LATIN CAPITAL LETTER TURNED H
+ {0xA790, 0xA792, 0xA793}, // Ꞑ ꞑ ( 2) LATIN CAPITAL LETTER C WITH BAR
+ {0xA796, 0xA7A8, 0xA7A9}, // Ꞗ ꞗ (10) LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
+ {0xA7AA, 0xA7AA, 0x0266}, // Ɦ ɦ ( 1) LATIN CAPITAL LETTER H WITH HOOK
+ {0xA7AB, 0xA7AB, 0x025C}, // Ɜ ɜ ( 1) LATIN CAPITAL LETTER REVERSED OPEN E
+ {0xA7AC, 0xA7AC, 0x0261}, // Ɡ ɡ ( 1) LATIN CAPITAL LETTER SCRIPT G
+ {0xA7AD, 0xA7AD, 0x026C}, // Ɬ ɬ ( 1) LATIN CAPITAL LETTER L WITH BELT
+ {0xA7AE, 0xA7AE, 0x026A}, // Ɪ ɪ ( 1) LATIN CAPITAL LETTER SMALL CAPITAL I
+ {0xA7B0, 0xA7B0, 0x029E}, // Ʞ ʞ ( 1) LATIN CAPITAL LETTER TURNED K
+ {0xA7B1, 0xA7B1, 0x0287}, // Ʇ ʇ ( 1) LATIN CAPITAL LETTER TURNED T
+ {0xA7B2, 0xA7B2, 0x029D}, // Ʝ ʝ ( 1) LATIN CAPITAL LETTER J WITH CROSSED-TAIL
+ {0xA7B3, 0xA7B3, 0xAB53}, // Ꭓ ꭓ ( 1) LATIN CAPITAL LETTER CHI
+ {0xA7B4, 0xA7C2, 0xA7C3}, // Ꞵ ꞵ ( 8) LATIN CAPITAL LETTER ANGLICANA W
+ {0xA7C4, 0xA7C4, 0xA794}, // Ꞔ ꞔ ( 1) LATIN CAPITAL LETTER C WITH PALATAL HOOK
+ {0xA7C5, 0xA7C5, 0x0282}, // Ʂ ʂ ( 1) LATIN CAPITAL LETTER S WITH HOOK
+ {0xA7C6, 0xA7C6, 0x1D8E}, // Ᶎ ᶎ ( 1) LATIN CAPITAL LETTER Z WITH PALATAL HOOK
+ {0xA7C7, 0xA7C9, 0xA7CA}, // Ꟈ ꟈ ( 2) LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
+ {0xA7D0, 0xA7D0, 0xA7D1}, // Ꟑ ꟑ ( 1) LATIN CAPITAL LETTER CLOSED INSULAR G
+ {0xA7D6, 0xA7D8, 0xA7D9}, // Ꟗ ꟗ ( 2) LATIN CAPITAL LETTER SIGMOID S
+ {0xA7F5, 0xA7F5, 0xA7F6}, // Ꟶ ꟶ ( 1) LATIN CAPITAL LETTER REVERSED HALF H
+ {0xAB70, 0xABBF, 0x13EF}, // ꭰ Ꭰ (40) CHEROKEE SMALL LETTER YA
+ {0xFF21, 0xFF3A, 0xFF5A}, // A a (26) FULLWIDTH LATIN CAPITAL LETTER Z
+}; // 192
+
+enum { casefold_len = 192 };
+
+static uint8_t upcase_ind[192] = {
+ 0, 140, 10, 113, 2, 141, 3, 8, 4, 5, 6, 7, 9, 60, 12, 14, 16, 20, 50, 25,
+ 57, 53, 29, 31, 33, 35, 37, 39, 40, 51, 41, 42, 43, 44, 45, 46, 17, 47, 48, 49,
+ 52, 54, 56, 158, 59, 63, 154, 152, 155, 11, 13, 15, 18, 19, 174, 21, 175, 22, 170, 173,
+ 24, 23, 177, 148, 176, 26, 153, 27, 28, 150, 30, 184, 32, 179, 34, 61, 36, 62, 38, 180,
+ 178, 65, 66, 88, 68, 69, 72, 76, 84, 77, 83, 64, 128, 81, 1, 79, 82, 73, 74, 78,
+ 139, 70, 71, 75, 80, 86, 67, 85, 87, 90, 101, 102, 103, 104, 105, 106, 89, 91, 107, 92,
+ 94, 93, 95, 96, 109, 110, 190, 100, 167, 149, 185, 111, 112, 114, 115, 116, 117, 118, 119, 120,
+ 121, 126, 129, 132, 136, 134, 137, 122, 123, 124, 125, 127, 130, 131, 133, 135, 138, 142, 143, 144,
+ 145, 146, 147, 55, 58, 151, 156, 157, 159, 160, 161, 97, 98, 99, 162, 108, 163, 164, 165, 166,
+ 168, 169, 171, 183, 172, 182, 186, 187, 188, 189, 181, 191,
+};