summary refs log tree commit diff homepage
path: root/src/utf8code.c
diff options
context:
space:
mode:
author Tyge Løvset <[email protected]> 2022-06-08 00:43:27 +0200
committer Tyge Løvset <[email protected]> 2022-06-08 00:43:27 +0200
commit 3e81c98dd6fce24409ebaac47de117d4f73537c6 (patch)
tree 83017e25ecb9f7d0ca0c84280768b0be9ce68848 /src/utf8code.c
parent 23f458d4f1cc94c8fe7345b24c788b4788826dfe (diff)
download STC-modified-3e81c98dd6fce24409ebaac47de117d4f73537c6.tar.gz
STC-modified-3e81c98dd6fce24409ebaac47de117d4f73537c6.zip
Fixed utf8 tables again. Separate casefold, toupper, tolower functions.
Diffstat (limited to 'src/utf8code.c')
-rw-r--r-- src/utf8code.c 19
1 files changed, 16 insertions, 3 deletions
diff --git a/src/utf8code.c b/src/utf8code.c
index 0cdcd8b6..38dff832 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -59,7 +59,7 @@ bool utf8_valid_n(const char* s, size_t n) {
return d.state == 0;
}
-uint32_t utf8_tolower(uint32_t c) {
+uint32_t utf8_casefold(uint32_t c) {
for (size_t i=0; i < casefold_len; ++i) {
const struct CaseMapping entry = casemappings[i];
if (c <= entry.c1) {
@@ -72,9 +72,22 @@ uint32_t utf8_tolower(uint32_t c) {
return c;
}
-uint32_t utf8_toupper(uint32_t c) {
+uint32_t utf8_tolower(uint32_t c) {
for (size_t i=0; i < sizeof upcase_ind/sizeof *upcase_ind; ++i) {
- struct CaseMapping entry = casemappings[upcase_ind[i]];
+ const struct CaseMapping entry = casemappings[upcase_ind[i]];
+ if (c <= entry.c1) {
+ if (c < entry.c0) return c;
+ int d = entry.m1 - entry.c1;
+ if (d == 1) return c + ((entry.c1 & 1) == (c & 1));
+ return c + d;
+ }
+ }
+ return c;
+}
+
+uint32_t utf8_toupper(uint32_t c) {
+ for (size_t i=0; i < sizeof lowcase_ind/sizeof *lowcase_ind; ++i) {
+ const struct CaseMapping entry = casemappings[lowcase_ind[i]];
if (c <= entry.m1) {
int d = entry.m1 - entry.c1;
if (c < (uint32_t)(entry.c0 + d)) return c;