diff options
| author | Tyge Løvset <[email protected]> | 2022-06-08 00:43:27 +0200 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2022-06-08 00:43:27 +0200 |
| commit | 3e81c98dd6fce24409ebaac47de117d4f73537c6 (patch) | |
| tree | 83017e25ecb9f7d0ca0c84280768b0be9ce68848 /src/utf8code.c | |
| parent | 23f458d4f1cc94c8fe7345b24c788b4788826dfe (diff) | |
| download | STC-modified-3e81c98dd6fce24409ebaac47de117d4f73537c6.tar.gz STC-modified-3e81c98dd6fce24409ebaac47de117d4f73537c6.zip | |
Fixed utf8 tables again. Separate casefold, toupper, tolower functions.
Diffstat (limited to 'src/utf8code.c')
| -rw-r--r-- | src/utf8code.c | 19 |
1 file changed, 16 insertions, 3 deletions
```diff
diff --git a/src/utf8code.c b/src/utf8code.c
index 0cdcd8b6..38dff832 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -59,7 +59,7 @@ bool utf8_valid_n(const char* s, size_t n) {
     return d.state == 0;
 }
 
-uint32_t utf8_tolower(uint32_t c) {
+uint32_t utf8_casefold(uint32_t c) {
     for (size_t i=0; i < casefold_len; ++i) {
         const struct CaseMapping entry = casemappings[i];
         if (c <= entry.c1) {
@@ -72,9 +72,22 @@ uint32_t utf8_tolower(uint32_t c) {
     return c;
 }
 
-uint32_t utf8_toupper(uint32_t c) {
+uint32_t utf8_tolower(uint32_t c) {
     for (size_t i=0; i < sizeof upcase_ind/sizeof *upcase_ind; ++i) {
-        struct CaseMapping entry = casemappings[upcase_ind[i]];
+        const struct CaseMapping entry = casemappings[upcase_ind[i]];
+        if (c <= entry.c1) {
+            if (c < entry.c0) return c;
+            int d = entry.m1 - entry.c1;
+            if (d == 1) return c + ((entry.c1 & 1) == (c & 1));
+            return c + d;
+        }
+    }
+    return c;
+}
+
+uint32_t utf8_toupper(uint32_t c) {
+    for (size_t i=0; i < sizeof lowcase_ind/sizeof *lowcase_ind; ++i) {
+        const struct CaseMapping entry = casemappings[lowcase_ind[i]];
         if (c <= entry.m1) {
             int d = entry.m1 - entry.c1;
             if (c < (uint32_t)(entry.c0 + d)) return c;
```
