From 3e81c98dd6fce24409ebaac47de117d4f73537c6 Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Wed, 8 Jun 2022 00:43:27 +0200 Subject: Fixed utf8 tables again. Separate casefold, toupper, tolower functions. --- src/utf8code.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'src/utf8code.c') diff --git a/src/utf8code.c b/src/utf8code.c index 0cdcd8b6..38dff832 100644 --- a/src/utf8code.c +++ b/src/utf8code.c @@ -59,7 +59,7 @@ bool utf8_valid_n(const char* s, size_t n) { return d.state == 0; } -uint32_t utf8_tolower(uint32_t c) { +uint32_t utf8_casefold(uint32_t c) { for (size_t i=0; i < casefold_len; ++i) { const struct CaseMapping entry = casemappings[i]; if (c <= entry.c1) { @@ -72,9 +72,22 @@ uint32_t utf8_tolower(uint32_t c) { return c; } -uint32_t utf8_toupper(uint32_t c) { +uint32_t utf8_tolower(uint32_t c) { for (size_t i=0; i < sizeof upcase_ind/sizeof *upcase_ind; ++i) { - struct CaseMapping entry = casemappings[upcase_ind[i]]; + const struct CaseMapping entry = casemappings[upcase_ind[i]]; + if (c <= entry.c1) { + if (c < entry.c0) return c; + int d = entry.m1 - entry.c1; + if (d == 1) return c + ((entry.c1 & 1) == (c & 1)); + return c + d; + } + } + return c; +} + +uint32_t utf8_toupper(uint32_t c) { + for (size_t i=0; i < sizeof lowcase_ind/sizeof *lowcase_ind; ++i) { + const struct CaseMapping entry = casemappings[lowcase_ind[i]]; if (c <= entry.m1) { int d = entry.m1 - entry.c1; if (c < (uint32_t)(entry.c0 + d)) return c; -- cgit v1.2.3