diff options
| author | Tyge Lovset <[email protected]> | 2022-05-30 00:59:14 +0200 |
|---|---|---|
| committer | Tyge Lovset <[email protected]> | 2022-05-30 00:59:14 +0200 |
| commit | 831dc0843aeedcb45138a6ed576ea03f2dcd58f8 (patch) | |
| tree | 12fedc72d39abda4bb50e59471dddca88b675f86 /src | |
| parent | 6e96a8f11783d9c8956ddf272739f005c7b17c16 (diff) | |
| download | STC-modified-831dc0843aeedcb45138a6ed576ea03f2dcd58f8.tar.gz STC-modified-831dc0843aeedcb45138a6ed576ea03f2dcd58f8.zip | |
Added more utf8 features. Changed linking macros in ccommon.h.
Diffstat (limited to 'src')
| -rw-r--r-- | src/casefold.c (renamed from src/cregex_utf8.c) | 75 |
1 file changed, 67 insertions, 8 deletions
```diff
diff --git a/src/cregex_utf8.c b/src/casefold.c
index 69be6315..1b0a9463 100644
--- a/src/cregex_utf8.c
+++ b/src/casefold.c
@@ -1,8 +1,6 @@
-#include <stdint.h>
-#include <stdio.h>
 #include <ctype.h>
-#include <stc/utf8.h>
-#include <stdbool.h>
+#define i_header
+#include <stc/cstr.h>
 
 static struct CaseFold { uint16_t c0, c1, m1; } casefold[] = {
     {65, 90, 122}, {181, 181, 956}, {192, 214, 246}, {216, 222, 254},
@@ -127,6 +125,52 @@ bool utf8_isalpha(uint32_t c) {
     return utf8_islower(c) || utf8_isupper(c);
 }
 
+static struct fnfold {
+    int (*conv_asc)(int);
+    uint32_t (*conv_u8)(uint32_t);
+}
+fn_tolower = {tolower, utf8_tolower},
+fn_toupper = {toupper, utf8_toupper};
+
+
+static cstr cstr_casefold(const cstr* self, struct fnfold fold) {
+    csview sv = cstr_sv(self);
+    cstr out = cstr_null;
+    char *buf = cstr_reserve(&out, sv.size*3/2);
+    uint32_t cp; size_t sz = 0;
+    utf8_decode_t d = {UTF8_OK};
+
+    for (; *sv.str; sv.str += d.size) {
+        utf8_peek(sv.str, &d);
+        switch (d.size) {
+        case 1:
+            buf[sz++] = (char)fold.conv_asc(*sv.str);
+            break;
+        default:
+            cp = fold.conv_u8(d.codep);
+            sz += utf8_encode(buf + sz, cp);
+        }
+    }
+    _cstr_set_size(&out, sz);
+    cstr_shrink_to_fit(&out);
+    return out;
+}
+
+cstr cstr_tolower(const cstr* self) {
+    return cstr_casefold(self, fn_tolower);
+}
+
+cstr cstr_toupper(const cstr* self) {
+    return cstr_casefold(self, fn_toupper);
+}
+
+void cstr_lowercase(cstr* self) {
+    cstr_take(self, cstr_casefold(self, fn_tolower));
+}
+
+void cstr_uppercase(cstr* self) {
+    cstr_take(self, cstr_casefold(self, fn_toupper));
+}
 
 #ifdef TEST
 int main()
@@ -134,14 +178,29 @@ int main()
     for (size_t i=0; i < sizeof cfold_low/sizeof *cfold_low; ++i) {
         char x[3][5]={0};
+        unsigned s0, s1, s2;
         uint32_t a = casefold[i].c0;
         uint32_t b = utf8_tolower(a);
         uint32_t c = utf8_toupper(b);
-        utf8_encode(x[0], a);
-        utf8_encode(x[1], b);
-        utf8_encode(x[2], c);
-        printf("%s %s %s - %u %u %u\n", x[0], x[1], x[2], a, b, c);
+        s0 = utf8_encode(x[0], a);
+        s1 = utf8_encode(x[1], b);
+        s2 = utf8_encode(x[2], c);
+        printf("%s %s %s - %u %u %u (%u %u %u)\n", x[0], x[1], x[2], a, b, c, s0, s1, s2);
+    }
+    c_auto (cstr, t1)
+    {
+        t1 = cstr_new("Die preußischen Köstlichkeiten.");
+
+        cstr_buf b = cstr_buffer(&t1);
+        printf("%s, %llu %llu\n", b.data, b.size, b.cap);
+        cstr_lowercase(&t1);
+        b = cstr_buffer(&t1);
+        printf("%s, %llu %llu\n", b.data, b.size, b.cap);
+
+        cstr_uppercase(&t1);
+        b = cstr_buffer(&t1);
+        printf("%s, %llu %llu\n", b.data, b.size, b.cap);
+    }
 }
 #endif
```
