diff options
| author | Tyge Løvset <[email protected]> | 2022-08-07 08:03:46 +0200 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2022-08-07 08:20:34 +0200 |
| commit | c87898773d1af364a9847610401a9959f6019fe7 (patch) | |
| tree | 5e64c9d96fb9e12192ce298f1d2909d43b72571a /include/stc/utf8.h | |
| parent | 618b5704e6f85cfe1b6e5c9c9373abe76a8bb628 (diff) | |
| download | STC-modified-c87898773d1af364a9847610401a9959f6019fe7.tar.gz STC-modified-c87898773d1af364a9847610401a9959f6019fe7.zip | |
Internal: moved some functions in csview/cstr to implementation sections.
Diffstat (limited to 'include/stc/utf8.h')
| -rw-r--r-- | include/stc/utf8.h | 61 |
1 file changed, 22 insertions, 39 deletions
diff --git a/include/stc/utf8.h b/include/stc/utf8.h index c6fb6944..34368737 100644 --- a/include/stc/utf8.h +++ b/include/stc/utf8.h @@ -1,46 +1,27 @@ #ifndef UTF8_H_INCLUDED #define UTF8_H_INCLUDED -/* -// Example: -#include <stc/cstr.h> -#include <stc/csview.h> -int main() -{ - c_auto (cstr, s1) { - s1 = cstr_new("hell😀 w😀rld"); - printf("%s\n", cstr_str(&s1)); - cstr_replace_sv(&s1, utf8_substr(cstr_str(&s1), 7, 1), c_sv("🐨")); - printf("%s\n", cstr_str(&s1)); - - c_foreach (i, cstr, s1) - printf("%.*s,", c_ARGsv(i.chr)); - } -} -// Output: -// hell😀 w😀rld -// hell😀 w🐨rld -// h,e,l,l,😀, ,w,🐨,r,l,d, -*/ #include "ccommon.h" #include <ctype.h> // utf8 methods defined in src/utf8code.c: -bool utf8_islower(uint32_t c); -bool utf8_isupper(uint32_t c); -bool utf8_isspace(uint32_t c); -bool utf8_isdigit(uint32_t c); -bool utf8_isxdigit(uint32_t c); -bool utf8_isalpha(uint32_t c); -bool utf8_isalnum(uint32_t c); -uint32_t utf8_casefold(uint32_t c); -uint32_t utf8_tolower(uint32_t c); -uint32_t utf8_toupper(uint32_t c); -bool utf8_valid_n(const char* s, size_t nbytes); -int utf8_icmp_n(size_t u8max, const char* s1, size_t n1, - const char* s2, size_t n2); -unsigned utf8_encode(char *out, uint32_t c); -uint32_t utf8_peek(const char *s, int u8pos); +extern bool utf8_islower(uint32_t c); +extern bool utf8_isupper(uint32_t c); +extern bool utf8_isspace(uint32_t c); +extern bool utf8_isdigit(uint32_t c); +extern bool utf8_isxdigit(uint32_t c); +extern bool utf8_isalpha(uint32_t c); +extern bool utf8_isalnum(uint32_t c); +extern uint32_t utf8_casefold(uint32_t c); +extern uint32_t utf8_tolower(uint32_t c); +extern uint32_t utf8_toupper(uint32_t c); +extern bool utf8_valid_n(const char* s, size_t nbytes); +extern int utf8_icmp_n(size_t u8max, const char* s1, size_t n1, + const char* s2, size_t n2); +extern unsigned utf8_encode(char *out, uint32_t c); +extern uint32_t utf8_peek(const char *s, int u8pos); + +/* following functions uses src/utf8code.c */ /* decode next utf8 
codepoint. https://bjoern.hoehrmann.de/utf-8/decoder/dfa */ typedef struct { uint32_t state, codep; } utf8_decode_t; @@ -62,15 +43,17 @@ STC_INLINE bool utf8_valid(const char* s) { return utf8_valid_n(s, ~(size_t)0); } +/* following functions are independent but assume valid utf8 strings: */ + /* number of bytes in the utf8 codepoint from s */ STC_INLINE unsigned utf8_chr_size(const char *s) { unsigned b = (uint8_t)*s; if (b < 0x80) return 1; - if (b < 0xC2) return 0; + /*if (b < 0xC2) return 0;*/ if (b < 0xE0) return 2; if (b < 0xF0) return 3; - if (b < 0xF5) return 4; - return 0; + /*if (b < 0xF5)*/ return 4; + /*return 0;*/ } /* number of codepoints in the utf8 string s */ |
