diff options
| author | Tyge Løvset <[email protected]> | 2022-06-01 09:20:25 +0200 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2022-06-01 09:20:25 +0200 |
| commit | 073854ba3d9dcdc75fff138c489819c30806508e (patch) | |
| tree | 6b95ef688c8d603bd02da3baffa242900b03c682 /docs | |
| parent | 22c20b522fcc9cc0743ad04fe6c3203c7a778401 (diff) | |
| download | STC-modified-073854ba3d9dcdc75fff138c489819c30806508e.tar.gz STC-modified-073854ba3d9dcdc75fff138c489819c30806508e.zip | |
Added some docs on utf8. The cstr API may change to take const cstr* arguments instead of cstr values.
Diffstat (limited to 'docs')
| -rw-r--r-- | docs/cstr_api.md | 18 | ||||
| -rw-r--r-- | docs/csview_api.md | 12 |
2 files changed, 17 insertions, 13 deletions
diff --git a/docs/cstr_api.md b/docs/cstr_api.md index a8fd7e73..24f51d32 100644 --- a/docs/cstr_api.md +++ b/docs/cstr_api.md @@ -38,20 +38,22 @@ size_t cstr_length(cstr s); size_t cstr_capacity(cstr s); bool cstr_empty(cstr s); -// utf8: -size_t cstr_size_u8(cstr s); // utf8 size +// utf8 encoded strings: +size_t cstr_size_u8(cstr s); // number of utf8 codepoints size_t cstr_size_n_u8(cstr s, size_t nbytes); // utf8 size within n bytes -csview cstr_at(const cstr* self, size_t bytepos); // utf8 character as a csview -csview cstr_at_u8(const cstr* self, size_t u8idx); // utf8 character at utf8 pos +csview cstr_at(const cstr* self, size_t bytepos); // utf8 codepoint as a csview +csview cstr_at_u8(const cstr* self, size_t u8idx); // utf8 codepoint at utf8 pos size_t cstr_pos_u8(const cstr* self, size_t u8idx); // byte position at utf8 index - -// utf8: requires linking with src/utf8code.c +// utf8 functions require linking with src/utf8code.c: +bool cstr_valid_u8(const cstr* self); // check if str is valid utf8 cstr cstr_tolower(const cstr* self); // returns new lowercase utf8 cstr cstr cstr_toupper(const cstr* self); // returns new uppercase utf8 cstr void cstr_lowercase(cstr* self); // transform cstr to lowercase utf8 void cstr_uppercase(cstr* self); // transform cstr to uppercase utf8 -bool cstr_valid_u8(const cstr* self); // check if str is valid utf8 -utf8_decode_t cstr_peek(const cstr* self, size_t bytepos); // get next unicode point at string pos +bool cstr_iequals(cstr s, const char* str); // case-insensitive comparison +bool cstr_istarts_with(cstr s, const char* str); // " +bool cstr_iends_with(cstr s, const char* str); // " +int cstr_icmp(const cstr* s1, const cstr* s2); // " size_t cstr_reserve(cstr* self, size_t capacity); void cstr_resize(cstr* self, size_t len, char fill); diff --git a/docs/csview_api.md b/docs/csview_api.md index 543173d3..8e665558 100644 --- a/docs/csview_api.md +++ b/docs/csview_api.md @@ -57,10 +57,14 @@ void 
csview_next(csview_iter* it); // NB: UTF8 #### UTF8 methods ``` -bool csview_valid_u8(csview sv); size_t csview_size_u8(csview sv); csview csview_substr_u8(csview sv, size_t u8pos, size_t u8len); +// require linking with src/utf8code.c: +bool csview_valid_u8(csview sv); +int csview_icmp(const csview* x, const csview* y); + +// from utf8.h/utf8code.c: bool utf8_valid(const char* s); bool utf8_valid_n(const char* s, size_t n); size_t utf8_size(const char *s); @@ -68,10 +72,8 @@ size_t utf8_size_n(const char *s, size_t n); // number of const char* utf8_at(const char *s, size_t index); // from UTF8 index to char* position size_t utf8_pos(const char* s, size_t index); // from UTF8 index to byte index position unsigned utf8_codep_size(const char* s); // 0-4 (0 if s[0] is illegal utf8) -void utf8_peek(utf8_decode_t* d, const char *s); // next codepoint as uint32_t -uint32_t utf8_decode(utf8_decode_t *d, uint8_t byte, // d holds state, size and unicode point - const uint32_t byte); // decode next utf8 codepoint. -unsigned utf8_encode(char *out, uint32_t cp); // encode unicode cp into out +uint32_t utf8_decode(utf8_decode_t *d, uint8_t byte); // decode next byte to utf8, return state. +unsigned utf8_encode(char *out, uint32_t cp); // encode unicode cp into out buffer ``` #### Extended cstr methods |
