summaryrefslogtreecommitdiffhomepage
path: root/docs
diff options
context:
space:
mode:
authorTyge Løvset <[email protected]>2022-06-01 09:20:25 +0200
committerTyge Løvset <[email protected]>2022-06-01 09:20:25 +0200
commit073854ba3d9dcdc75fff138c489819c30806508e (patch)
tree6b95ef688c8d603bd02da3baffa242900b03c682 /docs
parent22c20b522fcc9cc0743ad04fe6c3203c7a778401 (diff)
downloadSTC-modified-073854ba3d9dcdc75fff138c489819c30806508e.tar.gz
STC-modified-073854ba3d9dcdc75fff138c489819c30806508e.zip
Added some docs on utf8. API may change for cstr: to take const cstr* args instead of cstr values.
Diffstat (limited to 'docs')
-rw-r--r--docs/cstr_api.md18
-rw-r--r--docs/csview_api.md12
2 files changed, 17 insertions, 13 deletions
diff --git a/docs/cstr_api.md b/docs/cstr_api.md
index a8fd7e73..24f51d32 100644
--- a/docs/cstr_api.md
+++ b/docs/cstr_api.md
@@ -38,20 +38,22 @@ size_t cstr_length(cstr s);
size_t cstr_capacity(cstr s);
bool cstr_empty(cstr s);
-// utf8:
-size_t cstr_size_u8(cstr s); // utf8 size
+// utf8 encoded strings:
+size_t cstr_size_u8(cstr s); // number of utf8 codepoints
size_t cstr_size_n_u8(cstr s, size_t nbytes); // utf8 size within n bytes
-csview cstr_at(const cstr* self, size_t bytepos); // utf8 character as a csview
-csview cstr_at_u8(const cstr* self, size_t u8idx); // utf8 character at utf8 pos
+csview cstr_at(const cstr* self, size_t bytepos); // utf8 codepoint as a csview
+csview cstr_at_u8(const cstr* self, size_t u8idx); // utf8 codepoint at utf8 pos
size_t cstr_pos_u8(const cstr* self, size_t u8idx); // byte position at utf8 index
-
-// utf8: requires linking with src/utf8code.c
+// utf8 functions require linking with src/utf8code.c:
+bool cstr_valid_u8(const cstr* self); // check if str is valid utf8
cstr cstr_tolower(const cstr* self); // returns new lowercase utf8 cstr
cstr cstr_toupper(const cstr* self); // returns new uppercase utf8 cstr
void cstr_lowercase(cstr* self); // transform cstr to lowercase utf8
void cstr_uppercase(cstr* self); // transform cstr to uppercase utf8
-bool cstr_valid_u8(const cstr* self); // check if str is valid utf8
-utf8_decode_t cstr_peek(const cstr* self, size_t bytepos); // get next unicode point at string pos
+bool cstr_iequals(cstr s, const char* str); // case-insensitive comparison
+bool cstr_istarts_with(cstr s, const char* str); // "
+bool cstr_iends_with(cstr s, const char* str); // "
+int cstr_icmp(const cstr* s1, const cstr* s2); // "
size_t cstr_reserve(cstr* self, size_t capacity);
void cstr_resize(cstr* self, size_t len, char fill);
diff --git a/docs/csview_api.md b/docs/csview_api.md
index 543173d3..8e665558 100644
--- a/docs/csview_api.md
+++ b/docs/csview_api.md
@@ -57,10 +57,14 @@ void csview_next(csview_iter* it); // NB: UTF8
#### UTF8 methods
```
-bool csview_valid_u8(csview sv);
size_t csview_size_u8(csview sv);
csview csview_substr_u8(csview sv, size_t u8pos, size_t u8len);
+// require linking with src/utf8code.c:
+bool csview_valid_u8(csview sv);
+int csview_icmp(const csview* x, const csview* y);
+
+// from utf8.h/utf8code.c:
bool utf8_valid(const char* s);
bool utf8_valid_n(const char* s, size_t n);
size_t utf8_size(const char *s);
@@ -68,10 +72,8 @@ size_t utf8_size_n(const char *s, size_t n); // number of
const char* utf8_at(const char *s, size_t index); // from UTF8 index to char* position
size_t utf8_pos(const char* s, size_t index); // from UTF8 index to byte index position
unsigned utf8_codep_size(const char* s); // 0-4 (0 if s[0] is illegal utf8)
-void utf8_peek(utf8_decode_t* d, const char *s); // next codepoint as uint32_t
-uint32_t utf8_decode(utf8_decode_t *d, uint8_t byte, // d holds state, size and unicode point
- const uint32_t byte); // decode next utf8 codepoint.
-unsigned utf8_encode(char *out, uint32_t cp); // encode unicode cp into out
+uint32_t utf8_decode(utf8_decode_t *d, uint8_t byte); // decode next byte to utf8, return state.
+unsigned utf8_encode(char *out, uint32_t cp); // encode unicode cp into out buffer
```
#### Extended cstr methods