Added some docs on utf8. API may change for cstr: to take const cstr* args instead of cstr values.

author: Tyge Løvset <[email protected]> 2022-06-01 09:20:25 +0200
committer: Tyge Løvset <[email protected]> 2022-06-01 09:20:25 +0200
commit: 073854ba3d9dcdc75fff138c489819c30806508e (patch)
tree: 6b95ef688c8d603bd02da3baffa242900b03c682 /docs
parent: 22c20b522fcc9cc0743ad04fe6c3203c7a778401 (diff)
download: STC-modified-073854ba3d9dcdc75fff138c489819c30806508e.tar.gz
STC-modified-073854ba3d9dcdc75fff138c489819c30806508e.zip
2 files changed, 17 insertions, 13 deletions
diff --git a/docs/cstr_api.md b/docs/cstr_api.md
index a8fd7e73..24f51d32 100644
--- a/docs/cstr_api.md
+++ b/docs/cstr_api.md
@@ -38,20 +38,22 @@ size_t       cstr_length(cstr s);
 size_t       cstr_capacity(cstr s);
 bool         cstr_empty(cstr s);
 
-// utf8:
-size_t       cstr_size_u8(cstr s);                                    // utf8 size
+// utf8 encoded strings: 
+size_t       cstr_size_u8(cstr s);                                    // number of utf8 codepoints
 size_t       cstr_size_n_u8(cstr s, size_t nbytes);                   // utf8 size within n bytes  
-csview       cstr_at(const cstr* self, size_t bytepos);               // utf8 character as a csview
-csview       cstr_at_u8(const cstr* self, size_t u8idx);              // utf8 character at utf8 pos
+csview       cstr_at(const cstr* self, size_t bytepos);               // utf8 codepoint as a csview
+csview       cstr_at_u8(const cstr* self, size_t u8idx);              // utf8 codepoint at utf8 pos
 size_t       cstr_pos_u8(const cstr* self, size_t u8idx);             // byte position at utf8 index
-
-// utf8: requires linking with src/utf8code.c
+// utf8 functions require linking with src/utf8code.c:
+bool         cstr_valid_u8(const cstr* self);                         // check if str is valid utf8
 cstr         cstr_tolower(const cstr* self);                          // returns new lowercase utf8 cstr
 cstr         cstr_toupper(const cstr* self);                          // returns new uppercase utf8 cstr
 void         cstr_lowercase(cstr* self);                              // transform cstr to lowercase utf8
 void         cstr_uppercase(cstr* self);                              // transform cstr to uppercase utf8
-bool         cstr_valid_u8(const cstr* self);                         // check if str is valid utf8
-utf8_decode_t cstr_peek(const cstr* self, size_t bytepos);            // get next unicode point at string pos
+bool         cstr_iequals(cstr s, const char* str);                   // case-insensitive comparison
+bool         cstr_istarts_with(cstr s, const char* str);              //   "
+bool         cstr_iends_with(cstr s, const char* str);                //   "
+int          cstr_icmp(const cstr* s1, const cstr* s2);               //   "
 
 size_t       cstr_reserve(cstr* self, size_t capacity);
 void         cstr_resize(cstr* self, size_t len, char fill);
diff --git a/docs/csview_api.md b/docs/csview_api.md
index 543173d3..8e665558 100644
--- a/docs/csview_api.md
+++ b/docs/csview_api.md
@@ -57,10 +57,14 @@ void            csview_next(csview_iter* it);                       // NB: UTF8
 
 #### UTF8 methods
 ```
-bool            csview_valid_u8(csview sv);
 size_t          csview_size_u8(csview sv);
 csview          csview_substr_u8(csview sv, size_t u8pos, size_t u8len);
 
+// require linking with src/utf8code.c:
+bool            csview_valid_u8(csview sv);
+int             csview_icmp(const csview* x, const csview* y);
+
+// from utf8.h/utf8code.c:
 bool            utf8_valid(const char* s);
 bool            utf8_valid_n(const char* s, size_t n);
 size_t          utf8_size(const char *s);
@@ -68,10 +72,8 @@ size_t          utf8_size_n(const char *s, size_t n);               // number of
 const char*     utf8_at(const char *s, size_t index);               // from UTF8 index to char* position
 size_t          utf8_pos(const char* s, size_t index);              // from UTF8 index to byte index position
 unsigned        utf8_codep_size(const char* s);                     // 0-4 (0 if s[0] is illegal utf8)
-void            utf8_peek(utf8_decode_t* d, const char *s);         // next codepoint as uint32_t
-uint32_t        utf8_decode(utf8_decode_t *d, uint8_t byte,         // d holds state, size and unicode point
-                            const uint32_t byte);                   // decode next utf8 codepoint.
-unsigned        utf8_encode(char *out, uint32_t cp);                // encode unicode cp into out
+uint32_t        utf8_decode(utf8_decode_t *d, uint8_t byte);        // decode next byte to utf8, return state.
+unsigned        utf8_encode(char *out, uint32_t cp);                // encode unicode cp into out buffer
 ```
 
 #### Extended cstr methods
author	Tyge Løvset <[email protected]>	2022-06-01 09:20:25 +0200
committer	Tyge Løvset <[email protected]>	2022-06-01 09:20:25 +0200
commit	073854ba3d9dcdc75fff138c489819c30806508e (patch)
tree	6b95ef688c8d603bd02da3baffa242900b03c682 /docs
parent	22c20b522fcc9cc0743ad04fe6c3203c7a778401 (diff)
download	STC-modified-073854ba3d9dcdc75fff138c489819c30806508e.tar.gz STC-modified-073854ba3d9dcdc75fff138c489819c30806508e.zip