Some changes to utf8 cstr methods.

author: Tyge Løvset <[email protected]> 2022-06-10 08:41:01 +0200
committer: Tyge Løvset <[email protected]> 2022-06-10 08:41:01 +0200
commit: f1d09dfcc7570e69eb6e9688b736f7b031b22b2d (patch)
tree: 461d077630b735851ce901ad90e70e4b299ffe98
parent: d463acdbee5bb3a9509cb8414602f495408583b4 (diff)
download: STC-modified-f1d09dfcc7570e69eb6e9688b736f7b031b22b2d.tar.gz
STC-modified-f1d09dfcc7570e69eb6e9688b736f7b031b22b2d.zip
3 files changed, 15 insertions, 22 deletions
diff --git a/docs/cstr_api.md b/docs/cstr_api.md
index 8dd1b397..cfc807f6 100644
--- a/docs/cstr_api.md
+++ b/docs/cstr_api.md
@@ -85,11 +85,10 @@ bool         cstr_getdelim(cstr *self, int delim, FILE *stream);      // does no
 
 #### UTF8 methods
 ```c
-size_t       cstr_size_u8(cstr s);                                    // number of utf8 codepoints
-size_t       cstr_size_n_u8(cstr s, size_t nbytes);                   // utf8 size within n bytes  
-csview       cstr_at(const cstr* self, size_t bytepos);               // utf8 codepoints as a csview
-csview       cstr_at_u8(const cstr* self, size_t u8idx);              // utf8 codepoints at utf8 pos
-size_t       cstr_pos_u8(const cstr* self, size_t u8idx);             // byte position at utf8 index
+size_t       cstr_u8size(cstr s);                                     // number of utf8 codepoints
+size_t       cstr_u8size_n(cstr s, size_t nbytes);                    // utf8 size within n bytes  
+const char*  cstr_at(const cstr* self, size_t u8idx);                 // pointer to the byte at utf8 index
+csview       cstr_view_at(const cstr* self, size_t u8idx);            // utf8 codepoint at utf8 pos as csview
 
 // iterate utf8 codepoints
 cstr_iter    cstr_begin(const cstr* self);
@@ -97,7 +96,7 @@ cstr_iter    cstr_end(const cstr* self);
 void         cstr_next(cstr_iter* it);
 
 // utf8 functions requires linking with src/utf8code.c symbols:
-bool         cstr_valid_u8(const cstr* self);                         // check if str is valid utf8
+bool         cstr_valid_utf8(const cstr* self);                       // check if str is valid utf8
 cstr         cstr_tolower(const cstr* self);                          // returns new lowercase utf8 cstr
 cstr         cstr_toupper(const cstr* self);                          // returns new uppercase utf8 cstr
 void         cstr_lowercase(cstr* self);                              // transform cstr to lowercase utf8
diff --git a/include/stc/cstr.h b/include/stc/cstr.h
index b0fa276f..2dc2ccae 100644
--- a/include/stc/cstr.h
+++ b/include/stc/cstr.h
@@ -178,33 +178,26 @@ extern void cstr_foldcase(cstr* self);
 extern void cstr_lowercase(cstr* self);
 extern void cstr_uppercase(cstr* self);
 
-STC_INLINE bool cstr_valid_u8(const cstr* self) 
+STC_INLINE bool cstr_valid_utf8(const cstr* self)
     { return utf8_valid(cstr_str(self)); }
 
 // other utf8 
 
-STC_INLINE size_t cstr_size_u8(cstr s) 
+STC_INLINE size_t cstr_u8size(cstr s) 
     { return utf8_size(cstr_str(&s)); }
 
-STC_INLINE size_t cstr_size_n_u8(cstr s, size_t nbytes) 
+STC_INLINE size_t cstr_u8size_n(cstr s, size_t nbytes) 
     { return utf8_size_n(cstr_str(&s), nbytes); }
 
-STC_INLINE csview cstr_at(const cstr* self, size_t bytepos) {
-    csview sv = cstr_sv(self);
-    sv.str += bytepos;
-    sv.size = utf8_codep_size(sv.str);
-    return sv;
-}
-
-STC_INLINE csview cstr_at_u8(const cstr* self, size_t u8idx) {
+STC_INLINE csview cstr_view_at(const cstr* self, size_t u8idx) {
     csview sv = cstr_sv(self);
     sv.str = utf8_at(sv.str, u8idx);
     sv.size = utf8_codep_size(sv.str);
     return sv;
 }
 
-STC_INLINE size_t cstr_pos_u8(const cstr* self, size_t u8idx) 
-    { return utf8_pos(cstr_str(self), u8idx); }
+STC_INLINE const char* cstr_at(const cstr* self, size_t u8idx) 
+    { return utf8_at(cstr_str(self), u8idx); }
 
 // utf8 iterator
 
diff --git a/src/utf8code.c b/src/utf8code.c
index 1c1e4336..a6ecdb65 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -46,10 +46,11 @@ unsigned utf8_encode(char *out, uint32_t c)
 }
 
 uint32_t utf8_peek(const char* s, int pos) {
-    int inc = 1;
-    if (pos < 0) pos = -pos, inc = -1;
+    int inc = -1;
+    if (pos > 0)
+        pos = -pos, inc = 1;
     while (pos)
-        pos -= (*(s += inc) & 0xC0) != 0x80;
+        pos += (*(s += inc) & 0xC0) != 0x80;
     utf8_decode_t d = {.state=0};
     do { utf8_decode(&d, (uint8_t)*s++); } while (d.state);
     return d.codep;
author	Tyge Løvset <[email protected]>	2022-06-10 08:41:01 +0200
committer	Tyge Løvset <[email protected]>	2022-06-10 08:41:01 +0200
commit	f1d09dfcc7570e69eb6e9688b736f7b031b22b2d (patch)
tree	461d077630b735851ce901ad90e70e4b299ffe98
parent	d463acdbee5bb3a9509cb8414602f495408583b4 (diff)
download	STC-modified-f1d09dfcc7570e69eb6e9688b736f7b031b22b2d.tar.gz STC-modified-f1d09dfcc7570e69eb6e9688b736f7b031b22b2d.zip