summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorTyge Løvset <[email protected]>2022-06-10 08:41:01 +0200
committerTyge Løvset <[email protected]>2022-06-10 08:41:01 +0200
commitf1d09dfcc7570e69eb6e9688b736f7b031b22b2d (patch)
tree461d077630b735851ce901ad90e70e4b299ffe98
parentd463acdbee5bb3a9509cb8414602f495408583b4 (diff)
downloadSTC-modified-f1d09dfcc7570e69eb6e9688b736f7b031b22b2d.tar.gz
STC-modified-f1d09dfcc7570e69eb6e9688b736f7b031b22b2d.zip
Some changes to utf8 cstr methods.
-rw-r--r--docs/cstr_api.md11
-rw-r--r--include/stc/cstr.h19
-rw-r--r--src/utf8code.c7
3 files changed, 15 insertions, 22 deletions
diff --git a/docs/cstr_api.md b/docs/cstr_api.md
index 8dd1b397..cfc807f6 100644
--- a/docs/cstr_api.md
+++ b/docs/cstr_api.md
@@ -85,11 +85,10 @@ bool cstr_getdelim(cstr *self, int delim, FILE *stream); // does no
#### UTF8 methods
```c
-size_t cstr_size_u8(cstr s); // number of utf8 codepoints
-size_t cstr_size_n_u8(cstr s, size_t nbytes); // utf8 size within n bytes
-csview cstr_at(const cstr* self, size_t bytepos); // utf8 codepoints as a csview
-csview cstr_at_u8(const cstr* self, size_t u8idx); // utf8 codepoints at utf8 pos
-size_t cstr_pos_u8(const cstr* self, size_t u8idx); // byte position at utf8 index
+size_t cstr_u8size(cstr s); // number of utf8 codepoints
+size_t cstr_u8size_n(cstr s, size_t nbytes); // utf8 size within n bytes
+const char* cstr_at(const cstr* self, size_t u8idx); // pointer to the byte at utf8 index
+csview cstr_view_at(const cstr* self, size_t u8idx); // utf8 codepoint at utf8 pos as csview
// iterate utf8 codepoints
cstr_iter cstr_begin(const cstr* self);
@@ -97,7 +96,7 @@ cstr_iter cstr_end(const cstr* self);
void cstr_next(cstr_iter* it);
// utf8 functions require linking with src/utf8code.c symbols:
-bool cstr_valid_u8(const cstr* self); // check if str is valid utf8
+bool cstr_valid_utf8(const cstr* self); // check if str is valid utf8
cstr cstr_tolower(const cstr* self); // returns new lowercase utf8 cstr
cstr cstr_toupper(const cstr* self); // returns new uppercase utf8 cstr
void cstr_lowercase(cstr* self); // transform cstr to lowercase utf8
diff --git a/include/stc/cstr.h b/include/stc/cstr.h
index b0fa276f..2dc2ccae 100644
--- a/include/stc/cstr.h
+++ b/include/stc/cstr.h
@@ -178,33 +178,26 @@ extern void cstr_foldcase(cstr* self);
extern void cstr_lowercase(cstr* self);
extern void cstr_uppercase(cstr* self);
-STC_INLINE bool cstr_valid_u8(const cstr* self)
+STC_INLINE bool cstr_valid_utf8(const cstr* self)
{ return utf8_valid(cstr_str(self)); }
// other utf8
-STC_INLINE size_t cstr_size_u8(cstr s)
+STC_INLINE size_t cstr_u8size(cstr s)
{ return utf8_size(cstr_str(&s)); }
-STC_INLINE size_t cstr_size_n_u8(cstr s, size_t nbytes)
+STC_INLINE size_t cstr_u8size_n(cstr s, size_t nbytes)
{ return utf8_size_n(cstr_str(&s), nbytes); }
-STC_INLINE csview cstr_at(const cstr* self, size_t bytepos) {
- csview sv = cstr_sv(self);
- sv.str += bytepos;
- sv.size = utf8_codep_size(sv.str);
- return sv;
-}
-
-STC_INLINE csview cstr_at_u8(const cstr* self, size_t u8idx) {
+STC_INLINE csview cstr_view_at(const cstr* self, size_t u8idx) {
csview sv = cstr_sv(self);
sv.str = utf8_at(sv.str, u8idx);
sv.size = utf8_codep_size(sv.str);
return sv;
}
-STC_INLINE size_t cstr_pos_u8(const cstr* self, size_t u8idx)
- { return utf8_pos(cstr_str(self), u8idx); }
+STC_INLINE const char* cstr_at(const cstr* self, size_t u8idx)
+ { return utf8_at(cstr_str(self), u8idx); }
// utf8 iterator
diff --git a/src/utf8code.c b/src/utf8code.c
index 1c1e4336..a6ecdb65 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -46,10 +46,11 @@ unsigned utf8_encode(char *out, uint32_t c)
}
uint32_t utf8_peek(const char* s, int pos) {
- int inc = 1;
- if (pos < 0) pos = -pos, inc = -1;
+ int inc = -1;
+ if (pos > 0)
+ pos = -pos, inc = 1;
while (pos)
- pos -= (*(s += inc) & 0xC0) != 0x80;
+ pos += (*(s += inc) & 0xC0) != 0x80;
utf8_decode_t d = {.state=0};
do { utf8_decode(&d, (uint8_t)*s++); } while (d.state);
return d.codep;