summary | refs | log | tree | commit | diff | homepage
path: root/include
diff options
context:
space:
mode:
author: Tyge Løvset <[email protected]> 2022-06-10 11:29:17 +0200
committer: Tyge Løvset <[email protected]> 2022-06-10 11:29:17 +0200
commit: 8883fc8108428878d3d6291ba8981cf6df72499c (patch)
tree: 9fbdc79019501714dc984c1fbd5eb2c7ea979bb4 /include
parent: f1d09dfcc7570e69eb6e9688b736f7b031b22b2d (diff)
download: STC-modified-8883fc8108428878d3d6291ba8981cf6df72499c.tar.gz
download: STC-modified-8883fc8108428878d3d6291ba8981cf6df72499c.zip
utf8 fixes and improvements. Some api changes.
Diffstat (limited to 'include')
-rw-r--r--  include/stc/cstr.h   | 17
-rw-r--r--  include/stc/csview.h | 10
-rw-r--r--  include/stc/utf8.h   | 13
3 files changed, 22 insertions, 18 deletions
diff --git a/include/stc/cstr.h b/include/stc/cstr.h
index 2dc2ccae..41db1cd3 100644
--- a/include/stc/cstr.h
+++ b/include/stc/cstr.h
@@ -189,21 +189,24 @@ STC_INLINE size_t cstr_u8size(cstr s)
STC_INLINE size_t cstr_u8size_n(cstr s, size_t nbytes)
{ return utf8_size_n(cstr_str(&s), nbytes); }
-STC_INLINE csview cstr_view_at(const cstr* self, size_t u8idx) {
+STC_INLINE size_t cstr_bytepos(const cstr* self, size_t u8idx)
+ { return utf8_pos(cstr_str(self), u8idx); }
+
+STC_INLINE const char* cstr_at(const cstr* self, size_t u8idx)
+ { return utf8_at(cstr_str(self), u8idx); }
+
+STC_INLINE csview cstr_chr(const cstr* self, size_t u8idx) {
csview sv = cstr_sv(self);
sv.str = utf8_at(sv.str, u8idx);
- sv.size = utf8_codep_size(sv.str);
+ sv.size = utf8_chr_size(sv.str);
return sv;
}
-STC_INLINE const char* cstr_at(const cstr* self, size_t u8idx)
- { return utf8_at(cstr_str(self), u8idx); }
-
// utf8 iterator
STC_INLINE cstr_iter cstr_begin(const cstr* self) {
const char* str = cstr_str(self);
- return c_make(cstr_iter){.chr = {str, utf8_codep_size(str)}};
+ return c_make(cstr_iter){.chr = {str, utf8_chr_size(str)}};
}
STC_INLINE cstr_iter cstr_end(const cstr* self) {
csview sv = cstr_sv(self);
@@ -211,7 +214,7 @@ STC_INLINE cstr_iter cstr_end(const cstr* self) {
}
STC_INLINE void cstr_next(cstr_iter* it) {
it->ref += it->chr.size;
- it->chr.size = utf8_codep_size(it->ref);
+ it->chr.size = utf8_chr_size(it->ref);
}
diff --git a/include/stc/csview.h b/include/stc/csview.h
index 6cfd6e82..6d12901b 100644
--- a/include/stc/csview.h
+++ b/include/stc/csview.h
@@ -81,25 +81,25 @@ STC_INLINE csview csview_slice(csview sv, size_t p1, size_t p2) {
/* iterator */
STC_INLINE csview_iter csview_begin(const csview* self)
- { return c_make(csview_iter){.chr = {self->str, utf8_codep_size(self->str)}}; }
+ { return c_make(csview_iter){.chr = {self->str, utf8_chr_size(self->str)}}; }
STC_INLINE csview_iter csview_end(const csview* self)
{ return c_make(csview_iter){self->str + self->size}; }
STC_INLINE void csview_next(csview_iter* it)
- { it->ref += it->chr.size; it->chr.size = utf8_codep_size(it->ref); }
+ { it->ref += it->chr.size; it->chr.size = utf8_chr_size(it->ref); }
/* utf8 */
-STC_INLINE size_t csview_size_u8(csview sv)
+STC_INLINE size_t csview_u8size(csview sv)
{ return utf8_size_n(sv.str, sv.size); }
-STC_INLINE csview csview_substr_u8(csview sv, size_t u8pos, size_t u8len) {
+STC_INLINE csview csview_u8substr(csview sv, size_t u8pos, size_t u8len) {
sv.str = utf8_at(sv.str, u8pos);
sv.size = utf8_pos(sv.str, u8len);
return sv;
}
-STC_INLINE bool csview_valid_u8(csview sv) // depends on src/utf8code.c
+STC_INLINE bool csview_valid_utf8(csview sv) // depends on src/utf8code.c
{ return utf8_valid_n(sv.str, sv.size); }
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index b7edd2cb..31ea3aa9 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -37,7 +37,7 @@ uint32_t utf8_casefold(uint32_t c);
uint32_t utf8_tolower(uint32_t c);
uint32_t utf8_toupper(uint32_t c);
bool utf8_valid(const char* s);
-bool utf8_valid_n(const char* s, size_t n);
+bool utf8_valid_n(const char* s, size_t nbytes);
int utf8_icmp_n(size_t u8max, const char* s1, size_t n1,
const char* s2, size_t n2);
unsigned utf8_encode(char *out, uint32_t c);
@@ -60,7 +60,7 @@ STC_INLINE int utf8_icmp(const char* s1, const char* s2) {
}
/* number of bytes (chars) in the utf8 codepoint starting at s; 0 if *s cannot start a codepoint */
-STC_INLINE unsigned utf8_codep_size(const char *s) {
+STC_INLINE unsigned utf8_chr_size(const char *s) {
unsigned b = (uint8_t)*s;
if (b < 0x80) return 1;
if (b < 0xC2) return 0;
@@ -74,14 +74,15 @@ STC_INLINE unsigned utf8_codep_size(const char *s) {
STC_INLINE size_t utf8_size(const char *s) {
size_t size = 0;
while (*s)
- size += (*s++ & 0xC0) != 0x80;
+ size += (*++s & 0xC0) != 0x80;
return size;
}
-STC_INLINE size_t utf8_size_n(const char *s, size_t n) {
+STC_INLINE size_t utf8_size_n(const char *s, size_t nbytes) {
size_t size = 0;
- while ((n-- != 0) & (*s != 0))
- size += (*s++ & 0xC0) != 0x80;
+ while ((nbytes-- != 0) & (*s != 0)) {
+ size += (*++s & 0xC0) != 0x80;
+ }
return size;
}