diff options
| author | Tyge Løvset <[email protected]> | 2022-01-14 08:16:52 +0100 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2022-01-14 08:16:52 +0100 |
| commit | 379c01f19e186dbd9889bc9dd43d6d867a2a33be (patch) | |
| tree | 85f38eb2439b5b8f7897979b5d5286a583ba994f /include/stc | |
| parent | f78823c591a73e7aa58115b4ffa618fe477ca0e2 (diff) | |
| download | STC-modified-379c01f19e186dbd9889bc9dd43d6d867a2a33be.tar.gz STC-modified-379c01f19e186dbd9889bc9dd43d6d867a2a33be.zip | |
Removed the cstr iterator; the csview iterator is now a UTF-8 iterator. See the example in utf8.h.
Diffstat (limited to 'include/stc')
| -rw-r--r-- | include/stc/cstr.h | 6 | ||||
| -rw-r--r-- | include/stc/csview.h | 7 | ||||
| -rw-r--r-- | include/stc/utf8.h | 34 |
3 files changed, 23 insertions, 24 deletions
diff --git a/include/stc/cstr.h b/include/stc/cstr.h
index 7ba20d10..04592b69 100644
--- a/include/stc/cstr.h
+++ b/include/stc/cstr.h
@@ -31,7 +31,6 @@
#include <ctype.h>
typedef struct cstr { char* str; } cstr;
-typedef struct cstr_iter { char *ref; } cstr_iter;
typedef char cstr_value;
#define cstr_npos (SIZE_MAX >> 1)
@@ -110,11 +109,6 @@ STC_INLINE void cstr_erase(cstr* self, const size_t pos)
STC_INLINE char* cstr_front(cstr* self) { return self->str; }
STC_INLINE char* cstr_back(cstr* self)
{ return self->str + _cstr_rep(self)->size - 1; }
-STC_INLINE cstr_iter cstr_begin(cstr* self)
- { return c_make(cstr_iter){self->str}; }
-STC_INLINE cstr_iter cstr_end(cstr* self)
- { return c_make(cstr_iter){self->str + _cstr_rep(self)->size}; }
-STC_INLINE void cstr_next(cstr_iter* it) {++it->ref; }
STC_INLINE bool cstr_equals(cstr s, const char* str)
{ return strcmp(s.str, str) == 0; }
STC_INLINE bool cstr_equals_s(cstr s1, cstr s2)
diff --git a/include/stc/csview.h b/include/stc/csview.h
index 58a7f4f1..d8bb567e 100644
--- a/include/stc/csview.h
+++ b/include/stc/csview.h
@@ -27,7 +27,7 @@
#include "utf8.h"
typedef struct csview { const char* str; size_t size; } csview;
-typedef struct csview_iter { const char *ref; } csview_iter;
+typedef union csview_iter { const char *ref; csview cp; } csview_iter;
typedef char csview_value;
#define csview_null c_make(csview){"", 0}
@@ -69,10 +69,11 @@ STC_INLINE bool csview_ends_with(csview sv, csview sub)
{ if (sub.size > sv.size) return false;
return !memcmp(sv.str + sv.size - sub.size, sub.str, sub.size); }
STC_INLINE csview_iter csview_begin(const csview* self)
- { return c_make(csview_iter){self->str}; }
+ { return c_make(csview_iter){.cp = {self->str, utf8_codepoint_size(*self->str)}}; }
STC_INLINE csview_iter csview_end(const csview* self)
{ return c_make(csview_iter){self->str + self->size}; }
-STC_INLINE void csview_next(csview_iter* it) { ++it->ref; }
+STC_INLINE void csview_next(csview_iter* it)
+ { it->ref += it->cp.size; it->cp.size = utf8_codepoint_size(*it->ref); }
/* utf8 */
STC_INLINE bool csview_valid_utf8(csview sv)
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index 9a65906d..a064b906 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -8,13 +8,18 @@
int main()
{
c_auto (cstr, s1) {
- s1 = cstr_new("hell😀 world");
- cstr_replace_v(&s1, utf8_substr(s1.str, 4, 1), c_sv("x"));
+ s1 = cstr_new("hell😀 w😀rld");
+ cstr_replace_v(&s1, utf8_substr(s1.str, 7, 1), c_sv("x"));
printf("%s\n", s1.str);
+
+ csview sv = csview_from_s(s1);
+ c_foreach (i, csview, sv)
+ printf(c_PRIsv ",", c_ARGsv(i.cp));
}
}
// Output:
-// Hellx world
+// hell😀 wxrld
+// h,e,l,l,😀, ,w,x,r,l,d,
*/
#include "ccommon.h"
#include <ctype.h>
@@ -32,20 +37,21 @@ STC_API const char* utf8_at(const char *s, size_t index);
/* decode next utf8 codepoint. */
STC_API uint32_t utf8_decode(uint32_t *state, uint32_t *codep, const uint32_t byte);
-STC_INLINE bool utf8_valid(const char* str)
- { return utf8_size(str) != SIZE_MAX; }
+STC_INLINE size_t utf8_pos(const char* s, size_t index)
+ { return utf8_at(s, index) - s; }
-STC_INLINE uint32_t utf8_peek(const char *s)
-{
+STC_INLINE bool utf8_valid(const char* s)
+ { return utf8_size(s) != SIZE_MAX; }
+
+STC_INLINE uint32_t utf8_peek(const char *s) {
uint32_t state = 0, codepoint;
- utf8_decode(&state, &codepoint, (uint8_t)s[0]);
+ utf8_decode(&state, &codepoint, (uint8_t)*s);
return codepoint;
}
-STC_INLINE int utf8_codepoint_size(char c)
-{
+STC_INLINE size_t utf8_codepoint_size(char c) {
uint8_t u = (uint8_t)c;
- int ret = (u & 0xF0) == 0xE0;
+ size_t ret = (u & 0xF0) == 0xE0;
ret += ret << 1; // 3
ret |= u < 0x80; // 1
ret |= ((0xC1 < u) & (u < 0xE0)) << 1; // 2
@@ -53,9 +59,8 @@ STC_INLINE int utf8_codepoint_size(char c)
return ret;
}
-STC_INLINE const char *utf8_next(const char *s)
-{
- const char* t = s + utf8_codepoint_size(s[0]);
+STC_INLINE const char *utf8_next(const char *s) {
+ const char* t = s + utf8_codepoint_size(*s);
uintptr_t p = (uintptr_t)t;
p &= (uintptr_t) -(*s != 0);
@@ -128,7 +133,6 @@ STC_DEF const char* utf8_at(const char *s, size_t index)
return s;
}
-
#endif
#endif
#undef i_opt
|
