From f78823c591a73e7aa58115b4ffa618fe477ca0e2 Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Thu, 13 Jan 2022 23:16:41 +0100 Subject: Changed cstr_replace_v() and added utf8_substr(). Moved/changed some code from utf8.h to csview.h. csview.h now includes utf8.h --- include/stc/csview.h | 19 +++++++++++++++++-- include/stc/utf8.h | 53 ++++++++++++++++++++-------------------------------- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/include/stc/csview.h b/include/stc/csview.h index c8c80b4d..58a7f4f1 100644 --- a/include/stc/csview.h +++ b/include/stc/csview.h @@ -24,6 +24,7 @@ #define CSVIEW_H_INCLUDED #include "ccommon.h" +#include "utf8.h" typedef struct csview { const char* str; size_t size; } csview; typedef struct csview_iter { const char *ref; } csview_iter; @@ -73,6 +74,20 @@ STC_INLINE csview_iter csview_end(const csview* self) { return c_make(csview_iter){self->str + self->size}; } STC_INLINE void csview_next(csview_iter* it) { ++it->ref; } +/* utf8 */ +STC_INLINE bool csview_valid_utf8(csview sv) + { return utf8_valid(sv.str); } + +STC_INLINE size_t csview_size_utf8(csview sv) + { return utf8_size(sv.str); } + +STC_INLINE csview utf8_substr(const char* str, size_t pos, size_t n) { + csview sv; + sv.str = utf8_at(str, pos); + sv.size = utf8_at(sv.str, n) - sv.str; + return sv; +} + /* csview interaction with cstr: */ #ifdef CSTR_H_INCLUDED @@ -96,8 +111,8 @@ STC_INLINE cstr* cstr_append_v(cstr* self, csview sv) { return cstr_append_n(self, sv.str, sv.size); } STC_INLINE void cstr_insert_v(cstr* self, size_t pos, csview sv) { cstr_replace_n(self, pos, 0, sv.str, sv.size); } -STC_INLINE void cstr_replace_v(cstr* self, size_t pos, size_t len, csview sv) - { cstr_replace_n(self, pos, len, sv.str, sv.size); } +STC_INLINE void cstr_replace_v(cstr* self, csview sub, csview with) + { cstr_replace_n(self, sub.str - self->str, sub.size, with.str, with.size); } STC_INLINE bool cstr_equals_v(cstr s, csview sv) { return sv.size == cstr_size(s) && !memcmp(s.str, 
sv.str, sv.size); } STC_INLINE size_t cstr_find_v(cstr s, csview needle) diff --git a/include/stc/utf8.h b/include/stc/utf8.h index 0455a0c5..9a65906d 100644 --- a/include/stc/utf8.h +++ b/include/stc/utf8.h @@ -1,6 +1,21 @@ #ifndef UTF8_H_INCLUDED #define UTF8_H_INCLUDED +/* +// Example: +#include +#include +int main() +{ + c_auto (cstr, s1) { + s1 = cstr_new("hell😀 world"); + cstr_replace_v(&s1, utf8_substr(s1.str, 4, 1), c_sv("x")); + printf("%s\n", s1.str); + } +} +// Output: +// hellx world +*/ #include "ccommon.h" #include @@ -27,7 +42,7 @@ STC_INLINE uint32_t utf8_peek(const char *s) return codepoint; } -STC_INLINE int utf8_codepoint_width(char c) +STC_INLINE int utf8_codepoint_size(char c) { uint8_t u = (uint8_t)c; int ret = (u & 0xF0) == 0xE0; @@ -40,41 +55,13 @@ STC_INLINE int utf8_codepoint_width(char c) STC_INLINE const char *utf8_next(const char *s) { - const char* t = s + utf8_codepoint_width(s[0]); + const char* t = s + utf8_codepoint_size(s[0]); uintptr_t p = (uintptr_t)t; p &= (uintptr_t) -(*s != 0); return (const char *)p; } -#ifdef CSVIEW_H_INCLUDED -STC_INLINE bool csview_valid_utf8(csview sv) - { return utf8_valid(sv.str); } - -STC_INLINE size_t csview_size_utf8(csview sv) - { return utf8_size(sv.str); } - -STC_INLINE csview csview_substr_utf8(csview sv, size_t pos, size_t n) { - sv.str = utf8_at(sv.str, pos); - sv.size = utf8_at(sv.str, n) - sv.str; - return sv; -} -#endif - -#ifdef CSTR_H_INCLUDED -STC_INLINE bool cstr_valid_utf8(cstr s) - { return utf8_valid(cstr_str(&s)); } - -STC_INLINE size_t cstr_size_utf8(cstr s) - { return utf8_size(cstr_str(&s)); } - -STC_INLINE csview cstr_substr_utf8(cstr s, size_t pos, size_t n) { - csview sv = {utf8_at(cstr_str(&s), pos)}; - sv.size = utf8_at(sv.str, n) - sv.str; - return sv; -} -#endif - // --------------------------- IMPLEMENTATION --------------------------------- // Copyright (c) 2008-2009 Bjoern Hoehrmann // See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. 
@@ -135,9 +122,9 @@ STC_DEF size_t utf8_size_n(const char *s, size_t n) STC_DEF const char* utf8_at(const char *s, size_t index) { uint32_t state = 0, codepoint; - - for (size_t size = 0; (size < index) & (*s != 0); ++s) - size += !utf8_decode(&state, &codepoint, (uint8_t)*s); + + for (size_t k = 0; (k < index) & (*s != 0); ++s) + k += !utf8_decode(&state, &codepoint, (uint8_t)*s); return s; } -- cgit v1.2.3