diff options
| author | Tyge Løvset <[email protected]> | 2023-08-14 16:46:24 +0200 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2023-08-14 16:46:24 +0200 |
| commit | 78d8668e6d527070568a405408ed906e51055bf4 (patch) | |
| tree | 60f963a36826acce264c7ecd0af3eb80502a4335 /include | |
| parent | 2b6b4785c5c26bc47d800c1a7c7a48784df2d57b (diff) | |
| download | STC-modified-78d8668e6d527070568a405408ed906e51055bf4.tar.gz STC-modified-78d8668e6d527070568a405408ed906e51055bf4.zip | |
Reverted csubstr => csview. Sorry about that!
Added crawstr to become the null-terminated string view.
Diffstat (limited to 'include')
| -rw-r--r-- | include/stc/ccommon.h | 13 | ||||
| -rw-r--r-- | include/stc/crawstr.h | 116 | ||||
| -rw-r--r-- | include/stc/cregex.h | 36 | ||||
| -rw-r--r-- | include/stc/cstr.h | 148 | ||||
| -rw-r--r-- | include/stc/csubstr.h | 211 | ||||
| -rw-r--r-- | include/stc/csview.h | 159 | ||||
| -rw-r--r-- | include/stc/forward.h | 34 | ||||
| -rw-r--r-- | include/stc/priv/template.h | 14 | ||||
| -rw-r--r-- | include/stc/utf8.h | 4 |
9 files changed, 367 insertions, 368 deletions
diff --git a/include/stc/ccommon.h b/include/stc/ccommon.h index 1b4a2277..e5422adc 100644 --- a/include/stc/ccommon.h +++ b/include/stc/ccommon.h @@ -130,14 +130,13 @@ typedef const char* ccharptr; #define ccharptr_clone(s) (s) #define ccharptr_drop(p) ((void)p) -#define c_ss(...) c_MACRO_OVERLOAD(c_ss, __VA_ARGS__) -#define c_ss_1(literal) c_ss_2(literal, c_litstrlen(literal)) -#define c_ss_2(str, n) (c_LITERAL(csubstr){str, n}) -#define c_SS(ss) (int)(ss).size, (ss).str // printf("%.*s\n", c_SS(ss)); - -#define c_sv(literal) c_sv_2(literal, c_litstrlen(literal)) +#define c_sv(...) c_MACRO_OVERLOAD(c_sv, __VA_ARGS__) +#define c_sv_1(literal) c_sv_2(literal, c_litstrlen(literal)) #define c_sv_2(str, n) (c_LITERAL(csview){str, n}) -#define c_SV(sv) c_SS(sv) // [deprecated] - unneeded +#define c_SV(ss) (int)(ss).size, (ss).str // printf("%.*s\n", c_SV(ss)); + +#define c_rs(literal) c_rs_2(literal, c_litstrlen(literal)) +#define c_rs_2(str, n) (c_LITERAL(crawstr){str, n}) #define c_ROTL(x, k) (x << (k) | x >> (8*sizeof(x) - (k))) diff --git a/include/stc/crawstr.h b/include/stc/crawstr.h new file mode 100644 index 00000000..7cf62e94 --- /dev/null +++ b/include/stc/crawstr.h @@ -0,0 +1,116 @@ +/* MIT License + * + * Copyright (c) 2023 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#define _i_inc_utf8 +#include "utf8.h" + +#ifndef CRAWSTR_H_INCLUDED +#define CRAWSTR_H_INCLUDED + +#define crawstr_init() c_rs("") +#define crawstr_drop(p) c_default_drop(p) +#define crawstr_clone(rs) c_default_clone(rs) + +STC_INLINE crawstr crawstr_from(const char* str) + { return c_rs_2(str, c_strlen(str)); } +STC_INLINE void crawstr_clear(crawstr* self) { *self = crawstr_init(); } +STC_INLINE csview crawstr_sv(crawstr rs) { return c_sv_2(rs.str, rs.size); } + +STC_INLINE intptr_t crawstr_size(crawstr rs) { return rs.size; } +STC_INLINE bool crawstr_empty(crawstr rs) { return rs.size == 0; } + +STC_INLINE bool crawstr_equals(crawstr rs, const char* str) { + intptr_t n = c_strlen(str); + return rs.size == n && !c_memcmp(rs.str, str, n); +} + +STC_INLINE intptr_t crawstr_find(crawstr rs, const char* search) { + char* res = cstrnstrn(rs.str, search, rs.size, c_strlen(search)); + return res ? (res - rs.str) : c_NPOS; +} + +STC_INLINE bool crawstr_contains(crawstr rs, const char* str) + { return crawstr_find(rs, str) != c_NPOS; } + +STC_INLINE bool crawstr_starts_with(crawstr rs, const char* str) { + intptr_t n = c_strlen(str); + return n > rs.size ? false : !c_memcmp(rs.str, str, n); +} + +STC_INLINE bool crawstr_ends_with(crawstr rs, const char* str) { + intptr_t n = c_strlen(str); + return n > rs.size ? false : !c_memcmp(rs.str + rs.size - n, str, n); +} + +/* utf8 iterator */ +STC_INLINE crawstr_iter crawstr_begin(const crawstr* self) { + if (!self->size) return c_LITERAL(crawstr_iter){.ref = NULL}; + return c_LITERAL(crawstr_iter){.u8 = {{self->str, utf8_chr_size(self->str)}}}; +} +STC_INLINE crawstr_iter crawstr_end(const crawstr* self) { + (void)self; return c_LITERAL(crawstr_iter){.ref = NULL}; +} +STC_INLINE void crawstr_next(crawstr_iter* it) { + it->ref += it->u8.chr.size; + it->u8.chr.size = utf8_chr_size(it->ref); + if (!*it->ref) it->ref = NULL; +} +STC_INLINE crawstr_iter crawstr_advance(crawstr_iter it, intptr_t pos) { + int inc = -1; + if (pos > 0) pos = -pos, inc = 1; + while (pos && *it.ref) pos += (*(it.ref += inc) & 0xC0) != 0x80; + it.u8.chr.size = utf8_chr_size(it.ref); + if (!*it.ref) it.ref = NULL; + return it; +} + +/* utf8 size */ +STC_INLINE intptr_t crawstr_u8_size(crawstr rs) + { return utf8_size_n(rs.str, rs.size); } + +/* utf8 validation: depends on src/utf8code.c */ +STC_INLINE bool crawstr_valid_utf8(crawstr rs) + { return utf8_valid_n(rs.str, rs.size); } + +/* utf8 ignore case cmp: depends on src/utf8code.c */ +STC_INLINE int crawstr_icmp(const crawstr* x, const crawstr* y) + { return utf8_icmp_sv(c_sv_2(x->str, x->size), c_sv_2(y->str, y->size)); } + + +STC_INLINE int crawstr_cmp(const crawstr* x, const crawstr* y) { + intptr_t n = x->size < y->size ? x->size : y->size; + int c = c_memcmp(x->str, y->str, n); + return c ? c : (int)(x->size - y->size); +} + +STC_INLINE bool crawstr_eq(const crawstr* x, const crawstr* y) + { return x->size == y->size && !c_memcmp(x->str, y->str, x->size); } + +STC_INLINE uint64_t crawstr_hash(const crawstr *self) + { return cfasthash(self->str, self->size); } + +#endif // CRAWSTR_H_INCLUDED +#undef i_static +#undef i_header +#undef i_implement +#undef i_import +#undef i_opt diff --git a/include/stc/cregex.h b/include/stc/cregex.h index 3aab3c8b..bce94b04 100644 --- a/include/stc/cregex.h +++ b/include/stc/cregex.h @@ -34,7 +34,7 @@ THE SOFTWARE. */ #include <stdbool.h> #include <string.h> -#include "forward.h" // csubstr +#include "forward.h" // csview #include "ccommon.h" enum { @@ -82,7 +82,7 @@ typedef struct { typedef struct { const cregex* re; const char* input; - csubstr match[CREG_MAX_CAPTURES]; + csview match[CREG_MAX_CAPTURES]; } cregex_iter; #define c_formatch(it, Re, Input) \ @@ -115,11 +115,11 @@ int cregex_captures(const cregex* re); /* return CREG_OK, CREG_NOMATCH or CREG_MATCHERROR. */ #define cregex_find(...) c_MACRO_OVERLOAD(cregex_find, __VA_ARGS__) #define cregex_find_3(re, input, match) cregex_find_4(re, input, match, CREG_DEFAULT) -int cregex_find_4(const cregex* re, const char* input, csubstr match[], int mflags); +int cregex_find_4(const cregex* re, const char* input, csview match[], int mflags); -/* find with csubstr as input. */ -STC_INLINE int cregex_find_ss(const cregex* re, csubstr input, csubstr match[]) { - csubstr *mp = NULL; +/* find with csview as input. */ +STC_INLINE int cregex_find_sv(const cregex* re, csview input, csview match[]) { + csview *mp = NULL; if (match) { match[0] = input; mp = match; } return cregex_find(re, input.str, mp, CREG_STARTEND); } @@ -129,27 +129,27 @@ STC_INLINE int cregex_find_ss(const cregex* re, csubstr input, csubstr match[]) #define cregex_find_pattern_3(pattern, input, match) \ cregex_find_pattern_4(pattern, input, match, CREG_DEFAULT) int cregex_find_pattern_4(const char* pattern, const char* input, - csubstr match[], int cmflags); + csview match[], int cmflags); STC_INLINE bool cregex_is_match(const cregex* re, const char* input) { return cregex_find_4(re, input, NULL, CREG_DEFAULT) == CREG_OK; } -/* replace csubstr input with replace using regular expression pattern */ -#define cregex_replace_ss(...) c_MACRO_OVERLOAD(cregex_replace_ss, __VA_ARGS__) -#define cregex_replace_ss_3(pattern, input, replace) \ - cregex_replace_ss_4(pattern, input, replace, INT32_MAX) -#define cregex_replace_ss_4(pattern, input, replace, count) \ - cregex_replace_ss_6(pattern, input, replace, count, NULL, CREG_DEFAULT) -cstr cregex_replace_ss_6(const cregex* re, csubstr input, const char* replace, int count, - bool (*transform)(int group, csubstr match, cstr* result), int rflags); +/* replace csview input with replace using regular expression pattern */ +#define cregex_replace_sv(...) c_MACRO_OVERLOAD(cregex_replace_sv, __VA_ARGS__) +#define cregex_replace_sv_3(pattern, input, replace) \ + cregex_replace_sv_4(pattern, input, replace, INT32_MAX) +#define cregex_replace_sv_4(pattern, input, replace, count) \ + cregex_replace_sv_6(pattern, input, replace, count, NULL, CREG_DEFAULT) +cstr cregex_replace_sv_6(const cregex* re, csview input, const char* replace, int count, + bool (*transform)(int group, csview match, cstr* result), int rflags); /* replace input with replace using regular expression */ #define cregex_replace(...) c_MACRO_OVERLOAD(cregex_replace, __VA_ARGS__) #define cregex_replace_3(re, input, replace) cregex_replace_4(re, input, replace, INT32_MAX) STC_INLINE cstr cregex_replace_4(const cregex* re, const char* input, const char* replace, int count) { - csubstr ss = {input, c_strlen(input)}; - return cregex_replace_ss_4(re, ss, replace, count); + csview sv = {input, c_strlen(input)}; + return cregex_replace_sv_4(re, sv, replace, count); } /* replace + compile RE pattern, and extra arguments */ @@ -159,7 +159,7 @@ STC_INLINE cstr cregex_replace_4(const cregex* re, const char* input, const char #define cregex_replace_pattern_4(pattern, input, replace, count) \ cregex_replace_pattern_6(pattern, input, replace, count, NULL, CREG_DEFAULT) cstr cregex_replace_pattern_6(const char* pattern, const char* input, const char* replace, int count, - bool (*transform)(int group, csubstr match, cstr* result), int crflags); + bool (*transform)(int group, csview match, cstr* result), int crflags); /* destroy regex */ void cregex_drop(cregex* re); diff --git a/include/stc/cstr.h b/include/stc/cstr.h index ce398628..7f4bad97 100644 --- a/include/stc/cstr.h +++ b/include/stc/cstr.h @@ -75,7 +75,7 @@ STC_API char* cstr_reserve(cstr* self, intptr_t cap); STC_API void cstr_shrink_to_fit(cstr* self); STC_API char* cstr_resize(cstr* self, intptr_t size, char value); STC_API intptr_t cstr_find_at(const cstr* self, intptr_t pos, const char* search); -STC_API intptr_t cstr_find_ss(const cstr* self, csubstr search); +STC_API intptr_t cstr_find_sv(const cstr* self, csview search); STC_API char* cstr_assign_n(cstr* self, const char* str, intptr_t len); STC_API char* cstr_append_n(cstr* self, const char* str, intptr_t len); STC_API char* cstr_append_uninit(cstr *self, intptr_t len); @@ -85,7 +85,7 @@ STC_API void cstr_u8_erase(cstr* self, intptr_t bytepos, intptr_t u8len); STC_API cstr cstr_from_fmt(const char* fmt, ...); STC_API intptr_t cstr_append_fmt(cstr* self, const char* fmt, ...); STC_API intptr_t cstr_printf(cstr* self, const char* fmt, ...); -STC_API cstr cstr_replace_ss(csubstr sv, csubstr search, csubstr repl, int32_t count); +STC_API cstr cstr_replace_sv(csview sv, csview search, csview repl, int32_t count); STC_API uint64_t cstr_hash(const cstr *self); STC_INLINE cstr_buf cstr_buffer(cstr* s) { @@ -93,12 +93,12 @@ STC_INLINE cstr_buf cstr_buffer(cstr* s) { ? c_LITERAL(cstr_buf){s->lon.data, cstr_l_size(s), cstr_l_cap(s)} : c_LITERAL(cstr_buf){s->sml.data, cstr_s_size(s), cstr_s_cap}; } -STC_INLINE csview cstr_sv(const cstr* s) { - return cstr_is_long(s) ? c_sv_2(s->lon.data, cstr_l_size(s)) - : c_sv_2(s->sml.data, cstr_s_size(s)); +STC_INLINE crawstr cstr_rs(const cstr* s) { + return cstr_is_long(s) ? c_rs_2(s->lon.data, cstr_l_size(s)) + : c_rs_2(s->sml.data, cstr_s_size(s)); } -STC_INLINE csubstr cstr_ss(const cstr* s) - { csview sv = cstr_sv(s); return c_ss_2(sv.str, sv.size); } +STC_INLINE csview cstr_sv(const cstr* s) + { crawstr rs = cstr_rs(s); return c_sv_2(rs.str, rs.size); } STC_INLINE cstr cstr_init(void) { return cstr_null; } @@ -112,12 +112,12 @@ STC_INLINE cstr cstr_from_n(const char* str, const intptr_t len) { STC_INLINE cstr cstr_from(const char* str) { return cstr_from_n(str, c_strlen(str)); } -STC_INLINE cstr cstr_from_ss(csubstr sv) - { return cstr_from_n(sv.str, sv.size); } - STC_INLINE cstr cstr_from_sv(csview sv) { return cstr_from_n(sv.str, sv.size); } +STC_INLINE cstr cstr_from_rs(crawstr rs) + { return cstr_from_n(rs.str, rs.size); } + STC_INLINE cstr cstr_with_size(const intptr_t size, const char value) { cstr s; c_memset(_cstr_init(&s, size, size), value, size); @@ -144,8 +144,8 @@ STC_INLINE cstr cstr_move(cstr* self) { } STC_INLINE cstr cstr_clone(cstr s) { - csview sv = cstr_sv(&s); - return cstr_from_n(sv.str, sv.size); + crawstr rs = cstr_rs(&s); + return cstr_from_n(rs.str, rs.size); } STC_INLINE void cstr_drop(cstr* self) { @@ -175,9 +175,9 @@ STC_INLINE intptr_t cstr_capacity(const cstr* self) // utf8 methods defined in/depending on src/utf8code.c: -extern cstr cstr_casefold_ss(csubstr sv); -extern cstr cstr_tolower_ss(csubstr sv); -extern cstr cstr_toupper_ss(csubstr sv); +extern cstr cstr_casefold_sv(csview sv); +extern cstr cstr_tolower_sv(csview sv); +extern cstr cstr_toupper_sv(csview sv); extern cstr cstr_tolower(const char* str); extern cstr cstr_toupper(const char* str); extern void cstr_lowercase(cstr* self); @@ -198,9 +198,9 @@ STC_INLINE intptr_t cstr_u8_to_pos(const cstr* self, intptr_t u8idx) STC_INLINE const char* cstr_u8_at(const cstr* self, intptr_t u8idx) { return utf8_at(cstr_str(self), u8idx); } -STC_INLINE csubstr cstr_u8_chr(const cstr* self, intptr_t u8idx) { +STC_INLINE csview cstr_u8_chr(const cstr* self, intptr_t u8idx) { const char* str = cstr_str(self); - csubstr sv; + csview sv; sv.str = utf8_at(str, u8idx); sv.size = utf8_chr_size(sv.str); return sv; @@ -209,9 +209,9 @@ STC_INLINE csubstr cstr_u8_chr(const cstr* self, intptr_t u8idx) { // utf8 iterator STC_INLINE cstr_iter cstr_begin(const cstr* self) { - csview sv = cstr_sv(self); - if (!sv.size) return c_LITERAL(cstr_iter){.ref = NULL}; - return c_LITERAL(cstr_iter){.u8 = {{sv.str, utf8_chr_size(sv.str)}}}; + crawstr rs = cstr_rs(self); + if (!rs.size) return c_LITERAL(cstr_iter){.ref = NULL}; + return c_LITERAL(cstr_iter){.u8 = {{rs.str, utf8_chr_size(rs.str)}}}; } STC_INLINE cstr_iter cstr_end(const cstr* self) { (void)self; return c_LITERAL(cstr_iter){NULL}; @@ -241,7 +241,7 @@ STC_INLINE int cstr_icmp(const cstr* s1, const cstr* s2) { return utf8_icmp(cstr_str(s1), cstr_str(s2)); } STC_INLINE bool cstr_eq(const cstr* s1, const cstr* s2) { - csview x = cstr_sv(s1), y = cstr_sv(s2); + crawstr x = cstr_rs(s1), y = cstr_rs(s2); return x.size == y.size && !c_memcmp(x.str, y.str, x.size); } @@ -249,7 +249,7 @@ STC_INLINE bool cstr_eq(const cstr* s1, const cstr* s2) { STC_INLINE bool cstr_equals(const cstr* self, const char* str) { return !strcmp(cstr_str(self), str); } -STC_INLINE bool cstr_equals_ss(const cstr* self, csubstr sv) +STC_INLINE bool cstr_equals_sv(const cstr* self, csview sv) { return sv.size == cstr_size(self) && !c_memcmp(cstr_str(self), sv.str, sv.size); } STC_INLINE bool cstr_equals_s(const cstr* self, cstr s) @@ -271,14 +271,14 @@ STC_INLINE intptr_t cstr_find_s(const cstr* self, cstr search) STC_INLINE bool cstr_contains(const cstr* self, const char* search) { return strstr((char*)cstr_str(self), search) != NULL; } -STC_INLINE bool cstr_contains_ss(const cstr* self, csubstr search) - { return cstr_find_ss(self, search) != c_NPOS; } +STC_INLINE bool cstr_contains_sv(const cstr* self, csview search) + { return cstr_find_sv(self, search) != c_NPOS; } STC_INLINE bool cstr_contains_s(const cstr* self, cstr search) { return strstr((char*)cstr_str(self), cstr_str(&search)) != NULL; } -STC_INLINE bool cstr_starts_with_ss(const cstr* self, csubstr sub) { +STC_INLINE bool cstr_starts_with_sv(const cstr* self, csview sub) { if (sub.size > cstr_size(self)) return false; return !c_memcmp(cstr_str(self), sub.str, sub.size); } @@ -290,43 +290,43 @@ STC_INLINE bool cstr_starts_with(const cstr* self, const char* sub) { } STC_INLINE bool cstr_starts_with_s(const cstr* self, cstr sub) - { return cstr_starts_with_ss(self, cstr_ss(&sub)); } + { return cstr_starts_with_sv(self, cstr_sv(&sub)); } STC_INLINE bool cstr_istarts_with(const cstr* self, const char* sub) { - csubstr sv = cstr_ss(self); + csview sv = cstr_sv(self); intptr_t len = c_strlen(sub); - return len <= sv.size && !utf8_icmp_ss(sv, c_ss(sub, len)); + return len <= sv.size && !utf8_icmp_sv(sv, c_sv(sub, len)); } -STC_INLINE bool cstr_ends_with_ss(const cstr* self, csubstr sub) { - csview sv = cstr_sv(self); - if (sub.size > sv.size) return false; - return !c_memcmp(sv.str + sv.size - sub.size, sub.str, sub.size); +STC_INLINE bool cstr_ends_with_sv(const cstr* self, csview sub) { + crawstr rs = cstr_rs(self); + if (sub.size > rs.size) return false; + return !c_memcmp(rs.str + rs.size - sub.size, sub.str, sub.size); } STC_INLINE bool cstr_ends_with_s(const cstr* self, cstr sub) - { return cstr_ends_with_ss(self, cstr_ss(&sub)); } + { return cstr_ends_with_sv(self, cstr_sv(&sub)); } STC_INLINE bool cstr_ends_with(const cstr* self, const char* sub) - { return cstr_ends_with_ss(self, c_ss(sub, c_strlen(sub))); } + { return cstr_ends_with_sv(self, c_sv(sub, c_strlen(sub))); } STC_INLINE bool cstr_iends_with(const cstr* self, const char* sub) { - csview sv = cstr_sv(self); + crawstr rs = cstr_rs(self); intptr_t n = c_strlen(sub); - return n <= sv.size && !utf8_icmp(sv.str + sv.size - n, sub); + return n <= rs.size && !utf8_icmp(rs.str + rs.size - n, sub); } STC_INLINE char* cstr_assign(cstr* self, const char* str) { return cstr_assign_n(self, str, c_strlen(str)); } -STC_INLINE char* cstr_assign_ss(cstr* self, csubstr sv) +STC_INLINE char* cstr_assign_sv(cstr* self, csview sv) { return cstr_assign_n(self, sv.str, sv.size); } STC_INLINE char* cstr_copy(cstr* self, cstr s) { - csview sv = cstr_sv(&s); - return cstr_assign_n(self, sv.str, sv.size); + crawstr rs = cstr_rs(&s); + return cstr_assign_n(self, rs.str, rs.size); } @@ -334,51 +334,51 @@ STC_INLINE char* cstr_push(cstr* self, const char* chr) { return cstr_append_n(self, chr, utf8_chr_size(chr)); } STC_INLINE void cstr_pop(cstr* self) { - csview sv = cstr_sv(self); - const char* s = sv.str + sv.size; + crawstr rs = cstr_rs(self); + const char* s = rs.str + rs.size; while ((*--s & 0xC0) == 0x80) ; - _cstr_set_size(self, (s - sv.str)); + _cstr_set_size(self, (s - rs.str)); } STC_INLINE char* cstr_append(cstr* self, const char* str) { return cstr_append_n(self, str, c_strlen(str)); } -STC_INLINE char* cstr_append_ss(cstr* self, csubstr sv) +STC_INLINE char* cstr_append_sv(cstr* self, csview sv) { return cstr_append_n(self, sv.str, sv.size); } STC_INLINE char* cstr_append_s(cstr* self, cstr s) - { return cstr_append_ss(self, cstr_ss(&s)); } + { return cstr_append_sv(self, cstr_sv(&s)); } #define cstr_replace(...) c_MACRO_OVERLOAD(cstr_replace, __VA_ARGS__) #define cstr_replace_3(self, search, repl) cstr_replace_4(self, search, repl, INT32_MAX) STC_INLINE void cstr_replace_4(cstr* self, const char* search, const char* repl, int32_t count) { - cstr_take(self, cstr_replace_ss(cstr_ss(self), c_ss(search, c_strlen(search)), - c_ss(repl, c_strlen(repl)), count)); + cstr_take(self, cstr_replace_sv(cstr_sv(self), c_sv(search, c_strlen(search)), + c_sv(repl, c_strlen(repl)), count)); } -STC_INLINE void cstr_replace_at_ss(cstr* self, intptr_t pos, intptr_t len, const csubstr repl) { +STC_INLINE void cstr_replace_at_sv(cstr* self, intptr_t pos, intptr_t len, const csview repl) { char* d = _cstr_internal_move(self, pos + len, pos + repl.size); c_memcpy(d + pos, repl.str, repl.size); } STC_INLINE void cstr_replace_at(cstr* self, intptr_t pos, intptr_t len, const char* repl) - { cstr_replace_at_ss(self, pos, len, c_ss(repl, c_strlen(repl))); } + { cstr_replace_at_sv(self, pos, len, c_sv(repl, c_strlen(repl))); } STC_INLINE void cstr_replace_at_s(cstr* self, intptr_t pos, intptr_t len, cstr repl) - { cstr_replace_at_ss(self, pos, len, cstr_ss(&repl)); } + { cstr_replace_at_sv(self, pos, len, cstr_sv(&repl)); } -STC_INLINE void cstr_u8_replace_at(cstr* self, intptr_t bytepos, intptr_t u8len, csubstr repl) - { cstr_replace_at_ss(self, bytepos, utf8_pos(cstr_str(self) + bytepos, u8len), repl); } +STC_INLINE void cstr_u8_replace_at(cstr* self, intptr_t bytepos, intptr_t u8len, csview repl) + { cstr_replace_at_sv(self, bytepos, utf8_pos(cstr_str(self) + bytepos, u8len), repl); } STC_INLINE void cstr_insert(cstr* self, intptr_t pos, const char* str) - { cstr_replace_at_ss(self, pos, 0, c_ss(str, c_strlen(str))); } + { cstr_replace_at_sv(self, pos, 0, c_sv(str, c_strlen(str))); } -STC_INLINE void cstr_insert_ss(cstr* self, intptr_t pos, csubstr sv) - { cstr_replace_at_ss(self, pos, 0, sv); } +STC_INLINE void cstr_insert_sv(cstr* self, intptr_t pos, csview sv) + { cstr_replace_at_sv(self, pos, 0, sv); } STC_INLINE void cstr_insert_s(cstr* self, intptr_t pos, cstr s) - { cstr_replace_at_ss(self, pos, 0, cstr_ss(&s)); } + { cstr_replace_at_sv(self, pos, 0, cstr_sv(&s)); } STC_INLINE bool cstr_getline(cstr *self, FILE *fp) { return cstr_getdelim(self, '\n', fp); } @@ -397,7 +397,7 @@ fn_tocase[] = {{tolower, utf8_casefold}, {tolower, utf8_tolower}, {toupper, utf8_toupper}}; -static cstr cstr_tocase(csubstr sv, int k) { +static cstr cstr_tocase(csview sv, int k) { cstr out = cstr_init(); char *buf = cstr_reserve(&out, sv.size*3/2); const char *end = sv.str + sv.size; @@ -418,26 +418,26 @@ static cstr cstr_tocase(csubstr sv, int k) { return out; } -cstr cstr_casefold_ss(csubstr sv) +cstr cstr_casefold_sv(csview sv) { return cstr_tocase(sv, 0); } -cstr cstr_tolower_ss(csubstr sv) +cstr cstr_tolower_sv(csview sv) { return cstr_tocase(sv, 1); } -cstr cstr_toupper_ss(csubstr sv) +cstr cstr_toupper_sv(csview sv) { return cstr_tocase(sv, 2); } cstr cstr_tolower(const char* str) - { return cstr_tolower_ss(c_ss(str, c_strlen(str))); } + { return cstr_tolower_sv(c_sv(str, c_strlen(str))); } cstr cstr_toupper(const char* str) - { return cstr_toupper_ss(c_ss(str, c_strlen(str))); } + { return cstr_toupper_sv(c_sv(str, c_strlen(str))); } void cstr_lowercase(cstr* self) - { cstr_take(self, cstr_tolower_ss(cstr_ss(self))); } + { cstr_take(self, cstr_tolower_sv(cstr_sv(self))); } void cstr_uppercase(cstr* self) - { cstr_take(self, cstr_toupper_ss(cstr_ss(self))); } + { cstr_take(self, cstr_toupper_sv(cstr_sv(self))); } bool cstr_valid_utf8(const cstr* self) { return utf8_valid(cstr_str(self)); } @@ -449,14 +449,14 @@ bool cstr_valid_utf8(const cstr* self) #define CSTR_C_INCLUDED STC_DEF uint64_t cstr_hash(const cstr *self) { - csview sv = cstr_sv(self); - return cfasthash(sv.str, sv.size); + crawstr rs = cstr_rs(self); + return cfasthash(rs.str, rs.size); } -STC_DEF intptr_t cstr_find_ss(const cstr* self, csubstr search) { - csview sv = cstr_sv(self); - char* res = cstrnstrn(sv.str, search.str, sv.size, search.size); - return res ? (res - sv.str) : c_NPOS; +STC_DEF intptr_t cstr_find_sv(const cstr* self, csview search) { + crawstr rs = cstr_rs(self); + char* res = cstrnstrn(rs.str, search.str, rs.size, search.size); + return res ? (res - rs.str) : c_NPOS; } STC_DEF char* _cstr_internal_move(cstr* self, const intptr_t pos1, const intptr_t pos2) { @@ -530,10 +530,10 @@ STC_DEF char* cstr_resize(cstr* self, const intptr_t size, const char value) { } STC_DEF intptr_t cstr_find_at(const cstr* self, const intptr_t pos, const char* search) { - csview sv = cstr_sv(self); - if (pos > sv.size) return c_NPOS; - const char* res = strstr((char*)sv.str + pos, search); - return res ? (res - sv.str) : c_NPOS; + crawstr rs = cstr_rs(self); + if (pos > rs.size) return c_NPOS; + const char* res = strstr((char*)rs.str + pos, search); + return res ? (res - rs.str) : c_NPOS; } STC_DEF char* cstr_assign_n(cstr* self, const char* str, const intptr_t len) { @@ -583,7 +583,7 @@ STC_DEF bool cstr_getdelim(cstr *self, const int delim, FILE *fp) { } } -STC_DEF cstr cstr_replace_ss(csubstr in, csubstr search, csubstr repl, int32_t count) { +STC_DEF cstr cstr_replace_sv(csview in, csview search, csview repl, int32_t count) { cstr out = cstr_null; intptr_t from = 0; char* res; if (!count) count = INT32_MAX; diff --git a/include/stc/csubstr.h b/include/stc/csubstr.h deleted file mode 100644 index c7a43052..00000000 --- a/include/stc/csubstr.h +++ /dev/null @@ -1,211 +0,0 @@ -/* MIT License - * - * Copyright (c) 2023 Tyge Løvset - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#define i_header // external linkage by default. override with i_static. -#define _i_inc_utf8 -#include "utf8.h" - -#ifndef CSUBSTR_H_INCLUDED -#define CSUBSTR_H_INCLUDED - -#define csubstr_init() c_ss_1("") -#define csubstr_drop(p) c_default_drop(p) -#define csubstr_clone(ss) c_default_clone(ss) -#define csubstr_from_n(str, n) c_ss_2(str, n) - -STC_API csubstr_iter csubstr_advance(csubstr_iter it, intptr_t pos); -STC_API intptr_t csubstr_find_ss(csubstr ss, csubstr search); -STC_API uint64_t csubstr_hash(const csubstr *self); -STC_API csubstr csubstr_slice_ex(csubstr ss, intptr_t p1, intptr_t p2); -STC_API csubstr csubstr_substr_ex(csubstr ss, intptr_t pos, intptr_t n); -STC_API csubstr csubstr_token(csubstr ss, const char* sep, intptr_t* start); - -STC_INLINE csubstr csubstr_from(const char* str) - { return c_LITERAL(csubstr){str, c_strlen(str)}; } -STC_INLINE void csubstr_clear(csubstr* self) { *self = csubstr_init(); } -STC_INLINE intptr_t csubstr_size(csubstr ss) { return ss.size; } -STC_INLINE bool csubstr_empty(csubstr ss) { return ss.size == 0; } - -STC_INLINE bool csubstr_equals_ss(csubstr ss1, csubstr ss2) - { return ss1.size == ss2.size && !c_memcmp(ss1.str, ss2.str, ss1.size); } - -STC_INLINE bool csubstr_equals(csubstr ss, const char* str) - { return csubstr_equals_ss(ss, c_ss_2(str, c_strlen(str))); } - -STC_INLINE intptr_t csubstr_find(csubstr ss, const char* str) - { return csubstr_find_ss(ss, c_ss_2(str, c_strlen(str))); } - -STC_INLINE bool csubstr_contains(csubstr ss, const char* str) - { return csubstr_find(ss, str) != c_NPOS; } - -STC_INLINE bool csubstr_starts_with(csubstr ss, const char* str) { - intptr_t n = c_strlen(str); - return n > ss.size ? false : !c_memcmp(ss.str, str, n); -} - -STC_INLINE bool csubstr_ends_with(csubstr ss, const char* str) { - intptr_t n = c_strlen(str); - return n > ss.size ? false : !c_memcmp(ss.str + ss.size - n, str, n); -} - -STC_INLINE csubstr csubstr_substr(csubstr ss, intptr_t pos, intptr_t n) { - if (pos + n > ss.size) n = ss.size - pos; - ss.str += pos, ss.size = n; - return ss; -} - -STC_INLINE csubstr csubstr_slice(csubstr ss, intptr_t p1, intptr_t p2) { - if (p2 > ss.size) p2 = ss.size; - ss.str += p1, ss.size = p2 > p1 ? p2 - p1 : 0; - return ss; -} - -/* utf8 iterator */ -STC_INLINE csubstr_iter csubstr_begin(const csubstr* self) { - if (!self->size) return c_LITERAL(csubstr_iter){NULL}; - return c_LITERAL(csubstr_iter){.u8 = {{self->str, utf8_chr_size(self->str)}, - self->str + self->size}}; -} -STC_INLINE csubstr_iter csubstr_end(const csubstr* self) { - return c_LITERAL(csubstr_iter){.u8 = {{NULL}, self->str + self->size}}; -} -STC_INLINE void csubstr_next(csubstr_iter* it) { - it->ref += it->u8.chr.size; - it->u8.chr.size = utf8_chr_size(it->ref); - if (it->ref == it->u8.end) it->ref = NULL; -} - -/* utf8 */ -STC_INLINE intptr_t csubstr_u8_size(csubstr ss) - { return utf8_size_n(ss.str, ss.size); } - -STC_INLINE csubstr csubstr_u8_substr(csubstr ss, intptr_t bytepos, intptr_t u8len) { - ss.str += bytepos; - ss.size = utf8_pos(ss.str, u8len); - return ss; -} - -STC_INLINE bool csubstr_valid_utf8(csubstr ss) // depends on src/utf8code.c - { return utf8_valid_n(ss.str, ss.size); } - -#define c_fortoken_ss(it, inputss, sep) \ - for (struct { csubstr _inp, token, *ref; const char *_sep; intptr_t pos; } \ - it = {._inp=inputss, .token=it._inp, .ref=&it.token, ._sep=sep} \ - ; it.pos <= it._inp.size && (it.token = csubstr_token(it._inp, it._sep, &it.pos)).str ; ) - -#define c_fortoken(it, input, sep) \ - c_fortoken_ss(it, csubstr_from(input), sep) - -/* ---- Container helper functions ---- */ - -STC_INLINE int csubstr_cmp(const csubstr* x, const csubstr* y) { - intptr_t n = x->size < y->size ? x->size : y->size; - int c = c_memcmp(x->str, y->str, n); - return c ? c : (int)(x->size - y->size); -} - -STC_INLINE int csubstr_icmp(const csubstr* x, const csubstr* y) - { return utf8_icmp_ss(*x, *y); } - -STC_INLINE bool csubstr_eq(const csubstr* x, const csubstr* y) - { return x->size == y->size && !c_memcmp(x->str, y->str, x->size); } - -#endif // CSUBSTR_H_INCLUDED - -/* csubstr interaction with cstr: */ -#ifdef CSTR_H_INCLUDED - -STC_INLINE csubstr cstr_substr(const cstr* self, intptr_t pos, intptr_t n) - { return csubstr_substr(cstr_ss(self), pos, n); } - -STC_INLINE csubstr cstr_slice(const cstr* self, intptr_t p1, intptr_t p2) - { return csubstr_slice(cstr_ss(self), p1, p2); } - -STC_INLINE csubstr cstr_substr_ex(const cstr* self, intptr_t pos, intptr_t n) - { return csubstr_substr_ex(cstr_ss(self), pos, n); } - -STC_INLINE csubstr cstr_slice_ex(const cstr* self, intptr_t p1, intptr_t p2) - { return csubstr_slice_ex(cstr_ss(self), p1, p2); } - -STC_INLINE csubstr cstr_u8_substr(const cstr* self , intptr_t bytepos, intptr_t u8len) - { return csubstr_u8_substr(cstr_ss(self), bytepos, u8len); } -#endif - -/* -------------------------- IMPLEMENTATION ------------------------- */ -#if defined i_implement || defined i_static -#ifndef CSUBSTR_C_INCLUDED -#define CSUBSTR_C_INCLUDED - -STC_DEF csubstr_iter csubstr_advance(csubstr_iter it, intptr_t pos) { - int inc = -1; - if (pos > 0) pos = -pos, inc = 1; - while (pos && it.ref != it.u8.end) pos += (*(it.ref += inc) & 0xC0) != 0x80; - it.u8.chr.size = utf8_chr_size(it.ref); - if (it.ref == it.u8.end) it.ref = NULL; - return it; -} - -STC_DEF intptr_t csubstr_find_ss(csubstr ss, csubstr search) { - char* res = cstrnstrn(ss.str, search.str, ss.size, search.size); - return res ? (res - ss.str) : c_NPOS; -} - -STC_DEF uint64_t csubstr_hash(const csubstr *self) - { return cfasthash(self->str, self->size); } - -STC_DEF csubstr csubstr_substr_ex(csubstr ss, intptr_t pos, intptr_t n) { - if (pos < 0) { - pos += ss.size; - if (pos < 0) pos = 0; - } - if (pos > ss.size) pos = ss.size; - if (pos + n > ss.size) n = ss.size - pos; - ss.str += pos, ss.size = n; - return ss; -} - -STC_DEF csubstr csubstr_slice_ex(csubstr ss, intptr_t p1, intptr_t p2) { - if (p1 < 0) { - p1 += ss.size; - if (p1 < 0) p1 = 0; - } - if (p2 < 0) p2 += ss.size; - if (p2 > ss.size) p2 = ss.size; - ss.str += p1, ss.size = (p2 > p1 ? p2 - p1 : 0); - return ss; -} - -STC_DEF csubstr csubstr_token(csubstr ss, const char* sep, intptr_t* start) { - intptr_t sep_size = c_strlen(sep); - csubstr slice = {ss.str + *start, ss.size - *start}; - const char* res = cstrnstrn(slice.str, sep, slice.size, sep_size); - csubstr tok = {slice.str, res ? (res - slice.str) : slice.size}; - *start += tok.size + sep_size; - return tok; -} -#endif // CSUBSTR_C_INCLUDED -#endif // i_implement -#undef i_static -#undef i_header -#undef i_implement -#undef i_import -#undef i_opt diff --git a/include/stc/csview.h b/include/stc/csview.h index 367258e4..2a051ddd 100644 --- a/include/stc/csview.h +++ b/include/stc/csview.h @@ -20,33 +20,39 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#define i_header // external linkage by default. override with i_static. #define _i_inc_utf8 #include "utf8.h" #ifndef CSVIEW_H_INCLUDED #define CSVIEW_H_INCLUDED -#define csview_init() c_sv("") +#define csview_init() c_sv_1("") #define csview_drop(p) c_default_drop(p) #define csview_clone(sv) c_default_clone(sv) +#define csview_from_n(str, n) c_sv_2(str, n) + +STC_API csview_iter csview_advance(csview_iter it, intptr_t pos); +STC_API intptr_t csview_find_sv(csview sv, csview search); +STC_API uint64_t csview_hash(const csview *self); +STC_API csview csview_slice_ex(csview sv, intptr_t p1, intptr_t p2); +STC_API csview csview_substr_ex(csview sv, intptr_t pos, intptr_t n); +STC_API csview csview_token(csview sv, const char* sep, intptr_t* start); STC_INLINE csview csview_from(const char* str) - { return c_sv_2(str, c_strlen(str)); } + { return c_LITERAL(csview){str, c_strlen(str)}; } STC_INLINE void csview_clear(csview* self) { *self = csview_init(); } -STC_INLINE csubstr csview_ss(csview sv) { return c_ss_2(sv.str, sv.size); } - STC_INLINE intptr_t csview_size(csview sv) { return sv.size; } STC_INLINE bool csview_empty(csview sv) { return sv.size == 0; } -STC_INLINE bool csview_equals(csview sv, const char* str) { - intptr_t n = c_strlen(str); - return sv.size == n && !c_memcmp(sv.str, str, n); -} +STC_INLINE bool csview_equals_sv(csview sv1, csview sv2) + { return sv1.size == sv2.size && !c_memcmp(sv1.str, sv2.str, sv1.size); } -STC_INLINE intptr_t csview_find(csview sv, const char* search) { - char* res = cstrnstrn(sv.str, search, sv.size, c_strlen(search)); - return res ? (res - sv.str) : c_NPOS; -} +STC_INLINE bool csview_equals(csview sv, const char* str) + { return csview_equals_sv(sv, c_sv_2(str, c_strlen(str))); } + +STC_INLINE intptr_t csview_find(csview sv, const char* str) + { return csview_find_sv(sv, c_sv_2(str, c_strlen(str))); } STC_INLINE bool csview_contains(csview sv, const char* str) { return csview_find(sv, str) != c_NPOS; } @@ -61,40 +67,55 @@ STC_INLINE bool csview_ends_with(csview sv, const char* str) { return n > sv.size ? false : !c_memcmp(sv.str + sv.size - n, str, n); } +STC_INLINE csview csview_substr(csview sv, intptr_t pos, intptr_t n) { + if (pos + n > sv.size) n = sv.size - pos; + sv.str += pos, sv.size = n; + return sv; +} + +STC_INLINE csview csview_slice(csview sv, intptr_t p1, intptr_t p2) { + if (p2 > sv.size) p2 = sv.size; + sv.str += p1, sv.size = p2 > p1 ? p2 - p1 : 0; + return sv; +} + /* utf8 iterator */ STC_INLINE csview_iter csview_begin(const csview* self) { - if (!self->size) return c_LITERAL(csview_iter){.ref = NULL}; - return c_LITERAL(csview_iter){.u8 = {{self->str, utf8_chr_size(self->str)}}}; + if (!self->size) return c_LITERAL(csview_iter){NULL}; + return c_LITERAL(csview_iter){.u8 = {{self->str, utf8_chr_size(self->str)}, + self->str + self->size}}; } STC_INLINE csview_iter csview_end(const csview* self) { - (void)self; return c_LITERAL(csview_iter){.ref = NULL}; + return c_LITERAL(csview_iter){.u8 = {{NULL}, self->str + self->size}}; } STC_INLINE void csview_next(csview_iter* it) { it->ref += it->u8.chr.size; it->u8.chr.size = utf8_chr_size(it->ref); - if (!*it->ref) it->ref = NULL; -} -STC_INLINE csview_iter csview_advance(csview_iter it, intptr_t pos) { - int inc = -1; - if (pos > 0) pos = -pos, inc = 1; - while (pos && *it.ref) pos += (*(it.ref += inc) & 0xC0) != 0x80; - it.u8.chr.size = utf8_chr_size(it.ref); - if (!*it.ref) it.ref = NULL; - return it; + if (it->ref == it->u8.end) it->ref = NULL; } -/* utf8 size */ +/* utf8 */ STC_INLINE intptr_t csview_u8_size(csview sv) { return utf8_size_n(sv.str, sv.size); } -/* utf8 validation: depends on src/utf8code.c */ -STC_INLINE bool csview_valid_utf8(csview sv) +STC_INLINE csview csview_u8_substr(csview sv, intptr_t bytepos, intptr_t u8len) { + sv.str += bytepos; + sv.size = utf8_pos(sv.str, u8len); + return sv; +} + +STC_INLINE bool csview_valid_utf8(csview sv) // depends on src/utf8code.c { return utf8_valid_n(sv.str, sv.size); } -/* utf8 ignore case cmp: depends on src/utf8code.c */ -STC_INLINE int csview_icmp(const csview* x, const csview* y) - { return utf8_icmp_ss(c_ss_2(x->str, x->size), c_ss_2(y->str, y->size)); } +#define c_fortoken_sv(it, inputsv, sep) \ + for (struct { csview _inp, token, *ref; const char *_sep; intptr_t pos; } \ + it = {._inp=inputsv, .token=it._inp, .ref=&it.token, ._sep=sep} \ + ; it.pos <= it._inp.size && (it.token = csview_token(it._inp, it._sep, &it.pos)).str ; ) +#define c_fortoken(it, input, sep) \ + c_fortoken_sv(it, csview_from(input), sep) + +/* ---- Container helper functions ---- */ STC_INLINE int csview_cmp(const csview* x, const csview* y) { intptr_t n = x->size < y->size ? x->size : y->size; @@ -102,13 +123,87 @@ STC_INLINE int csview_cmp(const csview* x, const csview* y) { return c ? c : (int)(x->size - y->size); } +STC_INLINE int csview_icmp(const csview* x, const csview* y) + { return utf8_icmp_sv(*x, *y); } + STC_INLINE bool csview_eq(const csview* x, const csview* y) { return x->size == y->size && !c_memcmp(x->str, y->str, x->size); } -STC_INLINE uint64_t csview_hash(const csview *self) +#endif // CSVIEW_H_INCLUDED + +/* csview interaction with cstr: */ +#ifdef CSTR_H_INCLUDED + +STC_INLINE csview cstr_substr(const cstr* self, intptr_t pos, intptr_t n) + { return csview_substr(cstr_sv(self), pos, n); } + +STC_INLINE csview cstr_slice(const cstr* self, intptr_t p1, intptr_t p2) + { return csview_slice(cstr_sv(self), p1, p2); } + +STC_INLINE csview cstr_substr_ex(const cstr* self, intptr_t pos, intptr_t n) + { return csview_substr_ex(cstr_sv(self), pos, n); } + +STC_INLINE csview cstr_slice_ex(const cstr* self, intptr_t p1, intptr_t p2) + { return csview_slice_ex(cstr_sv(self), p1, p2); } + +STC_INLINE csview cstr_u8_substr(const cstr* self , intptr_t bytepos, intptr_t u8len) + { return csview_u8_substr(cstr_sv(self), bytepos, u8len); } +#endif + +/* -------------------------- IMPLEMENTATION ------------------------- */ +#if defined i_implement || defined i_static +#ifndef CSVIEW_C_INCLUDED +#define CSVIEW_C_INCLUDED + +STC_DEF csview_iter csview_advance(csview_iter it, intptr_t pos) { + int inc = -1; + if (pos > 0) pos = -pos, inc = 1; + while (pos && it.ref != it.u8.end) pos += (*(it.ref += inc) & 0xC0) != 0x80; + it.u8.chr.size = utf8_chr_size(it.ref); + if (it.ref == it.u8.end) it.ref = NULL; + return it; +} + +STC_DEF intptr_t csview_find_sv(csview sv, csview search) { + char* res = cstrnstrn(sv.str, search.str, sv.size, search.size); + return res ? (res - sv.str) : c_NPOS; +} + +STC_DEF uint64_t csview_hash(const csview *self) { return cfasthash(self->str, self->size); } -#endif // CSVIEW_H_INCLUDED +STC_DEF csview csview_substr_ex(csview sv, intptr_t pos, intptr_t n) { + if (pos < 0) { + pos += sv.size; + if (pos < 0) pos = 0; + } + if (pos > sv.size) pos = sv.size; + if (pos + n > sv.size) n = sv.size - pos; + sv.str += pos, sv.size = n; + return sv; +} + +STC_DEF csview csview_slice_ex(csview sv, intptr_t p1, intptr_t p2) { + if (p1 < 0) { + p1 += sv.size; + if (p1 < 0) p1 = 0; + } + if (p2 < 0) p2 += sv.size; + if (p2 > sv.size) p2 = sv.size; + sv.str += p1, sv.size = (p2 > p1 ? p2 - p1 : 0); + return sv; +} + +STC_DEF csview csview_token(csview sv, const char* sep, intptr_t* start) { + intptr_t sep_size = c_strlen(sep); + csview slice = {sv.str + *start, sv.size - *start}; + const char* res = cstrnstrn(slice.str, sep, slice.size, sep_size); + csview tok = {slice.str, res ? (res - slice.str) : slice.size}; + *start += tok.size + sep_size; + return tok; +} +#endif // CSVIEW_C_INCLUDED +#endif // i_implement #undef i_static #undef i_header #undef i_implement diff --git a/include/stc/forward.h b/include/stc/forward.h index 5c9c4f4d..839be012 100644 --- a/include/stc/forward.h +++ b/include/stc/forward.h @@ -39,21 +39,8 @@ #define forward_cqueue(CX, VAL) _c_cdeq_types(CX, VAL) #define forward_cvec(CX, VAL) _c_cvec_types(CX, VAL) -// csubstr : non-null terminated string view -typedef const char csubstr_value; -typedef struct csubstr { - csubstr_value* str; - intptr_t size; -} csubstr; - -typedef union { - csubstr_value* ref; - struct { csubstr chr; csubstr_value* end; } u8; -} csubstr_iter; - - -// csview : null-terminated string view -typedef csubstr_value csview_value; +// csview : non-null terminated string view +typedef const char csview_value; typedef struct csview { csview_value* str; intptr_t size; @@ -61,10 +48,23 @@ typedef struct csview { typedef union { csview_value* ref; - struct { csubstr chr; } u8; + struct { csview chr; csview_value* end; } u8; } csview_iter; +// crawstr : null-terminated string view +typedef csview_value crawstr_value; +typedef struct crawstr { + crawstr_value* str; + intptr_t size; +} crawstr; + +typedef union { + crawstr_value* ref; + struct { csview chr; } u8; +} crawstr_iter; + + // cstr : null-terminated string (short string optimized - sso) typedef char cstr_value; typedef struct { cstr_value* data; intptr_t size, cap; } cstr_buf; @@ -75,7 +75,7 @@ typedef union cstr { typedef union { cstr_value* ref; - struct { csubstr chr; } u8; + struct { csview chr; } u8; } cstr_iter; diff --git a/include/stc/priv/template.h b/include/stc/priv/template.h index 47225ec8..65dee203 100644 --- a/include/stc/priv/template.h +++ b/include/stc/priv/template.h @@ -114,10 +114,10 @@ #endif #elif defined i_key_ssv #define i_keyclass cstr - #define i_rawclass csubstr - #define i_keyfrom cstr_from_ss - #define i_keyto cstr_ss - #define i_eq csubstr_eq + #define i_rawclass csview + #define i_keyfrom cstr_from_sv + #define i_keyto cstr_sv + #define i_eq csview_eq #ifndef i_tag #define i_tag ssv #endif @@ -232,9 +232,9 @@ #define i_valraw const char* #elif defined i_val_ssv #define i_valclass cstr - #define i_valraw csubstr - #define i_valfrom cstr_from_ss - #define i_valto cstr_ss + #define i_valraw csview + #define i_valfrom cstr_from_sv + #define i_valto cstr_sv #elif defined i_valboxed #define i_valclass i_valboxed #define i_valraw c_PASTE(i_valboxed, _raw) diff --git a/include/stc/utf8.h b/include/stc/utf8.h index 7d2adee0..6d12856f 100644 --- a/include/stc/utf8.h +++ b/include/stc/utf8.h @@ -48,7 +48,7 @@ extern uint32_t utf8_toupper(uint32_t c); extern bool utf8_iscased(uint32_t c); extern bool utf8_isword(uint32_t c); extern bool utf8_valid_n(const char* s, intptr_t nbytes); -extern int utf8_icmp_ss(csubstr s1, csubstr s2); +extern int utf8_icmp_sv(csview s1, csview s2); extern int utf8_encode(char *out, uint32_t c); extern uint32_t utf8_peek_off(const char *s, int offset); @@ -92,7 +92,7 @@ STC_INLINE uint32_t utf8_peek(const char* s) { /* case-insensitive utf8 string comparison */ STC_INLINE int utf8_icmp(const char* s1, const char* s2) { - return utf8_icmp_ss(c_ss(s1, INTPTR_MAX), c_ss(s2, INTPTR_MAX)); + return utf8_icmp_sv(c_sv(s1, INTPTR_MAX), c_sv(s2, INTPTR_MAX)); } STC_INLINE bool utf8_valid(const char* s) { |
