summaryrefslogtreecommitdiffhomepage
path: root/include/stc/csubstr.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/stc/csubstr.h')
-rw-r--r--include/stc/csubstr.h208
1 files changed, 208 insertions, 0 deletions
diff --git a/include/stc/csubstr.h b/include/stc/csubstr.h
new file mode 100644
index 00000000..152f7041
--- /dev/null
+++ b/include/stc/csubstr.h
@@ -0,0 +1,208 @@
+/* MIT License
+ *
+ * Copyright (c) 2023 Tyge Løvset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#define i_header // external linkage by default. override with i_static.
+#define _i_inc_utf8
+#include "utf8.h"
+
+#ifndef CSUBSTR_H_INCLUDED
+#define CSUBSTR_H_INCLUDED
+
+#define csubstr_init() c_ss_1("") // csubstr built from the empty literal ""
+#define csubstr_drop(p) c_default_drop(p) // forwards to c_default_drop
+#define csubstr_clone(ss) c_default_clone(ss) // forwards to c_default_clone
+#define csubstr_from_n(str, n) c_ss_2(str, n) // forwards (str, n) to c_ss_2; presumably pointer + byte count -- confirm against c_ss_2
+
+STC_API csubstr_iter csubstr_advance(csubstr_iter it, intptr_t pos); // step `it` by pos UTF-8 characters; negative pos steps backward
+STC_API intptr_t csubstr_find_ss(csubstr ss, csubstr search); // byte offset of the match reported by cstrnstrn, or c_NPOS when there is none
+STC_API uint64_t csubstr_hash(const csubstr *self); // cfasthash over self->str / self->size
+STC_API csubstr csubstr_slice_ex(csubstr ss, intptr_t p1, intptr_t p2); // slice [p1, p2); negative p1/p2 are offset by ss.size
+STC_API csubstr csubstr_substr_ex(csubstr ss, intptr_t pos, intptr_t n); // at most n bytes from pos; negative pos is offset by ss.size
+STC_API csubstr csubstr_token(csubstr ss, const char* sep, intptr_t* start); // token beginning at *start; *start is moved past the token and sep
+
+/* Build a csubstr over `str`; the size is whatever c_strlen(str) reports. */
+STC_INLINE csubstr csubstr_from(const char* str) {
+    const intptr_t len = c_strlen(str);
+    return c_LITERAL(csubstr){str, len};
+}
+/* Reset *self to the value produced by csubstr_init(). */
+STC_INLINE void csubstr_clear(csubstr* self) {
+    const csubstr empty = csubstr_init();
+    *self = empty;
+}
+/* Return the stored size field of `ss`. */
+STC_INLINE intptr_t csubstr_size(csubstr ss) {
+    return ss.size;
+}
+/* True when the size field of `ss` is zero. */
+STC_INLINE bool csubstr_empty(csubstr ss) {
+    return !ss.size;
+}
+
+/* True when `ss` and `str` have the same length (per c_strlen) and the same bytes. */
+STC_INLINE bool csubstr_equals(csubstr ss, const char* str) {
+    const intptr_t len = c_strlen(str);
+    if (ss.size != len)
+        return false;
+    return c_memcmp(ss.str, str, len) == 0;
+}
+
+/* Search `ss` for `str`; returns whatever csubstr_find_ss() returns for that needle. */
+STC_INLINE intptr_t csubstr_find(csubstr ss, const char* str) {
+    const csubstr needle = c_ss_2(str, c_strlen(str));
+    return csubstr_find_ss(ss, needle);
+}
+
+/* True when csubstr_find() reports a position other than c_NPOS. */
+STC_INLINE bool csubstr_contains(csubstr ss, const char* str) {
+    const intptr_t at = csubstr_find(ss, str);
+    return at != c_NPOS;
+}
+
+/* True when the first c_strlen(str) bytes of `ss` equal `str`. */
+STC_INLINE bool csubstr_starts_with(csubstr ss, const char* str) {
+    const intptr_t len = c_strlen(str);
+    if (len > ss.size)
+        return false;   /* prefix longer than the view can never match */
+    return c_memcmp(ss.str, str, len) == 0;
+}
+
+/* True when the last c_strlen(str) bytes of `ss` equal `str`. */
+STC_INLINE bool csubstr_ends_with(csubstr ss, const char* str) {
+    const intptr_t len = c_strlen(str);
+    if (len > ss.size)
+        return false;   /* suffix longer than the view can never match */
+    const char* tail = ss.str + ss.size - len;
+    return c_memcmp(tail, str, len) == 0;
+}
+
+/* Return the part of `ss` that starts at byte `pos` and holds at most `n` bytes.
+ * `n` is cut down so the result does not extend past the end of `ss`.
+ * `pos` itself is not range-checked here; csubstr_substr_ex() is the checked variant.
+ * The limit is computed as ss.size - pos rather than testing pos + n, because
+ * pos + n overflows (undefined behaviour) when a caller passes a very large n
+ * to mean "everything up to the end".
+ */
+STC_INLINE csubstr csubstr_substr(csubstr ss, intptr_t pos, intptr_t n) {
+    const intptr_t avail = ss.size - pos;
+    if (n > avail) n = avail;
+    ss.str += pos, ss.size = n;
+    return ss;
+}
+
+/* Return the bytes of `ss` in the half-open range [p1, p2).
+ * p2 is capped at ss.size; an empty result (size 0) is produced when p2 <= p1.
+ * p1 is applied to the pointer as given, without a range check.
+ */
+STC_INLINE csubstr csubstr_slice(csubstr ss, intptr_t p1, intptr_t p2) {
+    if (p2 > ss.size)
+        p2 = ss.size;
+    ss.str += p1;
+    ss.size = 0;
+    if (p2 > p1)
+        ss.size = p2 - p1;
+    return ss;
+}
+
+/* utf8 iterator */
+/* Iterator at the first UTF-8 character of *self.
+ * For an empty view the iterator is {NULL}; otherwise the current character
+ * starts at self->str, its byte length comes from utf8_chr_size(), and the
+ * end marker is self->str + self->size.
+ */
+STC_INLINE csubstr_iter csubstr_begin(const csubstr* self) {
+    csubstr_iter it = {NULL};
+    if (self->size != 0) {
+        const char* first = self->str;
+        it = c_LITERAL(csubstr_iter){.u8 = {{first, utf8_chr_size(first)},
+                                            first + self->size}};
+    }
+    return it;
+}
+/* End iterator: NULL current character, end marker at self->str + self->size. */
+STC_INLINE csubstr_iter csubstr_end(const csubstr* self) {
+    const char* stop = self->str + self->size;
+    return c_LITERAL(csubstr_iter){.u8 = {{NULL}, stop}};
+}
+/* Advance *it to the next UTF-8 character.
+ * When the end marker is reached the iterator becomes the end iterator:
+ * ref is set to NULL and the character size to 0, matching what csubstr_end() builds.
+ * The end test runs BEFORE utf8_chr_size() so the byte at `end` -- which lies
+ * outside the view and need not be readable -- is never dereferenced.
+ * `>=` (not `==`) also stops iteration if a truncated multi-byte sequence at
+ * the tail made the step overshoot the end marker.
+ */
+STC_INLINE void csubstr_next(csubstr_iter* it) {
+    it->ref += it->u8.chr.size;
+    if (it->ref >= it->u8.end) {
+        it->ref = NULL;
+        it->u8.chr.size = 0;
+        return;
+    }
+    it->u8.chr.size = utf8_chr_size(it->ref);
+}
+
+/* utf8 */
+/* Result of utf8_size_n() over the bytes of `ss` (presumably the UTF-8 character count). */
+STC_INLINE intptr_t csubstr_u8_size(csubstr ss) {
+    const intptr_t count = utf8_size_n(ss.str, ss.size);
+    return count;
+}
+
+/* View starting `bytepos` bytes into `ss`, whose byte size is utf8_pos(start, u8len).
+ * NOTE(review): ss.size is not passed to utf8_pos(), so nothing here bounds the
+ * result to the original view -- confirm how utf8_pos() finds the end of input.
+ */
+STC_INLINE csubstr csubstr_u8_substr(csubstr ss, intptr_t bytepos, intptr_t u8len) {
+    const char* from = ss.str + bytepos;
+    ss.str = from;
+    ss.size = utf8_pos(from, u8len);
+    return ss;
+}
+
+/* Result of utf8_valid_n() over the bytes of `ss`. */
+STC_INLINE bool csubstr_valid_utf8(csubstr ss) { // depends on src/utf8code.c
+    const bool ok = utf8_valid_n(ss.str, ss.size);
+    return ok;
+}
+
+/* Loop over the pieces of csubstr `inputss` that are delimited by the string `sep`.
+ * In the loop body it.token (also reachable through it.ref) is the piece returned
+ * by csubstr_token(); it.pos is the offset at which the next search starts.
+ * The loop ends when it.pos passes the input size or the token's str is NULL.
+ * `token` is intentionally left out of the initializer: it is zero-initialized and
+ * assigned by the loop condition before the body can observe it. Seeding it from
+ * it._inp would read `it` inside its own initializer, and C does not order the
+ * evaluation of initializer expressions relative to one another.
+ */
+#define c_fortoken_ss(it, inputss, sep) \
+    for (struct { csubstr _inp, token, *ref; const char *_sep; intptr_t pos; } \
+          it = {._inp=inputss, .ref=&it.token, ._sep=sep} \
+        ; it.pos <= it._inp.size && (it.token = csubstr_token(it._inp, it._sep, &it.pos)).str ; )
+
+#define c_fortoken(it, input, sep) \
+ c_fortoken_ss(it, csubstr_from(input), sep) /* c_fortoken_ss over csubstr_from(input) */
+
+/* ---- Container helper functions ---- */
+
+/* Three-way comparison of *x and *y for use as a container compare function.
+ * The common prefix is compared with c_memcmp(); if it is equal, the shorter
+ * view orders first. Returns <0, 0 or >0.
+ * The size tie-break is computed from two comparisons instead of casting the
+ * intptr_t difference to int: that cast can truncate, turning a large positive
+ * difference into a negative number or into 0.
+ */
+STC_INLINE int csubstr_cmp(const csubstr* x, const csubstr* y) {
+    intptr_t n = x->size < y->size ? x->size : y->size;
+    int c = c_memcmp(x->str, y->str, n);
+    if (c) return c;
+    return (x->size > y->size) - (x->size < y->size);
+}
+
+/* Compare *x and *y through utf8_icmp_ss() and return its result. */
+STC_INLINE int csubstr_icmp(const csubstr* x, const csubstr* y) {
+    const csubstr lhs = *x;
+    const csubstr rhs = *y;
+    return utf8_icmp_ss(lhs, rhs);
+}
+
+/* True when *x and *y have the same size and the same bytes. */
+STC_INLINE bool csubstr_eq(const csubstr* x, const csubstr* y) {
+    if (x->size != y->size)
+        return false;
+    return c_memcmp(x->str, y->str, x->size) == 0;
+}
+
+#endif // CSUBSTR_H_INCLUDED
+
+/* csubstr interaction with cstr: */
+#ifdef CSTR_H_INCLUDED
+
+/* csubstr_substr() applied to the view returned by cstr_ss(self). */
+STC_INLINE csubstr cstr_substr(const cstr* self, intptr_t pos, intptr_t n) {
+    const csubstr whole = cstr_ss(self);
+    return csubstr_substr(whole, pos, n);
+}
+
+/* csubstr_slice() applied to the view returned by cstr_ss(self). */
+STC_INLINE csubstr cstr_slice(const cstr* self, intptr_t p1, intptr_t p2) {
+    const csubstr whole = cstr_ss(self);
+    return csubstr_slice(whole, p1, p2);
+}
+
+/* csubstr_substr_ex() applied to the view returned by cstr_ss(self). */
+STC_INLINE csubstr cstr_substr_ex(const cstr* self, intptr_t pos, intptr_t n) {
+    const csubstr whole = cstr_ss(self);
+    return csubstr_substr_ex(whole, pos, n);
+}
+
+/* csubstr_slice_ex() applied to the view returned by cstr_ss(self). */
+STC_INLINE csubstr cstr_slice_ex(const cstr* self, intptr_t p1, intptr_t p2) {
+    const csubstr whole = cstr_ss(self);
+    return csubstr_slice_ex(whole, p1, p2);
+}
+
+/* csubstr_u8_substr() applied to the view returned by cstr_ss(self). */
+STC_INLINE csubstr cstr_u8_substr(const cstr* self , intptr_t bytepos, intptr_t u8len) {
+    const csubstr whole = cstr_ss(self);
+    return csubstr_u8_substr(whole, bytepos, u8len);
+}
+#endif
+
+/* -------------------------- IMPLEMENTATION ------------------------- */
+#if defined i_implement || defined i_static
+#ifndef CSUBSTR_C_INCLUDED
+#define CSUBSTR_C_INCLUDED
+
+/* Return `it` moved by `pos` UTF-8 characters: forward for pos > 0, backward
+ * for pos < 0. The iterator is stepped one byte at a time and every byte that
+ * is not a continuation byte (10xxxxxx) counts as the start of a character.
+ * Reaching the end marker yields the end iterator (ref == NULL, size 0).
+ * The byte at the end marker lies outside the view, so it is neither inspected
+ * in the loop nor handed to utf8_chr_size().
+ * Backward movement is only bounded by the end marker, not by the start of the
+ * view; the caller must not step back past the first character.
+ */
+STC_DEF csubstr_iter csubstr_advance(csubstr_iter it, intptr_t pos) {
+    int inc = -1;
+    if (pos > 0) pos = -pos, inc = 1;   /* count up towards zero in both directions */
+    while (pos && it.ref != it.u8.end) {
+        it.ref += inc;
+        if (it.ref == it.u8.end) break; /* do not read the byte past the view */
+        pos += (*it.ref & 0xC0) != 0x80;
+    }
+    if (it.ref == it.u8.end) {
+        it.ref = NULL;
+        it.u8.chr.size = 0;
+    } else {
+        it.u8.chr.size = utf8_chr_size(it.ref);
+    }
+    return it;
+}
+
+/* Byte offset within `ss` of the position reported by cstrnstrn() for `search`,
+ * or c_NPOS when cstrnstrn() returns NULL.
+ */
+STC_DEF intptr_t csubstr_find_ss(csubstr ss, csubstr search) {
+    const char* hit = cstrnstrn(ss.str, search.str, ss.size, search.size);
+    if (!hit)
+        return c_NPOS;
+    return hit - ss.str;
+}
+
+/* Hash of *self: cfasthash() over self->str for self->size bytes. */
+STC_DEF uint64_t csubstr_hash(const csubstr *self) {
+    const uint64_t digest = cfasthash(self->str, self->size);
+    return digest;
+}
+
+/* Range-checked substring: at most `n` bytes of `ss` starting at `pos`.
+ * A negative `pos` is taken relative to the end (pos += ss.size) and then
+ * clamped to 0; a `pos` beyond the end is clamped to ss.size. `n` is reduced
+ * so that the result stays inside `ss`.
+ * The reduction compares n against ss.size - pos (non-negative after clamping)
+ * instead of evaluating pos + n, which overflows -- undefined behaviour -- when
+ * the caller passes a very large n to mean "up to the end".
+ */
+STC_DEF csubstr csubstr_substr_ex(csubstr ss, intptr_t pos, intptr_t n) {
+    if (pos < 0) {
+        pos += ss.size;
+        if (pos < 0) pos = 0;
+    }
+    if (pos > ss.size) pos = ss.size;
+    const intptr_t avail = ss.size - pos;
+    if (n > avail) n = avail;
+    ss.str += pos, ss.size = n;
+    return ss;
+}
+
+/* Range-checked slice [p1, p2) of `ss`.
+ * Negative p1 / p2 are taken relative to the end (offset by ss.size); p1 is
+ * then clamped to [0, ss.size] and p2 is capped at ss.size. The result is empty
+ * (size 0) when p2 <= p1.
+ * p1 is clamped at the upper end as well, the same way csubstr_substr_ex()
+ * clamps pos, so that ss.str never moves beyond the end of the view.
+ */
+STC_DEF csubstr csubstr_slice_ex(csubstr ss, intptr_t p1, intptr_t p2) {
+    if (p1 < 0) {
+        p1 += ss.size;
+        if (p1 < 0) p1 = 0;
+    }
+    if (p1 > ss.size) p1 = ss.size;
+    if (p2 < 0) p2 += ss.size;
+    if (p2 > ss.size) p2 = ss.size;
+    ss.str += p1, ss.size = (p2 > p1 ? p2 - p1 : 0);
+    return ss;
+}
+
+/* Return the token of `ss` that begins at byte offset *start.
+ * The token runs up to the position cstrnstrn() reports for `sep` in the rest
+ * of `ss`, or to the end of `ss` when cstrnstrn() returns NULL.
+ * *start is advanced by the token size plus the length of `sep`.
+ */
+STC_DEF csubstr csubstr_token(csubstr ss, const char* sep, intptr_t* start) {
+    const intptr_t sep_len = c_strlen(sep);
+    const char* const from = ss.str + *start;
+    const intptr_t remaining = ss.size - *start;
+    const char* hit = cstrnstrn(from, sep, remaining, sep_len);
+    csubstr tok = {from, remaining};
+    if (hit)
+        tok.size = hit - from;
+    *start += tok.size + sep_len;
+    return tok;
+}
+#endif // CSUBSTR_C_INCLUDED
+#endif // i_implement
+#undef i_static
+#undef i_header
+#undef i_implement
+#undef i_import
+#undef i_opt