diff options
| author | Tyge Løvset <[email protected]> | 2022-01-08 23:13:32 +0100 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2022-01-08 23:13:32 +0100 |
| commit | 63562f5135243ac2a2553b4e7360c59e86686d6f (patch) | |
| tree | 439a3853a6bfded32a3031b577457bfba470f1a7 /include | |
| parent | 0a9910eee6582e6ee414071a0d5e7062448989cf (diff) | |
| download | STC-modified-63562f5135243ac2a2553b4e7360c59e86686d6f.tar.gz STC-modified-63562f5135243ac2a2553b4e7360c59e86686d6f.zip | |
Moved the utf8 functions from cregex.h to a separate file, utf8.h. Split csview.h: the cstr/csview interaction functions now live in a new file, strings.h.
Diffstat (limited to 'include')
| -rw-r--r-- | include/stc/cbits.h | 3 | ||||
| -rw-r--r-- | include/stc/ccommon.h | 52 | ||||
| -rw-r--r-- | include/stc/cmap.h | 6 | ||||
| -rw-r--r-- | include/stc/cregex.h | 161 | ||||
| -rw-r--r-- | include/stc/cstr.h | 29 | ||||
| -rw-r--r-- | include/stc/csview.h | 49 | ||||
| -rw-r--r-- | include/stc/strings.h | 47 | ||||
| -rw-r--r-- | include/stc/utf8.h | 99 |
8 files changed, 237 insertions, 209 deletions
diff --git a/include/stc/cbits.h b/include/stc/cbits.h index 0df7d765..6c2235e7 100644 --- a/include/stc/cbits.h +++ b/include/stc/cbits.h @@ -20,8 +20,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "ccommon.h"
-
#ifndef CBITS_H_INCLUDED
#define CBITS_H_INCLUDED
/*
@@ -53,6 +51,7 @@ int main() { }
}
*/
+#include "ccommon.h"
#include <stdlib.h>
#include <string.h>
diff --git a/include/stc/ccommon.h b/include/stc/ccommon.h index 22caf0ef..d58545e2 100644 --- a/include/stc/ccommon.h +++ b/include/stc/ccommon.h @@ -42,20 +42,16 @@ /* Macro overloading feature support based on: https://rextester.com/ONP80107 */
#define c_MACRO_OVERLOAD(name, ...) \
- c_PASTE3(name, _, c_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
+ c_PASTE(name, c_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
#define c_CONCAT(a, b) a ## b
#define c_PASTE(a, b) c_CONCAT(a, b)
-#define c_CONCAT3(a, b, c) a ## b ## c
-#define c_PASTE3(a, b, c) c_CONCAT3(a, b, c)
#define c_EXPAND(...) __VA_ARGS__
#define c_NUM_ARGS(...) _c_APPLY_ARG_N((__VA_ARGS__, _c_RSEQ_N))
#define _c_APPLY_ARG_N(args) c_EXPAND(_c_ARG_N args)
-#define _c_RSEQ_N 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, \
- 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+#define _c_RSEQ_N 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
#define _c_ARG_N(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, \
- _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, \
- _23, _24, _25, _26, _27, _28, _29, _30, N, ...) N
+ _13, _14, _15, N, ...) N
#define c_static_assert(cond) \
typedef char c_PASTE(_static_assert_line_, __LINE__)[(cond) ? 1 : -1]
@@ -102,7 +98,8 @@ typedef const char c_strlit[]; #define c_no_clone 4
#define c_no_cmp 8
#define c_static 16
-#define c_shared 32
+#define c_header 32
+#define c_implement 64
/* Generic algorithms */
@@ -134,13 +131,25 @@ STC_INLINE uint64_t c_hash64(const void* key, size_t len) { return x*0xc6a4a7935bd1e99d;
}
+STC_INLINE char* c_strnstrn(const char *s, const char *needle, size_t slen, const size_t nlen) {
+ if (!nlen) return (char *)s;
+ if (nlen > slen) return NULL;
+ slen -= nlen;
+ do {
+ if (*s == *needle && !memcmp(s, needle, nlen))
+ return (char *)s;
+ ++s;
+ } while (slen--);
+ return NULL;
+}
+
#define c_foreach(...) c_MACRO_OVERLOAD(c_foreach, __VA_ARGS__)
-#define c_foreach_3(it, C, cnt) \
+#define c_foreach3(it, C, cnt) \
for (C##_iter it = C##_begin(&cnt), it##_end_ = C##_end(&cnt) \
; it.ref != it##_end_.ref; C##_next(&it))
-#define c_foreach_4(it, C, start, finish) \
+#define c_foreach4(it, C, start, finish) \
for (C##_iter it = start, it##_end_ = finish \
; it.ref != it##_end_.ref; C##_next(&it))
@@ -151,11 +160,11 @@ STC_INLINE uint64_t c_hash64(const void* key, size_t len) { ; C##_next(&_._it))
#define c_forrange(...) c_MACRO_OVERLOAD(c_forrange, __VA_ARGS__)
-#define c_forrange_1(stop) for (size_t _c_ii=0, _c_end=stop; _c_ii < _c_end; ++_c_ii)
-#define c_forrange_2(i, stop) for (size_t i=0, _c_end=stop; i < _c_end; ++i)
-#define c_forrange_3(i, type, stop) for (type i=0, _c_end=stop; i < _c_end; ++i)
-#define c_forrange_4(i, type, start, stop) for (type i=start, _c_end=stop; i < _c_end; ++i)
-#define c_forrange_5(i, type, start, stop, step) \
+#define c_forrange1(stop) for (size_t _c_ii=0, _c_end=stop; _c_ii < _c_end; ++_c_ii)
+#define c_forrange2(i, stop) for (size_t i=0, _c_end=stop; i < _c_end; ++i)
+#define c_forrange3(i, type, stop) for (type i=0, _c_end=stop; i < _c_end; ++i)
+#define c_forrange4(i, type, start, stop) for (type i=start, _c_end=stop; i < _c_end; ++i)
+#define c_forrange5(i, type, start, stop, step) \
for (type i=start, _c_inc=step, _c_end=(stop) - (0 < _c_inc) \
; (i <= _c_end) == (0 < _c_inc); i += _c_inc)
@@ -165,15 +174,15 @@ STC_INLINE uint64_t c_hash64(const void* key, size_t len) { #define c_breakauto continue
#define c_auto(...) c_MACRO_OVERLOAD(c_auto, __VA_ARGS__)
-#define c_auto_2(C, a) \
+#define c_auto2(C, a) \
c_autovar(C a = C##_init(), C##_drop(&a))
-#define c_auto_3(C, a, b) \
+#define c_auto3(C, a, b) \
c_autovar(c_EXPAND(C a = C##_init(), b = C##_init()), \
C##_drop(&b), C##_drop(&a))
-#define c_auto_4(C, a, b, c) \
+#define c_auto4(C, a, b, c) \
c_autovar(c_EXPAND(C a = C##_init(), b = C##_init(), c = C##_init()), \
C##_drop(&c), C##_drop(&b), C##_drop(&a))
-#define c_auto_5(C, a, b, c, d) \
+#define c_auto5(C, a, b, c, d) \
c_autovar(c_EXPAND(C a = C##_init(), b = C##_init(), c = C##_init(), d = C##_init()), \
C##_drop(&d), C##_drop(&c), C##_drop(&b), C##_drop(&a))
@@ -224,7 +233,8 @@ STC_INLINE uint64_t c_hash64(const void* key, size_t len) { #undef _i_static
#undef _i_implement
-#if (c_option(c_shared) || defined(STC_HEADER) || defined(STC_IMPLEMENTATION)) && !c_option(c_static)
+#if !c_option(c_static) && (c_option(c_header) || c_option(c_implement) || \
+ defined(STC_HEADER) || defined(STC_IMPLEMENTATION))
# define STC_API extern
# define STC_DEF
#else
@@ -232,6 +242,6 @@ STC_INLINE uint64_t c_hash64(const void* key, size_t len) { # define STC_API static inline
# define STC_DEF static inline
#endif
-#if defined(STC_IMPLEMENTATION) ^ defined(_i_static)
+#if (c_option(c_implement) || defined(STC_IMPLEMENTATION)) ^ defined(_i_static)
# define _i_implement
#endif
diff --git a/include/stc/cmap.h b/include/stc/cmap.h index 948613a4..d3281163 100644 --- a/include/stc/cmap.h +++ b/include/stc/cmap.h @@ -296,7 +296,8 @@ _cx_memb(_bucket_)(const _cx_self* self, const _cx_rawkey* rkeyptr) { _cx_rawkey _raw = i_keyto(_i_keyref(self->table + b.idx));
if (i_eq(&_raw, rkeyptr)) break;
}
- if (++b.idx == _cap) b.idx = 0;
+ _cx_size _mask = (_cx_size) -(++b.idx != _cap);
+ b.idx &= _mask; // b.idx = (b.idx + 1) % _cap
}
return b;
}
@@ -367,7 +368,8 @@ _cx_memb(_erase_entry)(_cx_self* self, _cx_value* _val) { uint8_t* _hashx = self->_hashx;
_cx_memb(_value_drop)(&_slot[i]);
for (;;) { /* delete without leaving tombstone */
- if (++j == _cap) j = 0;
+ _cx_size _mask = (_cx_size) -(++j != _cap);
+ j &= _mask;
if (! _hashx[j])
break;
_cx_rawkey _raw = i_keyto(_i_keyref(_slot + j));
diff --git a/include/stc/cregex.h b/include/stc/cregex.h index eafefc4c..6259576e 100644 --- a/include/stc/cregex.h +++ b/include/stc/cregex.h @@ -22,11 +22,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "ccommon.h" - #ifndef CREGEX_INCLUDED #define CREGEX_INCLUDED +#include "ccommon.h" +#include "utf8.h" #include <stdlib.h> #include <setjmp.h> #include <stdarg.h> @@ -56,9 +56,6 @@ typedef enum { cregex_UNCLOSED_SUBEXPRESSION, } cregex_error_t; -/* check if a given string is valid utf8 */ -STC_API bool cregex_valid_utf8(const char *s); - /* create an empty expression */ STC_INLINE cregex cregex_init(void) { cregex re = {NULL}; return re; } @@ -92,82 +89,6 @@ STC_API void cregex_drop(cregex *re); /* -------------------------- IMPLEMENTATION ------------------------- */ #if defined(_i_implement) -enum { - _rx_UTF8_ACCEPT = 0, - _rx_UTF8_REJECT = 1 -}; - -static const uint8_t _rx_utf8d[] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df - 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef - 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff - 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2 - 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4 - 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6 - 
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8 -}; - -static inline uint32_t _rx_utf8_decode(uint32_t *state, uint32_t *codep, - const uint32_t byte) -{ - const uint32_t type = _rx_utf8d[byte]; - const uint32_t x = (uint32_t) -(*state != _rx_UTF8_ACCEPT); - - *codep = (x & ((byte & 0x3fu) | (*codep << 6))) - | (~x & ((0xff >> type) & (byte))); - - *state = _rx_utf8d[256 + (*state << 4) + type]; - return *state; -} - -static bool _rx_utf8_count_codepoints(size_t *count, const uint8_t *s) -{ - uint32_t state = _rx_UTF8_ACCEPT, codepoint; - - for (*count = 0; *s; ++s) - *count += !_rx_utf8_decode(&state, &codepoint, *s); - return state == _rx_UTF8_ACCEPT; -} - -STC_DEF bool cregex_valid_utf8(const char *s) -{ - size_t count; - bool valid = _rx_utf8_count_codepoints(&count, (const uint8_t *)s); - return valid; -} - -static inline uint32_t _rx_utf8_peek(const char *s) -{ - uint32_t state = _rx_UTF8_ACCEPT, codepoint; - _rx_utf8_decode(&state, &codepoint, (uint8_t)s[0]); - return codepoint; -} - -static inline uint32_t _rx_utf8_char_width(uint8_t c) -{ - uint32_t ret = ((c & 0xF0) == 0xE0); - ret += (ret << 1); // 3 - ret |= (c < 0x80); // 1 - ret |= ((c & 0xE0) == 0xC0) << 1; // 2 - ret |= ((c & 0xF8) == 0xF0) << 2; // 4 - return ret; -} - -static inline const char *_rx_utf8_next(const char *s) -{ - const char* t = s + _rx_utf8_char_width((uint8_t)s[0]); - - uintptr_t p = (uintptr_t)t; - p &= (uintptr_t) -(*s != 0); - return (const char *)p; -} /* function pointer type used to evaluate if a regex node * matched a given string */ @@ -243,8 +164,8 @@ static bool _rx_char_is_match(cregex_node *node, const char *orig, const char *c return false; } - *next = _rx_utf8_next(cur); - return node->chr.chr == _rx_utf8_peek(cur); + *next = utf8_next(cur); + return node->chr.chr == utf8_peek(cur); } static bool _rx_start_is_match(cregex_node *node, const char *orig, const char *cur, @@ -272,7 +193,7 @@ static bool _rx_any_is_match(cregex_node *node, const 
char *orig, const char *cu const char **next) { if (*cur) { - *next = _rx_utf8_next(cur); + *next = utf8_next(cur); return true; } @@ -305,8 +226,8 @@ static bool _rx_class_is_match(cregex_node *node, const char *orig, const char * if (*cur == 0) return false; - const uint32_t chr = _rx_utf8_peek(cur); - *next = _rx_utf8_next(cur); + const uint32_t chr = utf8_peek(cur); + *next = utf8_next(cur); bool found = false; for (_rx_RangeNode *range = cls->ranges; range != NULL; @@ -362,7 +283,7 @@ struct { } _rx_CompileException; /* set global error value to the default value */ -static inline void clear_compile_exception(void) +static inline void _rx_clear_compile_exception(void) { _rx_CompileException.err = cregex_OK; _rx_CompileException.s = NULL; @@ -381,8 +302,8 @@ static size_t _rx_calc_compiled_escaped_len(const char *s, const char **leftover if (*s == 0) _rx_throw_compile_exception(cregex_UNEXPECTED_EOL, s); - const uint32_t chr = _rx_utf8_peek(s); - *leftover = _rx_utf8_next(s); + const uint32_t chr = utf8_peek(s); + *leftover = utf8_next(s); switch (chr) { case 's': @@ -409,7 +330,7 @@ static size_t _rx_calc_compiled_escaped_len(const char *s, const char **leftover } static const size_t _rx_calc_compiled_class_len(const char *s, - const char **leftover) + const char **leftover) { if (*s == '^') s++; @@ -417,19 +338,19 @@ static const size_t _rx_calc_compiled_class_len(const char *s, size_t ret = 1; while (*s && *s != ']') { - uint32_t chr = _rx_utf8_peek(s); - s = _rx_utf8_next(s); + uint32_t chr = utf8_peek(s); + s = utf8_next(s); if (chr == '\\') { - s = _rx_utf8_next(s); + s = utf8_next(s); } if (*s == '-' && s[1] != ']') { s++; - chr = _rx_utf8_peek(s); - s = _rx_utf8_next(s); + chr = utf8_peek(s); + s = utf8_next(s); if (chr == '\\') - s = _rx_utf8_next(s); + s = utf8_next(s); } ret++; @@ -452,9 +373,9 @@ static const size_t _rx_calc_compiled_len(const char *s) if (*s == 0) { return 1; } else { - const uint32_t chr = _rx_utf8_peek(s); + const uint32_t chr = 
utf8_peek(s); size_t ret = 0; - s = _rx_utf8_next(s); + s = utf8_next(s); switch (chr) { case '{': { @@ -515,12 +436,12 @@ static size_t _rx_parse_digit(const char *s, const char **leftover) size_t ret = 0; while (*s) { - uint32_t chr = _rx_utf8_peek(s); + uint32_t chr = utf8_peek(s); if (_rx_is_digit(chr)) { ret *= 10; ret += chr - '0'; - s = _rx_utf8_next(s); + s = utf8_next(s); } else { break; } @@ -538,7 +459,7 @@ static void _rx_parse_complex_quant(const char *re, const char **leftover, if (*re == 0) _rx_throw_compile_exception(cregex_INVALID_COMPLEX_QUANT, re); - uint32_t tmp = _rx_utf8_peek(re); + uint32_t tmp = utf8_peek(re); size_t min = 0, max = SIZE_MAX; if (_rx_is_digit(tmp)) { @@ -547,11 +468,11 @@ static void _rx_parse_complex_quant(const char *re, const char **leftover, _rx_throw_compile_exception(cregex_INVALID_COMPLEX_QUANT, re); } - tmp = _rx_utf8_peek(re); + tmp = utf8_peek(re); if (tmp == ',') { - re = _rx_utf8_next(re); - if (_rx_is_digit(_rx_utf8_peek(re))) + re = utf8_next(re); + if (_rx_is_digit(utf8_peek(re))) max = _rx_parse_digit(re, &re); else max = SIZE_MAX; @@ -559,7 +480,7 @@ static void _rx_parse_complex_quant(const char *re, const char **leftover, max = min; } - tmp = _rx_utf8_peek(re); + tmp = utf8_peek(re); if (tmp == '}') { *leftover = re + 1; *min_p = min; @@ -614,8 +535,8 @@ static cregex_node *_rx_compile_next_escaped(const char *re, const char **leftov if (*re == 0) _rx_throw_compile_exception(cregex_UNEXPECTED_EOL, re); - const uint32_t chr = _rx_utf8_peek(re); - *leftover = _rx_utf8_next(re); + const uint32_t chr = utf8_peek(re); + *leftover = utf8_next(re); cregex_node *ret = cur + 1; switch (chr) { @@ -694,21 +615,21 @@ static cregex_node *_rx_compile_next_complex_class(const char *re, while (*re && *re != ']') { uint32_t first = 0, last = 0; - first = _rx_utf8_peek(re); - re = _rx_utf8_next(re); + first = utf8_peek(re); + re = utf8_next(re); if (first == '\\') { if (*re == 0) _rx_throw_compile_exception( 
cregex_INVALID_COMPLEX_CLASS, re); - first = _rx_utf8_peek(re); - re = _rx_utf8_next(re); + first = utf8_peek(re); + re = utf8_next(re); } if (*re == '-' && re[1] != ']' && re[1]) { re++; - last = _rx_utf8_peek(re); - re = _rx_utf8_next(re); + last = utf8_peek(re); + re = utf8_next(re); if (last == '\\') { if (*re == 0) @@ -716,8 +637,8 @@ static cregex_node *_rx_compile_next_complex_class(const char *re, cregex_INVALID_COMPLEX_CLASS, re); - last = _rx_utf8_peek(re); - re = _rx_utf8_next(re); + last = utf8_peek(re); + re = utf8_next(re); } } else { last = first; @@ -813,8 +734,8 @@ static cregex_node *_rx_compile_next(const char *re, const char **leftover, if (*re == 0) return NULL; - const uint32_t chr = _rx_utf8_peek(re); - re = _rx_utf8_next(re); + const uint32_t chr = utf8_peek(re); + re = utf8_next(re); cregex_node *next = cur + 1; switch (chr) { @@ -908,13 +829,13 @@ STC_DEF cregex cregex_new(const char *re) { cregex ret = {NULL}; - clear_compile_exception(); + _rx_clear_compile_exception(); if (re == NULL) { _rx_CompileException.err = cregex_INVALID_PARAMS; return ret; } - if (!cregex_valid_utf8(re)) { + if (!utf8_is_valid(re)) { _rx_CompileException.err = cregex_INVALID_UTF8; _rx_CompileException.s = NULL; return ret; @@ -952,7 +873,7 @@ STC_DEF bool cregex_find(cregex re, const char *s, cregex_match *m) m->start = SIZE_MAX; m->end = SIZE_MAX; - for (const char *tmp_s = s; *tmp_s; tmp_s = _rx_utf8_next(tmp_s)) { + for (const char *tmp_s = s; *tmp_s; tmp_s = utf8_next(tmp_s)) { const char *next = NULL; if (_rx_is_match(re.nodes, s, tmp_s, &next)) { m->start = tmp_s - s; diff --git a/include/stc/cstr.h b/include/stc/cstr.h index c0c78fd2..7ba20d10 100644 --- a/include/stc/cstr.h +++ b/include/stc/cstr.h @@ -20,11 +20,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "ccommon.h"
-
#ifndef CSTR_H_INCLUDED
#define CSTR_H_INCLUDED
+#include "ccommon.h"
#include <stdlib.h> /* malloc */
#include <string.h>
#include <stdarg.h>
@@ -166,6 +165,14 @@ cstr_ends_with(cstr s, const char* sub) { return n <= sz && !memcmp(s.str + sz - n, sub, n);
}
+STC_INLINE int
+c_strncasecmp(const char* s1, const char* s2, size_t nmax) {
+ int ret = 0;
+ while (nmax-- && (ret = tolower(*s1++) - tolower(*s2)) == 0 && *s2++)
+ ;
+ return ret;
+}
+
/* container adaptor functions: */
#define cstr_cmp(xp, yp) strcmp((xp)->str, (yp)->str)
#define cstr_eq(xp, yp) (!cstr_cmp(xp, yp))
@@ -361,24 +368,6 @@ cstr_find_n(cstr s, const char* needle, const size_t pos, const size_t nmax) { return res ? res - s.str : cstr_npos;
}
-STC_DEF int
-c_strncasecmp(const char* s1, const char* s2, size_t nmax) {
- int ret = 0;
- while (nmax-- && (ret = tolower(*s1++) - tolower(*s2)) == 0 && *s2++) ;
- return ret;
-}
-
-STC_DEF char*
-c_strnstrn(const char *s, const char *needle, size_t slen, const size_t nlen) {
- if (!nlen) return (char *)s;
- if (nlen > slen) return NULL;
- slen -= nlen;
- do {
- if (*s == *needle && !memcmp(s, needle, nlen)) return (char *)s;
- ++s;
- } while (slen--);
- return NULL;
-}
#endif
#endif
#undef i_opt
diff --git a/include/stc/csview.h b/include/stc/csview.h index d6492649..d6b0d7db 100644 --- a/include/stc/csview.h +++ b/include/stc/csview.h @@ -23,18 +23,17 @@ #ifndef CSVIEW_H_INCLUDED
#define CSVIEW_H_INCLUDED
-#include "cstr.h"
+#include "ccommon.h"
typedef struct csview { const char* str; size_t size; } csview;
typedef struct csview_iter { const char *ref; } csview_iter;
typedef char csview_value;
#define csview_null c_make(csview){"", 0}
-#define csview_npos cstr_npos
-#define c_svfmt "%.*s"
-#define c_svarg(sv) (int)(sv).size, (sv).str
+#define csview_npos (SIZE_MAX >> 1)
+#define c_PRIsv "%.*s"
+#define c_ARGsv(sv) (int)(sv).size, (sv).str
#define c_sv(literal) csview_new(literal)
-#define cstr_sv(s) csview_from_s(s)
STC_API csview csview_substr(csview sv, intptr_t pos, size_t n);
STC_API csview csview_slice(csview sv, intptr_t p1, intptr_t p2);
@@ -46,8 +45,6 @@ STC_INLINE csview csview_from(const char* str) { return c_make(csview){str, strlen(str)}; }
STC_INLINE csview csview_from_n(const char* str, size_t n)
{ return c_make(csview){str, n}; }
-STC_INLINE csview csview_from_s(cstr s)
- { return c_make(csview){s.str, _cstr_rep(&s)->size}; }
STC_INLINE size_t csview_size(csview sv) { return sv.size; }
STC_INLINE size_t csview_length(csview sv) { return sv.size; }
STC_INLINE bool csview_empty(csview sv) { return sv.size == 0; }
@@ -60,7 +57,7 @@ STC_INLINE bool csview_equals(csview sv, csview sv2) { return sv.size == sv2.size && !memcmp(sv.str, sv2.str, sv.size); }
STC_INLINE size_t csview_find(csview sv, csview needle)
{ char* res = c_strnstrn(sv.str, needle.str, sv.size, needle.size);
- return res ? res - sv.str : cstr_npos; }
+ return res ? res - sv.str : csview_npos; }
STC_INLINE bool csview_contains(csview sv, csview needle)
{ return c_strnstrn(sv.str, needle.str, sv.size, needle.size) != NULL; }
STC_INLINE bool csview_starts_with(csview sv, csview sub)
@@ -75,42 +72,6 @@ STC_INLINE csview_iter csview_end(const csview* self) { return c_make(csview_iter){self->str + self->size}; }
STC_INLINE void csview_next(csview_iter* it) { ++it->ref; }
-
-/* cstr interaction with csview: */
-
-STC_INLINE cstr cstr_from_v(csview sv)
- { return cstr_from_n(sv.str, sv.size); }
-STC_INLINE cstr cstr_from_replace_all_v(csview sv, csview find, csview repl)
- { return cstr_from_replace_all(sv.str, sv.size, find.str, find.size,
- repl.str, repl.size); }
-STC_INLINE csview cstr_to_v(const cstr* self)
- { return c_make(csview){self->str, _cstr_rep(self)->size}; }
-STC_INLINE csview cstr_substr(cstr s, intptr_t pos, size_t n)
- { return csview_substr(csview_from_s(s), pos, n); }
-STC_INLINE csview cstr_slice(cstr s, intptr_t p1, intptr_t p2)
- { return csview_slice(csview_from_s(s), p1, p2); }
-STC_INLINE cstr* cstr_assign_v(cstr* self, csview sv)
- { return cstr_assign_n(self, sv.str, sv.size); }
-STC_INLINE cstr* cstr_append_v(cstr* self, csview sv)
- { return cstr_append_n(self, sv.str, sv.size); }
-STC_INLINE void cstr_insert_v(cstr* self, size_t pos, csview sv)
- { cstr_replace_n(self, pos, 0, sv.str, sv.size); }
-STC_INLINE void cstr_replace_v(cstr* self, size_t pos, size_t len, csview sv)
- { cstr_replace_n(self, pos, len, sv.str, sv.size); }
-STC_INLINE bool cstr_equals_v(cstr s, csview sv)
- { return sv.size == cstr_size(s) && !memcmp(s.str, sv.str, sv.size); }
-STC_INLINE size_t cstr_find_v(cstr s, csview needle)
- { char* res = c_strnstrn(s.str, needle.str, cstr_size(s), needle.size);
- return res ? res - s.str : cstr_npos; }
-STC_INLINE bool cstr_contains_v(cstr s, csview needle)
- { return c_strnstrn(s.str, needle.str, cstr_size(s), needle.size) != NULL; }
-STC_INLINE bool cstr_starts_with_v(cstr s, csview sub)
- { if (sub.size > cstr_size(s)) return false;
- return !memcmp(s.str, sub.str, sub.size); }
-STC_INLINE bool cstr_ends_with_v(cstr s, csview sub)
- { if (sub.size > cstr_size(s)) return false;
- return !memcmp(s.str + cstr_size(s) - sub.size, sub.str, sub.size); }
-
/* ---- Container helper functions ---- */
STC_INLINE int csview_cmp(const csview* x, const csview* y) {
diff --git a/include/stc/strings.h b/include/stc/strings.h new file mode 100644 index 00000000..20fbfe5d --- /dev/null +++ b/include/stc/strings.h @@ -0,0 +1,47 @@ +#ifndef STC_STRINGS_INCLUDED
+#define STC_STRINGS_INCLUDED
+
+#include "cstr.h"
+#include "csview.h"
+#include <ctype.h>
+
+/* cstr interaction with csview: */
+
+STC_INLINE csview csview_from_s(cstr s)
+ { return c_make(csview){s.str, _cstr_rep(&s)->size}; }
+
+STC_INLINE cstr cstr_from_v(csview sv)
+ { return cstr_from_n(sv.str, sv.size); }
+STC_INLINE cstr cstr_from_replace_all_v(csview sv, csview find, csview repl)
+ { return cstr_from_replace_all(sv.str, sv.size, find.str, find.size,
+ repl.str, repl.size); }
+STC_INLINE csview cstr_to_v(const cstr* self)
+ { return c_make(csview){self->str, _cstr_rep(self)->size}; }
+STC_INLINE csview cstr_substr(cstr s, intptr_t pos, size_t n)
+ { return csview_substr(csview_from_s(s), pos, n); }
+STC_INLINE csview cstr_slice(cstr s, intptr_t p1, intptr_t p2)
+ { return csview_slice(csview_from_s(s), p1, p2); }
+STC_INLINE cstr* cstr_assign_v(cstr* self, csview sv)
+ { return cstr_assign_n(self, sv.str, sv.size); }
+STC_INLINE cstr* cstr_append_v(cstr* self, csview sv)
+ { return cstr_append_n(self, sv.str, sv.size); }
+STC_INLINE void cstr_insert_v(cstr* self, size_t pos, csview sv)
+ { cstr_replace_n(self, pos, 0, sv.str, sv.size); }
+STC_INLINE void cstr_replace_v(cstr* self, size_t pos, size_t len, csview sv)
+ { cstr_replace_n(self, pos, len, sv.str, sv.size); }
+STC_INLINE bool cstr_equals_v(cstr s, csview sv)
+ { return sv.size == cstr_size(s) && !memcmp(s.str, sv.str, sv.size); }
+STC_INLINE size_t cstr_find_v(cstr s, csview needle)
+ { char* res = c_strnstrn(s.str, needle.str, cstr_size(s), needle.size);
+ return res ? res - s.str : cstr_npos; }
+STC_INLINE bool cstr_contains_v(cstr s, csview needle)
+ { return c_strnstrn(s.str, needle.str, cstr_size(s), needle.size) != NULL; }
+STC_INLINE bool cstr_starts_with_v(cstr s, csview sub)
+ { if (sub.size > cstr_size(s)) return false;
+ return !memcmp(s.str, sub.str, sub.size); }
+STC_INLINE bool cstr_ends_with_v(cstr s, csview sub)
+ { if (sub.size > cstr_size(s)) return false;
+ return !memcmp(s.str + cstr_size(s) - sub.size, sub.str, sub.size); }
+
+#endif
+#undef i_opt
diff --git a/include/stc/utf8.h b/include/stc/utf8.h new file mode 100644 index 00000000..77b86a18 --- /dev/null +++ b/include/stc/utf8.h @@ -0,0 +1,99 @@ +#ifndef STC_UTF8_INCLUDED
+#define STC_UTF8_INCLUDED
+
+#include "ccommon.h"
+#include <ctype.h>
+
+enum utf8_state {
+ utf8_ACCEPT = 0,
+ utf8_REJECT = 12
+};
+
+STC_API uint32_t utf8_decode(uint32_t *state, uint32_t *codep, const uint32_t byte);
+STC_API bool utf8_valid_codepoints(const uint8_t *s, size_t *count);
+
+STC_INLINE bool utf8_is_valid(const char *s)
+{
+ size_t count;
+ return utf8_valid_codepoints((const uint8_t *)s, &count);
+}
+
+STC_INLINE uint32_t utf8_peek(const char *s)
+{
+ uint32_t state = utf8_ACCEPT, codepoint;
+ utf8_decode(&state, &codepoint, (uint8_t)s[0]);
+ return codepoint;
+}
+
+STC_INLINE int utf8_codepoint_width(uint8_t c)
+{
+ int ret = (c & 0xF0) == 0xE0;
+ ret += ret << 1; // 3
+ ret |= c < 0x80; // 1
+ ret |= ((0xC1 < c) & (c < 0xE0)) << 1; // 2
+ ret |= ((0xEF < c) & (c < 0xF5)) << 2; // 4
+ return ret;
+}
+
+STC_INLINE const char *utf8_next(const char *s)
+{
+ const char* t = s + utf8_codepoint_width((uint8_t)s[0]);
+
+ uintptr_t p = (uintptr_t)t;
+ p &= (uintptr_t) -(*s != 0);
+ return (const char *)p;
+}
+
+// assumes input is valid utf8! Use utf8_valid_codepoints() if unsure.
+STC_INLINE size_t utf8_size(const char *s)
+{
+ size_t count = 0;
+ while (*s)
+ s += utf8_codepoint_width((uint8_t)*s), ++count;
+ return count;
+}
+
+
+// --------------------------- IMPLEMENTATION ---------------------------------
+#ifdef _i_implement
+
+static const uint8_t utf8_table[] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3,11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+ 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+ 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+ 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+ 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+ 12,36,12,12,12,12,12,12,12,12,12,12,
+};
+
+STC_DEF uint32_t utf8_decode(uint32_t *state, uint32_t *codep,
+ const uint32_t byte)
+{
+ const uint32_t type = utf8_table[byte];
+ const uint32_t x = (uint32_t) -(*state != utf8_ACCEPT);
+
+ *codep = (x & ((byte & 0x3fu) | (*codep << 6)))
+ | (~x & ((0xff >> type) & (byte)));
+
+ return *state = utf8_table[256 + *state + type];
+}
+
+STC_DEF bool utf8_valid_codepoints(const uint8_t *s, size_t *count)
+{
+ uint32_t state = utf8_ACCEPT, codepoint;
+
+ for (*count = 0; *s; ++s)
+ *count += utf8_decode(&state, &codepoint, *s) == utf8_ACCEPT;
+ return state == utf8_ACCEPT;
+}
+
+#endif
+#endif
+#undef i_opt
|
