summaryrefslogtreecommitdiffhomepage
path: root/include
diff options
context:
space:
mode:
authorTyge Løvset <[email protected]>2022-01-08 23:13:32 +0100
committerTyge Løvset <[email protected]>2022-01-08 23:13:32 +0100
commit63562f5135243ac2a2553b4e7360c59e86686d6f (patch)
tree439a3853a6bfded32a3031b577457bfba470f1a7 /include
parent0a9910eee6582e6ee414071a0d5e7062448989cf (diff)
downloadSTC-modified-63562f5135243ac2a2553b4e7360c59e86686d6f.tar.gz
STC-modified-63562f5135243ac2a2553b4e7360c59e86686d6f.zip
Moved utf8 from cregex.h to a separate file. Split the cstr/csview interaction functions out of csview.h into a new file, strings.h.
Diffstat (limited to 'include')
-rw-r--r--include/stc/cbits.h3
-rw-r--r--include/stc/ccommon.h52
-rw-r--r--include/stc/cmap.h6
-rw-r--r--include/stc/cregex.h161
-rw-r--r--include/stc/cstr.h29
-rw-r--r--include/stc/csview.h49
-rw-r--r--include/stc/strings.h47
-rw-r--r--include/stc/utf8.h99
8 files changed, 237 insertions, 209 deletions
diff --git a/include/stc/cbits.h b/include/stc/cbits.h
index 0df7d765..6c2235e7 100644
--- a/include/stc/cbits.h
+++ b/include/stc/cbits.h
@@ -20,8 +20,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "ccommon.h"
-
#ifndef CBITS_H_INCLUDED
#define CBITS_H_INCLUDED
/*
@@ -53,6 +51,7 @@ int main() {
}
}
*/
+#include "ccommon.h"
#include <stdlib.h>
#include <string.h>
diff --git a/include/stc/ccommon.h b/include/stc/ccommon.h
index 22caf0ef..d58545e2 100644
--- a/include/stc/ccommon.h
+++ b/include/stc/ccommon.h
@@ -42,20 +42,16 @@
/* Macro overloading feature support based on: https://rextester.com/ONP80107 */
#define c_MACRO_OVERLOAD(name, ...) \
- c_PASTE3(name, _, c_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
+ c_PASTE(name, c_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__)
#define c_CONCAT(a, b) a ## b
#define c_PASTE(a, b) c_CONCAT(a, b)
-#define c_CONCAT3(a, b, c) a ## b ## c
-#define c_PASTE3(a, b, c) c_CONCAT3(a, b, c)
#define c_EXPAND(...) __VA_ARGS__
#define c_NUM_ARGS(...) _c_APPLY_ARG_N((__VA_ARGS__, _c_RSEQ_N))
#define _c_APPLY_ARG_N(args) c_EXPAND(_c_ARG_N args)
-#define _c_RSEQ_N 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, \
- 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+#define _c_RSEQ_N 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
#define _c_ARG_N(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, \
- _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, \
- _23, _24, _25, _26, _27, _28, _29, _30, N, ...) N
+ _13, _14, _15, N, ...) N
#define c_static_assert(cond) \
typedef char c_PASTE(_static_assert_line_, __LINE__)[(cond) ? 1 : -1]
@@ -102,7 +98,8 @@ typedef const char c_strlit[];
#define c_no_clone 4
#define c_no_cmp 8
#define c_static 16
-#define c_shared 32
+#define c_header 32
+#define c_implement 64
/* Generic algorithms */
@@ -134,13 +131,25 @@ STC_INLINE uint64_t c_hash64(const void* key, size_t len) {
return x*0xc6a4a7935bd1e99d;
}
+/* Find the first occurrence of the nlen-byte sequence `needle` within the
+ * first slen bytes of `s`. Both operands are length-delimited; candidates
+ * are compared with memcmp. Returns a pointer to the match, `s` itself when
+ * nlen is 0, or NULL when nlen > slen or no match exists. */
+STC_INLINE char* c_strnstrn(const char *s, const char *needle, size_t slen, const size_t nlen) {
+ if (!nlen) return (char *)s;
+ if (nlen > slen) return NULL;
+ slen -= nlen; /* start offsets still to try after offset 0 */
+ do {
+ /* comparing the first byte first avoids most memcmp calls */
+ if (*s == *needle && !memcmp(s, needle, nlen))
+ return (char *)s;
+ ++s;
+ } while (slen--);
+ return NULL;
+}
+
#define c_foreach(...) c_MACRO_OVERLOAD(c_foreach, __VA_ARGS__)
-#define c_foreach_3(it, C, cnt) \
+#define c_foreach3(it, C, cnt) \
for (C##_iter it = C##_begin(&cnt), it##_end_ = C##_end(&cnt) \
; it.ref != it##_end_.ref; C##_next(&it))
-#define c_foreach_4(it, C, start, finish) \
+#define c_foreach4(it, C, start, finish) \
for (C##_iter it = start, it##_end_ = finish \
; it.ref != it##_end_.ref; C##_next(&it))
@@ -151,11 +160,11 @@ STC_INLINE uint64_t c_hash64(const void* key, size_t len) {
; C##_next(&_._it))
#define c_forrange(...) c_MACRO_OVERLOAD(c_forrange, __VA_ARGS__)
-#define c_forrange_1(stop) for (size_t _c_ii=0, _c_end=stop; _c_ii < _c_end; ++_c_ii)
-#define c_forrange_2(i, stop) for (size_t i=0, _c_end=stop; i < _c_end; ++i)
-#define c_forrange_3(i, type, stop) for (type i=0, _c_end=stop; i < _c_end; ++i)
-#define c_forrange_4(i, type, start, stop) for (type i=start, _c_end=stop; i < _c_end; ++i)
-#define c_forrange_5(i, type, start, stop, step) \
+#define c_forrange1(stop) for (size_t _c_ii=0, _c_end=stop; _c_ii < _c_end; ++_c_ii)
+#define c_forrange2(i, stop) for (size_t i=0, _c_end=stop; i < _c_end; ++i)
+#define c_forrange3(i, type, stop) for (type i=0, _c_end=stop; i < _c_end; ++i)
+#define c_forrange4(i, type, start, stop) for (type i=start, _c_end=stop; i < _c_end; ++i)
+#define c_forrange5(i, type, start, stop, step) \
for (type i=start, _c_inc=step, _c_end=(stop) - (0 < _c_inc) \
; (i <= _c_end) == (0 < _c_inc); i += _c_inc)
@@ -165,15 +174,15 @@ STC_INLINE uint64_t c_hash64(const void* key, size_t len) {
#define c_breakauto continue
#define c_auto(...) c_MACRO_OVERLOAD(c_auto, __VA_ARGS__)
-#define c_auto_2(C, a) \
+#define c_auto2(C, a) \
c_autovar(C a = C##_init(), C##_drop(&a))
-#define c_auto_3(C, a, b) \
+#define c_auto3(C, a, b) \
c_autovar(c_EXPAND(C a = C##_init(), b = C##_init()), \
C##_drop(&b), C##_drop(&a))
-#define c_auto_4(C, a, b, c) \
+#define c_auto4(C, a, b, c) \
c_autovar(c_EXPAND(C a = C##_init(), b = C##_init(), c = C##_init()), \
C##_drop(&c), C##_drop(&b), C##_drop(&a))
-#define c_auto_5(C, a, b, c, d) \
+#define c_auto5(C, a, b, c, d) \
c_autovar(c_EXPAND(C a = C##_init(), b = C##_init(), c = C##_init(), d = C##_init()), \
C##_drop(&d), C##_drop(&c), C##_drop(&b), C##_drop(&a))
@@ -224,7 +233,8 @@ STC_INLINE uint64_t c_hash64(const void* key, size_t len) {
#undef _i_static
#undef _i_implement
-#if (c_option(c_shared) || defined(STC_HEADER) || defined(STC_IMPLEMENTATION)) && !c_option(c_static)
+#if !c_option(c_static) && (c_option(c_header) || c_option(c_implement) || \
+ defined(STC_HEADER) || defined(STC_IMPLEMENTATION))
# define STC_API extern
# define STC_DEF
#else
@@ -232,6 +242,6 @@ STC_INLINE uint64_t c_hash64(const void* key, size_t len) {
# define STC_API static inline
# define STC_DEF static inline
#endif
-#if defined(STC_IMPLEMENTATION) ^ defined(_i_static)
+#if (c_option(c_implement) || defined(STC_IMPLEMENTATION)) ^ defined(_i_static)
# define _i_implement
#endif
diff --git a/include/stc/cmap.h b/include/stc/cmap.h
index 948613a4..d3281163 100644
--- a/include/stc/cmap.h
+++ b/include/stc/cmap.h
@@ -296,7 +296,8 @@ _cx_memb(_bucket_)(const _cx_self* self, const _cx_rawkey* rkeyptr) {
_cx_rawkey _raw = i_keyto(_i_keyref(self->table + b.idx));
if (i_eq(&_raw, rkeyptr)) break;
}
- if (++b.idx == _cap) b.idx = 0;
+ _cx_size _mask = (_cx_size) -(++b.idx != _cap);
+ b.idx &= _mask; // b.idx = (b.idx + 1) % _cap
}
return b;
}
@@ -367,7 +368,8 @@ _cx_memb(_erase_entry)(_cx_self* self, _cx_value* _val) {
uint8_t* _hashx = self->_hashx;
_cx_memb(_value_drop)(&_slot[i]);
for (;;) { /* delete without leaving tombstone */
- if (++j == _cap) j = 0;
+ _cx_size _mask = (_cx_size) -(++j != _cap);
+ j &= _mask;
if (! _hashx[j])
break;
_cx_rawkey _raw = i_keyto(_i_keyref(_slot + j));
diff --git a/include/stc/cregex.h b/include/stc/cregex.h
index eafefc4c..6259576e 100644
--- a/include/stc/cregex.h
+++ b/include/stc/cregex.h
@@ -22,11 +22,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "ccommon.h"
-
#ifndef CREGEX_INCLUDED
#define CREGEX_INCLUDED
+#include "ccommon.h"
+#include "utf8.h"
#include <stdlib.h>
#include <setjmp.h>
#include <stdarg.h>
@@ -56,9 +56,6 @@ typedef enum {
cregex_UNCLOSED_SUBEXPRESSION,
} cregex_error_t;
-/* check if a given string is valid utf8 */
-STC_API bool cregex_valid_utf8(const char *s);
-
/* create an empty expression */
STC_INLINE cregex cregex_init(void)
{ cregex re = {NULL}; return re; }
@@ -92,82 +89,6 @@ STC_API void cregex_drop(cregex *re);
/* -------------------------- IMPLEMENTATION ------------------------- */
#if defined(_i_implement)
-enum {
- _rx_UTF8_ACCEPT = 0,
- _rx_UTF8_REJECT = 1
-};
-
-static const uint8_t _rx_utf8d[] = {
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
- 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
- 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
- 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
- 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
- 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
- 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
- 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
- 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
-};
-
-static inline uint32_t _rx_utf8_decode(uint32_t *state, uint32_t *codep,
- const uint32_t byte)
-{
- const uint32_t type = _rx_utf8d[byte];
- const uint32_t x = (uint32_t) -(*state != _rx_UTF8_ACCEPT);
-
- *codep = (x & ((byte & 0x3fu) | (*codep << 6)))
- | (~x & ((0xff >> type) & (byte)));
-
- *state = _rx_utf8d[256 + (*state << 4) + type];
- return *state;
-}
-
-static bool _rx_utf8_count_codepoints(size_t *count, const uint8_t *s)
-{
- uint32_t state = _rx_UTF8_ACCEPT, codepoint;
-
- for (*count = 0; *s; ++s)
- *count += !_rx_utf8_decode(&state, &codepoint, *s);
- return state == _rx_UTF8_ACCEPT;
-}
-
-STC_DEF bool cregex_valid_utf8(const char *s)
-{
- size_t count;
- bool valid = _rx_utf8_count_codepoints(&count, (const uint8_t *)s);
- return valid;
-}
-
-static inline uint32_t _rx_utf8_peek(const char *s)
-{
- uint32_t state = _rx_UTF8_ACCEPT, codepoint;
- _rx_utf8_decode(&state, &codepoint, (uint8_t)s[0]);
- return codepoint;
-}
-
-static inline uint32_t _rx_utf8_char_width(uint8_t c)
-{
- uint32_t ret = ((c & 0xF0) == 0xE0);
- ret += (ret << 1); // 3
- ret |= (c < 0x80); // 1
- ret |= ((c & 0xE0) == 0xC0) << 1; // 2
- ret |= ((c & 0xF8) == 0xF0) << 2; // 4
- return ret;
-}
-
-static inline const char *_rx_utf8_next(const char *s)
-{
- const char* t = s + _rx_utf8_char_width((uint8_t)s[0]);
-
- uintptr_t p = (uintptr_t)t;
- p &= (uintptr_t) -(*s != 0);
- return (const char *)p;
-}
/* function pointer type used to evaluate if a regex node
* matched a given string */
@@ -243,8 +164,8 @@ static bool _rx_char_is_match(cregex_node *node, const char *orig, const char *c
return false;
}
- *next = _rx_utf8_next(cur);
- return node->chr.chr == _rx_utf8_peek(cur);
+ *next = utf8_next(cur);
+ return node->chr.chr == utf8_peek(cur);
}
static bool _rx_start_is_match(cregex_node *node, const char *orig, const char *cur,
@@ -272,7 +193,7 @@ static bool _rx_any_is_match(cregex_node *node, const char *orig, const char *cu
const char **next)
{
if (*cur) {
- *next = _rx_utf8_next(cur);
+ *next = utf8_next(cur);
return true;
}
@@ -305,8 +226,8 @@ static bool _rx_class_is_match(cregex_node *node, const char *orig, const char *
if (*cur == 0)
return false;
- const uint32_t chr = _rx_utf8_peek(cur);
- *next = _rx_utf8_next(cur);
+ const uint32_t chr = utf8_peek(cur);
+ *next = utf8_next(cur);
bool found = false;
for (_rx_RangeNode *range = cls->ranges; range != NULL;
@@ -362,7 +283,7 @@ struct {
} _rx_CompileException;
/* set global error value to the default value */
-static inline void clear_compile_exception(void)
+static inline void _rx_clear_compile_exception(void)
{
_rx_CompileException.err = cregex_OK;
_rx_CompileException.s = NULL;
@@ -381,8 +302,8 @@ static size_t _rx_calc_compiled_escaped_len(const char *s, const char **leftover
if (*s == 0)
_rx_throw_compile_exception(cregex_UNEXPECTED_EOL, s);
- const uint32_t chr = _rx_utf8_peek(s);
- *leftover = _rx_utf8_next(s);
+ const uint32_t chr = utf8_peek(s);
+ *leftover = utf8_next(s);
switch (chr) {
case 's':
@@ -409,7 +330,7 @@ static size_t _rx_calc_compiled_escaped_len(const char *s, const char **leftover
}
static const size_t _rx_calc_compiled_class_len(const char *s,
- const char **leftover)
+ const char **leftover)
{
if (*s == '^')
s++;
@@ -417,19 +338,19 @@ static const size_t _rx_calc_compiled_class_len(const char *s,
size_t ret = 1;
while (*s && *s != ']') {
- uint32_t chr = _rx_utf8_peek(s);
- s = _rx_utf8_next(s);
+ uint32_t chr = utf8_peek(s);
+ s = utf8_next(s);
if (chr == '\\') {
- s = _rx_utf8_next(s);
+ s = utf8_next(s);
}
if (*s == '-' && s[1] != ']') {
s++;
- chr = _rx_utf8_peek(s);
- s = _rx_utf8_next(s);
+ chr = utf8_peek(s);
+ s = utf8_next(s);
if (chr == '\\')
- s = _rx_utf8_next(s);
+ s = utf8_next(s);
}
ret++;
@@ -452,9 +373,9 @@ static const size_t _rx_calc_compiled_len(const char *s)
if (*s == 0) {
return 1;
} else {
- const uint32_t chr = _rx_utf8_peek(s);
+ const uint32_t chr = utf8_peek(s);
size_t ret = 0;
- s = _rx_utf8_next(s);
+ s = utf8_next(s);
switch (chr) {
case '{': {
@@ -515,12 +436,12 @@ static size_t _rx_parse_digit(const char *s, const char **leftover)
size_t ret = 0;
while (*s) {
- uint32_t chr = _rx_utf8_peek(s);
+ uint32_t chr = utf8_peek(s);
if (_rx_is_digit(chr)) {
ret *= 10;
ret += chr - '0';
- s = _rx_utf8_next(s);
+ s = utf8_next(s);
} else {
break;
}
@@ -538,7 +459,7 @@ static void _rx_parse_complex_quant(const char *re, const char **leftover,
if (*re == 0)
_rx_throw_compile_exception(cregex_INVALID_COMPLEX_QUANT, re);
- uint32_t tmp = _rx_utf8_peek(re);
+ uint32_t tmp = utf8_peek(re);
size_t min = 0, max = SIZE_MAX;
if (_rx_is_digit(tmp)) {
@@ -547,11 +468,11 @@ static void _rx_parse_complex_quant(const char *re, const char **leftover,
_rx_throw_compile_exception(cregex_INVALID_COMPLEX_QUANT, re);
}
- tmp = _rx_utf8_peek(re);
+ tmp = utf8_peek(re);
if (tmp == ',') {
- re = _rx_utf8_next(re);
- if (_rx_is_digit(_rx_utf8_peek(re)))
+ re = utf8_next(re);
+ if (_rx_is_digit(utf8_peek(re)))
max = _rx_parse_digit(re, &re);
else
max = SIZE_MAX;
@@ -559,7 +480,7 @@ static void _rx_parse_complex_quant(const char *re, const char **leftover,
max = min;
}
- tmp = _rx_utf8_peek(re);
+ tmp = utf8_peek(re);
if (tmp == '}') {
*leftover = re + 1;
*min_p = min;
@@ -614,8 +535,8 @@ static cregex_node *_rx_compile_next_escaped(const char *re, const char **leftov
if (*re == 0)
_rx_throw_compile_exception(cregex_UNEXPECTED_EOL, re);
- const uint32_t chr = _rx_utf8_peek(re);
- *leftover = _rx_utf8_next(re);
+ const uint32_t chr = utf8_peek(re);
+ *leftover = utf8_next(re);
cregex_node *ret = cur + 1;
switch (chr) {
@@ -694,21 +615,21 @@ static cregex_node *_rx_compile_next_complex_class(const char *re,
while (*re && *re != ']') {
uint32_t first = 0, last = 0;
- first = _rx_utf8_peek(re);
- re = _rx_utf8_next(re);
+ first = utf8_peek(re);
+ re = utf8_next(re);
if (first == '\\') {
if (*re == 0)
_rx_throw_compile_exception(
cregex_INVALID_COMPLEX_CLASS, re);
- first = _rx_utf8_peek(re);
- re = _rx_utf8_next(re);
+ first = utf8_peek(re);
+ re = utf8_next(re);
}
if (*re == '-' && re[1] != ']' && re[1]) {
re++;
- last = _rx_utf8_peek(re);
- re = _rx_utf8_next(re);
+ last = utf8_peek(re);
+ re = utf8_next(re);
if (last == '\\') {
if (*re == 0)
@@ -716,8 +637,8 @@ static cregex_node *_rx_compile_next_complex_class(const char *re,
cregex_INVALID_COMPLEX_CLASS,
re);
- last = _rx_utf8_peek(re);
- re = _rx_utf8_next(re);
+ last = utf8_peek(re);
+ re = utf8_next(re);
}
} else {
last = first;
@@ -813,8 +734,8 @@ static cregex_node *_rx_compile_next(const char *re, const char **leftover,
if (*re == 0)
return NULL;
- const uint32_t chr = _rx_utf8_peek(re);
- re = _rx_utf8_next(re);
+ const uint32_t chr = utf8_peek(re);
+ re = utf8_next(re);
cregex_node *next = cur + 1;
switch (chr) {
@@ -908,13 +829,13 @@ STC_DEF cregex cregex_new(const char *re)
{
cregex ret = {NULL};
- clear_compile_exception();
+ _rx_clear_compile_exception();
if (re == NULL) {
_rx_CompileException.err = cregex_INVALID_PARAMS;
return ret;
}
- if (!cregex_valid_utf8(re)) {
+ if (!utf8_is_valid(re)) {
_rx_CompileException.err = cregex_INVALID_UTF8;
_rx_CompileException.s = NULL;
return ret;
@@ -952,7 +873,7 @@ STC_DEF bool cregex_find(cregex re, const char *s, cregex_match *m)
m->start = SIZE_MAX;
m->end = SIZE_MAX;
- for (const char *tmp_s = s; *tmp_s; tmp_s = _rx_utf8_next(tmp_s)) {
+ for (const char *tmp_s = s; *tmp_s; tmp_s = utf8_next(tmp_s)) {
const char *next = NULL;
if (_rx_is_match(re.nodes, s, tmp_s, &next)) {
m->start = tmp_s - s;
diff --git a/include/stc/cstr.h b/include/stc/cstr.h
index c0c78fd2..7ba20d10 100644
--- a/include/stc/cstr.h
+++ b/include/stc/cstr.h
@@ -20,11 +20,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "ccommon.h"
-
#ifndef CSTR_H_INCLUDED
#define CSTR_H_INCLUDED
+#include "ccommon.h"
#include <stdlib.h> /* malloc */
#include <string.h>
#include <stdarg.h>
@@ -166,6 +165,14 @@ cstr_ends_with(cstr s, const char* sub) {
return n <= sz && !memcmp(s.str + sz - n, sub, n);
}
+/* Case-insensitive comparison of at most nmax characters of s1 and s2.
+ * Returns 0 when all compared characters are equal ignoring case, otherwise
+ * the difference between the first pair of lowercased characters that
+ * differ. Comparison stops at the first difference, after nmax characters,
+ * or at the terminating NUL, whichever comes first. */
+STC_INLINE int
+c_strncasecmp(const char* s1, const char* s2, size_t nmax) {
+    int ret = 0;
+    /* tolower() is only defined for values representable as unsigned char
+     * (or EOF); a negative plain char would be undefined behavior. */
+    while (nmax-- && (ret = tolower((unsigned char)*s1++) - tolower((unsigned char)*s2)) == 0 && *s2++)
+        ;
+    return ret;
+}
+
/* container adaptor functions: */
#define cstr_cmp(xp, yp) strcmp((xp)->str, (yp)->str)
#define cstr_eq(xp, yp) (!cstr_cmp(xp, yp))
@@ -361,24 +368,6 @@ cstr_find_n(cstr s, const char* needle, const size_t pos, const size_t nmax) {
return res ? res - s.str : cstr_npos;
}
-STC_DEF int
-c_strncasecmp(const char* s1, const char* s2, size_t nmax) {
- int ret = 0;
- while (nmax-- && (ret = tolower(*s1++) - tolower(*s2)) == 0 && *s2++) ;
- return ret;
-}
-
-STC_DEF char*
-c_strnstrn(const char *s, const char *needle, size_t slen, const size_t nlen) {
- if (!nlen) return (char *)s;
- if (nlen > slen) return NULL;
- slen -= nlen;
- do {
- if (*s == *needle && !memcmp(s, needle, nlen)) return (char *)s;
- ++s;
- } while (slen--);
- return NULL;
-}
#endif
#endif
#undef i_opt
diff --git a/include/stc/csview.h b/include/stc/csview.h
index d6492649..d6b0d7db 100644
--- a/include/stc/csview.h
+++ b/include/stc/csview.h
@@ -23,18 +23,17 @@
#ifndef CSVIEW_H_INCLUDED
#define CSVIEW_H_INCLUDED
-#include "cstr.h"
+#include "ccommon.h"
typedef struct csview { const char* str; size_t size; } csview;
typedef struct csview_iter { const char *ref; } csview_iter;
typedef char csview_value;
#define csview_null c_make(csview){"", 0}
-#define csview_npos cstr_npos
-#define c_svfmt "%.*s"
-#define c_svarg(sv) (int)(sv).size, (sv).str
+#define csview_npos (SIZE_MAX >> 1)
+#define c_PRIsv "%.*s"
+#define c_ARGsv(sv) (int)(sv).size, (sv).str
#define c_sv(literal) csview_new(literal)
-#define cstr_sv(s) csview_from_s(s)
STC_API csview csview_substr(csview sv, intptr_t pos, size_t n);
STC_API csview csview_slice(csview sv, intptr_t p1, intptr_t p2);
@@ -46,8 +45,6 @@ STC_INLINE csview csview_from(const char* str)
{ return c_make(csview){str, strlen(str)}; }
STC_INLINE csview csview_from_n(const char* str, size_t n)
{ return c_make(csview){str, n}; }
-STC_INLINE csview csview_from_s(cstr s)
- { return c_make(csview){s.str, _cstr_rep(&s)->size}; }
STC_INLINE size_t csview_size(csview sv) { return sv.size; }
STC_INLINE size_t csview_length(csview sv) { return sv.size; }
STC_INLINE bool csview_empty(csview sv) { return sv.size == 0; }
@@ -60,7 +57,7 @@ STC_INLINE bool csview_equals(csview sv, csview sv2)
{ return sv.size == sv2.size && !memcmp(sv.str, sv2.str, sv.size); }
STC_INLINE size_t csview_find(csview sv, csview needle)
{ char* res = c_strnstrn(sv.str, needle.str, sv.size, needle.size);
- return res ? res - sv.str : cstr_npos; }
+ return res ? res - sv.str : csview_npos; }
STC_INLINE bool csview_contains(csview sv, csview needle)
{ return c_strnstrn(sv.str, needle.str, sv.size, needle.size) != NULL; }
STC_INLINE bool csview_starts_with(csview sv, csview sub)
@@ -75,42 +72,6 @@ STC_INLINE csview_iter csview_end(const csview* self)
{ return c_make(csview_iter){self->str + self->size}; }
STC_INLINE void csview_next(csview_iter* it) { ++it->ref; }
-
-/* cstr interaction with csview: */
-
-STC_INLINE cstr cstr_from_v(csview sv)
- { return cstr_from_n(sv.str, sv.size); }
-STC_INLINE cstr cstr_from_replace_all_v(csview sv, csview find, csview repl)
- { return cstr_from_replace_all(sv.str, sv.size, find.str, find.size,
- repl.str, repl.size); }
-STC_INLINE csview cstr_to_v(const cstr* self)
- { return c_make(csview){self->str, _cstr_rep(self)->size}; }
-STC_INLINE csview cstr_substr(cstr s, intptr_t pos, size_t n)
- { return csview_substr(csview_from_s(s), pos, n); }
-STC_INLINE csview cstr_slice(cstr s, intptr_t p1, intptr_t p2)
- { return csview_slice(csview_from_s(s), p1, p2); }
-STC_INLINE cstr* cstr_assign_v(cstr* self, csview sv)
- { return cstr_assign_n(self, sv.str, sv.size); }
-STC_INLINE cstr* cstr_append_v(cstr* self, csview sv)
- { return cstr_append_n(self, sv.str, sv.size); }
-STC_INLINE void cstr_insert_v(cstr* self, size_t pos, csview sv)
- { cstr_replace_n(self, pos, 0, sv.str, sv.size); }
-STC_INLINE void cstr_replace_v(cstr* self, size_t pos, size_t len, csview sv)
- { cstr_replace_n(self, pos, len, sv.str, sv.size); }
-STC_INLINE bool cstr_equals_v(cstr s, csview sv)
- { return sv.size == cstr_size(s) && !memcmp(s.str, sv.str, sv.size); }
-STC_INLINE size_t cstr_find_v(cstr s, csview needle)
- { char* res = c_strnstrn(s.str, needle.str, cstr_size(s), needle.size);
- return res ? res - s.str : cstr_npos; }
-STC_INLINE bool cstr_contains_v(cstr s, csview needle)
- { return c_strnstrn(s.str, needle.str, cstr_size(s), needle.size) != NULL; }
-STC_INLINE bool cstr_starts_with_v(cstr s, csview sub)
- { if (sub.size > cstr_size(s)) return false;
- return !memcmp(s.str, sub.str, sub.size); }
-STC_INLINE bool cstr_ends_with_v(cstr s, csview sub)
- { if (sub.size > cstr_size(s)) return false;
- return !memcmp(s.str + cstr_size(s) - sub.size, sub.str, sub.size); }
-
/* ---- Container helper functions ---- */
STC_INLINE int csview_cmp(const csview* x, const csview* y) {
diff --git a/include/stc/strings.h b/include/stc/strings.h
new file mode 100644
index 00000000..20fbfe5d
--- /dev/null
+++ b/include/stc/strings.h
@@ -0,0 +1,47 @@
+#ifndef STC_STRINGS_INCLUDED
+#define STC_STRINGS_INCLUDED
+
+#include "cstr.h"
+#include "csview.h"
+#include <ctype.h>
+
+/* cstr interaction with csview: */
+
+STC_INLINE csview csview_from_s(cstr s)
+ { return c_make(csview){s.str, _cstr_rep(&s)->size}; }
+
+STC_INLINE cstr cstr_from_v(csview sv)
+ { return cstr_from_n(sv.str, sv.size); }
+STC_INLINE cstr cstr_from_replace_all_v(csview sv, csview find, csview repl)
+ { return cstr_from_replace_all(sv.str, sv.size, find.str, find.size,
+ repl.str, repl.size); }
+STC_INLINE csview cstr_to_v(const cstr* self)
+ { return c_make(csview){self->str, _cstr_rep(self)->size}; }
+STC_INLINE csview cstr_substr(cstr s, intptr_t pos, size_t n)
+ { return csview_substr(csview_from_s(s), pos, n); }
+STC_INLINE csview cstr_slice(cstr s, intptr_t p1, intptr_t p2)
+ { return csview_slice(csview_from_s(s), p1, p2); }
+STC_INLINE cstr* cstr_assign_v(cstr* self, csview sv)
+ { return cstr_assign_n(self, sv.str, sv.size); }
+STC_INLINE cstr* cstr_append_v(cstr* self, csview sv)
+ { return cstr_append_n(self, sv.str, sv.size); }
+STC_INLINE void cstr_insert_v(cstr* self, size_t pos, csview sv)
+ { cstr_replace_n(self, pos, 0, sv.str, sv.size); }
+STC_INLINE void cstr_replace_v(cstr* self, size_t pos, size_t len, csview sv)
+ { cstr_replace_n(self, pos, len, sv.str, sv.size); }
+STC_INLINE bool cstr_equals_v(cstr s, csview sv)
+ { return sv.size == cstr_size(s) && !memcmp(s.str, sv.str, sv.size); }
+STC_INLINE size_t cstr_find_v(cstr s, csview needle)
+ { char* res = c_strnstrn(s.str, needle.str, cstr_size(s), needle.size);
+ return res ? res - s.str : cstr_npos; }
+STC_INLINE bool cstr_contains_v(cstr s, csview needle)
+ { return c_strnstrn(s.str, needle.str, cstr_size(s), needle.size) != NULL; }
+STC_INLINE bool cstr_starts_with_v(cstr s, csview sub)
+ { if (sub.size > cstr_size(s)) return false;
+ return !memcmp(s.str, sub.str, sub.size); }
+STC_INLINE bool cstr_ends_with_v(cstr s, csview sub)
+ { if (sub.size > cstr_size(s)) return false;
+ return !memcmp(s.str + cstr_size(s) - sub.size, sub.str, sub.size); }
+
+#endif
+#undef i_opt
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
new file mode 100644
index 00000000..77b86a18
--- /dev/null
+++ b/include/stc/utf8.h
@@ -0,0 +1,99 @@
+#ifndef STC_UTF8_INCLUDED
+#define STC_UTF8_INCLUDED
+
+#include "ccommon.h"
+#include <ctype.h>
+
+enum utf8_state {
+ utf8_ACCEPT = 0,
+ utf8_REJECT = 12
+};
+
+STC_API uint32_t utf8_decode(uint32_t *state, uint32_t *codep, const uint32_t byte);
+STC_API bool utf8_valid_codepoints(const uint8_t *s, size_t *count);
+
+/* Return the result of utf8_valid_codepoints() for the NUL-terminated string
+ * s; the code point count that function reports is discarded. */
+STC_INLINE bool utf8_is_valid(const char *s)
+{
+ size_t count;
+ return utf8_valid_codepoints((const uint8_t *)s, &count);
+}
+
+/* Decode and return the code point that starts at s.
+ * Bytes are fed to utf8_decode() until the sequence is complete
+ * (utf8_ACCEPT), is found to be malformed (utf8_REJECT), or the terminating
+ * NUL is reached. A multi-byte character therefore yields its full code
+ * point rather than only the payload bits of its lead byte. For malformed
+ * or truncated input the returned value is not meaningful. */
+STC_INLINE uint32_t utf8_peek(const char *s)
+{
+    const uint8_t *p = (const uint8_t *)s;
+    /* codepoint starts at 0 because utf8_decode() reads *codep on every call */
+    uint32_t state = utf8_ACCEPT, codepoint = 0;
+
+    do {
+        utf8_decode(&state, &codepoint, *p++);
+    } while (state != utf8_ACCEPT && state != utf8_REJECT && *p);
+    return codepoint;
+}
+
+/* Byte length of the UTF-8 sequence introduced by lead byte c, built from
+ * arithmetic on comparison results: 1 for c < 0x80, 2 for 0xC2..0xDF,
+ * 3 for 0xE0..0xEF, 4 for 0xF0..0xF4, and 0 for every other value. */
+STC_INLINE int utf8_codepoint_width(uint8_t c)
+{
+ int ret = (c & 0xF0) == 0xE0;
+ ret += ret << 1; // 3
+ ret |= c < 0x80; // 1
+ ret |= ((0xC1 < c) & (c < 0xE0)) << 1; // 2
+ ret |= ((0xEF < c) & (c < 0xF5)) << 2; // 4
+ return ret;
+}
+
+/* Advance s past the character it points at, using the width implied by its
+ * first byte. When *s is the terminating NUL the result is masked to integer
+ * zero, so the function returns a pointer converted from 0 instead of a
+ * pointer past the terminator. A first byte for which
+ * utf8_codepoint_width() yields 0 leaves the pointer unchanged. */
+STC_INLINE const char *utf8_next(const char *s)
+{
+ const char* t = s + utf8_codepoint_width((uint8_t)s[0]);
+
+ uintptr_t p = (uintptr_t)t;
+ // mask is all ones when *s != 0 and zero when *s is the NUL
+ p &= (uintptr_t) -(*s != 0);
+ return (const char *)p;
+}
+
+// Number of code points in the NUL-terminated string s.
+// Every byte that is not a continuation byte (10xxxxxx) starts a code point,
+// so counting those bytes gives the same result as stepping by lead-byte
+// width on valid UTF-8. Unlike stepping by width, malformed input can neither
+// stall the loop (a lead byte of width 0) nor carry it past the terminating
+// NUL (a truncated sequence). Use utf8_valid_codepoints() if the input also
+// needs to be validated.
+STC_INLINE size_t utf8_size(const char *s)
+{
+    size_t count = 0;
+    for (; *s; ++s)
+        count += ((uint8_t)*s & 0xC0) != 0x80;
+    return count;
+}
+
+
+// --------------------------- IMPLEMENTATION ---------------------------------
+#ifdef _i_implement
+
+static const uint8_t utf8_table[] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3,11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+ 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+ 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+ 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+ 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+ 12,36,12,12,12,12,12,12,12,12,12,12,
+};
+
+/* Feed one byte to the table-driven UTF-8 decoder.
+ * *state is replaced by the next state looked up in utf8_table, and that new
+ * state is also the return value.
+ * *codep accumulates the code point: when *state is utf8_ACCEPT on entry it
+ * is set to the payload bits of `byte` as a lead byte; otherwise the previous
+ * value is shifted left by 6 and the low 6 bits of `byte` are appended.
+ * `byte` indexes the first 256 entries of utf8_table directly, so callers are
+ * expected to pass a value below 256. */
+STC_DEF uint32_t utf8_decode(uint32_t *state, uint32_t *codep,
+ const uint32_t byte)
+{
+ const uint32_t type = utf8_table[byte];
+ // x is all ones while inside a multi-byte sequence, zero at a sequence start
+ const uint32_t x = (uint32_t) -(*state != utf8_ACCEPT);
+
+ *codep = (x & ((byte & 0x3fu) | (*codep << 6)))
+ | (~x & ((0xff >> type) & (byte)));
+
+ return *state = utf8_table[256 + *state + type];
+}
+
+/* Validate the NUL-terminated byte string s as UTF-8.
+ * Stores in *count the number of times the decoder returned to utf8_ACCEPT,
+ * i.e. the number of complete code points seen, and returns true when the
+ * decoder is in utf8_ACCEPT after the last byte. */
+STC_DEF bool utf8_valid_codepoints(const uint8_t *s, size_t *count)
+{
+    /* codepoint is zero-initialized because utf8_decode() reads *codep on
+     * every call, including the first one. */
+    uint32_t state = utf8_ACCEPT, codepoint = 0;
+
+    for (*count = 0; *s; ++s)
+        *count += utf8_decode(&state, &codepoint, *s) == utf8_ACCEPT;
+    return state == utf8_ACCEPT;
+}
+
+#endif
+#endif
+#undef i_opt