diff options
| author | Tyge Lovset <[email protected]> | 2022-05-30 10:17:07 +0200 |
|---|---|---|
| committer | Tyge Lovset <[email protected]> | 2022-05-30 10:17:07 +0200 |
| commit | b28d3fa7c3b9233ca485014744bf84e6c4f5a1d3 (patch) | |
| tree | 8c97999b1ede5e0cf45c94b2035e94b0734dff1c /include | |
| parent | 831dc0843aeedcb45138a6ed576ea03f2dcd58f8 (diff) | |
| download | STC-modified-b28d3fa7c3b9233ca485014744bf84e6c4f5a1d3.tar.gz STC-modified-b28d3fa7c3b9233ca485014744bf84e6c4f5a1d3.zip | |
Large refactoring on strings / utf8 and some file structure.
Diffstat (limited to 'include')
| -rw-r--r-- | include/stc/alt/cstr.h | 4 | ||||
| -rw-r--r-- | include/stc/cbits.h | 3 | ||||
| -rw-r--r-- | include/stc/ccommon.h | 14 | ||||
| -rw-r--r-- | include/stc/cstr.h | 31 | ||||
| -rw-r--r-- | include/stc/csview.h | 170 | ||||
| -rw-r--r-- | include/stc/template.h | 11 | ||||
| -rw-r--r-- | include/stc/utf8.h | 113 |
7 files changed, 162 insertions, 184 deletions
diff --git a/include/stc/alt/cstr.h b/include/stc/alt/cstr.h
index a43c7cc4..0012d364 100644
--- a/include/stc/alt/cstr.h
+++ b/include/stc/alt/cstr.h
@@ -35,7 +35,7 @@
#define cstr_npos (SIZE_MAX >> 1)
typedef struct { size_t size, cap; char chr[1]; } cstr_priv;
#define _cstr_p(self) c_unchecked_container_of((self)->str, cstr_priv, chr)
-#ifdef _i_static
+#ifdef i_static
static cstr_priv _cstr_nullrep = {0, 0, {0}};
static const cstr cstr_null = {_cstr_nullrep.chr};
#else
@@ -189,7 +189,7 @@ STC_INLINE uint64_t cstr_hash(const cstr *self) {
/* -------------------------- IMPLEMENTATION ------------------------- */
#if defined(i_implement)
-#ifndef _i_static
+#ifndef i_static
static cstr_priv _cstr_nullrep = {0, 0, {0}};
const cstr cstr_null = {_cstr_nullrep.chr};
#endif
diff --git a/include/stc/cbits.h b/include/stc/cbits.h
index 4c104bac..e35e4225 100644
--- a/include/stc/cbits.h
+++ b/include/stc/cbits.h
@@ -321,3 +321,6 @@ STC_DEF bool _cbits_disjoint(const uint64_t* set, const uint64_t* other, const s
#undef i_len
#undef i_type
#undef i_opt
+#undef i_header
+#undef i_implement
+#undef i_static
diff --git a/include/stc/ccommon.h b/include/stc/ccommon.h
index 46d53bc1..f27190af 100644
--- a/include/stc/ccommon.h
+++ b/include/stc/ccommon.h
@@ -106,9 +106,6 @@
#define c_no_atomic (1<<1)
#define c_no_clone (1<<2)
#define c_no_cmp (1<<3)
-#define c_static (1<<4)
-#define c_header (1<<5)
-#define c_implement (1<<6)
/* Generic algorithms */
@@ -254,19 +251,16 @@ STC_INLINE char* c_strnstrn(const char *s, const char *needle,
#undef STC_API
#undef STC_DEF
-#undef _i_static
-#undef i_implement
-#if !c_option(c_static) && (c_option(c_header|c_implement) || defined(STC_HEADER) || \
- defined(STC_IMPLEMENT) || defined(STC_IMPLEMENTATION))
+#if !defined i_static && (defined(i_header) || defined(i_implement) || \
+ defined(STC_HEADER) || defined(STC_IMPLEMENT))
# define STC_API extern
# define STC_DEF
#else
-# define _i_static
+# define i_static
# define STC_API static inline
# define STC_DEF static inline
#endif
-#if defined(_i_static) || c_option(c_implement) || defined(STC_IMPLEMENT) \
- || defined(STC_IMPLEMENTATION)
+#if defined(i_static) || defined(STC_IMPLEMENT)
# define i_implement
#endif
diff --git a/include/stc/cstr.h b/include/stc/cstr.h
index 1d57437e..561bb1ca 100644
--- a/include/stc/cstr.h
+++ b/include/stc/cstr.h
@@ -169,7 +169,22 @@ STC_INLINE size_t cstr_length(cstr s)
STC_INLINE size_t cstr_capacity(cstr s)
{ return cstr_is_long(&s) ? cstr_l_cap(&s) : cstr_s_cap; }
-// utf8:
+// utf8 methods defined in/depending on src/utf8utils.c:
+cstr cstr_tolower(const cstr* self);
+cstr cstr_toupper(const cstr* self);
+void cstr_lowercase(cstr* self);
+void cstr_uppercase(cstr* self);
+
+STC_INLINE bool cstr_valid_u8(const cstr* self)
+ { return utf8_valid(cstr_str(self)); }
+
+STC_INLINE utf8_decode_t cstr_peek(const cstr* self, size_t bytepos) {
+ utf8_decode_t d = {UTF8_OK};
+ utf8_peek(&d, cstr_str(self) + bytepos);
+ return d;
+}
+
+// other utf8
STC_INLINE size_t cstr_size_u8(cstr s)
{ return utf8_size(cstr_str(&s)); }
@@ -183,6 +198,7 @@ STC_INLINE csview cstr_at(const cstr* self, size_t bytepos) {
sv.size = utf8_codep_size(sv.str);
return sv;
}
+
STC_INLINE csview cstr_at_u8(const cstr* self, size_t u8idx) {
csview sv = cstr_sv(self);
sv.str = utf8_at(sv.str, u8idx);
@@ -193,14 +209,7 @@ STC_INLINE csview cstr_at_u8(const cstr* self, size_t u8idx) {
STC_INLINE size_t cstr_pos_u8(const cstr* self, size_t u8idx)
{ return utf8_pos(cstr_str(self), u8idx); }
-STC_INLINE bool cstr_valid_u8(const cstr* self)
- { return utf8_valid(cstr_str(self)); }
-
-STC_INLINE utf8_decode_t cstr_peek(const cstr* self, size_t bytepos) {
- utf8_decode_t d = {UTF8_OK};
- utf8_peek(cstr_str(self) + bytepos, &d);
- return d;
-}
+// utf8 iterator
STC_INLINE cstr_iter cstr_begin(const cstr* self) {
const char* str = cstr_str(self);
@@ -525,4 +534,8 @@ STC_DEF int cstr_printf(cstr* self, const char* fmt, ...) {
#endif
#endif // CSTR_H_INCLUDED
#undef i_opt
+#undef i_header
+#undef i_static
+#undef i_implement
+//#undef i_implement
#endif // !STC_CSTR_V1
diff --git a/include/stc/csview.h b/include/stc/csview.h
index 270a79f8..ba0a7568 100644
--- a/include/stc/csview.h
+++ b/include/stc/csview.h
@@ -27,49 +27,60 @@
#include "forward.h"
#include "utf8.h"
-#define csview_null c_sv("")
-#define csview_new(literal) c_sv(literal)
-#define csview_npos (SIZE_MAX >> 1)
-
-STC_API csview csview_substr(csview sv, intptr_t pos, size_t n);
-STC_API csview csview_slice(csview sv, intptr_t p1, intptr_t p2);
-STC_API csview csview_token(csview sv, csview sep, size_t* start);
-
-STC_INLINE csview csview_init() { return csview_null; }
-STC_INLINE csview csview_from(const char* str)
- { return c_make(csview){str, strlen(str)}; }
-STC_INLINE csview csview_from_n(const char* str, size_t n)
- { return c_make(csview){str, n}; }
-STC_INLINE size_t csview_size(csview sv) { return sv.size; }
-STC_INLINE size_t csview_length(csview sv) { return sv.size; }
-STC_INLINE bool csview_empty(csview sv) { return sv.size == 0; }
-STC_INLINE char csview_front(csview sv) { return sv.str[0]; }
-STC_INLINE char csview_back(csview sv) { return sv.str[sv.size - 1]; }
-
-STC_INLINE void csview_clear(csview* self) { *self = csview_null; }
-
-STC_INLINE bool csview_equals(csview sv, csview sv2)
- { return sv.size == sv2.size && !memcmp(sv.str, sv2.str, sv.size); }
-STC_INLINE size_t csview_find(csview sv, csview needle)
- { char* res = c_strnstrn(sv.str, needle.str, sv.size, needle.size);
- return res ? res - sv.str : csview_npos; }
-STC_INLINE bool csview_contains(csview sv, csview needle)
- { return c_strnstrn(sv.str, needle.str, sv.size, needle.size) != NULL; }
-STC_INLINE bool csview_starts_with(csview sv, csview sub)
- { if (sub.size > sv.size) return false;
- return !memcmp(sv.str, sub.str, sub.size); }
-STC_INLINE bool csview_ends_with(csview sv, csview sub)
- { if (sub.size > sv.size) return false;
- return !memcmp(sv.str + sv.size - sub.size, sub.str, sub.size); }
-STC_INLINE csview_iter csview_begin(const csview* self)
- { return c_make(csview_iter){.chr = {self->str, utf8_codep_size(self->str)}}; }
-STC_INLINE csview_iter csview_end(const csview* self)
- { return c_make(csview_iter){self->str + self->size}; }
-STC_INLINE void csview_next(csview_iter* it)
- { it->ref += it->chr.size; it->chr.size = utf8_codep_size(it->ref); }
+#define csview_null c_sv("")
+#define csview_new(literal) c_sv(literal)
+#define csview_npos (SIZE_MAX >> 1)
+
+STC_API csview csview_substr(csview sv, intptr_t pos, size_t n);
+STC_API csview csview_slice(csview sv, intptr_t p1, intptr_t p2);
+STC_API csview csview_token(csview sv, csview sep, size_t* start);
+
+STC_INLINE csview csview_init() { return csview_null; }
+STC_INLINE csview csview_from(const char* str)
+ { return c_make(csview){str, strlen(str)}; }
+STC_INLINE csview csview_from_n(const char* str, size_t n)
+ { return c_make(csview){str, n}; }
+STC_INLINE void csview_clear(csview* self) { *self = csview_null; }
+
+STC_INLINE size_t csview_size(csview sv) { return sv.size; }
+STC_INLINE size_t csview_length(csview sv) { return sv.size; }
+STC_INLINE bool csview_empty(csview sv) { return sv.size == 0; }
+STC_INLINE char csview_front(csview sv) { return sv.str[0]; }
+STC_INLINE char csview_back(csview sv) { return sv.str[sv.size - 1]; }
+
+STC_INLINE bool csview_equals(csview sv, csview sv2)
+ { return sv.size == sv2.size && !memcmp(sv.str, sv2.str, sv.size); }
+
+STC_INLINE size_t csview_find(csview sv, csview needle) {
+ char* res = c_strnstrn(sv.str, needle.str, sv.size, needle.size);
+ return res ? res - sv.str : csview_npos;
+}
+
+STC_INLINE bool csview_contains(csview sv, csview needle)
+ { return c_strnstrn(sv.str, needle.str, sv.size, needle.size) != NULL; }
+
+STC_INLINE bool csview_starts_with(csview sv, csview sub) {
+ if (sub.size > sv.size) return false;
+ return !memcmp(sv.str, sub.str, sub.size);
+}
+
+STC_INLINE bool csview_ends_with(csview sv, csview sub) {
+ if (sub.size > sv.size) return false;
+ return !memcmp(sv.str + sv.size - sub.size, sub.str, sub.size);
+}
+
+/* iterator */
+STC_INLINE csview_iter csview_begin(const csview* self)
+ { return c_make(csview_iter){.chr = {self->str, utf8_codep_size(self->str)}}; }
+
+STC_INLINE csview_iter csview_end(const csview* self)
+ { return c_make(csview_iter){self->str + self->size}; }
+
+STC_INLINE void csview_next(csview_iter* it)
+ { it->ref += it->chr.size; it->chr.size = utf8_codep_size(it->ref); }
/* utf8 */
-STC_INLINE bool csview_valid_u8(csview sv)
+STC_INLINE bool csview_valid_u8(csview sv) // depends on src/utf8utils.c
{ return utf8_valid_n(sv.str, sv.size); }
STC_INLINE size_t csview_size_u8(csview sv)
@@ -84,36 +95,50 @@ STC_INLINE csview csview_substr_u8(csview sv, size_t u8pos, size_t u8len) {
/* csview interaction with cstr: */
#ifdef CSTR_H_INCLUDED
-STC_INLINE csview csview_from_s(const cstr* self)
- { return c_make(csview){cstr_str(self), cstr_size(*self)}; }
-
-STC_INLINE cstr cstr_from_sv(csview sv)
- { return cstr_from_n(sv.str, sv.size); }
-STC_INLINE csview cstr_substr(const cstr* self, intptr_t pos, size_t n)
- { return csview_substr(csview_from_s(self), pos, n); }
-STC_INLINE csview cstr_slice(const cstr* self, intptr_t p1, intptr_t p2)
- { return csview_slice(csview_from_s(self), p1, p2); }
-STC_INLINE csview cstr_assign_sv(cstr* self, csview sv)
- { return c_make(csview){cstr_assign_n(self, sv.str, sv.size), sv.size}; }
-STC_INLINE void cstr_append_sv(cstr* self, csview sv)
- { cstr_append_n(self, sv.str, sv.size); }
-STC_INLINE void cstr_insert_sv(cstr* self, size_t pos, csview sv)
- { cstr_replace_n(self, pos, 0, sv.str, sv.size); }
-STC_INLINE void cstr_replace_sv(cstr* self, csview sub, csview with)
- { cstr_replace_n(self, sub.str - cstr_str(self), sub.size, with.str, with.size); }
-STC_INLINE bool cstr_equals_sv(cstr s, csview sv)
- { return sv.size == cstr_size(s) && !memcmp(cstr_str(&s), sv.str, sv.size); }
-STC_INLINE size_t cstr_find_sv(cstr s, csview needle)
- { char* res = c_strnstrn(cstr_str(&s), needle.str, cstr_size(s), needle.size);
- return res ? res - cstr_str(&s) : cstr_npos; }
-STC_INLINE bool cstr_contains_sv(cstr s, csview needle)
- { return c_strnstrn(cstr_str(&s), needle.str, cstr_size(s), needle.size) != NULL; }
-STC_INLINE bool cstr_starts_with_sv(cstr s, csview sub)
- { if (sub.size > cstr_size(s)) return false;
- return !memcmp(cstr_str(&s), sub.str, sub.size); }
-STC_INLINE bool cstr_ends_with_sv(cstr s, csview sub)
- { if (sub.size > cstr_size(s)) return false;
- return !memcmp(cstr_str(&s) + cstr_size(s) - sub.size, sub.str, sub.size); }
+STC_INLINE csview csview_from_s(const cstr* self)
+ { return c_make(csview){cstr_str(self), cstr_size(*self)}; }
+
+STC_INLINE cstr cstr_from_sv(csview sv)
+ { return cstr_from_n(sv.str, sv.size); }
+
+STC_INLINE csview cstr_substr(const cstr* self, intptr_t pos, size_t n)
+ { return csview_substr(csview_from_s(self), pos, n); }
+
+STC_INLINE csview cstr_slice(const cstr* self, intptr_t p1, intptr_t p2)
+ { return csview_slice(csview_from_s(self), p1, p2); }
+
+STC_INLINE csview cstr_assign_sv(cstr* self, csview sv)
+ { return c_make(csview){cstr_assign_n(self, sv.str, sv.size), sv.size}; }
+
+STC_INLINE void cstr_append_sv(cstr* self, csview sv)
+ { cstr_append_n(self, sv.str, sv.size); }
+
+STC_INLINE void cstr_insert_sv(cstr* self, size_t pos, csview sv)
+ { cstr_replace_n(self, pos, 0, sv.str, sv.size); }
+
+STC_INLINE void cstr_replace_sv(cstr* self, csview sub, csview with)
+ { cstr_replace_n(self, sub.str - cstr_str(self), sub.size, with.str, with.size); }
+
+STC_INLINE bool cstr_equals_sv(cstr s, csview sv)
+ { return sv.size == cstr_size(s) && !memcmp(cstr_str(&s), sv.str, sv.size); }
+
+STC_INLINE size_t cstr_find_sv(cstr s, csview needle) {
+ char* res = c_strnstrn(cstr_str(&s), needle.str, cstr_size(s), needle.size);
+ return res ? res - cstr_str(&s) : cstr_npos;
+}
+
+STC_INLINE bool cstr_contains_sv(cstr s, csview needle)
+ { return c_strnstrn(cstr_str(&s), needle.str, cstr_size(s), needle.size) != NULL; }
+
+STC_INLINE bool cstr_starts_with_sv(cstr s, csview sub) {
+ if (sub.size > cstr_size(s)) return false;
+ return !memcmp(cstr_str(&s), sub.str, sub.size);
+}
+
+STC_INLINE bool cstr_ends_with_sv(cstr s, csview sub) {
+ if (sub.size > cstr_size(s)) return false;
+ return !memcmp(cstr_str(&s) + cstr_size(s) - sub.size, sub.str, sub.size);
+}
#endif
/* ---- Container helper functions ---- */
@@ -165,3 +190,6 @@ csview_token(csview sv, csview sep, size_t* start) {
#endif
#endif
#undef i_opt
+#undef i_header
+#undef i_implement
+#undef i_static
diff --git a/include/stc/template.h b/include/stc/template.h
index 4d2f0f58..db50a6ca 100644
--- a/include/stc/template.h
+++ b/include/stc/template.h
@@ -49,13 +49,6 @@
#define i_size uint32_t
#endif
-#if defined i_key_str || defined i_val_str || defined i_key_ssv || defined i_val_ssv
- #include "cstr.h"
- #if defined i_key_ssv || defined i_val_ssv
- #include "csview.h"
- #endif
-#endif
-
#if !(defined i_key || defined i_key_str || defined i_key_ssv || \
defined i_key_bind || defined i_key_arcbox)
#define _i_key_from_val
@@ -288,6 +281,10 @@
#undef i_keyto
#undef i_keydrop
+#undef i_header
+#undef i_implement
+#undef i_static
+
#undef _i_prefix
#undef _i_has_from
#undef _i_key_from_val
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index f11af046..dffd9f15 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -25,12 +25,37 @@ int main()
#include "ccommon.h"
#include <ctype.h>
+// utf8 methods defined in src/utf8utils.c:
+bool utf8_islower(uint32_t c);
+bool utf8_isupper(uint32_t c);
+bool utf8_isspace(uint32_t c);
+bool utf8_isdigit(uint32_t c);
+bool utf8_isxdigit(uint32_t c);
+bool utf8_isalpha(uint32_t c);
+bool utf8_isalnum(uint32_t c);
+uint32_t utf8_tolower(uint32_t c);
+uint32_t utf8_toupper(uint32_t c);
+bool utf8_valid(const char* s);
+bool utf8_valid_n(const char* s, size_t n);
+
+/* encode/decode next utf8 codepoint. */
enum { UTF8_OK = 0, UTF8_ERROR = 4 };
typedef struct { uint32_t state, codep, size; } utf8_decode_t;
-/* encode/decode next utf8 codepoint. */
-STC_API unsigned utf8_encode(char *out, uint32_t c);
-STC_API void utf8_decode(utf8_decode_t *d, const uint8_t b);
+void utf8_peek(utf8_decode_t* d, const char *s);
+unsigned utf8_encode(char *out, uint32_t c);
+void utf8_decode(utf8_decode_t *d, const uint8_t b);
+
+/* number of characters in the utf8 codepoint from s */
+STC_INLINE unsigned utf8_codep_size(const char *s) {
+ unsigned b = (uint8_t)*s;
+ if (b < 0x80) return 1;
+ if (b < 0xC2) return 0;
+ if (b < 0xE0) return 2;
+ if (b < 0xF0) return 3;
+ if (b < 0xF5) return 4;
+ return 0;
+}
/* number of codepoints in the utf8 string s */
STC_INLINE size_t utf8_size(const char *s) {
@@ -56,86 +81,4 @@ STC_INLINE const char* utf8_at(const char *s, size_t index) {
STC_INLINE size_t utf8_pos(const char* s, size_t index)
{ return utf8_at(s, index) - s; }
-STC_INLINE void utf8_peek(const char *s, utf8_decode_t* d) {
- utf8_decode(d, (uint8_t)*s++);
- switch (d->size) {
- case 4: utf8_decode(d, (uint8_t)*s++);
- case 3: utf8_decode(d, (uint8_t)*s++);
- case 2: utf8_decode(d, (uint8_t)*s++);
- }
-}
-
-STC_INLINE unsigned utf8_codep_size(const char *s) {
- utf8_decode_t d = {UTF8_OK};
- utf8_decode(&d, (uint8_t)*s);
- return d.size;
-}
-
-STC_INLINE bool utf8_valid(const char* s) {
- utf8_decode_t d = {UTF8_OK};
- while (*s)
- utf8_decode(&d, (uint8_t)*s++);
- return d.state == UTF8_OK;
-}
-
-STC_INLINE bool utf8_valid_n(const char* s, size_t n) {
- utf8_decode_t d = {UTF8_OK};
- while ((n-- != 0) & (*s != 0))
- utf8_decode(&d, (uint8_t)*s++);
- return d.state == UTF8_OK;
-}
-
-// --------------------------- IMPLEMENTATION ---------------------------------
-#ifdef i_implement
-// https://news.ycombinator.com/item?id=15423674
-// https://gist.github.com/s4y/344a355f8c1f99c6a4cb2347ec4323cc
-
-STC_DEF void utf8_decode(utf8_decode_t *d, const uint8_t b)
-{
- switch (d->state) {
- case UTF8_OK:
- if (b < 0x80) d->codep = b, d->size = 1;
- else if (b < 0xC2) d->state = UTF8_ERROR, d->size = 0;
- else if (b < 0xE0) d->state = 1, d->codep = b & 0x1F, d->size = 2;
- else if (b < 0xF0) d->state = 2, d->codep = b & 0x0F, d->size = 3;
- else if (b < 0xF5) d->state = 3, d->codep = b & 0x07, d->size = 4;
- else d->state = UTF8_ERROR, d->size = 0;
- break;
- case 1: case 2: case 3:
- if ((b & 0xC0) == 0x80) {
- d->state -= 1;
- d->codep = (d->codep << 6) | (b & 0x3F);
- } else
- d->state = UTF8_ERROR, d->size = 0;
- }
-}
-
-STC_DEF unsigned utf8_encode(char *out, uint32_t c)
-{
- if (c < 0x80U) {
- out[0] = (char) c;
- return 1;
- } else if (c < 0x0800U) {
- out[0] = (char) ((c>>6 & 0x1F) | 0xC0);
- out[1] = (char) ((c & 0x3F) | 0x80);
- return 2;
- } else if (c < 0x010000U) {
- if ((c < 0xD800U) | (c >= 0xE000U)) {
- out[0] = (char) ((c>>12 & 0x0F) | 0xE0);
- out[1] = (char) ((c>>6 & 0x3F) | 0x80);
- out[2] = (char) ((c & 0x3F) | 0x80);
- return 3;
- }
- } else if (c < 0x110000U) {
- out[0] = (char) ((c>>18 & 0x07) | 0xF0);
- out[1] = (char) ((c>>12 & 0x3F) | 0x80);
- out[2] = (char) ((c>>6 & 0x3F) | 0x80);
- out[3] = (char) ((c & 0x3F) | 0x80);
- return 4;
- }
- return 0;
-}
-
-#endif
#endif
-#undef i_opt
|
