diff options
| author | tylov <[email protected]> | 2023-08-13 23:15:45 +0200 |
|---|---|---|
| committer | tylov <[email protected]> | 2023-08-13 23:15:45 +0200 |
| commit | 25dc58db206714dc02c1ae0548f6ba7dd3519d29 (patch) | |
| tree | dcf65b08300d82c4d6752284d5c0a5a00507f07f /docs | |
| parent | 8bb2f5618e4cefe668a663936354cf53191f2129 (diff) | |
| download | STC-modified-25dc58db206714dc02c1ae0548f6ba7dd3519d29.tar.gz STC-modified-25dc58db206714dc02c1ae0548f6ba7dd3519d29.zip | |
API CHANGES:
Changed csview: becomes a null-terminated string view.
Added csubstr: a non-null-terminated string view/span, like the previous csview.
Note that csview still works like a csubstr, so few compatibility issues should arise.
However, some functions have changed from _sv suffix to _ss.
Diffstat (limited to 'docs')
| -rw-r--r-- | docs/cmap_api.md | 2 | ||||
| -rw-r--r-- | docs/coroutine_api.md | 2 | ||||
| -rw-r--r-- | docs/cregex_api.md | 28 | ||||
| -rw-r--r-- | docs/cstr_api.md | 44 | ||||
| -rw-r--r-- | docs/csubstr_api.md | 217 | ||||
| -rw-r--r-- | docs/csview_api.md | 151 |
6 files changed, 281 insertions, 163 deletions
diff --git a/docs/cmap_api.md b/docs/cmap_api.md index 4e6da57d..65777221 100644 --- a/docs/cmap_api.md +++ b/docs/cmap_api.md @@ -282,7 +282,7 @@ typedef struct { cstr country; } Viking; -#define Viking_init() ((Viking){cstr_null, cstr_null}) +#define Viking_init() ((Viking){.name={0}, .country={0}}) static inline int Viking_cmp(const Viking* a, const Viking* b) { int c = cstr_cmp(&a->name, &b->name); diff --git a/docs/coroutine_api.md b/docs/coroutine_api.md index f7d81a34..c44f4a4d 100644 --- a/docs/coroutine_api.md +++ b/docs/coroutine_api.md @@ -237,7 +237,7 @@ cco_task_struct (produce_items, int produce_items(struct produce_items* p, cco_runtime* rt) { cco_routine (p) { - p->str = cstr_null; + p->str = cstr_init(); p->next.cco_func = next_value; while (true) { diff --git a/docs/cregex_api.md b/docs/cregex_api.md index 52476e09..98161fe9 100644 --- a/docs/cregex_api.md +++ b/docs/cregex_api.md @@ -33,11 +33,11 @@ int cregex_compile(cregex *self, const char* pattern, int cflags = CREG_ int cregex_captures(const cregex* self); // return CREG_OK, CREG_NOMATCH, or CREG_MATCHERROR -int cregex_find(const cregex* re, const char* input, csview match[], int mflags = CREG_DEFAULT); +int cregex_find(const cregex* re, const char* input, csubstr match[], int mflags = CREG_DEFAULT); // Search inside input string-view only -int cregex_find_sv(const cregex* re, csview input, csview match[]); +int cregex_find_ss(const cregex* re, csubstr input, csubstr match[]); // All-in-one search (compile + find + drop) -int cregex_find_pattern(const char* pattern, const char* input, csview match[], int cmflags = CREG_DEFAULT); +int cregex_find_pattern(const char* pattern, const char* input, csubstr match[], int cmflags = CREG_DEFAULT); // Check if there are matches in input bool cregex_is_match(const cregex* re, const char* input); @@ -45,14 +45,14 @@ bool cregex_is_match(const cregex* re, const char* input); // Replace all matches in input cstr cregex_replace(const cregex* re, const char* 
input, const char* replace, int count = INT_MAX); // Replace count matches in input string-view. Optionally transform replacement. -cstr cregex_replace_sv(const cregex* re, csview input, const char* replace, int count = INT_MAX); -cstr cregex_replace_sv(const cregex* re, csview input, const char* replace, int count, - bool(*transform)(int group, csview match, cstr* result), int rflags); +cstr cregex_replace_ss(const cregex* re, csubstr input, const char* replace, int count = INT_MAX); +cstr cregex_replace_ss(const cregex* re, csubstr input, const char* replace, int count, + bool(*transform)(int group, csubstr match, cstr* result), int rflags); // All-in-one replacement (compile + find/replace + drop) cstr cregex_replace_pattern(const char* pattern, const char* input, const char* replace, int count = INT_MAX); cstr cregex_replace_pattern(const char* pattern, const char* input, const char* replace, int count, - bool(*transform)(int group, csview match, cstr* result), int rflags); + bool(*transform)(int group, csubstr match, cstr* result), int rflags); // destroy void cregex_drop(cregex* self); ``` @@ -109,9 +109,9 @@ int main(void) { cregex re = cregex_from(pattern); // Lets find the first date in the string: - csview match[4]; // full-match, year, month, date. + csubstr match[4]; // full-match, year, month, date. if (cregex_find(&re, input, match) == CREG_OK) - printf("Found date: %.*s\n", c_SV(match[0])); + printf("Found date: %.*s\n", c_SS(match[0])); else printf("Could not find any date\n"); @@ -127,7 +127,7 @@ int main(void) { For a single match you may use the all-in-one function: ```c if (cregex_find_pattern(pattern, input, match)) - printf("Found date: %.*s\n", c_SV(match[0])); + printf("Found date: %.*s\n", c_SS(match[0])); ``` To use: `gcc first_match.c src/cregex.c src/utf8code.c`. 
@@ -137,16 +137,16 @@ In order to use a callback function in the replace call, see `examples/regex_rep To iterate multiple matches in an input string, you may use ```c -csview match[5] = {0}; +csubstr match[5] = {0}; while (cregex_find(&re, input, match, CREG_NEXT) == CREG_OK) for (int k = 1; i <= cregex_captures(&re); ++k) - printf("submatch %d: %.*s\n", k, c_SV(match[k])); + printf("submatch %d: %.*s\n", k, c_SS(match[k])); ``` There is also a for-loop macro to simplify it: ```c c_formatch (it, &re, input) for (int k = 1; i <= cregex_captures(&re); ++k) - printf("submatch %d: %.*s\n", k, c_SV(it.match[k])); + printf("submatch %d: %.*s\n", k, c_SS(it.match[k])); ``` ## Using cregex in a project @@ -154,7 +154,7 @@ c_formatch (it, &re, input) The easiest is to `#define i_import` before `#include <stc/cregex.h>`. Make sure to do that in one translation unit only. For reference, **cregex** uses the following files: -- `stc/cregex.h`, `stc/utf8.h`, `stc/csview.h`, `stc/cstr.h`, `stc/ccommon.h`, `stc/forward.h` +- `stc/cregex.h`, `stc/utf8.h`, `stc/csubstr.h`, `stc/cstr.h`, `stc/ccommon.h`, `stc/forward.h` - `src/cregex.c`, `src/utf8code.c`. ## Regex Cheatsheet diff --git a/docs/cstr_api.md b/docs/cstr_api.md index dae5669f..36606b76 100644 --- a/docs/cstr_api.md +++ b/docs/cstr_api.md @@ -18,11 +18,11 @@ All cstr definitions and prototypes are available by including a single header f ## Methods ```c -cstr cstr_init(void); // constructor; same as cstr_null. +cstr cstr_init(void); // constructor; empty string cstr cstr_lit(const char literal_only[]); // cstr from literal; no strlen() call. 
cstr cstr_from(const char* str); // constructor using strlen() cstr cstr_from_n(const char* str, intptr_t n); // constructor with n first bytes of str -cstr cstr_from_sv(csview sv); // construct cstr from csview +cstr cstr_from_ss(csubstr sv); // construct cstr from csubstr cstr cstr_with_capacity(intptr_t cap); cstr cstr_with_size(intptr_t len, char fill); // repeat fill len times cstr cstr_from_fmt(const char* fmt, ...); // printf() formatting @@ -34,7 +34,7 @@ void cstr_drop(cstr* self); // destructo const char* cstr_str(const cstr* self); // cast to const char* char* cstr_data(cstr* self); // cast to mutable char* -csview cstr_sv(const cstr* self); // cast to string view +csubstr cstr_ss(const cstr* self); // cast to string view cstr_buf cstr_buffer(cstr* self); // cast to mutable buffer (with capacity) intptr_t cstr_size(const cstr* self); @@ -48,13 +48,13 @@ void cstr_clear(cstr* self); char* cstr_assign(cstr* self, const char* str); char* cstr_assign_n(cstr* self, const char* str, intptr_t n); // assign n first bytes of str -char* cstr_assign_sv(cstr* self, csview sv); +char* cstr_assign_ss(cstr* self, csubstr sv); char* cstr_copy(cstr* self, cstr s); // copy-assign a cstr int cstr_printf(cstr* self, const char* fmt, ...); // source and target must not overlap. 
char* cstr_append(cstr* self, const char* str); char* cstr_append_n(cstr* self, const char* str, intptr_t n); // append n first bytes of str -char* cstr_append_sv(cstr* self, csview str); +char* cstr_append_ss(cstr* self, csubstr str); char* cstr_append_s(cstr* self, cstr str); int cstr_append_fmt(cstr* self, const char* fmt, ...); // printf() formatting char* cstr_append_uninit(cstr* self, intptr_t len); // return ptr to start of uninited data @@ -63,19 +63,19 @@ void cstr_push(cstr* self, const char* chr); // append on void cstr_pop(cstr* self); // pop one utf8 char void cstr_insert(cstr* self, intptr_t pos, const char* ins); -void cstr_insert_sv(cstr* self, intptr_t pos, csview ins); +void cstr_insert_ss(cstr* self, intptr_t pos, csubstr ins); void cstr_insert_s(cstr* self, intptr_t pos, cstr ins); void cstr_erase(cstr* self, intptr_t pos, intptr_t len); // erase len bytes from pos void cstr_replace(cstr* self, const char* search, const char* repl, unsigned count = MAX_INT); -cstr cstr_replace_sv(csview in, csview search, csview repl, unsigned count); +cstr cstr_replace_ss(csubstr in, csubstr search, csubstr repl, unsigned count); void cstr_replace_at(cstr* self, intptr_t pos, intptr_t len, const char* repl); // replace at a pos -void cstr_replace_at_sv(cstr* self, intptr_t pos, intptr_t len, const csview repl); +void cstr_replace_at_ss(cstr* self, intptr_t pos, intptr_t len, const csubstr repl); void cstr_replace_at_s(cstr* self, intptr_t pos, intptr_t len, cstr repl); bool cstr_equals(const cstr* self, const char* str); -bool cstr_equals_sv(const cstr* self, csview sv); +bool cstr_equals_ss(const cstr* self, csubstr sv); bool cstr_equals_s(const cstr* self, cstr s); intptr_t cstr_find(const cstr* self, const char* search); @@ -83,11 +83,11 @@ intptr_t cstr_find_at(const cstr* self, intptr_t pos, const char* search); // bool cstr_contains(const cstr* self, const char* search); bool cstr_starts_with(const cstr* self, const char* str); -bool 
cstr_starts_with_sv(const cstr* self, csview sv); +bool cstr_starts_with_ss(const cstr* self, csubstr sv); bool cstr_starts_with_s(const cstr* self, cstr s); bool cstr_ends_with(const cstr* self, const char* str); -bool cstr_ends_with_sv(const cstr* self, csview sv); +bool cstr_ends_with_ss(const cstr* self, csubstr sv); bool cstr_ends_with_s(const cstr* self, cstr s); bool cstr_getline(cstr *self, FILE *stream); // cstr_getdelim(self, '\n', stream) @@ -100,8 +100,8 @@ intptr_t cstr_u8_size(const cstr* self); // number of intptr_t cstr_u8_size_n(const cstr self, intptr_t nbytes); // utf8 size within n bytes intptr_t cstr_u8_to_pos(const cstr* self, intptr_t u8idx); // byte pos offset at utf8 codepoint index const char* cstr_u8_at(const cstr* self, intptr_t u8idx); // char* position at utf8 codepoint index -csview cstr_u8_chr(const cstr* self, intptr_t u8idx); // get utf8 character as a csview -void cstr_u8_replace_at(cstr* self, intptr_t bytepos, intptr_t u8len, csview repl); // replace u8len utf8 chars +csubstr cstr_u8_chr(const cstr* self, intptr_t u8idx); // get utf8 character as a csubstr +void cstr_u8_replace_at(cstr* self, intptr_t bytepos, intptr_t u8len, csubstr repl); // replace u8len utf8 chars void cstr_u8_erase(cstr* self, intptr_t bytepos, intptr_t u8len); // erase u8len codepoints from pos // iterate utf8 codepoints @@ -112,14 +112,14 @@ cstr_iter cstr_advance(cstr_iter it, intptr_t n); // utf8 functions requires linking with src/utf8code.c symbols: bool cstr_valid_utf8(const cstr* self); // check if str is valid utf8 -cstr cstr_casefold_sv(csview sv); // returns new casefolded utf8 cstr +cstr cstr_casefold_ss(csubstr sv); // returns new casefolded utf8 cstr cstr cstr_tolower(const char* str); // returns new lowercase utf8 cstr -cstr cstr_tolower_sv(csview sv); // returns new lowercase utf8 cstr +cstr cstr_tolower_ss(csubstr sv); // returns new lowercase utf8 cstr void cstr_lowercase(cstr* self); // transform cstr to lowercase utf8 cstr 
cstr_toupper(const char* str); // returns new uppercase utf8 cstr -cstr cstr_toupper_sv(csview sv); // returns new uppercase utf8 cstr +cstr cstr_toupper_ss(csubstr sv); // returns new uppercase utf8 cstr void cstr_uppercase(cstr* self); // transform cstr to uppercase utf8 int cstr_icmp(const cstr* s1, const cstr* s2); // utf8 case-insensitive comparison @@ -132,11 +132,10 @@ Note that all methods with arguments `(..., const char* str, intptr_t n)`, `n` m #### Helper methods: ```c -int cstr_cmp(const cstr* s1, const cstr* s2); -bool cstr_eq(const cstr* s1, const cstr* s2); -bool cstr_hash(const cstr* self); - -char* cstrnstrn(const char* str, const char* search, intptr_t slen, intptr_t nlen); +int cstr_cmp(const cstr* s1, const cstr* s2); +bool cstr_eq(const cstr* s1, const cstr* s2); +bool cstr_hash(const cstr* self); +char* cstrnstrn(const char* str, const char* search, intptr_t slen, intptr_t nlen); ``` ## Types @@ -145,7 +144,7 @@ char* cstrnstrn(const char* str, const char* search, intptr_t slen, intpt |:----------------|:---------------------------------------------|:---------------------| | `cstr` | `struct { ... 
}` | The string type | | `cstr_value` | `char` | String element type | -| `csview` | `struct { const char *str; intptr_t size; }` | String view type | +| `csubstr` | `struct { const char *str; intptr_t size; }` | String view type | | `cstr_buf` | `struct { char *data; intptr_t size, cap; }` | String buffer type | ## Constants and macros @@ -153,7 +152,6 @@ char* cstrnstrn(const char* str, const char* search, intptr_t slen, intpt | Name | Value | |:------------------|:------------------| | `c_NPOS` | `INTPTR_MAX` | -| `cstr_null` | empty cstr value | ## Example ```c diff --git a/docs/csubstr_api.md b/docs/csubstr_api.md new file mode 100644 index 00000000..925c69db --- /dev/null +++ b/docs/csubstr_api.md @@ -0,0 +1,217 @@ +# STC [csubstr](../include/stc/csubstr.h): String View + + +The type **csubstr** is a string view and can refer to a constant contiguous sequence of char-elements with the first +element of the sequence at position zero. The implementation holds two members: a pointer to constant char and a size. + +**csubstr** is non-null terminated, and therefore not a replacent for `const char*` - see [csview](csview_api.md) for +that. **csubstr** never allocates memory, and therefore need not be destructed. +Its lifetime is limited by the source string storage. It keeps the length of the string, and does not need to call +*strlen()* to acquire the length. + +Note: a **csubstr** may ***not be null-terminated***, and must therefore be printed this way: +```c +printf("%.*s", c_SS(sstr)) +``` + +See the c++ class [std::basic_string_view](https://en.cppreference.com/w/cpp/string/basic_string_view) for a functional +description. + +## Header file + +All csubstr definitions and prototypes are available by including a single header file. 
+ +```c +#define i_implement +#include <stc/cstr.h> +#include <stc/csubstr.h> // after cstr.h: include extra cstr-csubstr functions +``` +## Methods + +```c +csubstr c_ss(const char literal_only[]); // construct from literal, no strlen() +csubstr c_ss(const char* str, intptr_t n); // construct from str and length n +csubstr csubstr_from(const char* str); // construct from const char* +csubstr csubstr_from_n(const char* str, intptr_t n); // alias for c_ss(str, n) + +intptr_t csubstr_size(csubstr sv); +bool csubstr_empty(csubstr sv); +void csubstr_clear(csubstr* self); + +bool csubstr_equals(csubstr sv, csubstr sv2); +intptr_t csubstr_find(csubstr sv, const char* str); +intptr_t csubstr_find_ss(csubstr sv, csubstr find); +bool csubstr_contains(csubstr sv, const char* str); +bool csubstr_starts_with(csubstr sv, const char* str); +bool csubstr_ends_with(csubstr sv, const char* str); + +csubstr csubstr_substr_ex(csubstr sv, intptr_t pos, intptr_t n); // negative pos count from end +csubstr csubstr_slice_ex(csubstr sv, intptr_t p1, intptr_t p2); // negative p1, p2 count from end +csubstr csubstr_token(csubstr sv, const char* sep, intptr_t* start); // *start > sv.size after last token +``` + +#### UTF8 methods +```c +intptr_t csubstr_u8_size(csubstr sv); +csubstr csubstr_u8_substr(csubstr sv, intptr_t bytepos, intptr_t u8len); +bool csubstr_valid_utf8(csubstr sv); // requires linking with src/utf8code.c + +csubstr_iter csubstr_begin(const csubstr* self); +csubstr_iter csubstr_end(const csubstr* self); +void csubstr_next(csubstr_iter* it); // utf8 codepoint step, not byte! 
+csubstr_iter csubstr_advance(csubstr_iter it, intptr_t n); +``` + +#### Extended cstr methods +```c +csubstr cstr_substr(const cstr* self, intptr_t pos, intptr_t n); +csubstr cstr_substr_ex(const cstr* s, intptr_t pos, intptr_t n); // negative pos count from end +csubstr cstr_u8_substr(const cstr* self, intptr_t bytepos, intptr_t u8len); + +csubstr cstr_slice(const cstr* self, intptr_t p1, intptr_t p2); +csubstr cstr_slice_ex(const cstr* s, intptr_t p, intptr_t q); // negative p or q count from end +``` +#### Iterate tokens with *c_fortoken*, *c_fortoken_ss* + +To iterate tokens in an input string separated by a string: +```c +c_fortoken (i, "hello, one, two, three", ", ") + printf("token: %.*s\n", c_SS(i.token)); +``` + +#### Helper methods +```c +int csubstr_cmp(const csubstr* x, const csubstr* y); +int csubstr_icmp(const csubstr* x, const csubstr* y); +bool csubstr_eq(const csubstr* x, const csubstr* y); +uint64_t csubstr_hash(const csubstr* x); +``` + +## Types + +| Type name | Type definition | Used to represent... | +|:----------------|:-------------------------------------------|:-------------------------| +| `csubstr` | `struct { const char *str; intptr_t size; }` | The string view type | +| `csubstr_value` | `char` | The string element type | +| `csubstr_iter` | `struct { csubstr_value *ref; }` | UTF8 iterator | + +## Constants and macros + +| Name | Value | Usage | +|:---------------|:---------------------|:---------------------------------------------| +| `c_SS(sv)` | printf argument | `printf("sv: %.*s\n", c_SS(sv));` | + +## Example +```c +#define i_implement +#include <stc/cstr.h> +#include <stc/csubstr.h> + +int main(void) +{ + cstr str1 = cstr_lit("We think in generalities, but we live in details."); + // (quoting Alfred N. 
Whitehead) + + csubstr sv1 = cstr_substr_ex(&str1, 3, 5); // "think" + intptr_t pos = cstr_find(&str1, "live"); // position of "live" in str1 + csubstr sv2 = cstr_substr_ex(&str1, pos, 4); // get "live" + csubstr sv3 = cstr_slice_ex(&str1, -8, -1); // get "details" + printf("%.*s %.*s %.*s\n", + c_SS(sv1), c_SS(sv2), c_SS(sv3)); + cstr s1 = cstr_lit("Apples are red"); + cstr s2 = cstr_from_ss(cstr_substr_ex(&s1, -3, 3)); // "red" + cstr s3 = cstr_from_ss(cstr_substr_ex(&s1, 0, 6)); // "Apples" + printf("%s %s\n", cstr_str(&s2), cstr_str(&s3)); + + c_drop(cstr, &str1, &s1, &s2, &s3); +} +``` +Output: +``` +think live details +red Apples +``` + +### Example 2: UTF8 handling +```c +#define i_import // include dependent cstr, utf8 and cregex function definitions. +#include <stc/cstr.h> + +int main(void) +{ + cstr s1 = cstr_lit("hell😀 w😀rld"); + + cstr_u8_replace_at(&s1, cstr_find(&s1, "😀rld"), 1, c_ss("ø")); + printf("%s\n", cstr_str(&s1)); + + c_foreach (i, cstr, s1) + printf("%.*s,", c_SS(i.u8.chr)); + + cstr_drop(&s1); +} +``` +Output: +``` +hell😀 wørld +h,e,l,l,😀, ,w,ø,r,l,d, +``` + +### Example 3: csubstr tokenizer (string split) +Splits strings into tokens. *print_split()* makes **no** memory allocations or *strlen()* calls, +and does not depend on null-terminated strings. *string_split()* function returns a vector of cstr. 
+```c +#include <stdio.h> +#include <stc/csubstr.h> + +void print_split(csubstr input, const char* sep) +{ + c_fortoken_ss (i, input, sep) + printf("[%.*s]\n", c_SS(i.token)); + puts(""); +} +#define i_implement +#include <stc/cstr.h> +#define i_key_str +#include <stc/cstack.h> + +cstack_str string_split(csubstr input, const char* sep) +{ + cstack_str out = cstack_str_init(); + + c_fortoken_ss (i, input, sep) + cstack_str_push(&out, cstr_from_ss(i.token)); + + return out; +} + +int main(void) +{ + print_split(c_ss("//This is a//double-slash//separated//string"), "//"); + print_split(c_ss("This has no matching separator"), "xx"); + + cstack_str s = string_split(c_ss("Split,this,,string,now,"), ","); + + c_foreach (i, cstack_str, s) + printf("[%s]\n", cstr_str(i.ref)); + puts(""); + + cstack_str_drop(&s); +} +``` +Output: +``` +[] +[This is a] +[double-slash] +[separated] +[string] + +[This has no matching separator] + +[Split] +[this] +[] +[string] +[now] +[] +``` diff --git a/docs/csview_api.md b/docs/csview_api.md index 49e4f9d1..4fdff0d1 100644 --- a/docs/csview_api.md +++ b/docs/csview_api.md @@ -1,19 +1,14 @@ # STC [csview](../include/stc/csview.h): String View  -The type **csview** is a string view and can refer to a constant contiguous sequence of char-elements with the first -element of the sequence at position zero. The implementation holds two members: a pointer to constant char and a size. +The type **csview** is a ***null-terminated*** string view and refers to a constant contiguous sequence of +char-elements with the first element of the sequence at position zero. The implementation holds two +members: a pointer to constant char and a size. See [csubstr](csubstr_api.md) for a ***non null-terminated*** +string view/span type. -**csview** is an efficient replacent for `const char*`. It never allocates memory, and therefore need not be destructed. -Its lifetime is limited by the source string storage. 
It keeps the length of the string, and does not call *strlen()* -when passing it around. It is faster when using`csview` as convertion type (raw) than `const char*` in associative -containers with cstr keys. - -Note: a **csview** may ***not be null-terminated***, and must therefore be printed like: -`printf("%.*s", csview_ARG(sv))`. - -See the c++ class [std::basic_string_view](https://en.cppreference.com/w/cpp/string/basic_string_view) for a functional -description. +Because **csview** is null-terminated, it can be a more efficient replacent for `const char*`. It never +allocates memory, and need therefore not be destructed. Its lifetime is limited by the source string +storage. It keeps the length of the string, and does not call *strlen()* when passing it around. ## Header file @@ -42,17 +37,12 @@ intptr_t csview_find_sv(csview sv, csview find); bool csview_contains(csview sv, const char* str); bool csview_starts_with(csview sv, const char* str); bool csview_ends_with(csview sv, const char* str); - -csview csview_substr_ex(csview sv, intptr_t pos, intptr_t n); // negative pos count from end -csview csview_slice_ex(csview sv, intptr_t p1, intptr_t p2); // negative p1, p2 count from end -csview csview_token(csview sv, const char* sep, intptr_t* start); // *start > sv.size after last token ``` #### UTF8 methods ```c intptr_t csview_u8_size(csview sv); -csview csview_u8_substr(csview sv, intptr_t bytepos, intptr_t u8len); -bool csview_valid_utf8(csview sv); // requires linking with src/utf8code.c +bool csview_valid_utf8(csview sv); // depends on src/utf8code.c csview_iter csview_begin(const csview* self); csview_iter csview_end(const csview* self); @@ -74,27 +64,10 @@ uint32_t utf8_peek(const char* s); // codep uint32_t utf8_peek_off(const char* s, int offset); // codepoint value at utf8 pos (may be negative) ``` -#### Extended cstr methods -```c -csview cstr_substr(const cstr* self, intptr_t pos, intptr_t n); -csview cstr_substr_ex(const cstr* s, intptr_t pos, 
intptr_t n); // negative pos count from end -csview cstr_u8_substr(const cstr* self, intptr_t bytepos, intptr_t u8len); - -csview cstr_slice(const cstr* self, intptr_t p1, intptr_t p2); -csview cstr_slice_ex(const cstr* s, intptr_t p, intptr_t q); // negative p or q count from end -``` -#### Iterate tokens with *c_fortoken*, *c_fortoken_sv* - -To iterate tokens in an input string separated by a string: -```c -c_fortoken (i, "hello, one, two, three", ", ") - printf("token: %.*s\n", c_SV(i.token)); -``` - #### Helper methods ```c int csview_cmp(const csview* x, const csview* y); -int csview_icmp(const csview* x, const csview* y); +int csview_icmp(const csview* x, const csview* y); // depends on src/utf8code.c: bool csview_eq(const csview* x, const csview* y); uint64_t csview_hash(const csview* x); ``` @@ -107,46 +80,36 @@ uint64_t csview_hash(const csview* x); | `csview_value` | `char` | The string element type | | `csview_iter` | `struct { csview_value *ref; }` | UTF8 iterator | -## Constants and macros - -| Name | Value | Usage | -|:---------------|:---------------------|:---------------------------------------------| -| `c_SV(sv)` | printf argument | `printf("sv: %.*s\n", c_SV(sv));` | - -## Example +## Example: UTF8 iteration and case conversion ```c -#define i_implement +#define i_import #include <stc/cstr.h> #include <stc/csview.h> int main(void) { - cstr str1 = cstr_lit("We think in generalities, but we live in details."); - // (quoting Alfred N. 
Whitehead) - - csview sv1 = cstr_substr_ex(&str1, 3, 5); // "think" - intptr_t pos = cstr_find(&str1, "live"); // position of "live" in str1 - csview sv2 = cstr_substr_ex(&str1, pos, 4); // get "live" - csview sv3 = cstr_slice_ex(&str1, -8, -1); // get "details" - printf("%.*s %.*s %.*s\n", - c_SV(sv1), c_SV(sv2), c_SV(sv3)); - cstr s1 = cstr_lit("Apples are red"); - cstr s2 = cstr_from_sv(cstr_substr_ex(&s1, -3, 3)); // "red" - cstr s3 = cstr_from_sv(cstr_substr_ex(&s1, 0, 6)); // "Apples" - printf("%s %s\n", cstr_str(&s2), cstr_str(&s3)); - - c_drop(cstr, &str1, &s1, &s2, &s3); + cstr str = cstr_from("Liberté, égalité, fraternité."); + csview sv = cstr_sv(&str); + + c_foreach (i, csview, sv) + printf("%.*s ", c_SS(i.u8.chr)); + puts(""); + + cstr_uppercase(&str); + printf("%s\n", cstr_str(&str)); + + cstr_drop(&str); } ``` Output: ``` -think live details -red Apples +L i b e r t é , é g a l i t é , f r a t e r n i t é . +LIBERTÉ, ÉGALITÉ, FRATERNITÉ. ``` -### Example 2: UTF8 handling +### Example 2: UTF8 replace ```c -#define i_import // include dependent cstr, utf8 and cregex function definitions. +#define i_import // include dependent utf8 definitions. #include <stc/cstr.h> int main(void) @@ -157,7 +120,7 @@ int main(void) printf("%s\n", cstr_str(&s1)); c_foreach (i, cstr, s1) - printf("%.*s,", c_SV(i.u8.chr)); + printf("%.*s,", c_SS(i.u8.chr)); // u8.chr is a csubstr cstr_drop(&s1); } @@ -167,63 +130,3 @@ Output: hell😀 wørld h,e,l,l,😀, ,w,ø,r,l,d, ``` - -### Example 3: csview tokenizer (string split) -Splits strings into tokens. *print_split()* makes **no** memory allocations or *strlen()* calls, -and does not depend on null-terminated strings. *string_split()* function returns a vector of cstr. 
-```c -#include <stdio.h> -#include <stc/csview.h> - -void print_split(csview input, const char* sep) -{ - c_fortoken_sv (i, input, sep) - printf("[%.*s]\n", c_SV(i.token)); - puts(""); -} -#define i_implement -#include <stc/cstr.h> -#define i_key_str -#include <stc/cstack.h> - -cstack_str string_split(csview input, const char* sep) -{ - cstack_str out = cstack_str_init(); - - c_fortoken_sv (i, input, sep) - cstack_str_push(&out, cstr_from_sv(i.token)); - - return out; -} - -int main(void) -{ - print_split(c_sv("//This is a//double-slash//separated//string"), "//"); - print_split(c_sv("This has no matching separator"), "xx"); - - cstack_str s = string_split(c_sv("Split,this,,string,now,"), ","); - - c_foreach (i, cstack_str, s) - printf("[%s]\n", cstr_str(i.ref)); - puts(""); - - cstack_str_drop(&s); -} -``` -Output: -``` -[] -[This is a] -[double-slash] -[separated] -[string] - -[This has no matching separator] - -[Split] -[this] -[] -[string] -[now] -[] -``` |
