diff options
Diffstat (limited to 'docs/csview_api.md')
| -rw-r--r-- | docs/csview_api.md | 151 |
1 files changed, 27 insertions, 124 deletions
diff --git a/docs/csview_api.md b/docs/csview_api.md index 49e4f9d1..4fdff0d1 100644 --- a/docs/csview_api.md +++ b/docs/csview_api.md @@ -1,19 +1,14 @@ # STC [csview](../include/stc/csview.h): String View  -The type **csview** is a string view and can refer to a constant contiguous sequence of char-elements with the first -element of the sequence at position zero. The implementation holds two members: a pointer to constant char and a size. +The type **csview** is a ***null-terminated*** string view and refers to a constant contiguous sequence of +char-elements with the first element of the sequence at position zero. The implementation holds two +members: a pointer to constant char and a size. See [csubstr](csubstr_api.md) for a ***non null-terminated*** +string view/span type. -**csview** is an efficient replacent for `const char*`. It never allocates memory, and therefore need not be destructed. -Its lifetime is limited by the source string storage. It keeps the length of the string, and does not call *strlen()* -when passing it around. It is faster when using`csview` as convertion type (raw) than `const char*` in associative -containers with cstr keys. - -Note: a **csview** may ***not be null-terminated***, and must therefore be printed like: -`printf("%.*s", csview_ARG(sv))`. - -See the c++ class [std::basic_string_view](https://en.cppreference.com/w/cpp/string/basic_string_view) for a functional -description. +Because **csview** is null-terminated, it can be a more efficient replacent for `const char*`. It never +allocates memory, and need therefore not be destructed. Its lifetime is limited by the source string +storage. It keeps the length of the string, and does not call *strlen()* when passing it around. ## Header file @@ -42,17 +37,12 @@ intptr_t csview_find_sv(csview sv, csview find); bool csview_contains(csview sv, const char* str); bool csview_starts_with(csview sv, const char* str); bool csview_ends_with(csview sv, const char* str); - -csview csview_substr_ex(csview sv, intptr_t pos, intptr_t n); // negative pos count from end -csview csview_slice_ex(csview sv, intptr_t p1, intptr_t p2); // negative p1, p2 count from end -csview csview_token(csview sv, const char* sep, intptr_t* start); // *start > sv.size after last token ``` #### UTF8 methods ```c intptr_t csview_u8_size(csview sv); -csview csview_u8_substr(csview sv, intptr_t bytepos, intptr_t u8len); -bool csview_valid_utf8(csview sv); // requires linking with src/utf8code.c +bool csview_valid_utf8(csview sv); // depends on src/utf8code.c csview_iter csview_begin(const csview* self); csview_iter csview_end(const csview* self); @@ -74,27 +64,10 @@ uint32_t utf8_peek(const char* s); // codep uint32_t utf8_peek_off(const char* s, int offset); // codepoint value at utf8 pos (may be negative) ``` -#### Extended cstr methods -```c -csview cstr_substr(const cstr* self, intptr_t pos, intptr_t n); -csview cstr_substr_ex(const cstr* s, intptr_t pos, intptr_t n); // negative pos count from end -csview cstr_u8_substr(const cstr* self, intptr_t bytepos, intptr_t u8len); - -csview cstr_slice(const cstr* self, intptr_t p1, intptr_t p2); -csview cstr_slice_ex(const cstr* s, intptr_t p, intptr_t q); // negative p or q count from end -``` -#### Iterate tokens with *c_fortoken*, *c_fortoken_sv* - -To iterate tokens in an input string separated by a string: -```c -c_fortoken (i, "hello, one, two, three", ", ") - printf("token: %.*s\n", c_SV(i.token)); -``` - #### Helper methods ```c int csview_cmp(const csview* x, const csview* y); -int csview_icmp(const csview* x, const csview* y); +int csview_icmp(const csview* x, const csview* y); // depends on src/utf8code.c: bool csview_eq(const csview* x, const csview* y); uint64_t csview_hash(const csview* x); ``` @@ -107,46 +80,36 @@ uint64_t csview_hash(const csview* x); | `csview_value` | `char` | The string element type | | `csview_iter` | `struct { csview_value *ref; }` | UTF8 iterator | -## Constants and macros - -| Name | Value | Usage | -|:---------------|:---------------------|:---------------------------------------------| -| `c_SV(sv)` | printf argument | `printf("sv: %.*s\n", c_SV(sv));` | - -## Example +## Example: UTF8 iteration and case conversion ```c -#define i_implement +#define i_import #include <stc/cstr.h> #include <stc/csview.h> int main(void) { - cstr str1 = cstr_lit("We think in generalities, but we live in details."); - // (quoting Alfred N. Whitehead) - - csview sv1 = cstr_substr_ex(&str1, 3, 5); // "think" - intptr_t pos = cstr_find(&str1, "live"); // position of "live" in str1 - csview sv2 = cstr_substr_ex(&str1, pos, 4); // get "live" - csview sv3 = cstr_slice_ex(&str1, -8, -1); // get "details" - printf("%.*s %.*s %.*s\n", - c_SV(sv1), c_SV(sv2), c_SV(sv3)); - cstr s1 = cstr_lit("Apples are red"); - cstr s2 = cstr_from_sv(cstr_substr_ex(&s1, -3, 3)); // "red" - cstr s3 = cstr_from_sv(cstr_substr_ex(&s1, 0, 6)); // "Apples" - printf("%s %s\n", cstr_str(&s2), cstr_str(&s3)); - - c_drop(cstr, &str1, &s1, &s2, &s3); + cstr str = cstr_from("Liberté, égalité, fraternité."); + csview sv = cstr_sv(&str); + + c_foreach (i, csview, sv) + printf("%.*s ", c_SS(i.u8.chr)); + puts(""); + + cstr_uppercase(&str); + printf("%s\n", cstr_str(&str)); + + cstr_drop(&str); } ``` Output: ``` -think live details -red Apples +L i b e r t é , é g a l i t é , f r a t e r n i t é . +LIBERTÉ, ÉGALITÉ, FRATERNITÉ. ``` -### Example 2: UTF8 handling +### Example 2: UTF8 replace ```c -#define i_import // include dependent cstr, utf8 and cregex function definitions. +#define i_import // include dependent utf8 definitions. #include <stc/cstr.h> int main(void) @@ -157,7 +120,7 @@ int main(void) printf("%s\n", cstr_str(&s1)); c_foreach (i, cstr, s1) - printf("%.*s,", c_SV(i.u8.chr)); + printf("%.*s,", c_SS(i.u8.chr)); // u8.chr is a csubstr cstr_drop(&s1); } @@ -167,63 +130,3 @@ Output: hell😀 wørld h,e,l,l,😀, ,w,ø,r,l,d, ``` - -### Example 3: csview tokenizer (string split) -Splits strings into tokens. *print_split()* makes **no** memory allocations or *strlen()* calls, -and does not depend on null-terminated strings. *string_split()* function returns a vector of cstr. -```c -#include <stdio.h> -#include <stc/csview.h> - -void print_split(csview input, const char* sep) -{ - c_fortoken_sv (i, input, sep) - printf("[%.*s]\n", c_SV(i.token)); - puts(""); -} -#define i_implement -#include <stc/cstr.h> -#define i_key_str -#include <stc/cstack.h> - -cstack_str string_split(csview input, const char* sep) -{ - cstack_str out = cstack_str_init(); - - c_fortoken_sv (i, input, sep) - cstack_str_push(&out, cstr_from_sv(i.token)); - - return out; -} - -int main(void) -{ - print_split(c_sv("//This is a//double-slash//separated//string"), "//"); - print_split(c_sv("This has no matching separator"), "xx"); - - cstack_str s = string_split(c_sv("Split,this,,string,now,"), ","); - - c_foreach (i, cstack_str, s) - printf("[%s]\n", cstr_str(i.ref)); - puts(""); - - cstack_str_drop(&s); -} -``` -Output: -``` -[] -[This is a] -[double-slash] -[separated] -[string] - -[This has no matching separator] - -[Split] -[this] -[] -[string] -[now] -[] -``` |
