summaryrefslogtreecommitdiffhomepage
path: root/docs
diff options
context:
space:
mode:
authortylov <[email protected]>2023-08-13 23:15:45 +0200
committertylov <[email protected]>2023-08-13 23:15:45 +0200
commit25dc58db206714dc02c1ae0548f6ba7dd3519d29 (patch)
treedcf65b08300d82c4d6752284d5c0a5a00507f07f /docs
parent8bb2f5618e4cefe668a663936354cf53191f2129 (diff)
downloadSTC-modified-25dc58db206714dc02c1ae0548f6ba7dd3519d29.tar.gz
STC-modified-25dc58db206714dc02c1ae0548f6ba7dd3519d29.zip
API CHANGES:
Changed csview: becomes a null-terminated string view. Added csubstr: a non-null-terminated string view/span, like the previous csview. Note that csview works like a csubstr, so few compatibility issues should arise. However, some functions have changed from the _sv suffix to _ss.
Diffstat (limited to 'docs')
-rw-r--r--docs/cmap_api.md2
-rw-r--r--docs/coroutine_api.md2
-rw-r--r--docs/cregex_api.md28
-rw-r--r--docs/cstr_api.md44
-rw-r--r--docs/csubstr_api.md217
-rw-r--r--docs/csview_api.md151
6 files changed, 281 insertions, 163 deletions
diff --git a/docs/cmap_api.md b/docs/cmap_api.md
index 4e6da57d..65777221 100644
--- a/docs/cmap_api.md
+++ b/docs/cmap_api.md
@@ -282,7 +282,7 @@ typedef struct {
cstr country;
} Viking;
-#define Viking_init() ((Viking){cstr_null, cstr_null})
+#define Viking_init() ((Viking){.name={0}, .country={0}})
static inline int Viking_cmp(const Viking* a, const Viking* b) {
int c = cstr_cmp(&a->name, &b->name);
diff --git a/docs/coroutine_api.md b/docs/coroutine_api.md
index f7d81a34..c44f4a4d 100644
--- a/docs/coroutine_api.md
+++ b/docs/coroutine_api.md
@@ -237,7 +237,7 @@ cco_task_struct (produce_items,
int produce_items(struct produce_items* p, cco_runtime* rt)
{
cco_routine (p) {
- p->str = cstr_null;
+ p->str = cstr_init();
p->next.cco_func = next_value;
while (true)
{
diff --git a/docs/cregex_api.md b/docs/cregex_api.md
index 52476e09..98161fe9 100644
--- a/docs/cregex_api.md
+++ b/docs/cregex_api.md
@@ -33,11 +33,11 @@ int cregex_compile(cregex *self, const char* pattern, int cflags = CREG_
int cregex_captures(const cregex* self);
// return CREG_OK, CREG_NOMATCH, or CREG_MATCHERROR
-int cregex_find(const cregex* re, const char* input, csview match[], int mflags = CREG_DEFAULT);
+int cregex_find(const cregex* re, const char* input, csubstr match[], int mflags = CREG_DEFAULT);
// Search inside input string-view only
-int cregex_find_sv(const cregex* re, csview input, csview match[]);
+int cregex_find_ss(const cregex* re, csubstr input, csubstr match[]);
// All-in-one search (compile + find + drop)
-int cregex_find_pattern(const char* pattern, const char* input, csview match[], int cmflags = CREG_DEFAULT);
+int cregex_find_pattern(const char* pattern, const char* input, csubstr match[], int cmflags = CREG_DEFAULT);
// Check if there are matches in input
bool cregex_is_match(const cregex* re, const char* input);
@@ -45,14 +45,14 @@ bool cregex_is_match(const cregex* re, const char* input);
// Replace all matches in input
cstr cregex_replace(const cregex* re, const char* input, const char* replace, int count = INT_MAX);
// Replace count matches in input string-view. Optionally transform replacement.
-cstr cregex_replace_sv(const cregex* re, csview input, const char* replace, int count = INT_MAX);
-cstr cregex_replace_sv(const cregex* re, csview input, const char* replace, int count,
- bool(*transform)(int group, csview match, cstr* result), int rflags);
+cstr cregex_replace_ss(const cregex* re, csubstr input, const char* replace, int count = INT_MAX);
+cstr cregex_replace_ss(const cregex* re, csubstr input, const char* replace, int count,
+ bool(*transform)(int group, csubstr match, cstr* result), int rflags);
// All-in-one replacement (compile + find/replace + drop)
cstr cregex_replace_pattern(const char* pattern, const char* input, const char* replace, int count = INT_MAX);
cstr cregex_replace_pattern(const char* pattern, const char* input, const char* replace, int count,
- bool(*transform)(int group, csview match, cstr* result), int rflags);
+ bool(*transform)(int group, csubstr match, cstr* result), int rflags);
// destroy
void cregex_drop(cregex* self);
```
@@ -109,9 +109,9 @@ int main(void) {
cregex re = cregex_from(pattern);
// Lets find the first date in the string:
- csview match[4]; // full-match, year, month, date.
+ csubstr match[4]; // full-match, year, month, date.
if (cregex_find(&re, input, match) == CREG_OK)
- printf("Found date: %.*s\n", c_SV(match[0]));
+ printf("Found date: %.*s\n", c_SS(match[0]));
else
printf("Could not find any date\n");
@@ -127,7 +127,7 @@ int main(void) {
For a single match you may use the all-in-one function:
```c
if (cregex_find_pattern(pattern, input, match))
- printf("Found date: %.*s\n", c_SV(match[0]));
+ printf("Found date: %.*s\n", c_SS(match[0]));
```
To use: `gcc first_match.c src/cregex.c src/utf8code.c`.
@@ -137,16 +137,16 @@ In order to use a callback function in the replace call, see `examples/regex_rep
To iterate multiple matches in an input string, you may use
```c
-csview match[5] = {0};
+csubstr match[5] = {0};
while (cregex_find(&re, input, match, CREG_NEXT) == CREG_OK)
for (int k = 1; i <= cregex_captures(&re); ++k)
- printf("submatch %d: %.*s\n", k, c_SV(match[k]));
+ printf("submatch %d: %.*s\n", k, c_SS(match[k]));
```
There is also a for-loop macro to simplify it:
```c
c_formatch (it, &re, input)
for (int k = 1; i <= cregex_captures(&re); ++k)
- printf("submatch %d: %.*s\n", k, c_SV(it.match[k]));
+ printf("submatch %d: %.*s\n", k, c_SS(it.match[k]));
```
## Using cregex in a project
@@ -154,7 +154,7 @@ c_formatch (it, &re, input)
The easiest is to `#define i_import` before `#include <stc/cregex.h>`. Make sure to do that in one translation unit only.
For reference, **cregex** uses the following files:
-- `stc/cregex.h`, `stc/utf8.h`, `stc/csview.h`, `stc/cstr.h`, `stc/ccommon.h`, `stc/forward.h`
+- `stc/cregex.h`, `stc/utf8.h`, `stc/csubstr.h`, `stc/cstr.h`, `stc/ccommon.h`, `stc/forward.h`
- `src/cregex.c`, `src/utf8code.c`.
## Regex Cheatsheet
diff --git a/docs/cstr_api.md b/docs/cstr_api.md
index dae5669f..36606b76 100644
--- a/docs/cstr_api.md
+++ b/docs/cstr_api.md
@@ -18,11 +18,11 @@ All cstr definitions and prototypes are available by including a single header f
## Methods
```c
-cstr cstr_init(void); // constructor; same as cstr_null.
+cstr cstr_init(void); // constructor; empty string
cstr cstr_lit(const char literal_only[]); // cstr from literal; no strlen() call.
cstr cstr_from(const char* str); // constructor using strlen()
cstr cstr_from_n(const char* str, intptr_t n); // constructor with n first bytes of str
-cstr cstr_from_sv(csview sv); // construct cstr from csview
+cstr cstr_from_ss(csubstr sv); // construct cstr from csubstr
cstr cstr_with_capacity(intptr_t cap);
cstr cstr_with_size(intptr_t len, char fill); // repeat fill len times
cstr cstr_from_fmt(const char* fmt, ...); // printf() formatting
@@ -34,7 +34,7 @@ void cstr_drop(cstr* self); // destructo
const char* cstr_str(const cstr* self); // cast to const char*
char* cstr_data(cstr* self); // cast to mutable char*
-csview cstr_sv(const cstr* self); // cast to string view
+csubstr cstr_ss(const cstr* self); // cast to string view
cstr_buf cstr_buffer(cstr* self); // cast to mutable buffer (with capacity)
intptr_t cstr_size(const cstr* self);
@@ -48,13 +48,13 @@ void cstr_clear(cstr* self);
char* cstr_assign(cstr* self, const char* str);
char* cstr_assign_n(cstr* self, const char* str, intptr_t n); // assign n first bytes of str
-char* cstr_assign_sv(cstr* self, csview sv);
+char* cstr_assign_ss(cstr* self, csubstr sv);
char* cstr_copy(cstr* self, cstr s); // copy-assign a cstr
int cstr_printf(cstr* self, const char* fmt, ...); // source and target must not overlap.
char* cstr_append(cstr* self, const char* str);
char* cstr_append_n(cstr* self, const char* str, intptr_t n); // append n first bytes of str
-char* cstr_append_sv(cstr* self, csview str);
+char* cstr_append_ss(cstr* self, csubstr str);
char* cstr_append_s(cstr* self, cstr str);
int cstr_append_fmt(cstr* self, const char* fmt, ...); // printf() formatting
char* cstr_append_uninit(cstr* self, intptr_t len); // return ptr to start of uninited data
@@ -63,19 +63,19 @@ void cstr_push(cstr* self, const char* chr); // append on
void cstr_pop(cstr* self); // pop one utf8 char
void cstr_insert(cstr* self, intptr_t pos, const char* ins);
-void cstr_insert_sv(cstr* self, intptr_t pos, csview ins);
+void cstr_insert_ss(cstr* self, intptr_t pos, csubstr ins);
void cstr_insert_s(cstr* self, intptr_t pos, cstr ins);
void cstr_erase(cstr* self, intptr_t pos, intptr_t len); // erase len bytes from pos
void cstr_replace(cstr* self, const char* search, const char* repl, unsigned count = MAX_INT);
-cstr cstr_replace_sv(csview in, csview search, csview repl, unsigned count);
+cstr cstr_replace_ss(csubstr in, csubstr search, csubstr repl, unsigned count);
void cstr_replace_at(cstr* self, intptr_t pos, intptr_t len, const char* repl); // replace at a pos
-void cstr_replace_at_sv(cstr* self, intptr_t pos, intptr_t len, const csview repl);
+void cstr_replace_at_ss(cstr* self, intptr_t pos, intptr_t len, const csubstr repl);
void cstr_replace_at_s(cstr* self, intptr_t pos, intptr_t len, cstr repl);
bool cstr_equals(const cstr* self, const char* str);
-bool cstr_equals_sv(const cstr* self, csview sv);
+bool cstr_equals_ss(const cstr* self, csubstr sv);
bool cstr_equals_s(const cstr* self, cstr s);
intptr_t cstr_find(const cstr* self, const char* search);
@@ -83,11 +83,11 @@ intptr_t cstr_find_at(const cstr* self, intptr_t pos, const char* search); //
bool cstr_contains(const cstr* self, const char* search);
bool cstr_starts_with(const cstr* self, const char* str);
-bool cstr_starts_with_sv(const cstr* self, csview sv);
+bool cstr_starts_with_ss(const cstr* self, csubstr sv);
bool cstr_starts_with_s(const cstr* self, cstr s);
bool cstr_ends_with(const cstr* self, const char* str);
-bool cstr_ends_with_sv(const cstr* self, csview sv);
+bool cstr_ends_with_ss(const cstr* self, csubstr sv);
bool cstr_ends_with_s(const cstr* self, cstr s);
bool cstr_getline(cstr *self, FILE *stream); // cstr_getdelim(self, '\n', stream)
@@ -100,8 +100,8 @@ intptr_t cstr_u8_size(const cstr* self); // number of
intptr_t cstr_u8_size_n(const cstr self, intptr_t nbytes); // utf8 size within n bytes
intptr_t cstr_u8_to_pos(const cstr* self, intptr_t u8idx); // byte pos offset at utf8 codepoint index
const char* cstr_u8_at(const cstr* self, intptr_t u8idx); // char* position at utf8 codepoint index
-csview cstr_u8_chr(const cstr* self, intptr_t u8idx); // get utf8 character as a csview
-void cstr_u8_replace_at(cstr* self, intptr_t bytepos, intptr_t u8len, csview repl); // replace u8len utf8 chars
+csubstr cstr_u8_chr(const cstr* self, intptr_t u8idx); // get utf8 character as a csubstr
+void cstr_u8_replace_at(cstr* self, intptr_t bytepos, intptr_t u8len, csubstr repl); // replace u8len utf8 chars
void cstr_u8_erase(cstr* self, intptr_t bytepos, intptr_t u8len); // erase u8len codepoints from pos
// iterate utf8 codepoints
@@ -112,14 +112,14 @@ cstr_iter cstr_advance(cstr_iter it, intptr_t n);
// utf8 functions requires linking with src/utf8code.c symbols:
bool cstr_valid_utf8(const cstr* self); // check if str is valid utf8
-cstr cstr_casefold_sv(csview sv); // returns new casefolded utf8 cstr
+cstr cstr_casefold_ss(csubstr sv); // returns new casefolded utf8 cstr
cstr cstr_tolower(const char* str); // returns new lowercase utf8 cstr
-cstr cstr_tolower_sv(csview sv); // returns new lowercase utf8 cstr
+cstr cstr_tolower_ss(csubstr sv); // returns new lowercase utf8 cstr
void cstr_lowercase(cstr* self); // transform cstr to lowercase utf8
cstr cstr_toupper(const char* str); // returns new uppercase utf8 cstr
-cstr cstr_toupper_sv(csview sv); // returns new uppercase utf8 cstr
+cstr cstr_toupper_ss(csubstr sv); // returns new uppercase utf8 cstr
void cstr_uppercase(cstr* self); // transform cstr to uppercase utf8
int cstr_icmp(const cstr* s1, const cstr* s2); // utf8 case-insensitive comparison
@@ -132,11 +132,10 @@ Note that all methods with arguments `(..., const char* str, intptr_t n)`, `n` m
#### Helper methods:
```c
-int cstr_cmp(const cstr* s1, const cstr* s2);
-bool cstr_eq(const cstr* s1, const cstr* s2);
-bool cstr_hash(const cstr* self);
-
-char* cstrnstrn(const char* str, const char* search, intptr_t slen, intptr_t nlen);
+int cstr_cmp(const cstr* s1, const cstr* s2);
+bool cstr_eq(const cstr* s1, const cstr* s2);
+bool cstr_hash(const cstr* self);
+char* cstrnstrn(const char* str, const char* search, intptr_t slen, intptr_t nlen);
```
## Types
@@ -145,7 +144,7 @@ char* cstrnstrn(const char* str, const char* search, intptr_t slen, intpt
|:----------------|:---------------------------------------------|:---------------------|
| `cstr` | `struct { ... }` | The string type |
| `cstr_value` | `char` | String element type |
-| `csview` | `struct { const char *str; intptr_t size; }` | String view type |
+| `csubstr` | `struct { const char *str; intptr_t size; }` | String view type |
| `cstr_buf` | `struct { char *data; intptr_t size, cap; }` | String buffer type |
## Constants and macros
@@ -153,7 +152,6 @@ char* cstrnstrn(const char* str, const char* search, intptr_t slen, intpt
| Name | Value |
|:------------------|:------------------|
| `c_NPOS` | `INTPTR_MAX` |
-| `cstr_null` | empty cstr value |
## Example
```c
diff --git a/docs/csubstr_api.md b/docs/csubstr_api.md
new file mode 100644
index 00000000..925c69db
--- /dev/null
+++ b/docs/csubstr_api.md
@@ -0,0 +1,217 @@
+# STC [csubstr](../include/stc/csubstr.h): String View
+![String](pics/string.jpg)
+
+The type **csubstr** is a string view and can refer to a constant contiguous sequence of char-elements with the first
+element of the sequence at position zero. The implementation holds two members: a pointer to constant char and a size.
+
+**csubstr** is not null-terminated, and is therefore not a replacement for `const char*` - see [csview](csview_api.md) for
+that. **csubstr** never allocates memory, and therefore need not be destructed.
+Its lifetime is limited by the source string storage. It keeps the length of the string, and does not need to call
+*strlen()* to acquire the length.
+
+Note: a **csubstr** may ***not be null-terminated***, and must therefore be printed this way:
+```c
+printf("%.*s", c_SS(sstr))
+```
+
+See the c++ class [std::basic_string_view](https://en.cppreference.com/w/cpp/string/basic_string_view) for a functional
+description.
+
+## Header file
+
+All csubstr definitions and prototypes are available by including a single header file.
+
+```c
+#define i_implement
+#include <stc/cstr.h>
+#include <stc/csubstr.h> // after cstr.h: include extra cstr-csubstr functions
+```
+## Methods
+
+```c
+csubstr c_ss(const char literal_only[]); // construct from literal, no strlen()
+csubstr c_ss(const char* str, intptr_t n); // construct from str and length n
+csubstr csubstr_from(const char* str); // construct from const char*
+csubstr csubstr_from_n(const char* str, intptr_t n); // alias for c_ss(str, n)
+
+intptr_t csubstr_size(csubstr sv);
+bool csubstr_empty(csubstr sv);
+void csubstr_clear(csubstr* self);
+
+bool csubstr_equals(csubstr sv, csubstr sv2);
+intptr_t csubstr_find(csubstr sv, const char* str);
+intptr_t csubstr_find_ss(csubstr sv, csubstr find);
+bool csubstr_contains(csubstr sv, const char* str);
+bool csubstr_starts_with(csubstr sv, const char* str);
+bool csubstr_ends_with(csubstr sv, const char* str);
+
+csubstr csubstr_substr_ex(csubstr sv, intptr_t pos, intptr_t n); // negative pos count from end
+csubstr csubstr_slice_ex(csubstr sv, intptr_t p1, intptr_t p2); // negative p1, p2 count from end
+csubstr csubstr_token(csubstr sv, const char* sep, intptr_t* start); // *start > sv.size after last token
+```
+
+#### UTF8 methods
+```c
+intptr_t csubstr_u8_size(csubstr sv);
+csubstr csubstr_u8_substr(csubstr sv, intptr_t bytepos, intptr_t u8len);
+bool csubstr_valid_utf8(csubstr sv); // requires linking with src/utf8code.c
+
+csubstr_iter csubstr_begin(const csubstr* self);
+csubstr_iter csubstr_end(const csubstr* self);
+void csubstr_next(csubstr_iter* it); // utf8 codepoint step, not byte!
+csubstr_iter csubstr_advance(csubstr_iter it, intptr_t n);
+```
+
+#### Extended cstr methods
+```c
+csubstr cstr_substr(const cstr* self, intptr_t pos, intptr_t n);
+csubstr cstr_substr_ex(const cstr* s, intptr_t pos, intptr_t n); // negative pos count from end
+csubstr cstr_u8_substr(const cstr* self, intptr_t bytepos, intptr_t u8len);
+
+csubstr cstr_slice(const cstr* self, intptr_t p1, intptr_t p2);
+csubstr cstr_slice_ex(const cstr* s, intptr_t p, intptr_t q); // negative p or q count from end
+```
+#### Iterate tokens with *c_fortoken*, *c_fortoken_ss*
+
+To iterate tokens in an input string separated by a string:
+```c
+c_fortoken (i, "hello, one, two, three", ", ")
+ printf("token: %.*s\n", c_SS(i.token));
+```
+
+#### Helper methods
+```c
+int csubstr_cmp(const csubstr* x, const csubstr* y);
+int csubstr_icmp(const csubstr* x, const csubstr* y);
+bool csubstr_eq(const csubstr* x, const csubstr* y);
+uint64_t csubstr_hash(const csubstr* x);
+```
+
+## Types
+
+| Type name | Type definition | Used to represent... |
+|:----------------|:-------------------------------------------|:-------------------------|
+| `csubstr` | `struct { const char *str; intptr_t size; }` | The string view type |
+| `csubstr_value` | `char` | The string element type |
+| `csubstr_iter` | `struct { csubstr_value *ref; }` | UTF8 iterator |
+
+## Constants and macros
+
+| Name | Value | Usage |
+|:---------------|:---------------------|:---------------------------------------------|
+| `c_SS(sv)` | printf argument | `printf("sv: %.*s\n", c_SS(sv));` |
+
+## Example
+```c
+#define i_implement
+#include <stc/cstr.h>
+#include <stc/csubstr.h>
+
+int main(void)
+{
+ cstr str1 = cstr_lit("We think in generalities, but we live in details.");
+ // (quoting Alfred N. Whitehead)
+
+ csubstr sv1 = cstr_substr_ex(&str1, 3, 5); // "think"
+ intptr_t pos = cstr_find(&str1, "live"); // position of "live" in str1
+ csubstr sv2 = cstr_substr_ex(&str1, pos, 4); // get "live"
+ csubstr sv3 = cstr_slice_ex(&str1, -8, -1); // get "details"
+ printf("%.*s %.*s %.*s\n",
+ c_SS(sv1), c_SS(sv2), c_SS(sv3));
+ cstr s1 = cstr_lit("Apples are red");
+ cstr s2 = cstr_from_ss(cstr_substr_ex(&s1, -3, 3)); // "red"
+ cstr s3 = cstr_from_ss(cstr_substr_ex(&s1, 0, 6)); // "Apples"
+ printf("%s %s\n", cstr_str(&s2), cstr_str(&s3));
+
+ c_drop(cstr, &str1, &s1, &s2, &s3);
+}
+```
+Output:
+```
+think live details
+red Apples
+```
+
+### Example 2: UTF8 handling
+```c
+#define i_import // include dependent cstr, utf8 and cregex function definitions.
+#include <stc/cstr.h>
+
+int main(void)
+{
+ cstr s1 = cstr_lit("hell😀 w😀rld");
+
+ cstr_u8_replace_at(&s1, cstr_find(&s1, "😀rld"), 1, c_ss("ø"));
+ printf("%s\n", cstr_str(&s1));
+
+ c_foreach (i, cstr, s1)
+ printf("%.*s,", c_SS(i.u8.chr));
+
+ cstr_drop(&s1);
+}
+```
+Output:
+```
+hell😀 wørld
+h,e,l,l,😀, ,w,ø,r,l,d,
+```
+
+### Example 3: csubstr tokenizer (string split)
+Splits strings into tokens. *print_split()* makes **no** memory allocations or *strlen()* calls,
+and does not depend on null-terminated strings. *string_split()* function returns a vector of cstr.
+```c
+#include <stdio.h>
+#include <stc/csubstr.h>
+
+void print_split(csubstr input, const char* sep)
+{
+ c_fortoken_ss (i, input, sep)
+ printf("[%.*s]\n", c_SS(i.token));
+ puts("");
+}
+#define i_implement
+#include <stc/cstr.h>
+#define i_key_str
+#include <stc/cstack.h>
+
+cstack_str string_split(csubstr input, const char* sep)
+{
+ cstack_str out = cstack_str_init();
+
+ c_fortoken_ss (i, input, sep)
+ cstack_str_push(&out, cstr_from_ss(i.token));
+
+ return out;
+}
+
+int main(void)
+{
+ print_split(c_ss("//This is a//double-slash//separated//string"), "//");
+ print_split(c_ss("This has no matching separator"), "xx");
+
+ cstack_str s = string_split(c_ss("Split,this,,string,now,"), ",");
+
+ c_foreach (i, cstack_str, s)
+ printf("[%s]\n", cstr_str(i.ref));
+ puts("");
+
+ cstack_str_drop(&s);
+}
+```
+Output:
+```
+[]
+[This is a]
+[double-slash]
+[separated]
+[string]
+
+[This has no matching separator]
+
+[Split]
+[this]
+[]
+[string]
+[now]
+[]
+```
diff --git a/docs/csview_api.md b/docs/csview_api.md
index 49e4f9d1..4fdff0d1 100644
--- a/docs/csview_api.md
+++ b/docs/csview_api.md
@@ -1,19 +1,14 @@
# STC [csview](../include/stc/csview.h): String View
![String](pics/string.jpg)
-The type **csview** is a string view and can refer to a constant contiguous sequence of char-elements with the first
-element of the sequence at position zero. The implementation holds two members: a pointer to constant char and a size.
+The type **csview** is a ***null-terminated*** string view and refers to a constant contiguous sequence of
+char-elements with the first element of the sequence at position zero. The implementation holds two
+members: a pointer to constant char and a size. See [csubstr](csubstr_api.md) for a ***non null-terminated***
+string view/span type.
-**csview** is an efficient replacent for `const char*`. It never allocates memory, and therefore need not be destructed.
-Its lifetime is limited by the source string storage. It keeps the length of the string, and does not call *strlen()*
-when passing it around. It is faster when using`csview` as convertion type (raw) than `const char*` in associative
-containers with cstr keys.
-
-Note: a **csview** may ***not be null-terminated***, and must therefore be printed like:
-`printf("%.*s", csview_ARG(sv))`.
-
-See the c++ class [std::basic_string_view](https://en.cppreference.com/w/cpp/string/basic_string_view) for a functional
-description.
+Because **csview** is null-terminated, it can be a more efficient replacement for `const char*`. It never
allocates memory, and therefore need not be destructed. Its lifetime is limited by the source string
+storage. It keeps the length of the string, and does not call *strlen()* when passing it around.
## Header file
@@ -42,17 +37,12 @@ intptr_t csview_find_sv(csview sv, csview find);
bool csview_contains(csview sv, const char* str);
bool csview_starts_with(csview sv, const char* str);
bool csview_ends_with(csview sv, const char* str);
-
-csview csview_substr_ex(csview sv, intptr_t pos, intptr_t n); // negative pos count from end
-csview csview_slice_ex(csview sv, intptr_t p1, intptr_t p2); // negative p1, p2 count from end
-csview csview_token(csview sv, const char* sep, intptr_t* start); // *start > sv.size after last token
```
#### UTF8 methods
```c
intptr_t csview_u8_size(csview sv);
-csview csview_u8_substr(csview sv, intptr_t bytepos, intptr_t u8len);
-bool csview_valid_utf8(csview sv); // requires linking with src/utf8code.c
+bool csview_valid_utf8(csview sv); // depends on src/utf8code.c
csview_iter csview_begin(const csview* self);
csview_iter csview_end(const csview* self);
@@ -74,27 +64,10 @@ uint32_t utf8_peek(const char* s); // codep
uint32_t utf8_peek_off(const char* s, int offset); // codepoint value at utf8 pos (may be negative)
```
-#### Extended cstr methods
-```c
-csview cstr_substr(const cstr* self, intptr_t pos, intptr_t n);
-csview cstr_substr_ex(const cstr* s, intptr_t pos, intptr_t n); // negative pos count from end
-csview cstr_u8_substr(const cstr* self, intptr_t bytepos, intptr_t u8len);
-
-csview cstr_slice(const cstr* self, intptr_t p1, intptr_t p2);
-csview cstr_slice_ex(const cstr* s, intptr_t p, intptr_t q); // negative p or q count from end
-```
-#### Iterate tokens with *c_fortoken*, *c_fortoken_sv*
-
-To iterate tokens in an input string separated by a string:
-```c
-c_fortoken (i, "hello, one, two, three", ", ")
- printf("token: %.*s\n", c_SV(i.token));
-```
-
#### Helper methods
```c
int csview_cmp(const csview* x, const csview* y);
-int csview_icmp(const csview* x, const csview* y);
+int csview_icmp(const csview* x, const csview* y); // depends on src/utf8code.c:
bool csview_eq(const csview* x, const csview* y);
uint64_t csview_hash(const csview* x);
```
@@ -107,46 +80,36 @@ uint64_t csview_hash(const csview* x);
| `csview_value` | `char` | The string element type |
| `csview_iter` | `struct { csview_value *ref; }` | UTF8 iterator |
-## Constants and macros
-
-| Name | Value | Usage |
-|:---------------|:---------------------|:---------------------------------------------|
-| `c_SV(sv)` | printf argument | `printf("sv: %.*s\n", c_SV(sv));` |
-
-## Example
+## Example: UTF8 iteration and case conversion
```c
-#define i_implement
+#define i_import
#include <stc/cstr.h>
#include <stc/csview.h>
int main(void)
{
- cstr str1 = cstr_lit("We think in generalities, but we live in details.");
- // (quoting Alfred N. Whitehead)
-
- csview sv1 = cstr_substr_ex(&str1, 3, 5); // "think"
- intptr_t pos = cstr_find(&str1, "live"); // position of "live" in str1
- csview sv2 = cstr_substr_ex(&str1, pos, 4); // get "live"
- csview sv3 = cstr_slice_ex(&str1, -8, -1); // get "details"
- printf("%.*s %.*s %.*s\n",
- c_SV(sv1), c_SV(sv2), c_SV(sv3));
- cstr s1 = cstr_lit("Apples are red");
- cstr s2 = cstr_from_sv(cstr_substr_ex(&s1, -3, 3)); // "red"
- cstr s3 = cstr_from_sv(cstr_substr_ex(&s1, 0, 6)); // "Apples"
- printf("%s %s\n", cstr_str(&s2), cstr_str(&s3));
-
- c_drop(cstr, &str1, &s1, &s2, &s3);
+ cstr str = cstr_from("Liberté, égalité, fraternité.");
+ csview sv = cstr_sv(&str);
+
+ c_foreach (i, csview, sv)
+ printf("%.*s ", c_SS(i.u8.chr));
+ puts("");
+
+ cstr_uppercase(&str);
+ printf("%s\n", cstr_str(&str));
+
+ cstr_drop(&str);
}
```
Output:
```
-think live details
-red Apples
+L i b e r t é , é g a l i t é , f r a t e r n i t é .
+LIBERTÉ, ÉGALITÉ, FRATERNITÉ.
```
-### Example 2: UTF8 handling
+### Example 2: UTF8 replace
```c
-#define i_import // include dependent cstr, utf8 and cregex function definitions.
+#define i_import // include dependent utf8 definitions.
#include <stc/cstr.h>
int main(void)
@@ -157,7 +120,7 @@ int main(void)
printf("%s\n", cstr_str(&s1));
c_foreach (i, cstr, s1)
- printf("%.*s,", c_SV(i.u8.chr));
+ printf("%.*s,", c_SS(i.u8.chr)); // u8.chr is a csubstr
cstr_drop(&s1);
}
@@ -167,63 +130,3 @@ Output:
hell😀 wørld
h,e,l,l,😀, ,w,ø,r,l,d,
```
-
-### Example 3: csview tokenizer (string split)
-Splits strings into tokens. *print_split()* makes **no** memory allocations or *strlen()* calls,
-and does not depend on null-terminated strings. *string_split()* function returns a vector of cstr.
-```c
-#include <stdio.h>
-#include <stc/csview.h>
-
-void print_split(csview input, const char* sep)
-{
- c_fortoken_sv (i, input, sep)
- printf("[%.*s]\n", c_SV(i.token));
- puts("");
-}
-#define i_implement
-#include <stc/cstr.h>
-#define i_key_str
-#include <stc/cstack.h>
-
-cstack_str string_split(csview input, const char* sep)
-{
- cstack_str out = cstack_str_init();
-
- c_fortoken_sv (i, input, sep)
- cstack_str_push(&out, cstr_from_sv(i.token));
-
- return out;
-}
-
-int main(void)
-{
- print_split(c_sv("//This is a//double-slash//separated//string"), "//");
- print_split(c_sv("This has no matching separator"), "xx");
-
- cstack_str s = string_split(c_sv("Split,this,,string,now,"), ",");
-
- c_foreach (i, cstack_str, s)
- printf("[%s]\n", cstr_str(i.ref));
- puts("");
-
- cstack_str_drop(&s);
-}
-```
-Output:
-```
-[]
-[This is a]
-[double-slash]
-[separated]
-[string]
-
-[This has no matching separator]
-
-[Split]
-[this]
-[]
-[string]
-[now]
-[]
-```