summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authortylov <[email protected]>2023-08-13 23:15:45 +0200
committertylov <[email protected]>2023-08-13 23:15:45 +0200
commit25dc58db206714dc02c1ae0548f6ba7dd3519d29 (patch)
treedcf65b08300d82c4d6752284d5c0a5a00507f07f
parent8bb2f5618e4cefe668a663936354cf53191f2129 (diff)
downloadSTC-modified-25dc58db206714dc02c1ae0548f6ba7dd3519d29.tar.gz
STC-modified-25dc58db206714dc02c1ae0548f6ba7dd3519d29.zip
API CHANGES:
Changed csview: becomes a null-terminated string view. Added csubview: a non-null-terminated string view/span, like the previous csview. Note that csview works like a csubview, so few compatibility issues should arise. However, some functions have changed from the _sv suffix to _ss.
-rw-r--r--README.md30
-rw-r--r--docs/cmap_api.md2
-rw-r--r--docs/coroutine_api.md2
-rw-r--r--docs/cregex_api.md28
-rw-r--r--docs/cstr_api.md44
-rw-r--r--docs/csubstr_api.md217
-rw-r--r--docs/csview_api.md151
-rw-r--r--include/stc/ccommon.h8
-rw-r--r--include/stc/cregex.h36
-rw-r--r--include/stc/cstr.h100
-rw-r--r--include/stc/csubstr.h208
-rw-r--r--include/stc/csview.h155
-rw-r--r--include/stc/forward.h25
-rw-r--r--include/stc/priv/template.h14
-rw-r--r--include/stc/utf8.h4
-rw-r--r--misc/benchmarks/various/string_bench_STC.cpp32
-rw-r--r--misc/examples/algorithms/forfilter.c12
-rw-r--r--misc/examples/regularexpressions/regex2.c2
-rw-r--r--misc/examples/regularexpressions/regex_match.c4
-rw-r--r--misc/examples/regularexpressions/regex_replace.c6
-rw-r--r--misc/examples/strings/cstr_match.c12
-rw-r--r--misc/examples/strings/replace.c4
-rw-r--r--misc/examples/strings/splitstr.c8
-rw-r--r--misc/examples/strings/sso_substr.c16
-rw-r--r--misc/examples/strings/sview_split.c14
-rw-r--r--misc/examples/strings/utf8replace_c.c4
-rw-r--r--misc/tests/cregex_test.c32
-rw-r--r--src/cregex.c28
-rw-r--r--src/libstc.c2
-rw-r--r--src/singleupdate.sh2
-rw-r--r--src/utf8code.c2
31 files changed, 733 insertions, 471 deletions
diff --git a/README.md b/README.md
index d516f389..96479fa0 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ Containers
- [***csset*** - **std::set** sorted set alike type](docs/csset_api.md)
- [***cstack*** - **std::stack** alike type](docs/cstack_api.md)
- [***cstr*** - **std::string** alike type](docs/cstr_api.md)
-- [***csview*** - **std::string_view** alike type](docs/csview_api.md)
+- [***csubstr*** - **std::string_view** alike type](docs/csubstr_api.md)
- [***cspan*** - **std::span/std::mdspan** alike type](docs/cspan_api.md)
- [***cdeq*** - **std::deque** alike type](docs/cdeq_api.md)
- [***cvec*** - **std::vector** alike type](docs/cvec_api.md)
@@ -351,11 +351,11 @@ linking, so *one* c-file must implement the templated container, e.g.:
#include "cvec_int.h"
```
The non-templated string type **cstr** uses shared linking by default, but can have static linking instead by
-`#define i_static`. Same for the string-view type **csview**, but most of its functions are static inlined, so
+`#define i_static`. Same for the string-view type **csubstr**, but most of its functions are static inlined, so
linking specifications and implementation are only needed for a few lesser used functions.
Conveniently, `src\libstc.c` implements all the non-templated functions with shared linking for **cstr**,
-**csview**, **cregex**, **utf8**, and **crand**.
+**csubstr**, **cregex**, **utf8**, and **crand**.
As a special case, you can `#define i_import` before including **cregex** or **cstr** to implement the dependent
**utf8** functions (proper utf8 case conversions, etc.). Or link with src\libstc.
@@ -401,8 +401,8 @@ Only functions required by the container type is required to be defined. E.g.:
- *Type_clone()* is not used if *#define i_opt c_no_clone* is specified.
- `i_key_str` - Sets `i_keyclass` = *cstr*, `i_tag` = *str*, and `i_keyraw` = *const char*\*. Defines both type convertion
`i_keyfrom`, `i_keyto`, and sets `i_cmp`, `i_eq`, `i_hash` functions with *const char\*\** as argument.
-- `i_key_ssv` - Sets `i_keyclass` = *cstr*, `i_tag` = *ssv*, and `i_keyraw` = *csview\**. Defines both type convertion
-`i_keyfrom`, `i_keyto`, and sets `i_cmp`, `i_eq`, `i_hash` functions with *csview\** as argument.
+- `i_key_ssv` - Sets `i_keyclass` = *cstr*, `i_tag` = *ssv*, and `i_keyraw` = *csubstr\**. Defines both type convertion
+`i_keyfrom`, `i_keyto`, and sets `i_cmp`, `i_eq`, `i_hash` functions with *csubstr\** as argument.
- `i_keyboxed` *Type* - Use when *Type* is a smart pointer **carc** or **cbox**. Defines *i_keyclass = Type*, and *i_keyraw = Type\**.
NB: Do not use when defining carc/cbox types themselves.
- `i_valclass` *Type*, `i_val_str`, `i_val_ssv`, `i_valboxed` - Similar rules as for ***key***.
@@ -640,7 +640,7 @@ void maptest()
STC is generally very memory efficient. Memory usage for the different containers:
- **cstr**, **cvec**, **cstack**, **cpque**: 1 pointer, 2 intptr_t + memory for elements.
-- **csview**, 1 pointer, 1 intptr_t. Does not own data!
+- **csubstr**, 1 pointer, 1 intptr_t. Does not own data!
- **cspan**, 1 pointer and 2 \* dimension \* int32_t. Does not own data!
- **clist**: Type size: 1 pointer. Each node allocates a struct to store its value and a next pointer.
- **cdeq**, **cqueue**: Type size: 2 pointers, 2 intptr_t. Otherwise like *cvec*.
@@ -654,7 +654,7 @@ STC is generally very memory efficient. Memory usage for the different container
## Version 4.3
- Breaking changes:
- - **cstr** and **csview** now uses *shared linking* by default. Implement by either defining `i_implement` or `i_static` before including.
+ - **cstr** and **csubstr** now uses *shared linking* by default. Implement by either defining `i_implement` or `i_static` before including.
- Renamed <stc/calgo.h> => `<stc/algorithm.h>`
- Moved <stc/algo/coroutine.h> => `<stc/coroutine.h>`
- Much improved with some new API and added features.
@@ -686,7 +686,7 @@ STC is generally very memory efficient. Memory usage for the different container
- Renamed c_flt_count(i) => `c_flt_counter(i)`
- Renamed c_flt_last(i) => `c_flt_getcount(i)`
- Renamed c_ARRAYLEN() => c_arraylen()
-- Removed deprecated c_ARGSV(). Use c_SV()
+- Removed deprecated c_ARGSV(). Use c_SS()
- Removed c_PAIR
## Version 4.1.1
@@ -699,7 +699,7 @@ Major changes:
- [crange](docs/algorithm_api.md#crange) - similar to [boost::irange](https://www.boost.org/doc/libs/release/libs/range/doc/html/range/reference/ranges/irange.html) integer range generator.
- [c_forfilter](docs/algorithm_api.md#c_forfilter) - ranges-like view filtering.
- [csort](include/stc/algo/sort.h) - [fast quicksort](misc/benchmarks/various/csort_bench.c) with custom inline comparison.
-- Renamed `c_ARGSV()` => `c_SV()`: **csview** print arg. Note `c_sv()` is shorthand for *csview_from()*.
+- Renamed `c_ARGSV()` => `c_SS()`: **csubstr** print arg. Note `c_ss()` is shorthand for *csubstr_from()*.
- Support for [uppercase flow-control](include/stc/priv/altnames.h) macro names in ccommon.h.
- Some API changes in **cregex** and **cstr**.
- Create single header container versions with python script.
@@ -713,18 +713,18 @@ Major changes:
- New + renamed loop iteration/scope macros:
- `c_forlist`: macro replacing `c_forarray` and `c_apply`. Iterate a compound literal list.
- `c_forrange`: macro replacing `c_forrange`. Iterate a `long long` type number sequence.
-- Updated **cstr**, now always takes self as pointer, like all containers except csview.
+- Updated **cstr**, now always takes self as pointer, like all containers except csubstr.
- Updated **cvec**, **cdeq**, changed `*_range*` function names.
## Changes version 3.8
-- Overhauled some **cstr** and **csview** API:
+- Overhauled some **cstr** and **csubstr** API:
- Changed cstr_replace*() => `cstr_replace_at*(self, pos, len, repl)`: Replace at specific position.
- Changed `cstr_replace_all() cstr_replace*(self, search, repl, count)`: Replace count occurences.
- Renamed `cstr_find_from()` => `cstr_find_at()`
- Renamed `cstr_*_u8()` => `cstr_u8_*()`
- - Renamed `csview_*_u8()` => `csview_u8_*()`
- - Added cstr_u8_slice() and csview_u8_slice().
- - Removed `csview_from_s()`: Use `cstr_sv(s)` instead.
+ - Renamed `csubstr_*_u8()` => `csubstr_u8_*()`
+ - Added cstr_u8_slice() and csubstr_u8_slice().
+ - Removed `csubstr_from_s()`: Use `cstr_ss(s)` instead.
- Added back file coption.h
- Simplified **cbits** usage: all inlined.
- Updated docs.
@@ -759,7 +759,7 @@ Major changes:
- Renamed: *cstr_new()* to `cstr_lit(literal)`, and *cstr_assign_fmt()* to `cstr_printf()`.
- Renamed: *c_default_fromraw()* to `c_default_from()`.
- Changed: the [**c_apply**](docs/algorithm_api.md) macros API.
-- Replaced: *csview_first_token()* and *csview_next_token()* with one function: `csview_token()`.
+- Replaced: *csubstr_first_token()* and *csubstr_next_token()* with one function: `csubstr_token()`.
- Added: **checkauto** tool for checking that c-source files uses `c_auto*` macros correctly.
- Added: general `i_keyclass` / `i_valclass` template parameters which auto-binds template functions.
- Added: `i_opt` template parameter: compile-time options: `c_no_clone`, `c_no_atomic`, `c_is_forward`; may be combined with `|`
diff --git a/docs/cmap_api.md b/docs/cmap_api.md
index 4e6da57d..65777221 100644
--- a/docs/cmap_api.md
+++ b/docs/cmap_api.md
@@ -282,7 +282,7 @@ typedef struct {
cstr country;
} Viking;
-#define Viking_init() ((Viking){cstr_null, cstr_null})
+#define Viking_init() ((Viking){.name={0}, .country={0}})
static inline int Viking_cmp(const Viking* a, const Viking* b) {
int c = cstr_cmp(&a->name, &b->name);
diff --git a/docs/coroutine_api.md b/docs/coroutine_api.md
index f7d81a34..c44f4a4d 100644
--- a/docs/coroutine_api.md
+++ b/docs/coroutine_api.md
@@ -237,7 +237,7 @@ cco_task_struct (produce_items,
int produce_items(struct produce_items* p, cco_runtime* rt)
{
cco_routine (p) {
- p->str = cstr_null;
+ p->str = cstr_init();
p->next.cco_func = next_value;
while (true)
{
diff --git a/docs/cregex_api.md b/docs/cregex_api.md
index 52476e09..98161fe9 100644
--- a/docs/cregex_api.md
+++ b/docs/cregex_api.md
@@ -33,11 +33,11 @@ int cregex_compile(cregex *self, const char* pattern, int cflags = CREG_
int cregex_captures(const cregex* self);
// return CREG_OK, CREG_NOMATCH, or CREG_MATCHERROR
-int cregex_find(const cregex* re, const char* input, csview match[], int mflags = CREG_DEFAULT);
+int cregex_find(const cregex* re, const char* input, csubstr match[], int mflags = CREG_DEFAULT);
// Search inside input string-view only
-int cregex_find_sv(const cregex* re, csview input, csview match[]);
+int cregex_find_ss(const cregex* re, csubstr input, csubstr match[]);
// All-in-one search (compile + find + drop)
-int cregex_find_pattern(const char* pattern, const char* input, csview match[], int cmflags = CREG_DEFAULT);
+int cregex_find_pattern(const char* pattern, const char* input, csubstr match[], int cmflags = CREG_DEFAULT);
// Check if there are matches in input
bool cregex_is_match(const cregex* re, const char* input);
@@ -45,14 +45,14 @@ bool cregex_is_match(const cregex* re, const char* input);
// Replace all matches in input
cstr cregex_replace(const cregex* re, const char* input, const char* replace, int count = INT_MAX);
// Replace count matches in input string-view. Optionally transform replacement.
-cstr cregex_replace_sv(const cregex* re, csview input, const char* replace, int count = INT_MAX);
-cstr cregex_replace_sv(const cregex* re, csview input, const char* replace, int count,
- bool(*transform)(int group, csview match, cstr* result), int rflags);
+cstr cregex_replace_ss(const cregex* re, csubstr input, const char* replace, int count = INT_MAX);
+cstr cregex_replace_ss(const cregex* re, csubstr input, const char* replace, int count,
+ bool(*transform)(int group, csubstr match, cstr* result), int rflags);
// All-in-one replacement (compile + find/replace + drop)
cstr cregex_replace_pattern(const char* pattern, const char* input, const char* replace, int count = INT_MAX);
cstr cregex_replace_pattern(const char* pattern, const char* input, const char* replace, int count,
- bool(*transform)(int group, csview match, cstr* result), int rflags);
+ bool(*transform)(int group, csubstr match, cstr* result), int rflags);
// destroy
void cregex_drop(cregex* self);
```
@@ -109,9 +109,9 @@ int main(void) {
cregex re = cregex_from(pattern);
// Lets find the first date in the string:
- csview match[4]; // full-match, year, month, date.
+ csubstr match[4]; // full-match, year, month, date.
if (cregex_find(&re, input, match) == CREG_OK)
- printf("Found date: %.*s\n", c_SV(match[0]));
+ printf("Found date: %.*s\n", c_SS(match[0]));
else
printf("Could not find any date\n");
@@ -127,7 +127,7 @@ int main(void) {
For a single match you may use the all-in-one function:
```c
if (cregex_find_pattern(pattern, input, match))
- printf("Found date: %.*s\n", c_SV(match[0]));
+ printf("Found date: %.*s\n", c_SS(match[0]));
```
To use: `gcc first_match.c src/cregex.c src/utf8code.c`.
@@ -137,16 +137,16 @@ In order to use a callback function in the replace call, see `examples/regex_rep
To iterate multiple matches in an input string, you may use
```c
-csview match[5] = {0};
+csubstr match[5] = {0};
while (cregex_find(&re, input, match, CREG_NEXT) == CREG_OK)
for (int k = 1; k <= cregex_captures(&re); ++k)
- printf("submatch %d: %.*s\n", k, c_SV(match[k]));
+ printf("submatch %d: %.*s\n", k, c_SS(match[k]));
```
There is also a for-loop macro to simplify it:
```c
c_formatch (it, &re, input)
for (int k = 1; k <= cregex_captures(&re); ++k)
- printf("submatch %d: %.*s\n", k, c_SV(it.match[k]));
+ printf("submatch %d: %.*s\n", k, c_SS(it.match[k]));
```
## Using cregex in a project
@@ -154,7 +154,7 @@ c_formatch (it, &re, input)
The easiest is to `#define i_import` before `#include <stc/cregex.h>`. Make sure to do that in one translation unit only.
For reference, **cregex** uses the following files:
-- `stc/cregex.h`, `stc/utf8.h`, `stc/csview.h`, `stc/cstr.h`, `stc/ccommon.h`, `stc/forward.h`
+- `stc/cregex.h`, `stc/utf8.h`, `stc/csubstr.h`, `stc/cstr.h`, `stc/ccommon.h`, `stc/forward.h`
- `src/cregex.c`, `src/utf8code.c`.
## Regex Cheatsheet
diff --git a/docs/cstr_api.md b/docs/cstr_api.md
index dae5669f..36606b76 100644
--- a/docs/cstr_api.md
+++ b/docs/cstr_api.md
@@ -18,11 +18,11 @@ All cstr definitions and prototypes are available by including a single header f
## Methods
```c
-cstr cstr_init(void); // constructor; same as cstr_null.
+cstr cstr_init(void); // constructor; empty string
cstr cstr_lit(const char literal_only[]); // cstr from literal; no strlen() call.
cstr cstr_from(const char* str); // constructor using strlen()
cstr cstr_from_n(const char* str, intptr_t n); // constructor with n first bytes of str
-cstr cstr_from_sv(csview sv); // construct cstr from csview
+cstr cstr_from_ss(csubstr sv); // construct cstr from csubstr
cstr cstr_with_capacity(intptr_t cap);
cstr cstr_with_size(intptr_t len, char fill); // repeat fill len times
cstr cstr_from_fmt(const char* fmt, ...); // printf() formatting
@@ -34,7 +34,7 @@ void cstr_drop(cstr* self); // destructo
const char* cstr_str(const cstr* self); // cast to const char*
char* cstr_data(cstr* self); // cast to mutable char*
-csview cstr_sv(const cstr* self); // cast to string view
+csubstr cstr_ss(const cstr* self); // cast to string view
cstr_buf cstr_buffer(cstr* self); // cast to mutable buffer (with capacity)
intptr_t cstr_size(const cstr* self);
@@ -48,13 +48,13 @@ void cstr_clear(cstr* self);
char* cstr_assign(cstr* self, const char* str);
char* cstr_assign_n(cstr* self, const char* str, intptr_t n); // assign n first bytes of str
-char* cstr_assign_sv(cstr* self, csview sv);
+char* cstr_assign_ss(cstr* self, csubstr sv);
char* cstr_copy(cstr* self, cstr s); // copy-assign a cstr
int cstr_printf(cstr* self, const char* fmt, ...); // source and target must not overlap.
char* cstr_append(cstr* self, const char* str);
char* cstr_append_n(cstr* self, const char* str, intptr_t n); // append n first bytes of str
-char* cstr_append_sv(cstr* self, csview str);
+char* cstr_append_ss(cstr* self, csubstr str);
char* cstr_append_s(cstr* self, cstr str);
int cstr_append_fmt(cstr* self, const char* fmt, ...); // printf() formatting
char* cstr_append_uninit(cstr* self, intptr_t len); // return ptr to start of uninited data
@@ -63,19 +63,19 @@ void cstr_push(cstr* self, const char* chr); // append on
void cstr_pop(cstr* self); // pop one utf8 char
void cstr_insert(cstr* self, intptr_t pos, const char* ins);
-void cstr_insert_sv(cstr* self, intptr_t pos, csview ins);
+void cstr_insert_ss(cstr* self, intptr_t pos, csubstr ins);
void cstr_insert_s(cstr* self, intptr_t pos, cstr ins);
void cstr_erase(cstr* self, intptr_t pos, intptr_t len); // erase len bytes from pos
void cstr_replace(cstr* self, const char* search, const char* repl, unsigned count = MAX_INT);
-cstr cstr_replace_sv(csview in, csview search, csview repl, unsigned count);
+cstr cstr_replace_ss(csubstr in, csubstr search, csubstr repl, unsigned count);
void cstr_replace_at(cstr* self, intptr_t pos, intptr_t len, const char* repl); // replace at a pos
-void cstr_replace_at_sv(cstr* self, intptr_t pos, intptr_t len, const csview repl);
+void cstr_replace_at_ss(cstr* self, intptr_t pos, intptr_t len, const csubstr repl);
void cstr_replace_at_s(cstr* self, intptr_t pos, intptr_t len, cstr repl);
bool cstr_equals(const cstr* self, const char* str);
-bool cstr_equals_sv(const cstr* self, csview sv);
+bool cstr_equals_ss(const cstr* self, csubstr sv);
bool cstr_equals_s(const cstr* self, cstr s);
intptr_t cstr_find(const cstr* self, const char* search);
@@ -83,11 +83,11 @@ intptr_t cstr_find_at(const cstr* self, intptr_t pos, const char* search); //
bool cstr_contains(const cstr* self, const char* search);
bool cstr_starts_with(const cstr* self, const char* str);
-bool cstr_starts_with_sv(const cstr* self, csview sv);
+bool cstr_starts_with_ss(const cstr* self, csubstr sv);
bool cstr_starts_with_s(const cstr* self, cstr s);
bool cstr_ends_with(const cstr* self, const char* str);
-bool cstr_ends_with_sv(const cstr* self, csview sv);
+bool cstr_ends_with_ss(const cstr* self, csubstr sv);
bool cstr_ends_with_s(const cstr* self, cstr s);
bool cstr_getline(cstr *self, FILE *stream); // cstr_getdelim(self, '\n', stream)
@@ -100,8 +100,8 @@ intptr_t cstr_u8_size(const cstr* self); // number of
intptr_t cstr_u8_size_n(const cstr self, intptr_t nbytes); // utf8 size within n bytes
intptr_t cstr_u8_to_pos(const cstr* self, intptr_t u8idx); // byte pos offset at utf8 codepoint index
const char* cstr_u8_at(const cstr* self, intptr_t u8idx); // char* position at utf8 codepoint index
-csview cstr_u8_chr(const cstr* self, intptr_t u8idx); // get utf8 character as a csview
-void cstr_u8_replace_at(cstr* self, intptr_t bytepos, intptr_t u8len, csview repl); // replace u8len utf8 chars
+csubstr cstr_u8_chr(const cstr* self, intptr_t u8idx); // get utf8 character as a csubstr
+void cstr_u8_replace_at(cstr* self, intptr_t bytepos, intptr_t u8len, csubstr repl); // replace u8len utf8 chars
void cstr_u8_erase(cstr* self, intptr_t bytepos, intptr_t u8len); // erase u8len codepoints from pos
// iterate utf8 codepoints
@@ -112,14 +112,14 @@ cstr_iter cstr_advance(cstr_iter it, intptr_t n);
// utf8 functions requires linking with src/utf8code.c symbols:
bool cstr_valid_utf8(const cstr* self); // check if str is valid utf8
-cstr cstr_casefold_sv(csview sv); // returns new casefolded utf8 cstr
+cstr cstr_casefold_ss(csubstr sv); // returns new casefolded utf8 cstr
cstr cstr_tolower(const char* str); // returns new lowercase utf8 cstr
-cstr cstr_tolower_sv(csview sv); // returns new lowercase utf8 cstr
+cstr cstr_tolower_ss(csubstr sv); // returns new lowercase utf8 cstr
void cstr_lowercase(cstr* self); // transform cstr to lowercase utf8
cstr cstr_toupper(const char* str); // returns new uppercase utf8 cstr
-cstr cstr_toupper_sv(csview sv); // returns new uppercase utf8 cstr
+cstr cstr_toupper_ss(csubstr sv); // returns new uppercase utf8 cstr
void cstr_uppercase(cstr* self); // transform cstr to uppercase utf8
int cstr_icmp(const cstr* s1, const cstr* s2); // utf8 case-insensitive comparison
@@ -132,11 +132,10 @@ Note that all methods with arguments `(..., const char* str, intptr_t n)`, `n` m
#### Helper methods:
```c
-int cstr_cmp(const cstr* s1, const cstr* s2);
-bool cstr_eq(const cstr* s1, const cstr* s2);
-bool cstr_hash(const cstr* self);
-
-char* cstrnstrn(const char* str, const char* search, intptr_t slen, intptr_t nlen);
+int cstr_cmp(const cstr* s1, const cstr* s2);
+bool cstr_eq(const cstr* s1, const cstr* s2);
+bool cstr_hash(const cstr* self);
+char* cstrnstrn(const char* str, const char* search, intptr_t slen, intptr_t nlen);
```
## Types
@@ -145,7 +144,7 @@ char* cstrnstrn(const char* str, const char* search, intptr_t slen, intpt
|:----------------|:---------------------------------------------|:---------------------|
| `cstr` | `struct { ... }` | The string type |
| `cstr_value` | `char` | String element type |
-| `csview` | `struct { const char *str; intptr_t size; }` | String view type |
+| `csubstr` | `struct { const char *str; intptr_t size; }` | String view type |
| `cstr_buf` | `struct { char *data; intptr_t size, cap; }` | String buffer type |
## Constants and macros
@@ -153,7 +152,6 @@ char* cstrnstrn(const char* str, const char* search, intptr_t slen, intpt
| Name | Value |
|:------------------|:------------------|
| `c_NPOS` | `INTPTR_MAX` |
-| `cstr_null` | empty cstr value |
## Example
```c
diff --git a/docs/csubstr_api.md b/docs/csubstr_api.md
new file mode 100644
index 00000000..925c69db
--- /dev/null
+++ b/docs/csubstr_api.md
@@ -0,0 +1,217 @@
+# STC [csubstr](../include/stc/csubstr.h): String View
+![String](pics/string.jpg)
+
+The type **csubstr** is a string view and can refer to a constant contiguous sequence of char-elements with the first
+element of the sequence at position zero. The implementation holds two members: a pointer to constant char and a size.
+
+**csubstr** is not null-terminated, and therefore not a replacement for `const char*` - see [csview](csview_api.md) for
+that. **csubstr** never allocates memory, and therefore need not be destructed.
+Its lifetime is limited by the source string storage. It keeps the length of the string, and does not need to call
+*strlen()* to acquire the length.
+
+Note: a **csubstr** may ***not be null-terminated***, and must therefore be printed this way:
+```c
+printf("%.*s", c_SS(sstr))
+```
+
+See the c++ class [std::basic_string_view](https://en.cppreference.com/w/cpp/string/basic_string_view) for a functional
+description.
+
+## Header file
+
+All csubstr definitions and prototypes are available by including a single header file.
+
+```c
+#define i_implement
+#include <stc/cstr.h>
+#include <stc/csubstr.h> // after cstr.h: include extra cstr-csubstr functions
+```
+## Methods
+
+```c
+csubstr c_ss(const char literal_only[]); // construct from literal, no strlen()
+csubstr c_ss(const char* str, intptr_t n); // construct from str and length n
+csubstr csubstr_from(const char* str); // construct from const char*
+csubstr csubstr_from_n(const char* str, intptr_t n); // alias for c_ss(str, n)
+
+intptr_t csubstr_size(csubstr sv);
+bool csubstr_empty(csubstr sv);
+void csubstr_clear(csubstr* self);
+
+bool csubstr_equals(csubstr sv, csubstr sv2);
+intptr_t csubstr_find(csubstr sv, const char* str);
+intptr_t csubstr_find_ss(csubstr sv, csubstr find);
+bool csubstr_contains(csubstr sv, const char* str);
+bool csubstr_starts_with(csubstr sv, const char* str);
+bool csubstr_ends_with(csubstr sv, const char* str);
+
+csubstr csubstr_substr_ex(csubstr sv, intptr_t pos, intptr_t n); // negative pos count from end
+csubstr csubstr_slice_ex(csubstr sv, intptr_t p1, intptr_t p2); // negative p1, p2 count from end
+csubstr csubstr_token(csubstr sv, const char* sep, intptr_t* start); // *start > sv.size after last token
+```
+
+#### UTF8 methods
+```c
+intptr_t csubstr_u8_size(csubstr sv);
+csubstr csubstr_u8_substr(csubstr sv, intptr_t bytepos, intptr_t u8len);
+bool csubstr_valid_utf8(csubstr sv); // requires linking with src/utf8code.c
+
+csubstr_iter csubstr_begin(const csubstr* self);
+csubstr_iter csubstr_end(const csubstr* self);
+void csubstr_next(csubstr_iter* it); // utf8 codepoint step, not byte!
+csubstr_iter csubstr_advance(csubstr_iter it, intptr_t n);
+```
+
+#### Extended cstr methods
+```c
+csubstr cstr_substr(const cstr* self, intptr_t pos, intptr_t n);
+csubstr cstr_substr_ex(const cstr* s, intptr_t pos, intptr_t n); // negative pos count from end
+csubstr cstr_u8_substr(const cstr* self, intptr_t bytepos, intptr_t u8len);
+
+csubstr cstr_slice(const cstr* self, intptr_t p1, intptr_t p2);
+csubstr cstr_slice_ex(const cstr* s, intptr_t p, intptr_t q); // negative p or q count from end
+```
+#### Iterate tokens with *c_fortoken*, *c_fortoken_ss*
+
+To iterate tokens in an input string separated by a string:
+```c
+c_fortoken (i, "hello, one, two, three", ", ")
+ printf("token: %.*s\n", c_SS(i.token));
+```
+
+#### Helper methods
+```c
+int csubstr_cmp(const csubstr* x, const csubstr* y);
+int csubstr_icmp(const csubstr* x, const csubstr* y);
+bool csubstr_eq(const csubstr* x, const csubstr* y);
+uint64_t csubstr_hash(const csubstr* x);
+```
+
+## Types
+
+| Type name | Type definition | Used to represent... |
+|:----------------|:-------------------------------------------|:-------------------------|
+| `csubstr` | `struct { const char *str; intptr_t size; }` | The string view type |
+| `csubstr_value` | `char` | The string element type |
+| `csubstr_iter` | `struct { csubstr_value *ref; }` | UTF8 iterator |
+
+## Constants and macros
+
+| Name | Value | Usage |
+|:---------------|:---------------------|:---------------------------------------------|
+| `c_SS(sv)` | printf argument | `printf("sv: %.*s\n", c_SS(sv));` |
+
+## Example
+```c
+#define i_implement
+#include <stc/cstr.h>
+#include <stc/csubstr.h>
+
+int main(void)
+{
+ cstr str1 = cstr_lit("We think in generalities, but we live in details.");
+ // (quoting Alfred N. Whitehead)
+
+ csubstr sv1 = cstr_substr_ex(&str1, 3, 5); // "think"
+ intptr_t pos = cstr_find(&str1, "live"); // position of "live" in str1
+ csubstr sv2 = cstr_substr_ex(&str1, pos, 4); // get "live"
+ csubstr sv3 = cstr_slice_ex(&str1, -8, -1); // get "details"
+ printf("%.*s %.*s %.*s\n",
+ c_SS(sv1), c_SS(sv2), c_SS(sv3));
+ cstr s1 = cstr_lit("Apples are red");
+ cstr s2 = cstr_from_ss(cstr_substr_ex(&s1, -3, 3)); // "red"
+ cstr s3 = cstr_from_ss(cstr_substr_ex(&s1, 0, 6)); // "Apples"
+ printf("%s %s\n", cstr_str(&s2), cstr_str(&s3));
+
+ c_drop(cstr, &str1, &s1, &s2, &s3);
+}
+```
+Output:
+```
+think live details
+red Apples
+```
+
+### Example 2: UTF8 handling
+```c
+#define i_import // include dependent cstr, utf8 and cregex function definitions.
+#include <stc/cstr.h>
+
+int main(void)
+{
+ cstr s1 = cstr_lit("hell😀 w😀rld");
+
+ cstr_u8_replace_at(&s1, cstr_find(&s1, "😀rld"), 1, c_ss("ø"));
+ printf("%s\n", cstr_str(&s1));
+
+ c_foreach (i, cstr, s1)
+ printf("%.*s,", c_SS(i.u8.chr));
+
+ cstr_drop(&s1);
+}
+```
+Output:
+```
+hell😀 wørld
+h,e,l,l,😀, ,w,ø,r,l,d,
+```
+
+### Example 3: csubstr tokenizer (string split)
+Splits strings into tokens. *print_split()* makes **no** memory allocations or *strlen()* calls,
+and does not depend on null-terminated strings. The *string_split()* function returns a stack (`cstack_str`) of cstr.
+```c
+#include <stdio.h>
+#include <stc/csubstr.h>
+
+void print_split(csubstr input, const char* sep)
+{
+ c_fortoken_ss (i, input, sep)
+ printf("[%.*s]\n", c_SS(i.token));
+ puts("");
+}
+#define i_implement
+#include <stc/cstr.h>
+#define i_key_str
+#include <stc/cstack.h>
+
+cstack_str string_split(csubstr input, const char* sep)
+{
+ cstack_str out = cstack_str_init();
+
+ c_fortoken_ss (i, input, sep)
+ cstack_str_push(&out, cstr_from_ss(i.token));
+
+ return out;
+}
+
+int main(void)
+{
+ print_split(c_ss("//This is a//double-slash//separated//string"), "//");
+ print_split(c_ss("This has no matching separator"), "xx");
+
+ cstack_str s = string_split(c_ss("Split,this,,string,now,"), ",");
+
+ c_foreach (i, cstack_str, s)
+ printf("[%s]\n", cstr_str(i.ref));
+ puts("");
+
+ cstack_str_drop(&s);
+}
+```
+Output:
+```
+[]
+[This is a]
+[double-slash]
+[separated]
+[string]
+
+[This has no matching separator]
+
+[Split]
+[this]
+[]
+[string]
+[now]
+[]
+```
diff --git a/docs/csview_api.md b/docs/csview_api.md
index 49e4f9d1..4fdff0d1 100644
--- a/docs/csview_api.md
+++ b/docs/csview_api.md
@@ -1,19 +1,14 @@
# STC [csview](../include/stc/csview.h): String View
![String](pics/string.jpg)
-The type **csview** is a string view and can refer to a constant contiguous sequence of char-elements with the first
-element of the sequence at position zero. The implementation holds two members: a pointer to constant char and a size.
+The type **csview** is a ***null-terminated*** string view and refers to a constant contiguous sequence of
+char-elements with the first element of the sequence at position zero. The implementation holds two
+members: a pointer to constant char and a size. See [csubstr](csubstr_api.md) for a ***non null-terminated***
+string view/span type.
-**csview** is an efficient replacent for `const char*`. It never allocates memory, and therefore need not be destructed.
-Its lifetime is limited by the source string storage. It keeps the length of the string, and does not call *strlen()*
-when passing it around. It is faster when using`csview` as convertion type (raw) than `const char*` in associative
-containers with cstr keys.
-
-Note: a **csview** may ***not be null-terminated***, and must therefore be printed like:
-`printf("%.*s", csview_ARG(sv))`.
-
-See the c++ class [std::basic_string_view](https://en.cppreference.com/w/cpp/string/basic_string_view) for a functional
-description.
+Because **csview** is null-terminated, it can be a more efficient replacement for `const char*`. It never
+allocates memory, and therefore need not be destructed. Its lifetime is limited by the source string
+storage. It keeps the length of the string, and does not call *strlen()* when passing it around.
## Header file
@@ -42,17 +37,12 @@ intptr_t csview_find_sv(csview sv, csview find);
bool csview_contains(csview sv, const char* str);
bool csview_starts_with(csview sv, const char* str);
bool csview_ends_with(csview sv, const char* str);
-
-csview csview_substr_ex(csview sv, intptr_t pos, intptr_t n); // negative pos count from end
-csview csview_slice_ex(csview sv, intptr_t p1, intptr_t p2); // negative p1, p2 count from end
-csview csview_token(csview sv, const char* sep, intptr_t* start); // *start > sv.size after last token
```
#### UTF8 methods
```c
intptr_t csview_u8_size(csview sv);
-csview csview_u8_substr(csview sv, intptr_t bytepos, intptr_t u8len);
-bool csview_valid_utf8(csview sv); // requires linking with src/utf8code.c
+bool csview_valid_utf8(csview sv); // depends on src/utf8code.c
csview_iter csview_begin(const csview* self);
csview_iter csview_end(const csview* self);
@@ -74,27 +64,10 @@ uint32_t utf8_peek(const char* s); // codep
uint32_t utf8_peek_off(const char* s, int offset); // codepoint value at utf8 pos (may be negative)
```
-#### Extended cstr methods
-```c
-csview cstr_substr(const cstr* self, intptr_t pos, intptr_t n);
-csview cstr_substr_ex(const cstr* s, intptr_t pos, intptr_t n); // negative pos count from end
-csview cstr_u8_substr(const cstr* self, intptr_t bytepos, intptr_t u8len);
-
-csview cstr_slice(const cstr* self, intptr_t p1, intptr_t p2);
-csview cstr_slice_ex(const cstr* s, intptr_t p, intptr_t q); // negative p or q count from end
-```
-#### Iterate tokens with *c_fortoken*, *c_fortoken_sv*
-
-To iterate tokens in an input string separated by a string:
-```c
-c_fortoken (i, "hello, one, two, three", ", ")
- printf("token: %.*s\n", c_SV(i.token));
-```
-
#### Helper methods
```c
int csview_cmp(const csview* x, const csview* y);
-int csview_icmp(const csview* x, const csview* y);
+int csview_icmp(const csview* x, const csview* y); // depends on src/utf8code.c
bool csview_eq(const csview* x, const csview* y);
uint64_t csview_hash(const csview* x);
```
@@ -107,46 +80,36 @@ uint64_t csview_hash(const csview* x);
| `csview_value` | `char` | The string element type |
| `csview_iter` | `struct { csview_value *ref; }` | UTF8 iterator |
-## Constants and macros
-
-| Name | Value | Usage |
-|:---------------|:---------------------|:---------------------------------------------|
-| `c_SV(sv)` | printf argument | `printf("sv: %.*s\n", c_SV(sv));` |
-
-## Example
+## Example: UTF8 iteration and case conversion
```c
-#define i_implement
+#define i_import
#include <stc/cstr.h>
#include <stc/csview.h>
int main(void)
{
- cstr str1 = cstr_lit("We think in generalities, but we live in details.");
- // (quoting Alfred N. Whitehead)
-
- csview sv1 = cstr_substr_ex(&str1, 3, 5); // "think"
- intptr_t pos = cstr_find(&str1, "live"); // position of "live" in str1
- csview sv2 = cstr_substr_ex(&str1, pos, 4); // get "live"
- csview sv3 = cstr_slice_ex(&str1, -8, -1); // get "details"
- printf("%.*s %.*s %.*s\n",
- c_SV(sv1), c_SV(sv2), c_SV(sv3));
- cstr s1 = cstr_lit("Apples are red");
- cstr s2 = cstr_from_sv(cstr_substr_ex(&s1, -3, 3)); // "red"
- cstr s3 = cstr_from_sv(cstr_substr_ex(&s1, 0, 6)); // "Apples"
- printf("%s %s\n", cstr_str(&s2), cstr_str(&s3));
-
- c_drop(cstr, &str1, &s1, &s2, &s3);
+ cstr str = cstr_from("Liberté, égalité, fraternité.");
+ csview sv = cstr_sv(&str);
+
+ c_foreach (i, csview, sv)
+ printf("%.*s ", c_SS(i.u8.chr));
+ puts("");
+
+ cstr_uppercase(&str);
+ printf("%s\n", cstr_str(&str));
+
+ cstr_drop(&str);
}
```
Output:
```
-think live details
-red Apples
+L i b e r t é , é g a l i t é , f r a t e r n i t é .
+LIBERTÉ, ÉGALITÉ, FRATERNITÉ.
```
-### Example 2: UTF8 handling
+### Example 2: UTF8 replace
```c
-#define i_import // include dependent cstr, utf8 and cregex function definitions.
+#define i_import // include dependent utf8 definitions.
#include <stc/cstr.h>
int main(void)
@@ -157,7 +120,7 @@ int main(void)
printf("%s\n", cstr_str(&s1));
c_foreach (i, cstr, s1)
- printf("%.*s,", c_SV(i.u8.chr));
+ printf("%.*s,", c_SS(i.u8.chr)); // u8.chr is a csubstr
cstr_drop(&s1);
}
@@ -167,63 +130,3 @@ Output:
hell😀 wørld
h,e,l,l,😀, ,w,ø,r,l,d,
```
-
-### Example 3: csview tokenizer (string split)
-Splits strings into tokens. *print_split()* makes **no** memory allocations or *strlen()* calls,
-and does not depend on null-terminated strings. *string_split()* function returns a vector of cstr.
-```c
-#include <stdio.h>
-#include <stc/csview.h>
-
-void print_split(csview input, const char* sep)
-{
- c_fortoken_sv (i, input, sep)
- printf("[%.*s]\n", c_SV(i.token));
- puts("");
-}
-#define i_implement
-#include <stc/cstr.h>
-#define i_key_str
-#include <stc/cstack.h>
-
-cstack_str string_split(csview input, const char* sep)
-{
- cstack_str out = cstack_str_init();
-
- c_fortoken_sv (i, input, sep)
- cstack_str_push(&out, cstr_from_sv(i.token));
-
- return out;
-}
-
-int main(void)
-{
- print_split(c_sv("//This is a//double-slash//separated//string"), "//");
- print_split(c_sv("This has no matching separator"), "xx");
-
- cstack_str s = string_split(c_sv("Split,this,,string,now,"), ",");
-
- c_foreach (i, cstack_str, s)
- printf("[%s]\n", cstr_str(i.ref));
- puts("");
-
- cstack_str_drop(&s);
-}
-```
-Output:
-```
-[]
-[This is a]
-[double-slash]
-[separated]
-[string]
-
-[This has no matching separator]
-
-[Split]
-[this]
-[]
-[string]
-[now]
-[]
-```
diff --git a/include/stc/ccommon.h b/include/stc/ccommon.h
index 45fa01c6..24967a10 100644
--- a/include/stc/ccommon.h
+++ b/include/stc/ccommon.h
@@ -130,11 +130,15 @@ typedef const char* ccharptr;
#define ccharptr_clone(s) (s)
#define ccharptr_drop(p) ((void)p)
+#define c_ss(...) c_MACRO_OVERLOAD(c_ss, __VA_ARGS__)
+#define c_ss_1(literal) c_ss_2(literal, c_litstrlen(literal))
+#define c_ss_2(str, n) (c_LITERAL(csubstr){str, n})
+#define c_SS(ss) (int)(ss).size, (ss).str // printf("%.*s\n", c_SS(ss));
+
#define c_sv(...) c_MACRO_OVERLOAD(c_sv, __VA_ARGS__)
-#define c_sv_1(lit) c_sv_2(lit, c_litstrlen(lit))
+#define c_sv_1(literal) c_sv_2(literal, c_litstrlen(literal))
#define c_sv_2(str, n) (c_LITERAL(csview){str, n})
-#define c_SV(sv) (int)(sv).size, (sv).str // print csview: use format "%.*s"
#define c_ROTL(x, k) (x << (k) | x >> (8*sizeof(x) - (k)))
STC_INLINE uint64_t cfasthash(const void* key, intptr_t len) {
diff --git a/include/stc/cregex.h b/include/stc/cregex.h
index bce94b04..3aab3c8b 100644
--- a/include/stc/cregex.h
+++ b/include/stc/cregex.h
@@ -34,7 +34,7 @@ THE SOFTWARE.
*/
#include <stdbool.h>
#include <string.h>
-#include "forward.h" // csview
+#include "forward.h" // csubstr
#include "ccommon.h"
enum {
@@ -82,7 +82,7 @@ typedef struct {
typedef struct {
const cregex* re;
const char* input;
- csview match[CREG_MAX_CAPTURES];
+ csubstr match[CREG_MAX_CAPTURES];
} cregex_iter;
#define c_formatch(it, Re, Input) \
@@ -115,11 +115,11 @@ int cregex_captures(const cregex* re);
/* return CREG_OK, CREG_NOMATCH or CREG_MATCHERROR. */
#define cregex_find(...) c_MACRO_OVERLOAD(cregex_find, __VA_ARGS__)
#define cregex_find_3(re, input, match) cregex_find_4(re, input, match, CREG_DEFAULT)
-int cregex_find_4(const cregex* re, const char* input, csview match[], int mflags);
+int cregex_find_4(const cregex* re, const char* input, csubstr match[], int mflags);
-/* find with csview as input. */
-STC_INLINE int cregex_find_sv(const cregex* re, csview input, csview match[]) {
- csview *mp = NULL;
+/* find with csubstr as input. */
+STC_INLINE int cregex_find_ss(const cregex* re, csubstr input, csubstr match[]) {
+ csubstr *mp = NULL;
if (match) { match[0] = input; mp = match; }
return cregex_find(re, input.str, mp, CREG_STARTEND);
}
@@ -129,27 +129,27 @@ STC_INLINE int cregex_find_sv(const cregex* re, csview input, csview match[]) {
#define cregex_find_pattern_3(pattern, input, match) \
cregex_find_pattern_4(pattern, input, match, CREG_DEFAULT)
int cregex_find_pattern_4(const char* pattern, const char* input,
- csview match[], int cmflags);
+ csubstr match[], int cmflags);
STC_INLINE bool cregex_is_match(const cregex* re, const char* input)
{ return cregex_find_4(re, input, NULL, CREG_DEFAULT) == CREG_OK; }
-/* replace csview input with replace using regular expression pattern */
-#define cregex_replace_sv(...) c_MACRO_OVERLOAD(cregex_replace_sv, __VA_ARGS__)
-#define cregex_replace_sv_3(pattern, input, replace) \
- cregex_replace_sv_4(pattern, input, replace, INT32_MAX)
-#define cregex_replace_sv_4(pattern, input, replace, count) \
- cregex_replace_sv_6(pattern, input, replace, count, NULL, CREG_DEFAULT)
-cstr cregex_replace_sv_6(const cregex* re, csview input, const char* replace, int count,
- bool (*transform)(int group, csview match, cstr* result), int rflags);
+/* replace csubstr input with replace using regular expression pattern */
+#define cregex_replace_ss(...) c_MACRO_OVERLOAD(cregex_replace_ss, __VA_ARGS__)
+#define cregex_replace_ss_3(pattern, input, replace) \
+ cregex_replace_ss_4(pattern, input, replace, INT32_MAX)
+#define cregex_replace_ss_4(pattern, input, replace, count) \
+ cregex_replace_ss_6(pattern, input, replace, count, NULL, CREG_DEFAULT)
+cstr cregex_replace_ss_6(const cregex* re, csubstr input, const char* replace, int count,
+ bool (*transform)(int group, csubstr match, cstr* result), int rflags);
/* replace input with replace using regular expression */
#define cregex_replace(...) c_MACRO_OVERLOAD(cregex_replace, __VA_ARGS__)
#define cregex_replace_3(re, input, replace) cregex_replace_4(re, input, replace, INT32_MAX)
STC_INLINE cstr cregex_replace_4(const cregex* re, const char* input, const char* replace, int count) {
- csview sv = {input, c_strlen(input)};
- return cregex_replace_sv_4(re, sv, replace, count);
+ csubstr ss = {input, c_strlen(input)};
+ return cregex_replace_ss_4(re, ss, replace, count);
}
/* replace + compile RE pattern, and extra arguments */
@@ -159,7 +159,7 @@ STC_INLINE cstr cregex_replace_4(const cregex* re, const char* input, const char
#define cregex_replace_pattern_4(pattern, input, replace, count) \
cregex_replace_pattern_6(pattern, input, replace, count, NULL, CREG_DEFAULT)
cstr cregex_replace_pattern_6(const char* pattern, const char* input, const char* replace, int count,
- bool (*transform)(int group, csview match, cstr* result), int crflags);
+ bool (*transform)(int group, csubstr match, cstr* result), int crflags);
/* destroy regex */
void cregex_drop(cregex* re);
diff --git a/include/stc/cstr.h b/include/stc/cstr.h
index f12d29b6..47cf65da 100644
--- a/include/stc/cstr.h
+++ b/include/stc/cstr.h
@@ -75,7 +75,7 @@ STC_API char* cstr_reserve(cstr* self, intptr_t cap);
STC_API void cstr_shrink_to_fit(cstr* self);
STC_API char* cstr_resize(cstr* self, intptr_t size, char value);
STC_API intptr_t cstr_find_at(const cstr* self, intptr_t pos, const char* search);
-STC_API intptr_t cstr_find_sv(const cstr* self, csview search);
+STC_API intptr_t cstr_find_ss(const cstr* self, csubstr search);
STC_API char* cstr_assign_n(cstr* self, const char* str, intptr_t len);
STC_API char* cstr_append_n(cstr* self, const char* str, intptr_t len);
STC_API char* cstr_append_uninit(cstr *self, intptr_t len);
@@ -85,7 +85,7 @@ STC_API void cstr_u8_erase(cstr* self, intptr_t bytepos, intptr_t u8len);
STC_API cstr cstr_from_fmt(const char* fmt, ...);
STC_API intptr_t cstr_append_fmt(cstr* self, const char* fmt, ...);
STC_API intptr_t cstr_printf(cstr* self, const char* fmt, ...);
-STC_API cstr cstr_replace_sv(csview sv, csview search, csview repl, int32_t count);
+STC_API cstr cstr_replace_ss(csubstr sv, csubstr search, csubstr repl, int32_t count);
STC_API uint64_t cstr_hash(const cstr *self);
STC_INLINE cstr_buf cstr_buffer(cstr* s) {
@@ -94,9 +94,11 @@ STC_INLINE cstr_buf cstr_buffer(cstr* s) {
: c_LITERAL(cstr_buf){s->sml.data, cstr_s_size(s), cstr_s_cap};
}
STC_INLINE csview cstr_sv(const cstr* s) {
- return cstr_is_long(s) ? c_LITERAL(csview){s->lon.data, cstr_l_size(s)}
- : c_LITERAL(csview){s->sml.data, cstr_s_size(s)};
+ return cstr_is_long(s) ? c_sv_2(s->lon.data, cstr_l_size(s))
+ : c_sv_2(s->sml.data, cstr_s_size(s));
}
+STC_INLINE csubstr cstr_ss(const cstr* s)
+ { csview sv = cstr_sv(s); return c_ss_2(sv.str, sv.size); }
STC_INLINE cstr cstr_init(void)
{ return cstr_null; }
@@ -110,7 +112,10 @@ STC_INLINE cstr cstr_from_n(const char* str, const intptr_t len) {
STC_INLINE cstr cstr_from(const char* str)
{ return cstr_from_n(str, c_strlen(str)); }
-STC_INLINE cstr cstr_from_sv(csview sv)
+STC_INLINE cstr cstr_from_ss(csubstr sv)
+ { return cstr_from_n(sv.str, sv.size); }
+
+STC_INLINE cstr cstr_from_v(csview sv)
{ return cstr_from_n(sv.str, sv.size); }
STC_INLINE cstr cstr_with_size(const intptr_t size, const char value) {
@@ -170,9 +175,9 @@ STC_INLINE intptr_t cstr_capacity(const cstr* self)
// utf8 methods defined in/depending on src/utf8code.c:
-extern cstr cstr_casefold_sv(csview sv);
-extern cstr cstr_tolower_sv(csview sv);
-extern cstr cstr_toupper_sv(csview sv);
+extern cstr cstr_casefold_ss(csubstr sv);
+extern cstr cstr_tolower_ss(csubstr sv);
+extern cstr cstr_toupper_ss(csubstr sv);
extern cstr cstr_tolower(const char* str);
extern cstr cstr_toupper(const char* str);
extern void cstr_lowercase(cstr* self);
@@ -193,9 +198,9 @@ STC_INLINE intptr_t cstr_u8_to_pos(const cstr* self, intptr_t u8idx)
STC_INLINE const char* cstr_u8_at(const cstr* self, intptr_t u8idx)
{ return utf8_at(cstr_str(self), u8idx); }
-STC_INLINE csview cstr_u8_chr(const cstr* self, intptr_t u8idx) {
+STC_INLINE csubstr cstr_u8_chr(const cstr* self, intptr_t u8idx) {
const char* str = cstr_str(self);
- csview sv;
+ csubstr sv;
sv.str = utf8_at(str, u8idx);
sv.size = utf8_chr_size(sv.str);
return sv;
@@ -205,7 +210,7 @@ STC_INLINE csview cstr_u8_chr(const cstr* self, intptr_t u8idx) {
STC_INLINE cstr_iter cstr_begin(const cstr* self) {
csview sv = cstr_sv(self);
- if (!sv.size) return c_LITERAL(cstr_iter){NULL};
+ if (!sv.size) return c_LITERAL(cstr_iter){.ref = NULL};
return c_LITERAL(cstr_iter){.u8 = {{sv.str, utf8_chr_size(sv.str)}}};
}
STC_INLINE cstr_iter cstr_end(const cstr* self) {
@@ -244,6 +249,9 @@ STC_INLINE bool cstr_eq(const cstr* s1, const cstr* s2) {
STC_INLINE bool cstr_equals(const cstr* self, const char* str)
{ return !strcmp(cstr_str(self), str); }
+STC_INLINE bool cstr_equals_ss(const cstr* self, csubstr sv)
+ { return sv.size == cstr_size(self) && !c_memcmp(cstr_str(self), sv.str, sv.size); }
+
STC_INLINE bool cstr_equals_sv(const cstr* self, csview sv)
{ return sv.size == cstr_size(self) && !c_memcmp(cstr_str(self), sv.str, sv.size); }
@@ -266,14 +274,14 @@ STC_INLINE intptr_t cstr_find_s(const cstr* self, cstr search)
STC_INLINE bool cstr_contains(const cstr* self, const char* search)
{ return strstr((char*)cstr_str(self), search) != NULL; }
-STC_INLINE bool cstr_contains_sv(const cstr* self, csview search)
- { return cstr_find_sv(self, search) != c_NPOS; }
+STC_INLINE bool cstr_contains_ss(const cstr* self, csubstr search)
+ { return cstr_find_ss(self, search) != c_NPOS; }
STC_INLINE bool cstr_contains_s(const cstr* self, cstr search)
{ return strstr((char*)cstr_str(self), cstr_str(&search)) != NULL; }
-STC_INLINE bool cstr_starts_with_sv(const cstr* self, csview sub) {
+STC_INLINE bool cstr_starts_with_ss(const cstr* self, csubstr sub) {
if (sub.size > cstr_size(self)) return false;
return !c_memcmp(cstr_str(self), sub.str, sub.size);
}
@@ -285,26 +293,26 @@ STC_INLINE bool cstr_starts_with(const cstr* self, const char* sub) {
}
STC_INLINE bool cstr_starts_with_s(const cstr* self, cstr sub)
- { return cstr_starts_with_sv(self, cstr_sv(&sub)); }
+ { return cstr_starts_with_ss(self, cstr_ss(&sub)); }
STC_INLINE bool cstr_istarts_with(const cstr* self, const char* sub) {
- csview sv = cstr_sv(self);
+ csubstr sv = cstr_ss(self);
intptr_t len = c_strlen(sub);
- return len <= sv.size && !utf8_icmp_sv(sv, c_sv(sub, len));
+ return len <= sv.size && !utf8_icmp_ss(sv, c_ss(sub, len));
}
-STC_INLINE bool cstr_ends_with_sv(const cstr* self, csview sub) {
+STC_INLINE bool cstr_ends_with_ss(const cstr* self, csubstr sub) {
csview sv = cstr_sv(self);
if (sub.size > sv.size) return false;
return !c_memcmp(sv.str + sv.size - sub.size, sub.str, sub.size);
}
STC_INLINE bool cstr_ends_with_s(const cstr* self, cstr sub)
- { return cstr_ends_with_sv(self, cstr_sv(&sub)); }
+ { return cstr_ends_with_ss(self, cstr_ss(&sub)); }
STC_INLINE bool cstr_ends_with(const cstr* self, const char* sub)
- { return cstr_ends_with_sv(self, c_sv(sub, c_strlen(sub))); }
+ { return cstr_ends_with_ss(self, c_ss(sub, c_strlen(sub))); }
STC_INLINE bool cstr_iends_with(const cstr* self, const char* sub) {
csview sv = cstr_sv(self);
@@ -316,7 +324,7 @@ STC_INLINE bool cstr_iends_with(const cstr* self, const char* sub) {
STC_INLINE char* cstr_assign(cstr* self, const char* str)
{ return cstr_assign_n(self, str, c_strlen(str)); }
-STC_INLINE char* cstr_assign_sv(cstr* self, csview sv)
+STC_INLINE char* cstr_assign_ss(cstr* self, csubstr sv)
{ return cstr_assign_n(self, sv.str, sv.size); }
STC_INLINE char* cstr_copy(cstr* self, cstr s) {
@@ -338,44 +346,42 @@ STC_INLINE void cstr_pop(cstr* self) {
STC_INLINE char* cstr_append(cstr* self, const char* str)
{ return cstr_append_n(self, str, c_strlen(str)); }
-STC_INLINE char* cstr_append_sv(cstr* self, csview sv)
+STC_INLINE char* cstr_append_ss(cstr* self, csubstr sv)
{ return cstr_append_n(self, sv.str, sv.size); }
STC_INLINE char* cstr_append_s(cstr* self, cstr s)
- { return cstr_append_sv(self, cstr_sv(&s)); }
+ { return cstr_append_ss(self, cstr_ss(&s)); }
#define cstr_replace(...) c_MACRO_OVERLOAD(cstr_replace, __VA_ARGS__)
#define cstr_replace_3(self, search, repl) cstr_replace_4(self, search, repl, INT32_MAX)
STC_INLINE void cstr_replace_4(cstr* self, const char* search, const char* repl, int32_t count) {
- cstr_take(self, cstr_replace_sv(cstr_sv(self), c_sv(search, c_strlen(search)),
- c_sv(repl, c_strlen(repl)), count));
+ cstr_take(self, cstr_replace_ss(cstr_ss(self), c_ss(search, c_strlen(search)),
+ c_ss(repl, c_strlen(repl)), count));
}
-STC_INLINE void cstr_replace_at_sv(cstr* self, intptr_t pos, intptr_t len, const csview repl) {
+STC_INLINE void cstr_replace_at_ss(cstr* self, intptr_t pos, intptr_t len, const csubstr repl) {
char* d = _cstr_internal_move(self, pos + len, pos + repl.size);
c_memcpy(d + pos, repl.str, repl.size);
}
STC_INLINE void cstr_replace_at(cstr* self, intptr_t pos, intptr_t len, const char* repl)
- { cstr_replace_at_sv(self, pos, len, c_sv(repl, c_strlen(repl))); }
+ { cstr_replace_at_ss(self, pos, len, c_ss(repl, c_strlen(repl))); }
STC_INLINE void cstr_replace_at_s(cstr* self, intptr_t pos, intptr_t len, cstr repl)
- { cstr_replace_at_sv(self, pos, len, cstr_sv(&repl)); }
+ { cstr_replace_at_ss(self, pos, len, cstr_ss(&repl)); }
-STC_INLINE void cstr_u8_replace_at(cstr* self, intptr_t bytepos, intptr_t u8len, csview repl)
- { cstr_replace_at_sv(self, bytepos, utf8_pos(cstr_str(self) + bytepos, u8len), repl); }
+STC_INLINE void cstr_u8_replace_at(cstr* self, intptr_t bytepos, intptr_t u8len, csubstr repl)
+ { cstr_replace_at_ss(self, bytepos, utf8_pos(cstr_str(self) + bytepos, u8len), repl); }
STC_INLINE void cstr_insert(cstr* self, intptr_t pos, const char* str)
- { cstr_replace_at_sv(self, pos, 0, c_sv(str, c_strlen(str))); }
+ { cstr_replace_at_ss(self, pos, 0, c_ss(str, c_strlen(str))); }
-STC_INLINE void cstr_insert_sv(cstr* self, intptr_t pos, csview sv)
- { cstr_replace_at_sv(self, pos, 0, sv); }
+STC_INLINE void cstr_insert_ss(cstr* self, intptr_t pos, csubstr sv)
+ { cstr_replace_at_ss(self, pos, 0, sv); }
-STC_INLINE void cstr_insert_s(cstr* self, intptr_t pos, cstr s) {
- csview sv = cstr_sv(&s);
- cstr_replace_at_sv(self, pos, 0, sv);
-}
+STC_INLINE void cstr_insert_s(cstr* self, intptr_t pos, cstr s)
+ { cstr_replace_at_ss(self, pos, 0, cstr_ss(&s)); }
STC_INLINE bool cstr_getline(cstr *self, FILE *fp)
{ return cstr_getdelim(self, '\n', fp); }
@@ -394,7 +400,7 @@ fn_tocase[] = {{tolower, utf8_casefold},
{tolower, utf8_tolower},
{toupper, utf8_toupper}};
-static cstr cstr_tocase(csview sv, int k) {
+static cstr cstr_tocase(csubstr sv, int k) {
cstr out = cstr_init();
char *buf = cstr_reserve(&out, sv.size*3/2);
const char *end = sv.str + sv.size;
@@ -415,26 +421,26 @@ static cstr cstr_tocase(csview sv, int k) {
return out;
}
-cstr cstr_casefold_sv(csview sv)
+cstr cstr_casefold_ss(csubstr sv)
{ return cstr_tocase(sv, 0); }
-cstr cstr_tolower_sv(csview sv)
+cstr cstr_tolower_ss(csubstr sv)
{ return cstr_tocase(sv, 1); }
-cstr cstr_toupper_sv(csview sv)
+cstr cstr_toupper_ss(csubstr sv)
{ return cstr_tocase(sv, 2); }
cstr cstr_tolower(const char* str)
- { return cstr_tolower_sv(c_sv(str, c_strlen(str))); }
+ { return cstr_tolower_ss(c_ss(str, c_strlen(str))); }
cstr cstr_toupper(const char* str)
- { return cstr_toupper_sv(c_sv(str, c_strlen(str))); }
+ { return cstr_toupper_ss(c_ss(str, c_strlen(str))); }
void cstr_lowercase(cstr* self)
- { cstr_take(self, cstr_tolower_sv(cstr_sv(self))); }
+ { cstr_take(self, cstr_tolower_ss(cstr_ss(self))); }
void cstr_uppercase(cstr* self)
- { cstr_take(self, cstr_toupper_sv(cstr_sv(self))); }
+ { cstr_take(self, cstr_toupper_ss(cstr_ss(self))); }
bool cstr_valid_utf8(const cstr* self)
{ return utf8_valid(cstr_str(self)); }
@@ -450,7 +456,7 @@ STC_DEF uint64_t cstr_hash(const cstr *self) {
return cfasthash(sv.str, sv.size);
}
-STC_DEF intptr_t cstr_find_sv(const cstr* self, csview search) {
+STC_DEF intptr_t cstr_find_ss(const cstr* self, csubstr search) {
csview sv = cstr_sv(self);
char* res = cstrnstrn(sv.str, search.str, sv.size, search.size);
return res ? (res - sv.str) : c_NPOS;
@@ -580,7 +586,7 @@ STC_DEF bool cstr_getdelim(cstr *self, const int delim, FILE *fp) {
}
}
-STC_DEF cstr cstr_replace_sv(csview in, csview search, csview repl, int32_t count) {
+STC_DEF cstr cstr_replace_ss(csubstr in, csubstr search, csubstr repl, int32_t count) {
cstr out = cstr_null;
intptr_t from = 0; char* res;
if (!count) count = INT32_MAX;
diff --git a/include/stc/csubstr.h b/include/stc/csubstr.h
new file mode 100644
index 00000000..152f7041
--- /dev/null
+++ b/include/stc/csubstr.h
@@ -0,0 +1,208 @@
+/* MIT License
+ *
+ * Copyright (c) 2023 Tyge Løvset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#define i_header // external linkage by default. override with i_static.
+#define _i_inc_utf8
+#include "utf8.h"
+
+#ifndef CSUBSTR_H_INCLUDED
+#define CSUBSTR_H_INCLUDED
+
+#define csubstr_init() c_ss_1("")
+#define csubstr_drop(p) c_default_drop(p)
+#define csubstr_clone(ss) c_default_clone(ss)
+#define csubstr_from_n(str, n) c_ss_2(str, n)
+
+STC_API csubstr_iter csubstr_advance(csubstr_iter it, intptr_t pos);
+STC_API intptr_t csubstr_find_ss(csubstr ss, csubstr search);
+STC_API uint64_t csubstr_hash(const csubstr *self);
+STC_API csubstr csubstr_slice_ex(csubstr ss, intptr_t p1, intptr_t p2);
+STC_API csubstr csubstr_substr_ex(csubstr ss, intptr_t pos, intptr_t n);
+STC_API csubstr csubstr_token(csubstr ss, const char* sep, intptr_t* start);
+
+STC_INLINE csubstr csubstr_from(const char* str)
+ { return c_LITERAL(csubstr){str, c_strlen(str)}; }
+STC_INLINE void csubstr_clear(csubstr* self) { *self = csubstr_init(); }
+STC_INLINE intptr_t csubstr_size(csubstr ss) { return ss.size; }
+STC_INLINE bool csubstr_empty(csubstr ss) { return ss.size == 0; }
+
+STC_INLINE bool csubstr_equals(csubstr ss, const char* str)
+ { intptr_t n = c_strlen(str); return ss.size == n && !c_memcmp(ss.str, str, n); }
+
+STC_INLINE intptr_t csubstr_find(csubstr ss, const char* str)
+ { return csubstr_find_ss(ss, c_ss_2(str, c_strlen(str))); }
+
+STC_INLINE bool csubstr_contains(csubstr ss, const char* str)
+ { return csubstr_find(ss, str) != c_NPOS; }
+
+STC_INLINE bool csubstr_starts_with(csubstr ss, const char* str) {
+ intptr_t n = c_strlen(str);
+ return n > ss.size ? false : !c_memcmp(ss.str, str, n);
+}
+
+STC_INLINE bool csubstr_ends_with(csubstr ss, const char* str) {
+ intptr_t n = c_strlen(str);
+ return n > ss.size ? false : !c_memcmp(ss.str + ss.size - n, str, n);
+}
+
+STC_INLINE csubstr csubstr_substr(csubstr ss, intptr_t pos, intptr_t n) {
+ if (pos + n > ss.size) n = ss.size - pos;
+ ss.str += pos, ss.size = n;
+ return ss;
+}
+
+STC_INLINE csubstr csubstr_slice(csubstr ss, intptr_t p1, intptr_t p2) {
+ if (p2 > ss.size) p2 = ss.size;
+ ss.str += p1, ss.size = p2 > p1 ? p2 - p1 : 0;
+ return ss;
+}
+
+/* utf8 iterator */
+STC_INLINE csubstr_iter csubstr_begin(const csubstr* self) {
+ if (!self->size) return c_LITERAL(csubstr_iter){NULL};
+ return c_LITERAL(csubstr_iter){.u8 = {{self->str, utf8_chr_size(self->str)},
+ self->str + self->size}};
+}
+STC_INLINE csubstr_iter csubstr_end(const csubstr* self) {
+ return c_LITERAL(csubstr_iter){.u8 = {{NULL}, self->str + self->size}};
+}
+STC_INLINE void csubstr_next(csubstr_iter* it) {
+ it->ref += it->u8.chr.size;
+ it->u8.chr.size = utf8_chr_size(it->ref);
+ if (it->ref == it->u8.end) it->ref = NULL;
+}
+
+/* utf8 */
+STC_INLINE intptr_t csubstr_u8_size(csubstr ss)
+ { return utf8_size_n(ss.str, ss.size); }
+
+STC_INLINE csubstr csubstr_u8_substr(csubstr ss, intptr_t bytepos, intptr_t u8len) {
+ ss.str += bytepos;
+ ss.size = utf8_pos(ss.str, u8len);
+ return ss;
+}
+
+STC_INLINE bool csubstr_valid_utf8(csubstr ss) // depends on src/utf8code.c
+ { return utf8_valid_n(ss.str, ss.size); }
+
+#define c_fortoken_ss(it, inputss, sep) \
+ for (struct { csubstr _inp, token, *ref; const char *_sep; intptr_t pos; } \
+ it = {._inp=inputss, .token=it._inp, .ref=&it.token, ._sep=sep} \
+ ; it.pos <= it._inp.size && (it.token = csubstr_token(it._inp, it._sep, &it.pos)).str ; )
+
+#define c_fortoken(it, input, sep) \
+ c_fortoken_ss(it, csubstr_from(input), sep)
+
+/* ---- Container helper functions ---- */
+
+STC_INLINE int csubstr_cmp(const csubstr* x, const csubstr* y) {
+ intptr_t n = x->size < y->size ? x->size : y->size;
+ int c = c_memcmp(x->str, y->str, n);
+ return c ? c : (int)(x->size - y->size);
+}
+
+STC_INLINE int csubstr_icmp(const csubstr* x, const csubstr* y)
+ { return utf8_icmp_ss(*x, *y); }
+
+STC_INLINE bool csubstr_eq(const csubstr* x, const csubstr* y)
+ { return x->size == y->size && !c_memcmp(x->str, y->str, x->size); }
+
+#endif // CSUBSTR_H_INCLUDED
+
+/* csubstr interaction with cstr: */
+#ifdef CSTR_H_INCLUDED
+
+STC_INLINE csubstr cstr_substr(const cstr* self, intptr_t pos, intptr_t n)
+ { return csubstr_substr(cstr_ss(self), pos, n); }
+
+STC_INLINE csubstr cstr_slice(const cstr* self, intptr_t p1, intptr_t p2)
+ { return csubstr_slice(cstr_ss(self), p1, p2); }
+
+STC_INLINE csubstr cstr_substr_ex(const cstr* self, intptr_t pos, intptr_t n)
+ { return csubstr_substr_ex(cstr_ss(self), pos, n); }
+
+STC_INLINE csubstr cstr_slice_ex(const cstr* self, intptr_t p1, intptr_t p2)
+ { return csubstr_slice_ex(cstr_ss(self), p1, p2); }
+
+STC_INLINE csubstr cstr_u8_substr(const cstr* self , intptr_t bytepos, intptr_t u8len)
+ { return csubstr_u8_substr(cstr_ss(self), bytepos, u8len); }
+#endif
+
+/* -------------------------- IMPLEMENTATION ------------------------- */
+#if defined i_implement || defined i_static
+#ifndef CSUBSTR_C_INCLUDED
+#define CSUBSTR_C_INCLUDED
+
+STC_DEF csubstr_iter csubstr_advance(csubstr_iter it, intptr_t pos) {
+ int inc = -1;
+ if (pos > 0) pos = -pos, inc = 1;
+ while (pos && it.ref != it.u8.end) pos += (*(it.ref += inc) & 0xC0) != 0x80;
+ it.u8.chr.size = utf8_chr_size(it.ref);
+ if (it.ref == it.u8.end) it.ref = NULL;
+ return it;
+}
+
+STC_DEF intptr_t csubstr_find_ss(csubstr ss, csubstr search) {
+ char* res = cstrnstrn(ss.str, search.str, ss.size, search.size);
+ return res ? (res - ss.str) : c_NPOS;
+}
+
+STC_DEF uint64_t csubstr_hash(const csubstr *self)
+ { return cfasthash(self->str, self->size); }
+
+STC_DEF csubstr csubstr_substr_ex(csubstr ss, intptr_t pos, intptr_t n) {
+ if (pos < 0) {
+ pos += ss.size;
+ if (pos < 0) pos = 0;
+ }
+ if (pos > ss.size) pos = ss.size;
+ if (pos + n > ss.size) n = ss.size - pos;
+ ss.str += pos, ss.size = n;
+ return ss;
+}
+
+STC_DEF csubstr csubstr_slice_ex(csubstr ss, intptr_t p1, intptr_t p2) {
+ if (p1 < 0) {
+ p1 += ss.size;
+ if (p1 < 0) p1 = 0;
+ }
+ if (p2 < 0) p2 += ss.size;
+ if (p2 > ss.size) p2 = ss.size;
+ ss.str += p1, ss.size = (p2 > p1 ? p2 - p1 : 0);
+ return ss;
+}
+
+STC_DEF csubstr csubstr_token(csubstr ss, const char* sep, intptr_t* start) {
+ intptr_t sep_size = c_strlen(sep);
+ csubstr slice = {ss.str + *start, ss.size - *start};
+ const char* res = cstrnstrn(slice.str, sep, slice.size, sep_size);
+ csubstr tok = {slice.str, res ? (res - slice.str) : slice.size};
+ *start += tok.size + sep_size;
+ return tok;
+}
+#endif // CSUBSTR_C_INCLUDED
+#endif // i_implement
+#undef i_static
+#undef i_header
+#undef i_implement
+#undef i_import
+#undef i_opt
diff --git a/include/stc/csview.h b/include/stc/csview.h
index bbf7cd8e..0d1ca36c 100644
--- a/include/stc/csview.h
+++ b/include/stc/csview.h
@@ -20,7 +20,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#define i_header // external linkage by default. override with i_static.
#define _i_inc_utf8
#include "utf8.h"
@@ -32,25 +31,26 @@
#define csview_clone(sv) c_default_clone(sv)
#define csview_from_n(str, n) c_sv_2(str, n)
-STC_API csview_iter csview_advance(csview_iter it, intptr_t pos);
-STC_API intptr_t csview_find_sv(csview sv, csview search);
-STC_API uint64_t csview_hash(const csview *self);
-STC_API csview csview_slice_ex(csview sv, intptr_t p1, intptr_t p2);
-STC_API csview csview_substr_ex(csview sv, intptr_t pos, intptr_t n);
-STC_API csview csview_token(csview sv, const char* sep, intptr_t* start);
-
STC_INLINE csview csview_from(const char* str)
- { return c_LITERAL(csview){str, c_strlen(str)}; }
+ { return csview_from_n(str, c_strlen(str)); }
STC_INLINE void csview_clear(csview* self) { *self = csview_init(); }
+STC_INLINE csubstr csview_ss(csview sv) { return c_ss_2(sv.str, sv.size); }
-STC_INLINE intptr_t csview_size(csview sv) { return sv.size; }
+STC_INLINE intptr_t csview_size(csview sv) { return sv.size; }
STC_INLINE bool csview_empty(csview sv) { return sv.size == 0; }
-STC_INLINE bool csview_equals(csview sv, const char* str)
- { intptr_t n = c_strlen(str); return sv.size == n && !c_memcmp(sv.str, str, n); }
+STC_INLINE bool csview_equals(csview sv, const char* str) {
+ intptr_t n = c_strlen(str);
+ return sv.size == n && !c_memcmp(sv.str, str, n);
+}
+
+STC_INLINE intptr_t csview_find_v(csview sv, csview search) {
+ char* res = cstrnstrn(sv.str, search.str, sv.size, search.size);
+ return res ? (res - sv.str) : c_NPOS;
+}
STC_INLINE intptr_t csview_find(csview sv, const char* str)
- { return csview_find_sv(sv, c_sv_2(str, c_strlen(str))); }
+ { return csview_find_v(sv, c_sv_2(str, c_strlen(str))); }
STC_INLINE bool csview_contains(csview sv, const char* str)
{ return csview_find(sv, str) != c_NPOS; }
@@ -65,55 +65,40 @@ STC_INLINE bool csview_ends_with(csview sv, const char* str) {
return n > sv.size ? false : !c_memcmp(sv.str + sv.size - n, str, n);
}
-STC_INLINE csview csview_substr(csview sv, intptr_t pos, intptr_t n) {
- if (pos + n > sv.size) n = sv.size - pos;
- sv.str += pos, sv.size = n;
- return sv;
-}
-
-STC_INLINE csview csview_slice(csview sv, intptr_t p1, intptr_t p2) {
- if (p2 > sv.size) p2 = sv.size;
- sv.str += p1, sv.size = p2 > p1 ? p2 - p1 : 0;
- return sv;
-}
-
/* utf8 iterator */
STC_INLINE csview_iter csview_begin(const csview* self) {
- if (!self->size) return c_LITERAL(csview_iter){NULL};
- return c_LITERAL(csview_iter){.u8 = {{self->str, utf8_chr_size(self->str)},
- self->str + self->size}};
+ if (!self->size) return c_LITERAL(csview_iter){.ref = NULL};
+ return c_LITERAL(csview_iter){.u8 = {{self->str, utf8_chr_size(self->str)}}};
}
STC_INLINE csview_iter csview_end(const csview* self) {
- return c_LITERAL(csview_iter){.u8 = {{NULL}, self->str + self->size}};
+ (void)self; return c_LITERAL(csview_iter){.ref = NULL};
}
STC_INLINE void csview_next(csview_iter* it) {
it->ref += it->u8.chr.size;
it->u8.chr.size = utf8_chr_size(it->ref);
- if (it->ref == it->u8.end) it->ref = NULL;
+ if (!*it->ref) it->ref = NULL;
+}
+STC_INLINE csview_iter csview_advance(csview_iter it, intptr_t pos) {
+ int inc = -1;
+ if (pos > 0) pos = -pos, inc = 1;
+ while (pos && *it.ref) pos += (*(it.ref += inc) & 0xC0) != 0x80;
+ it.u8.chr.size = utf8_chr_size(it.ref);
+ if (!*it.ref) it.ref = NULL;
+ return it;
}
-/* utf8 */
+/* utf8 size */
STC_INLINE intptr_t csview_u8_size(csview sv)
{ return utf8_size_n(sv.str, sv.size); }
-STC_INLINE csview csview_u8_substr(csview sv, intptr_t bytepos, intptr_t u8len) {
- sv.str += bytepos;
- sv.size = utf8_pos(sv.str, u8len);
- return sv;
-}
-
-STC_INLINE bool csview_valid_utf8(csview sv) // depends on src/utf8code.c
+/* utf8 validation: depends on src/utf8code.c */
+STC_INLINE bool csview_valid_utf8(csview sv)
{ return utf8_valid_n(sv.str, sv.size); }
-#define c_fortoken_sv(it, inputsv, sep) \
- for (struct { csview _inp, token, *ref; const char *_sep; intptr_t pos; } \
- it = {._inp=inputsv, .token=it._inp, .ref=&it.token, ._sep=sep} \
- ; it.pos <= it._inp.size && (it.token = csview_token(it._inp, it._sep, &it.pos)).str ; )
-
-#define c_fortoken(it, input, sep) \
- c_fortoken_sv(it, csview_from(input), sep)
+/* utf8 ignore case cmp: depends on src/utf8code.c */
+STC_INLINE int csview_icmp(const csview* x, const csview* y)
+ { return utf8_icmp_ss(c_ss_2(x->str, x->size), c_ss_2(y->str, y->size)); }
-/* ---- Container helper functions ---- */
STC_INLINE int csview_cmp(const csview* x, const csview* y) {
intptr_t n = x->size < y->size ? x->size : y->size;
@@ -121,87 +106,13 @@ STC_INLINE int csview_cmp(const csview* x, const csview* y) {
return c ? c : (int)(x->size - y->size);
}
-STC_INLINE int csview_icmp(const csview* x, const csview* y)
- { return utf8_icmp_sv(*x, *y); }
-
STC_INLINE bool csview_eq(const csview* x, const csview* y)
{ return x->size == y->size && !c_memcmp(x->str, y->str, x->size); }
-#endif // CSVIEW_H_INCLUDED
-
-/* csview interaction with cstr: */
-#ifdef CSTR_H_INCLUDED
-
-STC_INLINE csview cstr_substr(const cstr* self, intptr_t pos, intptr_t n)
- { return csview_substr(cstr_sv(self), pos, n); }
-
-STC_INLINE csview cstr_slice(const cstr* self, intptr_t p1, intptr_t p2)
- { return csview_slice(cstr_sv(self), p1, p2); }
-
-STC_INLINE csview cstr_substr_ex(const cstr* self, intptr_t pos, intptr_t n)
- { return csview_substr_ex(cstr_sv(self), pos, n); }
-
-STC_INLINE csview cstr_slice_ex(const cstr* self, intptr_t p1, intptr_t p2)
- { return csview_slice_ex(cstr_sv(self), p1, p2); }
-
-STC_INLINE csview cstr_u8_substr(const cstr* self , intptr_t bytepos, intptr_t u8len)
- { return csview_u8_substr(cstr_sv(self), bytepos, u8len); }
-#endif
-
-/* -------------------------- IMPLEMENTATION ------------------------- */
-#if defined i_implement || defined i_static
-#ifndef CSVIEW_C_INCLUDED
-#define CSVIEW_C_INCLUDED
-
-STC_DEF csview_iter csview_advance(csview_iter it, intptr_t pos) {
- int inc = -1;
- if (pos > 0) pos = -pos, inc = 1;
- while (pos && it.ref != it.u8.end) pos += (*(it.ref += inc) & 0xC0) != 0x80;
- it.u8.chr.size = utf8_chr_size(it.ref);
- if (it.ref == it.u8.end) it.ref = NULL;
- return it;
-}
-
-STC_DEF intptr_t csview_find_sv(csview sv, csview search) {
- char* res = cstrnstrn(sv.str, search.str, sv.size, search.size);
- return res ? (res - sv.str) : c_NPOS;
-}
-
-STC_DEF uint64_t csview_hash(const csview *self)
+STC_INLINE uint64_t csview_hash(const csview *self)
{ return cfasthash(self->str, self->size); }
-STC_DEF csview csview_substr_ex(csview sv, intptr_t pos, intptr_t n) {
- if (pos < 0) {
- pos += sv.size;
- if (pos < 0) pos = 0;
- }
- if (pos > sv.size) pos = sv.size;
- if (pos + n > sv.size) n = sv.size - pos;
- sv.str += pos, sv.size = n;
- return sv;
-}
-
-STC_DEF csview csview_slice_ex(csview sv, intptr_t p1, intptr_t p2) {
- if (p1 < 0) {
- p1 += sv.size;
- if (p1 < 0) p1 = 0;
- }
- if (p2 < 0) p2 += sv.size;
- if (p2 > sv.size) p2 = sv.size;
- sv.str += p1, sv.size = (p2 > p1 ? p2 - p1 : 0);
- return sv;
-}
-
-STC_DEF csview csview_token(csview sv, const char* sep, intptr_t* start) {
- intptr_t sep_size = c_strlen(sep);
- csview slice = {sv.str + *start, sv.size - *start};
- const char* res = cstrnstrn(slice.str, sep, slice.size, sep_size);
- csview tok = {slice.str, res ? (res - slice.str) : slice.size};
- *start += tok.size + sep_size;
- return tok;
-}
-#endif // CSVIEW_C_INCLUDED
-#endif // i_implement
+#endif // CSVIEW_H_INCLUDED
#undef i_static
#undef i_header
#undef i_implement
diff --git a/include/stc/forward.h b/include/stc/forward.h
index 572a319f..5c9c4f4d 100644
--- a/include/stc/forward.h
+++ b/include/stc/forward.h
@@ -39,8 +39,21 @@
#define forward_cqueue(CX, VAL) _c_cdeq_types(CX, VAL)
#define forward_cvec(CX, VAL) _c_cvec_types(CX, VAL)
-// csview
-typedef const char csview_value;
+// csubstr : string view, not necessarily null-terminated
+typedef const char csubstr_value;
+typedef struct csubstr {
+ csubstr_value* str;
+ intptr_t size;
+} csubstr;
+
+typedef union {
+ csubstr_value* ref;
+ struct { csubstr chr; csubstr_value* end; } u8;
+} csubstr_iter;
+
+
+// csview : null-terminated string view
+typedef csubstr_value csview_value;
typedef struct csview {
csview_value* str;
intptr_t size;
@@ -48,10 +61,11 @@ typedef struct csview {
typedef union {
csview_value* ref;
- struct { csview chr; csview_value* end; } u8;
+ struct { csubstr chr; } u8;
} csview_iter;
-// cstr
+
+// cstr : null-terminated string (short string optimized - sso)
typedef char cstr_value;
typedef struct { cstr_value* data; intptr_t size, cap; } cstr_buf;
typedef union cstr {
@@ -61,9 +75,10 @@ typedef union cstr {
typedef union {
cstr_value* ref;
- struct { csview chr; } u8;
+ struct { csubstr chr; } u8;
} cstr_iter;
+
#define c_true(...) __VA_ARGS__
#define c_false(...)
diff --git a/include/stc/priv/template.h b/include/stc/priv/template.h
index 65dee203..47225ec8 100644
--- a/include/stc/priv/template.h
+++ b/include/stc/priv/template.h
@@ -114,10 +114,10 @@
#endif
#elif defined i_key_ssv
#define i_keyclass cstr
- #define i_rawclass csview
- #define i_keyfrom cstr_from_sv
- #define i_keyto cstr_sv
- #define i_eq csview_eq
+ #define i_rawclass csubstr
+ #define i_keyfrom cstr_from_ss
+ #define i_keyto cstr_ss
+ #define i_eq csubstr_eq
#ifndef i_tag
#define i_tag ssv
#endif
@@ -232,9 +232,9 @@
#define i_valraw const char*
#elif defined i_val_ssv
#define i_valclass cstr
- #define i_valraw csview
- #define i_valfrom cstr_from_sv
- #define i_valto cstr_sv
+ #define i_valraw csubstr
+ #define i_valfrom cstr_from_ss
+ #define i_valto cstr_ss
#elif defined i_valboxed
#define i_valclass i_valboxed
#define i_valraw c_PASTE(i_valboxed, _raw)
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index 6d12856f..7d2adee0 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -48,7 +48,7 @@ extern uint32_t utf8_toupper(uint32_t c);
extern bool utf8_iscased(uint32_t c);
extern bool utf8_isword(uint32_t c);
extern bool utf8_valid_n(const char* s, intptr_t nbytes);
-extern int utf8_icmp_sv(csview s1, csview s2);
+extern int utf8_icmp_ss(csubstr s1, csubstr s2);
extern int utf8_encode(char *out, uint32_t c);
extern uint32_t utf8_peek_off(const char *s, int offset);
@@ -92,7 +92,7 @@ STC_INLINE uint32_t utf8_peek(const char* s) {
/* case-insensitive utf8 string comparison */
STC_INLINE int utf8_icmp(const char* s1, const char* s2) {
- return utf8_icmp_sv(c_sv(s1, INTPTR_MAX), c_sv(s2, INTPTR_MAX));
+ return utf8_icmp_ss(c_ss(s1, INTPTR_MAX), c_ss(s2, INTPTR_MAX));
}
STC_INLINE bool utf8_valid(const char* s) {
diff --git a/misc/benchmarks/various/string_bench_STC.cpp b/misc/benchmarks/various/string_bench_STC.cpp
index a5dfd901..9173d4b6 100644
--- a/misc/benchmarks/various/string_bench_STC.cpp
+++ b/misc/benchmarks/various/string_bench_STC.cpp
@@ -7,16 +7,16 @@
#define i_implement
#include <stc/cstr.h> // string
#define i_implement
-#include <stc/csview.h> // string_view
+#include <stc/csubstr.h> // string_view
#include <stc/algo/raii.h>
#define i_key_str
#include <stc/cvec.h> // vec of cstr with const char* lookup
-#define i_type cvec_sv // override default type name (cvec_csview)
-#define i_key csview
-#define i_cmp csview_cmp
-#include <stc/cvec.h> // cvec_vs: vec of csview
+#define i_type cvec_ss // override default type name (cvec_csubstr)
+#define i_key csubstr
+#define i_cmp csubstr_cmp
+#include <stc/cvec.h> // cvec_ss: vec of csubstr
#define i_key_str
#define i_val size_t
@@ -24,7 +24,7 @@
#define i_key_ssv
#define i_val size_t
-#include <stc/csmap.h> // sorted map of cstr, csview lookup
+#include <stc/csmap.h> // sorted map of cstr, csubstr lookup
#define i_key_str
#define i_val size_t
@@ -32,7 +32,7 @@
#define i_key_ssv
#define i_val size_t
-#include <stc/cmap.h> // unordered map of cstr, csview lookup
+#include <stc/cmap.h> // unordered map of cstr, csubstr lookup
cvec_str read_file(const char* name)
@@ -67,7 +67,7 @@ private:
std::chrono::high_resolution_clock::time_point begin;
};
-void initShortStringVec(cvec_str* vs, cvec_sv* vsv)
+void initShortStringVec(cvec_str* vs, cvec_ss* vsv)
{
cvec_str_drop(vs);
cvec_sv_clear(vsv);
@@ -101,14 +101,14 @@ void initShortStringVec(cvec_str* vs, cvec_sv* vsv)
size_t num = 0;
c_foreach (i, cvec_str, *vs)
{
- cvec_sv_push_back(vsv, cstr_sv(i.ref));
+ cvec_sv_push_back(vsv, cstr_ss(i.ref));
num += cstr_size(i.ref);
}
std::cout << "num strings: " << cvec_sv_size(vsv) << std::endl;
std::cout << "avg str len: " << num / (float)cvec_sv_size(vsv) << std::endl;
}
-void initLongStringVec(cvec_str* vs, cvec_sv* vsv)
+void initLongStringVec(cvec_str* vs, cvec_ss* vsv)
{
cvec_str_drop(vs);
cvec_sv_clear(vsv);
@@ -147,7 +147,7 @@ void initLongStringVec(cvec_str* vs, cvec_sv* vsv)
size_t num = 0;
c_foreach (i, cvec_str, *vs)
{
- cvec_sv_push_back(vsv, cstr_sv(i.ref));
+ cvec_sv_push_back(vsv, cstr_ss(i.ref));
num += cstr_size(i.ref);
}
std::cout << "num strings: " << cvec_sv_size(vsv) << std::endl;
@@ -175,7 +175,7 @@ void initMaps(const cvec_str* vs, csmap_str* mapTrans, csmap_ssv* mapSview,
void benchmark(
const cvec_str* vec_string,
- const cvec_sv* vec_stringview,
+ const cvec_ss* vec_stringview,
const csmap_str* mapTrans,
const csmap_ssv* mapSview,
const cmap_str* unordmapTrans,
@@ -187,7 +187,7 @@ const size_t MAX_LOOP = 2000;
int main(void)
{
c_auto (cvec_str, vec_string)
- c_auto (cvec_sv, vec_stringview)
+ c_auto (cvec_ss, vec_stringview)
c_auto (csmap_str, mapTrans)
c_auto (csmap_ssv, mapSview)
c_auto (cmap_str, unordmapTrans)
@@ -229,7 +229,7 @@ int main(void)
void benchmark(
const cvec_str* vec_string,
- const cvec_sv* vec_stringview,
+ const cvec_ss* vec_stringview,
const csmap_str* mapTrans,
const csmap_ssv* mapSview,
const cmap_str* unordmapTrans,
@@ -258,7 +258,7 @@ void benchmark(
stopwatch.start("Trans Map with string_view");
for (size_t i = 0; i < MAX_LOOP; ++i)
{
- c_foreach (j, cvec_sv, *vec_stringview)
+ c_foreach (j, cvec_ss, *vec_stringview)
{
const csmap_ssv_value* v = csmap_ssv_get(mapSview, *j.ref);
if (v)
@@ -286,7 +286,7 @@ void benchmark(
stopwatch.start("Trans Unord Map with string_view");
for (size_t i = 0; i < MAX_LOOP; ++i)
{
- c_foreach (j, cvec_sv, *vec_stringview)
+ c_foreach (j, cvec_ss, *vec_stringview)
{
const cmap_ssv_value* v = cmap_ssv_get(unordmapSview, *j.ref);
if (v)
diff --git a/misc/examples/algorithms/forfilter.c b/misc/examples/algorithms/forfilter.c
index c1426045..d058660d 100644
--- a/misc/examples/algorithms/forfilter.c
+++ b/misc/examples/algorithms/forfilter.c
@@ -2,7 +2,7 @@
#define i_import
#include <stc/cstr.h>
#define i_implement
-#include <stc/csview.h>
+#include <stc/csubstr.h>
#include <stc/algorithm.h>
#define i_type IVec
@@ -82,7 +82,7 @@ fn main() {
}
*/
#define i_type SVec
-#define i_keyclass csview
+#define i_keyclass csubstr
#include <stc/cstack.h>
void demo3(void)
@@ -94,11 +94,11 @@ void demo3(void)
SVec words_containing_i = {0};
c_forfilter (w, SVec, words,
- csview_contains(*w.ref, "i"))
+ csubstr_contains(*w.ref, "i"))
SVec_push(&words_containing_i, *w.ref);
c_foreach (w, SVec, words_containing_i)
- printf(" %.*s", c_SV(*w.ref));
+ printf(" %.*s", c_SS(*w.ref));
puts("");
c_drop(SVec, &words, &words_containing_i);
@@ -107,10 +107,10 @@ void demo3(void)
void demo4(void)
{
// Keep only uppercase letters and convert them to lowercase:
- csview s = c_sv("ab123cReAghNGnΩoEp"); // Ω = multi-byte
+ csubstr s = c_ss("ab123cReAghNGnΩoEp"); // Ω = multi-byte
cstr out = {0};
- c_forfilter (i, csview, s, utf8_isupper(utf8_peek(i.ref))) {
+ c_forfilter (i, csubstr, s, utf8_isupper(utf8_peek(i.ref))) {
char chr[4];
utf8_encode(chr, utf8_tolower(utf8_peek(i.ref)));
cstr_push(&out, chr);
diff --git a/misc/examples/regularexpressions/regex2.c b/misc/examples/regularexpressions/regex2.c
index a798b1a1..85890070 100644
--- a/misc/examples/regularexpressions/regex2.c
+++ b/misc/examples/regularexpressions/regex2.c
@@ -27,7 +27,7 @@ int main(void)
c_formatch (j, &re, s[i].input) {
c_forrange (k, cregex_captures(&re) + 1)
- printf(" submatch %lld: %.*s\n", k, c_SV(j.match[k]));
+ printf(" submatch %lld: %.*s\n", k, c_SS(j.match[k]));
}
}
cregex_drop(&re);
diff --git a/misc/examples/regularexpressions/regex_match.c b/misc/examples/regularexpressions/regex_match.c
index 11426d2d..6eaea781 100644
--- a/misc/examples/regularexpressions/regex_match.c
+++ b/misc/examples/regularexpressions/regex_match.c
@@ -1,7 +1,7 @@
#define i_import
#include <stc/cregex.h>
#define i_implement
-#include <stc/csview.h>
+#include <stc/csubstr.h>
#define i_key float
#include <stc/cstack.h>
@@ -28,7 +28,7 @@ int main(void)
printf(" %g\n", (double)*i.ref);
// extracts the numbers only to a comma separated string.
- cstr nums = cregex_replace_sv(&re, csview_from(str), " $0,", 0, NULL, CREG_STRIP);
+ cstr nums = cregex_replace_ss(&re, csubstr_from(str), " $0,", 0, NULL, CREG_STRIP);
printf("\n%s\n", cstr_str(&nums));
cstr_drop(&nums);
diff --git a/misc/examples/regularexpressions/regex_replace.c b/misc/examples/regularexpressions/regex_replace.c
index f1ea2711..f5fd8691 100644
--- a/misc/examples/regularexpressions/regex_replace.c
+++ b/misc/examples/regularexpressions/regex_replace.c
@@ -1,8 +1,8 @@
#define i_import
#include <stc/cregex.h>
-#include <stc/csview.h>
+#include <stc/csubstr.h>
-bool add_10_years(int i, csview match, cstr* out) {
+bool add_10_years(int i, csubstr match, cstr* out) {
if (i == 1) { // group 1 matches year
int year;
sscanf(match.str, "%4d", &year); // scan 4 chars only
@@ -47,7 +47,7 @@ int main(void)
printf("euros: %s\n", cstr_str(&str));
/* Strip out everything but the matches */
- cstr_take(&str, cregex_replace_sv(&re, csview_from(input), "$3.$2.$1;", 0, NULL, CREG_STRIP));
+ cstr_take(&str, cregex_replace_ss(&re, csubstr_from(input), "$3.$2.$1;", 0, NULL, CREG_STRIP));
printf("strip: %s\n", cstr_str(&str));
/* Wrap all words in ${} */
diff --git a/misc/examples/strings/cstr_match.c b/misc/examples/strings/cstr_match.c
index be03e981..3c41bd43 100644
--- a/misc/examples/strings/cstr_match.c
+++ b/misc/examples/strings/cstr_match.c
@@ -1,11 +1,11 @@
#define i_implement
#include <stc/cstr.h>
-#include <stc/csview.h>
+#include <stc/csubstr.h>
#include <stdio.h>
int main(void)
{
- cstr ss = cstr_lit("The quick brown fox jumps over the lazy dog.JPG");
+ cstr ss = cstr_from("The quick brown fox jumps over the lazy dog.JPG");
intptr_t pos = cstr_find_at(&ss, 0, "brown");
printf("%" c_ZI " [%s]\n", pos, pos == c_NPOS ? "<NULL>" : cstr_str(&ss) + pos);
@@ -16,11 +16,11 @@ int main(void)
printf("ends_with: %d\n", cstr_ends_with(&ss, ".JPG"));
cstr s1 = cstr_lit("hell😀 w😀rl🐨");
- csview ch1 = cstr_u8_chr(&s1, 7);
- csview ch2 = cstr_u8_chr(&s1, 10);
+ csubstr ch1 = cstr_u8_chr(&s1, 7);
+ csubstr ch2 = cstr_u8_chr(&s1, 10);
printf("%s\nsize: %" c_ZI ", %" c_ZI "\n", cstr_str(&s1), cstr_u8_size(&s1), cstr_size(&s1));
- printf("ch1: %.*s\n", c_SV(ch1));
- printf("ch2: %.*s\n", c_SV(ch2));
+ printf("ch1: %.*s\n", c_SS(ch1));
+ printf("ch2: %.*s\n", c_SS(ch2));
c_drop(cstr, &ss, &s1);
}
diff --git a/misc/examples/strings/replace.c b/misc/examples/strings/replace.c
index 59a56bf7..2411f1a7 100644
--- a/misc/examples/strings/replace.c
+++ b/misc/examples/strings/replace.c
@@ -20,13 +20,13 @@ int main(void)
cstr_replace_at(&s, 9, 5, s2); // "this is an example string." (1)
printf("(1) %s\n", cstr_str(&s));
- cstr_replace_at_sv(&s, 19, 6, c_sv(s3+7, 6)); // "this is an example phrase." (2)
+ cstr_replace_at_ss(&s, 19, 6, c_ss(s3+7, 6)); // "this is an example phrase." (2)
printf("(2) %s\n", cstr_str(&s));
cstr_replace_at(&s, 8, 10, "just a"); // "this is just a phrase." (3)
printf("(3) %s\n", cstr_str(&s));
- cstr_replace_at_sv(&s, 8, 6, c_sv("a shorty", 7)); // "this is a short phrase." (4)
+ cstr_replace_at_ss(&s, 8, 6, c_ss("a shorty", 7)); // "this is a short phrase." (4)
printf("(4) %s\n", cstr_str(&s));
cstr_replace_at(&s, 22, 1, "!!!"); // "this is a short phrase!!!" (5)
diff --git a/misc/examples/strings/splitstr.c b/misc/examples/strings/splitstr.c
index ef7ed174..6fa76d34 100644
--- a/misc/examples/strings/splitstr.c
+++ b/misc/examples/strings/splitstr.c
@@ -2,20 +2,20 @@
#define i_import // cstr + utf8 functions
#include <stc/cregex.h>
#define i_implement
-#include <stc/csview.h>
+#include <stc/csubstr.h>
int main(void)
{
- puts("Split with c_fortoken (csview):");
+ puts("Split with c_fortoken (csubstr):");
c_fortoken (i, "Hello World C99!", " ")
- printf("'%.*s'\n", c_SV(i.token));
+ printf("'%.*s'\n", c_SS(i.token));
puts("\nSplit with c_formatch (regex):");
cregex re = cregex_from("[^ ]+");
c_formatch (i, &re, " Hello World C99! ")
- printf("'%.*s'\n", c_SV(i.match[0]));
+ printf("'%.*s'\n", c_SS(i.match[0]));
cregex_drop(&re);
}
diff --git a/misc/examples/strings/sso_substr.c b/misc/examples/strings/sso_substr.c
index 687658df..3c6b1046 100644
--- a/misc/examples/strings/sso_substr.c
+++ b/misc/examples/strings/sso_substr.c
@@ -1,20 +1,20 @@
#define i_implement
#include <stc/cstr.h>
#define i_implement
-#include <stc/csview.h>
+#include <stc/csubstr.h>
int main(void)
{
cstr str = cstr_lit("We think in generalities, but we live in details.");
- csview sv1 = cstr_substr_ex(&str, 3, 5); // "think"
- intptr_t pos = cstr_find(&str, "live"); // position of "live"
- csview sv2 = cstr_substr_ex(&str, pos, 4); // "live"
- csview sv3 = cstr_slice_ex(&str, -8, -1); // "details"
- printf("%.*s, %.*s, %.*s\n", c_SV(sv1), c_SV(sv2), c_SV(sv3));
+ csubstr sv1 = cstr_substr_ex(&str, 3, 5); // "think"
+ intptr_t pos = cstr_find(&str, "live"); // position of "live"
+ csubstr sv2 = cstr_substr_ex(&str, pos, 4); // "live"
+ csubstr sv3 = cstr_slice_ex(&str, -8, -1); // "details"
+ printf("%.*s, %.*s, %.*s\n", c_SS(sv1), c_SS(sv2), c_SS(sv3));
cstr_assign(&str, "apples are green or red");
- cstr s2 = cstr_from_sv(cstr_substr_ex(&str, -3, 3)); // "red"
- cstr s3 = cstr_from_sv(cstr_substr_ex(&str, 0, 6)); // "apples"
+ cstr s2 = cstr_from_ss(cstr_substr_ex(&str, -3, 3)); // "red"
+ cstr s3 = cstr_from_ss(cstr_substr_ex(&str, 0, 6)); // "apples"
printf("%s %s: %d, %d\n", cstr_str(&s2), cstr_str(&s3),
cstr_is_long(&str), cstr_is_long(&s2));
c_drop (cstr, &str, &s2, &s3);
diff --git a/misc/examples/strings/sview_split.c b/misc/examples/strings/sview_split.c
index ac275da0..6abbf5e7 100644
--- a/misc/examples/strings/sview_split.c
+++ b/misc/examples/strings/sview_split.c
@@ -1,20 +1,20 @@
#define i_implement
#include <stc/cstr.h>
#define i_implement
-#include <stc/csview.h>
+#include <stc/csubstr.h>
int main(void)
{
// No memory allocations or string length calculations!
- const csview date = c_sv("2021/03/12");
+ const csubstr date = c_ss("2021/03/12");
intptr_t pos = 0;
- const csview year = csview_token(date, "/", &pos);
- const csview month = csview_token(date, "/", &pos);
- const csview day = csview_token(date, "/", &pos);
+ const csubstr year = csubstr_token(date, "/", &pos);
+ const csubstr month = csubstr_token(date, "/", &pos);
+ const csubstr day = csubstr_token(date, "/", &pos);
- printf("%.*s, %.*s, %.*s\n", c_SV(year), c_SV(month), c_SV(day));
+ printf("%.*s, %.*s, %.*s\n", c_SS(year), c_SS(month), c_SS(day));
- cstr y = cstr_from_sv(year), m = cstr_from_sv(month), d = cstr_from_sv(day);
+ cstr y = cstr_from_ss(year), m = cstr_from_ss(month), d = cstr_from_ss(day);
printf("%s, %s, %s\n", cstr_str(&y), cstr_str(&m), cstr_str(&d));
c_drop(cstr, &y, &m, &d);
}
diff --git a/misc/examples/strings/utf8replace_c.c b/misc/examples/strings/utf8replace_c.c
index 1d54486f..03a0442f 100644
--- a/misc/examples/strings/utf8replace_c.c
+++ b/misc/examples/strings/utf8replace_c.c
@@ -10,12 +10,12 @@ int main(void)
cstr_u8_replace_at(&hello,
cstr_u8_to_pos(&hello, 7),
1,
- c_sv("🐨")
+ c_ss("🐨")
);
printf("%s\n", cstr_str(&hello));
c_foreach (c, cstr, hello)
- printf("%.*s,", c_SV(c.u8.chr));
+ printf("%.*s,", c_SS(c.u8.chr));
cstr str = cstr_lit("scooby, dooby doo");
cstr_replace(&str, "oo", "00");
diff --git a/misc/tests/cregex_test.c b/misc/tests/cregex_test.c
index 4e192de6..7cd03930 100644
--- a/misc/tests/cregex_test.c
+++ b/misc/tests/cregex_test.c
@@ -1,6 +1,6 @@
#define i_import
#include <stc/cregex.h>
-#include <stc/csview.h>
+#include <stc/csubstr.h>
#include <stc/algo/raii.h>
#include "ctest.h"
@@ -14,7 +14,7 @@ CTEST(cregex, compile_match_char)
cregex re = cregex_from("äsdf");
ASSERT_EQ(re.error, 0);
- csview match;
+ csubstr match;
ASSERT_EQ(cregex_find(&re, inp="äsdf", &match, CREG_FULLMATCH), CREG_OK);
ASSERT_EQ(M_START(match), 0);
ASSERT_EQ(M_END(match), 5); // ä is two bytes wide
@@ -32,7 +32,7 @@ CTEST(cregex, compile_match_anchors)
cregex re = cregex_from(inp="^äs.f$");
ASSERT_EQ(re.error, 0);
- csview match;
+ csubstr match;
ASSERT_EQ(cregex_find(&re, inp="äsdf", &match), CREG_OK);
ASSERT_EQ(M_START(match), 0);
ASSERT_EQ(M_END(match), 5);
@@ -50,7 +50,7 @@ CTEST(cregex, compile_match_quantifiers1)
re = cregex_from("ä+");
ASSERT_EQ(re.error, 0);
- csview match;
+ csubstr match;
ASSERT_EQ(cregex_find(&re, inp="ääb", &match), CREG_OK);
ASSERT_EQ(M_START(match), 0);
ASSERT_EQ(M_END(match), 4);
@@ -70,7 +70,7 @@ CTEST(cregex, compile_match_quantifiers2)
re = cregex_from("bä*");
ASSERT_EQ(re.error, 0);
- csview match;
+ csubstr match;
ASSERT_EQ(cregex_find(&re, inp="bääb", &match), CREG_OK);
ASSERT_EQ(M_START(match), 0);
ASSERT_EQ(M_END(match), 5);
@@ -90,7 +90,7 @@ CTEST(cregex, compile_match_escaped_chars)
cregex re = cregex_from("\\n\\r\\t\\{");
ASSERT_EQ(re.error, 0);
- csview match;
+ csubstr match;
ASSERT_EQ(cregex_find(&re, "\n\r\t{", &match), CREG_OK);
ASSERT_EQ(cregex_find(&re, "\n\r\t", &match), CREG_NOMATCH);
@@ -108,7 +108,7 @@ CTEST(cregex, compile_match_class_simple)
re3 = cregex_from("\\D");
ASSERT_EQ(re3.error, 0);
- csview match;
+ csubstr match;
ASSERT_EQ(cregex_find(&re1, " " , &match), CREG_OK);
ASSERT_EQ(cregex_find(&re1, "\r", &match), CREG_OK);
ASSERT_EQ(cregex_find(&re1, "\n", &match), CREG_OK);
@@ -129,7 +129,7 @@ CTEST(cregex, compile_match_or)
re = cregex_from("as|df");
ASSERT_EQ(re.error, 0);
- csview match[4];
+ csubstr match[4];
ASSERT_EQ(cregex_find(&re, "as", match), CREG_OK);
ASSERT_EQ(cregex_find(&re, "df", match), CREG_OK);
@@ -146,7 +146,7 @@ CTEST(cregex, compile_match_class_complex_0)
cregex re = cregex_from("[asdf]");
ASSERT_EQ(re.error, 0);
- csview match;
+ csubstr match;
ASSERT_EQ(cregex_find(&re, "a", &match), CREG_OK);
ASSERT_EQ(cregex_find(&re, "s", &match), CREG_OK);
ASSERT_EQ(cregex_find(&re, "d", &match), CREG_OK);
@@ -160,7 +160,7 @@ CTEST(cregex, compile_match_class_complex_1)
cregex re = cregex_from("[a-zä0-9öA-Z]");
ASSERT_EQ(re.error, 0);
- csview match;
+ csubstr match;
ASSERT_EQ(cregex_find(&re, "a", &match), CREG_OK);
ASSERT_EQ(cregex_find(&re, "5", &match), CREG_OK);
ASSERT_EQ(cregex_find(&re, "A", &match), CREG_OK);
@@ -175,7 +175,7 @@ CTEST(cregex, compile_match_cap)
cregex re = cregex_from("(abc)d");
ASSERT_EQ(re.error, 0);
- csview match[4];
+ csubstr match[4];
ASSERT_EQ(cregex_find(&re, "abcd", match), CREG_OK);
ASSERT_EQ(cregex_find(&re, "llljabcdkk", match), CREG_OK);
ASSERT_EQ(cregex_find(&re, "abc", match), CREG_NOMATCH);
@@ -189,7 +189,7 @@ CTEST(cregex, search_all)
c_auto (cregex, re)
{
re = cregex_from("ab");
- csview m = {0};
+ csubstr m = {0};
int res;
ASSERT_EQ(re.error, CREG_OK);
inp="ab,ab,ab";
@@ -220,9 +220,9 @@ CTEST(cregex, captures_cap)
re = cregex_from("(ab)((cd)+)");
ASSERT_EQ(cregex_captures(&re), 3);
- csview cap[5];
+ csubstr cap[5];
ASSERT_EQ(cregex_find(&re, inp="xxabcdcde", cap), CREG_OK);
- ASSERT_TRUE(csview_equals(cap[0], "abcdcd"));
+ ASSERT_TRUE(csubstr_equals(cap[0], "abcdcd"));
ASSERT_EQ(M_END(cap[0]), 8);
ASSERT_EQ(M_START(cap[1]), 2);
@@ -235,7 +235,7 @@ CTEST(cregex, captures_cap)
}
}
-static bool add_10_years(int i, csview match, cstr* out) {
+static bool add_10_years(int i, csubstr match, cstr* out) {
if (i == 1) { // group 1 matches year
int year;
sscanf(match.str, "%4d", &year); // scan 4 chars only
@@ -280,7 +280,7 @@ CTEST(cregex, replace)
ASSERT_STREQ(cstr_str(&str), "start date: 31.12.2015, end date: 28.02.2022");
// Strip out everything but the matches
- cstr_take(&str, cregex_replace_sv(&re, csview_from(input), "$3.$2.$1;", 0, NULL, CREG_STRIP));
+ cstr_take(&str, cregex_replace_ss(&re, csubstr_from(input), "$3.$2.$1;", 0, NULL, CREG_STRIP));
ASSERT_STREQ(cstr_str(&str), "31.12.2015;28.02.2022;");
}
}
diff --git a/src/cregex.c b/src/cregex.c
index 975a5104..c045b9f3 100644
--- a/src/cregex.c
+++ b/src/cregex.c
@@ -100,7 +100,7 @@ typedef struct _Reprog
/*
* Sub expression matches
*/
-typedef csview _Resub;
+typedef csubstr _Resub;
/*
* substitution list
@@ -1215,8 +1215,8 @@ _regexec(const _Reprog *progp, /* program to run */
static void
-_build_subst(const char* replace, int nmatch, const csview match[],
- bool (*mfun)(int, csview, cstr*), cstr* subst) {
+_build_subst(const char* replace, int nmatch, const csubstr match[],
+ bool (*mfun)(int, csubstr, cstr*), cstr* subst) {
cstr_buf buf = cstr_buffer(subst);
intptr_t len = 0, cap = buf.cap;
char* dst = buf.data;
@@ -1233,7 +1233,7 @@ _build_subst(const char* replace, int nmatch, const csview match[],
if (replace[1] >= '0' && replace[1] <= '9' && replace[2] == ';')
{ g = g*10 + (replace[1] - '0'); replace += 2; }
if (g < nmatch) {
- csview m = mfun && mfun(g, match[g], &mstr) ? cstr_sv(&mstr) : match[g];
+ csubstr m = mfun && mfun(g, match[g], &mstr) ? cstr_ss(&mstr) : match[g];
if (len + m.size > cap)
dst = cstr_reserve(subst, cap += cap/2 + m.size);
for (int i = 0; i < m.size; ++i)
@@ -1270,7 +1270,7 @@ cregex_captures(const cregex* self) {
}
int
-cregex_find_4(const cregex* re, const char* input, csview match[], int mflags) {
+cregex_find_4(const cregex* re, const char* input, csubstr match[], int mflags) {
int res = _regexec(re->prog, input, cregex_captures(re) + 1, match, mflags);
switch (res) {
case 1: return CREG_OK;
@@ -1281,7 +1281,7 @@ cregex_find_4(const cregex* re, const char* input, csview match[], int mflags) {
int
cregex_find_pattern_4(const char* pattern, const char* input,
- csview match[], int cmflags) {
+ csubstr match[], int cmflags) {
cregex re = cregex_init();
int res = cregex_compile(&re, pattern, cmflags);
if (res != CREG_OK) return res;
@@ -1291,16 +1291,16 @@ cregex_find_pattern_4(const char* pattern, const char* input,
}
cstr
-cregex_replace_sv_6(const cregex* re, csview input, const char* replace, int count,
- bool (*mfun)(int, csview, cstr*), int rflags) {
+cregex_replace_ss_6(const cregex* re, csubstr input, const char* replace, int count,
+ bool (*mfun)(int, csubstr, cstr*), int rflags) {
cstr out = cstr_init();
cstr subst = cstr_init();
- csview match[CREG_MAX_CAPTURES];
+ csubstr match[CREG_MAX_CAPTURES];
int nmatch = cregex_captures(re) + 1;
if (!count) count = INT32_MAX;
bool copy = !(rflags & CREG_STRIP);
- while (count-- && cregex_find_sv(re, input, match) == CREG_OK) {
+ while (count-- && cregex_find_ss(re, input, match) == CREG_OK) {
_build_subst(replace, nmatch, match, mfun, &subst);
const intptr_t mpos = (match[0].str - input.str);
if (copy & (mpos > 0)) cstr_append_n(&out, input.str, mpos);
@@ -1308,19 +1308,19 @@ cregex_replace_sv_6(const cregex* re, csview input, const char* replace, int cou
input.str = match[0].str + match[0].size;
input.size -= mpos + match[0].size;
}
- if (copy) cstr_append_sv(&out, input);
+ if (copy) cstr_append_ss(&out, input);
cstr_drop(&subst);
return out;
}
cstr
cregex_replace_pattern_6(const char* pattern, const char* input, const char* replace, int count,
- bool (*mfun)(int, csview, cstr*), int crflags) {
+ bool (*mfun)(int, csubstr, cstr*), int crflags) {
cregex re = cregex_init();
if (cregex_compile(&re, pattern, crflags) != CREG_OK)
assert(0);
- csview sv = {input, c_strlen(input)};
- cstr out = cregex_replace_sv(&re, sv, replace, count, mfun, crflags);
+ csubstr ss = c_ss(input, c_strlen(input));
+ cstr out = cregex_replace_ss(&re, ss, replace, count, mfun, crflags);
cregex_drop(&re);
return out;
}
diff --git a/src/libstc.c b/src/libstc.c
index 462c97c4..b0d27350 100644
--- a/src/libstc.c
+++ b/src/libstc.c
@@ -1,7 +1,7 @@
#define i_import
#include "../include/stc/cregex.h" /* cstr. utf8, and cregex */
#define i_implement
-#include "../include/stc/csview.h"
+#include "../include/stc/csubstr.h"
#define i_implement
#include "../include/stc/crand.h"
#if __STDC_VERSION__ >= 201112L
diff --git a/src/singleupdate.sh b/src/singleupdate.sh
index e706dd97..be99d4a7 100644
--- a/src/singleupdate.sh
+++ b/src/singleupdate.sh
@@ -17,12 +17,12 @@ python singleheader.py $d/include/stc/cqueue.h $d/../stcsingle/stc/cqueue.h
python singleheader.py $d/include/stc/crand.h $d/../stcsingle/stc/crand.h
python singleheader.py $d/include/stc/cregex.h $d/../stcsingle/stc/cregex.h
python singleheader.py $d/include/stc/cset.h $d/../stcsingle/stc/cset.h
-
python singleheader.py $d/include/stc/csmap.h $d/../stcsingle/stc/csmap.h
python singleheader.py $d/include/stc/cspan.h $d/../stcsingle/stc/cspan.h
python singleheader.py $d/include/stc/csset.h $d/../stcsingle/stc/csset.h
python singleheader.py $d/include/stc/cstack.h $d/../stcsingle/stc/cstack.h
python singleheader.py $d/include/stc/cstr.h $d/../stcsingle/stc/cstr.h
+python singleheader.py $d/include/stc/csubstr.h $d/../stcsingle/stc/csubstr.h
python singleheader.py $d/include/stc/csview.h $d/../stcsingle/stc/csview.h
python singleheader.py $d/include/stc/cvec.h $d/../stcsingle/stc/cvec.h
python singleheader.py $d/include/stc/extend.h $d/../stcsingle/stc/extend.h
diff --git a/src/utf8code.c b/src/utf8code.c
index 4abf10ea..ddc4cb97 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -101,7 +101,7 @@ uint32_t utf8_toupper(uint32_t c) {
return c;
}
-int utf8_icmp_sv(const csview s1, const csview s2) {
+int utf8_icmp_ss(const csubstr s1, const csubstr s2) {
utf8_decode_t d1 = {.state=0}, d2 = {.state=0};
intptr_t j1 = 0, j2 = 0;
while ((j1 < s1.size) & (j2 < s2.size)) {