Reverted csubstr => csview. Sorry about that!

Added crawstr to become the null-terminated string view.
author: Tyge Løvset <[email protected]> 2023-08-14 16:46:24 +0200
committer: Tyge Løvset <[email protected]> 2023-08-14 16:46:24 +0200
commit: 78d8668e6d527070568a405408ed906e51055bf4 (patch)
tree: 60f963a36826acce264c7ecd0af3eb80502a4335 /docs
parent: 2b6b4785c5c26bc47d800c1a7c7a48784df2d57b (diff)
download: STC-modified-78d8668e6d527070568a405408ed906e51055bf4.tar.gz
STC-modified-78d8668e6d527070568a405408ed906e51055bf4.zip
5 files changed, 315 insertions, 315 deletions
diff --git a/docs/crawstr_api.md b/docs/crawstr_api.md
new file mode 100644
index 00000000..d44c302d
--- /dev/null
+++ b/docs/crawstr_api.md
@@ -0,0 +1,130 @@
+# STC [crawstr](../include/stc/crawstr.h): Null-terminated UTF8 String View
+![String](pics/string.jpg)
+
+The type **crawstr** is a ***null-terminated*** string view and refers to a constant contiguous sequence of
+char-elements with the first element of the sequence at position zero. The implementation holds two
+members: a pointer to constant char and a size. See [csview](csview_api.md) for a ***non null-terminated***
+string view/span type.
+
+Because **crawstr** is null-terminated, it can be an efficient replacement for `const char*`. It never
+allocates memory, and therefore need not be destructed. Its lifetime is limited by the source string
+storage. It keeps the length of the string, i.e. no need to call *strlen()* for various operations.
+
+## Header file
+
+All crawstr definitions and prototypes are available by including a single header file.
+
+```c
+#define i_implement
+#include <stc/cstr.h>
+#include <stc/crawstr.h>
+```
+## Methods
+
+```c
+crawstr         crawstr_from(const char* str);                          // construct from const char*
+crawstr         c_rs(const char literal_only[]);                        // construct from literal, no strlen()
+
+intptr_t        crawstr_size(crawstr rs);
+bool            crawstr_empty(crawstr rs);                              // check if size == 0
+void            crawstr_clear(crawstr* self);
+csview          crawstr_sv(crawstr rs);                                 // convert to csview type
+
+bool            crawstr_equals(crawstr rs, const char* str);
+intptr_t        crawstr_find(crawstr rs, const char* str);
+bool            crawstr_contains(crawstr rs, const char* str);
+bool            crawstr_starts_with(crawstr rs, const char* str);
+bool            crawstr_ends_with(crawstr rs, const char* str);
+```
+
+#### UTF8 methods
+```c
+intptr_t        crawstr_u8_size(crawstr rs);
+bool            crawstr_valid_utf8(crawstr rs);                         // depends on src/utf8code.c
+
+crawstr_iter    crawstr_begin(const crawstr* self);
+crawstr_iter    crawstr_end(const crawstr* self);
+void            crawstr_next(crawstr_iter* it);                         // utf8 codepoint step, not byte!
+crawstr_iter    crawstr_advance(crawstr_iter it, intptr_t n);
+
+                // from utf8.h
+intptr_t        utf8_size(const char *s);
+intptr_t        utf8_size_n(const char *s, intptr_t nbytes);            // number of UTF8 codepoints within n bytes
+const char*     utf8_at(const char *s, intptr_t index);                 // from UTF8 index to char* position
+intptr_t        utf8_pos(const char* s, intptr_t index);                // from UTF8 index to byte index position
+unsigned        utf8_chr_size(const char* s);                           // UTF8 character size: 1-4
+                // implemented in src/utf8code.c:
+bool            utf8_valid(const char* s);
+bool            utf8_valid_n(const char* s, intptr_t nbytes);
+uint32_t        utf8_decode(utf8_decode_t *d, uint8_t byte);            // decode next byte to utf8, return state.
+unsigned        utf8_encode(char *out, uint32_t codepoint);             // encode unicode cp into out buffer
+uint32_t        utf8_peek(const char* s);                               // codepoint value of character at s
+uint32_t        utf8_peek_off(const char* s, int offset);               // codepoint value at utf8 pos (may be negative)
+```
+
+#### Helper methods
+```c
+int             crawstr_cmp(const crawstr* x, const crawstr* y);
+int             crawstr_icmp(const crawstr* x, const crawstr* y);       // depends on src/utf8code.c
+bool            crawstr_eq(const crawstr* x, const crawstr* y);
+uint64_t        crawstr_hash(const crawstr* x);
+```
+
+## Types
+
+| Type name       | Type definition                            | Used to represent...     |
+|:----------------|:-------------------------------------------|:-------------------------|
+| `crawstr`       | `struct { const char *str; intptr_t size; }` | The string view type     |
+| `crawstr_value` | `char`                                     | The string element type  |
+| `crawstr_iter`  | `struct { crawstr_value *ref; }`           | UTF8 iterator            |
+
+## Example: UTF8 iteration and case conversion
+```c
+#define i_import
+#include <stc/cstr.h>
+#include <stc/crawstr.h>
+
+int main(void)
+{
+    cstr str = cstr_from("Liberté, égalité, fraternité.");
+    crawstr rs = cstr_rs(&str);
+
+    c_foreach (i, crawstr, rs)
+        printf("%.*s ", c_SV(i.u8.chr));
+    puts("");
+
+    cstr_uppercase(&str);
+    printf("%s\n", cstr_str(&str));
+
+    cstr_drop(&str);
+}
+```
+Output:
+```
+L i b e r t é ,   é g a l i t é ,   f r a t e r n i t é . 
+LIBERTÉ, ÉGALITÉ, FRATERNITÉ.
+```
+
+### Example 2: UTF8 replace
+```c
+#define i_import // include dependent utf8 definitions.
+#include <stc/cstr.h>
+
+int main(void)
+{
+    cstr s1 = cstr_lit("hell😀 w😀rld");
+
+    cstr_u8_replace_at(&s1, cstr_find(&s1, "😀rld"), 1, c_rs("ø"));
+    printf("%s\n", cstr_str(&s1));
+
+    c_foreach (i, cstr, s1)
+        printf("%.*s,", c_SV(i.u8.chr)); // u8.chr is a csview
+
+    cstr_drop(&s1);
+}
+```
+Output:
+```
+hell😀 wørld
+h,e,l,l,😀, ,w,ø,r,l,d,
+```
diff --git a/docs/cregex_api.md b/docs/cregex_api.md
index 98161fe9..52476e09 100644
--- a/docs/cregex_api.md
+++ b/docs/cregex_api.md
@@ -33,11 +33,11 @@ int         cregex_compile(cregex *self, const char* pattern, int cflags = CREG_
 int         cregex_captures(const cregex* self); 
 
             // return CREG_OK, CREG_NOMATCH, or CREG_MATCHERROR
-int         cregex_find(const cregex* re, const char* input, csubstr match[], int mflags = CREG_DEFAULT);
+int         cregex_find(const cregex* re, const char* input, csview match[], int mflags = CREG_DEFAULT);
             // Search inside input string-view only
-int         cregex_find_ss(const cregex* re, csubstr input, csubstr match[]);
+int         cregex_find_sv(const cregex* re, csview input, csview match[]);
             // All-in-one search (compile + find + drop)
-int         cregex_find_pattern(const char* pattern, const char* input, csubstr match[], int cmflags = CREG_DEFAULT);
+int         cregex_find_pattern(const char* pattern, const char* input, csview match[], int cmflags = CREG_DEFAULT);
 
             // Check if there are matches in input
 bool        cregex_is_match(const cregex* re, const char* input);
@@ -45,14 +45,14 @@ bool        cregex_is_match(const cregex* re, const char* input);
             // Replace all matches in input
 cstr        cregex_replace(const cregex* re, const char* input, const char* replace, int count = INT_MAX);
             // Replace count matches in input string-view. Optionally transform replacement.
-cstr        cregex_replace_ss(const cregex* re, csubstr input, const char* replace, int count = INT_MAX);
-cstr        cregex_replace_ss(const cregex* re, csubstr input, const char* replace, int count,
-                              bool(*transform)(int group, csubstr match, cstr* result), int rflags);
+cstr        cregex_replace_sv(const cregex* re, csview input, const char* replace, int count = INT_MAX);
+cstr        cregex_replace_sv(const cregex* re, csview input, const char* replace, int count,
+                              bool(*transform)(int group, csview match, cstr* result), int rflags);
 
             // All-in-one replacement (compile + find/replace + drop)
 cstr        cregex_replace_pattern(const char* pattern, const char* input, const char* replace, int count = INT_MAX);
 cstr        cregex_replace_pattern(const char* pattern, const char* input, const char* replace, int count,
-                                   bool(*transform)(int group, csubstr match, cstr* result), int rflags);
+                                   bool(*transform)(int group, csview match, cstr* result), int rflags);
             // destroy
 void        cregex_drop(cregex* self);
 ```
@@ -109,9 +109,9 @@ int main(void) {
     cregex re = cregex_from(pattern);
 
     // Lets find the first date in the string:
-    csubstr match[4]; // full-match, year, month, date.
+    csview match[4]; // full-match, year, month, date.
     if (cregex_find(&re, input, match) == CREG_OK)
-        printf("Found date: %.*s\n", c_SS(match[0]));
+        printf("Found date: %.*s\n", c_SV(match[0]));
     else
         printf("Could not find any date\n");
 
@@ -127,7 +127,7 @@ int main(void) {
 For a single match you may use the all-in-one function:
 ```c
 if (cregex_find_pattern(pattern, input, match))
-    printf("Found date: %.*s\n", c_SS(match[0]));
+    printf("Found date: %.*s\n", c_SV(match[0]));
 ```
 
 To use: `gcc first_match.c src/cregex.c src/utf8code.c`.
@@ -137,16 +137,16 @@ In order to use a callback function in the replace call, see `examples/regex_rep
 
 To iterate multiple matches in an input string, you may use
 ```c
-csubstr match[5] = {0};
+csview match[5] = {0};
 while (cregex_find(&re, input, match, CREG_NEXT) == CREG_OK)
     for (int k = 1; k <= cregex_captures(&re); ++k)
-        printf("submatch %d: %.*s\n", k, c_SS(match[k]));
+        printf("submatch %d: %.*s\n", k, c_SV(match[k]));
 ```
 There is also a for-loop macro to simplify it:
 ```c
 c_formatch (it, &re, input)
     for (int k = 1; k <= cregex_captures(&re); ++k)
-        printf("submatch %d: %.*s\n", k, c_SS(it.match[k]));
+        printf("submatch %d: %.*s\n", k, c_SV(it.match[k]));
 ```
 
 ## Using cregex in a project
@@ -154,7 +154,7 @@ c_formatch (it, &re, input)
 The easiest is to `#define i_import` before `#include <stc/cregex.h>`. Make sure to do that in one translation unit only.
 
 For reference, **cregex** uses the following files: 
-- `stc/cregex.h`, `stc/utf8.h`, `stc/csubstr.h`, `stc/cstr.h`, `stc/ccommon.h`, `stc/forward.h`
+- `stc/cregex.h`, `stc/utf8.h`, `stc/csview.h`, `stc/cstr.h`, `stc/ccommon.h`, `stc/forward.h`
 - `src/cregex.c`, `src/utf8code.c`.
 
 ## Regex Cheatsheet
diff --git a/docs/cstr_api.md b/docs/cstr_api.md
index 07b9b4c8..1da57b0c 100644
--- a/docs/cstr_api.md
+++ b/docs/cstr_api.md
@@ -22,8 +22,8 @@ cstr        cstr_init(void);                                        // construct
 cstr        cstr_lit(const char literal_only[]);                    // cstr from literal; no strlen() call.
 cstr        cstr_from(const char* str);                             // constructor using strlen()
 cstr        cstr_from_n(const char* str, intptr_t n);               // constructor with n first bytes of str
-cstr        cstr_from_sv(csview sv);                                // construct cstr from csview
-cstr        cstr_from_ss(csubstr ss);                               // construct cstr from csubstr
+cstr        cstr_from_sv(csview sv);                                // construct cstr from csview
+cstr        cstr_from_rs(crawstr rs);                               // construct cstr from crawstr
 cstr        cstr_with_capacity(intptr_t cap);
 cstr        cstr_with_size(intptr_t len, char fill);                // repeat fill len times
 cstr        cstr_from_fmt(const char* fmt, ...);                    // printf() formatting
@@ -35,7 +35,7 @@ void        cstr_drop(cstr* self);                                  // destructo
 
 const char* cstr_str(const cstr* self);                             // to const char*
 csview      cstr_sv(const cstr* self);                              // to csview
-csubstr     cstr_ss(const cstr* self);                              // to csubstr
+crawstr     cstr_rs(const cstr* self);                              // to crawstr
 char*       cstr_data(cstr* self);                                  // to mutable char*
 cstr_buf    cstr_buffer(cstr* self);                                // to mutable buffer (with capacity)
 
@@ -50,13 +50,13 @@ void        cstr_clear(cstr* self);
 
 char*       cstr_assign(cstr* self, const char* str);
 char*       cstr_assign_n(cstr* self, const char* str, intptr_t n); // assign n first bytes of str
-char*       cstr_assign_ss(cstr* self, csubstr ss);
+char*       cstr_assign_sv(cstr* self, csview ss);
 char*       cstr_copy(cstr* self, cstr s);                          // copy-assign a cstr
 int         cstr_printf(cstr* self, const char* fmt, ...);          // source and target must not overlap.
 
 char*       cstr_append(cstr* self, const char* str);
 char*       cstr_append_n(cstr* self, const char* str, intptr_t n); // append n first bytes of str
-char*       cstr_append_ss(cstr* self, csubstr str);
+char*       cstr_append_sv(cstr* self, csview str);
 char*       cstr_append_s(cstr* self, cstr str);
 int         cstr_append_fmt(cstr* self, const char* fmt, ...);      // printf() formatting
 char*       cstr_append_uninit(cstr* self, intptr_t len);           // return ptr to start of uninited data
@@ -65,19 +65,19 @@ void        cstr_push(cstr* self, const char* chr);                 // append on
 void        cstr_pop(cstr* self);                                   // pop one utf8 char
 
 void        cstr_insert(cstr* self, intptr_t pos, const char* ins);
-void        cstr_insert_ss(cstr* self, intptr_t pos, csubstr ins);
+void        cstr_insert_sv(cstr* self, intptr_t pos, csview ins);
 void        cstr_insert_s(cstr* self, intptr_t pos, cstr ins);
 
 void        cstr_erase(cstr* self, intptr_t pos, intptr_t len);     // erase len bytes from pos
 
 void        cstr_replace(cstr* self, const char* search, const char* repl, unsigned count = MAX_INT);
-cstr        cstr_replace_ss(csubstr in, csubstr search, csubstr repl, unsigned count);
+cstr        cstr_replace_sv(csview in, csview search, csview repl, unsigned count);
 void        cstr_replace_at(cstr* self, intptr_t pos, intptr_t len, const char* repl); // replace at a pos
-void        cstr_replace_at_ss(cstr* self, intptr_t pos, intptr_t len, const csubstr repl);
+void        cstr_replace_at_sv(cstr* self, intptr_t pos, intptr_t len, const csview repl);
 void        cstr_replace_at_s(cstr* self, intptr_t pos, intptr_t len, cstr repl);
 
 bool        cstr_equals(const cstr* self, const char* str);
-bool        cstr_equals_ss(const cstr* self, csubstr ss);
+bool        cstr_equals_sv(const cstr* self, csview ss);
 bool        cstr_equals_s(const cstr* self, cstr s);
 
 intptr_t    cstr_find(const cstr* self, const char* search);
@@ -85,11 +85,11 @@ intptr_t    cstr_find_at(const cstr* self, intptr_t pos, const char* search); //
 bool        cstr_contains(const cstr* self, const char* search);
 
 bool        cstr_starts_with(const cstr* self, const char* str);
-bool        cstr_starts_with_ss(const cstr* self, csubstr ss);
+bool        cstr_starts_with_sv(const cstr* self, csview ss);
 bool        cstr_starts_with_s(const cstr* self, cstr s);
 
 bool        cstr_ends_with(const cstr* self, const char* str);
-bool        cstr_ends_with_ss(const cstr* self, csubstr ss);
+bool        cstr_ends_with_sv(const cstr* self, csview ss);
 bool        cstr_ends_with_s(const cstr* self, cstr s);
 
 bool        cstr_getline(cstr *self, FILE *stream);                 // cstr_getdelim(self, '\n', stream)
@@ -102,8 +102,8 @@ intptr_t    cstr_u8_size(const cstr* self);                         // number of
 intptr_t    cstr_u8_size_n(const cstr self, intptr_t nbytes);       // utf8 size within n bytes  
 intptr_t    cstr_u8_to_pos(const cstr* self, intptr_t u8idx);       // byte pos offset at utf8 codepoint index
 const char* cstr_u8_at(const cstr* self, intptr_t u8idx);           // char* position at utf8 codepoint index
-csubstr     cstr_u8_chr(const cstr* self, intptr_t u8idx);          // get utf8 character as a csubstr
-void        cstr_u8_replace_at(cstr* self, intptr_t bytepos, intptr_t u8len, csubstr repl); // replace u8len utf8 chars
+csview      cstr_u8_chr(const cstr* self, intptr_t u8idx);          // get utf8 character as a csview
+void        cstr_u8_replace_at(cstr* self, intptr_t bytepos, intptr_t u8len, csview repl); // replace u8len utf8 chars
 void        cstr_u8_erase(cstr* self, intptr_t bytepos, intptr_t u8len); // erase u8len codepoints from pos
 
 // iterate utf8 codepoints
@@ -114,14 +114,14 @@ cstr_iter   cstr_advance(cstr_iter it, intptr_t n);
 
 // utf8 functions requires linking with src/utf8code.c symbols:
 bool        cstr_valid_utf8(const cstr* self);                      // check if str is valid utf8
-cstr        cstr_casefold_ss(csubstr ss);                           // returns new casefolded utf8 cstr
+cstr        cstr_casefold_sv(csview ss);                           // returns new casefolded utf8 cstr
 
 cstr        cstr_tolower(const char* str);                          // returns new lowercase utf8 cstr
-cstr        cstr_tolower_ss(csubstr ss);                            // returns new lowercase utf8 cstr
+cstr        cstr_tolower_sv(csview ss);                            // returns new lowercase utf8 cstr
 void        cstr_lowercase(cstr* self);                             // transform cstr to lowercase utf8
 
 cstr        cstr_toupper(const char* str);                          // returns new uppercase utf8 cstr
-cstr        cstr_toupper_ss(csubstr ss);                            // returns new uppercase utf8 cstr
+cstr        cstr_toupper_sv(csview ss);                            // returns new uppercase utf8 cstr
 void        cstr_uppercase(cstr* self);                             // transform cstr to uppercase utf8
  
 int         cstr_icmp(const cstr* s1, const cstr* s2);              // utf8 case-insensitive comparison
@@ -146,7 +146,7 @@ char*       cstrnstrn(const char* str, const char* search, intptr_t slen, intptr
 |:----------------|:---------------------------------------------|:---------------------|
 | `cstr`          | `struct { ... }`                             | The string type      |
 | `cstr_value`    | `char`                                       | String element type  |
-| `csubstr`       | `struct { const char *str; intptr_t size; }` | String view type     |
+| `csview`        | `struct { const char *str; intptr_t size; }` | String view type     |
 | `cstr_buf`      | `struct { char *data; intptr_t size, cap; }` | String buffer type   |
 
 ## Constants and macros
diff --git a/docs/csubstr_api.md b/docs/csubstr_api.md
deleted file mode 100644
index 7094cf82..00000000
--- a/docs/csubstr_api.md
+++ /dev/null
@@ -1,221 +0,0 @@
-# STC [csubstr](../include/stc/csubstr.h): Sub-string View
-![String](pics/string.jpg)
-
-The type **csubstr** is a non-null terminated string view and can refer to a constant contiguous sequence of
-char-elements with the first element of the sequence at position zero. The implementation holds two members:
-a pointer to constant char and a size.
-
-Because **csubstr** is non-null terminated, it is not a replacent view for `const char*` - see [csview](csview_api.md)
-for that. **csubstr** never allocates memory, and therefore need not be destructed. Its lifetime is limited by
-the source string storage. It keeps the length of the string, and does not need to call *strlen()* to acquire
-the length.
-
-- **csubstr** iterators works on UTF8 codepoints - like **cstr** and **csview** (see Example 2).
-- Because it is null-terminated, it must be printed the following way:
-```c
-printf("%.*s", c_SS(sstr));
-```
-
-See the c++ class [std::basic_string_view](https://en.cppreference.com/w/cpp/string/basic_string_view) for a functional
-description.
-
-## Header file
-
-All csubstr definitions and prototypes are available by including a single header file.
-
-```c
-#define i_implement
-#include <stc/cstr.h>
-#include <stc/csubstr.h> // after cstr.h: include extra cstr-csubstr functions
-```
-## Methods
-
-```c
-csubstr         c_ss(const char literal_only[]);                         // construct from literal, no strlen()
-csubstr         c_ss(const char* str, intptr_t n);                       // construct from str and length n
-csubstr         csubstr_from(const char* str);                           // construct from const char*
-csubstr         csubstr_from_n(const char* str, intptr_t n);             // alias for c_ss(str, n)
-
-intptr_t        csubstr_size(csubstr ss);
-bool            csubstr_empty(csubstr ss);
-void            csubstr_clear(csubstr* self);
-
-bool            csubstr_equals(csubstr ss, const char* str);
-intptr_t        csubstr_equals_ss(csubstr ss, csubstr find);
-intptr_t        csubstr_find(csubstr ss, const char* str);
-intptr_t        csubstr_find_ss(csubstr ss, csubstr find);
-bool            csubstr_contains(csubstr ss, const char* str);
-bool            csubstr_starts_with(csubstr ss, const char* str);
-bool            csubstr_ends_with(csubstr ss, const char* str);
-csubstr         csubstr_substr(csubstr ss, intptr_t pos, intptr_t n);
-csubstr         csubstr_slice(csubstr ss, intptr_t pos1, intptr_t pos2);
-
-csubstr         csubstr_substr_ex(csubstr ss, intptr_t pos, intptr_t n);     // negative pos count from end
-csubstr         csubstr_slice_ex(csubstr ss, intptr_t pos1, intptr_t pos2);  // negative pos1, pos2 count from end
-csubstr         csubstr_token(csubstr ss, const char* sep, intptr_t* start); // *start > ss.size after last token
-```
-
-#### UTF8 methods
-```c
-intptr_t        csubstr_u8_size(csubstr ss);
-csubstr         csubstr_u8_substr(csubstr ss, intptr_t bytepos, intptr_t u8len);
-bool            csubstr_valid_utf8(csubstr ss);                              // requires linking with src/utf8code.c
- 
-csubstr_iter    csubstr_begin(const csubstr* self);
-csubstr_iter    csubstr_end(const csubstr* self);
-void            csubstr_next(csubstr_iter* it);                              // utf8 codepoint step, not byte!
-csubstr_iter    csubstr_advance(csubstr_iter it, intptr_t n);
-```
-
-#### cstr methods returning csubstr
-```c
-csubstr         cstr_slice(const cstr* self, intptr_t pos1, intptr_t pos2);
-csubstr         cstr_slice_ex(const cstr* self, intptr_t pos1, intptr_t pos2); // see csubstr_slice_ex()
-csubstr         cstr_substr(const cstr* self, intptr_t pos, intptr_t n);
-csubstr         cstr_substr_ex(const cstr* self, intptr_t pos, intptr_t n);    // see csubstr_substr_ex()
-csubstr         cstr_u8_substr(const cstr* self, intptr_t bytepos, intptr_t u8len);
-```
-#### Iterate tokens with *c_fortoken*, *c_fortoken_ss*
-
-To iterate tokens in an input string separated by a string:
-```c
-c_fortoken (i, "hello, one, two, three", ", ")
-    printf("token: %.*s\n", c_SS(i.token));
-```
-
-#### Helper methods
-```c
-int             csubstr_cmp(const csubstr* x, const csubstr* y);
-int             csubstr_icmp(const csubstr* x, const csubstr* y);
-bool            csubstr_eq(const csubstr* x, const csubstr* y);
-uint64_t        csubstr_hash(const csubstr* x);
-```
-
-## Types
-
-| Type name       | Type definition                            | Used to represent...     |
-|:----------------|:-------------------------------------------|:-------------------------|
-| `csubstr`        | `struct { const char *str; intptr_t size; }` | The string view type     |
-| `csubstr_value`  | `char`                                     | The string element type  |
-| `csubstr_iter`   | `struct { csubstr_value *ref; }`            | UTF8 iterator            |
-
-## Constants and macros
-
-| Name           | Value                | Usage                                        |
-|:---------------|:---------------------|:---------------------------------------------|
-| `c_SS(ss)`     | printf argument      | `printf("ss: %.*s\n", c_SS(ss));`            |
-
-## Example
-```c
-#define i_implement
-#include <stc/cstr.h>
-#include <stc/csubstr.h>
-
-int main(void)
-{
-    cstr str1 = cstr_from("We think in generalities, but we live in details.");
-                                                        // (quoting Alfred N. Whitehead)
-
-    csubstr ss1 = cstr_substr_ex(&str1, 3, 5);          // "think"
-    intptr_t pos = cstr_find(&str1, "live");            // position of "live" in str1
-    csubstr ss2 = cstr_substr_ex(&str1, pos, 4);        // get "live"
-    csubstr ss3 = cstr_slice_ex(&str1, -8, -1);         // get "details"
-    printf("%.*s %.*s %.*s\n",
-        c_SS(ss1), c_SS(ss2), c_SS(ss3));
-    cstr s1 = cstr_lit("Apples are red");
-    cstr s2 = cstr_from_ss(cstr_substr_ex(&s1, -3, 3)); // "red"
-    cstr s3 = cstr_from_ss(cstr_substr_ex(&s1, 0, 6));  // "Apples"
-    printf("%s %s\n", cstr_str(&s2), cstr_str(&s3));
-
-    c_drop(cstr, &str1, &s1, &s2, &s3);
-}
-```
-Output:
-```
-think live details
-red Apples
-```
-
-### Example 2: UTF8 handling
-```c
-#define i_import // include dependent cstr, utf8 and cregex function definitions.
-#include <stc/cstr.h>
-
-int main(void)
-{
-    cstr s1 = cstr_lit("hell😀 w😀rld");
-
-    cstr_u8_replace_at(&s1, cstr_find(&s1, "😀rld"), 1, c_ss("ø"));
-    printf("%s\n", cstr_str(&s1));
-
-    c_foreach (i, cstr, s1)
-        printf("%.*s,", c_SS(i.u8.chr));
-
-    cstr_drop(&s1);
-}
-```
-Output:
-```
-hell😀 wørld
-h,e,l,l,😀, ,w,ø,r,l,d,
-```
-
-### Example 3: csubstr tokenizer (string split)
-Splits strings into tokens. *print_split()* makes **no** memory allocations or *strlen()* calls,
-and does not depend on null-terminated strings. *string_split()* function returns a vector of cstr.
-```c
-#include <stdio.h>
-#include <stc/csubstr.h>
-
-void print_split(csubstr input, const char* sep)
-{
-    c_fortoken_ss (i, input, sep)
-        printf("[%.*s]\n", c_SS(i.token));
-    puts("");
-}
-#define i_implement
-#include <stc/cstr.h>
-#define i_key_str
-#include <stc/cstack.h>
-
-cstack_str string_split(csubstr input, const char* sep)
-{
-    cstack_str out = cstack_str_init();
-    
-    c_fortoken_ss (i, input, sep)
-        cstack_str_push(&out, cstr_from_ss(i.token));
-
-    return out;
-}
-
-int main(void)
-{
-    print_split(c_ss("//This is a//double-slash//separated//string"), "//");
-    print_split(c_ss("This has no matching separator"), "xx");
-
-    cstack_str s = string_split(c_ss("Split,this,,string,now,"), ",");
-
-    c_foreach (i, cstack_str, s)
-        printf("[%s]\n", cstr_str(i.ref));
-    puts("");
-
-    cstack_str_drop(&s);
-}
-```
-Output:
-```
-[]
-[This is a]
-[double-slash]
-[separated]
-[string]
-
-[This has no matching separator]
-
-[Split]
-[this]
-[]
-[string]
-[now]
-[]
-```
diff --git a/docs/csview_api.md b/docs/csview_api.md
index d28e3ed2..eafc6854 100644
--- a/docs/csview_api.md
+++ b/docs/csview_api.md
@@ -1,14 +1,23 @@
-# STC [csview](../include/stc/csview.h): Null-terminated UTF8 String View
+# STC [csview](../include/stc/csview.h): Sub-string View
 ![String](pics/string.jpg)
 
-The type **csview** is a ***null-terminated*** string view and refers to a constant contiguous sequence of
-char-elements with the first element of the sequence at position zero. The implementation holds two
-members: a pointer to constant char and a size. See [csubstr](csubstr_api.md) for a ***non null-terminated***
-string view/span type.
+The type **csview** is a non-null terminated string view and can refer to a constant contiguous sequence of
+char-elements with the first element of the sequence at position zero. The implementation holds two members:
+a pointer to constant char and a size.
 
-Because **csview** is null-terminated, it can be an efficient replacent for `const char*`. It never
-allocates memory, and therefore need not be destructed. Its lifetime is limited by the source string
-storage. It keeps the length of the string, i.e. no need to call *strlen()* for various operations.
+Because **csview** is non-null terminated, it is not an ideal replacement view for `const char*` - see [crawstr](crawstr_api.md)
+for that. **csview** never allocates memory, and therefore need not be destructed. Its lifetime is limited by
+the source string storage. It keeps the length of the string, and does not need to call *strlen()* to acquire
+the length.
+
+- **csview** iterators work on UTF8 codepoints - like **cstr** and **crawstr** (see Example 2).
+- Because it is not null-terminated, it must be printed the following way:
+```c
+printf("%.*s", c_SV(sstr));
+```
+
+See the c++ class [std::basic_string_view](https://en.cppreference.com/w/cpp/string/basic_string_view) for a functional
+description.
 
 ## Header file
 
@@ -22,52 +31,64 @@ All csview definitions and prototypes are available by including a single header
 ## Methods
 
 ```c
-csview          c_sv(const char literal_only[]);                        // construct from literal, no strlen()
-csview          csview_from(const char* str);                           // construct from const char*
+csview         c_sv(const char literal_only[]);                         // construct from literal, no strlen()
+csview         c_sv(const char* str, intptr_t n);                       // construct from str and length n
+csview         csview_from(const char* str);                           // construct from const char*
+csview         csview_from_n(const char* str, intptr_t n);             // alias for c_sv(str, n)
+
+intptr_t       csview_size(csview sv);
+bool           csview_empty(csview sv);
+void           csview_clear(csview* self);
+
+bool           csview_equals(csview sv, const char* str);
+bool           csview_equals_sv(csview sv, csview find);
+intptr_t       csview_find(csview sv, const char* str);
+intptr_t       csview_find_sv(csview sv, csview find);
+bool           csview_contains(csview sv, const char* str);
+bool           csview_starts_with(csview sv, const char* str);
+bool           csview_ends_with(csview sv, const char* str);
+csview         csview_substr(csview sv, intptr_t pos, intptr_t n);
+csview         csview_slice(csview sv, intptr_t pos1, intptr_t pos2);
+
+csview         csview_substr_ex(csview sv, intptr_t pos, intptr_t n);     // negative pos count from end
+csview         csview_slice_ex(csview sv, intptr_t pos1, intptr_t pos2);  // negative pos1, pos2 count from end
+csview         csview_token(csview sv, const char* sep, intptr_t* start); // *start > sv.size after last token
+```
 
-intptr_t        csview_size(csview sv);
-bool            csview_empty(csview sv);                                // check if size == 0
-void            csview_clear(csview* self);
-csubstr         csview_ss(csview sv);                                   // convert to csubstr type
+#### UTF8 methods
+```c
+intptr_t       csview_u8_size(csview sv);
+csview         csview_u8_substr(csview sv, intptr_t bytepos, intptr_t u8len);
+bool           csview_valid_utf8(csview sv);                              // requires linking with src/utf8code.c
+ 
+csview_iter    csview_begin(const csview* self);
+csview_iter    csview_end(const csview* self);
+void           csview_next(csview_iter* it);                              // utf8 codepoint step, not byte!
+csview_iter    csview_advance(csview_iter it, intptr_t n);
+```
 
-bool            csview_equals(csview sv, const char* str);
-intptr_t        csview_find(csview sv, const char* str);
-bool            csview_contains(csview sv, const char* str);
-bool            csview_starts_with(csview sv, const char* str);
-bool            csview_ends_with(csview sv, const char* str);
+#### cstr methods returning csview
+```c
+csview         cstr_slice(const cstr* self, intptr_t pos1, intptr_t pos2);
+csview         cstr_slice_ex(const cstr* self, intptr_t pos1, intptr_t pos2); // see csview_slice_ex()
+csview         cstr_substr(const cstr* self, intptr_t pos, intptr_t n);
+csview         cstr_substr_ex(const cstr* self, intptr_t pos, intptr_t n);    // see csview_substr_ex()
+csview         cstr_u8_substr(const cstr* self, intptr_t bytepos, intptr_t u8len);
 ```
+#### Iterate tokens with *c_fortoken*, *c_fortoken_sv*
 
-#### UTF8 methods
+To iterate tokens in an input string separated by a string:
 ```c
-intptr_t        csview_u8_size(csview sv);
-bool            csview_valid_utf8(csview sv);                           // depends on src/utf8code.c
-
-csview_iter     csview_begin(const csview* self);
-csview_iter     csview_end(const csview* self);
-void            csview_next(csview_iter* it);                           // utf8 codepoint step, not byte!
-csview_iter     csview_advance(csview_iter it, intptr_t n);
-
-                // from utf8.h
-intptr_t        utf8_size(const char *s);
-intptr_t        utf8_size_n(const char *s, intptr_t nbytes);            // number of UTF8 codepoints within n bytes
-const char*     utf8_at(const char *s, intptr_t index);                 // from UTF8 index to char* position
-intptr_t        utf8_pos(const char* s, intptr_t index);                // from UTF8 index to byte index position
-unsigned        utf8_chr_size(const char* s);                           // UTF8 character size: 1-4
-                // implemented in src/utf8code.c:
-bool            utf8_valid(const char* s);
-bool            utf8_valid_n(const char* s, intptr_t nbytes);
-uint32_t        utf8_decode(utf8_decode_t *d, uint8_t byte);            // decode next byte to utf8, return state.
-unsigned        utf8_encode(char *out, uint32_t codepoint);             // encode unicode cp into out buffer
-uint32_t        utf8_peek(const char* s);                               // codepoint value of character at s
-uint32_t        utf8_peek_off(const char* s, int offset);               // codepoint value at utf8 pos (may be negative)
+c_fortoken (i, "hello, one, two, three", ", ")
+    printf("token: %.*s\n", c_SV(i.token));
 ```
 
 #### Helper methods
 ```c
-int             csview_cmp(const csview* x, const csview* y);
-int             csview_icmp(const csview* x, const csview* y);          // depends on src/utf8code.c:
-bool            csview_eq(const csview* x, const csview* y);
-uint64_t        csview_hash(const csview* x);
+int            csview_cmp(const csview* x, const csview* y);
+int            csview_icmp(const csview* x, const csview* y);
+bool           csview_eq(const csview* x, const csview* y);
+uint64_t       csview_hash(const csview* x);
 ```
 
 ## Types
@@ -78,36 +99,46 @@ uint64_t        csview_hash(const csview* x);
 | `csview_value`  | `char`                                     | The string element type  |
 | `csview_iter`   | `struct { csview_value *ref; }`            | UTF8 iterator            |
 
-## Example: UTF8 iteration and case conversion
+## Constants and macros
+
+| Name           | Value                | Usage                                        |
+|:---------------|:---------------------|:---------------------------------------------|
+| `c_SV(sv)`     | printf argument      | `printf("sv: %.*s\n", c_SV(sv));`            |
+
+## Example
 ```c
-#define i_import
+#define i_implement
 #include <stc/cstr.h>
 #include <stc/csview.h>
 
 int main(void)
 {
-    cstr str = cstr_from("Liberté, égalité, fraternité.");
-    csview sv = cstr_sv(&str);
-
-    c_foreach (i, csview, sv)
-        printf("%.*s ", c_SS(i.u8.chr));
-    puts("");
-
-    cstr_uppercase(&str);
-    printf("%s\n", cstr_str(&str));
-
-    cstr_drop(&str);
+    cstr str1 = cstr_from("We think in generalities, but we live in details.");
+                                                        // (quoting Alfred N. Whitehead)
+
+    csview ss1 = cstr_substr_ex(&str1, 3, 5);          // "think"
+    intptr_t pos = cstr_find(&str1, "live");            // position of "live" in str1
+    csview ss2 = cstr_substr_ex(&str1, pos, 4);        // get "live"
+    csview ss3 = cstr_slice_ex(&str1, -8, -1);         // get "details"
+    printf("%.*s %.*s %.*s\n",
+        c_SV(ss1), c_SV(ss2), c_SV(ss3));
+    cstr s1 = cstr_lit("Apples are red");
+    cstr s2 = cstr_from_sv(cstr_substr_ex(&s1, -3, 3)); // "red"
+    cstr s3 = cstr_from_sv(cstr_substr_ex(&s1, 0, 6));  // "Apples"
+    printf("%s %s\n", cstr_str(&s2), cstr_str(&s3));
+
+    c_drop(cstr, &str1, &s1, &s2, &s3);
 }
 ```
 Output:
 ```
-L i b e r t é ,   é g a l i t é ,   f r a t e r n i t é . 
-LIBERTÉ, ÉGALITÉ, FRATERNITÉ.
+think live details
+red Apples
 ```
 
-### Example 2: UTF8 replace
+### Example 2: UTF8 handling
 ```c
-#define i_import // include dependent utf8 definitions.
+#define i_import // include dependent cstr, utf8 and cregex function definitions.
 #include <stc/cstr.h>
 
 int main(void)
@@ -118,7 +149,7 @@ int main(void)
     printf("%s\n", cstr_str(&s1));
 
     c_foreach (i, cstr, s1)
-        printf("%.*s,", c_SS(i.u8.chr)); // u8.chr is a csubstr
+        printf("%.*s,", c_SV(i.u8.chr));
 
     cstr_drop(&s1);
 }
@@ -128,3 +159,63 @@ Output:
 hell😀 wørld
 h,e,l,l,😀, ,w,ø,r,l,d,
 ```
+
+### Example 3: csview tokenizer (string split)
+Splits strings into tokens. *print_split()* makes **no** memory allocations or *strlen()* calls,
+and does not depend on null-terminated strings. The *string_split()* function returns a `cstack_str` (a stack of cstr).
+```c
+#include <stdio.h>
+#include <stc/csview.h>
+
+void print_split(csview input, const char* sep)
+{
+    c_fortoken_sv (i, input, sep)
+        printf("[%.*s]\n", c_SV(i.token));
+    puts("");
+}
+#define i_implement
+#include <stc/cstr.h>
+#define i_key_str
+#include <stc/cstack.h>
+
+cstack_str string_split(csview input, const char* sep)
+{
+    cstack_str out = cstack_str_init();
+    
+    c_fortoken_sv (i, input, sep)
+        cstack_str_push(&out, cstr_from_sv(i.token));
+
+    return out;
+}
+
+int main(void)
+{
+    print_split(c_sv("//This is a//double-slash//separated//string"), "//");
+    print_split(c_sv("This has no matching separator"), "xx");
+
+    cstack_str s = string_split(c_sv("Split,this,,string,now,"), ",");
+
+    c_foreach (i, cstack_str, s)
+        printf("[%s]\n", cstr_str(i.ref));
+    puts("");
+
+    cstack_str_drop(&s);
+}
+```
+Output:
+```
+[]
+[This is a]
+[double-slash]
+[separated]
+[string]
+
+[This has no matching separator]
+
+[Split]
+[this]
+[]
+[string]
+[now]
+[]
+```
author	Tyge Løvset <[email protected]>	2023-08-14 16:46:24 +0200
committer	Tyge Løvset <[email protected]>	2023-08-14 16:46:24 +0200
commit	78d8668e6d527070568a405408ed906e51055bf4 (patch)
tree	60f963a36826acce264c7ecd0af3eb80502a4335 /docs
parent	2b6b4785c5c26bc47d800c1a7c7a48784df2d57b (diff)
download	STC-modified-78d8668e6d527070568a405408ed906e51055bf4.tar.gz STC-modified-78d8668e6d527070568a405408ed906e51055bf4.zip