summaryrefslogtreecommitdiffhomepage
path: root/docs/csview_api.md
diff options
context:
space:
mode:
author_Tradam <[email protected]>2023-09-08 01:29:47 +0000
committerGitHub <[email protected]>2023-09-08 01:29:47 +0000
commit3c76c7f3d5db3f9586a90d03f8fbb02d79de9acd (patch)
treeafbe4b540967223911f7c5de36559b82154f02f3 /docs/csview_api.md
parent0841165881871ee01b782129be681209aeed2423 (diff)
parent1a72205fe05c2375cfd380dd8381a8460d9ed8d1 (diff)
downloadSTC-modified-modified.tar.gz
STC-modified-modified.zip
Merge branch 'stclib:master' into modifiedHEADmodified
Diffstat (limited to 'docs/csview_api.md')
-rw-r--r--docs/csview_api.md166
1 files changed, 79 insertions, 87 deletions
diff --git a/docs/csview_api.md b/docs/csview_api.md
index ec3bf121..76a803a8 100644
--- a/docs/csview_api.md
+++ b/docs/csview_api.md
@@ -1,87 +1,79 @@
-# STC [csview](../include/stc/csview.h): String View
+# STC [csview](../include/stc/csview.h): Sub-string View
![String](pics/string.jpg)
-The type **csview** is a string view and can refer to a constant contiguous sequence of char-elements with the first
-element of the sequence at position zero. The implementation holds two members: a pointer to constant char and a size.
+The type **csview** is a non-null terminated string view and can refer to a constant contiguous
+sequence of char-elements with the first element of the sequence at position zero. The implementation
+holds two members: a pointer to constant char and a size.
-**csview** is an efficient replacent for `const char*`. It never allocates memory, and therefore need not be destructed.
-Its lifetime is limited by the source string storage. It keeps the length of the string, and does not call *strlen()*
-when passing it around. It is faster when using`csview` as convertion type (raw) than `const char*` in associative
-containers with cstr keys.
+Because **csview** is non-null terminated, it cannot be a replacent view for `const char*` -
+see [crawstr](crawstr_api.md) for that. **csview** never allocates memory, and therefore need not be
+destructed. Its lifetime is limited by the source string storage. It keeps the length of the string,
+and does not need to call *strlen()* to acquire the length.
-Note: a **csview** may ***not be null-terminated***, and must therefore be printed like:
-`printf("%.*s", csview_ARG(sv))`.
+- **csview** iterators works on UTF8 codepoints - like **cstr** and **crawstr** (see Example 2).
+- Because it is null-terminated, it must be printed the following way:
+```c
+printf("%.*s", c_SV(sstr));
+```
-See the c++ class [std::basic_string_view](https://en.cppreference.com/w/cpp/string/basic_string_view) for a functional
-description.
+See the c++ class [std::basic_string_view](https://en.cppreference.com/w/cpp/string/basic_string_view)
+for a functional description.
## Header file
All csview definitions and prototypes are available by including a single header file.
```c
-#include <stc/cstr.h> // optional, include cstr+csview functionality
-#include <stc/csview.h>
+#define i_implement
+#include <stc/cstr.h>
+#include <stc/csview.h> // after cstr.h: include extra cstr-csview functions
```
## Methods
```c
-csview c_sv(const char literal_only[]); // construct from literal, no strlen()
-csview c_sv(const char* str, intptr_t n); // construct from str and length n
-csview csview_lit(const char literal_only[]); // alias for c_sv(lit)
-csview csview_from(const char* str); // construct from const char*
-csview csview_from_n(const char* str, intptr_t n); // alias for c_sv(str, n)
-
-intptr_t csview_size(csview sv);
-bool csview_empty(csview sv);
-void csview_clear(csview* self);
-
-bool csview_equals(csview sv, csview sv2);
-intptr_t csview_find(csview sv, const char* str);
-intptr_t csview_find_sv(csview sv, csview find);
-bool csview_contains(csview sv, const char* str);
-bool csview_starts_with(csview sv, const char* str);
-bool csview_ends_with(csview sv, const char* str);
-
-csview csview_substr_ex(csview sv, intptr_t pos, intptr_t n); // negative pos count from end
-csview csview_slice_ex(csview sv, intptr_t p1, intptr_t p2); // negative p1, p2 count from end
-csview csview_token(csview sv, const char* sep, intptr_t* start); // *start > sv.size after last token
+csview c_sv(const char literal_only[]); // construct from literal, no strlen()
+csview c_sv(const char* str, intptr_t n); // construct from str and length n
+csview csview_from(const char* str); // construct from const char*
+csview csview_from_n(const char* str, intptr_t n); // alias for c_sv(str, n)
+
+intptr_t csview_size(csview sv);
+bool csview_empty(csview sv);
+void csview_clear(csview* self);
+
+bool csview_equals(csview sv, const char* str);
+intptr_t csview_equals_sv(csview sv, csview find);
+intptr_t csview_find(csview sv, const char* str);
+intptr_t csview_find_sv(csview sv, csview find);
+bool csview_contains(csview sv, const char* str);
+bool csview_starts_with(csview sv, const char* str);
+bool csview_ends_with(csview sv, const char* str);
+csview csview_substr(csview sv, intptr_t pos, intptr_t n);
+csview csview_slice(csview sv, intptr_t pos1, intptr_t pos2);
+
+csview csview_substr_ex(csview sv, intptr_t pos, intptr_t n); // negative pos count from end
+csview csview_slice_ex(csview sv, intptr_t pos1, intptr_t pos2); // negative pos1, pos2 count from end
+csview csview_token(csview sv, const char* sep, intptr_t* start); // *start > sv.size after last token
```
#### UTF8 methods
```c
-intptr_t csview_u8_size(csview sv);
-csview csview_u8_substr(csview sv, intptr_t bytepos, intptr_t u8len);
-bool csview_valid_utf8(csview sv); // requires linking with src/utf8code.c
-
-csview_iter csview_begin(const csview* self);
-csview_iter csview_end(const csview* self);
-void csview_next(csview_iter* it); // utf8 codepoint step, not byte!
-csview_iter csview_advance(csview_iter it, intptr_t n);
-
- // from utf8.h
-intptr_t utf8_size(const char *s);
-intptr_t utf8_size_n(const char *s, intptr_t nbytes); // number of UTF8 codepoints within n bytes
-const char* utf8_at(const char *s, intptr_t index); // from UTF8 index to char* position
-intptr_t utf8_pos(const char* s, intptr_t index); // from UTF8 index to byte index position
-unsigned utf8_chr_size(const char* s); // UTF8 character size: 1-4
- // implemented in src/utf8code.c:
-bool utf8_valid(const char* s);
-bool utf8_valid_n(const char* s, intptr_t nbytes);
-uint32_t utf8_decode(utf8_decode_t *d, uint8_t byte); // decode next byte to utf8, return state.
-unsigned utf8_encode(char *out, uint32_t codepoint); // encode unicode cp into out buffer
-uint32_t utf8_peek(const char* s); // codepoint value of character at s
-uint32_t utf8_peek_off(const char* s, int offset); // codepoint value at utf8 pos (may be negative)
-```
-
-#### Extended cstr methods
-```c
-csview cstr_substr(const cstr* self, intptr_t pos, intptr_t n);
-csview cstr_substr_ex(const cstr* s, intptr_t pos, intptr_t n); // negative pos count from end
-csview cstr_u8_substr(const cstr* self, intptr_t bytepos, intptr_t u8len);
+intptr_t csview_u8_size(csview sv);
+csview csview_u8_substr(csview sv, intptr_t bytepos, intptr_t u8len);
+bool csview_valid_utf8(csview sv); // requires linking with src/utf8code.c
+
+csview_iter csview_begin(const csview* self);
+csview_iter csview_end(const csview* self);
+void csview_next(csview_iter* it); // utf8 codepoint step, not byte!
+csview_iter csview_advance(csview_iter it, intptr_t n);
+```
-csview cstr_slice(const cstr* self, intptr_t p1, intptr_t p2);
-csview cstr_slice_ex(const cstr* s, intptr_t p, intptr_t q); // negative p or q count from end
+#### cstr methods returning csview
+```c
+csview cstr_slice(const cstr* self, intptr_t pos1, intptr_t pos2);
+csview cstr_slice_ex(const cstr* self, intptr_t pos1, intptr_t pos2); // see csview_slice_ex()
+csview cstr_substr(const cstr* self, intptr_t pos, intptr_t n);
+csview cstr_substr_ex(const cstr* self, intptr_t pos, intptr_t n); // see csview_substr_ex()
+csview cstr_u8_substr(const cstr* self, intptr_t bytepos, intptr_t u8len);
```
#### Iterate tokens with *c_fortoken*, *c_fortoken_sv*
@@ -93,46 +85,46 @@ c_fortoken (i, "hello, one, two, three", ", ")
#### Helper methods
```c
-int csview_cmp(const csview* x, const csview* y);
-int csview_icmp(const csview* x, const csview* y);
-bool csview_eq(const csview* x, const csview* y);
-uint64_t csview_hash(const csview* x);
+int csview_cmp(const csview* x, const csview* y);
+int csview_icmp(const csview* x, const csview* y);
+bool csview_eq(const csview* x, const csview* y);
+uint64_t csview_hash(const csview* x);
```
## Types
| Type name | Type definition | Used to represent... |
|:----------------|:-------------------------------------------|:-------------------------|
-| `csview` | `struct { const char *str; intptr_t size; }` | The string view type |
-| `csview_value` | `char` | The string element type |
-| `csview_iter` | `struct { csview_value *ref; }` | UTF8 iterator |
+| `csview` | `struct { const char *buf; intptr_t size; }` | The string view type |
+| `csview_value` | `const char` | The string element type |
+| `csview_iter` | `union { csview_value *ref; csview chr; }` | UTF8 iterator |
## Constants and macros
| Name | Value | Usage |
|:---------------|:---------------------|:---------------------------------------------|
-| `csview_NULL` | same as `c_sv("")` | `sview = csview_NULL;` |
| `c_SV(sv)` | printf argument | `printf("sv: %.*s\n", c_SV(sv));` |
## Example
```c
+#define i_implement
#include <stc/cstr.h>
#include <stc/csview.h>
-int main ()
+int main(void)
{
- cstr str1 = cstr_lit("We think in generalities, but we live in details.");
+ cstr str1 = cstr_from("We think in generalities, but we live in details.");
// (quoting Alfred N. Whitehead)
- csview sv1 = cstr_substr(&str1, 3, 5); // "think"
- intptr_t pos = cstr_find(&str1, "live"); // position of "live" in str1
- csview sv2 = cstr_substr(&str1, pos, 4); // get "live"
- csview sv3 = cstr_slice(&str1, -8, -1); // get "details"
+ csview ss1 = cstr_substr_ex(&str1, 3, 5); // "think"
+ intptr_t pos = cstr_find(&str1, "live"); // position of "live" in str1
+ csview ss2 = cstr_substr_ex(&str1, pos, 4); // get "live"
+ csview ss3 = cstr_slice_ex(&str1, -8, -1); // get "details"
printf("%.*s %.*s %.*s\n",
- c_SV(sv1), c_SV(sv2), c_SV(sv3));
+ c_SV(ss1), c_SV(ss2), c_SV(ss3));
cstr s1 = cstr_lit("Apples are red");
- cstr s2 = cstr_from_sv(cstr_substr(&s1, -3, 3)); // "red"
- cstr s3 = cstr_from_sv(cstr_substr(&s1, 0, 6)); // "Apples"
+ cstr s2 = cstr_from_sv(cstr_substr_ex(&s1, -3, 3)); // "red"
+ cstr s3 = cstr_from_sv(cstr_substr_ex(&s1, 0, 6)); // "Apples"
printf("%s %s\n", cstr_str(&s2), cstr_str(&s3));
c_drop(cstr, &str1, &s1, &s2, &s3);
@@ -146,10 +138,10 @@ red Apples
### Example 2: UTF8 handling
```c
+#define i_import // include dependent cstr, utf8 and cregex function definitions.
#include <stc/cstr.h>
-#include <stc/csview.h>
-int main()
+int main(void)
{
cstr s1 = cstr_lit("hell😀 w😀rld");
@@ -157,7 +149,7 @@ int main()
printf("%s\n", cstr_str(&s1));
c_foreach (i, cstr, s1)
- printf("%.*s,", c_SV(i.u8.chr));
+ printf("%.*s,", c_SV(i.chr));
cstr_drop(&s1);
}
@@ -181,9 +173,9 @@ void print_split(csview input, const char* sep)
printf("[%.*s]\n", c_SV(i.token));
puts("");
}
-
+#define i_implement
#include <stc/cstr.h>
-#define i_val_str
+#define i_key_str
#include <stc/cstack.h>
cstack_str string_split(csview input, const char* sep)
@@ -196,7 +188,7 @@ cstack_str string_split(csview input, const char* sep)
return out;
}
-int main()
+int main(void)
{
print_split(c_sv("//This is a//double-slash//separated//string"), "//");
print_split(c_sv("This has no matching separator"), "xx");