summaryrefslogtreecommitdiffhomepage
path: root/docs/csview_api.md
blob: 4085a7081cc9451d4760b692990bd9bf17d109b0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# STC [csview](../include/stc/csview.h): String View
![String](pics/string.jpg)

The type **csview** is a string view and can refer to a constant contiguous sequence of char-elements with the first
element of the sequence at position zero. The implementation holds two members: a pointer to constant char and a size.

**csview** is an efficient replacement for `const char*`. It never allocates memory, and therefore need not be destructed.
Its lifetime is limited by the source string storage. It keeps the length of the string, and does not call *strlen()*
when passing it around. It is faster when using `csview` as conversion type (raw) than `const char*` in associative
containers with cstr keys.

Note: a **csview** may ***not be null-terminated***, and must therefore be printed like:
`printf("%.*s", c_ARGSV(sv))`.

See the C++ class [std::basic_string_view](https://en.cppreference.com/w/cpp/string/basic_string_view) for a functional
description.

## Header file

All csview definitions and prototypes are available by including a single header file.

```c
#include <stc/cstr.h> // optional, include cstr+csview functionality
#include <stc/csview.h>
```
## Methods

```c
csview          c_SV(const char literal_only[]);                         // construct from literal, no strlen()
csview          c_SV(const char* str, size_t n);                         // construct from str and length n
csview          csview_lit(const char literal_only[]);                   // alias for c_SV(lit)
csview          csview_from(const char* str);                            // construct from const char*
csview          csview_from_n(const char* str, size_t n);                // alias for c_SV(str, n)

size_t          csview_size(csview sv);
bool            csview_empty(csview sv);
void            csview_clear(csview* self);

bool            csview_equals(csview sv, csview sv2);
size_t          csview_find(csview sv, const char* str);
size_t          csview_find_sv(csview sv, csview find);
bool            csview_contains(csview sv, const char* str);
bool            csview_starts_with(csview sv, const char* str);
bool            csview_ends_with(csview sv, const char* str);

csview          csview_substr_ex(csview sv, intptr_t pos, size_t n);     // negative pos count from end
csview          csview_slice_ex(csview sv, intptr_t p1, intptr_t p2);    // negative p1, p2 count from end
csview          csview_token(csview sv, const char* sep, size_t* start); // *start > sv.size after last token
```

#### UTF8 methods
```c
size_t          csview_u8_size(csview sv);
csview          csview_u8_substr(csview sv, size_t bytepos, size_t u8len);
bool            csview_valid_utf8(csview sv);                            // requires linking with src/utf8code.c

csview_iter     csview_begin(const csview* self);
csview_iter     csview_end(const csview* self);
void            csview_next(csview_iter* it);                            // utf8 codepoint step, not byte!
csview_iter     csview_advance(csview_iter it, intptr_t n);

// from utf8.h
size_t          utf8_size(const char *s);
size_t          utf8_size_n(const char *s, size_t nbytes);               // number of UTF8 codepoints within n bytes
const char*     utf8_at(const char *s, size_t index);                    // from UTF8 index to char* position
size_t          utf8_pos(const char* s, size_t index);                   // from UTF8 index to byte index position
unsigned        utf8_chr_size(const char* s);                            // UTF8 character size: 1-4
// implemented in src/utf8code.c:
bool            utf8_valid(const char* s);
bool            utf8_valid_n(const char* s, size_t nbytes);
uint32_t        utf8_decode(utf8_decode_t *d, uint8_t byte);             // decode next byte to utf8, return state.
unsigned        utf8_encode(char *out, uint32_t codepoint);              // encode unicode cp into out buffer
uint32_t        utf8_peek(const char* s);                                // codepoint value of character at s
uint32_t        utf8_peek_off(const char* s, int offset);                // codepoint value at utf8 pos (may be negative)
```

#### Extended cstr methods
```c
csview          cstr_substr(const cstr* self, size_t pos, size_t n);
csview          cstr_substr_ex(const cstr* s, intptr_t pos, size_t n);   // negative pos count from end
csview          cstr_u8_substr(const cstr* self, size_t bytepos, size_t u8len);

csview          cstr_slice(const cstr* self, size_t p1, size_t p2);
csview          cstr_slice_ex(const cstr* s, intptr_t p, intptr_t q);    // negative p or q count from end
```
#### Iterate tokens with *c_FORTOKEN*, *c_FORTOKEN_SV*

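To iterate over the tokens of an input string that are separated by a separator string, use *c_FORTOKEN* when the input is a C string, or *c_FORTOKEN_SV* when the input is a **csview** (see Example 3):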
```c
// Each token is a piece of the input and is not null-terminated on its own,
// so it is printed with "%.*s" through c_ARGSV rather than with "%s".
c_FORTOKEN (i, "hello, one, two, three", ", ")
    printf("token: %.*s\n", c_ARGSV(i.token));
```

#### Helper methods
```c
int             csview_cmp(const csview* x, const csview* y);
int             csview_icmp(const csview* x, const csview* y);
bool            csview_eq(const csview* x, const csview* y);
uint64_t        csview_hash(const csview* x);
```

## Types

| Type name       | Type definition                            | Used to represent...     |
|:----------------|:-------------------------------------------|:-------------------------|
| `csview`        | `struct { const char *str; size_t size; }` | The string view type     |
| `csview_value`  | `char`                                     | The string element type  |
| `csview_iter`   | `struct { csview_value *ref; }`            | UTF8 iterator            |

## Constants and macros

| Name           | Value                | Usage                                        |
|:---------------|:---------------------|:---------------------------------------------|
| `csview_NULL`  | same as `c_SV("")`   | `sview = csview_NULL;`                       |
| `c_ARGSV(sv)`  | printf argument      | `printf("sv: %.*s\n", c_ARGSV(sv));`         |

## Example
```c
#include <stc/cstr.h>
#include <stc/csview.h>

// Demonstrates taking csview substrings and slices of a cstr.
// The views own no memory: they refer to the storage of str1 and s1 and
// are only valid for as long as those strings are alive.
int main ()
{
    cstr str1 = cstr_lit("We think in generalities, but we live in details.");
                                                        // (quoting Alfred N. Whitehead)

    csview sv1 = cstr_substr(&str1, 3, 5);              // "think"
    size_t pos = cstr_find(&str1, "live");              // position of "live" in str1
    csview sv2 = cstr_substr(&str1, pos, 4);            // get "live"
    // Negative positions count from the end, which only the *_ex variants
    // support; cstr_slice() and cstr_substr() take unsigned size_t positions.
    csview sv3 = cstr_slice_ex(&str1, -8, -1);          // get "details"
    printf("%.*s %.*s %.*s\n",
        c_ARGSV(sv1), c_ARGSV(sv2), c_ARGSV(sv3));      // views may not be null-terminated

    cstr s1 = cstr_lit("Apples are red");
    cstr s2 = cstr_from_sv(cstr_substr_ex(&s1, -3, 3)); // "red"
    cstr s3 = cstr_from_sv(cstr_substr(&s1, 0, 6));     // "Apples"
    printf("%s %s\n", cstr_str(&s2), cstr_str(&s3));

    c_drop(cstr, &str1, &s1, &s2, &s3);
}
```
Output:
```
think live details
red Apples
```

### Example 2: UTF8 handling
```c
#include <stc/cstr.h>
#include <stc/csview.h>

// Replaces a UTF8 character inside a cstr, then prints the string one
// UTF8 character at a time.
int main()
{
    // NOTE(review): c_AUTO presumably declares s1 and drops it when the
    // scope ends -- confirm against the c_AUTO macro definition.
    c_AUTO (cstr, s1) {
        s1 = cstr_lit("hell😀 w😀rld");
        // Replace the 😀 that starts "😀rld" with "ø"; the first 😀 is untouched
        // (see the expected output listed after this example).
        cstr_u8_replace_at(&s1, cstr_find(&s1, "😀rld"), 1, c_SV("ø"));
        printf("%s\n", cstr_str(&s1));

        // The iteration steps per UTF8 character, not per byte; each character
        // is printed through c_ARGSV with "%.*s".
        c_FOREACH (i, cstr, s1)
            printf("%.*s,", c_ARGSV(i.u8.chr));
    }
}
```
Output:
```
hell😀 wørld
h,e,l,l,😀, ,w,ø,r,l,d,
```

### Example 3: csview tokenizer (string split)
Splits strings into tokens. *print_split()* makes **no** memory allocations or *strlen()* calls,
and does not depend on null-terminated strings. The *string_split()* function returns a stack (`cstack_str`) of cstr.
```c
#include <stdio.h>
#include <stc/csview.h>

// Print every token of `input`, delimited by `sep`, on its own line and
// enclosed in square brackets. The tokens are printed as views, so the
// input does not have to be null-terminated.
void print_split(csview input, const char* sep)
{
    c_FORTOKEN_SV (tok, input, sep) {
        printf("[%.*s]\n", c_ARGSV(tok.token));
    }
}

#include <stc/cstr.h>
#define i_val_str
#include <stc/cstack.h>

// Split `input` on `sep` and collect the tokens as cstr values.
// The returned cstack_str belongs to the caller, who must drop it.
cstack_str string_split(csview input, const char* sep)
{
    cstack_str tokens = cstack_str_init();

    c_FORTOKEN_SV (tok, input, sep) {
        // build a cstr from each token view before storing it
        cstack_str_push(&tokens, cstr_from_sv(tok.token));
    }
    return tokens;
}

int main()
{
    print_split(c_SV("//This is a//double-slash//separated//string"), "//");
    puts("");
    print_split(c_SV("This has no matching separator"), "xx");
    puts("");

    c_WITH (cstack_str s = string_split(c_SV("Split,this,,string,now,"), ","), cstack_str_drop(&s))
        c_FOREACH (i, cstack_str, s)
            printf("[%s]\n", cstr_str(i.ref));
}
```
Output:
```
[]
[This is a]
[double-slash]
[separated]
[string]

[This has no matching separator]

[Split]
[this]
[]
[string]
[now]
[]
```