summaryrefslogtreecommitdiffhomepage
path: root/docs/csubstr_api.md
blob: 925c69db7c7fdaa853288e6c5888de231270c533 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# STC [csubstr](../include/stc/csubstr.h): String View
![String](pics/string.jpg)

The type **csubstr** is a string view and can refer to a constant contiguous sequence of char-elements with the first
element of the sequence at position zero. The implementation holds two members: a pointer to constant char and a size.

**csubstr** is non-null terminated, and therefore not a replacent for `const char*` - see [csview](csview_api.md) for
that. **csubstr** never allocates memory, and therefore need not be destructed.
Its lifetime is limited by the source string storage. It keeps the length of the string, and does not need to call
*strlen()* to acquire the length.

Note: a **csubstr** may ***not be null-terminated***, and must therefore be printed this way:
```c
printf("%.*s", c_SS(sstr))
```

See the c++ class [std::basic_string_view](https://en.cppreference.com/w/cpp/string/basic_string_view) for a functional
description.

## Header file

All csubstr definitions and prototypes are available by including a single header file.

```c
#define i_implement
#include <stc/cstr.h>
#include <stc/csubstr.h> // after cstr.h: include extra cstr-csubstr functions
```
## Methods

```c
csubstr         c_ss(const char literal_only[]);                        // construct from literal, no strlen()
csubstr         c_ss(const char* str, intptr_t n);                      // construct from str and length n
csubstr         csubstr_from(const char* str);                           // construct from const char*
csubstr         csubstr_from_n(const char* str, intptr_t n);             // alias for c_ss(str, n)

intptr_t        csubstr_size(csubstr sv);
bool            csubstr_empty(csubstr sv);
void            csubstr_clear(csubstr* self);

bool            csubstr_equals(csubstr sv, csubstr sv2);
intptr_t        csubstr_find(csubstr sv, const char* str);
intptr_t        csubstr_find_ss(csubstr sv, csubstr find);
bool            csubstr_contains(csubstr sv, const char* str);
bool            csubstr_starts_with(csubstr sv, const char* str);
bool            csubstr_ends_with(csubstr sv, const char* str);

csubstr         csubstr_substr_ex(csubstr sv, intptr_t pos, intptr_t n);  // negative pos count from end
csubstr         csubstr_slice_ex(csubstr sv, intptr_t p1, intptr_t p2);   // negative p1, p2 count from end
csubstr         csubstr_token(csubstr sv, const char* sep, intptr_t* start); // *start > sv.size after last token
```

#### UTF8 methods
```c
intptr_t        csubstr_u8_size(csubstr sv);
csubstr         csubstr_u8_substr(csubstr sv, intptr_t bytepos, intptr_t u8len);
bool            csubstr_valid_utf8(csubstr sv);                           // requires linking with src/utf8code.c

csubstr_iter    csubstr_begin(const csubstr* self);
csubstr_iter    csubstr_end(const csubstr* self);
void            csubstr_next(csubstr_iter* it);                           // utf8 codepoint step, not byte!
csubstr_iter    csubstr_advance(csubstr_iter it, intptr_t n);
```

#### Extended cstr methods
```c
csubstr         cstr_substr(const cstr* self, intptr_t pos, intptr_t n);
csubstr         cstr_substr_ex(const cstr* s, intptr_t pos, intptr_t n); // negative pos count from end
csubstr         cstr_u8_substr(const cstr* self, intptr_t bytepos, intptr_t u8len);

csubstr         cstr_slice(const cstr* self, intptr_t p1, intptr_t p2);
csubstr         cstr_slice_ex(const cstr* s, intptr_t p, intptr_t q);    // negative p or q count from end
```
#### Iterate tokens with *c_fortoken*, *c_fortoken_ss*

To iterate tokens in an input string separated by a string:
```c
c_fortoken (i, "hello, one, two, three", ", ")
    printf("token: %.*s\n", c_SS(i.token));
```

#### Helper methods
```c
int             csubstr_cmp(const csubstr* x, const csubstr* y);
int             csubstr_icmp(const csubstr* x, const csubstr* y);
bool            csubstr_eq(const csubstr* x, const csubstr* y);
uint64_t        csubstr_hash(const csubstr* x);
```

## Types

| Type name       | Type definition                            | Used to represent...     |
|:----------------|:-------------------------------------------|:-------------------------|
| `csubstr`        | `struct { const char *str; intptr_t size; }` | The string view type     |
| `csubstr_value`  | `char`                                     | The string element type  |
| `csubstr_iter`   | `struct { csubstr_value *ref; }`            | UTF8 iterator            |

## Constants and macros

| Name           | Value                | Usage                                        |
|:---------------|:---------------------|:---------------------------------------------|
| `c_SS(sv)`     | printf argument      | `printf("sv: %.*s\n", c_SS(sv));`            |

## Example
```c
#define i_implement
#include <stc/cstr.h>
#include <stc/csubstr.h>

int main(void)
{
    cstr str1 = cstr_lit("We think in generalities, but we live in details.");
                                                            // (quoting Alfred N. Whitehead)

    csubstr sv1 = cstr_substr_ex(&str1, 3, 5);               // "think"
    intptr_t pos = cstr_find(&str1, "live");                // position of "live" in str1
    csubstr sv2 = cstr_substr_ex(&str1, pos, 4);             // get "live"
    csubstr sv3 = cstr_slice_ex(&str1, -8, -1);              // get "details"
    printf("%.*s %.*s %.*s\n",
        c_SS(sv1), c_SS(sv2), c_SS(sv3));
    cstr s1 = cstr_lit("Apples are red");
    cstr s2 = cstr_from_ss(cstr_substr_ex(&s1, -3, 3));     // "red"
    cstr s3 = cstr_from_ss(cstr_substr_ex(&s1, 0, 6));      // "Apples"
    printf("%s %s\n", cstr_str(&s2), cstr_str(&s3));

    c_drop(cstr, &str1, &s1, &s2, &s3);
}
```
Output:
```
think live details
red Apples
```

### Example 2: UTF8 handling
```c
#define i_import // include dependent cstr, utf8 and cregex function definitions.
#include <stc/cstr.h>

int main(void)
{
    cstr s1 = cstr_lit("hell😀 w😀rld");

    cstr_u8_replace_at(&s1, cstr_find(&s1, "😀rld"), 1, c_ss("ø"));
    printf("%s\n", cstr_str(&s1));

    c_foreach (i, cstr, s1)
        printf("%.*s,", c_SS(i.u8.chr));

    cstr_drop(&s1);
}
```
Output:
```
hell😀 wørld
h,e,l,l,😀, ,w,ø,r,l,d,
```

### Example 3: csubstr tokenizer (string split)
Splits strings into tokens. *print_split()* makes **no** memory allocations or *strlen()* calls,
and does not depend on null-terminated strings. *string_split()* function returns a vector of cstr.
```c
#include <stdio.h>
#include <stc/csubstr.h>

void print_split(csubstr input, const char* sep)
{
    c_fortoken_ss (i, input, sep)
        printf("[%.*s]\n", c_SS(i.token));
    puts("");
}
#define i_implement
#include <stc/cstr.h>
#define i_key_str
#include <stc/cstack.h>

cstack_str string_split(csubstr input, const char* sep)
{
    cstack_str out = cstack_str_init();
    
    c_fortoken_ss (i, input, sep)
        cstack_str_push(&out, cstr_from_ss(i.token));

    return out;
}

int main(void)
{
    print_split(c_ss("//This is a//double-slash//separated//string"), "//");
    print_split(c_ss("This has no matching separator"), "xx");

    cstack_str s = string_split(c_ss("Split,this,,string,now,"), ",");

    c_foreach (i, cstack_str, s)
        printf("[%s]\n", cstr_str(i.ref));
    puts("");

    cstack_str_drop(&s);
}
```
Output:
```
[]
[This is a]
[double-slash]
[separated]
[string]

[This has no matching separator]

[Split]
[this]
[]
[string]
[now]
[]
```