summaryrefslogtreecommitdiffhomepage
path: root/src/utf8code.c
diff options
context:
space:
mode:
authorTyge Lovset <[email protected]>2022-07-20 23:13:11 +0200
committerTyge Lovset <[email protected]>2022-07-20 23:13:11 +0200
commit3f89c290700618eae78eaa289bdb88d1cfb3514d (patch)
tree32dd44f8ca210aee69039f8779be611f7885cd5b /src/utf8code.c
parent78cb61301df13fee995d3afd1bd1d8d63310d819 (diff)
downloadSTC-modified-3f89c290700618eae78eaa289bdb88d1cfb3514d.tar.gz
STC-modified-3f89c290700618eae78eaa289bdb88d1cfb3514d.zip
Added cregex_replace*() [implemented in utf8code.c]. Added examples/regex_replace.c. Docs not ready, i.e. API not fixed. Some other refactoring and minor fixes/improvements. cstr_assign_sv() now returns char* like the other cstr_assign*().
Diffstat (limited to 'src/utf8code.c')
-rw-r--r--src/utf8code.c75
1 files changed, 74 insertions, 1 deletions
diff --git a/src/utf8code.c b/src/utf8code.c
index dff10409..44120cee 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -1,6 +1,7 @@
#include <ctype.h>
#define i_header
#include <stc/cstr.h>
+#include <stc/cregex.h>
#include "utf8tabs.inc"
const uint8_t utf8_dtab[] = {
@@ -16,7 +17,7 @@ const uint8_t utf8_dtab[] = {
12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
- 12,36,12,12,12,12,12,12,12,12,12,12,
+ 12,36,12,12,12,12,12,12,12,12,12,12,
};
unsigned utf8_encode(char *out, uint32_t c)
@@ -220,3 +221,75 @@ void cstr_lowercase(cstr* self) {
/* Passes the result of cstr_tocase(self, fn_toupper) to cstr_take(self, ...).
 * NOTE(review): presumably this replaces self's contents with an upper-cased
 * copy; cstr_take and cstr_tocase are not visible here -- confirm. */
void cstr_uppercase(cstr* self) {
cstr_take(self, cstr_tocase(self, fn_toupper));
}
+
+
+/* Expand the replacement template `repl` into `sub` (whose previous contents
+ * are cleared first).
+ *
+ * Template syntax, as implemented below:
+ *   \N   (N = one decimal digit) inserts match[N] when N < nmatch, otherwise
+ *        inserts nothing. When `mfun` is non-null, the text inserted is the
+ *        string returned by mfun(N, match[N]); that string is dropped here
+ *        after it has been copied.
+ *   \c   (any other character c) inserts c literally, without the backslash.
+ *   A backslash that is the last character of `repl` is discarded.
+ * Every other character is copied unchanged.
+ */
+void cregex_build_replace(const char* repl, unsigned nmatch, const csview match[],
+                          cstr (*mfun)(int i, csview match), cstr* sub) {
+    cstr_clear(sub);
+    unsigned cap = cstr_capacity(*sub);
+    unsigned len = 0;
+    char* dst = cstr_data(sub);
+
+    while (*repl != '\0') {
+        if (*repl == '\\') {
+            const char esc = *++repl;
+
+            /* Trailing backslash: nothing to emit; the loop condition ends the scan. */
+            if (esc == '\0')
+                continue;
+
+            if (esc >= '0' && esc <= '9') {
+                const int group = esc - '0';
+                ++repl;                         /* step past the digit */
+                if ((unsigned)group < nmatch) {
+                    cstr tmp = cstr_null;
+                    csview m;
+                    if (mfun) {
+                        tmp = mfun(group, match[group]);
+                        m = cstr_sv(&tmp);
+                    } else {
+                        m = match[group];
+                    }
+                    /* Grow the destination so the whole group text fits. */
+                    if (len + m.size >= cap) {
+                        cap = cap*3/2 + m.size;
+                        dst = cstr_reserve(sub, cap);
+                    }
+                    for (size_t k = 0; k < m.size; ++k)
+                        dst[len++] = m.str[k];
+                    cstr_drop(&tmp);
+                }
+                continue;
+            }
+            /* Any other escaped character is copied literally below. */
+        }
+        if (len == cap) {
+            cap = cap*3/2 + 4;
+            dst = cstr_reserve(sub, cap);
+        }
+        dst[len++] = *repl++;
+    }
+    _cstr_set_size(sub, len);
+}
+
+
+/* Return a new string in which matches of the compiled regex `re` in `input`
+ * are replaced by the expansion of the template `repl` (see
+ * cregex_build_replace() for the template syntax and the role of `mfun`).
+ *
+ * count:  maximum number of replacements; 0 means no limit.
+ * cflags: not used by this function (cregex_match() is called with flags 0).
+ *
+ * Text between matches, and the remainder after the last match, is copied
+ * unchanged. The caller owns the returned cstr.
+ */
+cstr cregex_replace_re(const char* input, const cregex* re, const char* repl,
+                       cstr (*mfun)(int i, csview match), int cflags, unsigned count) {
+    cstr out = cstr_null;
+    cstr sub = cstr_null;
+    size_t from = 0;
+    csview match[cregex_MAXCAPTURES];
+    unsigned nmatch = cregex_captures(re);
+    (void)cflags;
+    if (!count) count = ~0;
+
+    while (count-- && cregex_match(re, input + from, nmatch, match, 0) > 0) {
+        cregex_build_replace(repl, nmatch, match, mfun, &sub);
+        const size_t pos = (size_t)(match[0].str - input);
+        cstr_append_n(&out, input + from, pos - from);
+        cstr_append_s(&out, sub);
+        from = pos + match[0].size;
+
+        if (match[0].size == 0) {
+            /* A zero-length match does not consume input. Without an explicit
+             * step the same empty match would be found again at the same
+             * position on every iteration. */
+            if (input[from] == '\0')
+                break;
+            /* Copy one whole UTF-8 code point (lead byte plus any 10xxxxxx
+             * continuation bytes) so the next search starts on a boundary. */
+            size_t n = 1;
+            while (((unsigned char)input[from + n] & 0xC0) == 0x80)
+                ++n;
+            cstr_append_n(&out, input + from, n);
+            from += n;
+        }
+    }
+    cstr_append(&out, input + from);
+    cstr_drop(&sub);
+    return out;
+}
+
+
+/* Compile `pattern` with `cflags`, then replace its matches in `input` via
+ * cregex_replace_re() (same `repl`, `mfun` and `count` semantics).
+ *
+ * If compilation fails (cregex_compile() returns a negative value), the
+ * returned string is the diagnostic text
+ * "[[cregex_replace_fn]]: invalid pattern" instead of a replacement result.
+ * The caller owns the returned cstr.
+ */
+cstr cregex_replace_fn(const char* input, const char* pattern, const char* repl,
+                       cstr (*mfun)(int i, csview match), int cflags, unsigned count) {
+    cregex re = cregex_init();
+    cstr out;
+    if (cregex_compile(&re, pattern, cflags) < 0)
+        out = cstr_new("[[cregex_replace_fn]]: invalid pattern");
+    else
+        out = cregex_replace_re(input, &re, repl, mfun, cflags, count);
+    /* Release the regex on every path, including a failed compile, so that
+     * anything cregex_compile() may have left in `re` is not leaked. */
+    cregex_drop(&re);
+    return out;
+}