summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Tyge Løvset <[email protected]> 2022-07-30 10:37:50 +0200
committer Tyge Løvset <[email protected]> 2022-07-30 10:37:50 +0200
commit 90b4371d8a36ab609c2200289634c171a19f7fff (patch)
tree 0db596e22fe671ad6f55dfde427a92eed9941c2a
parent 8329299b810b20cb90c2e3aa7ea465dcefa162aa (diff)
download STC-modified-90b4371d8a36ab609c2200289634c171a19f7fff.tar.gz
STC-modified-90b4371d8a36ab609c2200289634c171a19f7fff.zip
Added support for more than 9 capture group replacements, e.g. "$12;". Added an inline function cregex_find_sv().
-rw-r--r-- docs/cregex_api.md 4
-rw-r--r-- include/stc/cregex.h 5
-rw-r--r-- src/cregex.c 32
3 files changed, 25 insertions, 16 deletions
diff --git a/docs/cregex_api.md b/docs/cregex_api.md
index fc34e0a8..a71ae31b 100644
--- a/docs/cregex_api.md
+++ b/docs/cregex_api.md
@@ -33,6 +33,7 @@ int cregex_captures(const cregex* self);
// return 1=match, 0=nomatch, -1=error. match array size: at least num groups in RE (1+).
int cregex_find(const char* input, const cregex* re, csview match[], int mflags);
+int cregex_find_sv(csview input, const cregex* re, csview match[]);
// takes string pattern instead of re. (for one-time matches)
int cregex_find_p(const char* input, const char* pattern, csview match[], int cmflags);
@@ -173,7 +174,8 @@ In order to use a callback function in the replace call, see `examples/regex_rep
| [[:punct:]] [[:space:]] [[:upper:]] | Match ASCII character class | * |
| [[:xdigit:]] [[:word:]] | Match ASCII character class | * |
| [[:^***class***:]] | Do not match ASCII character class | * |
-| $***n*** | *n*-th substitution backreference to capture group. *n* in 0-9. $0 is the entire match.
+| $***n*** | *n*-th substitution backreference to capture group. ***n*** in 0-9. $0 is the entire match. | * |
+| $***nn***; | As above, but can handle ***nn*** < cre_MAXCAPTURES. | * |
## Limitations
diff --git a/include/stc/cregex.h b/include/stc/cregex.h
index 4247197c..8f6464d4 100644
--- a/include/stc/cregex.h
+++ b/include/stc/cregex.h
@@ -94,6 +94,11 @@ int cregex_captures(const cregex* self);
/* return 1 on match, 0 on nomatch, and -1 on failure. */
int cregex_find(const char* input, const cregex* re,
csview match[], int mflags);
+static inline
+int cregex_find_sv(csview input, const cregex* re, csview match[]) {
+ match[0] = input;
+ return cregex_find(input.str, re, match, cre_m_startend);
+}
/* match + compile RE pattern */
int cregex_find_p(const char* input, const char* pattern,
diff --git a/src/cregex.c b/src/cregex.c
index c50e870f..612a6965 100644
--- a/src/cregex.c
+++ b/src/cregex.c
@@ -1154,25 +1154,27 @@ regexec(const Reprog *progp, /* program to run */
static void
build_subst_string(const char* replace, unsigned nmatch, const csview match[],
bool (*mfun)(int i, csview match, cstr* mstr), cstr* subst) {
- cstr_clear(subst);
- unsigned len = 0, cap = cstr_capacity(*subst);
- char* dst = cstr_data(subst);
+ cstr_buf buf = cstr_buffer(subst);
+ unsigned len = 0, cap = buf.cap;
+ char* dst = buf.data;
cstr mstr = cstr_null;
while (*replace != '\0') {
if (*replace == '$') {
- const char num = *++replace;
- int i;
- switch (num) {
+ const int arg = *++replace;
+ int g;
+ switch (arg) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- i = num - '0';
- if (i < nmatch) {
- csview m = mfun && mfun(i, match[i], &mstr) ? cstr_sv(&mstr) : match[i];
- if (len + m.size >= cap)
+ g = arg - '0';
+ if (replace[1] >= '0' && replace[1] <= '9' && replace[2] == ';')
+ { g = g*10 + (replace[1] - '0'); replace += 2; }
+ if (g < nmatch) {
+ csview m = mfun && mfun(g, match[g], &mstr) ? cstr_sv(&mstr) : match[g];
+ if (len + m.size > cap)
dst = cstr_reserve(subst, cap = cap*3/2 + m.size);
- for (const char* rp = m.str; rp != (m.str + m.size); ++rp)
- dst[len++] = *rp;
+ for (unsigned i = 0; i < m.size; ++i)
+ dst[len++] = m.str[i];
}
++replace;
case '\0':
@@ -1220,7 +1222,7 @@ int cregex_find_p(const char* input, const char* pattern,
csview match[], int cmflags) {
cregex re = cregex_init();
int res = cregex_compile(&re, pattern, cmflags);
- if (res < 0) return res;
+ if (res != cre_success) return res;
res = cregex_find(input, &re, match, cmflags);
cregex_drop(&re);
return res;
@@ -1237,7 +1239,7 @@ cregex_replace_re(const char* input, const cregex* re, const char* replace,
if (!count) count = ~0;
bool copy = !(rflags & cre_r_strip);
- while (count-- && cregex_find(input + from, re, match, 0) == 1) {
+ while (count-- && cregex_find(input + from, re, match, 0) == cre_success) {
build_subst_string(replace, nmatch, match, mfun, &subst);
const size_t pos = match[0].str - input;
if (copy) cstr_append_n(&out, input + from, pos - from);
@@ -1254,7 +1256,7 @@ cregex_replace_pe(const char* input, const char* pattern, const char* replace,
bool (*mfun)(int i, csview match, cstr* mstr), unsigned count, int crflags) {
cregex re = cregex_init();
int res = cregex_compile(&re, pattern, crflags);
- if (res < 0)
+ if (res != cre_success)
return cstr_new("[[error: invalid regex pattern]]");
cstr out = cregex_replace_re(input, &re, replace, mfun, count, crflags);
cregex_drop(&re);