summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- examples/regex_match.c 8
-rw-r--r-- include/stc/cstr.h 2
-rw-r--r-- include/stc/utf8.h 7
-rw-r--r-- src/cregex.c 18
-rw-r--r-- src/utf8code.c 9
5 files changed, 24 insertions, 20 deletions
diff --git a/examples/regex_match.c b/examples/regex_match.c
index e60fd519..05161b90 100644
--- a/examples/regex_match.c
+++ b/examples/regex_match.c
@@ -24,6 +24,14 @@ int main()
printf("%" c_PRIsv " ; ", c_ARGsv(m[0]));
}
puts("");
+
+ res = cregex_compile(&re, "(.+)\\b(.+)", 0);
+ printf("groups: %d\n", res);
+ if ((res = cregex_find(&re, "hello@wørld", 10, m, 0)) > 0) {
+ c_forrange (i, res)
+ printf("match: [%" c_PRIsv "]\n", c_ARGsv(m[i]));
+ } else
+ printf("err: %d\n", res);
}
}
diff --git a/include/stc/cstr.h b/include/stc/cstr.h
index 8116fce2..9c2c9c0c 100644
--- a/include/stc/cstr.h
+++ b/include/stc/cstr.h
@@ -171,7 +171,7 @@ STC_INLINE size_t cstr_capacity(cstr s)
extern cstr cstr_tofold(const cstr* self);
extern cstr cstr_tolower(const cstr* self);
extern cstr cstr_toupper(const cstr* self);
-extern void cstr_foldcase(cstr* self);
+extern void cstr_casefold(cstr* self);
extern void cstr_lowercase(cstr* self);
extern void cstr_uppercase(cstr* self);
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index fb06de62..41d2f315 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -36,7 +36,6 @@ bool utf8_isalnum(uint32_t c);
uint32_t utf8_casefold(uint32_t c);
uint32_t utf8_tolower(uint32_t c);
uint32_t utf8_toupper(uint32_t c);
-bool utf8_valid(const char* s);
bool utf8_valid_n(const char* s, size_t nbytes);
int utf8_icmp_n(size_t u8max, const char* s1, size_t n1,
const char* s2, size_t n2);
@@ -59,7 +58,11 @@ STC_INLINE int utf8_icmp(const char* s1, const char* s2) {
return utf8_icmp_n(~(size_t)0, s1, ~(size_t)0, s2, ~(size_t)0);
}
-/* number of characters in the utf8 codepoint from s */
+STC_INLINE bool utf8_valid(const char* s) {
+ return utf8_valid_n(s, ~(size_t)0);
+}
+
+/* number of bytes in the utf8 codepoint from s */
STC_INLINE unsigned utf8_chr_size(const char *s) {
unsigned b = (uint8_t)*s;
if (b < 0x80) return 1;
diff --git a/src/cregex.c b/src/cregex.c
index 575f995c..69fc6cbb 100644
--- a/src/cregex.c
+++ b/src/cregex.c
@@ -210,10 +210,10 @@ static const char*
utfruneicase(const char *s, Rune c)
{
Rune r;
- c = utf8_tolower(c);
+ c = utf8_casefold(c);
for (;;) {
int n = chartorune(&r, s);
- if (utf8_tolower(r) == c) return s;
+ if (utf8_casefold(r) == c) return s;
if ((r == 0) | (n == 0)) return NULL;
s += n;
}
@@ -793,17 +793,17 @@ bldcclass(Parser *par)
}
static Reprog*
-regcomp1(Parser *par, const char *s, int cflags)
+regcomp1(Reprog *progp, Parser *par, const char *s, int cflags)
{
Token token;
Reprog *volatile pp;
/* get memory for the program. estimated max usage */
const int instcap = 5 + 6*strlen(s);
- pp = (Reprog *)malloc(sizeof(Reprog) + instcap*sizeof(Reinst));
+ pp = (Reprog *)realloc(progp, sizeof(Reprog) + instcap*sizeof(Reinst));
if (pp == NULL) {
+ pp = progp;
rcerror(par, creg_outofmemory);
- return NULL;
}
pp->flags.caseless = (cflags & creg_caseless) != 0;
pp->flags.dotall = (cflags & creg_dotall) != 0;
@@ -918,7 +918,7 @@ runematch(Rune s, Rune r, bool icase)
case UTF_XD: inv = 1;
case UTF_xd: return inv ^ utf8_isxdigit(r);
}
- return icase ? utf8_tolower(s) == utf8_tolower(r) : s == r;
+ return icase ? utf8_casefold(s) == utf8_casefold(r) : s == r;
}
/*
@@ -1033,8 +1033,8 @@ regexec1(const Reprog *progp, /* program to run */
case NWBOUND:
ok = true;
case WBOUND: /* fallthrough */
- if (ok ^ (s == bol || s == j->eol || ((utf8_isalnum(s[-1]) || s[-1] == '_')
- ^ (utf8_isalnum(s[ 0]) || s[ 0] == '_'))))
+ if (ok ^ (s == bol || s == j->eol || ((utf8_isalnum(utf8_peek(s, -1)) || s[-1] == '_')
+ ^ (utf8_isalnum(utf8_peek(s, 0)) || s[0] == '_'))))
continue;
break;
case NCCLASS:
@@ -1205,7 +1205,7 @@ void cregex_replace(
int cregex_compile(cregex *rx, const char* pattern, int cflags) {
Parser par;
- rx->prog = regcomp1(&par, pattern, cflags);
+ rx->prog = regcomp1(rx->prog, &par, pattern, cflags);
if (rx->prog)
return 1 + rx->prog->nsubids;
return par.errors;
diff --git a/src/utf8code.c b/src/utf8code.c
index f64ede70..dff10409 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -56,13 +56,6 @@ uint32_t utf8_peek(const char* s, int pos) {
return d.codep;
}
-bool utf8_valid(const char* s) {
- utf8_decode_t d = {.state=0};
- while (*s)
- utf8_decode(&d, (uint8_t)*s++);
- return d.state == 0;
-}
-
bool utf8_valid_n(const char* s, size_t nbytes) {
utf8_decode_t d = {.state=0};
while ((nbytes-- != 0) & (*s != 0))
@@ -216,7 +209,7 @@ cstr cstr_toupper(const cstr* self) {
return cstr_tocase(self, fn_toupper);
}
-void cstr_foldcase(cstr* self) {
+void cstr_casefold(cstr* self) {
cstr_take(self, cstr_tocase(self, fn_tofold));
}