5 files changed, 24 insertions, 20 deletions
diff --git a/examples/regex_match.c b/examples/regex_match.c
index e60fd519..05161b90 100644
--- a/examples/regex_match.c
+++ b/examples/regex_match.c
@@ -24,6 +24,14 @@ int main()
             printf("%" c_PRIsv " ; ", c_ARGsv(m[0]));
         }
         puts("");
+
+        res = cregex_compile(&re, "(.+)\\b(.+)", 0); /* pattern: \b between two (.+) groups */
+        printf("groups: %d\n", res);
+        if ((res = cregex_find(&re, "hello@wørld", 10, m, 0)) > 0) {
+            c_forrange (i, res)
+                printf("match: [%" c_PRIsv "]\n", c_ARGsv(m[i]));
+        } else
+            printf("err: %d\n", res);
     }
 }
 
diff --git a/include/stc/cstr.h b/include/stc/cstr.h
index 8116fce2..9c2c9c0c 100644
--- a/include/stc/cstr.h
+++ b/include/stc/cstr.h
@@ -171,7 +171,7 @@ STC_INLINE size_t cstr_capacity(cstr s)
 extern cstr cstr_tofold(const cstr* self);
 extern cstr cstr_tolower(const cstr* self);
 extern cstr cstr_toupper(const cstr* self);
-extern void cstr_foldcase(cstr* self);
+extern void cstr_casefold(cstr* self);
 extern void cstr_lowercase(cstr* self);
 extern void cstr_uppercase(cstr* self);
 
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index fb06de62..41d2f315 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -36,7 +36,6 @@ bool        utf8_isalnum(uint32_t c);
 uint32_t    utf8_casefold(uint32_t c);
 uint32_t    utf8_tolower(uint32_t c);
 uint32_t    utf8_toupper(uint32_t c);
-bool        utf8_valid(const char* s);
 bool        utf8_valid_n(const char* s, size_t nbytes);
 int         utf8_icmp_n(size_t u8max, const char* s1, size_t n1,
                                       const char* s2, size_t n2);
@@ -59,7 +58,11 @@ STC_INLINE int utf8_icmp(const char* s1, const char* s2) {
     return utf8_icmp_n(~(size_t)0, s1, ~(size_t)0, s2, ~(size_t)0);
 }
 
-/* number of characters in the utf8 codepoint from s */
+STC_INLINE bool utf8_valid(const char* s) {
+    return utf8_valid_n(s, ~(size_t)0); /* max size_t: no byte limit, the scan stops at the nul terminator */
+}
+
+/* number of bytes in the utf8 encoded codepoint starting at s */
 STC_INLINE unsigned utf8_chr_size(const char *s) {
     unsigned b = (uint8_t)*s;
     if (b < 0x80) return 1;
diff --git a/src/cregex.c b/src/cregex.c
index 575f995c..69fc6cbb 100644
--- a/src/cregex.c
+++ b/src/cregex.c
@@ -210,10 +210,10 @@ static const char*
 utfruneicase(const char *s, Rune c)
 {
     Rune r;
-    c = utf8_tolower(c);
+    c = utf8_casefold(c);
     for (;;) {
         int n = chartorune(&r, s);
-        if (utf8_tolower(r) == c) return s;
+        if (utf8_casefold(r) == c) return s;
         if ((r == 0) | (n == 0)) return NULL;
         s += n;
     }
@@ -793,17 +793,17 @@ bldcclass(Parser *par)
 }
 
 static Reprog*
-regcomp1(Parser *par, const char *s, int cflags)
+regcomp1(Reprog *progp, Parser *par, const char *s, int cflags)
 {
     Token token;
     Reprog *volatile pp;
 
     /* get memory for the program. estimated max usage */
     const int instcap = 5 + 6*strlen(s);
-    pp = (Reprog *)malloc(sizeof(Reprog) + instcap*sizeof(Reinst));
+    pp = (Reprog *)realloc(progp, sizeof(Reprog) + instcap*sizeof(Reinst));
     if (pp == NULL) {
+        pp = progp;
         rcerror(par, creg_outofmemory);
-        return NULL;
     }
     pp->flags.caseless = (cflags & creg_caseless) != 0;
     pp->flags.dotall = (cflags & creg_dotall) != 0;
@@ -918,7 +918,7 @@ runematch(Rune s, Rune r, bool icase)
     case UTF_XD: inv = 1;
     case UTF_xd: return inv ^ utf8_isxdigit(r);
     }
-    return icase ? utf8_tolower(s) == utf8_tolower(r) : s == r;
+    return icase ? utf8_casefold(s) == utf8_casefold(r) : s == r;
 }
 
 /*
@@ -1033,8 +1033,8 @@ regexec1(const Reprog *progp,    /* program to run */
                 case NWBOUND:
                     ok = true;
                 case WBOUND: /* fallthrough */
-                    if (ok ^ (s == bol || s == j->eol || ((utf8_isalnum(s[-1]) || s[-1] == '_')
-                                                        ^ (utf8_isalnum(s[ 0]) || s[ 0] == '_'))))
+                    if (ok ^ (s == bol || s == j->eol || ((utf8_isalnum(utf8_peek(s, -1)) || s[-1] == '_')
+                                                        ^ (utf8_isalnum(utf8_peek(s, 0)) || s[0] == '_'))))
                         continue;
                     break;
                 case NCCLASS:
@@ -1205,7 +1205,7 @@ void cregex_replace(
 
 int cregex_compile(cregex *rx, const char* pattern, int cflags) {
     Parser par;
-    rx->prog = regcomp1(&par, pattern, cflags);
+    rx->prog = regcomp1(rx->prog, &par, pattern, cflags);
     if (rx->prog)
         return 1 + rx->prog->nsubids;
     return par.errors;
diff --git a/src/utf8code.c b/src/utf8code.c
index f64ede70..dff10409 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -56,13 +56,6 @@ uint32_t utf8_peek(const char* s, int pos) {
     return d.codep;
 }
 
-bool utf8_valid(const char* s) {
-    utf8_decode_t d = {.state=0};
-    while (*s)
-        utf8_decode(&d, (uint8_t)*s++);
-    return d.state == 0;
-}
-
 bool utf8_valid_n(const char* s, size_t nbytes) {
     utf8_decode_t d = {.state=0};
     while ((nbytes-- != 0) & (*s != 0))
@@ -216,7 +209,7 @@ cstr cstr_toupper(const cstr* self) {
     return cstr_tocase(self, fn_toupper);
 }
 
-void cstr_foldcase(cstr* self) {
+void cstr_casefold(cstr* self) {
     cstr_take(self, cstr_tocase(self, fn_tofold));
 }