5 files changed, 43 insertions, 41 deletions
diff --git a/docs/cregex_api.md b/docs/cregex_api.md
index 689fd33d..e528c83b 100644
--- a/docs/cregex_api.md
+++ b/docs/cregex_api.md
@@ -186,17 +186,16 @@ For reference, **cregex** uses the following files:
 | \p{XDigit} | Match UTF8 hex number | * |
 | \p{Nd} or \p{Digit} | Match UTF8 decimal number | * |
 | \p{Nl} | Match UTF8 numeric letter | * |
-| \p{Ll} or \p{Lower} | Match UTF8 lower case letter | * |
-| \p{Lu} or \p{Upper} | Match UTF8 upper case letter | * |
+| \p{Ll} or \p{Lower} | Match UTF8 lowercase letter | * |
+| \p{Lu} or \p{Upper} | Match UTF8 uppercase letter | * |
 | \p{Lt} | Match UTF8 titlecase letter | * |
 | \p{L&} or \p{Alpha} | Match UTF8 cased letter | * |
 | \p{Cc} | Match UTF8 control char | * |
 | \p{Pc} | Match UTF8 connector punctuation | * |
 | \p{Pd} | Match UTF8 dash punctuation | * |
-| \p{Pf} | Match UTF8 final punctuation | * |
 | \p{Pi} | Match UTF8 initial punctuation | * |
+| \p{Pf} | Match UTF8 final punctuation | * |
 | \p{Sc} | Match UTF8 currency symbol | * |
-| \p{Sk} | Match UTF8 modifier symbol | * |
 | \p{Zl} | Match UTF8 line separator | * |
 | \p{Zp} | Match UTF8 paragraph separator | * |
 | \p{Sz} or \p{Space} | Match UTF8 whitespace separator | * |
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index b30b0061..da6643ea 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -5,7 +5,11 @@
 #include "ccommon.h"
 
 // utf8 methods defined in src/utf8code.c:
-
+enum { // utf8 group ids: each one is the index of its entry in unicode_groups[] (src/utf8code.c)
+    U8G_Cc, U8G_Lt, U8G_Nd, U8G_Nl,
+    U8G_Pc, U8G_Pd, U8G_Pf, U8G_Pi,
+    U8G_Sc, U8G_Zl, U8G_Zp, U8G_Zs,
+}; // pass one of these as the `group` argument of utf8_isgroup()
 extern bool     utf8_isspace(uint32_t c);
 extern bool     utf8_isdigit(uint32_t c);
 extern bool     utf8_isxdigit(uint32_t c);
diff --git a/misc/examples/regex2.c b/misc/examples/regex2.c
index 55f8cfc2..abae5695 100644
--- a/misc/examples/regex2.c
+++ b/misc/examples/regex2.c
@@ -10,7 +10,8 @@ int main()
         {"(https?://|ftp://|www\\.)([0-9A-Za-z@:%_+~#=-]+\\.)+([a-z][a-z][a-z]?)(/[/0-9A-Za-z\\.@:%_+~#=\\?&-]*)?",
          "https://en.cppreference.com/w/cpp/regex/regex_search"
         },
-        {"!((abc|123)+)!", "!123abcabc!"}
+        {"!((abc|123)+)!", "!123abcabc!"},
+        {"(\\p{L&}+ )+(\\p{Nd}+)", "Großpackung süßigkeiten 199"},
     };
 
     c_AUTO (cregex, re)
@@ -21,12 +22,11 @@ int main()
             printf("error in regex pattern: %d\n", res);
             continue;
         }
-        printf("input: %s\n", s[i].input);
+        printf("\ninput: %s\n", s[i].input);
 
         c_FORMATCH (j, &re, s[i].input) {
             c_FORRANGE (k, cregex_captures(&re))
                 printf("  submatch %lld: %.*s\n", k, c_ARGSV(j.match[k]));
-            puts("");
         }
     }
 }
diff --git a/src/cregex.c b/src/cregex.c
index 7b528550..006c2d05 100644
--- a/src/cregex.c
+++ b/src/cregex.c
@@ -133,22 +133,22 @@ enum {
     UTF_an      , UTF_AN,       /* utf8 alphanumeric */
     UTF_wr      , UTF_WR,       /* utf8 word */
     UTF_xd      , UTF_XD,       /* utf8 hex digit */
-    U8G_tmp     , U8G = U8G_tmp + (U8G_tmp & 1), /* force even */
-    UTF_cc = U8G, UTF_CC,       /* utf8 control char */
     UTF_lc      , UTF_LC,       /* utf8 letter cased */
     UTF_ll      , UTF_LL,       /* utf8 letter lowercase */
-    UTF_lt      , UTF_LT,       /* utf8 letter titlecase */
     UTF_lu      , UTF_LU,       /* utf8 letter uppercase */
-    UTF_nd      , UTF_ND,       /* utf8 number decimal */
-    UTF_nl      , UTF_NL,       /* utf8 number letter */
-    UTF_pc      , UTF_PC,       /* utf8 punct connector */
-    UTF_pd      , UTF_PD,       /* utf8 punct dash */
-    UTF_pf      , UTF_PF,       /* utf8 punct final */
-    UTF_pi      , UTF_PI,       /* utf8 punct initial */
-    UTF_sc      , UTF_SC,       /* utf8 symbol currency */
-    UTF_zl      , UTF_ZL,       /* utf8 separator line */
-    UTF_zp      , UTF_ZP,       /* utf8 separator paragraph */
-    UTF_zs      , UTF_ZS,       /* utf8 separator space */
+    UTF_GRP = 0x8150000, /* base of the utf8_isgroup() classes: code = UTF_GRP + 2*U8G_xx; an odd offset (UTF_XX) inverts the match */
+    UTF_cc = UTF_GRP+2*U8G_Cc, UTF_CC, /* utf8 control char */
+    UTF_lt = UTF_GRP+2*U8G_Lt, UTF_LT, /* utf8 letter titlecase */
+    UTF_nd = UTF_GRP+2*U8G_Nd, UTF_ND, /* utf8 number decimal */
+    UTF_nl = UTF_GRP+2*U8G_Nl, UTF_NL, /* utf8 number letter */
+    UTF_pc = UTF_GRP+2*U8G_Pc, UTF_PC, /* utf8 punct connector */
+    UTF_pd = UTF_GRP+2*U8G_Pd, UTF_PD, /* utf8 punct dash */
+    UTF_pf = UTF_GRP+2*U8G_Pf, UTF_PF, /* utf8 punct final */
+    UTF_pi = UTF_GRP+2*U8G_Pi, UTF_PI, /* utf8 punct initial */
+    UTF_sc = UTF_GRP+2*U8G_Sc, UTF_SC, /* utf8 symbol currency */
+    UTF_zl = UTF_GRP+2*U8G_Zl, UTF_ZL, /* utf8 separator line */
+    UTF_zp = UTF_GRP+2*U8G_Zp, UTF_ZP, /* utf8 separator paragraph */
+    UTF_zs = UTF_GRP+2*U8G_Zs, UTF_ZS, /* utf8 separator space */
     TOK_ANY     = 0x8200000,    /* Any character except newline, . */
     TOK_ANYNL   ,               /* Any character including newline, . */
     TOK_NOP     ,               /* No operation, internal use only */
@@ -896,7 +896,8 @@ out:
 static int
 _runematch(_Rune s, _Rune r)
 {
-    int inv = 0, n;
+    int inv = 0;
+    uint32_t n;
     switch (s) {
     case ASC_D: inv = 1; case ASC_d: return inv ^ (isdigit(r) != 0);
     case ASC_S: inv = 1; case ASC_s: return inv ^ (isspace(r) != 0);
@@ -915,11 +916,11 @@ _runematch(_Rune s, _Rune r)
     case UTF_AN: inv = 1; case UTF_an: return inv ^ utf8_isalnum(r);
     case UTF_WR: inv = 1; case UTF_wr: return inv ^ (utf8_isalnum(r) | (r == '_'));
     case UTF_XD: inv = 1; case UTF_xd: return inv ^ utf8_isxdigit(r);
+    case UTF_LL: inv = 1; case UTF_ll: return inv ^ utf8_islower(r);
+    case UTF_LU: inv = 1; case UTF_lu: return inv ^ utf8_isupper(r);
     case UTF_LC: inv = 1; case UTF_lc: return inv ^ utf8_isalpha(r); 
     case UTF_CC: case UTF_cc:
-    case UTF_LL: case UTF_ll:
     case UTF_LT: case UTF_lt:
-    case UTF_LU: case UTF_lu:
     case UTF_ND: case UTF_nd:
     case UTF_NL: case UTF_nl:
     case UTF_PC: case UTF_pc:
@@ -930,7 +931,7 @@ _runematch(_Rune s, _Rune r)
     case UTF_ZL: case UTF_zl:
     case UTF_ZP: case UTF_zp:
     case UTF_ZS: case UTF_zs:
-        n = s - U8G;
+        n = s - UTF_GRP;
         inv = n & 1;
         return inv ^ utf8_isgroup(n / 2, r);
     }
diff --git a/src/utf8code.c b/src/utf8code.c
index 6fe8515e..8f2ce107 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -125,7 +125,6 @@ typedef struct {
 
 static const UGroup unicode_groups[];
 static const int num_unicode_groups;
-static const int Lt_group;
 
 bool utf8_isgroup(int group, uint32_t c) {
     for (int j=0; j<unicode_groups[group].nr16; ++j) {
@@ -153,7 +152,7 @@ bool utf8_isalnum(uint32_t c) {
 
 bool utf8_isalpha(uint32_t c) {
     if (c < 128) return isalpha(c) != 0;
-    return utf8_islower(c) || utf8_isupper(c) || utf8_isgroup(Lt_group, c);
+    return utf8_islower(c) || utf8_isupper(c) || utf8_isgroup(U8G_Lt, c); // non-ASCII: lowercase, uppercase, or member of the titlecase (Lt) group
 }
 
 static const URange16 Cc_range16[] = { // Control
@@ -321,24 +320,23 @@ static const URange16 Zs_range16[] = { // Space separator
 };
 
 #define UNI_ENTRY(Code) \
-    { #Code, Code##_range16, sizeof(Code##_range16)/2 }
+    { #Code, Code##_range16, sizeof(Code##_range16)/sizeof(Code##_range16[0]) } /* third field: number of range entries (not bytes), computed from the element size instead of a hard-coded 2*2 */
 
 static const UGroup unicode_groups[] = {
-    UNI_ENTRY(Cc),
-    UNI_ENTRY(Lt),
-    UNI_ENTRY(Nd),
-    UNI_ENTRY(Nl),
-    UNI_ENTRY(Pc),
-    UNI_ENTRY(Pd),
-    UNI_ENTRY(Pf),
-    UNI_ENTRY(Pi),
-    UNI_ENTRY(Sc),
-    UNI_ENTRY(Zl),
-    UNI_ENTRY(Zp),
-    UNI_ENTRY(Zs),
+    [U8G_Cc] = UNI_ENTRY(Cc), // designated initializers: every entry sits at the index named by its U8G_* enumerator
+    [U8G_Lt] = UNI_ENTRY(Lt),
+    [U8G_Nd] = UNI_ENTRY(Nd),
+    [U8G_Nl] = UNI_ENTRY(Nl),
+    [U8G_Pc] = UNI_ENTRY(Pc),
+    [U8G_Pd] = UNI_ENTRY(Pd),
+    [U8G_Pf] = UNI_ENTRY(Pf),
+    [U8G_Pi] = UNI_ENTRY(Pi),
+    [U8G_Sc] = UNI_ENTRY(Sc),
+    [U8G_Zl] = UNI_ENTRY(Zl),
+    [U8G_Zp] = UNI_ENTRY(Zp),
+    [U8G_Zs] = UNI_ENTRY(Zs),
 };
 
-static const int Lt_group = 1;
 static const int num_unicode_groups = sizeof unicode_groups / sizeof unicode_groups[0];
 
 #endif