diff options
| author | Tyge Løvset <[email protected]> | 2023-01-02 10:33:04 +0100 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2023-01-02 10:33:04 +0100 |
| commit | 5c454e721656618c36674e0df676091106592c2d (patch) | |
| tree | 95d7e4f6355c8fbfebd8aba5474ab90e3f13f120 /src | |
| parent | 8c6ba8a3444e4b8640e7fe04f565cef57c850432 (diff) | |
| download | STC-modified-5c454e721656618c36674e0df676091106592c2d.tar.gz STC-modified-5c454e721656618c36674e0df676091106592c2d.zip | |
Fixes the previous commit.
Diffstat (limited to 'src')
| -rw-r--r-- | src/cregex.c | 35 | ||||
| -rw-r--r-- | src/utf8code.c | 30 |
2 files changed, 32 insertions, 33 deletions
diff --git a/src/cregex.c b/src/cregex.c index 7b528550..006c2d05 100644 --- a/src/cregex.c +++ b/src/cregex.c @@ -133,22 +133,22 @@ enum { UTF_an , UTF_AN, /* utf8 alphanumeric */ UTF_wr , UTF_WR, /* utf8 word */ UTF_xd , UTF_XD, /* utf8 hex digit */ - U8G_tmp , U8G = U8G_tmp + (U8G_tmp & 1), /* force even */ - UTF_cc = U8G, UTF_CC, /* utf8 control char */ UTF_lc , UTF_LC, /* utf8 letter cased */ UTF_ll , UTF_LL, /* utf8 letter lowercase */ - UTF_lt , UTF_LT, /* utf8 letter titlecase */ UTF_lu , UTF_LU, /* utf8 letter uppercase */ - UTF_nd , UTF_ND, /* utf8 number decimal */ - UTF_nl , UTF_NL, /* utf8 number letter */ - UTF_pc , UTF_PC, /* utf8 punct connector */ - UTF_pd , UTF_PD, /* utf8 punct dash */ - UTF_pf , UTF_PF, /* utf8 punct final */ - UTF_pi , UTF_PI, /* utf8 punct initial */ - UTF_sc , UTF_SC, /* utf8 symbol currency */ - UTF_zl , UTF_ZL, /* utf8 separator line */ - UTF_zp , UTF_ZP, /* utf8 separator paragraph */ - UTF_zs , UTF_ZS, /* utf8 separator space */ + UTF_GRP = 0x8150000, + UTF_cc = UTF_GRP+2*U8G_Cc, UTF_CC, /* utf8 control char */ + UTF_lt = UTF_GRP+2*U8G_Lt, UTF_LT, /* utf8 letter titlecase */ + UTF_nd = UTF_GRP+2*U8G_Nd, UTF_ND, /* utf8 number decimal */ + UTF_nl = UTF_GRP+2*U8G_Nl, UTF_NL, /* utf8 number letter */ + UTF_pc = UTF_GRP+2*U8G_Pc, UTF_PC, /* utf8 punct connector */ + UTF_pd = UTF_GRP+2*U8G_Pd, UTF_PD, /* utf8 punct dash */ + UTF_pf = UTF_GRP+2*U8G_Pf, UTF_PF, /* utf8 punct final */ + UTF_pi = UTF_GRP+2*U8G_Pi, UTF_PI, /* utf8 punct initial */ + UTF_sc = UTF_GRP+2*U8G_Sc, UTF_SC, /* utf8 symbol currency */ + UTF_zl = UTF_GRP+2*U8G_Zl, UTF_ZL, /* utf8 separator line */ + UTF_zp = UTF_GRP+2*U8G_Zp, UTF_ZP, /* utf8 separator paragraph */ + UTF_zs = UTF_GRP+2*U8G_Zs, UTF_ZS, /* utf8 separator space */ TOK_ANY = 0x8200000, /* Any character except newline, . */ TOK_ANYNL , /* Any character including newline, . 
*/ TOK_NOP , /* No operation, internal use only */ @@ -896,7 +896,8 @@ out: static int _runematch(_Rune s, _Rune r) { - int inv = 0, n; + int inv = 0; + uint32_t n; switch (s) { case ASC_D: inv = 1; case ASC_d: return inv ^ (isdigit(r) != 0); case ASC_S: inv = 1; case ASC_s: return inv ^ (isspace(r) != 0); @@ -915,11 +916,11 @@ _runematch(_Rune s, _Rune r) case UTF_AN: inv = 1; case UTF_an: return inv ^ utf8_isalnum(r); case UTF_WR: inv = 1; case UTF_wr: return inv ^ (utf8_isalnum(r) | (r == '_')); case UTF_XD: inv = 1; case UTF_xd: return inv ^ utf8_isxdigit(r); + case UTF_LL: inv = 1; case UTF_ll: return inv ^ utf8_islower(r); + case UTF_LU: inv = 1; case UTF_lu: return inv ^ utf8_isupper(r); case UTF_LC: inv = 1; case UTF_lc: return inv ^ utf8_isalpha(r); case UTF_CC: case UTF_cc: - case UTF_LL: case UTF_ll: case UTF_LT: case UTF_lt: - case UTF_LU: case UTF_lu: case UTF_ND: case UTF_nd: case UTF_NL: case UTF_nl: case UTF_PC: case UTF_pc: @@ -930,7 +931,7 @@ _runematch(_Rune s, _Rune r) case UTF_ZL: case UTF_zl: case UTF_ZP: case UTF_zp: case UTF_ZS: case UTF_zs: - n = s - U8G; + n = s - UTF_GRP; inv = n & 1; return inv ^ utf8_isgroup(n / 2, r); } diff --git a/src/utf8code.c b/src/utf8code.c index 6fe8515e..8f2ce107 100644 --- a/src/utf8code.c +++ b/src/utf8code.c @@ -125,7 +125,6 @@ typedef struct { static const UGroup unicode_groups[]; static const int num_unicode_groups; -static const int Lt_group; bool utf8_isgroup(int group, uint32_t c) { for (int j=0; j<unicode_groups[group].nr16; ++j) { @@ -153,7 +152,7 @@ bool utf8_isalnum(uint32_t c) { bool utf8_isalpha(uint32_t c) { if (c < 128) return isalpha(c) != 0; - return utf8_islower(c) || utf8_isupper(c) || utf8_isgroup(Lt_group, c); + return utf8_islower(c) || utf8_isupper(c) || utf8_isgroup(U8G_Lt, c); } static const URange16 Cc_range16[] = { // Control @@ -321,24 +320,23 @@ static const URange16 Zs_range16[] = { // Space separator }; #define UNI_ENTRY(Code) \ - { #Code, Code##_range16, 
sizeof(Code##_range16)/2 } + { #Code, Code##_range16, sizeof(Code##_range16)/(2*2) } static const UGroup unicode_groups[] = { - UNI_ENTRY(Cc), - UNI_ENTRY(Lt), - UNI_ENTRY(Nd), - UNI_ENTRY(Nl), - UNI_ENTRY(Pc), - UNI_ENTRY(Pd), - UNI_ENTRY(Pf), - UNI_ENTRY(Pi), - UNI_ENTRY(Sc), - UNI_ENTRY(Zl), - UNI_ENTRY(Zp), - UNI_ENTRY(Zs), + [U8G_Cc] = UNI_ENTRY(Cc), + [U8G_Lt] = UNI_ENTRY(Lt), + [U8G_Nd] = UNI_ENTRY(Nd), + [U8G_Nl] = UNI_ENTRY(Nl), + [U8G_Pc] = UNI_ENTRY(Pc), + [U8G_Pd] = UNI_ENTRY(Pd), + [U8G_Pf] = UNI_ENTRY(Pf), + [U8G_Pi] = UNI_ENTRY(Pi), + [U8G_Sc] = UNI_ENTRY(Sc), + [U8G_Zl] = UNI_ENTRY(Zl), + [U8G_Zp] = UNI_ENTRY(Zp), + [U8G_Zs] = UNI_ENTRY(Zs), }; -static const int Lt_group = 1; static const int num_unicode_groups = sizeof unicode_groups / sizeof unicode_groups[0]; #endif |
