diff options
| author | Tyge Løvset <[email protected]> | 2023-01-02 22:36:51 +0100 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2023-01-02 22:36:51 +0100 |
| commit | 16e004c62f8d8d502152a85b2ffd384a1c91a470 (patch) | |
| tree | 368ed5b319c18f88d0ae2e8c291b0c3889ac72c5 | |
| parent | 364b8833cb5d91bbe2c7640869912cde4de12846 (diff) | |
| download | STC-modified-16e004c62f8d8d502152a85b2ffd384a1c91a470.tar.gz STC-modified-16e004c62f8d8d502152a85b2ffd384a1c91a470.zip | |
Replaced c_STATIC_ASSERT() with a version that works in C99 (and can be used multiple times on the same line).
Some regex updates.
| -rw-r--r-- | docs/cregex_api.md | 20 | ||||
| -rw-r--r-- | include/stc/ccommon.h | 5 | ||||
| -rw-r--r-- | src/utf8code.c | 2 |
3 files changed, 13 insertions, 14 deletions
diff --git a/docs/cregex_api.md b/docs/cregex_api.md index 91868235..20cb5d6d 100644 --- a/docs/cregex_api.md +++ b/docs/cregex_api.md @@ -177,17 +177,11 @@ For reference, **cregex** uses the following files: | \B | Not UTF8 word boundary | * | | \Q | Start literal input mode | * | | \E | End literal input mode | * | -| (?i) (?-i) | Ignore case on/off (override global) | * | -| (?s) (?-s) | Dot matches newline on/off (override global) | * | +| (?i) (?-i) | Ignore case on/off (override CREG_C_ICASE) | * | +| (?s) (?-s) | Dot matches newline on/off (override CREG_C_DOTALL) | * | | \n \t \r | Match UTF8 newline, tab, carriage return | | | \d \s \w | Match UTF8 digit, whitespace, alphanumeric character | | | \D \S \W | Do not match the groups described above | | -| \p{Alpha} | Match UTF8 alpha (L& Ll) | * | -| \p{Alnum} | Match UTF8 alphanumeric (Lu Ll Nd Nl) | * | -| \p{Blank} | Match UTF8 blank (Zs \t) | * | -| \p{Space} | Match UTF8 whitespace: (Zs \t\r\n\v\f] | * | -| \p{Word} | Match UTF8 word character: (Alnum Pc) | * | -| \p{XDigit} | Match hex number | * | | \p{Cc} or \p{Cntrl} | Match UTF8 control char | * | | \p{Ll} or \p{Lower} | Match UTF8 lowercase letter | * | | \p{Lu} or \p{Upper} | Match UTF8 uppercase letter | * | @@ -203,6 +197,12 @@ For reference, **cregex** uses the following files: | \p{Zl} | Match UTF8 line separator | * | | \p{Zp} | Match UTF8 paragraph separator | * | | \p{Zs} | Match UTF8 space separator | * | +| \p{Alpha} | Match UTF8 alphabetic letter (L& Nl) | * | +| \p{Alnum} | Match UTF8 alpha-numeric letter (L& Nl Nd) | * | +| \p{Blank} | Match UTF8 blank (Zs \t) | * | +| \p{Space} | Match UTF8 whitespace: (Zs \t\r\n\v\f] | * | +| \p{Word} | Match UTF8 word character: (Alnum Pc) | * | +| \p{XDigit} | Match hex number | * | | \P{***Class***} | Do not match the classes described above | * | | [:alnum:] [:alpha:] [:ascii:] | Match ASCII character class. 
NB: only to be used inside [] brackets | * | | [:blank:] [:cntrl:] [:digit:] | " | * | @@ -210,7 +210,7 @@ For reference, **cregex** uses the following files: | [:punct:] [:space:] [:upper:] | " | * | | [:xdigit:] [:word:] | " | * | | [:^***class***:] | Match character not in the ASCII class | * | -| $***n*** | *n*-th substitution backreference to capture group. ***n*** in 0-9. $0 is the entire match. | * | +| $***n*** | *n*-th replace backreference to capture group. ***n*** in 0-9. $0 is the entire match. | * | | $***nn;*** | As above, but can handle ***nn*** < CREG_MAX_CAPTURES. | * | ## Limitations @@ -219,6 +219,6 @@ The main goal of **cregex** is to be small and fast with limited but useful unic - In order to limit table sizes, most general UTF8 character classes are missing, like \p{L}, \p{S}, and all specific scripts like \p{Greek} etc. Some/all of these may be added in the future as an alternative source file with unicode tables to link with. - {n, m} syntax for repeating previous token min-max times. - Non-capturing groups -- Lookaround and backreferences +- Lookaround and backreferences (cannot be implemented efficiently). If you need a more feature complete, but bigger library, use [RE2 with C-wrapper](https://github.com/google/re2) which uses the same type of regex engine as **cregex**, or use [PCRE2](https://www.pcre.org/). diff --git a/include/stc/ccommon.h b/include/stc/ccommon.h index ce6bba84..9cdbc9e5 100644 --- a/include/stc/ccommon.h +++ b/include/stc/ccommon.h @@ -56,14 +56,13 @@ #define c_PASTE(a, b) c_CONCAT(a, b) #define c_EXPAND(...) __VA_ARGS__ #define c_NUMARGS(...) _c_APPLY_ARG_N((__VA_ARGS__, _c_RSEQ_N)) - #define _c_APPLY_ARG_N(args) c_EXPAND(_c_ARG_N args) #define _c_RSEQ_N 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 #define _c_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \ _14, _15, _16, N, ...) N -#define c_STATIC_ASSERT(cond) \ - typedef char c_PASTE(_static_assert_line_, __LINE__)[(cond) ? 
1 : -1] +#define c_STATIC_ASSERT(cond, msg) \ + ((void)sizeof(int[(cond) ? 1 : -1])) #define c_CONTAINER_OF(p, T, m) \ ((T*)((char*)(p) + 0*sizeof((p) == &((T*)0)->m) - offsetof(T, m))) diff --git a/src/utf8code.c b/src/utf8code.c index 71b086c2..4b657cc4 100644 --- a/src/utf8code.c +++ b/src/utf8code.c @@ -148,7 +148,7 @@ bool utf8_isalnum(uint32_t c) { } bool utf8_isblank(uint32_t c) { - if (c < 128) return isblank(c) != 0; + if (c < 128) return (c == ' ') | (c == '\t'); return utf8_isgroup(U8G_Zs, c); } |
