Replaced c_STATIC_ASSERT() with an implementation that works in C99 (including multiple uses on the same line).

Some regex updates.
author: Tyge Løvset <[email protected]> 2023-01-02 22:36:51 +0100
committer: Tyge Løvset <[email protected]> 2023-01-02 22:36:51 +0100
commit: 16e004c62f8d8d502152a85b2ffd384a1c91a470 (patch)
tree: 368ed5b319c18f88d0ae2e8c291b0c3889ac72c5
parent: 364b8833cb5d91bbe2c7640869912cde4de12846 (diff)
download: STC-modified-16e004c62f8d8d502152a85b2ffd384a1c91a470.tar.gz
STC-modified-16e004c62f8d8d502152a85b2ffd384a1c91a470.zip
3 files changed, 13 insertions, 14 deletions
diff --git a/docs/cregex_api.md b/docs/cregex_api.md
index 91868235..20cb5d6d 100644
--- a/docs/cregex_api.md
+++ b/docs/cregex_api.md
@@ -177,17 +177,11 @@ For reference, **cregex** uses the following files:
 | \B | Not UTF8 word boundary | * |
 | \Q | Start literal input mode | * |
 | \E | End literal input mode | * |
-| (?i) (?-i)  | Ignore case on/off (override global) | * |
-| (?s) (?-s)  | Dot matches newline on/off (override global) | * |
+| (?i) (?-i)  | Ignore case on/off (override CREG_C_ICASE) | * |
+| (?s) (?-s)  | Dot matches newline on/off (override CREG_C_DOTALL) | * |
 | \n \t \r | Match UTF8 newline, tab, carriage return | |
 | \d \s \w | Match UTF8 digit, whitespace, alphanumeric character | |
 | \D \S \W | Do not match the groups described above | |
-| \p{Alpha} | Match UTF8 alpha (L& Ll) | * |
-| \p{Alnum} | Match UTF8 alphanumeric (Lu Ll Nd Nl) | * |
-| \p{Blank} | Match UTF8 blank (Zs \t) | * |
-| \p{Space} | Match UTF8 whitespace: (Zs \t\r\n\v\f] | * |
-| \p{Word} | Match UTF8 word character: (Alnum Pc) | * |
-| \p{XDigit} | Match hex number | * |
 | \p{Cc} or \p{Cntrl} | Match UTF8 control char | * |
 | \p{Ll} or \p{Lower} | Match UTF8 lowercase letter | * |
 | \p{Lu} or \p{Upper} | Match UTF8 uppercase letter | * |
@@ -203,6 +197,12 @@ For reference, **cregex** uses the following files:
 | \p{Zl} | Match UTF8 line separator | * |
 | \p{Zp} | Match UTF8 paragraph separator | * |
 | \p{Zs} | Match UTF8 space separator | * |
+| \p{Alpha} | Match UTF8 alphabetic letter (L& Nl) | * |
+| \p{Alnum} | Match UTF8 alpha-numeric letter (L& Nl Nd) | * |
+| \p{Blank} | Match UTF8 blank (Zs \t) | * |
+| \p{Space} | Match UTF8 whitespace: (Zs \t\r\n\v\f] | * |
+| \p{Word} | Match UTF8 word character: (Alnum Pc) | * |
+| \p{XDigit} | Match hex number | * |
 | \P{***Class***} | Do not match the classes described above | * |
 | [:alnum:] [:alpha:] [:ascii:] | Match ASCII character class. NB: only to be used inside [] brackets | * |
 | [:blank:] [:cntrl:] [:digit:] | " | * |
@@ -210,7 +210,7 @@ For reference, **cregex** uses the following files:
 | [:punct:] [:space:] [:upper:] | " | * |
 | [:xdigit:] [:word:] | " | * |
 | [:^***class***:] | Match character not in the ASCII class | * |
-| $***n*** | *n*-th substitution backreference to capture group. ***n*** in 0-9. $0 is the entire match. | * |
+| $***n*** | *n*-th replace backreference to capture group. ***n*** in 0-9. $0 is the entire match. | * |
 | $***nn;*** | As above, but can handle ***nn*** < CREG_MAX_CAPTURES. | * |
 
 ## Limitations
@@ -219,6 +219,6 @@ The main goal of **cregex** is to be small and fast with limited but useful unic
 - In order to limit table sizes, most general UTF8 character classes are missing, like \p{L}, \p{S}, and all specific scripts like \p{Greek} etc. Some/all of these may be added in the future as an alternative source file with unicode tables to link with.
 - {n, m} syntax for repeating previous token min-max times.
 - Non-capturing groups
-- Lookaround and backreferences
+- Lookaround and backreferences (cannot be implemented efficiently).
 
 If you need a more feature complete, but bigger library, use [RE2 with C-wrapper](https://github.com/google/re2) which uses the same type of regex engine as **cregex**, or use [PCRE2](https://www.pcre.org/).
diff --git a/include/stc/ccommon.h b/include/stc/ccommon.h
index ce6bba84..9cdbc9e5 100644
--- a/include/stc/ccommon.h
+++ b/include/stc/ccommon.h
@@ -56,14 +56,13 @@
 #define c_PASTE(a, b) c_CONCAT(a, b)
 #define c_EXPAND(...) __VA_ARGS__
 #define c_NUMARGS(...) _c_APPLY_ARG_N((__VA_ARGS__, _c_RSEQ_N))
-
 #define _c_APPLY_ARG_N(args) c_EXPAND(_c_ARG_N args)
 #define _c_RSEQ_N 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
 #define _c_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \
                  _14, _15, _16, N, ...) N
 
-#define c_STATIC_ASSERT(cond) \
-    typedef char c_PASTE(_static_assert_line_, __LINE__)[(cond) ? 1 : -1]
+#define c_STATIC_ASSERT(cond, msg) \
+    ((void)sizeof(int[(cond) ? 1 : -1]))
 #define c_CONTAINER_OF(p, T, m) \
     ((T*)((char*)(p) + 0*sizeof((p) == &((T*)0)->m) - offsetof(T, m)))
 
diff --git a/src/utf8code.c b/src/utf8code.c
index 71b086c2..4b657cc4 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -148,7 +148,7 @@ bool utf8_isalnum(uint32_t c) {
 }
 
 bool utf8_isblank(uint32_t c) {
-    if (c < 128) return isblank(c) != 0;
+    if (c < 128) return (c == ' ') | (c == '\t');
     return utf8_isgroup(U8G_Zs, c);
 }
author	Tyge Løvset <[email protected]>	2023-01-02 22:36:51 +0100
committer	Tyge Løvset <[email protected]>	2023-01-02 22:36:51 +0100
commit	16e004c62f8d8d502152a85b2ffd384a1c91a470 (patch)
tree	368ed5b319c18f88d0ae2e8c291b0c3889ac72c5
parent	364b8833cb5d91bbe2c7640869912cde4de12846 (diff)
download	STC-modified-16e004c62f8d8d502152a85b2ffd384a1c91a470.tar.gz STC-modified-16e004c62f8d8d502152a85b2ffd384a1c91a470.zip