summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorTyge Løvset <[email protected]>2023-01-02 22:36:51 +0100
committerTyge Løvset <[email protected]>2023-01-02 22:36:51 +0100
commit16e004c62f8d8d502152a85b2ffd384a1c91a470 (patch)
tree368ed5b319c18f88d0ae2e8c291b0c3889ac72c5
parent364b8833cb5d91bbe2c7640869912cde4de12846 (diff)
downloadSTC-modified-16e004c62f8d8d502152a85b2ffd384a1c91a470.tar.gz
STC-modified-16e004c62f8d8d502152a85b2ffd384a1c91a470.zip
Replaced c_STATIC_ASSERT() with a version that works in C99 (and can be used multiple times on the same line).
Some regex updates.
-rw-r--r--docs/cregex_api.md20
-rw-r--r--include/stc/ccommon.h5
-rw-r--r--src/utf8code.c2
3 files changed, 13 insertions, 14 deletions
diff --git a/docs/cregex_api.md b/docs/cregex_api.md
index 91868235..20cb5d6d 100644
--- a/docs/cregex_api.md
+++ b/docs/cregex_api.md
@@ -177,17 +177,11 @@ For reference, **cregex** uses the following files:
| \B | Not UTF8 word boundary | * |
| \Q | Start literal input mode | * |
| \E | End literal input mode | * |
-| (?i) (?-i) | Ignore case on/off (override global) | * |
-| (?s) (?-s) | Dot matches newline on/off (override global) | * |
+| (?i) (?-i) | Ignore case on/off (override CREG_C_ICASE) | * |
+| (?s) (?-s) | Dot matches newline on/off (override CREG_C_DOTALL) | * |
| \n \t \r | Match UTF8 newline, tab, carriage return | |
| \d \s \w | Match UTF8 digit, whitespace, alphanumeric character | |
| \D \S \W | Do not match the groups described above | |
-| \p{Alpha} | Match UTF8 alpha (L& Ll) | * |
-| \p{Alnum} | Match UTF8 alphanumeric (Lu Ll Nd Nl) | * |
-| \p{Blank} | Match UTF8 blank (Zs \t) | * |
-| \p{Space} | Match UTF8 whitespace: (Zs \t\r\n\v\f] | * |
-| \p{Word} | Match UTF8 word character: (Alnum Pc) | * |
-| \p{XDigit} | Match hex number | * |
| \p{Cc} or \p{Cntrl} | Match UTF8 control char | * |
| \p{Ll} or \p{Lower} | Match UTF8 lowercase letter | * |
| \p{Lu} or \p{Upper} | Match UTF8 uppercase letter | * |
@@ -203,6 +197,12 @@ For reference, **cregex** uses the following files:
| \p{Zl} | Match UTF8 line separator | * |
| \p{Zp} | Match UTF8 paragraph separator | * |
| \p{Zs} | Match UTF8 space separator | * |
+| \p{Alpha} | Match UTF8 alphabetic letter (L& Nl) | * |
+| \p{Alnum} | Match UTF8 alphanumeric character (L& Nl Nd) | * |
+| \p{Blank} | Match UTF8 blank (Zs \t) | * |
+| \p{Space} | Match UTF8 whitespace: (Zs \t\r\n\v\f) | * |
+| \p{Word} | Match UTF8 word character: (Alnum Pc) | * |
+| \p{XDigit} | Match hex number | * |
| \P{***Class***} | Do not match the classes described above | * |
| [:alnum:] [:alpha:] [:ascii:] | Match ASCII character class. NB: only to be used inside [] brackets | * |
| [:blank:] [:cntrl:] [:digit:] | " | * |
@@ -210,7 +210,7 @@ For reference, **cregex** uses the following files:
| [:punct:] [:space:] [:upper:] | " | * |
| [:xdigit:] [:word:] | " | * |
| [:^***class***:] | Match character not in the ASCII class | * |
-| $***n*** | *n*-th substitution backreference to capture group. ***n*** in 0-9. $0 is the entire match. | * |
+| $***n*** | Backreference to the *n*-th capture group in the replacement string. ***n*** in 0-9. $0 is the entire match. | * |
| $***nn;*** | As above, but can handle ***nn*** < CREG_MAX_CAPTURES. | * |
## Limitations
@@ -219,6 +219,6 @@ The main goal of **cregex** is to be small and fast with limited but useful unic
- In order to limit table sizes, most general UTF8 character classes are missing, like \p{L}, \p{S}, and all specific scripts like \p{Greek} etc. Some/all of these may be added in the future as an alternative source file with unicode tables to link with.
- {n, m} syntax for repeating previous token min-max times.
- Non-capturing groups
-- Lookaround and backreferences
+- Lookaround and backreferences (cannot be implemented efficiently).
If you need a more feature-complete but larger library, use [RE2 with C-wrapper](https://github.com/google/re2), which uses the same type of regex engine as **cregex**, or use [PCRE2](https://www.pcre.org/).
diff --git a/include/stc/ccommon.h b/include/stc/ccommon.h
index ce6bba84..9cdbc9e5 100644
--- a/include/stc/ccommon.h
+++ b/include/stc/ccommon.h
@@ -56,14 +56,13 @@
#define c_PASTE(a, b) c_CONCAT(a, b)
#define c_EXPAND(...) __VA_ARGS__
#define c_NUMARGS(...) _c_APPLY_ARG_N((__VA_ARGS__, _c_RSEQ_N))
-
#define _c_APPLY_ARG_N(args) c_EXPAND(_c_ARG_N args)
#define _c_RSEQ_N 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
#define _c_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \
_14, _15, _16, N, ...) N
-#define c_STATIC_ASSERT(cond) \
- typedef char c_PASTE(_static_assert_line_, __LINE__)[(cond) ? 1 : -1]
+#define c_STATIC_ASSERT(cond, msg) \
+ ((void)sizeof(int[(cond) ? 1 : -1]))
#define c_CONTAINER_OF(p, T, m) \
((T*)((char*)(p) + 0*sizeof((p) == &((T*)0)->m) - offsetof(T, m)))
diff --git a/src/utf8code.c b/src/utf8code.c
index 71b086c2..4b657cc4 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -148,7 +148,7 @@ bool utf8_isalnum(uint32_t c) {
}
bool utf8_isblank(uint32_t c) {
- if (c < 128) return isblank(c) != 0;
+ if (c < 128) return (c == ' ') | (c == '\t');
return utf8_isgroup(U8G_Zs, c);
}