summary | refs | log | tree | commit | diff | homepage
path: root/include
diff options
context:
space:
mode:
author: Tyge Løvset <[email protected]> 2022-01-07 15:08:43 +0100
committer: Tyge Løvset <[email protected]> 2022-01-07 15:08:43 +0100
commit: 0a9910eee6582e6ee414071a0d5e7062448989cf (patch)
tree: 591b3fa6278b950862a1d533052195907562e93b /include
parent: bcd76fdeb1b7b5ac01ac9a204db74b537361c8b0 (diff)
download: STC-modified-0a9910eee6582e6ee414071a0d5e7062448989cf.tar.gz
download: STC-modified-0a9910eee6582e6ee414071a0d5e7062448989cf.zip
Removed tabs. Changed cregex_match members to start, end.
Diffstat (limited to 'include')
-rw-r--r-- include/stc/cregex.h 1373
1 files changed, 686 insertions, 687 deletions
diff --git a/include/stc/cregex.h b/include/stc/cregex.h
index 036508db..eafefc4c 100644
--- a/include/stc/cregex.h
+++ b/include/stc/cregex.h
@@ -32,12 +32,12 @@
#include <stdarg.h>
typedef struct {
- union cregex_node *nodes;
+ union cregex_node *nodes;
} cregex;
typedef struct {
- size_t match_begin;
- size_t match_end;
+ size_t start;
+ size_t end;
} cregex_match;
#define i_type cregex_result
@@ -45,15 +45,15 @@ typedef struct {
#include "cstack.h"
typedef enum {
- cregex_OK = 0,
- cregex_FAILED_ALLOC,
- cregex_INVALID_UTF8,
- cregex_INVALID_PARAMS,
- cregex_EARLY_QUANTIFIER,
- cregex_INVALID_COMPLEX_QUANT,
- cregex_UNEXPECTED_EOL,
- cregex_INVALID_COMPLEX_CLASS,
- cregex_UNCLOSED_SUBEXPRESSION,
+ cregex_OK = 0,
+ cregex_FAILED_ALLOC,
+ cregex_INVALID_UTF8,
+ cregex_INVALID_PARAMS,
+ cregex_EARLY_QUANTIFIER,
+ cregex_INVALID_COMPLEX_QUANT,
+ cregex_UNEXPECTED_EOL,
+ cregex_INVALID_COMPLEX_CLASS,
+ cregex_UNCLOSED_SUBEXPRESSION,
} cregex_error_t;
/* check if a given string is valid utf8 */
@@ -98,56 +98,56 @@ enum {
};
static const uint8_t _rx_utf8d[] = {
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
- 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
- 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
- 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
- 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
- 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
- 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
- 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
- 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+ 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+ 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+ 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+ 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+ 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+ 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
};
static inline uint32_t _rx_utf8_decode(uint32_t *state, uint32_t *codep,
- const uint32_t byte)
+ const uint32_t byte)
{
- const uint32_t type = _rx_utf8d[byte];
- const uint32_t x = (uint32_t) -(*state != _rx_UTF8_ACCEPT);
+ const uint32_t type = _rx_utf8d[byte];
+ const uint32_t x = (uint32_t) -(*state != _rx_UTF8_ACCEPT);
- *codep = (x & ((byte & 0x3fu) | (*codep << 6)))
- | (~x & ((0xff >> type) & (byte)));
+ *codep = (x & ((byte & 0x3fu) | (*codep << 6)))
+ | (~x & ((0xff >> type) & (byte)));
- *state = _rx_utf8d[256 + (*state << 4) + type];
- return *state;
+ *state = _rx_utf8d[256 + (*state << 4) + type];
+ return *state;
}
static bool _rx_utf8_count_codepoints(size_t *count, const uint8_t *s)
{
- uint32_t state = _rx_UTF8_ACCEPT, codepoint;
-
- for (*count = 0; *s; ++s)
- *count += !_rx_utf8_decode(&state, &codepoint, *s);
- return state == _rx_UTF8_ACCEPT;
+ uint32_t state = _rx_UTF8_ACCEPT, codepoint;
+
+ for (*count = 0; *s; ++s)
+ *count += !_rx_utf8_decode(&state, &codepoint, *s);
+ return state == _rx_UTF8_ACCEPT;
}
STC_DEF bool cregex_valid_utf8(const char *s)
{
- size_t count;
- bool valid = _rx_utf8_count_codepoints(&count, (const uint8_t *)s);
- return valid;
+ size_t count;
+ bool valid = _rx_utf8_count_codepoints(&count, (const uint8_t *)s);
+ return valid;
}
static inline uint32_t _rx_utf8_peek(const char *s)
{
- uint32_t state = _rx_UTF8_ACCEPT, codepoint;
- _rx_utf8_decode(&state, &codepoint, (uint8_t)s[0]);
- return codepoint;
+ uint32_t state = _rx_UTF8_ACCEPT, codepoint;
+ _rx_utf8_decode(&state, &codepoint, (uint8_t)s[0]);
+ return codepoint;
}
static inline uint32_t _rx_utf8_char_width(uint8_t c)
@@ -162,411 +162,411 @@ static inline uint32_t _rx_utf8_char_width(uint8_t c)
static inline const char *_rx_utf8_next(const char *s)
{
- const char* t = s + _rx_utf8_char_width((uint8_t)s[0]);
-
- uintptr_t p = (uintptr_t)t;
- p &= (uintptr_t) -(*s != 0);
- return (const char *)p;
+ const char* t = s + _rx_utf8_char_width((uint8_t)s[0]);
+
+ uintptr_t p = (uintptr_t)t;
+ p &= (uintptr_t) -(*s != 0);
+ return (const char *)p;
}
/* function pointer type used to evaluate if a regex node
* matched a given string */
typedef bool (*_rx_MatchFunc)(union cregex_node *node, const char *orig,
- const char *cur, const char **next);
+ const char *cur, const char **next);
typedef struct _rx_GenericNode {
- union cregex_node *prev;
- union cregex_node *next;
- _rx_MatchFunc match;
+ union cregex_node *prev;
+ union cregex_node *next;
+ _rx_MatchFunc match;
} _rx_GenericNode;
typedef struct {
- _rx_GenericNode generic;
- uint32_t chr;
+ _rx_GenericNode generic;
+ uint32_t chr;
} _rx_CharNode;
typedef struct {
- _rx_GenericNode generic;
- union cregex_node *subexp;
- size_t min, max;
+ _rx_GenericNode generic;
+ union cregex_node *subexp;
+ size_t min, max;
} _rx_QuantNode;
typedef struct {
- _rx_GenericNode generic;
- uint32_t first, last;
+ _rx_GenericNode generic;
+ uint32_t first, last;
} _rx_RangeNode;
typedef struct {
- _rx_GenericNode generic;
- _rx_RangeNode *ranges;
- bool negate;
+ _rx_GenericNode generic;
+ _rx_RangeNode *ranges;
+ bool negate;
} _rx_ClassNode;
typedef struct {
- _rx_GenericNode generic;
- union cregex_node *subexp;
- cregex_match cap;
+ _rx_GenericNode generic;
+ union cregex_node *subexp;
+ cregex_match cap;
} _rx_CapNode;
typedef struct {
- _rx_GenericNode generic;
- union cregex_node *left;
- union cregex_node *right;
+ _rx_GenericNode generic;
+ union cregex_node *left;
+ union cregex_node *right;
} _rx_OrNode;
typedef union cregex_node {
- _rx_GenericNode generic;
- _rx_CharNode chr;
- _rx_QuantNode quant;
- _rx_ClassNode cls;
- _rx_RangeNode range;
- _rx_CapNode cap;
- _rx_OrNode ior;
+ _rx_GenericNode generic;
+ _rx_CharNode chr;
+ _rx_QuantNode quant;
+ _rx_ClassNode cls;
+ _rx_RangeNode range;
+ _rx_CapNode cap;
+ _rx_OrNode ior;
} cregex_node;
static bool _rx_is_match(cregex_node *node, const char *orig, const char *cur,
- const char **next)
+ const char **next)
{
- if (node == NULL) {
- *next = cur;
- return true;
- } else {
- return ((node->generic.match)(node, orig, cur, next)) &&
- _rx_is_match(node->generic.next, orig, *next, next);
- }
+ if (node == NULL) {
+ *next = cur;
+ return true;
+ } else {
+ return ((node->generic.match)(node, orig, cur, next)) &&
+ _rx_is_match(node->generic.next, orig, *next, next);
+ }
}
static bool _rx_char_is_match(cregex_node *node, const char *orig, const char *cur,
- const char **next)
+ const char **next)
{
- if (*cur == 0) {
- return false;
- }
+ if (*cur == 0) {
+ return false;
+ }
- *next = _rx_utf8_next(cur);
- return node->chr.chr == _rx_utf8_peek(cur);
+ *next = _rx_utf8_next(cur);
+ return node->chr.chr == _rx_utf8_peek(cur);
}
static bool _rx_start_is_match(cregex_node *node, const char *orig, const char *cur,
- const char **next)
+ const char **next)
{
- *next = cur;
- return true;
+ *next = cur;
+ return true;
}
static bool _rx_anchor_begin_is_match(cregex_node *node, const char *orig,
- const char *cur, const char **next)
+ const char *cur, const char **next)
{
- *next = cur;
- return strlen(orig) == strlen(cur);
+ *next = cur;
+ return strlen(orig) == strlen(cur);
}
static bool _rx_anchor_end_is_match(cregex_node *node, const char *orig,
- const char *cur, const char **next)
+ const char *cur, const char **next)
{
- *next = cur;
- return strlen(cur) == 0;
+ *next = cur;
+ return strlen(cur) == 0;
}
static bool _rx_any_is_match(cregex_node *node, const char *orig, const char *cur,
- const char **next)
+ const char **next)
{
- if (*cur) {
- *next = _rx_utf8_next(cur);
- return true;
- }
+ if (*cur) {
+ *next = _rx_utf8_next(cur);
+ return true;
+ }
- return false;
+ return false;
}
static bool _rx_quant_is_match(cregex_node *node, const char *orig, const char *cur,
- const char **next)
+ const char **next)
{
- _rx_QuantNode *quant = (_rx_QuantNode *)node;
- size_t matches = 0;
+ _rx_QuantNode *quant = (_rx_QuantNode *)node;
+ size_t matches = 0;
- while (_rx_is_match(quant->subexp, orig, cur, next)) {
- matches++;
- cur = *next;
+ while (_rx_is_match(quant->subexp, orig, cur, next)) {
+ matches++;
+ cur = *next;
- if (matches >= quant->max)
- break;
- }
+ if (matches >= quant->max)
+ break;
+ }
- *next = cur;
- return matches >= quant->min;
+ *next = cur;
+ return matches >= quant->min;
}
static bool _rx_class_is_match(cregex_node *node, const char *orig, const char *cur,
- const char **next)
+ const char **next)
{
- _rx_ClassNode *cls = (_rx_ClassNode *)node;
+ _rx_ClassNode *cls = (_rx_ClassNode *)node;
- if (*cur == 0)
- return false;
+ if (*cur == 0)
+ return false;
- const uint32_t chr = _rx_utf8_peek(cur);
- *next = _rx_utf8_next(cur);
+ const uint32_t chr = _rx_utf8_peek(cur);
+ *next = _rx_utf8_next(cur);
- bool found = false;
- for (_rx_RangeNode *range = cls->ranges; range != NULL;
- range = (_rx_RangeNode *)range->generic.next) {
- if (chr >= range->first && chr <= range->last) {
- found = true;
- break;
- }
- }
+ bool found = false;
+ for (_rx_RangeNode *range = cls->ranges; range != NULL;
+ range = (_rx_RangeNode *)range->generic.next) {
+ if (chr >= range->first && chr <= range->last) {
+ found = true;
+ break;
+ }
+ }
- if (cls->negate)
- found = !found;
+ if (cls->negate)
+ found = !found;
- return found;
+ return found;
}
static bool _rx_cap_is_match(cregex_node *node, const char *orig, const char *cur,
- const char **next)
+ const char **next)
{
- _rx_CapNode *cap = (_rx_CapNode *)node;
+ _rx_CapNode *cap = (_rx_CapNode *)node;
- if (_rx_is_match(cap->subexp, orig, cur, next)) {
- cap->cap.match_begin = cur - orig;
- cap->cap.match_end = (*next) - orig;
- return true;
- }
+ if (_rx_is_match(cap->subexp, orig, cur, next)) {
+ cap->cap.start = cur - orig;
+ cap->cap.end = (*next) - orig;
+ return true;
+ }
- return false;
+ return false;
}
static bool _rx_or_is_match(cregex_node *node, const char *orig, const char *cur,
- const char **next)
+ const char **next)
{
- _rx_OrNode *ior = (_rx_OrNode *)node;
+ _rx_OrNode *ior = (_rx_OrNode *)node;
- if (ior->generic.next != NULL) {
- ior->right = ior->generic.next;
- ior->generic.next = NULL;
- }
+ if (ior->generic.next != NULL) {
+ ior->right = ior->generic.next;
+ ior->generic.next = NULL;
+ }
- if (_rx_is_match(ior->left, orig, cur, next) && ior->left != NULL) {
- return true;
- }
+ if (_rx_is_match(ior->left, orig, cur, next) && ior->left != NULL) {
+ return true;
+ }
- return _rx_is_match(ior->right, orig, cur, next) && ior->right != NULL;
+ return _rx_is_match(ior->right, orig, cur, next) && ior->right != NULL;
}
/* Global error value with callback address */
struct {
- cregex_error_t err;
- const char *s;
- jmp_buf buf;
+ cregex_error_t err;
+ const char *s;
+ jmp_buf buf;
} _rx_CompileException;
/* set global error value to the default value */
static inline void clear_compile_exception(void)
{
- _rx_CompileException.err = cregex_OK;
- _rx_CompileException.s = NULL;
+ _rx_CompileException.err = cregex_OK;
+ _rx_CompileException.s = NULL;
}
/* set global error value and jump back to the exception handler */
static void _rx_throw_compile_exception(cregex_error_t err, const char *s)
{
- _rx_CompileException.err = err;
- _rx_CompileException.s = s;
- longjmp(_rx_CompileException.buf, 1);
+ _rx_CompileException.err = err;
+ _rx_CompileException.s = s;
+ longjmp(_rx_CompileException.buf, 1);
}
static size_t _rx_calc_compiled_escaped_len(const char *s, const char **leftover)
{
- if (*s == 0)
- _rx_throw_compile_exception(cregex_UNEXPECTED_EOL, s);
+ if (*s == 0)
+ _rx_throw_compile_exception(cregex_UNEXPECTED_EOL, s);
- const uint32_t chr = _rx_utf8_peek(s);
- *leftover = _rx_utf8_next(s);
+ const uint32_t chr = _rx_utf8_peek(s);
+ *leftover = _rx_utf8_next(s);
- switch (chr) {
- case 's':
- return 5;
+ switch (chr) {
+ case 's':
+ return 5;
- case 'S':
- return 5;
+ case 'S':
+ return 5;
- case 'd':
- return 2;
+ case 'd':
+ return 2;
- case 'D':
- return 2;
+ case 'D':
+ return 2;
- case 'w':
- return 5;
+ case 'w':
+ return 5;
- case 'W':
- return 5;
+ case 'W':
+ return 5;
- default:
- return 1;
- }
+ default:
+ return 1;
+ }
}
static const size_t _rx_calc_compiled_class_len(const char *s,
- const char **leftover)
+ const char **leftover)
{
- if (*s == '^')
- s++;
-
- size_t ret = 1;
-
- while (*s && *s != ']') {
- uint32_t chr = _rx_utf8_peek(s);
- s = _rx_utf8_next(s);
- if (chr == '\\') {
- s = _rx_utf8_next(s);
- }
-
- if (*s == '-' && s[1] != ']') {
- s++;
- chr = _rx_utf8_peek(s);
- s = _rx_utf8_next(s);
-
- if (chr == '\\')
- s = _rx_utf8_next(s);
- }
-
- ret++;
- }
-
- if (*s == ']') {
- s++;
- *leftover = s;
- } else {
- _rx_throw_compile_exception(cregex_INVALID_COMPLEX_CLASS, s);
- }
-
- return ret;
+ if (*s == '^')
+ s++;
+
+ size_t ret = 1;
+
+ while (*s && *s != ']') {
+ uint32_t chr = _rx_utf8_peek(s);
+ s = _rx_utf8_next(s);
+ if (chr == '\\') {
+ s = _rx_utf8_next(s);
+ }
+
+ if (*s == '-' && s[1] != ']') {
+ s++;
+ chr = _rx_utf8_peek(s);
+ s = _rx_utf8_next(s);
+
+ if (chr == '\\')
+ s = _rx_utf8_next(s);
+ }
+
+ ret++;
+ }
+
+ if (*s == ']') {
+ s++;
+ *leftover = s;
+ } else {
+ _rx_throw_compile_exception(cregex_INVALID_COMPLEX_CLASS, s);
+ }
+
+ return ret;
}
/* get required amount of memory in amount of nodes
* to _rx_compile regular expressions */
static const size_t _rx_calc_compiled_len(const char *s)
{
- if (*s == 0) {
- return 1;
- } else {
- const uint32_t chr = _rx_utf8_peek(s);
- size_t ret = 0;
- s = _rx_utf8_next(s);
-
- switch (chr) {
- case '{': {
- const char *end = strstr(s, "}");
-
- if (end == NULL)
- _rx_throw_compile_exception(
- cregex_INVALID_COMPLEX_QUANT, s);
-
- s = end + 1;
- ret = 1;
- break;
- }
-
- case '\\':
- ret = _rx_calc_compiled_escaped_len(s, &s);
- break;
-
- case '[':
- ret = _rx_calc_compiled_class_len(s, &s);
- break;
-
- default:
- ret = 1;
- break;
- }
-
- return ret + _rx_calc_compiled_len(s);
- }
+ if (*s == 0) {
+ return 1;
+ } else {
+ const uint32_t chr = _rx_utf8_peek(s);
+ size_t ret = 0;
+ s = _rx_utf8_next(s);
+
+ switch (chr) {
+ case '{': {
+ const char *end = strstr(s, "}");
+
+ if (end == NULL)
+ _rx_throw_compile_exception(
+ cregex_INVALID_COMPLEX_QUANT, s);
+
+ s = end + 1;
+ ret = 1;
+ break;
+ }
+
+ case '\\':
+ ret = _rx_calc_compiled_escaped_len(s, &s);
+ break;
+
+ case '[':
+ ret = _rx_calc_compiled_class_len(s, &s);
+ break;
+
+ default:
+ ret = 1;
+ break;
+ }
+
+ return ret + _rx_calc_compiled_len(s);
+ }
}
static void _rx_append_quant(cregex_node **prev, cregex_node *cur, size_t min,
- size_t max, const char *re)
+ size_t max, const char *re)
{
- cur->generic.match = _rx_quant_is_match;
- cur->generic.next = NULL;
- cur->generic.prev = NULL;
+ cur->generic.match = _rx_quant_is_match;
+ cur->generic.next = NULL;
+ cur->generic.prev = NULL;
- cur->quant.max = max;
- cur->quant.min = min;
- cur->quant.subexp = *prev;
+ cur->quant.max = max;
+ cur->quant.min = min;
+ cur->quant.subexp = *prev;
- *prev = (*prev)->generic.prev;
- if (*prev == NULL)
- _rx_throw_compile_exception(cregex_EARLY_QUANTIFIER, re);
+ *prev = (*prev)->generic.prev;
+ if (*prev == NULL)
+ _rx_throw_compile_exception(cregex_EARLY_QUANTIFIER, re);
- cur->quant.subexp->generic.next = NULL;
- cur->quant.subexp->generic.prev = NULL;
+ cur->quant.subexp->generic.next = NULL;
+ cur->quant.subexp->generic.prev = NULL;
}
static inline bool _rx_is_digit(uint32_t c)
{
- return c >= '0' && c <= '9';
+ return c >= '0' && c <= '9';
}
static size_t _rx_parse_digit(const char *s, const char **leftover)
{
- size_t ret = 0;
-
- while (*s) {
- uint32_t chr = _rx_utf8_peek(s);
-
- if (_rx_is_digit(chr)) {
- ret *= 10;
- ret += chr - '0';
- s = _rx_utf8_next(s);
- } else {
- break;
- }
- }
-
- *leftover = s;
- return ret;
+ size_t ret = 0;
+
+ while (*s) {
+ uint32_t chr = _rx_utf8_peek(s);
+
+ if (_rx_is_digit(chr)) {
+ ret *= 10;
+ ret += chr - '0';
+ s = _rx_utf8_next(s);
+ } else {
+ break;
+ }
+ }
+
+ *leftover = s;
+ return ret;
}
/* parse complex quantifier of format {m,n}
* valid formats: {,} {m,} {,n} {m} {m,n} */
static void _rx_parse_complex_quant(const char *re, const char **leftover,
- size_t *min_p, size_t *max_p)
+ size_t *min_p, size_t *max_p)
{
- if (*re == 0)
- _rx_throw_compile_exception(cregex_INVALID_COMPLEX_QUANT, re);
-
- uint32_t tmp = _rx_utf8_peek(re);
- size_t min = 0, max = SIZE_MAX;
-
- if (_rx_is_digit(tmp)) {
- min = _rx_parse_digit(re, &re);
- } else if (tmp != ',') {
- _rx_throw_compile_exception(cregex_INVALID_COMPLEX_QUANT, re);
- }
-
- tmp = _rx_utf8_peek(re);
-
- if (tmp == ',') {
- re = _rx_utf8_next(re);
- if (_rx_is_digit(_rx_utf8_peek(re)))
- max = _rx_parse_digit(re, &re);
- else
- max = SIZE_MAX;
- } else {
- max = min;
- }
-
- tmp = _rx_utf8_peek(re);
- if (tmp == '}') {
- *leftover = re + 1;
- *min_p = min;
- *max_p = max;
- } else {
- _rx_throw_compile_exception(cregex_INVALID_COMPLEX_QUANT, re);
- }
+ if (*re == 0)
+ _rx_throw_compile_exception(cregex_INVALID_COMPLEX_QUANT, re);
+
+ uint32_t tmp = _rx_utf8_peek(re);
+ size_t min = 0, max = SIZE_MAX;
+
+ if (_rx_is_digit(tmp)) {
+ min = _rx_parse_digit(re, &re);
+ } else if (tmp != ',') {
+ _rx_throw_compile_exception(cregex_INVALID_COMPLEX_QUANT, re);
+ }
+
+ tmp = _rx_utf8_peek(re);
+
+ if (tmp == ',') {
+ re = _rx_utf8_next(re);
+ if (_rx_is_digit(_rx_utf8_peek(re)))
+ max = _rx_parse_digit(re, &re);
+ else
+ max = SIZE_MAX;
+ } else {
+ max = min;
+ }
+
+ tmp = _rx_utf8_peek(re);
+ if (tmp == '}') {
+ *leftover = re + 1;
+ *min_p = min;
+ *max_p = max;
+ } else {
+ _rx_throw_compile_exception(cregex_INVALID_COMPLEX_QUANT, re);
+ }
}
/* append character class to linked list of nodes with
@@ -574,487 +574,486 @@ static void _rx_parse_complex_quant(const char *re, const char **leftover,
* to next */
static cregex_node *_rx_append_class(cregex_node *cur, bool negate, size_t n, ...)
{
- cur->cls.negate = negate;
- cur->cls.ranges = (_rx_RangeNode *)(n ? cur + 1 : NULL);
- cur->generic.match = _rx_class_is_match;
- cur->generic.next = NULL;
- cur->generic.prev = NULL;
+ cur->cls.negate = negate;
+ cur->cls.ranges = (_rx_RangeNode *)(n ? cur + 1 : NULL);
+ cur->generic.match = _rx_class_is_match;
+ cur->generic.next = NULL;
+ cur->generic.prev = NULL;
- va_list ap;
- va_start(ap, n);
- cregex_node *prev = NULL;
- cur = cur + 1;
+ va_list ap;
+ va_start(ap, n);
+ cregex_node *prev = NULL;
+ cur = cur + 1;
- for (size_t i = 0; i < n; ++i) {
- const uint32_t first = va_arg(ap, uint32_t);
- const uint32_t last = va_arg(ap, uint32_t);
+ for (size_t i = 0; i < n; ++i) {
+ const uint32_t first = va_arg(ap, uint32_t);
+ const uint32_t last = va_arg(ap, uint32_t);
- cur->generic.next = NULL;
- cur->generic.prev = prev;
+ cur->generic.next = NULL;
+ cur->generic.prev = prev;
- if (prev)
- prev->generic.next = cur;
+ if (prev)
+ prev->generic.next = cur;
- cur->range.first = first;
- cur->range.last = last;
+ cur->range.first = first;
+ cur->range.last = last;
- prev = cur;
- cur = cur + 1;
- }
+ prev = cur;
+ cur = cur + 1;
+ }
- va_end(ap);
+ va_end(ap);
- return cur;
+ return cur;
}
/** _rx_compile escaped characters. return pointer to the next free node. */
static cregex_node *_rx_compile_next_escaped(const char *re, const char **leftover,
- cregex_node *cur)
+ cregex_node *cur)
{
- if (*re == 0)
- _rx_throw_compile_exception(cregex_UNEXPECTED_EOL, re);
-
- const uint32_t chr = _rx_utf8_peek(re);
- *leftover = _rx_utf8_next(re);
- cregex_node *ret = cur + 1;
-
- switch (chr) {
- case 'n':
- cur->chr.chr = '\n';
- cur->generic.match = _rx_char_is_match;
- break;
-
- case 't':
- cur->chr.chr = '\t';
- cur->generic.match = _rx_char_is_match;
- break;
-
- case 'r':
- cur->chr.chr = '\r';
- cur->generic.match = _rx_char_is_match;
- break;
-
- case 's':
- ret = _rx_append_class(cur, false, 4, ' ', ' ', '\t', '\t', '\r',
- '\r', '\n', '\n');
- break;
-
- case 'S':
- ret = _rx_append_class(cur, true, 4, ' ', ' ', '\t', '\t', '\r',
- '\r', '\n', '\n');
- break;
-
- case 'w':
- ret = _rx_append_class(cur, false, 4, 'a', 'z', 'A', 'Z', '0', '9',
- '_', '_');
- break;
-
- case 'W':
- ret = _rx_append_class(cur, true, 4, 'a', 'z', 'A', 'Z', '0', '9',
- '_', '_');
- break;
-
- case 'd':
- ret = _rx_append_class(cur, false, 1, '0', '9');
- break;
-
- case 'D':
- ret = _rx_append_class(cur, true, 1, '0', '9');
- break;
-
- default:
- cur->chr.chr = chr;
- cur->generic.match = _rx_char_is_match;
- break;
- }
-
- return ret;
+ if (*re == 0)
+ _rx_throw_compile_exception(cregex_UNEXPECTED_EOL, re);
+
+ const uint32_t chr = _rx_utf8_peek(re);
+ *leftover = _rx_utf8_next(re);
+ cregex_node *ret = cur + 1;
+
+ switch (chr) {
+ case 'n':
+ cur->chr.chr = '\n';
+ cur->generic.match = _rx_char_is_match;
+ break;
+
+ case 't':
+ cur->chr.chr = '\t';
+ cur->generic.match = _rx_char_is_match;
+ break;
+
+ case 'r':
+ cur->chr.chr = '\r';
+ cur->generic.match = _rx_char_is_match;
+ break;
+
+ case 's':
+ ret = _rx_append_class(cur, false, 4, ' ', ' ', '\t', '\t', '\r',
+ '\r', '\n', '\n');
+ break;
+
+ case 'S':
+ ret = _rx_append_class(cur, true, 4, ' ', ' ', '\t', '\t', '\r',
+ '\r', '\n', '\n');
+ break;
+
+ case 'w':
+ ret = _rx_append_class(cur, false, 4, 'a', 'z', 'A', 'Z', '0', '9',
+ '_', '_');
+ break;
+
+ case 'W':
+ ret = _rx_append_class(cur, true, 4, 'a', 'z', 'A', 'Z', '0', '9',
+ '_', '_');
+ break;
+
+ case 'd':
+ ret = _rx_append_class(cur, false, 1, '0', '9');
+ break;
+
+ case 'D':
+ ret = _rx_append_class(cur, true, 1, '0', '9');
+ break;
+
+ default:
+ cur->chr.chr = chr;
+ cur->generic.match = _rx_char_is_match;
+ break;
+ }
+
+ return ret;
}
static cregex_node *_rx_compile_next_complex_class(const char *re,
- const char **leftover,
- cregex_node *cur)
+ const char **leftover,
+ cregex_node *cur)
{
- cur->generic.match = _rx_class_is_match;
- cur->generic.next = NULL;
- cur->generic.prev = NULL;
-
- if (*re == '^') {
- re++;
- cur->cls.negate = true;
- } else {
- cur->cls.negate = false;
- }
-
- cur->cls.ranges = NULL;
-
- cur = cur + 1;
- cregex_node *prev = NULL;
-
- while (*re && *re != ']') {
- uint32_t first = 0, last = 0;
-
- first = _rx_utf8_peek(re);
- re = _rx_utf8_next(re);
- if (first == '\\') {
- if (*re == 0)
- _rx_throw_compile_exception(
- cregex_INVALID_COMPLEX_CLASS, re);
-
- first = _rx_utf8_peek(re);
- re = _rx_utf8_next(re);
- }
-
- if (*re == '-' && re[1] != ']' && re[1]) {
- re++;
- last = _rx_utf8_peek(re);
- re = _rx_utf8_next(re);
-
- if (last == '\\') {
- if (*re == 0)
- _rx_throw_compile_exception(
- cregex_INVALID_COMPLEX_CLASS,
- re);
-
- last = _rx_utf8_peek(re);
- re = _rx_utf8_next(re);
- }
- } else {
- last = first;
- }
-
- cur->range.first = first;
- cur->range.last = last;
- cur->generic.prev = prev;
- cur->generic.next = NULL;
-
- if (prev == NULL) {
- (cur - 1)->cls.ranges = (_rx_RangeNode *)cur;
- } else {
- prev->generic.next = cur;
- }
-
- prev = cur;
- cur++;
- }
-
- if (*re == ']') {
- *leftover = re + 1;
- return cur;
- } else {
- _rx_throw_compile_exception(cregex_INVALID_COMPLEX_CLASS, re);
- return NULL; // Unreachable
- }
+ cur->generic.match = _rx_class_is_match;
+ cur->generic.next = NULL;
+ cur->generic.prev = NULL;
+
+ if (*re == '^') {
+ re++;
+ cur->cls.negate = true;
+ } else {
+ cur->cls.negate = false;
+ }
+
+ cur->cls.ranges = NULL;
+
+ cur = cur + 1;
+ cregex_node *prev = NULL;
+
+ while (*re && *re != ']') {
+ uint32_t first = 0, last = 0;
+
+ first = _rx_utf8_peek(re);
+ re = _rx_utf8_next(re);
+ if (first == '\\') {
+ if (*re == 0)
+ _rx_throw_compile_exception(
+ cregex_INVALID_COMPLEX_CLASS, re);
+
+ first = _rx_utf8_peek(re);
+ re = _rx_utf8_next(re);
+ }
+
+ if (*re == '-' && re[1] != ']' && re[1]) {
+ re++;
+ last = _rx_utf8_peek(re);
+ re = _rx_utf8_next(re);
+
+ if (last == '\\') {
+ if (*re == 0)
+ _rx_throw_compile_exception(
+ cregex_INVALID_COMPLEX_CLASS,
+ re);
+
+ last = _rx_utf8_peek(re);
+ re = _rx_utf8_next(re);
+ }
+ } else {
+ last = first;
+ }
+
+ cur->range.first = first;
+ cur->range.last = last;
+ cur->generic.prev = prev;
+ cur->generic.next = NULL;
+
+ if (prev == NULL) {
+ (cur - 1)->cls.ranges = (_rx_RangeNode *)cur;
+ } else {
+ prev->generic.next = cur;
+ }
+
+ prev = cur;
+ cur++;
+ }
+
+ if (*re == ']') {
+ *leftover = re + 1;
+ return cur;
+ } else {
+ _rx_throw_compile_exception(cregex_INVALID_COMPLEX_CLASS, re);
+ return NULL; // Unreachable
+ }
}
static const char *_rx_find_closing_par(const char *s)
{
- size_t level = 1;
-
- for (; *s && level != 0; ++s) {
- if (*s == '\\')
- s++;
- else if (*s == '(')
- level++;
- else if (*s == ')')
- level--;
- }
-
- if (level == 0)
- return s;
- else
- return NULL;
+ size_t level = 1;
+
+ for (; *s && level != 0; ++s) {
+ if (*s == '\\')
+ s++;
+ else if (*s == '(')
+ level++;
+ else if (*s == ')')
+ level--;
+ }
+
+ if (level == 0)
+ return s;
+ else
+ return NULL;
}
static cregex_node *_rx_compile(const char *re, const char *end, cregex_node *nodes);
static cregex_node *_rx_compile_next_cap(const char *re, const char **leftover,
- cregex_node *cur)
+ cregex_node *cur)
{
- cur->cap.cap.match_begin = 0;
- cur->cap.cap.match_end = 0;
- cur->cap.subexp = cur + 1;
- cur->generic.next = NULL;
- cur->generic.prev = NULL;
- cur->generic.match = _rx_cap_is_match;
+ cur->cap.cap.start = 0;
+ cur->cap.cap.end = 0;
+ cur->cap.subexp = cur + 1;
+ cur->generic.next = NULL;
+ cur->generic.prev = NULL;
+ cur->generic.match = _rx_cap_is_match;
- const char *end = _rx_find_closing_par(re);
+ const char *end = _rx_find_closing_par(re);
- if (end == NULL)
- _rx_throw_compile_exception(cregex_UNCLOSED_SUBEXPRESSION, re);
+ if (end == NULL)
+ _rx_throw_compile_exception(cregex_UNCLOSED_SUBEXPRESSION, re);
- *leftover = end;
- return _rx_compile(re, end - 1, cur + 1);
+ *leftover = end;
+ return _rx_compile(re, end - 1, cur + 1);
}
static cregex_node *insert_or(cregex_node *cur, cregex_node **prev) {
- cur->generic.match = _rx_or_is_match;
- cur->generic.next = NULL;
- cur->generic.prev = NULL;
+ cur->generic.match = _rx_or_is_match;
+ cur->generic.next = NULL;
+ cur->generic.prev = NULL;
- // Find last start node
- cregex_node *begin = *prev;
+ // Find last start node
+ cregex_node *begin = *prev;
- while (begin->generic.match != _rx_start_is_match) {
- begin = begin->generic.prev;
- }
+ while (begin->generic.match != _rx_start_is_match) {
+ begin = begin->generic.prev;
+ }
- cur->ior.left = begin->generic.next;
- *prev = begin;
+ cur->ior.left = begin->generic.next;
+ *prev = begin;
- return cur + 1;
+ return cur + 1;
}
/* _rx_compile next node. returns address of next available node.
* returns NULL if re is empty */
static cregex_node *_rx_compile_next(const char *re, const char **leftover,
- cregex_node *prev, cregex_node *cur)
+ cregex_node *prev, cregex_node *cur)
{
- if (*re == 0)
- return NULL;
-
- const uint32_t chr = _rx_utf8_peek(re);
- re = _rx_utf8_next(re);
- cregex_node *next = cur + 1;
-
- switch (chr) {
- case '^':
- cur->generic.match = _rx_anchor_begin_is_match;
- break;
-
- case '$':
- cur->generic.match = _rx_anchor_end_is_match;
- break;
-
- case '.':
- cur->generic.match = _rx_any_is_match;
- break;
-
- case '*':
- _rx_append_quant(&prev, cur, 0, SIZE_MAX, re);
- break;
-
- case '+':
- _rx_append_quant(&prev, cur, 1, SIZE_MAX, re);
- break;
-
- case '?':
- _rx_append_quant(&prev, cur, 0, 1, re);
- break;
-
- case '{': {
- size_t min = 0, max = SIZE_MAX;
- const char *leftover = NULL;
- _rx_parse_complex_quant(re, &leftover, &min, &max);
-
- _rx_append_quant(&prev, cur, min, max, re);
- re = leftover;
- break;
- }
-
- case '[':
- next = _rx_compile_next_complex_class(re, &re, cur);
- break;
-
- case '(':
- next = _rx_compile_next_cap(re, &re, cur);
- break;
-
- case '\\':
- next = _rx_compile_next_escaped(re, &re, cur);
- break;
-
- case '|':
- next = insert_or(cur, &prev);
- break;
-
- default:
- cur->chr.chr = chr;
- cur->generic.match = _rx_char_is_match;
- break;
- }
-
- cur->generic.next = NULL;
- cur->generic.prev = prev;
- prev->generic.next = cur;
- *leftover = re;
-
- return next;
+ if (*re == 0)
+ return NULL;
+
+ const uint32_t chr = _rx_utf8_peek(re);
+ re = _rx_utf8_next(re);
+ cregex_node *next = cur + 1;
+
+ switch (chr) {
+ case '^':
+ cur->generic.match = _rx_anchor_begin_is_match;
+ break;
+
+ case '$':
+ cur->generic.match = _rx_anchor_end_is_match;
+ break;
+
+ case '.':
+ cur->generic.match = _rx_any_is_match;
+ break;
+
+ case '*':
+ _rx_append_quant(&prev, cur, 0, SIZE_MAX, re);
+ break;
+
+ case '+':
+ _rx_append_quant(&prev, cur, 1, SIZE_MAX, re);
+ break;
+
+ case '?':
+ _rx_append_quant(&prev, cur, 0, 1, re);
+ break;
+
+ case '{': {
+ size_t min = 0, max = SIZE_MAX;
+ const char *leftover = NULL;
+ _rx_parse_complex_quant(re, &leftover, &min, &max);
+
+ _rx_append_quant(&prev, cur, min, max, re);
+ re = leftover;
+ break;
+ }
+
+ case '[':
+ next = _rx_compile_next_complex_class(re, &re, cur);
+ break;
+
+ case '(':
+ next = _rx_compile_next_cap(re, &re, cur);
+ break;
+
+ case '\\':
+ next = _rx_compile_next_escaped(re, &re, cur);
+ break;
+
+ case '|':
+ next = insert_or(cur, &prev);
+ break;
+
+ default:
+ cur->chr.chr = chr;
+ cur->generic.match = _rx_char_is_match;
+ break;
+ }
+
+ cur->generic.next = NULL;
+ cur->generic.prev = prev;
+ prev->generic.next = cur;
+ *leftover = re;
+
+ return next;
}
/* _rx_compile raw regular expression into a linked list of nodes. return leftover nodes */
static cregex_node *_rx_compile(const char *re, const char *end, cregex_node *nodes)
{
- cregex_node *prev = nodes;
- cregex_node *cur = nodes + 1;
+ cregex_node *prev = nodes;
+ cregex_node *cur = nodes + 1;
- prev->generic.next = NULL;
- prev->generic.prev = NULL;
- prev->generic.match = _rx_start_is_match;
+ prev->generic.next = NULL;
+ prev->generic.prev = NULL;
+ prev->generic.match = _rx_start_is_match;
- while (cur != NULL && re != NULL && re < end) {
- const char *next = NULL;
- cregex_node *next_node = _rx_compile_next(re, &next, prev, cur);
+ while (cur != NULL && re != NULL && re < end) {
+ const char *next = NULL;
+ cregex_node *next_node = _rx_compile_next(re, &next, prev, cur);
- prev = cur;
- cur = next_node;
- re = next;
- }
+ prev = cur;
+ cur = next_node;
+ re = next;
+ }
- return cur;
+ return cur;
}
STC_DEF cregex cregex_new(const char *re)
{
cregex ret = {NULL};
- clear_compile_exception();
- if (re == NULL) {
- _rx_CompileException.err = cregex_INVALID_PARAMS;
- return ret;
- }
+ clear_compile_exception();
+ if (re == NULL) {
+ _rx_CompileException.err = cregex_INVALID_PARAMS;
+ return ret;
+ }
- if (!cregex_valid_utf8(re)) {
- _rx_CompileException.err = cregex_INVALID_UTF8;
- _rx_CompileException.s = NULL;
- return ret;
- }
+ if (!cregex_valid_utf8(re)) {
+ _rx_CompileException.err = cregex_INVALID_UTF8;
+ _rx_CompileException.s = NULL;
+ return ret;
+ }
- cregex_node *nodes = NULL;
+ cregex_node *nodes = NULL;
- if (setjmp(_rx_CompileException.buf)) {
- // Error callback
- free(nodes);
- return ret;
- }
+ if (setjmp(_rx_CompileException.buf)) {
+ // Error callback
+ free(nodes);
+ return ret;
+ }
- const size_t compile_len = _rx_calc_compiled_len(re);
- nodes = (cregex_node *)calloc(compile_len, sizeof(cregex_node));
- _rx_compile(re, re + strlen(re), nodes);
- ret.nodes = nodes;
+ const size_t compile_len = _rx_calc_compiled_len(re);
+ nodes = (cregex_node *)calloc(compile_len, sizeof(cregex_node));
+ _rx_compile(re, re + strlen(re), nodes);
+ ret.nodes = nodes;
- return ret;
+ return ret;
}
STC_DEF cregex_error_t cregex_error(void)
{
- return _rx_CompileException.err;
+ return _rx_CompileException.err;
}
STC_DEF bool cregex_is_match(cregex re, const char *s)
{
- const char *next = NULL;
- return _rx_is_match(re.nodes, s, s, &next);
+ const char *next = NULL;
+ return _rx_is_match(re.nodes, s, s, &next);
}
STC_DEF bool cregex_find(cregex re, const char *s, cregex_match *m)
{
- m->match_begin = SIZE_MAX;
- m->match_end = SIZE_MAX;
-
- for (const char *tmp_s = s; *tmp_s; tmp_s = _rx_utf8_next(tmp_s)) {
- const char *next = NULL;
- if (_rx_is_match(re.nodes, s, tmp_s, &next)) {
- m->match_begin = tmp_s - s;
- m->match_end = next - s;
- return true;
- }
- }
-
- return false;
+ m->start = SIZE_MAX;
+ m->end = SIZE_MAX;
+
+ for (const char *tmp_s = s; *tmp_s; tmp_s = _rx_utf8_next(tmp_s)) {
+ const char *next = NULL;
+ if (_rx_is_match(re.nodes, s, tmp_s, &next)) {
+ m->start = tmp_s - s;
+ m->end = next - s;
+ return true;
+ }
+ }
+
+ return false;
}
STC_DEF void cregex_drop(cregex *re)
{
- free(re->nodes);
+ free(re->nodes);
}
STC_DEF cregex_result cregex_find_all(cregex re, const char *s)
{
- cregex_result matches = cregex_result_init();
- size_t offset = 0;
-
- const char *s_end = s + strlen(s);
- while (s < s_end) {
- cregex_match tmp;
- if (cregex_find(re, s, &tmp)) {
- size_t end = tmp.match_end;
- s += end;
- tmp.match_begin += offset;
- tmp.match_end += offset;
-
- offset += end;
- cregex_result_push(&matches, tmp);
- } else {
- break;
- }
- }
-
- return matches;
+ cregex_result matches = cregex_result_init();
+ size_t offset = 0;
+
+ const char *s_end = s + strlen(s);
+ while (s < s_end) {
+ cregex_match tmp;
+ if (cregex_find(re, s, &tmp)) {
+ size_t end = tmp.end;
+ s += end;
+ tmp.start += offset;
+ tmp.end += offset;
+
+ offset += end;
+ cregex_result_push(&matches, tmp);
+ } else {
+ break;
+ }
+ }
+
+ return matches;
}
/* calculate amount of capture groups
* inside a regular expression */
static size_t _rx_cap_node_count(cregex_node *nodes)
{
- if (nodes == NULL) {
- return 0;
- } else if (nodes->generic.match == _rx_quant_is_match) {
- return _rx_cap_node_count(nodes->quant.subexp) +
- _rx_cap_node_count(nodes->generic.next);
- } else if (nodes->generic.match == _rx_cap_is_match) {
- return _rx_cap_node_count(nodes->quant.subexp) +
- _rx_cap_node_count(nodes->generic.next) + 1;
- } else {
- return _rx_cap_node_count(nodes->generic.next);
- }
+ if (nodes == NULL) {
+ return 0;
+ } else if (nodes->generic.match == _rx_quant_is_match) {
+ return _rx_cap_node_count(nodes->quant.subexp) +
+ _rx_cap_node_count(nodes->generic.next);
+ } else if (nodes->generic.match == _rx_cap_is_match) {
+ return _rx_cap_node_count(nodes->quant.subexp) +
+ _rx_cap_node_count(nodes->generic.next) + 1;
+ } else {
+ return _rx_cap_node_count(nodes->generic.next);
+ }
}
STC_DEF size_t cregex_captures_len(cregex re)
{
- return _rx_cap_node_count(re.nodes);
+ return _rx_cap_node_count(re.nodes);
}
static cregex_node *_rx_find_capture_node(cregex_node *node, size_t index)
{
- if (node == NULL) {
- return NULL;
- } else if (node->generic.match == _rx_cap_is_match) {
- if (index == 0) {
- return node;
- } else {
- const size_t subexp_len =
- _rx_cap_node_count(node->cap.subexp);
- if (index <= subexp_len) {
- return _rx_find_capture_node(node->cap.subexp,
- index - subexp_len);
- } else {
- return _rx_find_capture_node(node->generic.next,
- index - 1 -
- subexp_len);
- }
- }
- } else if (node->generic.match == _rx_quant_is_match) {
- const size_t subexp_len = _rx_cap_node_count(node->quant.subexp);
- if (index < subexp_len) {
- return _rx_find_capture_node(node->quant.subexp, index);
- } else {
- return _rx_find_capture_node(node->generic.next, index);
- }
- } else {
- return _rx_find_capture_node(node->generic.next, index);
- }
+ if (node == NULL) {
+ return NULL;
+ } else if (node->generic.match == _rx_cap_is_match) {
+ if (index == 0) {
+ return node;
+ } else {
+ const size_t subexp_len =
+ _rx_cap_node_count(node->cap.subexp);
+ if (index <= subexp_len) {
+ return _rx_find_capture_node(node->cap.subexp,
+ index - subexp_len);
+ } else {
+ return _rx_find_capture_node(node->generic.next,
+ index - 1 - subexp_len);
+ }
+ }
+ } else if (node->generic.match == _rx_quant_is_match) {
+ const size_t subexp_len = _rx_cap_node_count(node->quant.subexp);
+ if (index < subexp_len) {
+ return _rx_find_capture_node(node->quant.subexp, index);
+ } else {
+ return _rx_find_capture_node(node->generic.next, index);
+ }
+ } else {
+ return _rx_find_capture_node(node->generic.next, index);
+ }
}
STC_DEF cregex_match cregex_capture(cregex re, size_t index)
{
- _rx_CapNode *cap = (_rx_CapNode *)_rx_find_capture_node(re.nodes, index);
+ _rx_CapNode *cap = (_rx_CapNode *)_rx_find_capture_node(re.nodes, index);
- if (cap == NULL) {
- return c_make(cregex_match){0, 0};
- }
+ if (cap == NULL) {
+ return c_make(cregex_match){0, 0};
+ }
- return cap->cap;
+ return cap->cap;
}
#endif