summaryrefslogtreecommitdiffhomepage
path: root/src/cregex.c
diff options
context:
space:
mode:
author_Tradam <[email protected]>2023-09-08 01:29:47 +0000
committerGitHub <[email protected]>2023-09-08 01:29:47 +0000
commit3c76c7f3d5db3f9586a90d03f8fbb02d79de9acd (patch)
treeafbe4b540967223911f7c5de36559b82154f02f3 /src/cregex.c
parent0841165881871ee01b782129be681209aeed2423 (diff)
parent1a72205fe05c2375cfd380dd8381a8460d9ed8d1 (diff)
downloadSTC-modified-modified.tar.gz
STC-modified-modified.zip
Merge branch 'stclib:master' into modifiedHEADmodified
Diffstat (limited to 'src/cregex.c')
-rw-r--r--src/cregex.c126
1 files changed, 71 insertions, 55 deletions
diff --git a/src/cregex.c b/src/cregex.c
index 0688d9e1..7907ddd9 100644
--- a/src/cregex.c
+++ b/src/cregex.c
@@ -25,9 +25,24 @@ THE SOFTWARE.
*/
#ifndef CREGEX_C_INCLUDED
#define CREGEX_C_INCLUDED
-#include <stc/cstr.h>
-#include <stc/cregex.h> // header only
+
#include <setjmp.h>
+#ifdef i_import
+# define _i_import
+#endif
+#ifndef CREGEX_H_INCLUDED
+# include "../include/stc/cregex.h"
+#endif
+#ifdef _i_import
+# include "utf8code.c"
+#endif
+#ifdef _i_import
+# define i_implement
+#else
+# undef i_implement
+#endif
+#undef _i_import
+#include "../include/stc/cstr.h"
typedef uint32_t _Rune; /* Utf8 code point */
typedef int32_t _Token;
@@ -249,8 +264,8 @@ _renewmatch(_Resub *mp, int ms, _Resublist *sp, int nsubids)
{
if (mp==NULL || ms==0)
return;
- if (mp[0].str == NULL || sp->m[0].str < mp[0].str ||
- (sp->m[0].str == mp[0].str && sp->m[0].size > mp[0].size)) {
+ if (mp[0].buf == NULL || sp->m[0].buf < mp[0].buf ||
+ (sp->m[0].buf == mp[0].buf && sp->m[0].size > mp[0].size)) {
for (int i=0; i<ms && i<=nsubids; i++)
mp[i] = sp->m[i];
}
@@ -271,7 +286,7 @@ _renewthread(_Relist *lp, /* _relist to add to */
for (p=lp; p->inst; p++) {
if (p->inst == ip) {
- if (sep->m[0].str < p->se.m[0].str) {
+ if (sep->m[0].buf < p->se.m[0].buf) {
if (ms > 1)
p->se = *sep;
else
@@ -303,10 +318,10 @@ _renewemptythread(_Relist *lp, /* _relist to add to */
for (p=lp; p->inst; p++) {
if (p->inst == ip) {
- if (sp < p->se.m[0].str) {
+ if (sp < p->se.m[0].buf) {
if (ms > 1)
memset(&p->se, 0, sizeof(p->se));
- p->se.m[0].str = sp;
+ p->se.m[0].buf = sp;
}
return 0;
}
@@ -314,7 +329,7 @@ _renewemptythread(_Relist *lp, /* _relist to add to */
p->inst = ip;
if (ms > 1)
memset(&p->se, 0, sizeof(p->se));
- p->se.m[0].str = sp;
+ p->se.m[0].buf = sp;
(++p)->inst = NULL;
return p;
}
@@ -347,7 +362,7 @@ typedef struct _Parser
bool lastwasand; /* Last token was _operand */
short nbra;
short nclass;
- size_t instcap;
+ intptr_t instcap;
_Rune yyrune; /* last lex'd rune */
_Reclass *yyclassp; /* last lex'd class */
_Reclass* classp;
@@ -547,11 +562,11 @@ _optimize(_Parser *par, _Reprog *pp)
* necessary. Reallocate to the actual space used
* and then relocate the code.
*/
- if ((par->freep - pp->firstinst)*2 > (ptrdiff_t)par->instcap)
+ if ((par->freep - pp->firstinst)*2 > par->instcap)
return pp;
- intptr_t ipp = (intptr_t)pp;
- size_t size = sizeof(_Reprog) + (size_t)(par->freep - pp->firstinst)*sizeof(_Reinst);
+ intptr_t ipp = (intptr_t)pp; // convert pointer to intptr_t!
+ intptr_t size = c_sizeof(_Reprog) + (par->freep - pp->firstinst)*c_sizeof(_Reinst);
_Reprog *npp = (_Reprog *)c_realloc(pp, size);
ptrdiff_t diff = (intptr_t)npp - ipp;
@@ -842,20 +857,21 @@ _bldcclass(_Parser *par)
static _Reprog*
-_regcomp1(_Reprog *progp, _Parser *par, const char *s, int cflags)
+_regcomp1(_Reprog *pp, _Parser *par, const char *s, int cflags)
{
_Token token;
/* get memory for the program. estimated max usage */
- par->instcap = 5U + 6*strlen(s);
- _Reprog* pp = (_Reprog *)c_realloc(progp, sizeof(_Reprog) + par->instcap*sizeof(_Reinst));
- if (pp == NULL) {
+ par->instcap = 5 + 6*c_strlen(s);
+ _Reprog* old_pp = pp;
+ pp = (_Reprog *)c_realloc(pp, c_sizeof(_Reprog) + par->instcap*c_sizeof(_Reinst));
+ if (! pp) {
+ c_free(old_pp);
par->error = CREG_OUTOFMEMORY;
- c_free(progp);
return NULL;
}
- pp->flags.icase = (cflags & CREG_C_ICASE) != 0;
- pp->flags.dotall = (cflags & CREG_C_DOTALL) != 0;
+ pp->flags.icase = (cflags & CREG_ICASE) != 0;
+ pp->flags.dotall = (cflags & CREG_DOTALL) != 0;
par->freep = pp->firstinst;
par->classp = pp->cclass;
par->error = 0;
@@ -928,14 +944,14 @@ _runematch(_Rune s, _Rune r)
case ASC_LO: inv = 1; case ASC_lo: return inv ^ (islower((int)r) != 0);
case ASC_UP: inv = 1; case ASC_up: return inv ^ (isupper((int)r) != 0);
case ASC_XD: inv = 1; case ASC_xd: return inv ^ (isxdigit((int)r) != 0);
- case UTF_AN: inv = 1; case UTF_an: return inv ^ utf8_isalnum(r);
- case UTF_BL: inv = 1; case UTF_bl: return inv ^ utf8_isblank(r);
- case UTF_SP: inv = 1; case UTF_sp: return inv ^ utf8_isspace(r);
- case UTF_LL: inv = 1; case UTF_ll: return inv ^ utf8_islower(r);
- case UTF_LU: inv = 1; case UTF_lu: return inv ^ utf8_isupper(r);
- case UTF_LC: inv = 1; case UTF_lc: return inv ^ utf8_iscased(r);
- case UTF_AL: inv = 1; case UTF_al: return inv ^ utf8_isalpha(r);
- case UTF_WR: inv = 1; case UTF_wr: return inv ^ utf8_isword(r);
+ case UTF_AN: inv = 1; case UTF_an: return inv ^ (int)utf8_isalnum(r);
+ case UTF_BL: inv = 1; case UTF_bl: return inv ^ (int)utf8_isblank(r);
+ case UTF_SP: inv = 1; case UTF_sp: return inv ^ (int)utf8_isspace(r);
+ case UTF_LL: inv = 1; case UTF_ll: return inv ^ (int)utf8_islower(r);
+ case UTF_LU: inv = 1; case UTF_lu: return inv ^ (int)utf8_isupper(r);
+ case UTF_LC: inv = 1; case UTF_lc: return inv ^ (int)utf8_iscased(r);
+ case UTF_AL: inv = 1; case UTF_al: return inv ^ (int)utf8_isalpha(r);
+ case UTF_WR: inv = 1; case UTF_wr: return inv ^ (int)utf8_isword(r);
case UTF_cc: case UTF_CC:
case UTF_lt: case UTF_LT:
case UTF_nd: case UTF_ND:
@@ -956,7 +972,7 @@ _runematch(_Rune s, _Rune r)
case UTF_latin: case UTF_LATIN:
n = (int)s - UTF_GRP;
inv = n & 1;
- return inv ^ utf8_isgroup(n / 2, r);
+ return inv ^ (int)utf8_isgroup(n / 2, r);
}
return s == r;
}
@@ -989,7 +1005,7 @@ _regexec1(const _Reprog *progp, /* program to run */
checkstart = j->starttype;
if (mp)
for (i=0; i<ms; i++) {
- mp[i].str = NULL;
+ mp[i].buf = NULL;
mp[i].size = 0;
}
j->relist[0][0].inst = NULL;
@@ -1050,10 +1066,10 @@ _regexec1(const _Reprog *progp, /* program to run */
icase = inst->type == TOK_ICASE;
continue;
case TOK_LBRA:
- tlp->se.m[inst->r.subid].str = s;
+ tlp->se.m[inst->r.subid].buf = s;
continue;
case TOK_RBRA:
- tlp->se.m[inst->r.subid].size = (s - tlp->se.m[inst->r.subid].str);
+ tlp->se.m[inst->r.subid].size = (s - tlp->se.m[inst->r.subid].buf);
continue;
case TOK_ANY:
ok = (r != '\n');
@@ -1100,10 +1116,10 @@ _regexec1(const _Reprog *progp, /* program to run */
/* efficiency: advance and re-evaluate */
continue;
case TOK_END: /* Match! */
- match = !(mflags & CREG_M_FULLMATCH) ||
+ match = !(mflags & CREG_FULLMATCH) ||
((s == j->eol || r == 0 || r == '\n') &&
- (tlp->se.m[0].str == bol || tlp->se.m[0].str[-1] == '\n'));
- tlp->se.m[0].size = (s - tlp->se.m[0].str);
+ (tlp->se.m[0].buf == bol || tlp->se.m[0].buf[-1] == '\n'));
+ tlp->se.m[0].size = (s - tlp->se.m[0].buf);
if (mp != NULL)
_renewmatch(mp, ms, &tlp->se, progp->nsubids);
break;
@@ -1136,7 +1152,7 @@ _regexec2(const _Reprog *progp, /* program to run */
_Relist *relists;
/* mark space */
- relists = (_Relist *)c_malloc(2 * _BIGLISTSIZE*sizeof(_Relist));
+ relists = (_Relist *)c_malloc(2 * _BIGLISTSIZE*c_sizeof(_Relist));
if (relists == NULL)
return -1;
@@ -1168,10 +1184,10 @@ _regexec(const _Reprog *progp, /* program to run */
j.eol = NULL;
if (mp && mp[0].size) {
- if (mflags & CREG_M_STARTEND)
- j.starts = mp[0].str, j.eol = mp[0].str + mp[0].size;
- else if (mflags & CREG_M_NEXT)
- j.starts = mp[0].str + mp[0].size;
+ if (mflags & CREG_STARTEND)
+ j.starts = mp[0].buf, j.eol = mp[0].buf + mp[0].size;
+ else if (mflags & CREG_NEXT)
+ j.starts = mp[0].buf + mp[0].size;
}
j.starttype = 0;
@@ -1204,7 +1220,7 @@ _build_subst(const char* replace, int nmatch, const csview match[],
cstr_buf buf = cstr_buffer(subst);
intptr_t len = 0, cap = buf.cap;
char* dst = buf.data;
- cstr mstr = cstr_NULL;
+ cstr mstr = cstr_init();
while (*replace != '\0') {
if (*replace == '$') {
@@ -1216,12 +1232,12 @@ _build_subst(const char* replace, int nmatch, const csview match[],
g = arg - '0';
if (replace[1] >= '0' && replace[1] <= '9' && replace[2] == ';')
{ g = g*10 + (replace[1] - '0'); replace += 2; }
- if (g < (int)nmatch) {
+ if (g < nmatch) {
csview m = mfun && mfun(g, match[g], &mstr) ? cstr_sv(&mstr) : match[g];
if (len + m.size > cap)
- dst = cstr_reserve(subst, cap = cap*3/2 + m.size);
- for (int i = 0; i < (int)m.size; ++i)
- dst[len++] = m.str[i];
+ dst = cstr_reserve(subst, cap += cap/2 + m.size);
+ for (int i = 0; i < m.size; ++i)
+ dst[len++] = m.buf[i];
}
++replace;
case '\0':
@@ -1229,7 +1245,7 @@ _build_subst(const char* replace, int nmatch, const csview match[],
}
}
if (len == cap)
- dst = cstr_reserve(subst, cap = cap*3/2 + 4);
+ dst = cstr_reserve(subst, cap += cap/2 + 4);
dst[len++] = *replace++;
}
cstr_drop(&mstr);
@@ -1250,12 +1266,12 @@ cregex_compile_3(cregex *self, const char* pattern, int cflags) {
int
cregex_captures(const cregex* self) {
- return self->prog ? 1 + self->prog->nsubids : 0;
+ return self->prog ? self->prog->nsubids : 0;
}
int
cregex_find_4(const cregex* re, const char* input, csview match[], int mflags) {
- int res = _regexec(re->prog, input, cregex_captures(re), match, mflags);
+ int res = _regexec(re->prog, input, cregex_captures(re) + 1, match, mflags);
switch (res) {
case 1: return CREG_OK;
case 0: return CREG_NOMATCH;
@@ -1277,19 +1293,19 @@ cregex_find_pattern_4(const char* pattern, const char* input,
cstr
cregex_replace_sv_6(const cregex* re, csview input, const char* replace, int count,
bool (*mfun)(int, csview, cstr*), int rflags) {
- cstr out = cstr_NULL;
- cstr subst = cstr_NULL;
+ cstr out = cstr_init();
+ cstr subst = cstr_init();
csview match[CREG_MAX_CAPTURES];
- int nmatch = cregex_captures(re);
+ int nmatch = cregex_captures(re) + 1;
if (!count) count = INT32_MAX;
- bool copy = !(rflags & CREG_R_STRIP);
+ bool copy = !(rflags & CREG_STRIP);
while (count-- && cregex_find_sv(re, input, match) == CREG_OK) {
_build_subst(replace, nmatch, match, mfun, &subst);
- const intptr_t mpos = (match[0].str - input.str);
- if (copy & (mpos > 0)) cstr_append_n(&out, input.str, mpos);
+ const intptr_t mpos = (match[0].buf - input.buf);
+ if (copy & (mpos > 0)) cstr_append_n(&out, input.buf, mpos);
cstr_append_s(&out, subst);
- input.str = match[0].str + match[0].size;
+ input.buf = match[0].buf + match[0].size;
input.size -= mpos + match[0].size;
}
if (copy) cstr_append_sv(&out, input);
@@ -1303,7 +1319,7 @@ cregex_replace_pattern_6(const char* pattern, const char* input, const char* rep
cregex re = cregex_init();
if (cregex_compile(&re, pattern, crflags) != CREG_OK)
assert(0);
- csview sv = {input, c_strlen(input)};
+ csview sv = c_sv(input, c_strlen(input));
cstr out = cregex_replace_sv(&re, sv, replace, count, mfun, crflags);
cregex_drop(&re);
return out;