Converted all files with DOS line endings to LINUX.

author: Tyge Løvset <[email protected]> 2022-06-01 16:28:07 +0200
committer: Tyge Løvset <[email protected]> 2022-06-01 16:28:07 +0200
commit: de629774cb912aa3d563f24d99258142713c3fcd (patch)
tree: c37e2851d6cb049bc0863a59b6ecf5945fb88619 /src
parent: 7fb43a24a17da787dd809114ca26c1231b058493 (diff)
download: STC-modified-de629774cb912aa3d563f24d99258142713c3fcd.tar.gz
STC-modified-de629774cb912aa3d563f24d99258142713c3fcd.zip
1 files changed, 1186 insertions, 1186 deletions
diff --git a/src/cregex.c b/src/cregex.c
index 7ca9a336..712056c4 100644
--- a/src/cregex.c
+++ b/src/cregex.c
@@ -1,1186 +1,1186 @@
-/*
-This is a Unix port of the Plan 9 regular expression library, by Rob Pike.
-Please send comments about the packaging to Russ Cox <[email protected]>.
-
-Copyright © 2021 Plan 9 Foundation
-Copyright © 2022 Tyge Løvset, for additions made in 2022.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <stdlib.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <setjmp.h>
-#include <string.h>
-#include <ctype.h>
-#include <stdio.h>
-#include <stc/cregex.h>
-#include <stc/utf8.h>
-
-typedef uint32_t Rune; /* Utf8 code point */
-typedef int32_t Token;
-/* max character classes per program */
-#define NCLASS creg_max_classes
-/* max subexpressions */
-#define NSUBEXP creg_max_captures
-/* max rune ranges per character class */
-#define NCCRUNE (NSUBEXP * 2)
-
-/*
- *    character class, each pair of rune's defines a range
- */
-typedef struct
-{
-    Rune *end;
-    Rune spans[NCCRUNE];
-} Reclass;
-
-/*
- *    Machine instructions
- */
-typedef struct Reinst
-{
-    Token type;
-    union {
-        Reclass *classp;        /* class pointer */
-        Rune    rune;           /* character */
-        int     subid;          /* sub-expression id for RBRA and LBRA */
-        struct Reinst *right;   /* right child of OR */
-    } r;
-    union {    /* regexp relies on these two being in the same union */
-        struct Reinst *left;    /* left child of OR */
-        struct Reinst *next;    /* next instruction for CAT & LBRA */
-    } l;
-} Reinst;
-
-typedef struct {
-    bool caseless;
-    bool dotall;
-} Reflags;
-
-/*
- *    Reprogram definition
- */
-typedef struct Reprog
-{
-    Reinst  *startinst;     /* start pc */
-    Reflags flags;
-    int nsubids;
-    Reclass cclass[NCLASS]; /* .data */
-    Reinst  firstinst[];    /* .text : originally 5 elements? */
-} Reprog;
-
-/*
- *    Sub expression matches
- */
-typedef cregmatch Resub;
-
-/*
- *  substitution list
- */
-typedef struct Resublist
-{
-    Resub m[NSUBEXP];
-} Resublist;
-
-/*
- * Actions and Tokens (Reinst types)
- *
- *  0x800000-0x80FFFF: operators, value => precedence
- *  0x810000-0x81FFFF: RUNE and char classes.
- *  0x820000-0x82FFFF: tokens, i.e. operands for operators
- */
-enum {
-    MASK        = 0xFF00000,
-    OPERATOR    = 0x8000000, /* Bitmask of all operators */
-    START       = 0x8000001, /* Start, used for marker on stack */
-    RBRA        ,           /* Right bracket, ) */
-    LBRA        ,           /* Left bracket, ( */
-    OR          ,           /* Alternation, | */
-    CAT         ,           /* Concatentation, implicit operator */
-    STAR        ,           /* Closure, * */
-    PLUS        ,           /* a+ == aa* */
-    QUEST       ,           /* a? == a|nothing, i.e. 0 or 1 a's */
-    RUNE        = 0x8100000,
-    IRUNE,
-    ASC_bl      , ASC_BL,   /* blank */
-    ASC_ct      , ASC_CT,   /* ctrl */
-    ASC_gr      , ASC_GR,   /* graphic */
-    ASC_pr      , ASC_PR,   /* print */
-    ASC_pt      , ASC_PT,   /* punct */
-    U8_Nd       , U8N_Nd,    /* dec digit, non-digit */
-    U8_LC       , U8N_LC,   /* utf8 letter cased */
-    U8_Ll       , U8N_Ll,   /* utf8 letter lower */
-    U8_Lu       , U8N_Lu,   /* utf8 letter upper */
-    U8_Zs       , U8N_Zs,   /* utf8 white space */
-    U8_Xnx      , U8N_Xnx,  /* utf8 hex digit */
-    U8_Xan      , U8N_Xan,  /* utf8 alphanumeric */
-    U8_Xw       , U8N_Xw,   /* utf8 word */
-    ANY         = 0x8200000, /* Any character except newline, . */
-    ANYNL       ,           /* Any character including newline, . */
-    NOP         ,           /* No operation, internal use only */
-    BOL         , BOS,      /* Beginning of line, string, ^ */
-    EOL         , EOS, EOZ, /* End of line, string, $ */
-    CCLASS      ,           /* Character class, [] */
-    NCCLASS     ,           /* Negated character class, [] */
-    WBOUND      ,           /* Non-word boundary, not consuming meta char */
-    NWBOUND     ,           /* Word boundary, not consuming meta char */
-    END         = 0x82FFFFF, /* Terminate: match found */
-};
-
-/*
- *  regexec execution lists
- */
-#define LISTSIZE    10
-#define BIGLISTSIZE (10*LISTSIZE)
-
-typedef struct Relist
-{
-    Reinst*     inst;       /* Reinstruction of the thread */
-    Resublist   se;         /* matched subexpressions in this thread */
-} Relist;
-
-typedef struct Reljunk
-{
-    Relist*     relist[2];
-    Relist*     reliste[2];
-    int         starttype;
-    Rune        startchar;
-    const char* starts;
-    const char* eol;
-} Reljunk;
-
-/*
- * utf8 and Rune code
- */
-
-static int
-chartorune(Rune *rune, const char *s)
-{
-    utf8_decode_t ctx = {.state=0};
-    const uint8_t *b = (const uint8_t*)s;
-    do { utf8_decode(&ctx, *b++); } while (ctx.state);
-    *rune = ctx.codep;
-    return (const char*)b - s;
-}
-
-static const char*
-utfrune(const char *s, Rune c)
-{
-    Rune r;
-
-    if (c < 128)        /* ascii */
-        return strchr((char *)s, c);
-
-    for (;;) {
-        int n = chartorune(&r, s);
-        if (r == c) return s;
-        if ((r == 0) | (n == 0)) return NULL;
-        s += n;
-    }
-}
-
-static const char*
-utfruneicase(const char *s, Rune c)
-{
-    Rune r;
-    c = utf8_tolower(c);
-    for (;;) {
-        int n = chartorune(&r, s);
-        if (utf8_tolower(r) == c) return s;
-        if ((r == 0) | (n == 0)) return NULL;
-        s += n;
-    }
-}
-
-/************
- * regaux.c *
- ************/
-
-/*
- *  save a new match in mp
- */
-static void
-_renewmatch(Resub *mp, int ms, Resublist *sp, int nsubids)
-{
-    int i;
-
-    if (mp==NULL || ms<=0)
-        return;
-    if (mp[0].str == NULL || sp->m[0].str < mp[0].str ||
-       (sp->m[0].str == mp[0].str && sp->m[0].size > mp[0].size)) {
-        for (i=0; i<ms && i<=nsubids; i++)
-            mp[i] = sp->m[i];
-    }
-}
-
-/*
- * Note optimization in _renewthread:
- *   *lp must be pending when _renewthread called; if *l has been looked
- *   at already, the optimization is a bug.
- */
-static Relist*
-_renewthread(Relist *lp,  /* _relist to add to */
-    Reinst *ip,           /* instruction to add */
-    int ms,
-    Resublist *sep)       /* pointers to subexpressions */
-{
-    Relist *p;
-
-    for (p=lp; p->inst; p++) {
-        if (p->inst == ip) {
-            if (sep->m[0].str < p->se.m[0].str) {
-                if (ms > 1)
-                    p->se = *sep;
-                else
-                    p->se.m[0] = sep->m[0];
-            }
-            return 0;
-        }
-    }
-    p->inst = ip;
-    if (ms > 1)
-        p->se = *sep;
-    else
-        p->se.m[0] = sep->m[0];
-    (++p)->inst = NULL;
-    return p;
-}
-
-/*
- * same as renewthread, but called with
- * initial empty start pointer.
- */
-static Relist*
-_renewemptythread(Relist *lp,   /* _relist to add to */
-    Reinst *ip,                 /* instruction to add */
-    int ms,
-    const char *sp)             /* pointers to subexpressions */
-{
-    Relist *p;
-
-    for (p=lp; p->inst; p++) {
-        if (p->inst == ip) {
-            if (sp < p->se.m[0].str) {
-                if (ms > 1)
-                    memset(&p->se, 0, sizeof(p->se));
-                p->se.m[0].str = sp;
-            }
-            return 0;
-        }
-    }
-    p->inst = ip;
-    if (ms > 1)
-        memset(&p->se, 0, sizeof(p->se));
-    p->se.m[0].str = sp;
-    (++p)->inst = NULL;
-    return p;
-}
-
-/*
- * Parser Information
- */
-typedef struct Node
-{
-    Reinst*    first;
-    Reinst*    last;
-} Node;
-
-#define NSTACK 20
-typedef struct Parser
-{
-    const char* exprp;   /* pointer to next character in source expression */
-    Node andstack[NSTACK];
-    Node* andp;
-    Token atorstack[NSTACK];
-    Token* atorp;
-    short subidstack[NSTACK]; /* parallel to atorstack */
-    short* subidp;
-    short cursubid;      /* id of current subexpression */
-    int errors;
-    Reflags flags;
-    int dot_type;
-    int rune_type;
-    bool litmode;
-    bool lastwasand;     /* Last token was operand */
-    bool lexdone;
-    short nbra;
-    short nclass;
-    Rune yyrune;         /* last lex'd rune */
-    Reclass *yyclassp;   /* last lex'd class */
-    Reclass* classp;
-    Reinst* freep;
-    jmp_buf regkaboom;
-} Parser;
-
-/* predeclared crap */
-static void _operator(Parser *par, Token type);
-static void pushand(Parser *par, Reinst *first, Reinst *last);
-static void pushator(Parser *par, Token type);
-static void evaluntil(Parser *par, Token type);
-static int  bldcclass(Parser *par);
-
-static void
-rcerror(Parser *par, cregex_error_t err)
-{
-    par->errors = err;
-    longjmp(par->regkaboom, 1);
-}
-
-static Reinst*
-newinst(Parser *par, Token t)
-{
-    par->freep->type = t;
-    par->freep->l.left = 0;
-    par->freep->r.right = 0;
-    return par->freep++;
-}
-
-static void
-operand(Parser *par, Token t)
-{
-    Reinst *i;
-
-    if (par->lastwasand)
-        _operator(par, CAT);    /* catenate is implicit */
-    i = newinst(par, t);
-
-    if ((t == CCLASS) | (t == NCCLASS))
-        i->r.classp = par->yyclassp;
-    if ((t == RUNE) | (t == IRUNE))
-        i->r.rune = par->yyrune;
-
-    pushand(par, i, i);
-    par->lastwasand = true;
-}
-
-static void
-_operator(Parser *par, Token t)
-{
-    if (t==RBRA && --par->nbra<0)
-        rcerror(par, creg_unmatchedrightparenthesis);
-    if (t==LBRA) {
-        if (++par->cursubid >= NSUBEXP)
-            rcerror(par, creg_toomanysubexpressions);
-        par->nbra++;
-        if (par->lastwasand)
-            _operator(par, CAT);
-    } else
-        evaluntil(par, t);
-    if (t != RBRA)
-        pushator(par, t);
-    par->lastwasand = 0;
-    if (t==STAR || t==QUEST || t==PLUS || t==RBRA)
-        par->lastwasand = true;    /* these look like operands */
-}
-
-static void
-pushand(Parser *par, Reinst *f, Reinst *l)
-{
-    if (par->andp >= &par->andstack[NSTACK])
-        rcerror(par, creg_operandstackoverflow);
-    par->andp->first = f;
-    par->andp->last = l;
-    par->andp++;
-}
-
-static void
-pushator(Parser *par, Token t)
-{
-    if (par->atorp >= &par->atorstack[NSTACK])
-        rcerror(par, creg_operatorstackoverflow);
-    *par->atorp++ = t;
-    *par->subidp++ = par->cursubid;
-}
-
-static Node*
-popand(Parser *par, Token op)
-{
-    Reinst *inst;
-
-    if (par->andp <= &par->andstack[0]) {
-        rcerror(par, creg_missingoperand);
-        inst = newinst(par, NOP);
-        pushand(par, inst, inst);
-    }
-    return --par->andp;
-}
-
-static Token
-popator(Parser *par)
-{
-    if (par->atorp <= &par->atorstack[0])
-        rcerror(par, creg_operatorstackunderflow);
-    --par->subidp;
-    return *--par->atorp;
-}
-
-static void
-evaluntil(Parser *par, Token pri)
-{
-    Node *op1, *op2;
-    Reinst *inst1, *inst2;
-
-    while (pri==RBRA || par->atorp[-1]>=pri) {
-        switch (popator(par)) {
-        default:
-            rcerror(par, creg_unknownoperator);
-            break;
-        case LBRA:        /* must have been RBRA */
-            op1 = popand(par, '(');
-            inst2 = newinst(par, RBRA);
-            inst2->r.subid = *par->subidp;
-            op1->last->l.next = inst2;
-            inst1 = newinst(par, LBRA);
-            inst1->r.subid = *par->subidp;
-            inst1->l.next = op1->first;
-            pushand(par, inst1, inst2);
-            return;
-        case OR:
-            op2 = popand(par, '|');
-            op1 = popand(par, '|');
-            inst2 = newinst(par, NOP);
-            op2->last->l.next = inst2;
-            op1->last->l.next = inst2;
-            inst1 = newinst(par, OR);
-            inst1->r.right = op1->first;
-            inst1->l.left = op2->first;
-            pushand(par, inst1, inst2);
-            break;
-        case CAT:
-            op2 = popand(par, 0);
-            op1 = popand(par, 0);
-            op1->last->l.next = op2->first;
-            pushand(par, op1->first, op2->last);
-            break;
-        case STAR:
-            op2 = popand(par, '*');
-            inst1 = newinst(par, OR);
-            op2->last->l.next = inst1;
-            inst1->r.right = op2->first;
-            pushand(par, inst1, inst1);
-            break;
-        case PLUS:
-            op2 = popand(par, '+');
-            inst1 = newinst(par, OR);
-            op2->last->l.next = inst1;
-            inst1->r.right = op2->first;
-            pushand(par, op2->first, inst1);
-            break;
-        case QUEST:
-            op2 = popand(par, '?');
-            inst1 = newinst(par, OR);
-            inst2 = newinst(par, NOP);
-            inst1->l.left = inst2;
-            inst1->r.right = op2->first;
-            op2->last->l.next = inst2;
-            pushand(par, inst1, inst2);
-            break;
-        }
-    }
-}
-
-static Reprog*
-optimize(Parser *par, Reprog *pp)
-{
-    Reinst *inst, *target;
-    size_t size;
-    Reprog *npp;
-    Reclass *cl;
-    ptrdiff_t diff;
-
-    /*
-     *  get rid of NOOP chains
-     */
-    for (inst = pp->firstinst; inst->type != END; inst++) {
-        target = inst->l.next;
-        while (target->type == NOP)
-            target = target->l.next;
-        inst->l.next = target;
-    }
-
-    /*
-     *  The original allocation is for an area larger than
-     *  necessary.  Reallocate to the actual space used
-     *  and then relocate the code.
-     */
-    size = sizeof(Reprog) + (par->freep - pp->firstinst)*sizeof(Reinst);
-    npp = (Reprog *)realloc(pp, size);
-    if (npp==NULL || npp==pp)
-        return pp;
-    diff = (char *)npp - (char *)pp;
-    par->freep = (Reinst *)((char *)par->freep + diff);
-    for (inst = npp->firstinst; inst < par->freep; inst++) {
-        switch (inst->type) {
-        case OR:
-        case STAR:
-        case PLUS:
-        case QUEST:
-            inst->r.right = (Reinst *)((char*)inst->r.right + diff);
-            break;
-        case CCLASS:
-        case NCCLASS:
-            inst->r.right = (Reinst *)((char*)inst->r.right + diff);
-            cl = inst->r.classp;
-            cl->end = (Rune *)((char*)cl->end + diff);
-            break;
-        }
-        inst->l.left = (Reinst *)((char*)inst->l.left + diff);
-    }
-    npp->startinst = (Reinst *)((char*)npp->startinst + diff);
-    return npp;
-}
-
-static Reclass*
-newclass(Parser *par)
-{
-    if (par->nclass >= NCLASS)
-        rcerror(par, creg_toomanycharacterclasses);
-    return &(par->classp[par->nclass++]);
-}
-
-static int
-nextc(Parser *par, Rune *rp)
-{
-    if (par->lexdone) {
-        *rp = 0;
-        return 1;
-    }
-    par->exprp += chartorune(rp, par->exprp);
-    if (*rp == '\\') {
-        if (par->litmode && *par->exprp != 'E')
-            return 1;
-        par->exprp += chartorune(rp, par->exprp);
-        switch (*rp) {
-            case 'E': return par->litmode + 1;
-            case 't': *rp = '\t'; break;
-            case 'n': *rp = '\n'; break;
-            case 'r': *rp = '\r'; break;
-            case 'v': *rp = '\v'; break;
-            case 'f': *rp = '\f'; break;
-            case 'd': *rp = U8_Nd; break;
-            case 'D': *rp = U8N_Nd; break;
-            case 's': *rp = U8_Zs; break;
-            case 'S': *rp = U8N_Zs; break;
-            case 'w': *rp = U8_Xw; break;
-            case 'W': *rp = U8N_Xw; break;
-            case 'x': if (*par->exprp != '{') break;
-                *rp = 0; sscanf(++par->exprp, "%x", rp);
-                while (*par->exprp) if (*(par->exprp++) == '}') break;
-                if (par->exprp[-1] != '}')
-                    rcerror(par, creg_unmatchedrightparenthesis);
-                return 2;
-            case 'p': case 'P': { /* https://www.regular-expressions.info/unicode.html */
-                static struct { const char* c; int n, r; } cls[] = {
-                    {"{Alpha}", 7, U8_LC}, {"{LC}", 4, U8_LC},
-                    {"{Alnum}", 7, U8_Xan},
-                    {"{Digit}", 7, U8_Nd}, {"{Nd}", 4, U8_Nd},
-                    {"{Lower}", 7, U8_Ll}, {"{Ll}", 4, U8_Ll},
-                    {"{Space}", 7, U8_Zs}, {"{Zs}", 4, U8_Zs},
-                    {"{Upper}", 7, U8_Lu}, {"{Lu}", 4, U8_Lu},
-                    {"{XDigit}", 8, U8_Xnx},
-                    {"{Blank}", 7, ASC_bl},
-                    {"{Graph}", 7, ASC_gr},
-                    {"{Print}", 7, ASC_pr},
-                    {"{Punct}", 7, ASC_pt},
-                };
-                int inv = *rp == 'P';
-                for (unsigned i = 0; i < (sizeof cls/sizeof *cls); ++i)
-                    if (!strncmp(par->exprp, cls[i].c, cls[i].n)) {
-                        if (par->rune_type == IRUNE && (cls[i].r == U8_Ll || cls[i].r == U8_Lu))
-                            *rp = U8_LC + inv;
-                        else
-                            *rp = cls[i].r + inv;
-                        par->exprp += cls[i].n;
-                        break;
-                    }
-                if (*rp < OPERATOR) {
-                    rcerror(par, creg_unknownoperator);
-                    *rp = 0;
-                }
-                break;
-            }
-        }
-        return 1;
-    }
-    if (*rp == 0)
-        par->lexdone = true;
-    return par->litmode;
-}
-
-static Token
-lex(Parser *par)
-{
-    int quoted;
-    start:
-    quoted = nextc(par, &par->yyrune);
-    if (quoted) {
-        if (quoted == 2) {
-            if (par->litmode && par->yyrune == 'E') {
-                par->litmode = false;
-                goto start;
-            }
-            return par->yyrune == 0 ? END : par->rune_type;
-        }
-        switch (par->yyrune) {
-        case  0 : return END;
-        case 'b': return WBOUND;
-        case 'B': return NWBOUND;
-        case 'A': return BOS;
-        case 'z': return EOS;
-        case 'Z': return EOZ;
-        case 'Q': par->litmode = true;
-                  goto start;
-        default : return par->rune_type;
-        }
-    }
-
-    switch (par->yyrune) {
-    case  0 : return END;
-    case '*': return STAR;
-    case '?': return QUEST;
-    case '+': return PLUS;
-    case '|': return OR;
-    case '.': return par->dot_type;
-    case '(':
-        if (par->exprp[0] == '?') {
-            for (int k = 1, enable = 1; ; ++k) switch (par->exprp[k]) {
-                case  0 : par->exprp += k; return END;
-                case ')': par->exprp += k + 1; goto start;
-                case '-': enable = 0; break;
-                case 's': if (!par->flags.dotall) par->dot_type = ANY + enable; break;
-                case 'i': if (!par->flags.caseless) par->rune_type = RUNE + enable; break;
-                default: rcerror(par, creg_unknownoperator); return 0;
-            }
-        }
-        return LBRA;
-    case ')': return RBRA;
-    case '^': return BOL;
-    case '$': return EOL;
-    case '[': return bldcclass(par);
-    }
-    return par->rune_type;
-}
-
-static Token
-bldcclass(Parser *par)
-{
-    Token type;
-    Rune r[NCCRUNE];
-    Rune *p, *ep, *np;
-    Rune rune;
-    int quoted;
-
-    /* we have already seen the '[' */
-    type = CCLASS;
-    par->yyclassp = newclass(par);
-
-    /* look ahead for negation */
-    /* SPECIAL CASE!!! negated classes don't match \n */
-    ep = r;
-    quoted = nextc(par, &rune);
-    if (!quoted && rune == '^') {
-        type = NCCLASS;
-        quoted = nextc(par, &rune);
-        *ep++ = '\n';
-        *ep++ = '\n';
-    }
-
-    /* parse class into a set of spans */
-    for (; ep < &r[NCCRUNE]; quoted = nextc(par, &rune)) {
-        if (rune == 0) {
-            rcerror(par, creg_malformedcharacterclass);
-            return 0;
-        }
-        if (!quoted) {
-            if (rune == ']')
-                break;
-            if (rune == '-') {
-                if (ep != r && *par->exprp != ']') {
-                    quoted = nextc(par, &rune);
-                    if (rune == 0) {
-                        rcerror(par, creg_malformedcharacterclass);
-                        return 0;
-                    }
-                    ep[-1] = rune;
-                    continue;
-                }
-            }
-        }
-        *ep++ = rune;
-        *ep++ = rune;
-    }
-
-    /* sort on span start */
-    for (p = r; p < ep; p += 2) {
-        for (np = p; np < ep; np += 2)
-            if (*np < *p) {
-                rune = np[0];
-                np[0] = p[0];
-                p[0] = rune;
-                rune = np[1];
-                np[1] = p[1];
-                p[1] = rune;
-            }
-    }
-
-    /* merge spans */
-    np = par->yyclassp->spans;
-    p = r;
-    if (r == ep)
-        par->yyclassp->end = np;
-    else {
-        np[0] = *p++;
-        np[1] = *p++;
-        for (; p < ep; p += 2)
-            if (p[0] <= np[1]) {
-                if (p[1] > np[1])
-                    np[1] = p[1];
-            } else {
-                np += 2;
-                np[0] = p[0];
-                np[1] = p[1];
-            }
-        par->yyclassp->end = np+2;
-    }
-
-    return type;
-}
-
-static Reprog*
-regcomp1(Parser *par, const char *s, int cflags)
-{
-    Token token;
-    Reprog *volatile pp;
-
-    /* get memory for the program. estimated max usage */
-    const int instcap = 5 + 6*strlen(s);
-    pp = (Reprog *)malloc(sizeof(Reprog) + instcap*sizeof(Reinst));
-    if (pp == NULL) {
-        rcerror(par, creg_outofmemory);
-        return NULL;
-    }
-    pp->flags.caseless = (cflags & creg_caseless) != 0;
-    pp->flags.dotall = (cflags & creg_dotall) != 0;
-    par->freep = pp->firstinst;
-    par->classp = pp->cclass;
-    par->errors = 0;
-
-    if (setjmp(par->regkaboom))
-        goto out;
-
-    /* go compile the sucker */
-    par->lexdone = false;
-    par->flags = pp->flags;
-    par->rune_type = pp->flags.caseless ? IRUNE : RUNE;
-    par->dot_type = pp->flags.dotall ? ANYNL : ANY;
-    par->litmode = false;
-    par->exprp = s;
-    par->nclass = 0;
-    par->nbra = 0;
-    par->atorp = par->atorstack;
-    par->andp = par->andstack;
-    par->subidp = par->subidstack;
-    par->lastwasand = false;
-    par->cursubid = 0;
-
-    /* Start with a low priority operator to prime parser */
-    pushator(par, START-1);
-    while ((token = lex(par)) != END) {
-        if ((token & MASK) == OPERATOR)
-            _operator(par, token);
-        else
-            operand(par, token);
-    }
-
-    /* Close with a low priority operator */
-    evaluntil(par, START);
-
-    /* Force END */
-    operand(par, END);
-    evaluntil(par, START);
-#ifdef DEBUG
-    dumpstack(par);
-#endif
-    if (par->nbra)
-        rcerror(par, creg_unmatchedleftparenthesis);
-    --par->andp;    /* points to first and only operand */
-    pp->startinst = par->andp->first;
-#ifdef DEBUG
-    dump(pp);
-#endif
-    pp = optimize(par, pp);
-    pp->nsubids = par->cursubid;
-#ifdef DEBUG
-    print("start: %d\n", par->andp->first-pp->firstinst);
-    dump(pp);
-#endif
-out:
-    if (par->errors) {
-        free(pp);
-        pp = NULL;
-    }
-    return pp;
-}
-
-
-static int
-runematch(Rune s, Rune r, bool icase)
-{
-    int inv = 0;
-    switch (s) {
-    case ASC_BL: inv = 1; /* fallthrough */
-    case ASC_bl: return inv ^ ((r == ' ') | (r == '\t'));
-    case ASC_CT: inv = 1;
-    case ASC_ct: return inv ^ (iscntrl(r) != 0);
-    case ASC_GR: inv = 1;
-    case ASC_gr: return inv ^ (isgraph(r) != 0);
-    case ASC_PR: inv = 1;
-    case ASC_pr: return inv ^ (isprint(r) != 0);
-    case ASC_PT: inv = 1;
-    case ASC_pt: return inv ^ (ispunct(r) != 0);
-    case U8N_Nd: inv = 1;
-    case U8_Nd: return inv ^ (utf8_isdigit(r));
-    case U8N_LC: inv = 1;
-    case U8_LC: return inv ^ utf8_isalpha(r);
-    case U8N_Ll: inv = 1;
-    case U8_Ll: return inv ^ utf8_islower(r);
-    case U8N_Lu: inv = 1;
-    case U8_Lu: return inv ^ utf8_isupper(r);
-    case U8N_Zs: inv = 1;
-    case U8_Zs: return inv ^ utf8_isspace(r);
-    case U8N_Xan: inv = 1;
-    case U8_Xan: return inv ^ utf8_isalnum(r);
-    case U8N_Xnx: inv = 1;
-    case U8_Xnx: return inv ^ utf8_isxdigit(r);
-    case U8N_Xw: inv = 1;
-    case U8_Xw: return inv ^ (utf8_isalnum(r) | (r == '_'));
-    }
-    return icase ? utf8_tolower(s) == utf8_tolower(r) : s == r;
-}
-
-/*
- *  return 0 if no match
- *        >0 if a match
- *        <0 if we ran out of _relist space
- */
-static int
-regexec1(const Reprog *progp,    /* program to run */
-    const char *bol,    /* string to run machine on */
-    Resub *mp,    /* subexpression elements */
-    int ms,        /* number of elements at mp */
-    Reljunk *j,
-    int mflags
-)
-{
-    int flag=0;
-    Reinst *inst;
-    Relist *tlp;
-    Relist *tl, *nl;    /* This list, next list */
-    Relist *tle, *nle;  /* Ends of this and next list */
-    const char *s, *p;
-    int i, n, checkstart;
-    Rune r, *rp, *ep;
-    int match = 0;
-
-    bool icase = progp->flags.caseless;
-    checkstart = j->starttype;
-    if (mp)
-        for (i=0; i<ms; i++) {
-            mp[i].str = NULL;
-            mp[i].size = 0;
-        }
-    j->relist[0][0].inst = NULL;
-    j->relist[1][0].inst = NULL;
-
-    /* Execute machine once for each character, including terminal NUL */
-    s = j->starts;
-    do {
-        /* fast check for first char */
-        if (checkstart) {
-            switch (j->starttype) {
-            case IRUNE:
-                p = utfruneicase(s, j->startchar);
-                goto next1;
-            case RUNE:
-                p = utfrune(s, j->startchar);
-                next1:
-                if (p == NULL || s == j->eol)
-                    return match;
-                s = p;
-                break;
-            case BOL:
-                if (s == bol)
-                    break;
-                p = utfrune(s, '\n');
-                if (p == NULL || s == j->eol)
-                    return match;
-                s = p+1;
-                break;
-            }
-        }
-        n = chartorune(&r, s);
-
-        /* switch run lists */
-        tl = j->relist[flag];
-        tle = j->reliste[flag];
-        nl = j->relist[flag^=1];
-        nle = j->reliste[flag];
-        nl->inst = NULL;
-
-        /* Add first instruction to current list */
-        if (match == 0)
-            _renewemptythread(tl, progp->startinst, ms, s);
-
-        /* Execute machine until current list is empty */
-        for (tlp=tl; tlp->inst; tlp++) {    /* assignment = */
-            for (inst = tlp->inst; ; inst = inst->l.next) {
-                int ok = false;
-
-                switch (inst->type) {
-                case RUNE:
-                case IRUNE:    /* regular character */
-                    ok = runematch(inst->r.rune, r, (icase = inst->type==IRUNE));
-                    break;
-                case LBRA:
-                    tlp->se.m[inst->r.subid].str = s;
-                    continue;
-                case RBRA:
-                    tlp->se.m[inst->r.subid].size = s - tlp->se.m[inst->r.subid].str;
-                    continue;
-                case ANY:
-                    ok = (r != '\n');
-                    break;
-                case ANYNL:
-                    ok = true;
-                    break;
-                case BOL:
-                    if (s == bol || s[-1] == '\n') continue;
-                    break;
-                case BOS:
-                    if (s == bol) continue;
-                    break;
-                case EOL:
-                    if (r == '\n') continue;
-                case EOS: /* fallthrough */
-                    if (s == j->eol || r == 0) continue;
-                    break;
-                case EOZ:
-                    if (s == j->eol || r == 0 || (r == '\n' && s[1] == 0)) continue;
-                    break;
-                case NWBOUND:
-                    ok = true;
-                case WBOUND: /* fallthrough */
-                    if (ok ^ (s == bol || s == j->eol || ((utf8_isalnum(s[-1]) || s[-1] == '_')
-                                                        ^ (utf8_isalnum(s[ 0]) || s[ 0] == '_'))))
-                        continue;
-                    break;
-                case NCCLASS:
-                    ok = true;
-                case CCLASS: /* fallthrough */
-                    ep = inst->r.classp->end;
-                    for (rp = inst->r.classp->spans; rp < ep; rp += 2) {
-                        if ((r >= rp[0] && r <= rp[1]) || (rp[0] == rp[1] && runematch(rp[0], r, icase)))
-                            break;
-                    }
-                    ok ^= (rp < ep);
-                    break;
-                case OR:
-                    /* evaluate right choice later */
-                    if (_renewthread(tlp, inst->r.right, ms, &tlp->se) == tle)
-                        return -1;
-                    /* efficiency: advance and re-evaluate */
-                    continue;
-                case END:    /* Match! */
-                    match = !(mflags & creg_fullmatch) ||
-                            ((s == j->eol || r == 0 || r == '\n') &&
-                            (tlp->se.m[0].str == bol || tlp->se.m[0].str[-1] == '\n'));
-                    tlp->se.m[0].size = s - tlp->se.m[0].str;
-                    if (mp != NULL)
-                        _renewmatch(mp, ms, &tlp->se, progp->nsubids);
-                    break;
-                }
-
-                if (ok && _renewthread(nl, inst->l.next, ms, &tlp->se) == nle)
-                    return -1;
-                break;
-            }
-        }
-        if (s == j->eol)
-            break;
-        checkstart = j->starttype && nl->inst==NULL;
-        s += n;
-    } while (r);
-    return match;
-}
-
-static int
-regexec2(const Reprog *progp,    /* program to run */
-    const char *bol,    /* string to run machine on */
-    Resub *mp,    /* subexpression elements */
-    int ms,        /* number of elements at mp */
-    Reljunk *j,
-    int mflags
-)
-{
-    int rv;
-    Relist *relists;
-
-    /* mark space */
-    relists = (Relist *)malloc(2 * BIGLISTSIZE*sizeof(Relist));
-    if (relists == NULL)
-        return -1;
-
-    j->relist[0] = relists;
-    j->relist[1] = relists + BIGLISTSIZE;
-    j->reliste[0] = relists + BIGLISTSIZE - 2;
-    j->reliste[1] = relists + 2*BIGLISTSIZE - 2;
-
-    rv = regexec1(progp, bol, mp, ms, j, mflags);
-    free(relists);
-    return rv;
-}
-
-static int
-regexec9(const Reprog *progp,    /* program to run */
-    const char *bol,    /* string to run machine on */
-    int ms,             /* number of elements at mp */
-    Resub mp[],         /* subexpression elements */
-    int mflags)
-{
-    Reljunk j;
-    Relist relist0[LISTSIZE], relist1[LISTSIZE];
-    int rv;
-
-    /*
-      *  use user-specified starting/ending location if specified
-     */
-    j.starts = bol;
-    j.eol = NULL;
-
-    if (mp && mp->str && ms>0) {
-        if (mflags & creg_startend)
-            j.starts = mp->str, j.eol = mp->str + mp->size;
-        else if (mflags & creg_next)
-            j.starts = mp->str + mp->size;
-    }
-
-    j.starttype = 0;
-    j.startchar = 0;
-    int rune_type = progp->flags.caseless ? IRUNE : RUNE;
-    if (progp->startinst->type == rune_type && progp->startinst->r.rune < 128) {
-        j.starttype = rune_type;
-        j.startchar = progp->startinst->r.rune;
-    }
-    if (progp->startinst->type == BOL)
-        j.starttype = BOL;
-
-    /* mark space */
-    j.relist[0] = relist0;
-    j.relist[1] = relist1;
-    j.reliste[0] = relist0 + LISTSIZE - 2;
-    j.reliste[1] = relist1 + LISTSIZE - 2;
-
-    rv = regexec1(progp, bol, mp, ms, &j, mflags);
-    if (rv >= 0)
-        return rv;
-    rv = regexec2(progp, bol, mp, ms, &j, mflags);
-    return rv;
-}
-
-/*
- * API functions
- */
-
-/* substitute into one string using the matches from the last regexec() */
-void cregex_replace(
-    const char *sp,     /* source string */
-    char *dp,           /* destination string */
-    int dlen,
-    int ms,             /* number of elements pointed to by mp */
-    const cregmatch mp[])   /* subexpression elements */
-{
-    const char *ssp, *ep;
-    int i;
-
-    ep = dp+dlen-1;
-    while (*sp != '\0') {
-        if (*sp == '\\') {
-            switch (*++sp) {
-            case '0': case '1': case '2': case '3': case '4':
-            case '5': case '6': case '7': case '8': case '9':
-                i = *sp - '0';
-                if (mp[i].str != NULL && mp != NULL && ms > i)
-                    for (ssp = mp[i].str; ssp < (mp[i].str + mp[i].size); ssp++)
-                        if (dp < ep)
-                            *dp++ = *ssp;
-                break;
-            case '\\':
-                if (dp < ep)
-                    *dp++ = '\\';
-                break;
-            case '\0':
-                sp--;
-                break;
-            default:
-                if (dp < ep)
-                    *dp++ = *sp;
-                break;
-            }
-        } else if (*sp == '&') {
-            if (mp[0].str != NULL && mp != NULL && ms > 0)
-                for (ssp = mp[0].str; ssp < (mp[0].str + mp[0].size); ssp++)
-                    if (dp < ep)
-                        *dp++ = *ssp;
-        } else {
-            if (dp < ep)
-                *dp++ = *sp;
-        }
-        sp++;
-    }
-    *dp = '\0';
-}
-
-int cregex_compile(cregex *rx, const char* pattern, int cflags) {
-    Parser par;
-    rx->prog = regcomp1(&par, pattern, cflags);
-    if (rx->prog)
-        return 1 + rx->prog->nsubids;
-    return par.errors;
-}
-
-int cregex_captures(cregex rx) {
-    return rx.prog ? 1 + rx.prog->nsubids : 0;
-}
-
-int cregex_find(const cregex *rx, const char* string,
-                size_t nmatch, cregmatch match[], int mflags) {
-    int res = regexec9(rx->prog, string, nmatch, match, mflags);
-    switch (res) {
-    case 1: return 1 + rx->prog->nsubids;
-    case 0: return creg_nomatch;
-    default: return creg_matcherror;
-    }
-}
-
-void cregex_drop(cregex* self) {
-    free(self->prog);
-}
+/*
+This is a Unix port of the Plan 9 regular expression library, by Rob Pike.
+Please send comments about the packaging to Russ Cox <[email protected]>.
+
+Copyright © 2021 Plan 9 Foundation
+Copyright © 2022 Tyge Løvset, for additions made in 2022.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <setjmp.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stc/cregex.h>
+#include <stc/utf8.h>
+
+typedef uint32_t Rune; /* Utf8 code point */
+typedef int32_t Token;
+/* max character classes per program */
+#define NCLASS creg_max_classes
+/* max subexpressions */
+#define NSUBEXP creg_max_captures
+/* max rune ranges per character class */
+#define NCCRUNE (NSUBEXP * 2)
+
+/*
+ *    character class, each pair of rune's defines a range
+ */
+typedef struct
+{
+    Rune *end;
+    Rune spans[NCCRUNE];
+} Reclass;
+
+/*
+ *    Machine instructions
+ */
+typedef struct Reinst
+{
+    Token type;
+    union {
+        Reclass *classp;        /* class pointer */
+        Rune    rune;           /* character */
+        int     subid;          /* sub-expression id for RBRA and LBRA */
+        struct Reinst *right;   /* right child of OR */
+    } r;
+    union {    /* regexp relies on these two being in the same union */
+        struct Reinst *left;    /* left child of OR */
+        struct Reinst *next;    /* next instruction for CAT & LBRA */
+    } l;
+} Reinst;
+
+typedef struct {
+    bool caseless;
+    bool dotall;
+} Reflags;
+
+/*
+ *    Reprogram definition
+ */
+typedef struct Reprog
+{
+    Reinst  *startinst;     /* start pc */
+    Reflags flags;
+    int nsubids;
+    Reclass cclass[NCLASS]; /* .data */
+    Reinst  firstinst[];    /* .text : originally 5 elements? */
+} Reprog;
+
+/*
+ *    Sub expression matches
+ */
+typedef cregmatch Resub;
+
+/*
+ *  substitution list
+ */
+typedef struct Resublist
+{
+    Resub m[NSUBEXP];
+} Resublist;
+
+/*
+ * Actions and Tokens (Reinst types)
+ *
+ *  0x800000-0x80FFFF: operators, value => precedence
+ *  0x810000-0x81FFFF: RUNE and char classes.
+ *  0x820000-0x82FFFF: tokens, i.e. operands for operators
+ */
+enum {
+    MASK        = 0xFF00000,
+    OPERATOR    = 0x8000000, /* Bitmask of all operators */
+    START       = 0x8000001, /* Start, used for marker on stack */
+    RBRA        ,           /* Right bracket, ) */
+    LBRA        ,           /* Left bracket, ( */
+    OR          ,           /* Alternation, | */
+    CAT         ,           /* Concatentation, implicit operator */
+    STAR        ,           /* Closure, * */
+    PLUS        ,           /* a+ == aa* */
+    QUEST       ,           /* a? == a|nothing, i.e. 0 or 1 a's */
+    RUNE        = 0x8100000,
+    IRUNE,
+    ASC_bl      , ASC_BL,   /* blank */
+    ASC_ct      , ASC_CT,   /* ctrl */
+    ASC_gr      , ASC_GR,   /* graphic */
+    ASC_pr      , ASC_PR,   /* print */
+    ASC_pt      , ASC_PT,   /* punct */
+    U8_Nd       , U8N_Nd,    /* dec digit, non-digit */
+    U8_LC       , U8N_LC,   /* utf8 letter cased */
+    U8_Ll       , U8N_Ll,   /* utf8 letter lower */
+    U8_Lu       , U8N_Lu,   /* utf8 letter upper */
+    U8_Zs       , U8N_Zs,   /* utf8 white space */
+    U8_Xnx      , U8N_Xnx,  /* utf8 hex digit */
+    U8_Xan      , U8N_Xan,  /* utf8 alphanumeric */
+    U8_Xw       , U8N_Xw,   /* utf8 word */
+    ANY         = 0x8200000, /* Any character except newline, . */
+    ANYNL       ,           /* Any character including newline, . */
+    NOP         ,           /* No operation, internal use only */
+    BOL         , BOS,      /* Beginning of line, string, ^ */
+    EOL         , EOS, EOZ, /* End of line, string, $ */
+    CCLASS      ,           /* Character class, [] */
+    NCCLASS     ,           /* Negated character class, [] */
+    WBOUND      ,           /* Non-word boundary, not consuming meta char */
+    NWBOUND     ,           /* Word boundary, not consuming meta char */
+    END         = 0x82FFFFF, /* Terminate: match found */
+};
+
+/*
+ *  regexec execution lists
+ */
+#define LISTSIZE    10
+#define BIGLISTSIZE (10*LISTSIZE)
+
+typedef struct Relist
+{
+    Reinst*     inst;       /* Reinstruction of the thread */
+    Resublist   se;         /* matched subexpressions in this thread */
+} Relist;
+
+typedef struct Reljunk
+{
+    Relist*     relist[2];
+    Relist*     reliste[2];
+    int         starttype;
+    Rune        startchar;
+    const char* starts;
+    const char* eol;
+} Reljunk;
+
+/*
+ * utf8 and Rune code
+ */
+
+static int
+chartorune(Rune *rune, const char *s)
+{
+    utf8_decode_t ctx = {.state=0};
+    const uint8_t *b = (const uint8_t*)s;
+    do { utf8_decode(&ctx, *b++); } while (ctx.state);
+    *rune = ctx.codep;
+    return (const char*)b - s;
+}
+
+static const char*
+utfrune(const char *s, Rune c)
+{
+    Rune r;
+
+    if (c < 128)        /* ascii */
+        return strchr((char *)s, c);
+
+    for (;;) {
+        int n = chartorune(&r, s);
+        if (r == c) return s;
+        if ((r == 0) | (n == 0)) return NULL;
+        s += n;
+    }
+}
+
+static const char*
+utfruneicase(const char *s, Rune c)
+{
+    Rune r;
+    c = utf8_tolower(c);
+    for (;;) {
+        int n = chartorune(&r, s);
+        if (utf8_tolower(r) == c) return s;
+        if ((r == 0) | (n == 0)) return NULL;
+        s += n;
+    }
+}
+
+/************
+ * regaux.c *
+ ************/
+
+/*
+ *  save a new match in mp
+ */
+static void
+_renewmatch(Resub *mp, int ms, Resublist *sp, int nsubids)
+{
+    int i;
+
+    if (mp==NULL || ms<=0)
+        return;
+    if (mp[0].str == NULL || sp->m[0].str < mp[0].str ||
+       (sp->m[0].str == mp[0].str && sp->m[0].size > mp[0].size)) {
+        for (i=0; i<ms && i<=nsubids; i++)
+            mp[i] = sp->m[i];
+    }
+}
+
+/*
+ * Note optimization in _renewthread:
+ *   *lp must be pending when _renewthread called; if *l has been looked
+ *   at already, the optimization is a bug.
+ */
+static Relist*
+_renewthread(Relist *lp,  /* _relist to add to */
+    Reinst *ip,           /* instruction to add */
+    int ms,
+    Resublist *sep)       /* pointers to subexpressions */
+{
+    Relist *p;
+
+    for (p=lp; p->inst; p++) {
+        if (p->inst == ip) {
+            if (sep->m[0].str < p->se.m[0].str) {
+                if (ms > 1)
+                    p->se = *sep;
+                else
+                    p->se.m[0] = sep->m[0];
+            }
+            return 0;
+        }
+    }
+    p->inst = ip;
+    if (ms > 1)
+        p->se = *sep;
+    else
+        p->se.m[0] = sep->m[0];
+    (++p)->inst = NULL;
+    return p;
+}
+
+/*
+ * same as renewthread, but called with
+ * initial empty start pointer.
+ */
+static Relist*
+_renewemptythread(Relist *lp,   /* _relist to add to */
+    Reinst *ip,                 /* instruction to add */
+    int ms,
+    const char *sp)             /* pointers to subexpressions */
+{
+    Relist *p;
+
+    for (p=lp; p->inst; p++) {
+        if (p->inst == ip) {
+            if (sp < p->se.m[0].str) {
+                if (ms > 1)
+                    memset(&p->se, 0, sizeof(p->se));
+                p->se.m[0].str = sp;
+            }
+            return 0;
+        }
+    }
+    p->inst = ip;
+    if (ms > 1)
+        memset(&p->se, 0, sizeof(p->se));
+    p->se.m[0].str = sp;
+    (++p)->inst = NULL;
+    return p;
+}
+
+/*
+ * Parser Information
+ */
+typedef struct Node
+{
+    Reinst*    first;
+    Reinst*    last;
+} Node;
+
+#define NSTACK 20
+typedef struct Parser
+{
+    const char* exprp;   /* pointer to next character in source expression */
+    Node andstack[NSTACK];
+    Node* andp;
+    Token atorstack[NSTACK];
+    Token* atorp;
+    short subidstack[NSTACK]; /* parallel to atorstack */
+    short* subidp;
+    short cursubid;      /* id of current subexpression */
+    int errors;
+    Reflags flags;
+    int dot_type;
+    int rune_type;
+    bool litmode;
+    bool lastwasand;     /* Last token was operand */
+    bool lexdone;
+    short nbra;
+    short nclass;
+    Rune yyrune;         /* last lex'd rune */
+    Reclass *yyclassp;   /* last lex'd class */
+    Reclass* classp;
+    Reinst* freep;
+    jmp_buf regkaboom;
+} Parser;
+
+/* predeclared crap */
+static void _operator(Parser *par, Token type);
+static void pushand(Parser *par, Reinst *first, Reinst *last);
+static void pushator(Parser *par, Token type);
+static void evaluntil(Parser *par, Token type);
+static int  bldcclass(Parser *par);
+
+static void
+rcerror(Parser *par, cregex_error_t err)
+{
+    par->errors = err;
+    longjmp(par->regkaboom, 1);
+}
+
+static Reinst*
+newinst(Parser *par, Token t)
+{
+    par->freep->type = t;
+    par->freep->l.left = 0;
+    par->freep->r.right = 0;
+    return par->freep++;
+}
+
+static void
+operand(Parser *par, Token t)
+{
+    Reinst *i;
+
+    if (par->lastwasand)
+        _operator(par, CAT);    /* catenate is implicit */
+    i = newinst(par, t);
+
+    if ((t == CCLASS) | (t == NCCLASS))
+        i->r.classp = par->yyclassp;
+    if ((t == RUNE) | (t == IRUNE))
+        i->r.rune = par->yyrune;
+
+    pushand(par, i, i);
+    par->lastwasand = true;
+}
+
+static void
+_operator(Parser *par, Token t)
+{
+    if (t==RBRA && --par->nbra<0)
+        rcerror(par, creg_unmatchedrightparenthesis);
+    if (t==LBRA) {
+        if (++par->cursubid >= NSUBEXP)
+            rcerror(par, creg_toomanysubexpressions);
+        par->nbra++;
+        if (par->lastwasand)
+            _operator(par, CAT);
+    } else
+        evaluntil(par, t);
+    if (t != RBRA)
+        pushator(par, t);
+    par->lastwasand = 0;
+    if (t==STAR || t==QUEST || t==PLUS || t==RBRA)
+        par->lastwasand = true;    /* these look like operands */
+}
+
+static void
+pushand(Parser *par, Reinst *f, Reinst *l)
+{
+    if (par->andp >= &par->andstack[NSTACK])
+        rcerror(par, creg_operandstackoverflow);
+    par->andp->first = f;
+    par->andp->last = l;
+    par->andp++;
+}
+
+static void
+pushator(Parser *par, Token t)
+{
+    if (par->atorp >= &par->atorstack[NSTACK])
+        rcerror(par, creg_operatorstackoverflow);
+    *par->atorp++ = t;
+    *par->subidp++ = par->cursubid;
+}
+
+static Node*
+popand(Parser *par, Token op)
+{
+    Reinst *inst;
+
+    if (par->andp <= &par->andstack[0]) {
+        rcerror(par, creg_missingoperand);
+        inst = newinst(par, NOP);
+        pushand(par, inst, inst);
+    }
+    return --par->andp;
+}
+
+static Token
+popator(Parser *par)
+{
+    if (par->atorp <= &par->atorstack[0])
+        rcerror(par, creg_operatorstackunderflow);
+    --par->subidp;
+    return *--par->atorp;
+}
+
+static void
+evaluntil(Parser *par, Token pri)
+{
+    Node *op1, *op2;
+    Reinst *inst1, *inst2;
+
+    while (pri==RBRA || par->atorp[-1]>=pri) {
+        switch (popator(par)) {
+        default:
+            rcerror(par, creg_unknownoperator);
+            break;
+        case LBRA:        /* must have been RBRA */
+            op1 = popand(par, '(');
+            inst2 = newinst(par, RBRA);
+            inst2->r.subid = *par->subidp;
+            op1->last->l.next = inst2;
+            inst1 = newinst(par, LBRA);
+            inst1->r.subid = *par->subidp;
+            inst1->l.next = op1->first;
+            pushand(par, inst1, inst2);
+            return;
+        case OR:
+            op2 = popand(par, '|');
+            op1 = popand(par, '|');
+            inst2 = newinst(par, NOP);
+            op2->last->l.next = inst2;
+            op1->last->l.next = inst2;
+            inst1 = newinst(par, OR);
+            inst1->r.right = op1->first;
+            inst1->l.left = op2->first;
+            pushand(par, inst1, inst2);
+            break;
+        case CAT:
+            op2 = popand(par, 0);
+            op1 = popand(par, 0);
+            op1->last->l.next = op2->first;
+            pushand(par, op1->first, op2->last);
+            break;
+        case STAR:
+            op2 = popand(par, '*');
+            inst1 = newinst(par, OR);
+            op2->last->l.next = inst1;
+            inst1->r.right = op2->first;
+            pushand(par, inst1, inst1);
+            break;
+        case PLUS:
+            op2 = popand(par, '+');
+            inst1 = newinst(par, OR);
+            op2->last->l.next = inst1;
+            inst1->r.right = op2->first;
+            pushand(par, op2->first, inst1);
+            break;
+        case QUEST:
+            op2 = popand(par, '?');
+            inst1 = newinst(par, OR);
+            inst2 = newinst(par, NOP);
+            inst1->l.left = inst2;
+            inst1->r.right = op2->first;
+            op2->last->l.next = inst2;
+            pushand(par, inst1, inst2);
+            break;
+        }
+    }
+}
+
+static Reprog*
+optimize(Parser *par, Reprog *pp)
+{
+    Reinst *inst, *target;
+    size_t size;
+    Reprog *npp;
+    Reclass *cl;
+    ptrdiff_t diff;
+
+    /*
+     *  get rid of NOOP chains
+     */
+    for (inst = pp->firstinst; inst->type != END; inst++) {
+        target = inst->l.next;
+        while (target->type == NOP)
+            target = target->l.next;
+        inst->l.next = target;
+    }
+
+    /*
+     *  The original allocation is for an area larger than
+     *  necessary.  Reallocate to the actual space used
+     *  and then relocate the code.
+     */
+    size = sizeof(Reprog) + (par->freep - pp->firstinst)*sizeof(Reinst);
+    npp = (Reprog *)realloc(pp, size);
+    if (npp==NULL || npp==pp)
+        return pp;
+    diff = (char *)npp - (char *)pp;
+    par->freep = (Reinst *)((char *)par->freep + diff);
+    for (inst = npp->firstinst; inst < par->freep; inst++) {
+        switch (inst->type) {
+        case OR:
+        case STAR:
+        case PLUS:
+        case QUEST:
+            inst->r.right = (Reinst *)((char*)inst->r.right + diff);
+            break;
+        case CCLASS:
+        case NCCLASS:
+            inst->r.right = (Reinst *)((char*)inst->r.right + diff);
+            cl = inst->r.classp;
+            cl->end = (Rune *)((char*)cl->end + diff);
+            break;
+        }
+        inst->l.left = (Reinst *)((char*)inst->l.left + diff);
+    }
+    npp->startinst = (Reinst *)((char*)npp->startinst + diff);
+    return npp;
+}
+
+static Reclass*
+newclass(Parser *par)
+{
+    if (par->nclass >= NCLASS)
+        rcerror(par, creg_toomanycharacterclasses);
+    return &(par->classp[par->nclass++]);
+}
+
+static int
+nextc(Parser *par, Rune *rp)
+{
+    if (par->lexdone) {
+        *rp = 0;
+        return 1;
+    }
+    par->exprp += chartorune(rp, par->exprp);
+    if (*rp == '\\') {
+        if (par->litmode && *par->exprp != 'E')
+            return 1;
+        par->exprp += chartorune(rp, par->exprp);
+        switch (*rp) {
+            case 'E': return par->litmode + 1;
+            case 't': *rp = '\t'; break;
+            case 'n': *rp = '\n'; break;
+            case 'r': *rp = '\r'; break;
+            case 'v': *rp = '\v'; break;
+            case 'f': *rp = '\f'; break;
+            case 'd': *rp = U8_Nd; break;
+            case 'D': *rp = U8N_Nd; break;
+            case 's': *rp = U8_Zs; break;
+            case 'S': *rp = U8N_Zs; break;
+            case 'w': *rp = U8_Xw; break;
+            case 'W': *rp = U8N_Xw; break;
+            case 'x': if (*par->exprp != '{') break;
+                *rp = 0; sscanf(++par->exprp, "%x", rp);
+                while (*par->exprp) if (*(par->exprp++) == '}') break;
+                if (par->exprp[-1] != '}')
+                    rcerror(par, creg_unmatchedrightparenthesis);
+                return 2;
+            case 'p': case 'P': { /* https://www.regular-expressions.info/unicode.html */
+                static struct { const char* c; int n, r; } cls[] = {
+                    {"{Alpha}", 7, U8_LC}, {"{LC}", 4, U8_LC},
+                    {"{Alnum}", 7, U8_Xan},
+                    {"{Digit}", 7, U8_Nd}, {"{Nd}", 4, U8_Nd},
+                    {"{Lower}", 7, U8_Ll}, {"{Ll}", 4, U8_Ll},
+                    {"{Space}", 7, U8_Zs}, {"{Zs}", 4, U8_Zs},
+                    {"{Upper}", 7, U8_Lu}, {"{Lu}", 4, U8_Lu},
+                    {"{XDigit}", 8, U8_Xnx},
+                    {"{Blank}", 7, ASC_bl},
+                    {"{Graph}", 7, ASC_gr},
+                    {"{Print}", 7, ASC_pr},
+                    {"{Punct}", 7, ASC_pt},
+                };
+                int inv = *rp == 'P';
+                for (unsigned i = 0; i < (sizeof cls/sizeof *cls); ++i)
+                    if (!strncmp(par->exprp, cls[i].c, cls[i].n)) {
+                        if (par->rune_type == IRUNE && (cls[i].r == U8_Ll || cls[i].r == U8_Lu))
+                            *rp = U8_LC + inv;
+                        else
+                            *rp = cls[i].r + inv;
+                        par->exprp += cls[i].n;
+                        break;
+                    }
+                if (*rp < OPERATOR) {
+                    rcerror(par, creg_unknownoperator);
+                    *rp = 0;
+                }
+                break;
+            }
+        }
+        return 1;
+    }
+    if (*rp == 0)
+        par->lexdone = true;
+    return par->litmode;
+}
+
+static Token
+lex(Parser *par)
+{
+    int quoted;
+    start:
+    quoted = nextc(par, &par->yyrune);
+    if (quoted) {
+        if (quoted == 2) {
+            if (par->litmode && par->yyrune == 'E') {
+                par->litmode = false;
+                goto start;
+            }
+            return par->yyrune == 0 ? END : par->rune_type;
+        }
+        switch (par->yyrune) {
+        case  0 : return END;
+        case 'b': return WBOUND;
+        case 'B': return NWBOUND;
+        case 'A': return BOS;
+        case 'z': return EOS;
+        case 'Z': return EOZ;
+        case 'Q': par->litmode = true;
+                  goto start;
+        default : return par->rune_type;
+        }
+    }
+
+    switch (par->yyrune) {
+    case  0 : return END;
+    case '*': return STAR;
+    case '?': return QUEST;
+    case '+': return PLUS;
+    case '|': return OR;
+    case '.': return par->dot_type;
+    case '(':
+        if (par->exprp[0] == '?') {
+            for (int k = 1, enable = 1; ; ++k) switch (par->exprp[k]) {
+                case  0 : par->exprp += k; return END;
+                case ')': par->exprp += k + 1; goto start;
+                case '-': enable = 0; break;
+                case 's': if (!par->flags.dotall) par->dot_type = ANY + enable; break;
+                case 'i': if (!par->flags.caseless) par->rune_type = RUNE + enable; break;
+                default: rcerror(par, creg_unknownoperator); return 0;
+            }
+        }
+        return LBRA;
+    case ')': return RBRA;
+    case '^': return BOL;
+    case '$': return EOL;
+    case '[': return bldcclass(par);
+    }
+    return par->rune_type;
+}
+
+static Token
+bldcclass(Parser *par)
+{
+    Token type;
+    Rune r[NCCRUNE];
+    Rune *p, *ep, *np;
+    Rune rune;
+    int quoted;
+
+    /* we have already seen the '[' */
+    type = CCLASS;
+    par->yyclassp = newclass(par);
+
+    /* look ahead for negation */
+    /* SPECIAL CASE!!! negated classes don't match \n */
+    ep = r;
+    quoted = nextc(par, &rune);
+    if (!quoted && rune == '^') {
+        type = NCCLASS;
+        quoted = nextc(par, &rune);
+        *ep++ = '\n';
+        *ep++ = '\n';
+    }
+
+    /* parse class into a set of spans */
+    for (; ep < &r[NCCRUNE]; quoted = nextc(par, &rune)) {
+        if (rune == 0) {
+            rcerror(par, creg_malformedcharacterclass);
+            return 0;
+        }
+        if (!quoted) {
+            if (rune == ']')
+                break;
+            if (rune == '-') {
+                if (ep != r && *par->exprp != ']') {
+                    quoted = nextc(par, &rune);
+                    if (rune == 0) {
+                        rcerror(par, creg_malformedcharacterclass);
+                        return 0;
+                    }
+                    ep[-1] = rune;
+                    continue;
+                }
+            }
+        }
+        *ep++ = rune;
+        *ep++ = rune;
+    }
+
+    /* sort on span start */
+    for (p = r; p < ep; p += 2) {
+        for (np = p; np < ep; np += 2)
+            if (*np < *p) {
+                rune = np[0];
+                np[0] = p[0];
+                p[0] = rune;
+                rune = np[1];
+                np[1] = p[1];
+                p[1] = rune;
+            }
+    }
+
+    /* merge spans */
+    np = par->yyclassp->spans;
+    p = r;
+    if (r == ep)
+        par->yyclassp->end = np;
+    else {
+        np[0] = *p++;
+        np[1] = *p++;
+        for (; p < ep; p += 2)
+            if (p[0] <= np[1]) {
+                if (p[1] > np[1])
+                    np[1] = p[1];
+            } else {
+                np += 2;
+                np[0] = p[0];
+                np[1] = p[1];
+            }
+        par->yyclassp->end = np+2;
+    }
+
+    return type;
+}
+
+static Reprog*
+regcomp1(Parser *par, const char *s, int cflags)
+{
+    Token token;
+    Reprog *volatile pp;
+
+    /* get memory for the program. estimated max usage */
+    const int instcap = 5 + 6*strlen(s);
+    pp = (Reprog *)malloc(sizeof(Reprog) + instcap*sizeof(Reinst));
+    if (pp == NULL) {
+        rcerror(par, creg_outofmemory);
+        return NULL;
+    }
+    pp->flags.caseless = (cflags & creg_caseless) != 0;
+    pp->flags.dotall = (cflags & creg_dotall) != 0;
+    par->freep = pp->firstinst;
+    par->classp = pp->cclass;
+    par->errors = 0;
+
+    if (setjmp(par->regkaboom))
+        goto out;
+
+    /* go compile the sucker */
+    par->lexdone = false;
+    par->flags = pp->flags;
+    par->rune_type = pp->flags.caseless ? IRUNE : RUNE;
+    par->dot_type = pp->flags.dotall ? ANYNL : ANY;
+    par->litmode = false;
+    par->exprp = s;
+    par->nclass = 0;
+    par->nbra = 0;
+    par->atorp = par->atorstack;
+    par->andp = par->andstack;
+    par->subidp = par->subidstack;
+    par->lastwasand = false;
+    par->cursubid = 0;
+
+    /* Start with a low priority operator to prime parser */
+    pushator(par, START-1);
+    while ((token = lex(par)) != END) {
+        if ((token & MASK) == OPERATOR)
+            _operator(par, token);
+        else
+            operand(par, token);
+    }
+
+    /* Close with a low priority operator */
+    evaluntil(par, START);
+
+    /* Force END */
+    operand(par, END);
+    evaluntil(par, START);
+#ifdef DEBUG
+    dumpstack(par);
+#endif
+    if (par->nbra)
+        rcerror(par, creg_unmatchedleftparenthesis);
+    --par->andp;    /* points to first and only operand */
+    pp->startinst = par->andp->first;
+#ifdef DEBUG
+    dump(pp);
+#endif
+    pp = optimize(par, pp);
+    pp->nsubids = par->cursubid;
+#ifdef DEBUG
+    print("start: %d\n", par->andp->first-pp->firstinst);
+    dump(pp);
+#endif
+out:
+    if (par->errors) {
+        free(pp);
+        pp = NULL;
+    }
+    return pp;
+}
+
+
+static int
+runematch(Rune s, Rune r, bool icase)
+{
+    int inv = 0;
+    switch (s) {
+    case ASC_BL: inv = 1; /* fallthrough */
+    case ASC_bl: return inv ^ ((r == ' ') | (r == '\t'));
+    case ASC_CT: inv = 1;
+    case ASC_ct: return inv ^ (iscntrl(r) != 0);
+    case ASC_GR: inv = 1;
+    case ASC_gr: return inv ^ (isgraph(r) != 0);
+    case ASC_PR: inv = 1;
+    case ASC_pr: return inv ^ (isprint(r) != 0);
+    case ASC_PT: inv = 1;
+    case ASC_pt: return inv ^ (ispunct(r) != 0);
+    case U8N_Nd: inv = 1;
+    case U8_Nd: return inv ^ (utf8_isdigit(r));
+    case U8N_LC: inv = 1;
+    case U8_LC: return inv ^ utf8_isalpha(r);
+    case U8N_Ll: inv = 1;
+    case U8_Ll: return inv ^ utf8_islower(r);
+    case U8N_Lu: inv = 1;
+    case U8_Lu: return inv ^ utf8_isupper(r);
+    case U8N_Zs: inv = 1;
+    case U8_Zs: return inv ^ utf8_isspace(r);
+    case U8N_Xan: inv = 1;
+    case U8_Xan: return inv ^ utf8_isalnum(r);
+    case U8N_Xnx: inv = 1;
+    case U8_Xnx: return inv ^ utf8_isxdigit(r);
+    case U8N_Xw: inv = 1;
+    case U8_Xw: return inv ^ (utf8_isalnum(r) | (r == '_'));
+    }
+    return icase ? utf8_tolower(s) == utf8_tolower(r) : s == r;
+}
+
+/*
+ *  return 0 if no match
+ *        >0 if a match
+ *        <0 if we ran out of _relist space
+ */
+static int
+regexec1(const Reprog *progp,    /* program to run */
+    const char *bol,    /* string to run machine on */
+    Resub *mp,    /* subexpression elements */
+    int ms,        /* number of elements at mp */
+    Reljunk *j,
+    int mflags
+)
+{
+    int flag=0;
+    Reinst *inst;
+    Relist *tlp;
+    Relist *tl, *nl;    /* This list, next list */
+    Relist *tle, *nle;  /* Ends of this and next list */
+    const char *s, *p;
+    int i, n, checkstart;
+    Rune r, *rp, *ep;
+    int match = 0;
+
+    bool icase = progp->flags.caseless;
+    checkstart = j->starttype;
+    if (mp)
+        for (i=0; i<ms; i++) {
+            mp[i].str = NULL;
+            mp[i].size = 0;
+        }
+    j->relist[0][0].inst = NULL;
+    j->relist[1][0].inst = NULL;
+
+    /* Execute machine once for each character, including terminal NUL */
+    s = j->starts;
+    do {
+        /* fast check for first char */
+        if (checkstart) {
+            switch (j->starttype) {
+            case IRUNE:
+                p = utfruneicase(s, j->startchar);
+                goto next1;
+            case RUNE:
+                p = utfrune(s, j->startchar);
+                next1:
+                if (p == NULL || s == j->eol)
+                    return match;
+                s = p;
+                break;
+            case BOL:
+                if (s == bol)
+                    break;
+                p = utfrune(s, '\n');
+                if (p == NULL || s == j->eol)
+                    return match;
+                s = p+1;
+                break;
+            }
+        }
+        n = chartorune(&r, s);
+
+        /* switch run lists */
+        tl = j->relist[flag];
+        tle = j->reliste[flag];
+        nl = j->relist[flag^=1];
+        nle = j->reliste[flag];
+        nl->inst = NULL;
+
+        /* Add first instruction to current list */
+        if (match == 0)
+            _renewemptythread(tl, progp->startinst, ms, s);
+
+        /* Execute machine until current list is empty */
+        for (tlp=tl; tlp->inst; tlp++) {    /* assignment = */
+            for (inst = tlp->inst; ; inst = inst->l.next) {
+                int ok = false;
+
+                switch (inst->type) {
+                case RUNE:
+                case IRUNE:    /* regular character */
+                    ok = runematch(inst->r.rune, r, (icase = inst->type==IRUNE));
+                    break;
+                case LBRA:
+                    tlp->se.m[inst->r.subid].str = s;
+                    continue;
+                case RBRA:
+                    tlp->se.m[inst->r.subid].size = s - tlp->se.m[inst->r.subid].str;
+                    continue;
+                case ANY:
+                    ok = (r != '\n');
+                    break;
+                case ANYNL:
+                    ok = true;
+                    break;
+                case BOL:
+                    if (s == bol || s[-1] == '\n') continue;
+                    break;
+                case BOS:
+                    if (s == bol) continue;
+                    break;
+                case EOL:
+                    if (r == '\n') continue;
+                case EOS: /* fallthrough */
+                    if (s == j->eol || r == 0) continue;
+                    break;
+                case EOZ:
+                    if (s == j->eol || r == 0 || (r == '\n' && s[1] == 0)) continue;
+                    break;
+                case NWBOUND:
+                    ok = true;
+                case WBOUND: /* fallthrough */
+                    if (ok ^ (s == bol || s == j->eol || ((utf8_isalnum(s[-1]) || s[-1] == '_')
+                                                        ^ (utf8_isalnum(s[ 0]) || s[ 0] == '_'))))
+                        continue;
+                    break;
+                case NCCLASS:
+                    ok = true;
+                case CCLASS: /* fallthrough */
+                    ep = inst->r.classp->end;
+                    for (rp = inst->r.classp->spans; rp < ep; rp += 2) {
+                        if ((r >= rp[0] && r <= rp[1]) || (rp[0] == rp[1] && runematch(rp[0], r, icase)))
+                            break;
+                    }
+                    ok ^= (rp < ep);
+                    break;
+                case OR:
+                    /* evaluate right choice later */
+                    if (_renewthread(tlp, inst->r.right, ms, &tlp->se) == tle)
+                        return -1;
+                    /* efficiency: advance and re-evaluate */
+                    continue;
+                case END:    /* Match! */
+                    match = !(mflags & creg_fullmatch) ||
+                            ((s == j->eol || r == 0 || r == '\n') &&
+                            (tlp->se.m[0].str == bol || tlp->se.m[0].str[-1] == '\n'));
+                    tlp->se.m[0].size = s - tlp->se.m[0].str;
+                    if (mp != NULL)
+                        _renewmatch(mp, ms, &tlp->se, progp->nsubids);
+                    break;
+                }
+
+                if (ok && _renewthread(nl, inst->l.next, ms, &tlp->se) == nle)
+                    return -1;
+                break;
+            }
+        }
+        if (s == j->eol)
+            break;
+        checkstart = j->starttype && nl->inst==NULL;
+        s += n;
+    } while (r);
+    return match;
+}
+
+static int
+regexec2(const Reprog *progp,    /* program to run */
+    const char *bol,    /* string to run machine on */
+    Resub *mp,    /* subexpression elements */
+    int ms,        /* number of elements at mp */
+    Reljunk *j,
+    int mflags
+)
+{
+    int rv;
+    Relist *relists;
+
+    /* mark space */
+    relists = (Relist *)malloc(2 * BIGLISTSIZE*sizeof(Relist));
+    if (relists == NULL)
+        return -1;
+
+    j->relist[0] = relists;
+    j->relist[1] = relists + BIGLISTSIZE;
+    j->reliste[0] = relists + BIGLISTSIZE - 2;
+    j->reliste[1] = relists + 2*BIGLISTSIZE - 2;
+
+    rv = regexec1(progp, bol, mp, ms, j, mflags);
+    free(relists);
+    return rv;
+}
+
+static int
+regexec9(const Reprog *progp,    /* program to run */
+    const char *bol,    /* string to run machine on */
+    int ms,             /* number of elements at mp */
+    Resub mp[],         /* subexpression elements */
+    int mflags)
+{
+    Reljunk j;
+    Relist relist0[LISTSIZE], relist1[LISTSIZE];
+    int rv;
+
+    /*
+      *  use user-specified starting/ending location if specified
+     */
+    j.starts = bol;
+    j.eol = NULL;
+
+    if (mp && mp->str && ms>0) {
+        if (mflags & creg_startend)
+            j.starts = mp->str, j.eol = mp->str + mp->size;
+        else if (mflags & creg_next)
+            j.starts = mp->str + mp->size;
+    }
+
+    j.starttype = 0;
+    j.startchar = 0;
+    int rune_type = progp->flags.caseless ? IRUNE : RUNE;
+    if (progp->startinst->type == rune_type && progp->startinst->r.rune < 128) {
+        j.starttype = rune_type;
+        j.startchar = progp->startinst->r.rune;
+    }
+    if (progp->startinst->type == BOL)
+        j.starttype = BOL;
+
+    /* mark space */
+    j.relist[0] = relist0;
+    j.relist[1] = relist1;
+    j.reliste[0] = relist0 + LISTSIZE - 2;
+    j.reliste[1] = relist1 + LISTSIZE - 2;
+
+    rv = regexec1(progp, bol, mp, ms, &j, mflags);
+    if (rv >= 0)
+        return rv;
+    rv = regexec2(progp, bol, mp, ms, &j, mflags);
+    return rv;
+}
+
+/*
+ * API functions
+ */
+
+/* substitute into one string using the matches from the last regexec() */
+void cregex_replace(
+    const char *sp,     /* source string */
+    char *dp,           /* destination string */
+    int dlen,
+    int ms,             /* number of elements pointed to by mp */
+    const cregmatch mp[])   /* subexpression elements */
+{
+    const char *ssp, *ep;
+    int i;
+
+    ep = dp+dlen-1;
+    while (*sp != '\0') {
+        if (*sp == '\\') {
+            switch (*++sp) {
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+                i = *sp - '0';
+                if (mp[i].str != NULL && mp != NULL && ms > i)
+                    for (ssp = mp[i].str; ssp < (mp[i].str + mp[i].size); ssp++)
+                        if (dp < ep)
+                            *dp++ = *ssp;
+                break;
+            case '\\':
+                if (dp < ep)
+                    *dp++ = '\\';
+                break;
+            case '\0':
+                sp--;
+                break;
+            default:
+                if (dp < ep)
+                    *dp++ = *sp;
+                break;
+            }
+        } else if (*sp == '&') {
+            if (mp[0].str != NULL && mp != NULL && ms > 0)
+                for (ssp = mp[0].str; ssp < (mp[0].str + mp[0].size); ssp++)
+                    if (dp < ep)
+                        *dp++ = *ssp;
+        } else {
+            if (dp < ep)
+                *dp++ = *sp;
+        }
+        sp++;
+    }
+    *dp = '\0';
+}
+
+int cregex_compile(cregex *rx, const char* pattern, int cflags) {
+    Parser par;
+    rx->prog = regcomp1(&par, pattern, cflags);
+    if (rx->prog)
+        return 1 + rx->prog->nsubids;
+    return par.errors;
+}
+
+int cregex_captures(cregex rx) {
+    return rx.prog ? 1 + rx.prog->nsubids : 0;
+}
+
+int cregex_find(const cregex *rx, const char* string,
+                size_t nmatch, cregmatch match[], int mflags) {
+    int res = regexec9(rx->prog, string, nmatch, match, mflags);
+    switch (res) {
+    case 1: return 1 + rx->prog->nsubids;
+    case 0: return creg_nomatch;
+    default: return creg_matcherror;
+    }
+}
+
+void cregex_drop(cregex* self) {
+    free(self->prog);
+}
author	Tyge Løvset <[email protected]>	2022-06-01 16:28:07 +0200
committer	Tyge Løvset <[email protected]>	2022-06-01 16:28:07 +0200
commit	de629774cb912aa3d563f24d99258142713c3fcd (patch)
tree	c37e2851d6cb049bc0863a59b6ecf5945fb88619 /src
parent	7fb43a24a17da787dd809114ca26c1231b058493 (diff)
download	STC-modified-de629774cb912aa3d563f24d99258142713c3fcd.tar.gz STC-modified-de629774cb912aa3d563f24d99258142713c3fcd.zip