summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Tyge Løvset <[email protected]> 2022-02-06 16:15:41 +0100
committer Tyge Løvset <[email protected]> 2022-02-06 16:15:41 +0100
commit fcdbc1ac6b7e6f65d4dde37ed7552707863fcf80 (patch)
tree e69151631d9e823eac1f284723fffe96cc9ea901
parent d68d26d55881cac66be4ab357c08add8c348f083 (diff)
download STC-modified-fcdbc1ac6b7e6f65d4dde37ed7552707863fcf80.tar.gz
STC-modified-fcdbc1ac6b7e6f65d4dde37ed7552707863fcf80.zip
Misc improvements.
-rw-r--r-- docs/cmap_api.md 6
-rw-r--r-- examples/regex1.c 4
-rw-r--r-- examples/regex_match.c 2
-rw-r--r-- include/stc/cregex.h 17
-rw-r--r-- include/stc/cstr.h 18
-rw-r--r-- include/stc/csview.h 4
-rw-r--r-- src/cregex.c 72
-rw-r--r-- src/cregex_utf8.c 4
8 files changed, 62 insertions, 65 deletions
diff --git a/docs/cmap_api.md b/docs/cmap_api.md
index 13f21ef5..656d4853 100644
--- a/docs/cmap_api.md
+++ b/docs/cmap_api.md
@@ -387,9 +387,9 @@ int main()
{
c_auto (Vikings, vikings) {
c_apply(v, Vikings_emplace(&vikings, v), c_pair(v), Vikings_raw, {
- {{"Einar", "Norway"}, 20},
- {{"Olaf", "Denmark"}, 24},
- {{"Harald", "Iceland"}, 12},
+ { {"Einar", "Norway"}, 20 },
+ { {"Olaf", "Denmark"}, 24 },
+ { {"Harald", "Iceland"}, 12 },
});
Vikings_emplace_or_assign(&vikings, (RViking){"Bjorn", "Sweden"}, 10);
diff --git a/examples/regex1.c b/examples/regex1.c
index 894fe2b1..e244c984 100644
--- a/examples/regex1.c
+++ b/examples/regex1.c
@@ -10,9 +10,9 @@ int main(int argc, char* argv[])
c_auto (cstr, input)
c_auto (cregex, float_expr)
{
- float_expr = cregex_new("^[+-]?[0-9]+((\\.[0-9]*)?|\\.[0-9]+)$", 0);
+ int res = cregex_compile(&float_expr, "^[+-]?[0-9]+((\\.[0-9]*)?|\\.[0-9]+)$", 0);
// Until "q" is given, ask for another number
- while (true)
+ if (res > 0) while (true)
{
printf("Enter a double precision number (q for quit): ");
cstr_getline(&input, stdin);
diff --git a/examples/regex_match.c b/examples/regex_match.c
index c5798b60..f543fc31 100644
--- a/examples/regex_match.c
+++ b/examples/regex_match.c
@@ -25,7 +25,7 @@ int main()
printf("Could not find any digits\n");
}
- while (cregex_find(&re, s, 1, m, creg_next) > 0) {
+ while (cregex_find(&re, s, 10, m, creg_next) > 0) {
printf("%.*s ; ", (int)m[0].len, m[0].str);
}
puts("");
diff --git a/include/stc/cregex.h b/include/stc/cregex.h
index a6079c38..38d7042c 100644
--- a/include/stc/cregex.h
+++ b/include/stc/cregex.h
@@ -72,28 +72,23 @@ typedef struct {
size_t len;
} cregmatch;
-/* return number of capture groups on success, or (negative) error code on failure. */
-int cregex_compile(cregex *rx, const char* pattern, int cflags);
-
static inline cregex cregex_init(void) {
cregex rx = {NULL}; return rx;
}
-static inline cregex cregex_new(const char* pattern, int cflags) {
- cregex rx;
- cregex_compile(&rx, pattern, cflags);
- return rx;
-}
-/* number of capture groups in the regex pattern */
+/* return number of capture groups on success, or (negative) error code on failure. */
+int cregex_compile(cregex *self, const char* pattern, int cflags);
+
+/* number of capture groups in a regex pattern */
int cregex_captures(cregex rx);
/* return number of capture groups on success, or (negative) error code on failure. */
-int cregex_find(const cregex *rx, const char* string,
+int cregex_find(const cregex *self, const char* string,
size_t nmatch, cregmatch match[], int mflags);
void cregex_replace(const char* src, char* dst, int dsize,
int nmatch, const cregmatch match[]);
-void cregex_drop(cregex* preg);
+void cregex_drop(cregex* self);
#endif
diff --git a/include/stc/cstr.h b/include/stc/cstr.h
index 78fd4cce..937e5151 100644
--- a/include/stc/cstr.h
+++ b/include/stc/cstr.h
@@ -32,18 +32,18 @@
#include <ctype.h>
#define cstr_npos (SIZE_MAX >> 1)
-typedef struct { size_t size, cap; char str[sizeof(size_t)]; } _cstr_rep_t;
-#define _cstr_rep(self) c_container_of((self)->str, _cstr_rep_t, str)
+typedef struct { size_t size, cap; char chr; } _cstr_rep_t;
+#define _cstr_rep(self) c_container_of((self)->str, _cstr_rep_t, chr)
#ifdef _i_static
- static _cstr_rep_t _cstr_nullrep = {0, 0, {0}};
- static const cstr cstr_null = {_cstr_nullrep.str};
+ static _cstr_rep_t _cstr_nullrep = {0, 0, 0};
+ static const cstr cstr_null = {&_cstr_nullrep.chr};
#else
extern const cstr cstr_null;
#endif
/* optimal memory: based on malloc_usable_size() sequence: 24, 40, 56, ... */
-#define _cstr_opt_mem(cap) ((((offsetof(_cstr_rep_t, str) + (cap) + 8)>>4)<<4) + 8)
+#define _cstr_opt_mem(cap) ((((offsetof(_cstr_rep_t, chr) + (cap) + 8)>>4)<<4) + 8)
/* optimal string capacity: 7, 23, 39, ... */
-#define _cstr_opt_cap(cap) (_cstr_opt_mem(cap) - offsetof(_cstr_rep_t, str) - 1)
+#define _cstr_opt_cap(cap) (_cstr_opt_mem(cap) - offsetof(_cstr_rep_t, chr) - 1)
STC_API cstr cstr_from_n(const char* str, size_t n);
STC_API cstr cstr_from_fmt(const char* fmt, ...);
@@ -186,7 +186,7 @@ cstr_reserve(cstr* self, const size_t cap) {
const size_t oldcap = rep->cap;
if (cap > oldcap) {
rep = (_cstr_rep_t*) c_realloc(oldcap ? rep : NULL, _cstr_opt_mem(cap));
- self->str = rep->str;
+ self->str = &rep->chr;
if (oldcap == 0) self->str[rep->size = 0] = '\0';
return (rep->cap = _cstr_opt_cap(cap));
}
@@ -205,9 +205,9 @@ STC_DEF cstr
cstr_from_n(const char* str, const size_t n) {
if (n == 0) return cstr_null;
_cstr_rep_t* rep = (_cstr_rep_t*) c_malloc(_cstr_opt_mem(n));
- rep->str[rep->size = n] = '\0';
+ cstr s = {(char *) memcpy(&rep->chr, str, n)};
+ s.str[rep->size = n] = '\0';
rep->cap = _cstr_opt_cap(n);
- cstr s = {(char *) memcpy(rep->str, str, n)};
return s;
}
diff --git a/include/stc/csview.h b/include/stc/csview.h
index 2979b2da..c2bd041d 100644
--- a/include/stc/csview.h
+++ b/include/stc/csview.h
@@ -144,7 +144,7 @@ csview_substr(csview sv, intptr_t pos, size_t n) {
pos += sv.size;
if (pos < 0) pos = 0;
}
- if (pos > sv.size) pos = sv.size;
+ if (pos > (intptr_t)sv.size) pos = sv.size;
if (pos + n > sv.size) n = sv.size - pos;
sv.str += pos, sv.size = n;
return sv;
@@ -157,7 +157,7 @@ csview_slice(csview sv, intptr_t p1, intptr_t p2) {
if (p1 < 0) p1 = 0;
}
if (p2 < 0) p2 += sv.size;
- if (p2 > sv.size) p2 = sv.size;
+ if (p2 > (intptr_t)sv.size) p2 = sv.size;
sv.str += p1, sv.size = p2 > p1 ? p2 - p1 : 0;
return sv;
}
diff --git a/src/cregex.c b/src/cregex.c
index aaa6e62a..3e4b8796 100644
--- a/src/cregex.c
+++ b/src/cregex.c
@@ -34,7 +34,8 @@ THE SOFTWARE.
#include <stc/cregex.h>
#include "cregex_utf8.c"
-typedef uint32_t Rune;
+typedef uint32_t Rune; /* Utf8 code point */
+typedef int32_t Token;
/* max character classes per program */
#define NCLASS creg_max_classes
/* max subexpressions */
@@ -56,7 +57,7 @@ typedef struct
*/
typedef struct Reinst
{
- int type;
+ Token type;
union {
Reclass *classp; /* class pointer */
Rune rune; /* character */
@@ -102,21 +103,22 @@ typedef struct Resublist
/*
* Actions and Tokens (Reinst types)
*
- * 0x80-0x8F: operators, value => precedence
- * 0x90-0xAF: RUNE and char classes.
- * 0xB0-0xBF: tokens, i.e. operands for operators
+ * 0x800000-0x80FFFF: operators, value => precedence
+ * 0x810000-0x81FFFF: RUNE and char classes.
+ * 0x820000-0x82FFFF: tokens, i.e. operands for operators
*/
enum {
- OPERATOR = 0x80, /* Bitmask of all operators */
- START = 0x80, /* Start, used for marker on stack */
- RBRA , /* Right bracket, ) */
- LBRA , /* Left bracket, ( */
- OR , /* Alternation, | */
- CAT , /* Concatentation, implicit operator */
- STAR , /* Closure, * */
- PLUS , /* a+ == aa* */
- QUEST , /* a? == a|nothing, i.e. 0 or 1 a's */
- RUNE = 0x90,
+ MASK = 0xFF0000,
+ OPERATOR = 0x800000, /* Bitmask of all operators */
+ START = 0x800001, /* Start, used for marker on stack */
+ RBRA , /* Right bracket, ) */
+ LBRA , /* Left bracket, ( */
+ OR , /* Alternation, | */
+ CAT , /* Concatenation, implicit operator */
+ STAR , /* Closure, * */
+ PLUS , /* a+ == aa* */
+ QUEST , /* a? == a|nothing, i.e. 0 or 1 a's */
+ RUNE = 0x810000,
CLS_d , CLS_D, /* digit, non-digit */
CLS_s , CLS_S, /* space, non-space */
CLS_w , CLS_W, /* word, non-word */
@@ -130,7 +132,7 @@ enum {
CLS_pr , CLS_PR, /* print */
CLS_up , CLS_UP, /* upper */
CLS_xd , CLS_XD, /* xdigit */
- ANY = 0xB0, /* Any character except newline, . */
+ ANY = 0x820000, /* Any character except newline, . */
ANYNL , /* Any character including newline, . */
NOP , /* No operation, internal use only */
BOL , /* Beginning of line, ^ */
@@ -139,7 +141,7 @@ enum {
NCCLASS , /* Negated character class, [] */
WBOUND , /* Non-word boundary, not consuming meta char */
NWBOUND , /* Word boundary, not consuming meta char */
- END = 0xBF, /* Terminate: match found */
+ END = 0x82FFFF, /* Terminate: match found */
};
/*
@@ -158,7 +160,7 @@ typedef struct Reljunk
{
Relist* relist[2];
Relist* reliste[2];
- int starttype;
+ Token starttype;
Rune startchar;
const char* starts;
const char* eol;
@@ -311,8 +313,8 @@ typedef struct Parser
const char* exprp; /* pointer to next character in source expression */
Node andstack[NSTACK];
Node* andp;
- short atorstack[NSTACK];
- short* atorp;
+ Token atorstack[NSTACK];
+ Token* atorp;
short subidstack[NSTACK]; /* parallel to atorstack */
short* subidp;
short cursubid; /* id of current subexpression */
@@ -330,10 +332,10 @@ typedef struct Parser
} Parser;
/* predeclared crap */
-static void _operator(Parser *par, int type);
+static void _operator(Parser *par, Token type);
static void pushand(Parser *par, Reinst *first, Reinst *last);
-static void pushator(Parser *par, int type);
-static void evaluntil(Parser *par, int type);
+static void pushator(Parser *par, Token type);
+static void evaluntil(Parser *par, Token type);
static int bldcclass(Parser *par);
static void
@@ -344,7 +346,7 @@ rcerror(Parser *par, cregex_error_t err)
}
static Reinst*
-newinst(Parser *par, int t)
+newinst(Parser *par, Token t)
{
par->freep->type = t;
par->freep->l.left = 0;
@@ -353,7 +355,7 @@ newinst(Parser *par, int t)
}
static void
-operand(Parser *par, int t)
+operand(Parser *par, Token t)
{
Reinst *i;
@@ -371,7 +373,7 @@ operand(Parser *par, int t)
}
static void
-_operator(Parser *par, int t)
+_operator(Parser *par, Token t)
{
if (t==RBRA && --par->nbra<0)
rcerror(par, creg_unmatchedrightparenthesis);
@@ -401,7 +403,7 @@ pushand(Parser *par, Reinst *f, Reinst *l)
}
static void
-pushator(Parser *par, int t)
+pushator(Parser *par, Token t)
{
if (par->atorp >= &par->atorstack[NSTACK])
rcerror(par, creg_operatorstackoverflow);
@@ -410,7 +412,7 @@ pushator(Parser *par, int t)
}
static Node*
-popand(Parser *par, int op)
+popand(Parser *par, Token op)
{
Reinst *inst;
@@ -422,7 +424,7 @@ popand(Parser *par, int op)
return --par->andp;
}
-static int
+static Token
popator(Parser *par)
{
if (par->atorp <= &par->atorstack[0])
@@ -432,7 +434,7 @@ popator(Parser *par)
}
static void
-evaluntil(Parser *par, int pri)
+evaluntil(Parser *par, Token pri)
{
Node *op1, *op2;
Reinst *inst1, *inst2;
@@ -666,10 +668,10 @@ lex(Parser *par, int* dot_type)
return RUNE;
}
-static int
+static Token
bldcclass(Parser *par)
{
- int type;
+ Token type;
Rune r[NCCRUNE];
Rune *p, *ep, *np;
Rune rune;
@@ -767,9 +769,9 @@ bldcclass(Parser *par)
}
static Reprog*
-regcomp1(Parser *par, const char *s, int dot_type)
+regcomp1(Parser *par, const char *s, Token dot_type)
{
- int token;
+ Token token;
Reprog *volatile pp;
/* get memory for the program. estimated max usage */
@@ -803,7 +805,7 @@ regcomp1(Parser *par, const char *s, int dot_type)
/* Start with a low priority operator to prime parser */
pushator(par, START-1);
while ((token = lex(par, &dot_type)) != END) {
- if ((token & 0xF0) == OPERATOR)
+ if ((token & MASK) == OPERATOR)
_operator(par, token);
else
operand(par, token);
diff --git a/src/cregex_utf8.c b/src/cregex_utf8.c
index a121542c..ff6ad8b1 100644
--- a/src/cregex_utf8.c
+++ b/src/cregex_utf8.c
@@ -1046,12 +1046,12 @@ static inline int utf8_islower(uint32_t codep) {
return (idx != -1) & (idx & 1);
}
-static inline int utf8_toupper(uint32_t codep) {
+static inline uint32_t utf8_toupper(uint32_t codep) {
int idx = cfold_lookup(codep);
return (idx == -1) | !(idx & 1) ? codep : cfold_tab[idx - 1];
}
-static inline int utf8_tolower(uint32_t codep) {
+static inline uint32_t utf8_tolower(uint32_t codep) {
int idx = cfold_lookup(codep);
return (idx == -1) | (idx & 1) ? codep : cfold_tab[idx + 1];
}