summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author: Tyge Løvset <[email protected]> 2022-09-22 13:04:58 +0200
committer: Tyge Løvset <[email protected]> 2022-09-22 13:04:58 +0200
commit: 4c5f0ba23ba6294e144243555ed3c02a72259692 (patch)
tree: 380b249e1c36b83b65507da5a2af4f843252a474 /src
parent: 61c222312bb1cc03ab987fdbc541039723c9e243 (diff)
download: STC-modified-4c5f0ba23ba6294e144243555ed3c02a72259692.tar.gz
STC-modified-4c5f0ba23ba6294e144243555ed3c02a72259692.zip
Updated _advance() iter methods. Some have signed offsets.
utf8_peek()/utf8_peek_off() added/changed.
Diffstat (limited to 'src')
-rw-r--r-- src/cregex.c | 4
-rw-r--r-- src/utf8code.c | 7
2 files changed, 4 insertions, 7 deletions
diff --git a/src/cregex.c b/src/cregex.c
index 6b4e9f27..17284dfe 100644
--- a/src/cregex.c
+++ b/src/cregex.c
@@ -1028,8 +1028,8 @@ regexec1(const Reprog *progp, /* program to run */
case NWBOUND:
ok = true;
case WBOUND: /* fallthrough */
- if (ok ^ (s == bol || s == j->eol || ((utf8_isalnum(utf8_peek(s, -1)) || s[-1] == '_')
- ^ (utf8_isalnum(utf8_peek(s, 0)) || s[0] == '_'))))
+ if (ok ^ (s == bol || s == j->eol || ((utf8_isalnum(utf8_peek_off(s, -1)) || s[-1] == '_')
+ ^ (utf8_isalnum(utf8_peek(s)) || s[0] == '_'))))
continue;
break;
case NCCLASS:
diff --git a/src/utf8code.c b/src/utf8code.c
index 25244f07..abd21736 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -46,14 +46,11 @@ unsigned utf8_encode(char *out, uint32_t c)
return 0;
}
-uint32_t utf8_peek(const char* s, int pos) {
+uint32_t utf8_peek_off(const char* s, int pos) {
int inc = -1;
if (pos > 0) pos = -pos, inc = 1;
while (pos) pos += (*(s += inc) & 0xC0) != 0x80;
-
- utf8_decode_t d = {.state=0};
- do { utf8_decode(&d, (uint8_t)*s++); } while (d.state);
- return d.codep;
+ return utf8_peek(s);
}
bool utf8_valid_n(const char* s, size_t nbytes) {