summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author: Tyge Løvset <[email protected]> 2023-02-04 23:35:56 +0100
committer: GitHub <[email protected]> 2023-02-04 23:35:56 +0100
commit: adc47cefc2976768c3f0b773bd26bfd1062e8a53 (patch)
tree: 4923f88afb0d091d5d39ae03d65a4998a0517652 /src
parent: 0c4c4f8bba17562735b67b2923cd23c773aa53a7 (diff)
parent: d2ff84c53aa9bd3857fdf22dcf7cd9398a4780be (diff)
download: STC-modified-adc47cefc2976768c3f0b773bd26bfd1062e8a53.tar.gz
download: STC-modified-adc47cefc2976768c3f0b773bd26bfd1062e8a53.zip
Merge pull request #46 from tylov/newinit
Version 4.1 RC2: signed sizes and indices, cspan with numpy slicing.
Diffstat (limited to 'src')
-rw-r--r-- src/cregex.c 112
-rw-r--r-- src/libstc.c 11
-rw-r--r-- src/singleheader.py 79
-rw-r--r-- src/utf8code.c 263
4 files changed, 269 insertions, 196 deletions
diff --git a/src/cregex.c b/src/cregex.c
index d2b54ef1..c17e1967 100644
--- a/src/cregex.c
+++ b/src/cregex.c
@@ -78,7 +78,7 @@ typedef struct _Reprog
{
_Reinst *startinst; /* start pc */
_Reflags flags;
- unsigned nsubids;
+ int nsubids;
_Reclass cclass[_NCLASS]; /* .data */
_Reinst firstinst[]; /* .text : originally 5 elements? */
} _Reprog;
@@ -214,7 +214,7 @@ utfrune(const char *s, _Rune c)
if (c < 128) /* ascii */
return strchr((char *)s, (int)c);
int n;
- for (_Rune r = *s; r; s += n, r = *(unsigned char*)s) {
+ for (_Rune r = (uint32_t)*s; r; s += n, r = *(unsigned char*)s) {
if (r < 128) { n = 1; continue; }
n = chartorune(&r, s);
if (r == c) return s;
@@ -225,10 +225,10 @@ utfrune(const char *s, _Rune c)
static const char*
utfruneicase(const char *s, _Rune c)
{
- _Rune r = *s;
+ _Rune r = (uint32_t)*s;
int n;
- if (c < 128) for (c = tolower(c); r; ++s, r = *(unsigned char*)s) {
- if (r < 128 && (unsigned)tolower(r) == c) return s;
+ if (c < 128) for (c = (_Rune)tolower((int)c); r; ++s, r = *(unsigned char*)s) {
+ if (r < 128 && (_Rune)tolower((int)r) == c) return s;
}
else for (c = utf8_casefold(c); r; s += n, r = *(unsigned char*)s) {
if (r < 128) { n = 1; continue; }
@@ -246,13 +246,13 @@ utfruneicase(const char *s, _Rune c)
* save a new match in mp
*/
static void
-_renewmatch(_Resub *mp, unsigned ms, _Resublist *sp, unsigned nsubids)
+_renewmatch(_Resub *mp, int ms, _Resublist *sp, int nsubids)
{
if (mp==NULL || ms==0)
return;
if (mp[0].str == NULL || sp->m[0].str < mp[0].str ||
(sp->m[0].str == mp[0].str && sp->m[0].size > mp[0].size)) {
- for (unsigned i=0; i<ms && i<=nsubids; i++)
+ for (int i=0; i<ms && i<=nsubids; i++)
mp[i] = sp->m[i];
}
}
@@ -265,7 +265,7 @@ _renewmatch(_Resub *mp, unsigned ms, _Resublist *sp, unsigned nsubids)
static _Relist*
_renewthread(_Relist *lp, /* _relist to add to */
_Reinst *ip, /* instruction to add */
- unsigned ms,
+ int ms,
_Resublist *sep) /* pointers to subexpressions */
{
_Relist *p;
@@ -297,7 +297,7 @@ _renewthread(_Relist *lp, /* _relist to add to */
static _Relist*
_renewemptythread(_Relist *lp, /* _relist to add to */
_Reinst *ip, /* instruction to add */
- unsigned ms,
+ int ms,
const char *sp) /* pointers to subexpressions */
{
_Relist *p;
@@ -549,7 +549,7 @@ _optimize(_Parser *par, _Reprog *pp)
*/
intptr_t ipp = (intptr_t)pp;
size_t size = sizeof(_Reprog) + (size_t)(par->freep - pp->firstinst)*sizeof(_Reinst);
- _Reprog *npp = (_Reprog *)c_REALLOC(pp, size);
+ _Reprog *npp = (_Reprog *)c_realloc(pp, size);
ptrdiff_t diff = (intptr_t)npp - ipp;
if ((npp == NULL) | (diff == 0))
@@ -661,7 +661,7 @@ _lexutfclass(_Parser *par, _Rune *rp)
{"{Devanagari}", 10, UTF_devanagari}, {"{Greek}", 7, UTF_greek},
{"{Han}", 5, UTF_han}, {"{Latin}", 7, UTF_latin},
};
- int inv = (*rp == 'P');
+ unsigned inv = (*rp == 'P');
for (unsigned i = 0; i < (sizeof cls/sizeof *cls); ++i) {
if (!strncmp(par->exprp, cls[i].c, (size_t)cls[i].n)) {
if (par->rune_type == TOK_IRUNE && (cls[i].r == UTF_ll || cls[i].r == UTF_lu))
@@ -844,10 +844,10 @@ _regcomp1(_Reprog *progp, _Parser *par, const char *s, int cflags)
/* get memory for the program. estimated max usage */
const size_t instcap = 5 + 6*strlen(s);
- _Reprog* pp = (_Reprog *)c_REALLOC(progp, sizeof(_Reprog) + instcap*sizeof(_Reinst));
+ _Reprog* pp = (_Reprog *)c_realloc(progp, sizeof(_Reprog) + instcap*sizeof(_Reinst));
if (pp == NULL) {
par->error = CREG_OUTOFMEMORY;
- c_FREE(progp);
+ c_free(progp);
return NULL;
}
pp->flags.icase = (cflags & CREG_C_ICASE) != 0;
@@ -895,10 +895,10 @@ _regcomp1(_Reprog *progp, _Parser *par, const char *s, int cflags)
pp->startinst = par->andp->first;
pp = _optimize(par, pp);
- pp->nsubids = (unsigned)par->cursubid;
+ pp->nsubids = par->cursubid;
out:
if (par->error) {
- c_FREE(pp);
+ c_free(pp);
pp = NULL;
}
return pp;
@@ -908,23 +908,22 @@ out:
static int
_runematch(_Rune s, _Rune r)
{
- int inv = 0;
- uint32_t n;
+ int inv = 0, n;
switch (s) {
- case ASC_D: inv = 1; case ASC_d: return inv ^ (isdigit(r) != 0);
- case ASC_S: inv = 1; case ASC_s: return inv ^ (isspace(r) != 0);
- case ASC_W: inv = 1; case ASC_w: return inv ^ ((isalnum(r) != 0) | (r == '_'));
- case ASC_AL: inv = 1; case ASC_al: return inv ^ (isalpha(r) != 0);
- case ASC_AN: inv = 1; case ASC_an: return inv ^ (isalnum(r) != 0);
+ case ASC_D: inv = 1; case ASC_d: return inv ^ (isdigit((int)r) != 0);
+ case ASC_S: inv = 1; case ASC_s: return inv ^ (isspace((int)r) != 0);
+ case ASC_W: inv = 1; case ASC_w: return inv ^ ((isalnum((int)r) != 0) | (r == '_'));
+ case ASC_AL: inv = 1; case ASC_al: return inv ^ (isalpha((int)r) != 0);
+ case ASC_AN: inv = 1; case ASC_an: return inv ^ (isalnum((int)r) != 0);
case ASC_AS: return (r >= 128); case ASC_as: return (r < 128);
case ASC_BL: inv = 1; case ASC_bl: return inv ^ ((r == ' ') | (r == '\t'));
- case ASC_CT: inv = 1; case ASC_ct: return inv ^ (iscntrl(r) != 0);
- case ASC_GR: inv = 1; case ASC_gr: return inv ^ (isgraph(r) != 0);
- case ASC_PR: inv = 1; case ASC_pr: return inv ^ (isprint(r) != 0);
- case ASC_PU: inv = 1; case ASC_pu: return inv ^ (ispunct(r) != 0);
- case ASC_LO: inv = 1; case ASC_lo: return inv ^ (islower(r) != 0);
- case ASC_UP: inv = 1; case ASC_up: return inv ^ (isupper(r) != 0);
- case ASC_XD: inv = 1; case ASC_xd: return inv ^ (isxdigit(r) != 0);
+ case ASC_CT: inv = 1; case ASC_ct: return inv ^ (iscntrl((int)r) != 0);
+ case ASC_GR: inv = 1; case ASC_gr: return inv ^ (isgraph((int)r) != 0);
+ case ASC_PR: inv = 1; case ASC_pr: return inv ^ (isprint((int)r) != 0);
+ case ASC_PU: inv = 1; case ASC_pu: return inv ^ (ispunct((int)r) != 0);
+ case ASC_LO: inv = 1; case ASC_lo: return inv ^ (islower((int)r) != 0);
+ case ASC_UP: inv = 1; case ASC_up: return inv ^ (isupper((int)r) != 0);
+ case ASC_XD: inv = 1; case ASC_xd: return inv ^ (isxdigit((int)r) != 0);
case UTF_AN: inv = 1; case UTF_an: return inv ^ utf8_isalnum(r);
case UTF_BL: inv = 1; case UTF_bl: return inv ^ utf8_isblank(r);
case UTF_SP: inv = 1; case UTF_sp: return inv ^ utf8_isspace(r);
@@ -951,7 +950,7 @@ _runematch(_Rune s, _Rune r)
case UTF_greek: case UTF_GREEK:
case UTF_han: case UTF_HAN:
case UTF_latin: case UTF_LATIN:
- n = s - UTF_GRP;
+ n = (int)s - UTF_GRP;
inv = n & 1;
return inv ^ utf8_isgroup(n / 2, r);
}
@@ -967,7 +966,7 @@ static int
_regexec1(const _Reprog *progp, /* program to run */
const char *bol, /* string to run machine on */
_Resub *mp, /* subexpression elements */
- unsigned ms, /* number of elements at mp */
+ int ms, /* number of elements at mp */
_Reljunk *j,
int mflags
)
@@ -980,7 +979,7 @@ _regexec1(const _Reprog *progp, /* program to run */
const char *s, *p;
_Rune r, *rp, *ep;
int n, checkstart, match = 0;
- unsigned i;
+ int i;
bool icase = progp->flags.icase;
checkstart = j->starttype;
@@ -1050,7 +1049,7 @@ _regexec1(const _Reprog *progp, /* program to run */
tlp->se.m[inst->r.subid].str = s;
continue;
case TOK_RBRA:
- tlp->se.m[inst->r.subid].size = (size_t)(s - tlp->se.m[inst->r.subid].str);
+ tlp->se.m[inst->r.subid].size = (s - tlp->se.m[inst->r.subid].str);
continue;
case TOK_ANY:
ok = (r != '\n');
@@ -1100,7 +1099,7 @@ _regexec1(const _Reprog *progp, /* program to run */
match = !(mflags & CREG_M_FULLMATCH) ||
((s == j->eol || r == 0 || r == '\n') &&
(tlp->se.m[0].str == bol || tlp->se.m[0].str[-1] == '\n'));
- tlp->se.m[0].size = (size_t)(s - tlp->se.m[0].str);
+ tlp->se.m[0].size = (s - tlp->se.m[0].str);
if (mp != NULL)
_renewmatch(mp, ms, &tlp->se, progp->nsubids);
break;
@@ -1124,7 +1123,7 @@ static int
_regexec2(const _Reprog *progp, /* program to run */
const char *bol, /* string to run machine on */
_Resub *mp, /* subexpression elements */
- unsigned ms, /* number of elements at mp */
+ int ms, /* number of elements at mp */
_Reljunk *j,
int mflags
)
@@ -1133,7 +1132,7 @@ _regexec2(const _Reprog *progp, /* program to run */
_Relist *relists;
/* mark space */
- relists = (_Relist *)c_MALLOC(2 * _BIGLISTSIZE*sizeof(_Relist));
+ relists = (_Relist *)c_malloc(2 * _BIGLISTSIZE*sizeof(_Relist));
if (relists == NULL)
return -1;
@@ -1143,14 +1142,14 @@ _regexec2(const _Reprog *progp, /* program to run */
j->reliste[1] = relists + 2*_BIGLISTSIZE - 2;
rv = _regexec1(progp, bol, mp, ms, j, mflags);
- c_FREE(relists);
+ c_free(relists);
return rv;
}
static int
_regexec(const _Reprog *progp, /* program to run */
const char *bol, /* string to run machine on */
- unsigned ms, /* number of elements at mp */
+ int ms, /* number of elements at mp */
_Resub mp[], /* subexpression elements */
int mflags)
{
@@ -1196,10 +1195,10 @@ _regexec(const _Reprog *progp, /* program to run */
static void
-_build_subst(const char* replace, unsigned nmatch, const csview match[],
+_build_subst(const char* replace, int nmatch, const csview match[],
bool (*mfun)(int, csview, cstr*), cstr* subst) {
cstr_buf buf = cstr_buffer(subst);
- size_t len = 0, cap = buf.cap;
+ intptr_t len = 0, cap = buf.cap;
char* dst = buf.data;
cstr mstr = cstr_NULL;
@@ -1217,7 +1216,7 @@ _build_subst(const char* replace, unsigned nmatch, const csview match[],
csview m = mfun && mfun(g, match[g], &mstr) ? cstr_sv(&mstr) : match[g];
if (len + m.size > cap)
dst = cstr_reserve(subst, cap = cap*3/2 + m.size);
- for (unsigned i = 0; i < m.size; ++i)
+ for (int i = 0; i < (int)m.size; ++i)
dst[len++] = m.str[i];
}
++replace;
@@ -1239,20 +1238,19 @@ _build_subst(const char* replace, unsigned nmatch, const csview match[],
*/
int
-cregex_compile(cregex *self, const char* pattern, int cflags) {
+cregex_compile_3(cregex *self, const char* pattern, int cflags) {
_Parser par;
self->prog = _regcomp1(self->prog, &par, pattern, cflags);
return self->error = par.error;
}
-unsigned
+int
cregex_captures(const cregex* self) {
- return self->prog ? 1U + self->prog->nsubids : 0U;
+ return self->prog ? 1 + self->prog->nsubids : 0;
}
int
-cregex_find(const cregex* re, const char* input,
- csview match[], int mflags) {
+cregex_find_4(const cregex* re, const char* input, csview match[], int mflags) {
int res = _regexec(re->prog, input, cregex_captures(re), match, mflags);
switch (res) {
case 1: return CREG_OK;
@@ -1262,8 +1260,8 @@ cregex_find(const cregex* re, const char* input,
}
int
-cregex_find_pattern(const char* pattern, const char* input,
- csview match[], int cmflags) {
+cregex_find_pattern_4(const char* pattern, const char* input,
+ csview match[], int cmflags) {
cregex re = cregex_init();
int res = cregex_compile(&re, pattern, cmflags);
if (res != CREG_OK) return res;
@@ -1273,18 +1271,18 @@ cregex_find_pattern(const char* pattern, const char* input,
}
cstr
-cregex_replace_sv(const cregex* re, csview input, const char* replace, unsigned count,
- bool (*mfun)(int, csview, cstr*), int rflags) {
+cregex_replace_sv_6(const cregex* re, csview input, const char* replace, int count,
+ bool (*mfun)(int, csview, cstr*), int rflags) {
cstr out = cstr_NULL;
cstr subst = cstr_NULL;
csview match[CREG_MAX_CAPTURES];
- unsigned nmatch = cregex_captures(re);
- if (!count) count = ~0U;
+ int nmatch = cregex_captures(re);
+ if (!count) count = INT32_MAX;
bool copy = !(rflags & CREG_R_STRIP);
while (count-- && cregex_find_sv(re, input, match) == CREG_OK) {
_build_subst(replace, nmatch, match, mfun, &subst);
- const size_t mpos = (size_t)(match[0].str - input.str);
+ const intptr_t mpos = (match[0].str - input.str);
if (copy & (mpos > 0)) cstr_append_n(&out, input.str, mpos);
cstr_append_s(&out, subst);
input.str = match[0].str + match[0].size;
@@ -1296,12 +1294,12 @@ cregex_replace_sv(const cregex* re, csview input, const char* replace, unsigned
}
cstr
-cregex_replace_pattern_ex(const char* pattern, const char* input, const char* replace, unsigned count,
- bool (*mfun)(int, csview, cstr*), int crflags) {
+cregex_replace_pattern_6(const char* pattern, const char* input, const char* replace, int count,
+ bool (*mfun)(int, csview, cstr*), int crflags) {
cregex re = cregex_init();
if (cregex_compile(&re, pattern, crflags) != CREG_OK)
assert(0);
- csview sv = {input, strlen(input)};
+ csview sv = {input, c_strlen(input)};
cstr out = cregex_replace_sv(&re, sv, replace, count, mfun, crflags);
cregex_drop(&re);
return out;
@@ -1309,6 +1307,6 @@ cregex_replace_pattern_ex(const char* pattern, const char* input, const char* re
void
cregex_drop(cregex* self) {
- c_FREE(self->prog);
+ c_free(self->prog);
}
#endif
diff --git a/src/libstc.c b/src/libstc.c
deleted file mode 100644
index e8980a7a..00000000
--- a/src/libstc.c
+++ /dev/null
@@ -1,11 +0,0 @@
-#define STC_EXTERN // implement common extern, non-templated functions and dependencies.
-
-#define i_val int
-#define i_header // don't implement clist_int itself, just dummy declare it.
-#include "../include/stc/clist.h"
-
-#define STC_IMPLEMENT // implement the following.
-
-#include "../include/stc/cregex.h"
-#include "../include/stc/csview.h"
-//#include "../include/stc/crandom.h"
diff --git a/src/singleheader.py b/src/singleheader.py
new file mode 100644
index 00000000..f5272cfb
--- /dev/null
+++ b/src/singleheader.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+
+import re
+import sys
+import os
+from os.path import dirname, join as path_join, abspath, basename, exists
+
+extra_paths = [path_join(dirname(abspath(__file__)), "include")]
+
+
+def find_file(included_name, current_file):
+ current_dir = dirname(abspath(current_file))
+ for idir in [current_dir] + extra_paths:
+ try_path = path_join(idir, included_name)
+ if exists(try_path):
+ return try_path
+ return None
+
+
+def process_file(
+ file_path,
+ out_lines=[],
+ processed_files=[],
+):
+ out_lines += "// ### BEGIN_FILE_INCLUDE: " + basename(file_path) + '\n'
+ comment_block = False
+ with open(file_path, "r") as f:
+ for line in f:
+ is_comment = comment_block
+ if re.search('/\*.*?\*/', line):
+ pass
+ elif re.search('^\\s*/\*', line):
+ comment_block, is_comment = True, True
+ elif re.search('\*/', line):
+ comment_block = False
+
+ if is_comment:
+ continue
+
+ m_inc = re.search('^\\s*# *include\\s*[<"](.+)[>"]', line) if not is_comment else False
+ if m_inc:
+ inc_name = m_inc.group(1)
+ inc_path = find_file(inc_name, file_path)
+ if inc_path not in processed_files:
+ if inc_path is not None:
+ processed_files += [inc_path]
+ process_file(
+ inc_path,
+ out_lines,
+ processed_files,
+ )
+ else:
+ # assume it's a system header
+ out_lines += [line]
+ continue
+ m_once = re.match('^\\s*# *pragma once\\s*', line) if not is_comment else False
+ # ignore pragma once; we're handling it here
+ if m_once:
+ continue
+ # otherwise, just add the line to the output
+ if line[-1] != '\n':
+ line += '\n'
+ out_lines += [line]
+ out_lines += "// ### END_FILE_INCLUDE: " + basename(file_path) + '\n'
+ return (
+ "".join(out_lines)
+ )
+
+
+if __name__ == "__main__":
+ print(
+ process_file(
+ abspath(sys.argv[1]),
+ [],
+ # We use an include guard instead of `#pragma once` because Godbolt will
+ # cause complaints about `#pragma once` when they are used in URL includes.
+ [abspath(sys.argv[1])],
+ )
+ )
diff --git a/src/utf8code.c b/src/utf8code.c
index ecf79880..a892f5fd 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -20,7 +20,7 @@ const uint8_t utf8_dtab[] = {
12,36,12,12,12,12,12,12,12,12,12,12,
};
-unsigned utf8_encode(char *out, uint32_t c)
+int utf8_encode(char *out, uint32_t c)
{
if (c < 0x80U) {
out[0] = (char) c;
@@ -53,7 +53,7 @@ uint32_t utf8_peek_off(const char* s, int pos) {
return utf8_peek(s);
}
-bool utf8_valid_n(const char* s, size_t nbytes) {
+bool utf8_valid_n(const char* s, intptr_t nbytes) {
utf8_decode_t d = {.state=0};
while ((nbytes-- != 0) & (*s != 0))
utf8_decode(&d, (uint8_t)*s++);
@@ -61,7 +61,7 @@ bool utf8_valid_n(const char* s, size_t nbytes) {
}
uint32_t utf8_casefold(uint32_t c) {
- for (size_t i=0; i < casefold_len; ++i) {
+ for (int i=0; i < casefold_len; ++i) {
const struct CaseMapping entry = casemappings[i];
if (c <= entry.c2) {
if (c < entry.c1) return c;
@@ -74,7 +74,7 @@ uint32_t utf8_casefold(uint32_t c) {
}
uint32_t utf8_tolower(uint32_t c) {
- for (size_t i=0; i < sizeof upcase_ind/sizeof *upcase_ind; ++i) {
+ for (int i=0; i < (int)(sizeof upcase_ind/sizeof *upcase_ind); ++i) {
const struct CaseMapping entry = casemappings[upcase_ind[i]];
if (c <= entry.c2) {
if (c < entry.c1) return c;
@@ -87,7 +87,7 @@ uint32_t utf8_tolower(uint32_t c) {
}
uint32_t utf8_toupper(uint32_t c) {
- for (size_t i=0; i < sizeof lowcase_ind/sizeof *lowcase_ind; ++i) {
+ for (int i=0; i < (int)(sizeof lowcase_ind/sizeof *lowcase_ind); ++i) {
const struct CaseMapping entry = casemappings[lowcase_ind[i]];
if (c <= entry.m2) {
int d = entry.m2 - entry.c2;
@@ -101,7 +101,7 @@ uint32_t utf8_toupper(uint32_t c) {
int utf8_icmp_sv(const csview s1, const csview s2) {
utf8_decode_t d1 = {.state=0}, d2 = {.state=0};
- size_t j1 = 0, j2 = 0;
+ intptr_t j1 = 0, j2 = 0;
while ((j1 < s1.size) & (j2 < s2.size)) {
do { utf8_decode(&d1, (uint8_t)s1.str[j1++]); } while (d1.state);
do { utf8_decode(&d2, (uint8_t)s2.str[j2++]); } while (d2.state);
@@ -122,13 +122,18 @@ typedef struct {
int nr16;
} UGroup;
-static const UGroup unicode_groups[U8G_SIZE];
+#ifndef __cplusplus
+static
+#else
+extern
+#endif
+const UGroup _utf8_unicode_groups[U8G_SIZE];
bool utf8_isgroup(int group, uint32_t c) {
- for (int j=0; j<unicode_groups[group].nr16; ++j) {
- if (c < unicode_groups[group].r16[j].lo)
+ for (int j=0; j<_utf8_unicode_groups[group].nr16; ++j) {
+ if (c < _utf8_unicode_groups[group].r16[j].lo)
return false;
- if (c <= unicode_groups[group].r16[j].hi)
+ if (c <= _utf8_unicode_groups[group].r16[j].hi)
return true;
}
return false;
@@ -137,21 +142,21 @@ bool utf8_isgroup(int group, uint32_t c) {
bool utf8_isalpha(uint32_t c) {
static int16_t groups[] = {U8G_Latin, U8G_Nl, U8G_Greek, U8G_Cyrillic,
U8G_Han, U8G_Devanagari, U8G_Arabic};
- if (c < 128) return isalpha(c) != 0;
- for (unsigned j=0; j < c_ARRAYLEN(groups); ++j)
+ if (c < 128) return isalpha((int)c) != 0;
+ for (int j=0; j < c_ARRAYLEN(groups); ++j)
if (utf8_isgroup(groups[j], c))
return true;
return false;
}
bool utf8_iscased(uint32_t c) {
- if (c < 128) return isalpha(c) != 0;
+ if (c < 128) return isalpha((int)c) != 0;
return utf8_islower(c) || utf8_isupper(c) ||
utf8_isgroup(U8G_Lt, c);
}
bool utf8_isword(uint32_t c) {
- if (c < 128) return (isalnum(c) != 0) | (c == '_');
+ if (c < 128) return (isalnum((int)c) != 0) | (c == '_');
return utf8_isalpha(c) || utf8_isgroup(U8G_Nd, c) ||
utf8_isgroup(U8G_Pc, c);
}
@@ -323,139 +328,141 @@ static const URange16 Zs_range16[] = { // Space separator
};
static const URange16 Arabic_range16[] = {
- { 1536, 1540 },
- { 1542, 1547 },
- { 1549, 1562 },
- { 1564, 1566 },
- { 1568, 1599 },
- { 1601, 1610 },
- { 1622, 1647 },
- { 1649, 1756 },
- { 1758, 1791 },
- { 1872, 1919 },
- { 2160, 2190 },
- { 2192, 2193 },
- { 2200, 2273 },
- { 2275, 2303 },
- { 64336, 64450 },
- { 64467, 64829 },
- { 64832, 64911 },
- { 64914, 64967 },
- { 64975, 64975 },
- { 65008, 65023 },
- { 65136, 65140 },
- { 65142, 65276 },
+ { 1536, 1540 },
+ { 1542, 1547 },
+ { 1549, 1562 },
+ { 1564, 1566 },
+ { 1568, 1599 },
+ { 1601, 1610 },
+ { 1622, 1647 },
+ { 1649, 1756 },
+ { 1758, 1791 },
+ { 1872, 1919 },
+ { 2160, 2190 },
+ { 2192, 2193 },
+ { 2200, 2273 },
+ { 2275, 2303 },
+ { 64336, 64450 },
+ { 64467, 64829 },
+ { 64832, 64911 },
+ { 64914, 64967 },
+ { 64975, 64975 },
+ { 65008, 65023 },
+ { 65136, 65140 },
+ { 65142, 65276 },
};
static const URange16 Cyrillic_range16[] = {
- { 1024, 1156 },
- { 1159, 1327 },
- { 7296, 7304 },
- { 7467, 7467 },
- { 7544, 7544 },
- { 11744, 11775 },
- { 42560, 42655 },
- { 65070, 65071 },
+ { 1024, 1156 },
+ { 1159, 1327 },
+ { 7296, 7304 },
+ { 7467, 7467 },
+ { 7544, 7544 },
+ { 11744, 11775 },
+ { 42560, 42655 },
+ { 65070, 65071 },
};
static const URange16 Devanagari_range16[] = {
- { 2304, 2384 },
- { 2389, 2403 },
- { 2406, 2431 },
- { 43232, 43263 },
+ { 2304, 2384 },
+ { 2389, 2403 },
+ { 2406, 2431 },
+ { 43232, 43263 },
};
static const URange16 Greek_range16[] = {
- { 880, 883 },
- { 885, 887 },
- { 890, 893 },
- { 895, 895 },
- { 900, 900 },
- { 902, 902 },
- { 904, 906 },
- { 908, 908 },
- { 910, 929 },
- { 931, 993 },
- { 1008, 1023 },
- { 7462, 7466 },
- { 7517, 7521 },
- { 7526, 7530 },
- { 7615, 7615 },
- { 7936, 7957 },
- { 7960, 7965 },
- { 7968, 8005 },
- { 8008, 8013 },
- { 8016, 8023 },
- { 8025, 8025 },
- { 8027, 8027 },
- { 8029, 8029 },
- { 8031, 8061 },
- { 8064, 8116 },
- { 8118, 8132 },
- { 8134, 8147 },
- { 8150, 8155 },
- { 8157, 8175 },
- { 8178, 8180 },
- { 8182, 8190 },
- { 8486, 8486 },
- { 43877, 43877 },
+ { 880, 883 },
+ { 885, 887 },
+ { 890, 893 },
+ { 895, 895 },
+ { 900, 900 },
+ { 902, 902 },
+ { 904, 906 },
+ { 908, 908 },
+ { 910, 929 },
+ { 931, 993 },
+ { 1008, 1023 },
+ { 7462, 7466 },
+ { 7517, 7521 },
+ { 7526, 7530 },
+ { 7615, 7615 },
+ { 7936, 7957 },
+ { 7960, 7965 },
+ { 7968, 8005 },
+ { 8008, 8013 },
+ { 8016, 8023 },
+ { 8025, 8025 },
+ { 8027, 8027 },
+ { 8029, 8029 },
+ { 8031, 8061 },
+ { 8064, 8116 },
+ { 8118, 8132 },
+ { 8134, 8147 },
+ { 8150, 8155 },
+ { 8157, 8175 },
+ { 8178, 8180 },
+ { 8182, 8190 },
+ { 8486, 8486 },
+ { 43877, 43877 },
};
static const URange16 Han_range16[] = {
- { 11904, 11929 },
- { 11931, 12019 },
- { 12032, 12245 },
- { 12293, 12293 },
- { 12295, 12295 },
- { 12321, 12329 },
- { 12344, 12347 },
- { 13312, 19903 },
- { 19968, 40959 },
- { 63744, 64109 },
- { 64112, 64217 },
+ { 11904, 11929 },
+ { 11931, 12019 },
+ { 12032, 12245 },
+ { 12293, 12293 },
+ { 12295, 12295 },
+ { 12321, 12329 },
+ { 12344, 12347 },
+ { 13312, 19903 },
+ { 19968, 40959 },
+ { 63744, 64109 },
+ { 64112, 64217 },
};
static const URange16 Latin_range16[] = {
- { 65, 90 },
- { 97, 122 },
- { 170, 170 },
- { 186, 186 },
- { 192, 214 },
- { 216, 246 },
- { 248, 696 },
- { 736, 740 },
- { 7424, 7461 },
- { 7468, 7516 },
- { 7522, 7525 },
- { 7531, 7543 },
- { 7545, 7614 },
- { 7680, 7935 },
- { 8305, 8305 },
- { 8319, 8319 },
- { 8336, 8348 },
- { 8490, 8491 },
- { 8498, 8498 },
- { 8526, 8526 },
- { 8544, 8584 },
- { 11360, 11391 },
- { 42786, 42887 },
- { 42891, 42954 },
- { 42960, 42961 },
- { 42963, 42963 },
- { 42965, 42969 },
- { 42994, 43007 },
- { 43824, 43866 },
- { 43868, 43876 },
- { 43878, 43881 },
- { 64256, 64262 },
- { 65313, 65338 },
- { 65345, 65370 },
+ { 65, 90 },
+ { 97, 122 },
+ { 170, 170 },
+ { 186, 186 },
+ { 192, 214 },
+ { 216, 246 },
+ { 248, 696 },
+ { 736, 740 },
+ { 7424, 7461 },
+ { 7468, 7516 },
+ { 7522, 7525 },
+ { 7531, 7543 },
+ { 7545, 7614 },
+ { 7680, 7935 },
+ { 8305, 8305 },
+ { 8319, 8319 },
+ { 8336, 8348 },
+ { 8490, 8491 },
+ { 8498, 8498 },
+ { 8526, 8526 },
+ { 8544, 8584 },
+ { 11360, 11391 },
+ { 42786, 42887 },
+ { 42891, 42954 },
+ { 42960, 42961 },
+ { 42963, 42963 },
+ { 42965, 42969 },
+ { 42994, 43007 },
+ { 43824, 43866 },
+ { 43868, 43876 },
+ { 43878, 43881 },
+ { 64256, 64262 },
+ { 65313, 65338 },
+ { 65345, 65370 },
};
#define UNI_ENTRY(Code) \
{ Code##_range16, sizeof(Code##_range16)/sizeof(URange16) }
-
-static const UGroup unicode_groups[U8G_SIZE] = {
+#ifndef __cplusplus
+static
+#endif
+const UGroup _utf8_unicode_groups[U8G_SIZE] = {
[U8G_Cc] = UNI_ENTRY(Cc),
[U8G_Lt] = UNI_ENTRY(Lt),
[U8G_Nd] = UNI_ENTRY(Nd),