Merge pull request #46 from tylov/newinit

Version 4.1 RC2: signed sizes and indices, cspan with numpy slicing.
author: Tyge Løvset <[email protected]> 2023-02-04 23:35:56 +0100
committer: GitHub <[email protected]> 2023-02-04 23:35:56 +0100
commit: adc47cefc2976768c3f0b773bd26bfd1062e8a53 (patch)
tree: 4923f88afb0d091d5d39ae03d65a4998a0517652 /src
parent: 0c4c4f8bba17562735b67b2923cd23c773aa53a7 (diff)
parent: d2ff84c53aa9bd3857fdf22dcf7cd9398a4780be (diff)
download: STC-modified-adc47cefc2976768c3f0b773bd26bfd1062e8a53.tar.gz
STC-modified-adc47cefc2976768c3f0b773bd26bfd1062e8a53.zip
4 files changed, 269 insertions, 196 deletions
diff --git a/src/cregex.c b/src/cregex.c
index d2b54ef1..c17e1967 100644
--- a/src/cregex.c
+++ b/src/cregex.c
@@ -78,7 +78,7 @@ typedef struct _Reprog
 {
     _Reinst  *startinst;     /* start pc */
     _Reflags flags;
-    unsigned nsubids;
+    int nsubids;
     _Reclass cclass[_NCLASS]; /* .data */
     _Reinst  firstinst[];    /* .text : originally 5 elements? */
 } _Reprog;
@@ -214,7 +214,7 @@ utfrune(const char *s, _Rune c)
     if (c < 128)        /* ascii */
         return strchr((char *)s, (int)c);
     int n;
-    for (_Rune r = *s; r; s += n, r = *(unsigned char*)s) {
+    for (_Rune r = (uint32_t)*s; r; s += n, r = *(unsigned char*)s) {
         if (r < 128) { n = 1; continue; }
         n = chartorune(&r, s);
         if (r == c) return s;
@@ -225,10 +225,10 @@ utfrune(const char *s, _Rune c)
 static const char*
 utfruneicase(const char *s, _Rune c)
 {
-    _Rune r = *s;
+    _Rune r = (uint32_t)*s;
     int n;
-    if (c < 128) for (c = tolower(c); r; ++s, r = *(unsigned char*)s) {
-        if (r < 128 && (unsigned)tolower(r) == c) return s;
+    if (c < 128) for (c = (_Rune)tolower((int)c); r; ++s, r = *(unsigned char*)s) {
+        if (r < 128 && (_Rune)tolower((int)r) == c) return s;
     }
     else for (c = utf8_casefold(c); r; s += n, r = *(unsigned char*)s) {
         if (r < 128) { n = 1; continue; }
@@ -246,13 +246,13 @@ utfruneicase(const char *s, _Rune c)
  *  save a new match in mp
  */
 static void
-_renewmatch(_Resub *mp, unsigned ms, _Resublist *sp, unsigned nsubids)
+_renewmatch(_Resub *mp, int ms, _Resublist *sp, int nsubids)
 {
     if (mp==NULL || ms==0)
         return;
     if (mp[0].str == NULL || sp->m[0].str < mp[0].str ||
        (sp->m[0].str == mp[0].str && sp->m[0].size > mp[0].size)) {
-        for (unsigned i=0; i<ms && i<=nsubids; i++)
+        for (int i=0; i<ms && i<=nsubids; i++)
             mp[i] = sp->m[i];
     }
 }
@@ -265,7 +265,7 @@ _renewmatch(_Resub *mp, unsigned ms, _Resublist *sp, unsigned nsubids)
 static _Relist*
 _renewthread(_Relist *lp,  /* _relist to add to */
     _Reinst *ip,           /* instruction to add */
-    unsigned ms,
+    int ms,
     _Resublist *sep)       /* pointers to subexpressions */
 {
     _Relist *p;
@@ -297,7 +297,7 @@ _renewthread(_Relist *lp,  /* _relist to add to */
 static _Relist*
 _renewemptythread(_Relist *lp,   /* _relist to add to */
     _Reinst *ip,                 /* instruction to add */
-    unsigned ms,
+    int ms,
     const char *sp)             /* pointers to subexpressions */
 {
     _Relist *p;
@@ -549,7 +549,7 @@ _optimize(_Parser *par, _Reprog *pp)
      */
     intptr_t ipp = (intptr_t)pp;
     size_t size = sizeof(_Reprog) + (size_t)(par->freep - pp->firstinst)*sizeof(_Reinst);
-    _Reprog *npp = (_Reprog *)c_REALLOC(pp, size);
+    _Reprog *npp = (_Reprog *)c_realloc(pp, size);
     ptrdiff_t diff = (intptr_t)npp - ipp;
 
     if ((npp == NULL) | (diff == 0))
@@ -661,7 +661,7 @@ _lexutfclass(_Parser *par, _Rune *rp)
         {"{Devanagari}", 10, UTF_devanagari}, {"{Greek}", 7, UTF_greek},
         {"{Han}", 5, UTF_han}, {"{Latin}", 7, UTF_latin},
     };
-    int inv = (*rp == 'P');
+    unsigned inv = (*rp == 'P');
     for (unsigned i = 0; i < (sizeof cls/sizeof *cls); ++i) {
         if (!strncmp(par->exprp, cls[i].c, (size_t)cls[i].n)) {
             if (par->rune_type == TOK_IRUNE && (cls[i].r == UTF_ll || cls[i].r == UTF_lu))
@@ -844,10 +844,10 @@ _regcomp1(_Reprog *progp, _Parser *par, const char *s, int cflags)
 
     /* get memory for the program. estimated max usage */
     const size_t instcap = 5 + 6*strlen(s);
-    _Reprog* pp = (_Reprog *)c_REALLOC(progp, sizeof(_Reprog) + instcap*sizeof(_Reinst));
+    _Reprog* pp = (_Reprog *)c_realloc(progp, sizeof(_Reprog) + instcap*sizeof(_Reinst));
     if (pp == NULL) {
         par->error = CREG_OUTOFMEMORY;
-        c_FREE(progp);
+        c_free(progp);
         return NULL;
     }
     pp->flags.icase = (cflags & CREG_C_ICASE) != 0;
@@ -895,10 +895,10 @@ _regcomp1(_Reprog *progp, _Parser *par, const char *s, int cflags)
     pp->startinst = par->andp->first;
 
     pp = _optimize(par, pp);
-    pp->nsubids = (unsigned)par->cursubid;
+    pp->nsubids = par->cursubid;
 out:
     if (par->error) {
-        c_FREE(pp);
+        c_free(pp);
         pp = NULL;
     }
     return pp;
@@ -908,23 +908,22 @@ out:
 static int
 _runematch(_Rune s, _Rune r)
 {
-    int inv = 0;
-    uint32_t n;
+    int inv = 0, n;
     switch (s) {
-    case ASC_D: inv = 1; case ASC_d: return inv ^ (isdigit(r) != 0);
-    case ASC_S: inv = 1; case ASC_s: return inv ^ (isspace(r) != 0);
-    case ASC_W: inv = 1; case ASC_w: return inv ^ ((isalnum(r) != 0) | (r == '_'));
-    case ASC_AL: inv = 1; case ASC_al: return inv ^ (isalpha(r) != 0);
-    case ASC_AN: inv = 1; case ASC_an: return inv ^ (isalnum(r) != 0);
+    case ASC_D: inv = 1; case ASC_d: return inv ^ (isdigit((int)r) != 0);
+    case ASC_S: inv = 1; case ASC_s: return inv ^ (isspace((int)r) != 0);
+    case ASC_W: inv = 1; case ASC_w: return inv ^ ((isalnum((int)r) != 0) | (r == '_'));
+    case ASC_AL: inv = 1; case ASC_al: return inv ^ (isalpha((int)r) != 0);
+    case ASC_AN: inv = 1; case ASC_an: return inv ^ (isalnum((int)r) != 0);
     case ASC_AS: return (r >= 128); case ASC_as: return (r < 128);
     case ASC_BL: inv = 1; case ASC_bl: return inv ^ ((r == ' ') | (r == '\t'));
-    case ASC_CT: inv = 1; case ASC_ct: return inv ^ (iscntrl(r) != 0);
-    case ASC_GR: inv = 1; case ASC_gr: return inv ^ (isgraph(r) != 0);
-    case ASC_PR: inv = 1; case ASC_pr: return inv ^ (isprint(r) != 0);
-    case ASC_PU: inv = 1; case ASC_pu: return inv ^ (ispunct(r) != 0);
-    case ASC_LO: inv = 1; case ASC_lo: return inv ^ (islower(r) != 0);
-    case ASC_UP: inv = 1; case ASC_up: return inv ^ (isupper(r) != 0);
-    case ASC_XD: inv = 1; case ASC_xd: return inv ^ (isxdigit(r) != 0);
+    case ASC_CT: inv = 1; case ASC_ct: return inv ^ (iscntrl((int)r) != 0);
+    case ASC_GR: inv = 1; case ASC_gr: return inv ^ (isgraph((int)r) != 0);
+    case ASC_PR: inv = 1; case ASC_pr: return inv ^ (isprint((int)r) != 0);
+    case ASC_PU: inv = 1; case ASC_pu: return inv ^ (ispunct((int)r) != 0);
+    case ASC_LO: inv = 1; case ASC_lo: return inv ^ (islower((int)r) != 0);
+    case ASC_UP: inv = 1; case ASC_up: return inv ^ (isupper((int)r) != 0);
+    case ASC_XD: inv = 1; case ASC_xd: return inv ^ (isxdigit((int)r) != 0);
     case UTF_AN: inv = 1; case UTF_an: return inv ^ utf8_isalnum(r);
     case UTF_BL: inv = 1; case UTF_bl: return inv ^ utf8_isblank(r);
     case UTF_SP: inv = 1; case UTF_sp: return inv ^ utf8_isspace(r);
@@ -951,7 +950,7 @@ _runematch(_Rune s, _Rune r)
     case UTF_greek: case UTF_GREEK:
     case UTF_han: case UTF_HAN:
     case UTF_latin: case UTF_LATIN:
-        n = s - UTF_GRP;
+        n = (int)s - UTF_GRP;
         inv = n & 1;
         return inv ^ utf8_isgroup(n / 2, r);
     }
@@ -967,7 +966,7 @@ static int
 _regexec1(const _Reprog *progp,  /* program to run */
     const char *bol,    /* string to run machine on */
     _Resub *mp,         /* subexpression elements */
-    unsigned ms,        /* number of elements at mp */
+    int ms,        /* number of elements at mp */
     _Reljunk *j,
     int mflags
 )
@@ -980,7 +979,7 @@ _regexec1(const _Reprog *progp,  /* program to run */
     const char *s, *p;
     _Rune r, *rp, *ep;
     int n, checkstart, match = 0;
-    unsigned i;
+    int i;
 
     bool icase = progp->flags.icase;
     checkstart = j->starttype;
@@ -1050,7 +1049,7 @@ _regexec1(const _Reprog *progp,  /* program to run */
                     tlp->se.m[inst->r.subid].str = s;
                     continue;
                 case TOK_RBRA:
-                    tlp->se.m[inst->r.subid].size = (size_t)(s - tlp->se.m[inst->r.subid].str);
+                    tlp->se.m[inst->r.subid].size = (s - tlp->se.m[inst->r.subid].str);
                     continue;
                 case TOK_ANY:
                     ok = (r != '\n');
@@ -1100,7 +1099,7 @@ _regexec1(const _Reprog *progp,  /* program to run */
                     match = !(mflags & CREG_M_FULLMATCH) ||
                             ((s == j->eol || r == 0 || r == '\n') &&
                             (tlp->se.m[0].str == bol || tlp->se.m[0].str[-1] == '\n'));
-                    tlp->se.m[0].size = (size_t)(s - tlp->se.m[0].str);
+                    tlp->se.m[0].size = (s - tlp->se.m[0].str);
                     if (mp != NULL)
                         _renewmatch(mp, ms, &tlp->se, progp->nsubids);
                     break;
@@ -1124,7 +1123,7 @@ static int
 _regexec2(const _Reprog *progp,    /* program to run */
     const char *bol,    /* string to run machine on */
     _Resub *mp,         /* subexpression elements */
-    unsigned ms,        /* number of elements at mp */
+    int ms,             /* number of elements at mp */
     _Reljunk *j,
     int mflags
 )
@@ -1133,7 +1132,7 @@ _regexec2(const _Reprog *progp,    /* program to run */
     _Relist *relists;
 
     /* mark space */
-    relists = (_Relist *)c_MALLOC(2 * _BIGLISTSIZE*sizeof(_Relist));
+    relists = (_Relist *)c_malloc(2 * _BIGLISTSIZE*sizeof(_Relist));
     if (relists == NULL)
         return -1;
 
@@ -1143,14 +1142,14 @@ _regexec2(const _Reprog *progp,    /* program to run */
     j->reliste[1] = relists + 2*_BIGLISTSIZE - 2;
 
     rv = _regexec1(progp, bol, mp, ms, j, mflags);
-    c_FREE(relists);
+    c_free(relists);
     return rv;
 }
 
 static int
 _regexec(const _Reprog *progp,    /* program to run */
     const char *bol,    /* string to run machine on */
-    unsigned ms,        /* number of elements at mp */
+    int ms,             /* number of elements at mp */
     _Resub mp[],        /* subexpression elements */
     int mflags)
 {
@@ -1196,10 +1195,10 @@ _regexec(const _Reprog *progp,    /* program to run */
 
 
 static void
-_build_subst(const char* replace, unsigned nmatch, const csview match[],
+_build_subst(const char* replace, int nmatch, const csview match[],
              bool (*mfun)(int, csview, cstr*), cstr* subst) {
     cstr_buf buf = cstr_buffer(subst);
-    size_t len = 0, cap = buf.cap;
+    intptr_t len = 0, cap = buf.cap;
     char* dst = buf.data;
     cstr mstr = cstr_NULL;
 
@@ -1217,7 +1216,7 @@ _build_subst(const char* replace, unsigned nmatch, const csview match[],
                     csview m = mfun && mfun(g, match[g], &mstr) ? cstr_sv(&mstr) : match[g];
                     if (len + m.size > cap)
                         dst = cstr_reserve(subst, cap = cap*3/2 + m.size);
-                    for (unsigned i = 0; i < m.size; ++i)
+                    for (int i = 0; i < (int)m.size; ++i)
                         dst[len++] = m.str[i];
                 }
                 ++replace;
@@ -1239,20 +1238,19 @@ _build_subst(const char* replace, unsigned nmatch, const csview match[],
  */
 
 int 
-cregex_compile(cregex *self, const char* pattern, int cflags) {
+cregex_compile_3(cregex *self, const char* pattern, int cflags) {
     _Parser par;
     self->prog = _regcomp1(self->prog, &par, pattern, cflags);
     return self->error = par.error;
 }
 
-unsigned
+int
 cregex_captures(const cregex* self) {
-    return self->prog ? 1U + self->prog->nsubids : 0U;
+    return self->prog ? 1 + self->prog->nsubids : 0;
 }
 
 int
-cregex_find(const cregex* re, const char* input,
-            csview match[], int mflags) {
+cregex_find_4(const cregex* re, const char* input, csview match[], int mflags) {
     int res = _regexec(re->prog, input, cregex_captures(re), match, mflags);
     switch (res) {
     case 1: return CREG_OK;
@@ -1262,8 +1260,8 @@ cregex_find(const cregex* re, const char* input,
 }
 
 int
-cregex_find_pattern(const char* pattern, const char* input,
-                    csview match[], int cmflags) {
+cregex_find_pattern_4(const char* pattern, const char* input,
+                      csview match[], int cmflags) {
     cregex re = cregex_init();
     int res = cregex_compile(&re, pattern, cmflags);
     if (res != CREG_OK) return res;
@@ -1273,18 +1271,18 @@ cregex_find_pattern(const char* pattern, const char* input,
 }
 
 cstr
-cregex_replace_sv(const cregex* re, csview input, const char* replace, unsigned count,
-                  bool (*mfun)(int, csview, cstr*), int rflags) {
+cregex_replace_sv_6(const cregex* re, csview input, const char* replace, int count,
+                    bool (*mfun)(int, csview, cstr*), int rflags) {
     cstr out = cstr_NULL;
     cstr subst = cstr_NULL;
     csview match[CREG_MAX_CAPTURES];
-    unsigned nmatch = cregex_captures(re);
-    if (!count) count = ~0U;
+    int nmatch = cregex_captures(re);
+    if (!count) count = INT32_MAX;
     bool copy = !(rflags & CREG_R_STRIP);
 
     while (count-- && cregex_find_sv(re, input, match) == CREG_OK) {
         _build_subst(replace, nmatch, match, mfun, &subst);
-        const size_t mpos = (size_t)(match[0].str - input.str);
+        const intptr_t mpos = (match[0].str - input.str);
         if (copy & (mpos > 0)) cstr_append_n(&out, input.str, mpos);
         cstr_append_s(&out, subst);
         input.str = match[0].str + match[0].size;
@@ -1296,12 +1294,12 @@ cregex_replace_sv(const cregex* re, csview input, const char* replace, unsigned
 }
 
 cstr
-cregex_replace_pattern_ex(const char* pattern, const char* input, const char* replace, unsigned count,
-                          bool (*mfun)(int, csview, cstr*), int crflags) {
+cregex_replace_pattern_6(const char* pattern, const char* input, const char* replace, int count,
+                         bool (*mfun)(int, csview, cstr*), int crflags) {
     cregex re = cregex_init();
     if (cregex_compile(&re, pattern, crflags) != CREG_OK)
         assert(0);
-    csview sv = {input, strlen(input)};
+    csview sv = {input, c_strlen(input)};
     cstr out = cregex_replace_sv(&re, sv, replace, count, mfun, crflags);
     cregex_drop(&re);
     return out;
@@ -1309,6 +1307,6 @@ cregex_replace_pattern_ex(const char* pattern, const char* input, const char* re
 
 void
 cregex_drop(cregex* self) {
-    c_FREE(self->prog);
+    c_free(self->prog);
 }
 #endif
diff --git a/src/libstc.c b/src/libstc.c
deleted file mode 100644
index e8980a7a..00000000
--- a/src/libstc.c
+++ /dev/null
@@ -1,11 +0,0 @@
-#define STC_EXTERN    // implement common extern, non-templated functions and dependencies.
-
-#define i_val int
-#define i_header      // don't implement clist_int itself, just dummy declare it.
-#include "../include/stc/clist.h"
-
-#define STC_IMPLEMENT // implement the following.
-
-#include "../include/stc/cregex.h"
-#include "../include/stc/csview.h"
-//#include "../include/stc/crandom.h"
diff --git a/src/singleheader.py b/src/singleheader.py
new file mode 100644
index 00000000..f5272cfb
--- /dev/null
+++ b/src/singleheader.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+
+import re
+import sys
+import os
+from os.path import dirname, join as path_join, abspath, basename, exists
+
+extra_paths = [path_join(dirname(abspath(__file__)), "include")]
+
+
+def find_file(included_name, current_file):
+    current_dir = dirname(abspath(current_file))
+    for idir in [current_dir] + extra_paths:
+        try_path = path_join(idir, included_name)
+        if exists(try_path):
+            return try_path
+    return None
+
+
+def process_file(
+    file_path,
+    out_lines=[],
+    processed_files=[],
+):
+    out_lines += "// ### BEGIN_FILE_INCLUDE: " + basename(file_path) + '\n'
+    comment_block = False
+    with open(file_path, "r") as f:
+        for line in f:
+            is_comment = comment_block
+            if re.search('/\*.*?\*/', line):
+                pass
+            elif re.search('^\\s*/\*', line):
+                comment_block, is_comment = True, True
+            elif re.search('\*/', line):
+                comment_block = False
+
+            if is_comment:
+                continue
+
+            m_inc = re.search('^\\s*# *include\\s*[<"](.+)[>"]', line) if not is_comment else False
+            if m_inc:
+                inc_name = m_inc.group(1)
+                inc_path = find_file(inc_name, file_path)
+                if inc_path not in processed_files:
+                    if inc_path is not None:
+                        processed_files += [inc_path]
+                        process_file(
+                            inc_path,
+                            out_lines,
+                            processed_files,
+                        )
+                    else:
+                        # assume it's a system header
+                        out_lines += [line]
+                continue
+            m_once = re.match('^\\s*# *pragma once\\s*', line) if not is_comment else False
+            # ignore pragma once; we're handling it here
+            if m_once:
+                continue
+            # otherwise, just add the line to the output
+            if line[-1] != '\n':
+                line += '\n'
+            out_lines += [line]
+    out_lines += "// ### END_FILE_INCLUDE: " + basename(file_path) + '\n'
+    return (
+        "".join(out_lines)
+    )
+
+
+if __name__ == "__main__":
+    print(
+        process_file(
+            abspath(sys.argv[1]),
+            [],
+            # Seed with the root file so it is never inlined into itself. `#pragma once`
+            # lines are stripped because Godbolt complains about them in URL includes.
+            [abspath(sys.argv[1])],
+        )
+    )
diff --git a/src/utf8code.c b/src/utf8code.c
index ecf79880..a892f5fd 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -20,7 +20,7 @@ const uint8_t utf8_dtab[] = {
   12,36,12,12,12,12,12,12,12,12,12,12,
 };
 
-unsigned utf8_encode(char *out, uint32_t c)
+int utf8_encode(char *out, uint32_t c)
 {
     if (c < 0x80U) {
         out[0] = (char) c;
@@ -53,7 +53,7 @@ uint32_t utf8_peek_off(const char* s, int pos) {
     return utf8_peek(s);
 }
 
-bool utf8_valid_n(const char* s, size_t nbytes) {
+bool utf8_valid_n(const char* s, intptr_t nbytes) {
     utf8_decode_t d = {.state=0};
     while ((nbytes-- != 0) & (*s != 0))
         utf8_decode(&d, (uint8_t)*s++);
@@ -61,7 +61,7 @@ bool utf8_valid_n(const char* s, size_t nbytes) {
 }
 
 uint32_t utf8_casefold(uint32_t c) {
-    for (size_t i=0; i < casefold_len; ++i) {
+    for (int i=0; i < casefold_len; ++i) {
         const struct CaseMapping entry = casemappings[i];
         if (c <= entry.c2) {
             if (c < entry.c1) return c;
@@ -74,7 +74,7 @@ uint32_t utf8_casefold(uint32_t c) {
 }
 
 uint32_t utf8_tolower(uint32_t c) {
-    for (size_t i=0; i < sizeof upcase_ind/sizeof *upcase_ind; ++i) {
+    for (int i=0; i < (int)(sizeof upcase_ind/sizeof *upcase_ind); ++i) {
         const struct CaseMapping entry = casemappings[upcase_ind[i]];
         if (c <= entry.c2) {
             if (c < entry.c1) return c;
@@ -87,7 +87,7 @@ uint32_t utf8_tolower(uint32_t c) {
 }
 
 uint32_t utf8_toupper(uint32_t c) {
-    for (size_t i=0; i < sizeof lowcase_ind/sizeof *lowcase_ind; ++i) {
+    for (int i=0; i < (int)(sizeof lowcase_ind/sizeof *lowcase_ind); ++i) {
         const struct CaseMapping entry = casemappings[lowcase_ind[i]];
         if (c <= entry.m2) {
             int d = entry.m2 - entry.c2;
@@ -101,7 +101,7 @@ uint32_t utf8_toupper(uint32_t c) {
 
 int utf8_icmp_sv(const csview s1, const csview s2) {
     utf8_decode_t d1 = {.state=0}, d2 = {.state=0};
-    size_t j1 = 0, j2 = 0;
+    intptr_t j1 = 0, j2 = 0;
     while ((j1 < s1.size) & (j2 < s2.size)) {
         do { utf8_decode(&d1, (uint8_t)s1.str[j1++]); } while (d1.state);
         do { utf8_decode(&d2, (uint8_t)s2.str[j2++]); } while (d2.state);
@@ -122,13 +122,18 @@ typedef struct {
   int nr16;
 } UGroup;
 
-static const UGroup unicode_groups[U8G_SIZE];
+#ifndef __cplusplus
+static
+#else
+extern
+#endif
+const UGroup _utf8_unicode_groups[U8G_SIZE];
 
 bool utf8_isgroup(int group, uint32_t c) {
-    for (int j=0; j<unicode_groups[group].nr16; ++j) {
-        if (c < unicode_groups[group].r16[j].lo)
+    for (int j=0; j<_utf8_unicode_groups[group].nr16; ++j) {
+        if (c < _utf8_unicode_groups[group].r16[j].lo)
             return false;
-        if (c <= unicode_groups[group].r16[j].hi)
+        if (c <= _utf8_unicode_groups[group].r16[j].hi)
             return true;
     }
     return false;
@@ -137,21 +142,21 @@ bool utf8_isgroup(int group, uint32_t c) {
 bool utf8_isalpha(uint32_t c) {
     static int16_t groups[] = {U8G_Latin, U8G_Nl, U8G_Greek, U8G_Cyrillic,
                                U8G_Han, U8G_Devanagari, U8G_Arabic};
-    if (c < 128) return isalpha(c) != 0;
-    for (unsigned j=0; j < c_ARRAYLEN(groups); ++j)
+    if (c < 128) return isalpha((int)c) != 0;
+    for (int j=0; j < c_ARRAYLEN(groups); ++j)
         if (utf8_isgroup(groups[j], c))
             return true;
     return false;
 }
 
 bool utf8_iscased(uint32_t c) {
-    if (c < 128) return isalpha(c) != 0;
+    if (c < 128) return isalpha((int)c) != 0;
     return utf8_islower(c) || utf8_isupper(c) || 
            utf8_isgroup(U8G_Lt, c);
 }
 
 bool utf8_isword(uint32_t c) {
-    if (c < 128) return (isalnum(c) != 0) | (c == '_');
+    if (c < 128) return (isalnum((int)c) != 0) | (c == '_');
     return utf8_isalpha(c) || utf8_isgroup(U8G_Nd, c) ||
            utf8_isgroup(U8G_Pc, c);
 }
@@ -323,139 +328,141 @@ static const URange16 Zs_range16[] = { // Space separator
 };
 
 static const URange16 Arabic_range16[] = {
-	{ 1536, 1540 },
-	{ 1542, 1547 },
-	{ 1549, 1562 },
-	{ 1564, 1566 },
-	{ 1568, 1599 },
-	{ 1601, 1610 },
-	{ 1622, 1647 },
-	{ 1649, 1756 },
-	{ 1758, 1791 },
-	{ 1872, 1919 },
-	{ 2160, 2190 },
-	{ 2192, 2193 },
-	{ 2200, 2273 },
-	{ 2275, 2303 },
-	{ 64336, 64450 },
-	{ 64467, 64829 },
-	{ 64832, 64911 },
-	{ 64914, 64967 },
-	{ 64975, 64975 },
-	{ 65008, 65023 },
-	{ 65136, 65140 },
-	{ 65142, 65276 },
+    { 1536, 1540 },
+    { 1542, 1547 },
+    { 1549, 1562 },
+    { 1564, 1566 },
+    { 1568, 1599 },
+    { 1601, 1610 },
+    { 1622, 1647 },
+    { 1649, 1756 },
+    { 1758, 1791 },
+    { 1872, 1919 },
+    { 2160, 2190 },
+    { 2192, 2193 },
+    { 2200, 2273 },
+    { 2275, 2303 },
+    { 64336, 64450 },
+    { 64467, 64829 },
+    { 64832, 64911 },
+    { 64914, 64967 },
+    { 64975, 64975 },
+    { 65008, 65023 },
+    { 65136, 65140 },
+    { 65142, 65276 },
 };
 
 static const URange16 Cyrillic_range16[] = {
-	{ 1024, 1156 },
-	{ 1159, 1327 },
-	{ 7296, 7304 },
-	{ 7467, 7467 },
-	{ 7544, 7544 },
-	{ 11744, 11775 },
-	{ 42560, 42655 },
-	{ 65070, 65071 },
+    { 1024, 1156 },
+    { 1159, 1327 },
+    { 7296, 7304 },
+    { 7467, 7467 },
+    { 7544, 7544 },
+    { 11744, 11775 },
+    { 42560, 42655 },
+    { 65070, 65071 },
 };
 
 static const URange16 Devanagari_range16[] = {
-	{ 2304, 2384 },
-	{ 2389, 2403 },
-	{ 2406, 2431 },
-	{ 43232, 43263 },
+    { 2304, 2384 },
+    { 2389, 2403 },
+    { 2406, 2431 },
+    { 43232, 43263 },
 };
 
 static const URange16 Greek_range16[] = {
-	{ 880, 883 },
-	{ 885, 887 },
-	{ 890, 893 },
-	{ 895, 895 },
-	{ 900, 900 },
-	{ 902, 902 },
-	{ 904, 906 },
-	{ 908, 908 },
-	{ 910, 929 },
-	{ 931, 993 },
-	{ 1008, 1023 },
-	{ 7462, 7466 },
-	{ 7517, 7521 },
-	{ 7526, 7530 },
-	{ 7615, 7615 },
-	{ 7936, 7957 },
-	{ 7960, 7965 },
-	{ 7968, 8005 },
-	{ 8008, 8013 },
-	{ 8016, 8023 },
-	{ 8025, 8025 },
-	{ 8027, 8027 },
-	{ 8029, 8029 },
-	{ 8031, 8061 },
-	{ 8064, 8116 },
-	{ 8118, 8132 },
-	{ 8134, 8147 },
-	{ 8150, 8155 },
-	{ 8157, 8175 },
-	{ 8178, 8180 },
-	{ 8182, 8190 },
-	{ 8486, 8486 },
-	{ 43877, 43877 },
+    { 880, 883 },
+    { 885, 887 },
+    { 890, 893 },
+    { 895, 895 },
+    { 900, 900 },
+    { 902, 902 },
+    { 904, 906 },
+    { 908, 908 },
+    { 910, 929 },
+    { 931, 993 },
+    { 1008, 1023 },
+    { 7462, 7466 },
+    { 7517, 7521 },
+    { 7526, 7530 },
+    { 7615, 7615 },
+    { 7936, 7957 },
+    { 7960, 7965 },
+    { 7968, 8005 },
+    { 8008, 8013 },
+    { 8016, 8023 },
+    { 8025, 8025 },
+    { 8027, 8027 },
+    { 8029, 8029 },
+    { 8031, 8061 },
+    { 8064, 8116 },
+    { 8118, 8132 },
+    { 8134, 8147 },
+    { 8150, 8155 },
+    { 8157, 8175 },
+    { 8178, 8180 },
+    { 8182, 8190 },
+    { 8486, 8486 },
+    { 43877, 43877 },
 };
 
 static const URange16 Han_range16[] = {
-	{ 11904, 11929 },
-	{ 11931, 12019 },
-	{ 12032, 12245 },
-	{ 12293, 12293 },
-	{ 12295, 12295 },
-	{ 12321, 12329 },
-	{ 12344, 12347 },
-	{ 13312, 19903 },
-	{ 19968, 40959 },
-	{ 63744, 64109 },
-	{ 64112, 64217 },
+    { 11904, 11929 },
+    { 11931, 12019 },
+    { 12032, 12245 },
+    { 12293, 12293 },
+    { 12295, 12295 },
+    { 12321, 12329 },
+    { 12344, 12347 },
+    { 13312, 19903 },
+    { 19968, 40959 },
+    { 63744, 64109 },
+    { 64112, 64217 },
 };
 
 static const URange16 Latin_range16[] = {
-	{ 65, 90 },
-	{ 97, 122 },
-	{ 170, 170 },
-	{ 186, 186 },
-	{ 192, 214 },
-	{ 216, 246 },
-	{ 248, 696 },
-	{ 736, 740 },
-	{ 7424, 7461 },
-	{ 7468, 7516 },
-	{ 7522, 7525 },
-	{ 7531, 7543 },
-	{ 7545, 7614 },
-	{ 7680, 7935 },
-	{ 8305, 8305 },
-	{ 8319, 8319 },
-	{ 8336, 8348 },
-	{ 8490, 8491 },
-	{ 8498, 8498 },
-	{ 8526, 8526 },
-	{ 8544, 8584 },
-	{ 11360, 11391 },
-	{ 42786, 42887 },
-	{ 42891, 42954 },
-	{ 42960, 42961 },
-	{ 42963, 42963 },
-	{ 42965, 42969 },
-	{ 42994, 43007 },
-	{ 43824, 43866 },
-	{ 43868, 43876 },
-	{ 43878, 43881 },
-	{ 64256, 64262 },
-	{ 65313, 65338 },
-	{ 65345, 65370 },
+    { 65, 90 },
+    { 97, 122 },
+    { 170, 170 },
+    { 186, 186 },
+    { 192, 214 },
+    { 216, 246 },
+    { 248, 696 },
+    { 736, 740 },
+    { 7424, 7461 },
+    { 7468, 7516 },
+    { 7522, 7525 },
+    { 7531, 7543 },
+    { 7545, 7614 },
+    { 7680, 7935 },
+    { 8305, 8305 },
+    { 8319, 8319 },
+    { 8336, 8348 },
+    { 8490, 8491 },
+    { 8498, 8498 },
+    { 8526, 8526 },
+    { 8544, 8584 },
+    { 11360, 11391 },
+    { 42786, 42887 },
+    { 42891, 42954 },
+    { 42960, 42961 },
+    { 42963, 42963 },
+    { 42965, 42969 },
+    { 42994, 43007 },
+    { 43824, 43866 },
+    { 43868, 43876 },
+    { 43878, 43881 },
+    { 64256, 64262 },
+    { 65313, 65338 },
+    { 65345, 65370 },
 };
 
 #define UNI_ENTRY(Code) \
     { Code##_range16, sizeof(Code##_range16)/sizeof(URange16) }
-
-static const UGroup unicode_groups[U8G_SIZE] = {
+#ifndef __cplusplus
+static
+#endif
+const UGroup _utf8_unicode_groups[U8G_SIZE] = {
     [U8G_Cc] = UNI_ENTRY(Cc),
     [U8G_Lt] = UNI_ENTRY(Lt),
     [U8G_Nd] = UNI_ENTRY(Nd),
author	Tyge Løvset <[email protected]>	2023-02-04 23:35:56 +0100
committer	GitHub <[email protected]>	2023-02-04 23:35:56 +0100
commit	adc47cefc2976768c3f0b773bd26bfd1062e8a53 (patch)
tree	4923f88afb0d091d5d39ae03d65a4998a0517652 /src
parent	0c4c4f8bba17562735b67b2923cd23c773aa53a7 (diff)
parent	d2ff84c53aa9bd3857fdf22dcf7cd9398a4780be (diff)
download	STC-modified-adc47cefc2976768c3f0b773bd26bfd1062e8a53.tar.gz STC-modified-adc47cefc2976768c3f0b773bd26bfd1062e8a53.zip