docs and utf8 updates.

author: Tyge Løvset <[email protected]> 2023-01-12 18:26:16 +0100
committer: Tyge Løvset <[email protected]> 2023-01-12 18:26:16 +0100
commit: 350bb65a2f68b14ce16a21ea8670cc087e39f4ce (patch)
tree: 4c92b37b05aa5c1a3ccb695466fa75dbd938bf17
parent: 891aebc0a971df8e57618c16ed214d982072cbd3 (diff)
download: STC-modified-350bb65a2f68b14ce16a21ea8670cc087e39f4ce.tar.gz
STC-modified-350bb65a2f68b14ce16a21ea8670cc087e39f4ce.zip
5 files changed, 29 insertions, 29 deletions
diff --git a/docs/ccommon_api.md b/docs/ccommon_api.md
index 8c194a97..1488253d 100644
--- a/docs/ccommon_api.md
+++ b/docs/ccommon_api.md
@@ -21,23 +21,18 @@ For multiple variables, use either multiple **c_WITH** in sequence, or declare v
 scope and use **c_SCOPE**. For convenience, **c_AUTO** supports up to 4 variables.
 ```c
 // `c_WITH` is similar to python `with`: it declares and can drop a variable after going out of scope.
-c_WITH (uint8_t* buf = malloc(BUF_SIZE), free(buf))
-c_WITH (FILE* fp = fopen(fname, "rb"), fclose(fp))
+bool ok = false;
+c_WITH (uint8_t* buf = malloc(BUF_SIZE), buf != NULL, free(buf))
+c_WITH (FILE* fp = fopen(fname, "rb"), fp != NULL, fclose(fp))
 {
-    int n = 0;
-    if (fp && buf) {
-        n = fread(buf, 1, BUF_SIZE, fp);
-        doSomething(buf, n);
-    }
-}
-
-c_WITH (cstr str = cstr_lit("Hello"), cstr_drop(&str))
-{
-    cstr_append(&str, " world");
-    printf("%s\n", cstr_str(&str));
+    int n = fread(buf, 1, BUF_SIZE, fp);
+    if (n <= 0) continue; // auto cleanup! NB do not break or return here.
+    ...
+    ok = true;
 }
+return ok;
 
-// `c_AUTO` automatically initialize and destruct up to 4 variables, like `c_WITH`.
+// `c_AUTO` automatically initializes and destructs up to 4 variables, like c_WITH.
 c_AUTO (cstr, s1, s2)
 {
     cstr_append(&s1, "Hello");
@@ -62,7 +57,7 @@ c_SCOPE (pthread_mutex_lock(&mut), pthread_mutex_unlock(&mut))
     /* Do synchronized work. */
 }
 
-// `c_DEFER` executes the expressions when leaving scope.
+// `c_DEFER` executes the expressions when leaving scope. Prefer c_WITH or c_SCOPE.
 cstr s1 = cstr_lit("Hello"), s2 = cstr_lit("world");
 c_DEFER (cstr_drop(&s1), cstr_drop(&s2))
 {
@@ -82,10 +77,10 @@ cvec_str readFile(const char* name)
 {
     cvec_str vec = cvec_str_init(); // returned
 
-    c_WITH (FILE* fp = fopen(name, "r"), fclose(fp))
-        c_WITH (cstr line = cstr_NULL, cstr_drop(&line))
-            while (cstr_getline(&line, fp))
-                cvec_str_emplace_back(&vec, cstr_str(&line));
+    c_WITH (FILE* fp = fopen(name, "r"), fp != NULL, fclose(fp))
+    c_WITH (cstr line = cstr_NULL, cstr_drop(&line))
+        while (cstr_getline(&line, fp))
+            cvec_str_emplace_back(&vec, cstr_str(&line));
     return vec;
 }
 
diff --git a/docs/cmap_api.md b/docs/cmap_api.md
index 2c690d13..a33715fc 100644
--- a/docs/cmap_api.md
+++ b/docs/cmap_api.md
@@ -220,8 +220,8 @@ int main()
         cmap_vi_insert(&vecs, (Vec3i){  0,   0, 100}, 3);
         cmap_vi_insert(&vecs, (Vec3i){100, 100, 100}, 4);
 
-        c_FORPAIR (vec, num, cmap_vi, vecs)
-            printf("{ %3d, %3d, %3d }: %d\n", _.vec->x, _.vec->y, _.vec->z, *_.num);
+        c_FORPAIR (v3, num, cmap_vi, vecs)
+            printf("{ %3d, %3d, %3d }: %d\n", _.v3->x, _.v3->y, _.v3->z, *_.num);
     }
 }
 ```
@@ -253,8 +253,8 @@ int main()
         cmap_iv_insert(&vecs, 3, (Vec3i){  0,   0, 100});
         cmap_iv_insert(&vecs, 4, (Vec3i){100, 100, 100});
 
-        c_FORPAIR (num, vec, cmap_iv, vecs)
-            printf("%d: { %3d, %3d, %3d }\n", *_.num, _.vec->x, _.vec->y, _.vec->z);
+        c_FORPAIR (num, v3, cmap_iv, vecs)
+            printf("%d: { %3d, %3d, %3d }\n", *_.num, _.v3->x, _.v3->y, _.v3->z);
     }
 }
 ```
@@ -378,7 +378,7 @@ static inline RViking Viking_toraw(const Viking* vp) {
 
 // With this in place, we define the Viking => int hash map type:
 #define i_type      Vikings
-#define i_keyclass Viking
+#define i_keyclass  Viking
 #define i_keyraw    RViking
 #define i_keyfrom   Viking_from
 #define i_opt       c_no_clone // disable map cloning
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index 001f4191..f30e76ac 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -22,6 +22,7 @@ extern uint32_t utf8_casefold(uint32_t c);
 extern uint32_t utf8_tolower(uint32_t c);
 extern uint32_t utf8_toupper(uint32_t c);
 extern bool     utf8_iscased(uint32_t c);
+extern bool     utf8_isword(uint32_t c);
 extern bool     utf8_valid_n(const char* s, size_t nbytes);
 extern int      utf8_icmp_sv(csview s1, csview s2);
 extern unsigned utf8_encode(char *out, uint32_t c);
@@ -38,9 +39,6 @@ STC_INLINE bool utf8_isalnum(uint32_t c) {
     return utf8_isalpha(c) || utf8_isgroup(U8G_Nd, c);
 }
 
-STC_INLINE bool utf8_isword(uint32_t c)
-    { return utf8_isalnum(c) || utf8_isgroup(U8G_Pc, c); }
-
 STC_INLINE bool utf8_isblank(uint32_t c) {
     if (c < 128) return (c == ' ') | (c == '\t');
     return utf8_isgroup(U8G_Zs, c);
diff --git a/misc/examples/regex2.c b/misc/examples/regex2.c
index 66ab9f72..e24cce51 100644
--- a/misc/examples/regex2.c
+++ b/misc/examples/regex2.c
@@ -11,7 +11,7 @@ int main()
          "https://en.cppreference.com/w/cpp/regex/regex_search"
         },
         {"!((abc|123)+)!", "!123abcabc!"},
-        {"(\\p{L&}+ )+(\\p{Nd}+)", "Großpackung süßigkeiten 199"},
+        {"(\\p{Alpha}+ )+(\\p{Nd}+)", "Großpackung süßigkeiten 199"},
         {"\\p{Han}+", "This is Han: 王明：那是杂志吗？"},
     };
 
diff --git a/src/utf8code.c b/src/utf8code.c
index a8e4c9ab..ecf79880 100644
--- a/src/utf8code.c
+++ b/src/utf8code.c
@@ -136,7 +136,8 @@ bool utf8_isgroup(int group, uint32_t c) {
 
 bool utf8_isalpha(uint32_t c) {
     static int16_t groups[] = {U8G_Latin, U8G_Nl, U8G_Greek, U8G_Cyrillic,
-                               U8G_Han, U8G_Arabic, U8G_Devanagari};
+                               U8G_Han, U8G_Devanagari, U8G_Arabic};
+    if (c < 128) return isalpha(c) != 0;
     for (unsigned j=0; j < c_ARRAYLEN(groups); ++j)
         if (utf8_isgroup(groups[j], c))
             return true;
@@ -149,6 +150,12 @@ bool utf8_iscased(uint32_t c) {
            utf8_isgroup(U8G_Lt, c);
 }
 
+bool utf8_isword(uint32_t c) { // Returns true when code point c counts as a "word" character.
+    if (c < 128) return (isalnum(c) != 0) | (c == '_'); // ASCII range: isalnum() or underscore, no table lookup.
+    return utf8_isalpha(c) || utf8_isgroup(U8G_Nd, c) || // Otherwise: utf8_isalpha(), or member of group U8G_Nd,
+           utf8_isgroup(U8G_Pc, c); // or member of group U8G_Pc (presumably Unicode Nd/Pc categories; confirm against the tables).
+}
+
 /* The tables below are extracted from the RE2 library */
 
 static const URange16 Cc_range16[] = { // Control
author	Tyge Løvset <[email protected]>	2023-01-12 18:26:16 +0100
committer	Tyge Løvset <[email protected]>	2023-01-12 18:26:16 +0100
commit	350bb65a2f68b14ce16a21ea8670cc087e39f4ce (patch)
tree	4c92b37b05aa5c1a3ccb695466fa75dbd938bf17
parent	891aebc0a971df8e57618c16ed214d982072cbd3 (diff)
download	STC-modified-350bb65a2f68b14ce16a21ea8670cc087e39f4ce.tar.gz STC-modified-350bb65a2f68b14ce16a21ea8670cc087e39f4ce.zip