summary | refs | log | tree | commit | diff | homepage
path: root/include/stc/utf8.h
diff options
context:
space:
mode:
authorTyge Løvset <[email protected]>2022-08-07 08:03:46 +0200
committerTyge Løvset <[email protected]>2022-08-07 08:20:34 +0200
commitc87898773d1af364a9847610401a9959f6019fe7 (patch)
tree5e64c9d96fb9e12192ce298f1d2909d43b72571a /include/stc/utf8.h
parent618b5704e6f85cfe1b6e5c9c9373abe76a8bb628 (diff)
downloadSTC-modified-c87898773d1af364a9847610401a9959f6019fe7.tar.gz
STC-modified-c87898773d1af364a9847610401a9959f6019fe7.zip
Internal: moved some functions in csview/cstr to implementation sections.
Diffstat (limited to 'include/stc/utf8.h')
-rw-r--r--include/stc/utf8.h61
1 files changed, 22 insertions, 39 deletions
diff --git a/include/stc/utf8.h b/include/stc/utf8.h
index c6fb6944..34368737 100644
--- a/include/stc/utf8.h
+++ b/include/stc/utf8.h
@@ -1,46 +1,27 @@
#ifndef UTF8_H_INCLUDED
#define UTF8_H_INCLUDED
-/*
-// Example:
-#include <stc/cstr.h>
-#include <stc/csview.h>
-int main()
-{
- c_auto (cstr, s1) {
- s1 = cstr_new("hell😀 w😀rld");
- printf("%s\n", cstr_str(&s1));
- cstr_replace_sv(&s1, utf8_substr(cstr_str(&s1), 7, 1), c_sv("🐨"));
- printf("%s\n", cstr_str(&s1));
-
- c_foreach (i, cstr, s1)
- printf("%.*s,", c_ARGsv(i.chr));
- }
-}
-// Output:
-// hell😀 w😀rld
-// hell😀 w🐨rld
-// h,e,l,l,😀, ,w,🐨,r,l,d,
-*/
#include "ccommon.h"
#include <ctype.h>
// utf8 methods defined in src/utf8code.c:
-bool utf8_islower(uint32_t c);
-bool utf8_isupper(uint32_t c);
-bool utf8_isspace(uint32_t c);
-bool utf8_isdigit(uint32_t c);
-bool utf8_isxdigit(uint32_t c);
-bool utf8_isalpha(uint32_t c);
-bool utf8_isalnum(uint32_t c);
-uint32_t utf8_casefold(uint32_t c);
-uint32_t utf8_tolower(uint32_t c);
-uint32_t utf8_toupper(uint32_t c);
-bool utf8_valid_n(const char* s, size_t nbytes);
-int utf8_icmp_n(size_t u8max, const char* s1, size_t n1,
- const char* s2, size_t n2);
-unsigned utf8_encode(char *out, uint32_t c);
-uint32_t utf8_peek(const char *s, int u8pos);
+extern bool utf8_islower(uint32_t c);
+extern bool utf8_isupper(uint32_t c);
+extern bool utf8_isspace(uint32_t c);
+extern bool utf8_isdigit(uint32_t c);
+extern bool utf8_isxdigit(uint32_t c);
+extern bool utf8_isalpha(uint32_t c);
+extern bool utf8_isalnum(uint32_t c);
+extern uint32_t utf8_casefold(uint32_t c);
+extern uint32_t utf8_tolower(uint32_t c);
+extern uint32_t utf8_toupper(uint32_t c);
+extern bool utf8_valid_n(const char* s, size_t nbytes);
+extern int utf8_icmp_n(size_t u8max, const char* s1, size_t n1,
+ const char* s2, size_t n2);
+extern unsigned utf8_encode(char *out, uint32_t c);
+extern uint32_t utf8_peek(const char *s, int u8pos);
+
+/* the following functions use src/utf8code.c */
/* decode next utf8 codepoint. https://bjoern.hoehrmann.de/utf-8/decoder/dfa */
typedef struct { uint32_t state, codep; } utf8_decode_t;
@@ -62,15 +43,17 @@ STC_INLINE bool utf8_valid(const char* s) {
return utf8_valid_n(s, ~(size_t)0);
}
+/* the following functions are independent of src/utf8code.c, but assume valid utf8 strings: */
+
/* Number of bytes in the utf8 codepoint starting at s, derived from the
 * lead byte only: 1 for b < 0x80, 2 for b < 0xE0, 3 for b < 0xF0, else 4.
 * This hunk removes the two `return 0` paths (lead bytes 0x80..0xC1 and
 * 0xF5..0xFF), so after the change the result is always in 1..4 and a
 * malformed lead byte is no longer reported to the caller. */
STC_INLINE unsigned utf8_chr_size(const char *s) {
unsigned b = (uint8_t)*s;
if (b < 0x80) return 1;
- if (b < 0xC2) return 0;
+ /*if (b < 0xC2) return 0;*/
if (b < 0xE0) return 2;
if (b < 0xF0) return 3;
- if (b < 0xF5) return 4;
- return 0;
+ /*if (b < 0xF5)*/ return 4;
+ /*return 0;*/
}
/* number of codepoints in the utf8 string s */