diff options
| author | Tyge Løvset <[email protected]> | 2022-10-14 10:25:55 +0200 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2022-10-14 10:25:55 +0200 |
| commit | 54c08dc031c622e78dc134353690ed359b9173d8 (patch) | |
| tree | 38d827af29aa30350099a71d84d0caf0510b05ed | |
| parent | b2103dc2084f2f0b05645acc7879c9ff5ad84a05 (diff) | |
| download | STC-modified-54c08dc031c622e78dc134353690ed359b9173d8.tar.gz STC-modified-54c08dc031c622e78dc134353690ed359b9173d8.zip | |
Update sso_bench.cpp benchmark, and some internal improvements.
| -rw-r--r-- | benchmarks/misc/sso_bench.cpp | 130 | ||||
| -rw-r--r-- | docs/cregex_api.md | 32 | ||||
| -rw-r--r-- | include/stc/alt/csmap.h | 2 | ||||
| -rw-r--r-- | include/stc/cregex.h | 34 | ||||
| -rw-r--r-- | src/cregex.c | 14 |
5 files changed, 120 insertions, 92 deletions
diff --git a/benchmarks/misc/sso_bench.cpp b/benchmarks/misc/sso_bench.cpp index bcc43ba0..7742700a 100644 --- a/benchmarks/misc/sso_bench.cpp +++ b/benchmarks/misc/sso_bench.cpp @@ -1,107 +1,135 @@ #include <string> #include <iostream> -#include <vector> -#include <unordered_set> #include <chrono> -#define i_static + #include <stc/crandom.h> -#define i_static #include <stc/cstr.h> -#define i_type stccon +#define i_type StcVec +#define i_val_str +#include <stc/cstack.h> + +#define i_type StcSet #define i_val_str -#include <stc/cvec.h> +#include <stc/csset.h> + +#include <vector> +using StdVec = std::vector<std::string>; +#include <set> +using StdSet = std::set<std::string>; -using stdcon = std::vector<std::string>; -static const int BENCHMARK_SIZE = 5000000; -static const int MAX_STRING_LENGTH = 40; +static const int BENCHMARK_SIZE = 4000000; +static const int MAX_STRING_SIZE = 50; static const char CHARS[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz=+-"; using time_point = std::chrono::high_resolution_clock::time_point; -void addRandomString_STD(stdcon& con, int length) { - std::string s(length, 0); +static inline std::string randomString_STD(int strsize) { + std::string s(strsize, 0); char* p = &s[0]; union { uint64_t u8; uint8_t b[8]; } r; - for (int i = 0; i < length; ++i) { + for (int i = 0; i < strsize; ++i) { if ((i & 7) == 0) r.u8 = crandom() & 0x3f3f3f3f3f3f3f3f; p[i] = CHARS[r.b[i & 7]]; } - con.push_back(std::move(s)); + return s; } -void addRandomString_STC(stccon& con, int length) { - cstr s = cstr_with_size(length, 0); +static inline cstr randomString_STC(int strsize) { + cstr s = cstr_with_size(strsize, 0); char* p = cstr_data(&s); union { uint64_t u8; uint8_t b[8]; } r; - for (int i = 0; i < length; ++i) { + for (int i = 0; i < strsize; ++i) { if ((i & 7) == 0) r.u8 = crandom() & 0x3f3f3f3f3f3f3f3f; p[i] = CHARS[r.b[i & 7]]; } - stccon_push_back(&con, s); + return s; +} + + +void addRandomString(StdVec& vec, int strsize) { + vec.push_back(std::move(randomString_STD(strsize))); +} + +void addRandomString(StcVec& vec, int strsize) { + StcVec_push(&vec, randomString_STC(strsize)); +} + +void addRandomString(StdSet& set, int strsize) { + set.insert(std::move(randomString_STD(strsize))); +} + +void addRandomString(StcSet& set, int strsize) { + StcSet_insert(&set, randomString_STC(strsize)); } -template <class L, typename R> -int benchmark(L& con, const int length, R addRandomString) { + +template <class C> +int benchmark(C& container, const int n, const int strsize) { time_point t1 = std::chrono::high_resolution_clock::now(); - if (length == 0) - for (int i = 0; i < BENCHMARK_SIZE; i++) - addRandomString(con, (crandom() & 63) + 1); - else - for (int i = 0; i < BENCHMARK_SIZE; i++) - addRandomString(con, length); + for (int i = 0; i < n; i++) + addRandomString(container, strsize); time_point t2 = std::chrono::high_resolution_clock::now(); const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count(); - std::cerr << (length ? length : 32) << "\t" << duration; + std::cerr << (strsize ? strsize : 32) << "\t" << duration; return (int)duration; } int main() { uint64_t seed = 4321; - csrandom(seed); int sum, n; - std::cerr << "length\ttime\tstd::string\n"; - for (int k = 0; k < 4; k++) { - stdcon con; con.reserve(BENCHMARK_SIZE); - benchmark(con, 0, addRandomString_STD); - std::cout << '\t' << *con.begin() << '\n'; + + // VECTOR WITH STRINGS + + csrandom(seed); + sum = 0, n = 0; + std::cerr << "\nstrsize\tmsecs\tstd::vector<std::string>, size=" << BENCHMARK_SIZE << "\n"; + for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 2) { + StdVec vec; vec.reserve(BENCHMARK_SIZE); + sum += benchmark(vec, BENCHMARK_SIZE, strsize), ++n; + std::cout << '\t' << vec.front() << '\n'; } + std::cout << "Avg:\t" << sum/n << '\n'; csrandom(seed); - std::cerr << "\nlength\ttime\tSTC string\n"; - for (int k = 0; k < 4; k++) { - stccon con = stccon_with_capacity(BENCHMARK_SIZE); - benchmark(con, 0, addRandomString_STC); - std::cout << '\t' << cstr_str(stccon_begin(&con).ref) << '\n'; - stccon_drop(&con); + sum = 0, n = 0; + std::cerr << "\nstrsize\tmsecs\tcvec<cstr>, size=" << BENCHMARK_SIZE << "\n"; + for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 2) { + StcVec vec = StcVec_with_capacity(BENCHMARK_SIZE); + sum += benchmark(vec, BENCHMARK_SIZE, strsize), ++n; + std::cout << '\t' << cstr_str(&vec.data[0]) << '\n'; + StcVec_drop(&vec); } + std::cout << "Avg:\t" << sum/n << '\n'; + // SORTED SET WITH STRINGS + csrandom(seed); sum = 0, n = 0; - std::cerr << "\nlength\ttime\tstd::string\n"; - for (int length = 1; length <= MAX_STRING_LENGTH; length += 2) { - stdcon con; con.reserve(BENCHMARK_SIZE); - sum += benchmark(con, length, addRandomString_STD), ++n; - std::cout << '\t' << *con.begin() << '\n'; + std::cerr << "\nstrsize\tmsecs\tstd::set<std::string>, size=" << BENCHMARK_SIZE/16 << "\n"; + for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 2) { + StdSet set; + sum += benchmark(set, BENCHMARK_SIZE/16, strsize), ++n; + std::cout << '\t' << *set.begin() << '\n'; } std::cout << "Avg:\t" << sum/n << '\n'; csrandom(seed); sum = 0, n = 0; - std::cerr << "\nlength\ttime\tSTC string\n"; - for (int length = 1; length <= MAX_STRING_LENGTH; length += 2) { - stccon con = stccon_with_capacity(BENCHMARK_SIZE); - sum += benchmark(con, length, addRandomString_STC), ++n; - std::cout << '\t' << cstr_str(stccon_begin(&con).ref) << '\n'; - stccon_drop(&con); + std::cerr << "\nstrsize\tmsecs\tcset<cstr>, size=" << BENCHMARK_SIZE/16 << "\n"; + for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 2) { + StcSet set = StcSet_with_capacity(BENCHMARK_SIZE/16); + sum += benchmark(set, BENCHMARK_SIZE/16, strsize), ++n; + std::cout << '\t' << cstr_str(StcSet_front(&set)) << '\n'; + StcSet_drop(&set); } std::cout << "Avg:\t" << sum/n << '\n'; - std::cerr << "sizeof std::string : " << sizeof(std::string) << std::endl - << "sizeof STC string : " << sizeof(cstr) << std::endl; + std::cerr << "sizeof(std::string) : " << sizeof(std::string) << std::endl + << "sizeof(cstr) : " << sizeof(cstr) << std::endl; return 0; } diff --git a/docs/cregex_api.md b/docs/cregex_api.md index 66b97834..b715791d 100644 --- a/docs/cregex_api.md +++ b/docs/cregex_api.md @@ -25,13 +25,13 @@ enum { cregex cregex_init(void); cregex cregex_from(const char* pattern, int cflags); - // return 1 = success, negative = error. + // return cre_success, or negative error code on failure. int cregex_compile(cregex *self, const char* pattern, int cflags); // num. of capture groups in regex. 0 if RE is invalid. First group is the full match. int cregex_captures(const cregex* self); - // return 1=match, 0=nomatch, -1=error. match array size: at least num groups in RE (1+). + // return cre_success, cre_nomatch, or cre_matcherror. int cregex_find(const cregex* re, const char* input, csview match[], int mflags); int cregex_find_sv(const cregex* re, csview input, csview match[]); int cregex_find_pattern(const char* pattern, const char* input, csview match[], int cmflags); @@ -48,20 +48,20 @@ void cregex_drop(cregex* self); // destroy ``` ### Error codes -- cre_success = 1 -- cre_nomatch = 0 -- cre_matcherror = -1 -- cre_outofmemory = -2 -- cre_unmatchedleftparenthesis = -3 -- cre_unmatchedrightparenthesis = -4 -- cre_toomanysubexpressions = -5 -- cre_toomanycharacterclasses = -6 -- cre_malformedcharacterclass = -7 -- cre_missingoperand = -8 -- cre_unknownoperator = -9 -- cre_operandstackoverflow = -10 -- cre_operatorstackoverflow = -11 -- cre_operatorstackunderflow = -12 +- cre_success = 0 +- cre_nomatch = -1 +- cre_matcherror = -2 +- cre_outofmemory = -3 +- cre_unmatchedleftparenthesis = -4 +- cre_unmatchedrightparenthesis = -5 +- cre_toomanysubexpressions = -6 +- cre_toomanycharacterclasses = -7 +- cre_malformedcharacterclass = -8 +- cre_missingoperand = -9 +- cre_unknownoperator = -10 +- cre_operandstackoverflow = -11 +- cre_operatorstackoverflow = -12 +- cre_operatorstackunderflow = -13 ### Limits - cre_MAXCLASSES diff --git a/include/stc/alt/csmap.h b/include/stc/alt/csmap.h index 4b90fb78..086071f5 100644 --- a/include/stc/alt/csmap.h +++ b/include/stc/alt/csmap.h @@ -473,6 +473,7 @@ STC_DEF _cx_self _cx_memb(_clone)(_cx_self cx) { return c_make(_cx_self){_cx_memb(_clone_r_)(cx.root), cx.size}; } +#endif // !_i_no_clone #if !defined _i_no_emplace STC_DEF _cx_result @@ -485,7 +486,6 @@ _cx_memb(_emplace)(_cx_self* self, i_keyraw rkey _i_MAP_ONLY(, i_valraw rmapped) return res; } #endif // _i_no_emplace -#endif // !_i_no_clone static void _cx_memb(_drop_r_)(_cx_node* tn) { diff --git a/include/stc/cregex.h b/include/stc/cregex.h index 6c817f1c..e3703ca8 100644 --- a/include/stc/cregex.h +++ b/include/stc/cregex.h @@ -35,21 +35,21 @@ THE SOFTWARE. #include "forward.h" // csview typedef enum { - cre_success = 1, - cre_nomatch = 0, - cre_matcherror = -1, - cre_outofmemory = -2, - cre_unmatchedleftparenthesis = -3, - cre_unmatchedrightparenthesis = -4, - cre_toomanysubexpressions = -5, - cre_toomanycharacterclasses = -6, - cre_malformedcharacterclass = -7, - cre_missingoperand = -8, - cre_unknownoperator = -9, - cre_operandstackoverflow = -10, - cre_operatorstackoverflow = -11, - cre_operatorstackunderflow = -12, -} cregex_error_t; + cre_success = 0, + cre_nomatch = -1, + cre_matcherror = -2, + cre_outofmemory = -3, + cre_unmatchedleftparenthesis = -4, + cre_unmatchedrightparenthesis = -5, + cre_toomanysubexpressions = -6, + cre_toomanycharacterclasses = -7, + cre_malformedcharacterclass = -8, + cre_missingoperand = -9, + cre_unknownoperator = -10, + cre_operandstackoverflow = -11, + cre_operatorstackoverflow = -12, + cre_operatorstackunderflow = -13, +} cregex_result; enum { /* compile-flags */ @@ -87,7 +87,7 @@ cregex cregex_init(void) { return re; } -/* return 1 on success, or negative error code on failure. */ +/* return cre_success, or negative error code on failure. */ int cregex_compile(cregex *self, const char* pattern, int cflags); static inline @@ -100,7 +100,7 @@ cregex cregex_from(const char* pattern, int cflags) { /* number of capture groups in a regex pattern, 0 if regex is invalid */ int cregex_captures(const cregex* self); -/* return 1 on match, 0 on nomatch, and -1 on failure. */ +/* return cre_success, cre_nomatch or cre_matcherror. */ int cregex_find(const cregex* re, const char* input, csview match[], int mflags); static inline diff --git a/src/cregex.c b/src/cregex.c index 17284dfe..849601da 100644 --- a/src/cregex.c +++ b/src/cregex.c @@ -344,7 +344,7 @@ static void evaluntil(Parser *par, Token type); static int bldcclass(Parser *par); static void -rcerror(Parser *par, cregex_error_t err) +rcerror(Parser *par, cregex_result err) { par->error = err; longjmp(par->regkaboom, 1); @@ -736,13 +736,14 @@ bldcclass(Parser *par) int inv = par->exprp[1] == '^', off = 1 + inv; for (unsigned i = 0; i < (sizeof cls/sizeof *cls); ++i) if (!strncmp(par->exprp + off, cls[i].c, cls[i].n)) { - if (par->rune_type == IRUNE && (cls[i].r == ASC_lo || cls[i].r == ASC_up)) - rune = ASC_al + inv; - else - rune = cls[i].r + inv; + rune = cls[i].r; par->exprp += off + cls[i].n; break; } + if (par->rune_type == IRUNE && (rune == ASC_lo || rune == ASC_up)) + rune = ASC_al; + if (inv && rune != '[') + rune += 1; } } *ep++ = rune; @@ -1192,8 +1193,7 @@ int cregex_compile(cregex *self, const char* pattern, int cflags) { Parser par; self->prog = regcomp1(self->prog, &par, pattern, cflags); - self->error = par.error; - return self->prog ? cre_success : par.error; + return self->error = par.error; } int |
