summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Tyge Løvset <[email protected]> 2022-10-14 10:25:55 +0200
committer Tyge Løvset <[email protected]> 2022-10-14 10:25:55 +0200
commit 54c08dc031c622e78dc134353690ed359b9173d8 (patch)
tree 38d827af29aa30350099a71d84d0caf0510b05ed
parent b2103dc2084f2f0b05645acc7879c9ff5ad84a05 (diff)
download STC-modified-54c08dc031c622e78dc134353690ed359b9173d8.tar.gz
download STC-modified-54c08dc031c622e78dc134353690ed359b9173d8.zip
Update sso_bench.cpp benchmark, and some internal improvements.
-rw-r--r-- benchmarks/misc/sso_bench.cpp 130
-rw-r--r-- docs/cregex_api.md 32
-rw-r--r-- include/stc/alt/csmap.h 2
-rw-r--r-- include/stc/cregex.h 34
-rw-r--r-- src/cregex.c 14
5 files changed, 120 insertions, 92 deletions
diff --git a/benchmarks/misc/sso_bench.cpp b/benchmarks/misc/sso_bench.cpp
index bcc43ba0..7742700a 100644
--- a/benchmarks/misc/sso_bench.cpp
+++ b/benchmarks/misc/sso_bench.cpp
@@ -1,107 +1,135 @@
#include <string>
#include <iostream>
-#include <vector>
-#include <unordered_set>
#include <chrono>
-#define i_static
+
#include <stc/crandom.h>
-#define i_static
#include <stc/cstr.h>
-#define i_type stccon
+#define i_type StcVec
+#define i_val_str
+#include <stc/cstack.h>
+
+#define i_type StcSet
#define i_val_str
-#include <stc/cvec.h>
+#include <stc/csset.h>
+
+#include <vector>
+using StdVec = std::vector<std::string>;
+#include <set>
+using StdSet = std::set<std::string>;
-using stdcon = std::vector<std::string>;
-static const int BENCHMARK_SIZE = 5000000;
-static const int MAX_STRING_LENGTH = 40;
+static const int BENCHMARK_SIZE = 4000000;
+static const int MAX_STRING_SIZE = 50;
static const char CHARS[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz=+-";
using time_point = std::chrono::high_resolution_clock::time_point;
-void addRandomString_STD(stdcon& con, int length) {
- std::string s(length, 0);
+static inline std::string randomString_STD(int strsize) {
+ std::string s(strsize, 0);
char* p = &s[0];
union { uint64_t u8; uint8_t b[8]; } r;
- for (int i = 0; i < length; ++i) {
+ for (int i = 0; i < strsize; ++i) {
if ((i & 7) == 0) r.u8 = crandom() & 0x3f3f3f3f3f3f3f3f;
p[i] = CHARS[r.b[i & 7]];
}
- con.push_back(std::move(s));
+ return s;
}
-void addRandomString_STC(stccon& con, int length) {
- cstr s = cstr_with_size(length, 0);
+static inline cstr randomString_STC(int strsize) {
+ cstr s = cstr_with_size(strsize, 0);
char* p = cstr_data(&s);
union { uint64_t u8; uint8_t b[8]; } r;
- for (int i = 0; i < length; ++i) {
+ for (int i = 0; i < strsize; ++i) {
if ((i & 7) == 0) r.u8 = crandom() & 0x3f3f3f3f3f3f3f3f;
p[i] = CHARS[r.b[i & 7]];
}
- stccon_push_back(&con, s);
+ return s;
+}
+
+
+void addRandomString(StdVec& vec, int strsize) {
+ vec.push_back(std::move(randomString_STD(strsize)));
+}
+
+void addRandomString(StcVec& vec, int strsize) {
+ StcVec_push(&vec, randomString_STC(strsize));
+}
+
+void addRandomString(StdSet& set, int strsize) {
+ set.insert(std::move(randomString_STD(strsize)));
+}
+
+void addRandomString(StcSet& set, int strsize) {
+ StcSet_insert(&set, randomString_STC(strsize));
}
-template <class L, typename R>
-int benchmark(L& con, const int length, R addRandomString) {
+
+template <class C>
+int benchmark(C& container, const int n, const int strsize) {
time_point t1 = std::chrono::high_resolution_clock::now();
- if (length == 0)
- for (int i = 0; i < BENCHMARK_SIZE; i++)
- addRandomString(con, (crandom() & 63) + 1);
- else
- for (int i = 0; i < BENCHMARK_SIZE; i++)
- addRandomString(con, length);
+ for (int i = 0; i < n; i++)
+ addRandomString(container, strsize);
time_point t2 = std::chrono::high_resolution_clock::now();
const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
- std::cerr << (length ? length : 32) << "\t" << duration;
+ std::cerr << (strsize ? strsize : 32) << "\t" << duration;
return (int)duration;
}
int main() {
uint64_t seed = 4321;
- csrandom(seed);
int sum, n;
- std::cerr << "length\ttime\tstd::string\n";
- for (int k = 0; k < 4; k++) {
- stdcon con; con.reserve(BENCHMARK_SIZE);
- benchmark(con, 0, addRandomString_STD);
- std::cout << '\t' << *con.begin() << '\n';
+
+ // VECTOR WITH STRINGS
+
+ csrandom(seed);
+ sum = 0, n = 0;
+ std::cerr << "\nstrsize\tmsecs\tstd::vector<std::string>, size=" << BENCHMARK_SIZE << "\n";
+ for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 2) {
+ StdVec vec; vec.reserve(BENCHMARK_SIZE);
+ sum += benchmark(vec, BENCHMARK_SIZE, strsize), ++n;
+ std::cout << '\t' << vec.front() << '\n';
}
+ std::cout << "Avg:\t" << sum/n << '\n';
csrandom(seed);
- std::cerr << "\nlength\ttime\tSTC string\n";
- for (int k = 0; k < 4; k++) {
- stccon con = stccon_with_capacity(BENCHMARK_SIZE);
- benchmark(con, 0, addRandomString_STC);
- std::cout << '\t' << cstr_str(stccon_begin(&con).ref) << '\n';
- stccon_drop(&con);
+ sum = 0, n = 0;
+ std::cerr << "\nstrsize\tmsecs\tcvec<cstr>, size=" << BENCHMARK_SIZE << "\n";
+ for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 2) {
+ StcVec vec = StcVec_with_capacity(BENCHMARK_SIZE);
+ sum += benchmark(vec, BENCHMARK_SIZE, strsize), ++n;
+ std::cout << '\t' << cstr_str(&vec.data[0]) << '\n';
+ StcVec_drop(&vec);
}
+ std::cout << "Avg:\t" << sum/n << '\n';
+ // SORTED SET WITH STRINGS
+
csrandom(seed);
sum = 0, n = 0;
- std::cerr << "\nlength\ttime\tstd::string\n";
- for (int length = 1; length <= MAX_STRING_LENGTH; length += 2) {
- stdcon con; con.reserve(BENCHMARK_SIZE);
- sum += benchmark(con, length, addRandomString_STD), ++n;
- std::cout << '\t' << *con.begin() << '\n';
+ std::cerr << "\nstrsize\tmsecs\tstd::set<std::string>, size=" << BENCHMARK_SIZE/16 << "\n";
+ for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 2) {
+ StdSet set;
+ sum += benchmark(set, BENCHMARK_SIZE/16, strsize), ++n;
+ std::cout << '\t' << *set.begin() << '\n';
}
std::cout << "Avg:\t" << sum/n << '\n';
csrandom(seed);
sum = 0, n = 0;
- std::cerr << "\nlength\ttime\tSTC string\n";
- for (int length = 1; length <= MAX_STRING_LENGTH; length += 2) {
- stccon con = stccon_with_capacity(BENCHMARK_SIZE);
- sum += benchmark(con, length, addRandomString_STC), ++n;
- std::cout << '\t' << cstr_str(stccon_begin(&con).ref) << '\n';
- stccon_drop(&con);
+ std::cerr << "\nstrsize\tmsecs\tcset<cstr>, size=" << BENCHMARK_SIZE/16 << "\n";
+ for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 2) {
+ StcSet set = StcSet_with_capacity(BENCHMARK_SIZE/16);
+ sum += benchmark(set, BENCHMARK_SIZE/16, strsize), ++n;
+ std::cout << '\t' << cstr_str(StcSet_front(&set)) << '\n';
+ StcSet_drop(&set);
}
std::cout << "Avg:\t" << sum/n << '\n';
- std::cerr << "sizeof std::string : " << sizeof(std::string) << std::endl
- << "sizeof STC string : " << sizeof(cstr) << std::endl;
+ std::cerr << "sizeof(std::string) : " << sizeof(std::string) << std::endl
+ << "sizeof(cstr) : " << sizeof(cstr) << std::endl;
return 0;
}
diff --git a/docs/cregex_api.md b/docs/cregex_api.md
index 66b97834..b715791d 100644
--- a/docs/cregex_api.md
+++ b/docs/cregex_api.md
@@ -25,13 +25,13 @@ enum {
cregex cregex_init(void);
cregex cregex_from(const char* pattern, int cflags);
- // return 1 = success, negative = error.
+ // return cre_success, or negative error code on failure.
int cregex_compile(cregex *self, const char* pattern, int cflags);
// num. of capture groups in regex. 0 if RE is invalid. First group is the full match.
int cregex_captures(const cregex* self);
- // return 1=match, 0=nomatch, -1=error. match array size: at least num groups in RE (1+).
+ // return cre_success, cre_nomatch, or cre_matcherror.
int cregex_find(const cregex* re, const char* input, csview match[], int mflags);
int cregex_find_sv(const cregex* re, csview input, csview match[]);
int cregex_find_pattern(const char* pattern, const char* input, csview match[], int cmflags);
@@ -48,20 +48,20 @@ void cregex_drop(cregex* self); // destroy
```
### Error codes
-- cre_success = 1
-- cre_nomatch = 0
-- cre_matcherror = -1
-- cre_outofmemory = -2
-- cre_unmatchedleftparenthesis = -3
-- cre_unmatchedrightparenthesis = -4
-- cre_toomanysubexpressions = -5
-- cre_toomanycharacterclasses = -6
-- cre_malformedcharacterclass = -7
-- cre_missingoperand = -8
-- cre_unknownoperator = -9
-- cre_operandstackoverflow = -10
-- cre_operatorstackoverflow = -11
-- cre_operatorstackunderflow = -12
+- cre_success = 0
+- cre_nomatch = -1
+- cre_matcherror = -2
+- cre_outofmemory = -3
+- cre_unmatchedleftparenthesis = -4
+- cre_unmatchedrightparenthesis = -5
+- cre_toomanysubexpressions = -6
+- cre_toomanycharacterclasses = -7
+- cre_malformedcharacterclass = -8
+- cre_missingoperand = -9
+- cre_unknownoperator = -10
+- cre_operandstackoverflow = -11
+- cre_operatorstackoverflow = -12
+- cre_operatorstackunderflow = -13
### Limits
- cre_MAXCLASSES
diff --git a/include/stc/alt/csmap.h b/include/stc/alt/csmap.h
index 4b90fb78..086071f5 100644
--- a/include/stc/alt/csmap.h
+++ b/include/stc/alt/csmap.h
@@ -473,6 +473,7 @@ STC_DEF _cx_self
_cx_memb(_clone)(_cx_self cx) {
return c_make(_cx_self){_cx_memb(_clone_r_)(cx.root), cx.size};
}
+#endif // !_i_no_clone
#if !defined _i_no_emplace
STC_DEF _cx_result
@@ -485,7 +486,6 @@ _cx_memb(_emplace)(_cx_self* self, i_keyraw rkey _i_MAP_ONLY(, i_valraw rmapped)
return res;
}
#endif // _i_no_emplace
-#endif // !_i_no_clone
static void
_cx_memb(_drop_r_)(_cx_node* tn) {
diff --git a/include/stc/cregex.h b/include/stc/cregex.h
index 6c817f1c..e3703ca8 100644
--- a/include/stc/cregex.h
+++ b/include/stc/cregex.h
@@ -35,21 +35,21 @@ THE SOFTWARE.
#include "forward.h" // csview
typedef enum {
- cre_success = 1,
- cre_nomatch = 0,
- cre_matcherror = -1,
- cre_outofmemory = -2,
- cre_unmatchedleftparenthesis = -3,
- cre_unmatchedrightparenthesis = -4,
- cre_toomanysubexpressions = -5,
- cre_toomanycharacterclasses = -6,
- cre_malformedcharacterclass = -7,
- cre_missingoperand = -8,
- cre_unknownoperator = -9,
- cre_operandstackoverflow = -10,
- cre_operatorstackoverflow = -11,
- cre_operatorstackunderflow = -12,
-} cregex_error_t;
+ cre_success = 0,
+ cre_nomatch = -1,
+ cre_matcherror = -2,
+ cre_outofmemory = -3,
+ cre_unmatchedleftparenthesis = -4,
+ cre_unmatchedrightparenthesis = -5,
+ cre_toomanysubexpressions = -6,
+ cre_toomanycharacterclasses = -7,
+ cre_malformedcharacterclass = -8,
+ cre_missingoperand = -9,
+ cre_unknownoperator = -10,
+ cre_operandstackoverflow = -11,
+ cre_operatorstackoverflow = -12,
+ cre_operatorstackunderflow = -13,
+} cregex_result;
enum {
/* compile-flags */
@@ -87,7 +87,7 @@ cregex cregex_init(void) {
return re;
}
-/* return 1 on success, or negative error code on failure. */
+/* return cre_success, or negative error code on failure. */
int cregex_compile(cregex *self, const char* pattern, int cflags);
static inline
@@ -100,7 +100,7 @@ cregex cregex_from(const char* pattern, int cflags) {
/* number of capture groups in a regex pattern, 0 if regex is invalid */
int cregex_captures(const cregex* self);
-/* return 1 on match, 0 on nomatch, and -1 on failure. */
+/* return cre_success, cre_nomatch or cre_matcherror. */
int cregex_find(const cregex* re, const char* input,
csview match[], int mflags);
static inline
diff --git a/src/cregex.c b/src/cregex.c
index 17284dfe..849601da 100644
--- a/src/cregex.c
+++ b/src/cregex.c
@@ -344,7 +344,7 @@ static void evaluntil(Parser *par, Token type);
static int bldcclass(Parser *par);
static void
-rcerror(Parser *par, cregex_error_t err)
+rcerror(Parser *par, cregex_result err)
{
par->error = err;
longjmp(par->regkaboom, 1);
@@ -736,13 +736,14 @@ bldcclass(Parser *par)
int inv = par->exprp[1] == '^', off = 1 + inv;
for (unsigned i = 0; i < (sizeof cls/sizeof *cls); ++i)
if (!strncmp(par->exprp + off, cls[i].c, cls[i].n)) {
- if (par->rune_type == IRUNE && (cls[i].r == ASC_lo || cls[i].r == ASC_up))
- rune = ASC_al + inv;
- else
- rune = cls[i].r + inv;
+ rune = cls[i].r;
par->exprp += off + cls[i].n;
break;
}
+ if (par->rune_type == IRUNE && (rune == ASC_lo || rune == ASC_up))
+ rune = ASC_al;
+ if (inv && rune != '[')
+ rune += 1;
}
}
*ep++ = rune;
@@ -1192,8 +1193,7 @@ int
cregex_compile(cregex *self, const char* pattern, int cflags) {
Parser par;
self->prog = regcomp1(self->prog, &par, pattern, cflags);
- self->error = par.error;
- return self->prog ? cre_success : par.error;
+ return self->error = par.error;
}
int