summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorYukihiro "Matz" Matsumoto <[email protected]>2018-11-15 04:44:27 +0900
committerYukihiro "Matz" Matsumoto <[email protected]>2018-11-15 04:51:27 +0900
commit485955eccc4993df89bdd227cdf9b0e2f6c95c9c (patch)
tree529bdd1419b48ba2c0514cfdea897620083865a1
parent4550f4e38153c623537e6df53a4fe7c1c063adc0 (diff)
downloadmruby-485955eccc4993df89bdd227cdf9b0e2f6c95c9c.tar.gz
mruby-485955eccc4993df89bdd227cdf9b0e2f6c95c9c.zip
`String#{squeeze,delete,count}` to use bitmap for matching; ref #4163
-rw-r--r--mrbgems/mruby-string-ext/src/string.c79
1 files changed, 67 insertions, 12 deletions
diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c
index cfc194906..6d661c352 100644
--- a/mrbgems/mruby-string-ext/src/string.c
+++ b/mrbgems/mruby-string-ext/src/string.c
@@ -418,6 +418,59 @@ tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th
return -1;
}
+static inline void
+tr_bitmap_set(uint8_t bitmap[32], uint8_t ch)
+{
+ uint8_t idx1 = ch / 8;
+ uint8_t idx2 = ch % 8;
+ bitmap[idx1] |= (1<<idx2);
+}
+
+static inline mrb_bool
+tr_bitmap_detect(uint8_t bitmap[32], uint8_t ch)
+{
+ uint8_t idx1 = ch / 8;
+ uint8_t idx2 = ch % 8;
+ if (bitmap[idx1] & (1<<idx2))
+ return TRUE;
+ return FALSE;
+}
+
+/* compile patter to bitmap */
+static void
+tr_compile_pattern(const struct tr_pattern *pat, mrb_value pstr, uint8_t bitmap[32])
+{
+ const char *pattern = RSTRING_PTR(pstr);
+ mrb_int flag_reverse = pat ? pat->flag_reverse : 0;
+ int i;
+
+ for (i=0; i<32; i++) {
+ bitmap[i] = 0;
+ }
+ while (pat != NULL) {
+ if (pat->type == TR_IN_ORDER) {
+ for (i = 0; i < pat->n; i++) {
+ tr_bitmap_set(bitmap, pattern[pat->val.start_pos + i]);
+ }
+ }
+ else if (pat->type == TR_RANGE) {
+ for (i = pat->val.ch[0]; i < pat->val.ch[1]; i++) {
+ tr_bitmap_set(bitmap, i);
+ }
+ }
+ else {
+ mrb_assert(pat->type == TR_UNINITIALIZED);
+ }
+ pat = pat->next;
+ }
+
+ if (flag_reverse) {
+ for (i=0; i<32; i++) {
+ bitmap[i] ^= 0xff;
+ }
+ }
+}
+
static mrb_bool
str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze)
{
@@ -593,20 +646,21 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat)
mrb_int len;
mrb_bool flag_changed = FALSE;
mrb_int lastch = -1;
+ uint8_t bitmap[32];
mrb_str_modify(mrb, mrb_str_ptr(str));
if (!mrb_nil_p(v_pat)) {
pat = tr_parse_pattern(mrb, &pat_storage, v_pat, TRUE);
+ tr_compile_pattern(pat, v_pat, bitmap);
+ tr_free_pattern(mrb, pat);
}
s = RSTRING_PTR(str);
len = RSTRING_LEN(str);
if (pat) {
for (i=j=0; i<len; i++,j++) {
- mrb_int n = tr_find_character(pat, RSTRING_PTR(v_pat), s[i]);
-
if (i>j) s[j] = s[i];
- if (n >= 0 && s[i] == lastch) {
+ if (tr_bitmap_detect(bitmap, s[i]) && s[i] == lastch) {
flag_changed = TRUE;
j--;
}
@@ -623,7 +677,6 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat)
lastch = s[i];
}
}
- tr_free_pattern(mrb, pat);
if (flag_changed) {
RSTR_SET_LEN(RSTRING(str), j);
@@ -685,22 +738,23 @@ str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat)
char *s;
mrb_int len;
mrb_bool flag_changed = FALSE;
+ uint8_t bitmap[32];
mrb_str_modify(mrb, mrb_str_ptr(str));
tr_parse_pattern(mrb, &pat, v_pat, TRUE);
+ tr_compile_pattern(&pat, v_pat, bitmap);
+ tr_free_pattern(mrb, &pat);
+
s = RSTRING_PTR(str);
len = RSTRING_LEN(str);
for (i=j=0; i<len; i++,j++) {
- mrb_int n = tr_find_character(&pat, RSTRING_PTR(v_pat), s[i]);
-
if (i>j) s[j] = s[i];
- if (n >= 0) {
+ if (tr_bitmap_detect(bitmap, s[i])) {
flag_changed = TRUE;
j--;
}
}
- tr_free_pattern(mrb, &pat);
if (flag_changed) {
RSTR_SET_LEN(RSTRING(str), j);
RSTRING_PTR(str)[j] = 0;
@@ -752,17 +806,18 @@ mrb_str_count(mrb_state *mrb, mrb_value str)
mrb_int len;
mrb_int count = 0;
struct tr_pattern pat = STATIC_TR_PATTERN;
+ uint8_t bitmap[32];
mrb_get_args(mrb, "S", &v_pat);
tr_parse_pattern(mrb, &pat, v_pat, TRUE);
+ tr_compile_pattern(&pat, v_pat, bitmap);
+ tr_free_pattern(mrb, &pat);
+
s = RSTRING_PTR(str);
len = RSTRING_LEN(str);
for (i = 0; i < len; i++) {
- mrb_int n = tr_find_character(&pat, RSTRING_PTR(v_pat), s[i]);
-
- if (n >= 0) count++;
+ if (tr_bitmap_detect(bitmap, s[i])) count++;
}
- tr_free_pattern(mrb, &pat);
return mrb_fixnum_value(count);
}