From 3622f2c4b51d4b1f9eb0a2470ea7cd2ee24fe3a5 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Tue, 31 Aug 2021 13:00:15 +0900 Subject: string.c: implement `__sub_replace()` in C. To reduce number of string allocation. --- mrblib/string.rb | 37 +++++-------------------------------- src/string.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 32 deletions(-) diff --git a/mrblib/string.rb b/mrblib/string.rb index 7c90303ae..2b3178688 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -42,32 +42,6 @@ class String self end - # private method for gsub/sub - def __sub_replace(rep, pre, m, post) - s = "" - i = 0 - while j = rep.index("\\", i) - break if j == rep.length-1 - t = case rep[j+1] - when "\\" - "\\" - when "`" - pre - when "&", "0" - m - when "'" - post - when "1", "2", "3", "4", "5", "6", "7", "8", "9" - "" - else - rep[j, 2] - end - s += rep[i, j-i] + t - i = j + 2 - end - s + rep[i, rep.length-i] - end - ## # Replace all matches of +pattern+ with +replacement+. 
# Call block (if given) for each match and replace @@ -92,7 +66,7 @@ class String result << if block block.call(pattern).to_s else - __sub_replace(replace, self[0, found], pattern, self[offset..-1] || "") + self.__sub_replace(replace, pattern, found) end if plen == 0 result << self[offset, 1] @@ -145,17 +119,16 @@ class String block = nil end result = [] - this = dup found = index(pattern) - return this unless found - result << this[0, found] + return self.dup unless found + result << self[0, found] offset = found + pattern.length result << if block block.call(pattern).to_s else - __sub_replace(replace, this[0, found], pattern, this[offset..-1] || "") + self.__sub_replace(replace, pattern, found) end - result << this[offset..-1] if offset < length + result << self[offset..-1] if offset < length result.join end diff --git a/src/string.c b/src/string.c index b9cc2485f..21be97db6 100644 --- a/src/string.c +++ b/src/string.c @@ -2847,6 +2847,51 @@ mrb_str_byteslice(mrb_state *mrb, mrb_value str) } } +static mrb_value +sub_replace(mrb_state *mrb, mrb_value self) +{ + char *p, *match; + mrb_int plen, mlen; + mrb_int found, offset; + mrb_value result; + + mrb_get_args(mrb, "ssi", &p, &plen, &match, &mlen, &found); + result = mrb_str_new(mrb, 0, 0); + for (mrb_int i=0; i<plen; i++) { + if (p[i] != '\\' || i+1==plen) { + mrb_str_cat(mrb, result, p+i, 1); + continue; + } + i++; + switch (p[i]) { + case '\\': + mrb_str_cat(mrb, result, "\\", 1); + break; + case '`': + mrb_str_cat(mrb, result, RSTRING_PTR(self), found); + break; + case '&': case '0': + mrb_str_cat(mrb, result, match, mlen); + break; + case '\'': + offset = found + mlen; + if (RSTRING_LEN(self) > offset) { + mrb_str_cat(mrb, result, RSTRING_PTR(self)+offset, RSTRING_LEN(self)-offset); + } + break; + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + /* ignore sub-group match (no Regexp supported) */ + break; + default: + mrb_str_cat(mrb, result, &p[i-1], 2); + break; + } + } + return result; +} + /* ---------------------------*/ void mrb_init_string(mrb_state *mrb) @@ -2908,4 +2953,6 @@ mrb_init_string(mrb_state *mrb) mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "byteslice", mrb_str_byteslice, MRB_ARGS_ARG(1,1)); + + 
mrb_define_method(mrb, s, "__sub_replace", sub_replace, MRB_ARGS_REQ(3)); /* internal */ } -- cgit v1.2.3