From 292bffb32e7434b4725f39924adf0e9f8af5d1cf Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Mon, 8 Apr 2019 20:27:28 +0900 Subject: Avoid infinite loop when no `Regexp` class is available; fix #4363 --- mrblib/string.rb | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index 64e85c5b6..62c925885 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -242,25 +242,27 @@ class String end end + def _regexp(re, mid) + if String === re + if Object.const_defined?(:Regexp) + return Regexp.new(re) + else + raise NotImplementedError, "String##{mid} needs Regexp class" + end + end + re + end + ## # ISO 15.2.10.5.3 def =~(re) - re =~ self + _regexp(re, :=~) =~ self end ## # ISO 15.2.10.5.27 def match(re, &block) - if String === re - if Object.const_defined?(:Regexp) - r = Regexp.new(re) - r.match(self, &block) - else - raise NotImplementedError, "String#match needs Regexp class" - end - else - re.match(self, &block) - end + _regexp(re, :match).match(self, &block) end end -- cgit v1.2.3 From 916045921e629b59264a6f1e00e2347e64ef85cb Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Thu, 18 Apr 2019 19:58:05 +0900 Subject: Remove duplicated `include Comparable` in `mrblib/string.rb` --- mrblib/string.rb | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index 62c925885..9ad8e8e73 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -3,7 +3,9 @@ # # ISO 15.2.10 class String + # ISO 15.2.10.3 include Comparable + ## # Calls the given block for each line # and pass the respective line. @@ -265,12 +267,3 @@ class String _regexp(re, :match).match(self, &block) end end - -## -# String is comparable -# -# ISO 15.2.10.3 -module Comparable; end -class String - include Comparable -end -- cgit v1.2.3 From 4a8b88f7757f71d7d88b89764b533dd5ba59fd44 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Fri, 19 Apr 2019 20:14:23 +0900 Subject: Add type check (conversion) in `String#[]=` Before this patch: 'a'[0] = 1 #=> 1 'a'[:a] = '1' #=> ArgumentError 'a'[:a, 0] = '1' #=> ArgumentError 'a'[0, :a] = '1' #=> ArgumentError 'a'[0, 1] = 1 #=> 1 After this patch / Ruby: 'a'[0] = 1 #=> TypeError 'a'[:a] = '1' #=> TypeError 'a'[:a, 0] = '1' #=> TypeError 'a'[0, :a] = '1' #=> TypeError 'a'[0, 1] = 1 #=> TypeError --- mrblib/string.rb | 11 ++++++----- test/t/string.rb | 7 +++++++ 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index 9ad8e8e73..e9eb2be1d 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -197,12 +197,12 @@ class String def []=(*args) anum = args.size if anum == 2 - pos, value = args + pos, value = args[0], args[1].__to_str case pos when String posnum = self.index(pos) if posnum - b = self[0, posnum.to_i] + b = self[0, posnum] a = self[(posnum + pos.length)..-1] self.replace([b, value, a].join('')) else @@ -217,17 +217,18 @@ class String end return self[head, tail-head]=value else + pos = pos.__to_int pos += self.length if pos < 0 if pos < 0 || pos > self.length raise IndexError, "index #{args[0]} out of string" end - b = self[0, pos.to_i] + b = self[0, pos] a = self[pos + 1..-1] self.replace([b, value, a].join('')) end return value elsif anum == 3 - pos, len, value = args + pos, len, value = args[0].__to_int, args[1].__to_int, args[2].__to_str pos += self.length if pos < 0 if pos < 0 || pos > self.length raise IndexError, "index #{args[0]} out of string" @@ -235,7 +236,7 @@ class String if len < 0 raise IndexError, "negative length #{len}" end - b = self[0, pos.to_i] + b = self[0, pos] a = self[pos + len..-1] self.replace([b, value, a].join('')) return value diff --git a/test/t/string.rb b/test/t/string.rb index cf3702cbe..404cf03e1 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -161,6 +161,9 @@ assert('String#[]=') do assert_equal 'aXc', e end + assert_raise(TypeError) { 'a'[0] = 1 } + assert_raise(TypeError) { 'a'[:a] = '1' } + # length of args is 2 a1 = 'abc' assert_raise(IndexError) do @@ -197,6 +200,10 @@ assert('String#[]=') do assert_raise(IndexError) do b3['XX'] = 'Y' end + + assert_raise(TypeError) { 'a'[:a, 0] = '1' } + assert_raise(TypeError) { 'a'[0, :a] = '1' } + assert_raise(TypeError) { 'a'[0, 1] = 1 } end assert('String#capitalize', '15.2.10.5.7') do -- cgit v1.2.3 From cdb458ed4e07698ecb028bfe397fa273ed454e13 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Sun, 21 Apr 2019 20:34:39 +0900 Subject: Commented out `String#scan` because it is not implemented yet --- mrbgems/mruby-enumerator/mrblib/enumerator.rb | 7 ++++--- mrblib/string.rb | 21 +++++++++------------ test/t/string.rb | 4 +++- 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrbgems/mruby-enumerator/mrblib/enumerator.rb b/mrbgems/mruby-enumerator/mrblib/enumerator.rb index 457687268..89472ef01 100644 --- a/mrbgems/mruby-enumerator/mrblib/enumerator.rb +++ b/mrbgems/mruby-enumerator/mrblib/enumerator.rb @@ -244,9 +244,10 @@ class Enumerator # # === Examples # - # "Hello, world!".scan(/\w+/) #=> ["Hello", "world"] - # "Hello, world!".to_enum(:scan, /\w+/).to_a #=> ["Hello", "world"] - # "Hello, world!".to_enum(:scan).each(/\w+/).to_a #=> ["Hello", "world"] + # Array.new(3) #=> [nil, nil, nil] + # Array.new(3) { |i| i } #=> [0, 1, 2] + # Array.to_enum(:new, 3).to_a #=> [0, 1, 2] + # Array.to_enum(:new).each(3).to_a #=> [0, 1, 2] # # obj = Object.new # diff --git a/mrblib/string.rb b/mrblib/string.rb index e9eb2be1d..c92a9e7be 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -105,18 +105,15 @@ class String self.replace(str) end - ## - # Calls the given block for each match of +pattern+ - # If no block is given return an array with all - # matches of +pattern+. - # - # ISO 15.2.10.5.32 - def scan(reg, &block) - ### *** TODO *** ### - unless Object.const_defined?(:Regexp) - raise NotImplementedError, "scan not available (yet)" - end - end +# ## +# # Calls the given block for each match of +pattern+ +# # If no block is given return an array with all +# # matches of +pattern+. +# # +# # ISO 15.2.10.5.32 +# def scan(pattern, &block) +# # TODO: String#scan is not implemented yet +# end ## # Replace only the first match of +pattern+ with diff --git a/test/t/string.rb b/test/t/string.rb index 404cf03e1..e563db55a 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -509,7 +509,9 @@ assert('String#rindex(UTF-8)', '15.2.10.5.31') do assert_equal nil, str.index("さ") end if UTF8STRING -# 'String#scan', '15.2.10.5.32' will be tested in mrbgems. +# assert('String#scan', '15.2.10.5.32') do +# # Not implemented yet +# end assert('String#size', '15.2.10.5.33') do assert_equal 3, 'abc'.size -- cgit v1.2.3 From 270131253f62d806ea480ef4793e0b39cd068ee4 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Sat, 27 Apr 2019 12:50:02 +0900 Subject: Remove duplicated `String#each_char` --- mrbgems/mruby-io/test/io.rb | 4 ++-- mrbgems/mruby-method/test/method.rb | 2 +- mrbgems/mruby-string-ext/mrblib/string.rb | 10 +++++++--- mrbgems/mruby-string-ext/test/string.rb | 12 ++++++------ mrblib/string.rb | 12 ------------ 5 files changed, 16 insertions(+), 24 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrbgems/mruby-io/test/io.rb b/mrbgems/mruby-io/test/io.rb index 44eaca6be..2b3f9cf13 100644 --- a/mrbgems/mruby-io/test/io.rb +++ b/mrbgems/mruby-io/test/io.rb @@ -84,7 +84,7 @@ end assert('IO#getc', '15.2.20.5.8') do io = IO.new(IO.sysopen($mrbtest_io_rfname)) - $mrbtest_io_msg.each_char { |ch| + $mrbtest_io_msg.split("").each { |ch| assert_equal ch, io.getc } assert_equal nil, io.getc @@ -127,7 +127,7 @@ end assert('IO#readchar', '15.2.20.5.15') do # almost same as IO#getc IO.open(IO.sysopen($mrbtest_io_rfname)) do |io| - $mrbtest_io_msg.each_char { |ch| + $mrbtest_io_msg.split("").each { |ch| assert_equal ch, io.readchar } assert_raise(EOFError) do diff --git a/mrbgems/mruby-method/test/method.rb b/mrbgems/mruby-method/test/method.rb index dfddde9cc..0b67d3e61 100644 --- a/mrbgems/mruby-method/test/method.rb +++ b/mrbgems/mruby-method/test/method.rb @@ -21,7 +21,7 @@ class Interpreter } def interpret(string) @ret = "" - string.each_char {|b| Dispatcher[b].bind(self).call } + string.split("").each {|b| Dispatcher[b].bind(self).call } end end diff --git a/mrbgems/mruby-string-ext/mrblib/string.rb b/mrbgems/mruby-string-ext/mrblib/string.rb index 311803ea2..fdaf2f960 100644 --- a/mrbgems/mruby-string-ext/mrblib/string.rb +++ b/mrbgems/mruby-string-ext/mrblib/string.rb @@ -310,11 +310,15 @@ class String end end + ## + # Call the given block for each character of + # +self+. def each_char(&block) return to_enum :each_char unless block - - split('').each do |i| - block.call(i) + pos = 0 + while pos < self.size + block.call(self[pos]) + pos += 1 end self end diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 44ca1fde2..02777e594 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -657,19 +657,19 @@ assert('String#chars(UTF-8)') do end if UTF8STRING assert('String#each_char') do - s = "" + chars = [] "hello!".each_char do |x| - s += x + chars << x end - assert_equal "hello!", s + assert_equal ["h", "e", "l", "l", "o", "!"], chars end assert('String#each_char(UTF-8)') do - s = "" + chars = [] "こんにちは世界!".each_char do |x| - s += x + chars << x end - assert_equal "こんにちは世界!", s + assert_equal ["こ", "ん", "に", "ち", "は", "世", "界", "!"], chars end if UTF8STRING assert('String#codepoints') do diff --git a/mrblib/string.rb b/mrblib/string.rb index c92a9e7be..506f23c83 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -164,18 +164,6 @@ class String self.replace(str) end - ## - # Call the given block for each character of - # +self+. - def each_char(&block) - pos = 0 - while pos < self.size - block.call(self[pos]) - pos += 1 - end - self - end - ## # Call the given block for each byte of +self+. def each_byte(&block) -- cgit v1.2.3 From fd37bc53deb2d52fe3134838eab002dfb9ac35ab Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 15 May 2019 09:55:21 +0900 Subject: Remove `String#=~` and `String#match` that requires `Regexp`. --- mrblib/string.rb | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index 506f23c83..42d594055 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -230,26 +230,16 @@ class String end end - def _regexp(re, mid) - if String === re - if Object.const_defined?(:Regexp) - return Regexp.new(re) - else - raise NotImplementedError, "String##{mid} needs Regexp class" - end - end - re - end - + # those two methods requires Regexp that is optional in mruby ## # ISO 15.2.10.5.3 - def =~(re) - _regexp(re, :=~) =~ self - end + #def =~(re) + # re =~ self + #end ## # ISO 15.2.10.5.27 - def match(re, &block) - _regexp(re, :match).match(self, &block) - end + #def match(re, &block) + # re.match(self, &block) + #end end -- cgit v1.2.3 From 780342eddb880a1c0a457311b005398ce36d0c9f Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 15 May 2019 14:59:48 +0900 Subject: Add Enumerator support to `String#each_byte`. `String#each_byte` is not defined in ISO Ruby but it is implemented in the core mruby because it's useful. --- mrblib/string.rb | 1 + 1 file changed, 1 insertion(+) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index 42d594055..b0fe4033e 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -167,6 +167,7 @@ class String ## # Call the given block for each byte of +self+. def each_byte(&block) + return to_enum(:each_byte, &block) unless block bytes = self.bytes pos = 0 while pos < bytes.size -- cgit v1.2.3 From 0f516bbe1dd61759faf2609970bd5cd855931221 Mon Sep 17 00:00:00 2001 From: Ryan Lopopolo Date: Sat, 22 Jun 2019 12:07:21 +0100 Subject: Add paragraph mode to String#each_line in mrblib mruby/mruby#4511 demonstrated an infinite loop in `String#each_line` when given an empty string separator. In MRI, an empty separator places String#each_line in paragraph mode, where the String is separated on successive runs of newlines. In paragraph mode, the String `"abc\n\n\ndef\nxyz"` is split into `["abc\n\n\n", "def\nxyz"]`. This commit makes the String#each_line implementation as close to ruby/spec compliant as possible given the limitations of mruby core. With this patch, the following specs fail for `String#each_line`: - uses `$/` as the separator when none is given (can be fixed by aliasing and redefining the method to use $/ as the default value of separator in mruby-io) - when no block is given returned Enumerator size should return nil (`Enumerator#size` is not supported on mruby) - tries to convert the separator to a string using to_str (`String#to_str` is not implemented on mruby) This patch has similar memory consumption compared to the prior implementation and is takes 4x the time the prior implementation takes to execute: ```console /usr/bin/time -l ./bin/mruby -e '("aaa\n\nbbbbb\n\n\n\n\ncccc" * 100000).each_line("\n") { }'; ``` --- mrblib/string.rb | 67 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 11 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index b0fe4033e..adf19d00c 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -11,18 +11,63 @@ class String # and pass the respective line. # # ISO 15.2.10.5.15 - def each_line(rs = "\n", &block) - return to_enum(:each_line, rs, &block) unless block - return block.call(self) if rs.nil? - rs.__to_str - offset = 0 - rs_len = rs.length - this = dup - while pos = this.index(rs, offset) - block.call(this[offset, pos + rs_len - offset]) - offset = pos + rs_len + def each_line(separator = "\n", getline_args = nil) + return to_enum(:each_line, separator, getline_args) unless block_given? + + if separator.nil? + yield self + return self + end + raise TypeError unless separator.is_a?(String) + + start = 0 + pointer = 0 + string = dup + + if separator.empty? + matched_newlines = 0 + while pointer < string.length + c = string[pointer] + if c == "\n" + matched_newlines += 1 + elsif matched_newlines > 1 && self.class == String + yield string[start...pointer] + matched_newlines = 0 + start = pointer + elsif matched_newlines > 1 + yield self.class.new(string[start...pointer]) + matched_newlines = 0 + start = pointer + else + matched_newlines = 0 + end + pointer += 1 + end + else + matched_length = 0 + separator_length = separator.length + while pointer < string.length + c = string[pointer] + pointer += 1 + matched_length += 1 if c == separator[matched_length] + next unless matched_length == separator_length + + if self.class == String + yield string[start...pointer] + else + yield self.class.new(string[start...pointer]) + end + matched_length = 0 + start = pointer + end + end + return self if start == string.length + + if self.class == String + yield string[start..-1] + else + yield self.class.new(string[start..-1]) end - block.call(this[offset, this.size - offset]) if this.size > offset self end -- cgit v1.2.3 From a932b8c96a4312753993ce0b98d2e9eedec17de0 Mon Sep 17 00:00:00 2001 From: Ryan Lopopolo Date: Sat, 22 Jun 2019 12:55:02 +0100 Subject: Speed up base case by 2x Make non-paragraph mode twice as fast. Performance is within a factor of 2 of the original implementation. --- mrblib/string.rb | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index adf19d00c..0bac52a20 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -44,20 +44,13 @@ class String pointer += 1 end else - matched_length = 0 - separator_length = separator.length - while pointer < string.length - c = string[pointer] - pointer += 1 - matched_length += 1 if c == separator[matched_length] - next unless matched_length == separator_length - + while (pointer = string.index(separator, start)) + pointer += separator.length if self.class == String yield string[start...pointer] else yield self.class.new(string[start...pointer]) end - matched_length = 0 start = pointer end end -- cgit v1.2.3 From 8ea45b862aa9ff358b3642ac14b03304fcd6846e Mon Sep 17 00:00:00 2001 From: Ryan Lopopolo Date: Sun, 23 Jun 2019 03:29:47 +0100 Subject: Optimize String#each_line --- mrblib/string.rb | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index 0bac52a20..60129a845 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -11,7 +11,7 @@ class String # and pass the respective line. # # ISO 15.2.10.5.15 - def each_line(separator = "\n", getline_args = nil) + def each_line(separator = "\n") return to_enum(:each_line, separator, getline_args) unless block_given? if separator.nil? @@ -20,22 +20,25 @@ class String end raise TypeError unless separator.is_a?(String) - start = 0 pointer = 0 + start = 0 string = dup + self_len = length + sep_len = separator.length + should_yield_subclass_instances = self.class != String if separator.empty? matched_newlines = 0 - while pointer < string.length + while pointer < self_len c = string[pointer] if c == "\n" matched_newlines += 1 - elsif matched_newlines > 1 && self.class == String - yield string[start...pointer] + elsif matched_newlines > 1 && should_yield_subclass_instances + yield self.class.new(string[start, pointer - start]) matched_newlines = 0 start = pointer elsif matched_newlines > 1 - yield self.class.new(string[start...pointer]) + yield string[start, pointer - start] matched_newlines = 0 start = pointer else @@ -45,21 +48,21 @@ class String end else while (pointer = string.index(separator, start)) - pointer += separator.length - if self.class == String - yield string[start...pointer] + pointer += sep_len + if should_yield_subclass_instances + yield self.class.new(string[start, pointer - start]) else - yield self.class.new(string[start...pointer]) + yield string[start, pointer - start] end start = pointer end end - return self if start == string.length + return self if start == self_len - if self.class == String - yield string[start..-1] + if should_yield_subclass_instances + yield self.class.new(string[start, self_len - start]) else - yield self.class.new(string[start..-1]) + yield string[start, self_len - start] end self end -- cgit v1.2.3 From 6b92acf361c8f1979dcda95c41324cccc8d767a9 Mon Sep 17 00:00:00 2001 From: Ryan Lopopolo Date: Sun, 23 Jun 2019 03:40:39 +0100 Subject: Use explicit block parameter --- mrblib/string.rb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index 60129a845..f1c7f3ba4 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -11,11 +11,11 @@ class String # and pass the respective line. # # ISO 15.2.10.5.15 - def each_line(separator = "\n") - return to_enum(:each_line, separator, getline_args) unless block_given? + def each_line(separator = "\n", &block) + return to_enum(:each_line, separator) unless block if separator.nil? - yield self + block.call(self) return self end raise TypeError unless separator.is_a?(String) @@ -34,11 +34,11 @@ class String if c == "\n" matched_newlines += 1 elsif matched_newlines > 1 && should_yield_subclass_instances - yield self.class.new(string[start, pointer - start]) + block.call(self.class.new(string[start, pointer - start])) matched_newlines = 0 start = pointer elsif matched_newlines > 1 - yield string[start, pointer - start] + block.call(string[start, pointer - start]) matched_newlines = 0 start = pointer else @@ -50,9 +50,9 @@ class String while (pointer = string.index(separator, start)) pointer += sep_len if should_yield_subclass_instances - yield self.class.new(string[start, pointer - start]) + block.call(self.class.new(string[start, pointer - start])) else - yield string[start, pointer - start] + block.call(string[start, pointer - start]) end start = pointer end @@ -60,9 +60,9 @@ class String return self if start == self_len if should_yield_subclass_instances - yield self.class.new(string[start, self_len - start]) + block.call(self.class.new(string[start, self_len - start])) else - yield string[start, self_len - start] + block.call(string[start, self_len - start]) end self end -- cgit v1.2.3 From 46c972f746f37a26104b1e9f317ce5a02daa9f40 Mon Sep 17 00:00:00 2001 From: Ryan Lopopolo Date: Tue, 25 Jun 2019 00:01:51 +0200 Subject: Unify loops to minimize bytecode size --- mrblib/string.rb | 42 +++++++++++++----------------------------- 1 file changed, 13 insertions(+), 29 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index f1c7f3ba4..d72002209 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -20,42 +20,26 @@ class String end raise TypeError unless separator.is_a?(String) - pointer = 0 + paragraph_mode = false + if separator.empty? + paragraph_mode = true + separator = "\n\n" + end start = 0 string = dup self_len = length sep_len = separator.length should_yield_subclass_instances = self.class != String - if separator.empty? - matched_newlines = 0 - while pointer < self_len - c = string[pointer] - if c == "\n" - matched_newlines += 1 - elsif matched_newlines > 1 && should_yield_subclass_instances - block.call(self.class.new(string[start, pointer - start])) - matched_newlines = 0 - start = pointer - elsif matched_newlines > 1 - block.call(string[start, pointer - start]) - matched_newlines = 0 - start = pointer - else - matched_newlines = 0 - end - pointer += 1 - end - else - while (pointer = string.index(separator, start)) - pointer += sep_len - if should_yield_subclass_instances - block.call(self.class.new(string[start, pointer - start])) - else - block.call(string[start, pointer - start]) - end - start = pointer + while (pointer = string.index(separator, start)) + pointer += sep_len + pointer += 1 while paragraph_mode && string[pointer] == "\n" + if should_yield_subclass_instances + block.call(self.class.new(string[start, pointer - start])) + else + block.call(string[start, pointer - start]) end + start = pointer end return self if start == self_len -- cgit v1.2.3 From 0d452073f46fc46496200db610ce785e514cdb65 Mon Sep 17 00:00:00 2001 From: dearblue Date: Tue, 21 May 2019 22:02:11 +0900 Subject: Replace `String#[]=` method by C implements The purpose is to eliminate string objects that are temporarily created during processing. --- mrblib/string.rb | 54 ----------------- src/string.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 173 insertions(+), 57 deletions(-) (limited to 'mrblib/string.rb') diff --git a/mrblib/string.rb b/mrblib/string.rb index b0fe4033e..c26cdb1e2 100644 --- a/mrblib/string.rb +++ b/mrblib/string.rb @@ -177,60 +177,6 @@ class String self end - ## - # Modify +self+ by replacing the content of +self+. - # The portion of the string affected is determined using the same criteria as +String#[]+. - def []=(*args) - anum = args.size - if anum == 2 - pos, value = args[0], args[1].__to_str - case pos - when String - posnum = self.index(pos) - if posnum - b = self[0, posnum] - a = self[(posnum + pos.length)..-1] - self.replace([b, value, a].join('')) - else - raise IndexError, "string not matched" - end - when Range - head = pos.begin - tail = pos.end - tail += self.length if tail < 0 - unless pos.exclude_end? - tail += 1 - end - return self[head, tail-head]=value - else - pos = pos.__to_int - pos += self.length if pos < 0 - if pos < 0 || pos > self.length - raise IndexError, "index #{args[0]} out of string" - end - b = self[0, pos] - a = self[pos + 1..-1] - self.replace([b, value, a].join('')) - end - return value - elsif anum == 3 - pos, len, value = args[0].__to_int, args[1].__to_int, args[2].__to_str - pos += self.length if pos < 0 - if pos < 0 || pos > self.length - raise IndexError, "index #{args[0]} out of string" - end - if len < 0 - raise IndexError, "negative length #{len}" - end - b = self[0, pos] - a = self[pos + len..-1] - self.replace([b, value, a].join('')) - return value - else - raise ArgumentError, "wrong number of arguments (#{anum} for 2..3)" - end - end - # those two methods requires Regexp that is optional in mruby ## # ISO 15.2.10.5.3 diff --git a/src/string.c b/src/string.c index c8a9e61ec..43bf8a841 100644 --- a/src/string.c +++ b/src/string.c @@ -427,13 +427,18 @@ mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) } return mrb_obj_value(s); } + +static void +str_range_to_bytes(mrb_value str, mrb_int *pos, mrb_int *len) +{ + *pos = chars2bytes(str, 0, *pos); + *len = chars2bytes(str, *pos, *len); +} #ifdef MRB_UTF8_STRING static inline mrb_value str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { - beg = chars2bytes(str, 0, beg); - len = chars2bytes(str, beg, len); - + str_range_to_bytes(str, &beg, &len); return mrb_str_byte_subseq(mrb, str, beg, len); } #else @@ -1010,6 +1015,68 @@ mrb_str_dup(mrb_state *mrb, mrb_value str) return str_replace(mrb, dup, s); } +enum str_convert_range { + /* `beg` and `len` are byte unit in `0 ... str.bytesize` */ + STR_BYTE_RANGE_CORRECTED = 1, + + /* `beg` and `len` are char unit in any range */ + STR_CHAR_RANGE = 2, + + /* `beg` and `len` are char unit in `0 ... str.size` */ + STR_CHAR_RANGE_CORRECTED = 3, + + /* `beg` is out of range */ + STR_OUT_OF_RANGE = -1 +}; + +static enum str_convert_range +str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_int *beg, mrb_int *len) +{ + if (!mrb_undef_p(alen)) { + *beg = mrb_int(mrb, indx); + *len = mrb_int(mrb, alen); + return STR_CHAR_RANGE; + } + else { + switch (mrb_type(indx)) { + case MRB_TT_FIXNUM: + *beg = mrb_fixnum(indx); + *len = 1; + return STR_CHAR_RANGE; + + case MRB_TT_STRING: + *beg = str_index_str(mrb, str, indx, 0); + if (*beg < 0) { break; } + *len = RSTRING_LEN(indx); + return STR_BYTE_RANGE_CORRECTED; + + case MRB_TT_RANGE: + goto range_arg; + + default: + indx = mrb_to_int(mrb, indx); + if (mrb_fixnum_p(indx)) { + *beg = mrb_fixnum(indx); + *len = 1; + return STR_CHAR_RANGE; + } +range_arg: + *len = RSTRING_CHAR_LEN(str); + switch (mrb_range_beg_len(mrb, indx, beg, len, *len, TRUE)) { + case MRB_RANGE_OK: + return STR_CHAR_RANGE_CORRECTED; + case MRB_RANGE_OUT: + return STR_OUT_OF_RANGE; + default: + break; + } + + mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum"); + } + } + return STR_OUT_OF_RANGE; +} + static mrb_value mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx) { @@ -1113,6 +1180,108 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str) return mrb_str_aref(mrb, str, a1); } +static mrb_noreturn void +str_out_of_index(mrb_state *mrb, mrb_value index) +{ + mrb_raisef(mrb, E_INDEX_ERROR, "index %S out of string", index); +} + +static mrb_value +str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb_value rep) +{ + const mrb_int shrink_threshold = 256; + struct RString *str = mrb_str_ptr(src); + mrb_int len = RSTR_LEN(str); + mrb_int replen, newlen; + char *strp; + + if (end > len) { end = len; } + + if (pos < 0 || pos > len) { + str_out_of_index(mrb, mrb_fixnum_value(pos)); + } + + replen = (mrb_nil_p(rep) ? 0 : RSTRING_LEN(rep)); + newlen = replen + len - (end - pos); + + if (newlen >= MRB_INT_MAX || newlen < replen /* overflowed */) { + mrb_raise(mrb, E_RUNTIME_ERROR, "string size too big"); + } + + mrb_str_modify(mrb, str); + + if (len < newlen || len - newlen >= shrink_threshold) { + resize_capa(mrb, str, newlen); + } + + strp = RSTR_PTR(str); + + memmove(strp + newlen - (len - end), strp + end, len - end); + if (!mrb_nil_p(rep)) { + memcpy(strp + pos, RSTRING_PTR(rep), replen); + } + RSTR_SET_LEN(str, newlen); + strp[newlen] = '\0'; + + return src; +} + +static void +mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_value replace) +{ + mrb_int beg, len, charlen; + + replace = mrb_to_str(mrb, replace); + + switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) { + case STR_OUT_OF_RANGE: + default: + mrb_raise(mrb, E_INDEX_ERROR, "string not matched"); + case STR_CHAR_RANGE: + if (len < 0) { + mrb_raisef(mrb, E_INDEX_ERROR, "negative length %S", alen); + } + charlen = RSTRING_CHAR_LEN(str); + if (beg < 0) { beg += charlen; } + if (beg < 0 || beg > charlen) { str_out_of_index(mrb, indx); } + /* fall through */ + case STR_CHAR_RANGE_CORRECTED: + str_range_to_bytes(str, &beg, &len); + /* fall through */ + case STR_BYTE_RANGE_CORRECTED: + str_replace_partial(mrb, str, beg, beg + len, replace); + } +} + +/* + * call-seq: + * str[fixnum] = replace + * str[fixnum, fixnum] = replace + * str[range] = replace + * str[regexp] = replace + * str[regexp, fixnum] = replace + * str[other_str] = replace + * + * Modify +self+ by replacing the content of +self+. + * The portion of the string affected is determined using the same criteria as +String#[]+. + */ +static mrb_value +mrb_str_aset_m(mrb_state *mrb, mrb_value str) +{ + mrb_value indx, alen, replace; + + switch (mrb_get_args(mrb, "oo|S!", &indx, &alen, &replace)) { + case 2: + replace = alen; + alen = mrb_undef_value(); + break; + case 3: + break; + } + mrb_str_aset(mrb, str, indx, alen, replace); + return str; +} + /* 15.2.10.5.8 */ /* * call-seq: @@ -2678,6 +2847,7 @@ mrb_init_string(mrb_state *mrb) mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */ mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */ mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */ + mrb_define_method(mrb, s, "[]=", mrb_str_aset_m, MRB_ARGS_ANY()); mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */ mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */ mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */ -- cgit v1.2.3