diff options
Diffstat (limited to 'mrbgems/mruby-string-ext')
| -rw-r--r-- | mrbgems/mruby-string-ext/mrbgem.rake | 2 | ||||
| -rw-r--r-- | mrbgems/mruby-string-ext/mrblib/string.rb | 295 | ||||
| -rw-r--r-- | mrbgems/mruby-string-ext/src/string.c | 1022 | ||||
| -rw-r--r-- | mrbgems/mruby-string-ext/test/numeric.rb | 29 | ||||
| -rw-r--r-- | mrbgems/mruby-string-ext/test/range.rb | 26 | ||||
| -rw-r--r-- | mrbgems/mruby-string-ext/test/string.rb | 453 |
6 files changed, 1687 insertions, 140 deletions
diff --git a/mrbgems/mruby-string-ext/mrbgem.rake b/mrbgems/mruby-string-ext/mrbgem.rake index 688589933..f2df5a783 100644 --- a/mrbgems/mruby-string-ext/mrbgem.rake +++ b/mrbgems/mruby-string-ext/mrbgem.rake @@ -1,5 +1,5 @@ MRuby::Gem::Specification.new('mruby-string-ext') do |spec| spec.license = 'MIT' spec.author = 'mruby developers' - spec.summary = 'extensional String class' + spec.summary = 'String class extension' end diff --git a/mrbgems/mruby-string-ext/mrblib/string.rb b/mrbgems/mruby-string-ext/mrblib/string.rb index 34744cc38..2b3071567 100644 --- a/mrbgems/mruby-string-ext/mrblib/string.rb +++ b/mrbgems/mruby-string-ext/mrblib/string.rb @@ -26,7 +26,7 @@ class String def lstrip a = 0 z = self.size - 1 - a += 1 while " \f\n\r\t\v".include?(self[a]) and a <= z + a += 1 while a <= z and " \f\n\r\t\v".include?(self[a]) (z >= 0) ? self[a..z] : "" end @@ -43,7 +43,7 @@ class String def rstrip a = 0 z = self.size - 1 - z -= 1 while " \f\n\r\t\v\0".include?(self[z]) and a <= z + z -= 1 while a <= z and " \f\n\r\t\v\0".include?(self[z]) (z >= 0) ? self[a..z] : "" end @@ -59,8 +59,8 @@ class String def strip a = 0 z = self.size - 1 - a += 1 while " \f\n\r\t\v".include?(self[a]) and a <= z - z -= 1 while " \f\n\r\t\v\0".include?(self[z]) and a <= z + a += 1 while a <= z and " \f\n\r\t\v".include?(self[a]) + z -= 1 while a <= z and " \f\n\r\t\v\0".include?(self[z]) (z >= 0) ? self[a..z] : "" end @@ -76,6 +76,7 @@ class String # "hello".lstrip! #=> nil # def lstrip! + raise FrozenError, "can't modify frozen String" if frozen? s = self.lstrip (s == self) ? nil : self.replace(s) end @@ -92,6 +93,7 @@ class String # "hello".rstrip! #=> nil # def rstrip! + raise FrozenError, "can't modify frozen String" if frozen? s = self.rstrip (s == self) ? nil : self.replace(s) end @@ -104,6 +106,7 @@ class String # <code>nil</code> if <i>str</i> was not altered. # def strip! + raise FrozenError, "can't modify frozen String" if frozen? s = self.strip (s == self) ? 
nil : self.replace(s) end @@ -120,9 +123,22 @@ class String # "abcdef".casecmp("ABCDEF") #=> 0 # def casecmp(str) - self.downcase <=> str.to_str.downcase + self.downcase <=> str.__to_str.downcase rescue NoMethodError - raise TypeError, "no implicit conversion of #{str.class} into String" + nil + end + + ## + # call-seq: + # str.casecmp?(other) -> true, false, or nil + # + # Returns true if str and other_str are equal after case folding, + # false if they are not equal, and nil if other_str is not a string. + + def casecmp?(str) + c = self.casecmp(str) + return nil if c.nil? + return c == 0 end def partition(sep) @@ -132,7 +148,7 @@ class String m = n + sep.size [ slice(0, n), sep, slice(m, size - m) ] else - [ self, "", "" ] + [ self[0..-1], "", "" ] end end @@ -164,9 +180,10 @@ class String # string #=> "thsa sting" # def slice!(arg1, arg2=nil) - raise "wrong number of arguments (for 1..2)" if arg1 == nil && arg2 == nil + raise FrozenError, "can't modify frozen String" if frozen? + raise "wrong number of arguments (for 1..2)" if arg1.nil? && arg2.nil? - if arg1 != nil && arg2 != nil + if !arg1.nil? && !arg2.nil? idx = arg1 idx += self.size if arg1 < 0 if idx >= 0 && idx <= self.size && arg2 > 0 @@ -188,7 +205,7 @@ class String else idx = arg1 idx += self.size if arg1 < 0 - validated = true if idx >=0 && arg1 < self.size + validated = true if idx >=0 && arg1 < self.size end if validated str = self[arg1] @@ -196,8 +213,8 @@ class String return nil end end - unless str == nil || str == "" - if arg1 != nil && arg2 !=nil + unless str.nil? || str == "" + if !arg1.nil? && !arg2.nil? idx = arg1 >= 0 ? arg1 : self.size+arg1 str2 = self[0...idx] + self[idx+arg2..-1].to_s else @@ -207,14 +224,264 @@ class String str2 = self[0...idx] + self[idx2+1..-1].to_s elsif arg1.kind_of?(String) idx = self.index(arg1) - str2 = self[0...idx] + self[idx+arg1.size..-1] unless idx == nil + str2 = self[0...idx] + self[idx+arg1.size..-1] unless idx.nil? else idx = arg1 >= 0 ? 
arg1 : self.size+arg1 str2 = self[0...idx] + self[idx+1..-1].to_s end end - self.replace(str2) unless str2 == nil + self.replace(str2) unless str2.nil? end str end + + ## + # call-seq: + # str.insert(index, other_str) -> str + # + # Inserts <i>other_str</i> before the character at the given + # <i>index</i>, modifying <i>str</i>. Negative indices count from the + # end of the string, and insert <em>after</em> the given character. + # The intent is insert <i>aString</i> so that it starts at the given + # <i>index</i>. + # + # "abcd".insert(0, 'X') #=> "Xabcd" + # "abcd".insert(3, 'X') #=> "abcXd" + # "abcd".insert(4, 'X') #=> "abcdX" + # "abcd".insert(-3, 'X') #=> "abXcd" + # "abcd".insert(-1, 'X') #=> "abcdX" + # + def insert(idx, str) + if idx == -1 + return self << str + elsif idx < 0 + idx += 1 + end + self[idx, 0] = str + self + end + + ## + # call-seq: + # str.ljust(integer, padstr=' ') -> new_str + # + # If <i>integer</i> is greater than the length of <i>str</i>, returns a new + # <code>String</code> of length <i>integer</i> with <i>str</i> left justified + # and padded with <i>padstr</i>; otherwise, returns <i>str</i>. + # + # "hello".ljust(4) #=> "hello" + # "hello".ljust(20) #=> "hello " + # "hello".ljust(20, '1234') #=> "hello123412341234123" + def ljust(idx, padstr = ' ') + raise ArgumentError, 'zero width padding' if padstr == '' + return self if idx <= self.size + pad_repetitions = idx / padstr.size + padding = (padstr * pad_repetitions)[0, idx-self.size] + self + padding + end + + ## + # call-seq: + # str.rjust(integer, padstr=' ') -> new_str + # + # If <i>integer</i> is greater than the length of <i>str</i>, returns a new + # <code>String</code> of length <i>integer</i> with <i>str</i> right justified + # and padded with <i>padstr</i>; otherwise, returns <i>str</i>. 
+ # + # "hello".rjust(4) #=> "hello" + # "hello".rjust(20) #=> " hello" + # "hello".rjust(20, '1234') #=> "123412341234123hello" + def rjust(idx, padstr = ' ') + raise ArgumentError, 'zero width padding' if padstr == '' + return self if idx <= self.size + pad_repetitions = idx / padstr.size + padding = (padstr * pad_repetitions)[0, idx-self.size] + padding + self + end + + ## + # call-seq: + # str.center(width, padstr=' ') -> new_str + # + # Centers +str+ in +width+. If +width+ is greater than the length of +str+, + # returns a new String of length +width+ with +str+ centered and padded with + # +padstr+; otherwise, returns +str+. + # + # "hello".center(4) #=> "hello" + # "hello".center(20) #=> " hello " + # "hello".center(20, '123') #=> "1231231hello12312312" + def center(width, padstr = ' ') + raise ArgumentError, 'zero width padding' if padstr == '' + return self if width <= self.size + width -= self.size + pad1 = width / 2 + pad2 = width - pad1 + (padstr*pad1)[0,pad1] + self + (padstr*pad2)[0,pad2] + end + + def chars(&block) + if block_given? + self.split('').each do |i| + block.call(i) + end + self + else + self.split('') + end + end + + ## + # Call the given block for each character of + # +self+. + def each_char(&block) + return to_enum :each_char unless block + pos = 0 + while pos < self.size + block.call(self[pos]) + pos += 1 + end + self + end + + def codepoints(&block) + if block_given? + self.split('').each do|x| + block.call(x.ord) + end + self + else + self.split('').map{|x| x.ord} + end + end + alias each_codepoint codepoints + + ## + # call-seq: + # str.prepend(other_str) -> str + # + # Prepend---Prepend the given string to <i>str</i>. 
+ # + # a = "world" + # a.prepend("hello ") #=> "hello world" + # a #=> "hello world" + def prepend(arg) + self[0, 0] = arg + self + end + + ## + # call-seq: + # string.lines -> array of string + # string.lines {|s| block} -> array of string + # + # Returns strings per line; + # + # a = "abc\ndef" + # a.lines #=> ["abc\n", "def"] + # + # If a block is given, it works the same as <code>each_line</code>. + def lines(&blk) + lines = self.__lines + if blk + lines.each do |line| + blk.call(line) + end + end + lines + end + + ## + # call-seq: + # str.upto(other_str, exclusive=false) {|s| block } -> str + # str.upto(other_str, exclusive=false) -> an_enumerator + # + # Iterates through successive values, starting at <i>str</i> and + # ending at <i>other_str</i> inclusive, passing each value in turn to + # the block. The <code>String#succ</code> method is used to generate + # each value. If optional second argument exclusive is omitted or is false, + # the last value will be included; otherwise it will be excluded. + # + # If no block is given, an enumerator is returned instead. + # + # "a8".upto("b6") {|s| print s, ' ' } + # for s in "a8".."b6" + # print s, ' ' + # end + # + # <em>produces:</em> + # + # a8 a9 b0 b1 b2 b3 b4 b5 b6 + # a8 a9 b0 b1 b2 b3 b4 b5 b6 + # + # If <i>str</i> and <i>other_str</i> contains only ascii numeric characters, + # both are recognized as decimal numbers. In addition, the width of + # string (e.g. leading zeros) is handled appropriately. + # + # "9".upto("11").to_a #=> ["9", "10", "11"] + # "25".upto("5").to_a #=> [] + # "07".upto("11").to_a #=> ["07", "08", "09", "10", "11"] + def upto(max, exclusive=false, &block) + return to_enum(:upto, max, exclusive) unless block + raise TypeError, "no implicit conversion of #{max.class} into String" unless max.kind_of? 
String + + len = self.length + maxlen = max.length + # single character + if len == 1 and maxlen == 1 + c = self.ord + e = max.ord + while c <= e + break if exclusive and c == e + yield c.chr(__ENCODING__) + c += 1 + end + return self + end + # both edges are all digits + bi = self.to_i(10) + ei = max.to_i(10) + if (bi > 0 or bi == "0"*len) and (ei > 0 or ei == "0"*maxlen) + while bi <= ei + break if exclusive and bi == ei + s = bi.to_s + s = s.rjust(len, "0") if s.length < len + yield s + bi += 1 + end + return self + end + bs = self + while true + n = (bs <=> max) + break if n > 0 + break if exclusive and n == 0 + yield bs + break if n == 0 + bsiz = bs.size + break if bsiz > max.size || bsiz == 0 + bs = bs.succ + end + self + end + + def __upto_endless(&block) + len = self.length + # both edges are all digits + bi = self.to_i(10) + if bi > 0 or bi == "0"*len + while true + s = bi.to_s + s = s.rjust(len, "0") if s.length < len + yield s + bi += 1 + end + return self + end + bs = self + while true + yield bs + bs = bs.succ + end + self + end end diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 9db3589c7..158cb5193 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -1,23 +1,94 @@ -#include <ctype.h> #include <string.h> -#include "mruby.h" -#include "mruby/array.h" -#include "mruby/class.h" -#include "mruby/string.h" +#include <mruby.h> +#include <mruby/array.h> +#include <mruby/class.h> +#include <mruby/string.h> +#include <mruby/range.h> + +#define ENC_ASCII_8BIT "ASCII-8BIT" +#define ENC_BINARY "BINARY" +#define ENC_UTF8 "UTF-8" + +#define ENC_COMP_P(enc, enc_lit) \ + str_casecmp_p(RSTRING_PTR(enc), RSTRING_LEN(enc), enc_lit, sizeof(enc_lit"")-1) + +#ifdef MRB_NO_FLOAT +# define mrb_float_p(o) FALSE +#endif + +static mrb_bool +str_casecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2) +{ + const char *e1, *e2; + + if (len1 != len2) return FALSE; + e1 = s1 
+ len1; + e2 = s2 + len2; + while (s1 < e1 && s2 < e2) { + if (*s1 != *s2 && TOUPPER(*s1) != TOUPPER(*s2)) return FALSE; + ++s1; + ++s2; + } + return TRUE; +} static mrb_value -mrb_str_getbyte(mrb_state *mrb, mrb_value str) +int_chr_binary(mrb_state *mrb, mrb_value num) { - mrb_int pos; - mrb_get_args(mrb, "i", &pos); + mrb_int cp = mrb_as_int(mrb, num); + char c; + mrb_value str; - if (pos < 0) - pos += RSTRING_LEN(str); - if (pos < 0 || RSTRING_LEN(str) <= pos) - return mrb_nil_value(); + if (cp < 0 || 0xff < cp) { + mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num); + } + c = (char)cp; + str = mrb_str_new(mrb, &c, 1); + RSTR_SET_ASCII_FLAG(mrb_str_ptr(str)); + return str; +} + +#ifdef MRB_UTF8_STRING +static mrb_value +int_chr_utf8(mrb_state *mrb, mrb_value num) +{ + mrb_int cp = mrb_int(mrb, num); + char utf8[4]; + mrb_int len; + mrb_value str; + uint32_t ascii_flag = 0; - return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[pos]); + if (cp < 0 || 0x10FFFF < cp) { + mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num); + } + if (cp < 0x80) { + utf8[0] = (char)cp; + len = 1; + ascii_flag = MRB_STR_ASCII; + } + else if (cp < 0x800) { + utf8[0] = (char)(0xC0 | (cp >> 6)); + utf8[1] = (char)(0x80 | (cp & 0x3F)); + len = 2; + } + else if (cp < 0x10000) { + utf8[0] = (char)(0xE0 | (cp >> 12)); + utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F)); + utf8[2] = (char)(0x80 | ( cp & 0x3F)); + len = 3; + } + else { + utf8[0] = (char)(0xF0 | (cp >> 18)); + utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); + utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F)); + utf8[3] = (char)(0x80 | ( cp & 0x3F)); + len = 4; + } + str = mrb_str_new(mrb, utf8, len); + mrb_str_ptr(str)->flags |= ascii_flag; + return str; } +#endif /* * call-seq: @@ -83,18 +154,27 @@ mrb_str_swapcase(mrb_state *mrb, mrb_value self) * * Append---Concatenates the given object to <i>str</i>. 
If the object is a * <code>Integer</code>, it is considered as a codepoint, and is converted - * to a character before concatenation. + * to a character before concatenation + * (equivalent to <code>str.concat(integer.chr(__ENCODING__))</code>). * * a = "hello " * a << "world" #=> "hello world" * a.concat(33) #=> "hello world!" */ static mrb_value -mrb_str_concat2(mrb_state *mrb, mrb_value self) +mrb_str_concat_m(mrb_state *mrb, mrb_value self) { - mrb_value str; - mrb_get_args(mrb, "S", &str); - mrb_str_concat(mrb, self, str); + mrb_value str = mrb_get_arg1(mrb); + + if (mrb_integer_p(str) || mrb_float_p(str)) +#ifdef MRB_UTF8_STRING + str = int_chr_utf8(mrb, str); +#else + str = int_chr_binary(mrb, str); +#endif + else + mrb_ensure_string_type(mrb, str); + mrb_str_cat_str(mrb, self, str); return self; } @@ -114,14 +194,15 @@ mrb_str_concat2(mrb_state *mrb, mrb_value self) static mrb_value mrb_str_start_with(mrb_state *mrb, mrb_value self) { - mrb_value *argv, sub; + const mrb_value *argv; + mrb_value sub; mrb_int argc, i; mrb_get_args(mrb, "*", &argv, &argc); for (i = 0; i < argc; i++) { size_t len_l, len_r; int ai = mrb_gc_arena_save(mrb); - sub = mrb_string_type(mrb, argv[i]); + sub = mrb_ensure_string_type(mrb, argv[i]); mrb_gc_arena_restore(mrb, ai); len_l = RSTRING_LEN(self); len_r = RSTRING_LEN(sub); @@ -143,14 +224,15 @@ mrb_str_start_with(mrb_state *mrb, mrb_value self) static mrb_value mrb_str_end_with(mrb_state *mrb, mrb_value self) { - mrb_value *argv, sub; + const mrb_value *argv; + mrb_value sub; mrb_int argc, i; mrb_get_args(mrb, "*", &argv, &argc); for (i = 0; i < argc; i++) { size_t len_l, len_r; int ai = mrb_gc_arena_save(mrb); - sub = mrb_string_type(mrb, argv[i]); + sub = mrb_ensure_string_type(mrb, argv[i]); mrb_gc_arena_restore(mrb, ai); len_l = RSTRING_LEN(self); len_r = RSTRING_LEN(sub); @@ -165,6 +247,591 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self) return mrb_false_value(); } +enum tr_pattern_type { + TR_UNINITIALIZED = 0, + 
TR_IN_ORDER = 1, + TR_RANGE = 2, +}; + +/* + #tr Pattern syntax + + <syntax> ::= (<pattern>)* | '^' (<pattern>)* + <pattern> ::= <in order> | <range> + <in order> ::= (<ch>)+ + <range> ::= <ch> '-' <ch> +*/ +struct tr_pattern { + uint8_t type; // 1:in-order, 2:range + mrb_bool flag_reverse : 1; + mrb_bool flag_on_heap : 1; + uint16_t n; + union { + uint16_t start_pos; + char ch[2]; + } val; + struct tr_pattern *next; +}; + +#define STATIC_TR_PATTERN { 0 } + +static inline void +tr_free_pattern(mrb_state *mrb, struct tr_pattern *pat) +{ + while (pat) { + struct tr_pattern *p = pat->next; + if (pat->flag_on_heap) { + mrb_free(mrb, pat); + } + pat = p; + } +} + +static struct tr_pattern* +tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_pattern, mrb_bool flag_reverse_enable) +{ + const char *pattern = RSTRING_PTR(v_pattern); + mrb_int pattern_length = RSTRING_LEN(v_pattern); + mrb_bool flag_reverse = FALSE; + struct tr_pattern *pat1; + mrb_int i = 0; + + if(flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') { + flag_reverse = TRUE; + i++; + } + + while (i < pattern_length) { + /* is range pattern ? */ + mrb_bool const ret_uninit = (ret->type == TR_UNINITIALIZED); + pat1 = ret_uninit + ? ret + : (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern)); + if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') { + if (pat1 == NULL && ret) { + nomem: + tr_free_pattern(mrb, ret); + mrb_exc_raise(mrb, mrb_obj_value(mrb->nomem_err)); + return NULL; /* not reached */ + } + pat1->type = TR_RANGE; + pat1->flag_reverse = flag_reverse; + pat1->flag_on_heap = !ret_uninit; + pat1->n = pattern[i+2] - pattern[i] + 1; + pat1->next = NULL; + pat1->val.ch[0] = pattern[i]; + pat1->val.ch[1] = pattern[i+2]; + i += 3; + } + else { + /* in order pattern. 
*/ + mrb_int start_pos = i++; + mrb_int len; + + while (i < pattern_length) { + if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') + break; + i++; + } + + len = i - start_pos; + if (len > UINT16_MAX) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "tr pattern too long (max 65535)"); + } + if (pat1 == NULL && ret) { + goto nomem; + } + pat1->type = TR_IN_ORDER; + pat1->flag_reverse = flag_reverse; + pat1->flag_on_heap = !ret_uninit; + pat1->n = (uint16_t)len; + pat1->next = NULL; + pat1->val.start_pos = (uint16_t)start_pos; + } + + if (ret == NULL || ret_uninit) { + ret = pat1; + } + else { + struct tr_pattern *p = ret; + while (p->next != NULL) { + p = p->next; + } + p->next = pat1; + } + } + + return ret; +} + +static inline mrb_int +tr_find_character(const struct tr_pattern *pat, const char *pat_str, int ch) +{ + mrb_int ret = -1; + mrb_int n_sum = 0; + mrb_int flag_reverse = pat ? pat->flag_reverse : 0; + + while (pat != NULL) { + if (pat->type == TR_IN_ORDER) { + int i; + for (i = 0; i < pat->n; i++) { + if (pat_str[pat->val.start_pos + i] == ch) ret = n_sum + i; + } + } + else if (pat->type == TR_RANGE) { + if (pat->val.ch[0] <= ch && ch <= pat->val.ch[1]) + ret = n_sum + ch - pat->val.ch[0]; + } + else { + mrb_assert(pat->type == TR_UNINITIALIZED); + } + n_sum += pat->n; + pat = pat->next; + } + + if (flag_reverse) { + return (ret < 0) ? 
MRB_INT_MAX : -1; + } + return ret; +} + +static inline mrb_int +tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th) +{ + mrb_int n_sum = 0; + + while (pat != NULL) { + if (n_th < (n_sum + pat->n)) { + mrb_int i = (n_th - n_sum); + + switch (pat->type) { + case TR_IN_ORDER: + return pat_str[pat->val.start_pos + i]; + case TR_RANGE: + return pat->val.ch[0]+i; + case TR_UNINITIALIZED: + return -1; + } + } + if (pat->next == NULL) { + switch (pat->type) { + case TR_IN_ORDER: + return pat_str[pat->val.start_pos + pat->n - 1]; + case TR_RANGE: + return pat->val.ch[1]; + case TR_UNINITIALIZED: + return -1; + } + } + n_sum += pat->n; + pat = pat->next; + } + + return -1; +} + +static inline void +tr_bitmap_set(uint8_t bitmap[32], uint8_t ch) +{ + uint8_t idx1 = ch / 8; + uint8_t idx2 = ch % 8; + bitmap[idx1] |= (1<<idx2); +} + +static inline mrb_bool +tr_bitmap_detect(uint8_t bitmap[32], uint8_t ch) +{ + uint8_t idx1 = ch / 8; + uint8_t idx2 = ch % 8; + if (bitmap[idx1] & (1<<idx2)) + return TRUE; + return FALSE; +} + +/* compile patter to bitmap */ +static void +tr_compile_pattern(const struct tr_pattern *pat, mrb_value pstr, uint8_t bitmap[32]) +{ + const char *pattern = RSTRING_PTR(pstr); + mrb_int flag_reverse = pat ? 
pat->flag_reverse : 0; + int i; + + for (i=0; i<32; i++) { + bitmap[i] = 0; + } + while (pat != NULL) { + if (pat->type == TR_IN_ORDER) { + for (i = 0; i < pat->n; i++) { + tr_bitmap_set(bitmap, pattern[pat->val.start_pos + i]); + } + } + else if (pat->type == TR_RANGE) { + for (i = pat->val.ch[0]; i < pat->val.ch[1]; i++) { + tr_bitmap_set(bitmap, i); + } + } + else { + mrb_assert(pat->type == TR_UNINITIALIZED); + } + pat = pat->next; + } + + if (flag_reverse) { + for (i=0; i<32; i++) { + bitmap[i] ^= 0xff; + } + } +} + +static mrb_bool +str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze) +{ + struct tr_pattern pat = STATIC_TR_PATTERN; + struct tr_pattern rep_storage = STATIC_TR_PATTERN; + char *s; + mrb_int len; + mrb_int i; + mrb_int j; + mrb_bool flag_changed = FALSE; + mrb_int lastch = -1; + struct tr_pattern *rep; + + mrb_str_modify(mrb, mrb_str_ptr(str)); + tr_parse_pattern(mrb, &pat, p1, TRUE); + rep = tr_parse_pattern(mrb, &rep_storage, p2, FALSE); + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + + for (i=j=0; i<len; i++,j++) { + mrb_int n = tr_find_character(&pat, RSTRING_PTR(p1), s[i]); + + if (i>j) s[j] = s[i]; + if (n >= 0) { + flag_changed = TRUE; + if (rep == NULL) { + j--; + } + else { + mrb_int c = tr_get_character(rep, RSTRING_PTR(p2), n); + + if (c < 0 || (squeeze && c == lastch)) { + j--; + continue; + } + if (c > 0x80) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%i) out of range", c); + } + lastch = c; + s[i] = (char)c; + } + } + } + + tr_free_pattern(mrb, &pat); + tr_free_pattern(mrb, rep); + + if (flag_changed) { + RSTR_SET_LEN(RSTRING(str), j); + RSTRING_PTR(str)[j] = 0; + } + return flag_changed; +} + +/* + * call-seq: + * str.tr(from_str, to_str) => new_str + * + * Returns a copy of str with the characters in from_str replaced by the + * corresponding characters in to_str. If to_str is shorter than from_str, + * it is padded with its last character in order to maintain the + * correspondence. 
+ * + * "hello".tr('el', 'ip') #=> "hippo" + * "hello".tr('aeiou', '*') #=> "h*ll*" + * "hello".tr('aeiou', 'AA*') #=> "hAll*" + * + * Both strings may use the c1-c2 notation to denote ranges of characters, + * and from_str may start with a ^, which denotes all characters except + * those listed. + * + * "hello".tr('a-y', 'b-z') #=> "ifmmp" + * "hello".tr('^aeiou', '*') #=> "*e**o" + * + * The backslash character \ can be used to escape ^ or - and is otherwise + * ignored unless it appears at the end of a range or the end of the + * from_str or to_str: + * + * + * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld" + * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld" + * + * "hello\r\nworld".tr("\r", "") #=> "hello\nworld" + * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold" + * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld" + * + * "X['\\b']".tr("X\\", "") #=> "['b']" + * "X['\\b']".tr("X-\\]", "") #=> "'b'" + * + * Note: conversion is effective only in ASCII region. + */ +static mrb_value +mrb_str_tr(mrb_state *mrb, mrb_value str) +{ + mrb_value dup; + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + dup = mrb_str_dup(mrb, str); + str_tr(mrb, dup, p1, p2, FALSE); + return dup; +} + +/* + * call-seq: + * str.tr!(from_str, to_str) -> str or nil + * + * Translates str in place, using the same rules as String#tr. + * Returns str, or nil if no changes were made. + */ +static mrb_value +mrb_str_tr_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + if (str_tr(mrb, str, p1, p2, FALSE)) { + return str; + } + return mrb_nil_value(); +} + +/* + * call-seq: + * str.tr_s(from_str, to_str) -> new_str + * + * Processes a copy of str as described under String#tr, then removes + * duplicate characters in regions that were affected by the translation. 
+ * + * "hello".tr_s('l', 'r') #=> "hero" + * "hello".tr_s('el', '*') #=> "h*o" + * "hello".tr_s('el', 'hx') #=> "hhxo" + */ +static mrb_value +mrb_str_tr_s(mrb_state *mrb, mrb_value str) +{ + mrb_value dup; + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + dup = mrb_str_dup(mrb, str); + str_tr(mrb, dup, p1, p2, TRUE); + return dup; +} + +/* + * call-seq: + * str.tr_s!(from_str, to_str) -> str or nil + * + * Performs String#tr_s processing on str in place, returning + * str, or nil if no changes were made. + */ +static mrb_value +mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + if (str_tr(mrb, str, p1, p2, TRUE)) { + return str; + } + return mrb_nil_value(); +} + +static mrb_bool +str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) +{ + struct tr_pattern pat_storage = STATIC_TR_PATTERN; + struct tr_pattern *pat = NULL; + mrb_int i, j; + char *s; + mrb_int len; + mrb_bool flag_changed = FALSE; + mrb_int lastch = -1; + uint8_t bitmap[32]; + + mrb_str_modify(mrb, mrb_str_ptr(str)); + if (!mrb_nil_p(v_pat)) { + pat = tr_parse_pattern(mrb, &pat_storage, v_pat, TRUE); + tr_compile_pattern(pat, v_pat, bitmap); + tr_free_pattern(mrb, pat); + } + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + + if (pat) { + for (i=j=0; i<len; i++,j++) { + if (i>j) s[j] = s[i]; + if (tr_bitmap_detect(bitmap, s[i]) && s[i] == lastch) { + flag_changed = TRUE; + j--; + } + lastch = s[i]; + } + } + else { + for (i=j=0; i<len; i++,j++) { + if (i>j) s[j] = s[i]; + if (s[i] >= 0 && s[i] == lastch) { + flag_changed = TRUE; + j--; + } + lastch = s[i]; + } + } + + if (flag_changed) { + RSTR_SET_LEN(RSTRING(str), j); + RSTRING_PTR(str)[j] = 0; + } + return flag_changed; +} + +/* + * call-seq: + * str.squeeze([other_str]) -> new_str + * + * Builds a set of characters from the other_str + * parameter(s) using the procedure described for String#count. 
Returns a + * new string where runs of the same character that occur in this set are + * replaced by a single character. If no arguments are given, all runs of + * identical characters are replaced by a single character. + * + * "yellow moon".squeeze #=> "yelow mon" + * " now is the".squeeze(" ") #=> " now is the" + * "putters shoot balls".squeeze("m-z") #=> "puters shot balls" + */ +static mrb_value +mrb_str_squeeze(mrb_state *mrb, mrb_value str) +{ + mrb_value pat = mrb_nil_value(); + mrb_value dup; + + mrb_get_args(mrb, "|S", &pat); + dup = mrb_str_dup(mrb, str); + str_squeeze(mrb, dup, pat); + return dup; +} + +/* + * call-seq: + * str.squeeze!([other_str]) -> str or nil + * + * Squeezes str in place, returning either str, or nil if no + * changes were made. + */ +static mrb_value +mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value pat = mrb_nil_value(); + + mrb_get_args(mrb, "|S", &pat); + if (str_squeeze(mrb, str, pat)) { + return str; + } + return mrb_nil_value(); +} + +static mrb_bool +str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) +{ + struct tr_pattern pat = STATIC_TR_PATTERN; + mrb_int i, j; + char *s; + mrb_int len; + mrb_bool flag_changed = FALSE; + uint8_t bitmap[32]; + + mrb_str_modify(mrb, mrb_str_ptr(str)); + tr_parse_pattern(mrb, &pat, v_pat, TRUE); + tr_compile_pattern(&pat, v_pat, bitmap); + tr_free_pattern(mrb, &pat); + + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + + for (i=j=0; i<len; i++,j++) { + if (i>j) s[j] = s[i]; + if (tr_bitmap_detect(bitmap, s[i])) { + flag_changed = TRUE; + j--; + } + } + if (flag_changed) { + RSTR_SET_LEN(RSTRING(str), j); + RSTRING_PTR(str)[j] = 0; + } + return flag_changed; +} + +static mrb_value +mrb_str_delete(mrb_state *mrb, mrb_value str) +{ + mrb_value pat; + mrb_value dup; + + mrb_get_args(mrb, "S", &pat); + dup = mrb_str_dup(mrb, str); + str_delete(mrb, dup, pat); + return dup; +} + +static mrb_value +mrb_str_delete_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value pat; + + 
mrb_get_args(mrb, "S", &pat); + if (str_delete(mrb, str, pat)) { + return str; + } + return mrb_nil_value(); +} + +/* + * call_seq: + * str.count([other_str]) -> integer + * + * Each other_str parameter defines a set of characters to count. The + * intersection of these sets defines the characters to count in str. Any + * other_str that starts with a caret ^ is negated. The sequence c1-c2 + * means all characters between c1 and c2. The backslash character \ can + * be used to escape ^ or - and is otherwise ignored unless it appears at + * the end of a sequence or the end of a other_str. + */ +static mrb_value +mrb_str_count(mrb_state *mrb, mrb_value str) +{ + mrb_value v_pat = mrb_nil_value(); + mrb_int i; + char *s; + mrb_int len; + mrb_int count = 0; + struct tr_pattern pat = STATIC_TR_PATTERN; + uint8_t bitmap[32]; + + mrb_get_args(mrb, "S", &v_pat); + tr_parse_pattern(mrb, &pat, v_pat, TRUE); + tr_compile_pattern(&pat, v_pat, bitmap); + tr_free_pattern(mrb, &pat); + + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + for (i = 0; i < len; i++) { + if (tr_bitmap_detect(bitmap, s[i])) count++; + } + return mrb_fixnum_value(count); +} + static mrb_value mrb_str_hex(mrb_state *mrb, mrb_value self) { @@ -194,49 +861,40 @@ mrb_str_chr(mrb_state *mrb, mrb_value self) /* * call-seq: - * string.lines -> array of string + * int.chr([encoding]) -> string * - * Returns strings per line; + * Returns a string containing the character represented by the +int+'s value + * according to +encoding+. +"ASCII-8BIT"+ (+"BINARY"+) and +"UTF-8"+ (only + * with +MRB_UTF8_STRING+) can be specified as +encoding+ (default is + * +"ASCII-8BIT"+). 
* - * a = "abc\ndef" - * a.lines #=> ["abc\n", "def"] + * 65.chr #=> "A" + * 230.chr #=> "\xE6" + * 230.chr("ASCII-8BIT") #=> "\xE6" + * 230.chr("UTF-8") #=> "\u00E6" */ static mrb_value -mrb_str_lines(mrb_state *mrb, mrb_value self) +mrb_int_chr(mrb_state *mrb, mrb_value num) { - mrb_value result; - mrb_value blk; - int ai; - mrb_int len; - mrb_value arg; - char *p = RSTRING_PTR(self), *t; - char *e = p + RSTRING_LEN(self); - - mrb_get_args(mrb, "&", &blk); + mrb_value enc; + mrb_bool enc_given; - result = mrb_ary_new(mrb); - - if (!mrb_nil_p(blk)) { - while (p < e) { - t = p; - while (p < e && *p != '\n') p++; - if (*p == '\n') p++; - len = (mrb_int) (p - t); - arg = mrb_str_new(mrb, t, len); - mrb_yield_argv(mrb, blk, 1, &arg); - } - return self; + mrb_get_args(mrb, "|S?", &enc, &enc_given); + if (!enc_given || + ENC_COMP_P(enc, ENC_ASCII_8BIT) || + ENC_COMP_P(enc, ENC_BINARY)) { + return int_chr_binary(mrb, num); } - while (p < e) { - ai = mrb_gc_arena_save(mrb); - t = p; - while (p < e && *p != '\n') p++; - if (*p == '\n') p++; - len = (mrb_int) (p - t); - mrb_ary_push(mrb, result, mrb_str_new(mrb, t, len)); - mrb_gc_arena_restore(mrb, ai); +#ifdef MRB_UTF8_STRING + else if (ENC_COMP_P(enc, ENC_UTF8)) { + return int_chr_utf8(mrb, num); } - return result; +#endif + else { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %v", enc); + } + /* not reached */ + return mrb_nil_value(); } /* @@ -253,9 +911,9 @@ mrb_str_succ_bang(mrb_state *mrb, mrb_value self) { mrb_value result; unsigned char *p, *e, *b, *t; - char *prepend; + const char *prepend; struct RString *s = mrb_str_ptr(self); - size_t l; + mrb_int l; if (RSTRING_LEN(self) == 0) return self; @@ -275,7 +933,8 @@ mrb_str_succ_bang(mrb_state *mrb, mrb_value self) if (e < b) { e = p + l - 1; result = mrb_str_new_lit(mrb, ""); - } else { + } + else { // find leading letter of the ascii/number b = e; while (b > p) { @@ -293,7 +952,8 @@ mrb_str_succ_bang(mrb_state *mrb, mrb_value self) if (*e == 0xff) { 
mrb_str_cat_lit(mrb, result, "\x01"); (*e) = 0; - } else + } + else (*e)++; break; } @@ -301,13 +961,16 @@ mrb_str_succ_bang(mrb_state *mrb, mrb_value self) if (*e == '9') { if (e == b) prepend = "1"; *e = '0'; - } else if (*e == 'z') { + } + else if (*e == 'z') { if (e == b) prepend = "a"; *e = 'a'; - } else if (*e == 'Z') { + } + else if (*e == 'Z') { if (e == b) prepend = "A"; *e = 'A'; - } else { + } + else { (*e)++; break; } @@ -331,27 +994,246 @@ mrb_str_succ(mrb_state *mrb, mrb_value self) return str; } +#ifdef MRB_UTF8_STRING +static const char utf8len_codepage_zero[256] = +{ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, +}; + +static mrb_int +utf8code(unsigned char* p) +{ + mrb_int len; + + if (p[0] < 0x80) + return p[0]; + + len = utf8len_codepage_zero[p[0]]; + if (len > 1 && (p[1] & 0xc0) == 0x80) { + if (len == 2) + return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f); + if ((p[2] & 0xc0) == 0x80) { + if (len == 3) + return ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + + (p[2] & 0x3f); + if ((p[3] & 0xc0) == 0x80) { + if (len == 4) + return ((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12) + + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f); + if ((p[4] & 0xc0) == 0x80) { + if (len == 5) + return ((p[0] & 0x03) << 24) + ((p[1] & 0x3f) << 18) + + ((p[2] & 0x3f) << 12) + ((p[3] & 0x3f) << 6) + + (p[4] & 0x3f); + if ((p[5] & 0xc0) == 0x80 && len == 6) + return ((p[0] & 0x01) << 30) + ((p[1] & 0x3f) << 24) + + ((p[2] & 0x3f) << 18) + ((p[3] & 0x3f) << 12) + + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f); + 
} + } + } + } + return p[0]; +} + +static mrb_value +mrb_str_ord(mrb_state* mrb, mrb_value str) +{ + if (RSTRING_LEN(str) == 0) + mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string"); + return mrb_fixnum_value(utf8code((unsigned char*) RSTRING_PTR(str))); +} +#else +static mrb_value +mrb_str_ord(mrb_state* mrb, mrb_value str) +{ + if (RSTRING_LEN(str) == 0) + mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string"); + return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[0]); +} +#endif + +/* + * call-seq: + * str.delete_prefix!(prefix) -> self or nil + * + * Deletes leading <code>prefix</code> from <i>str</i>, returning + * <code>nil</code> if no change was made. + * + * "hello".delete_prefix!("hel") #=> "lo" + * "hello".delete_prefix!("llo") #=> nil + */ +static mrb_value +mrb_str_del_prefix_bang(mrb_state *mrb, mrb_value self) +{ + mrb_int plen, slen; + const char *ptr; + char *s; + struct RString *str = RSTRING(self); + + mrb_get_args(mrb, "s", &ptr, &plen); + slen = RSTR_LEN(str); + if (plen > slen) return mrb_nil_value(); + s = RSTR_PTR(str); + if (memcmp(s, ptr, plen) != 0) return mrb_nil_value(); + if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) { + str->as.heap.ptr += plen; + } + else { + mrb_str_modify(mrb, str); + s = RSTR_PTR(str); + memmove(s, s+plen, slen-plen); + } + RSTR_SET_LEN(str, slen-plen); + return self; +} + +/* + * call-seq: + * str.delete_prefix(prefix) -> new_str + * + * Returns a copy of <i>str</i> with leading <code>prefix</code> deleted. 
+ * + * "hello".delete_prefix("hel") #=> "lo" + * "hello".delete_prefix("llo") #=> "hello" + */ +static mrb_value +mrb_str_del_prefix(mrb_state *mrb, mrb_value self) +{ + mrb_int plen, slen; + const char *ptr; + + mrb_get_args(mrb, "s", &ptr, &plen); + slen = RSTRING_LEN(self); + if (plen > slen) return mrb_str_dup(mrb, self); + if (memcmp(RSTRING_PTR(self), ptr, plen) != 0) + return mrb_str_dup(mrb, self); + return mrb_str_substr(mrb, self, plen, slen-plen); +} + +/* + * call-seq: + * str.delete_suffix!(suffix) -> self or nil + * + * Deletes trailing <code>suffix</code> from <i>str</i>, returning + * <code>nil</code> if no change was made. + * + * "hello".delete_suffix!("llo") #=> "he" + * "hello".delete_suffix!("hel") #=> nil + */ +static mrb_value +mrb_str_del_suffix_bang(mrb_state *mrb, mrb_value self) +{ + mrb_int plen, slen; + const char *ptr; + char *s; + struct RString *str = RSTRING(self); + + mrb_get_args(mrb, "s", &ptr, &plen); + slen = RSTR_LEN(str); + if (plen > slen) return mrb_nil_value(); + s = RSTR_PTR(str); + if (memcmp(s+slen-plen, ptr, plen) != 0) return mrb_nil_value(); + if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) { + /* no need to modify string */ + } + else { + mrb_str_modify(mrb, str); + } + RSTR_SET_LEN(str, slen-plen); + return self; +} + +/* + * call-seq: + * str.delete_suffix(suffix) -> new_str + * + * Returns a copy of <i>str</i> with trailing <code>suffix</code> deleted. 
+ * + * "hello".delete_suffix("llo") #=> "he" + * "hello".delete_suffix("hel") #=> "hello" + */ +static mrb_value +mrb_str_del_suffix(mrb_state *mrb, mrb_value self) +{ + mrb_int plen, slen; + const char *ptr; + + mrb_get_args(mrb, "s", &ptr, &plen); + slen = RSTRING_LEN(self); + if (plen > slen) return mrb_str_dup(mrb, self); + if (memcmp(RSTRING_PTR(self)+slen-plen, ptr, plen) != 0) + return mrb_str_dup(mrb, self); + return mrb_str_substr(mrb, self, 0, slen-plen); +} + +static mrb_value +mrb_str_lines(mrb_state *mrb, mrb_value self) +{ + mrb_value result; + int ai; + mrb_int len; + char *b = RSTRING_PTR(self); + char *p = b, *t; + char *e = b + RSTRING_LEN(self); + + result = mrb_ary_new(mrb); + ai = mrb_gc_arena_save(mrb); + while (p < e) { + t = p; + while (p < e && *p != '\n') p++; + if (*p == '\n') p++; + len = (mrb_int) (p - t); + mrb_ary_push(mrb, result, mrb_str_new(mrb, t, len)); + mrb_gc_arena_restore(mrb, ai); + } + return result; +} + void mrb_mruby_string_ext_gem_init(mrb_state* mrb) { struct RClass * s = mrb->string_class; mrb_define_method(mrb, s, "dump", mrb_str_dump, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "swapcase!", mrb_str_swapcase_bang, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "concat", mrb_str_concat2, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "<<", mrb_str_concat2, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "concat", mrb_str_concat_m, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "<<", mrb_str_concat_m, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "count", mrb_str_count, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "tr", mrb_str_tr, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2)); + 
mrb_define_method(mrb, s, "squeeze", mrb_str_squeeze, MRB_ARGS_OPT(1)); + mrb_define_method(mrb, s, "squeeze!", mrb_str_squeeze_bang, MRB_ARGS_OPT(1)); + mrb_define_method(mrb, s, "delete", mrb_str_delete, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "delete!", mrb_str_delete_bang, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "hex", mrb_str_hex, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "oct", mrb_str_oct, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "lines", mrb_str_lines, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "succ", mrb_str_succ, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "succ!", mrb_str_succ_bang, MRB_ARGS_NONE()); - mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next"), mrb_intern_lit(mrb, "succ")); - mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next!"), mrb_intern_lit(mrb, "succ!")); + mrb_define_method(mrb, s, "next", mrb_str_succ, MRB_ARGS_NONE()); + mrb_define_method(mrb, s, "next!", mrb_str_succ_bang, MRB_ARGS_NONE()); + mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE()); + mrb_define_method(mrb, s, "delete_prefix!", mrb_str_del_prefix_bang, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "delete_prefix", mrb_str_del_prefix, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "delete_suffix!", mrb_str_del_suffix_bang, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "delete_suffix", mrb_str_del_suffix, MRB_ARGS_REQ(1)); + + mrb_define_method(mrb, s, "__lines", mrb_str_lines, MRB_ARGS_NONE()); + + mrb_define_method(mrb, mrb_class_get(mrb, "Integer"), "chr", mrb_int_chr, MRB_ARGS_OPT(1)); } void diff --git a/mrbgems/mruby-string-ext/test/numeric.rb b/mrbgems/mruby-string-ext/test/numeric.rb new file mode 100644 index 000000000..dfcb9ebf4 --- /dev/null +++ b/mrbgems/mruby-string-ext/test/numeric.rb @@ -0,0 +1,29 @@ +# coding: utf-8 
+ +assert('Integer#chr') do + assert_equal("A", 65.chr) + assert_equal("B", 0x42.chr) + assert_equal("\xab", 171.chr) + assert_raise(RangeError) { -1.chr } + assert_raise(RangeError) { 256.chr } + + assert_equal("A", 65.chr("ASCII-8BIT")) + assert_equal("B", 0x42.chr("BINARY")) + assert_equal("\xab", 171.chr("ascii-8bit")) + assert_raise(RangeError) { -1.chr("binary") } + assert_raise(RangeError) { 256.chr("Ascii-8bit") } + assert_raise(ArgumentError) { 65.chr("ASCII") } + assert_raise(ArgumentError) { 65.chr("ASCII-8BIT", 2) } + assert_raise(TypeError) { 65.chr(:BINARY) } + + if __ENCODING__ == "ASCII-8BIT" + assert_raise(ArgumentError) { 65.chr("UTF-8") } + else + assert_equal("A", 65.chr("UTF-8")) + assert_equal("B", 0x42.chr("UTF-8")) + assert_equal("«", 171.chr("utf-8")) + assert_equal("あ", 12354.chr("Utf-8")) + assert_raise(RangeError) { -1.chr("utf-8") } + assert_raise(RangeError) { 0x110000.chr.chr("UTF-8") } + end +end diff --git a/mrbgems/mruby-string-ext/test/range.rb b/mrbgems/mruby-string-ext/test/range.rb new file mode 100644 index 000000000..80c286850 --- /dev/null +++ b/mrbgems/mruby-string-ext/test/range.rb @@ -0,0 +1,26 @@ +assert('Range#max') do + # returns the maximum value in the range when called with no arguments + assert_equal 'l', ('f'..'l').max + assert_equal 'e', ('a'...'f').max + + # returns nil when the endpoint is less than the start point + assert_equal nil, ('z'..'l').max +end + +assert('Range#max given a block') do + # returns nil when the endpoint is less than the start point + assert_equal nil, (('z'..'l').max { |x, y| x <=> y }) +end + +assert('Range#min') do + # returns the minimum value in the range when called with no arguments + assert_equal 'f', ('f'..'l').min + + # returns nil when the start point is greater than the endpoint + assert_equal nil, ('z'..'l').min +end + +assert('Range#min given a block') do + # returns nil when the start point is greater than the endpoint + assert_equal nil, (('z'..'l').min { |x, y| x <=> y }) 
+end diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index eba666e13..7be673aa6 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -1,75 +1,83 @@ +# coding: utf-8 ## # String(Ext) Test -assert('String#getbyte') do - str1 = "hello" - bytes1 = [104, 101, 108, 108, 111] - assert_equal bytes1[0], str1.getbyte(0) - assert_equal bytes1[-1], str1.getbyte(-1) - assert_equal bytes1[6], str1.getbyte(6) +UTF8STRING = __ENCODING__ == "UTF-8" - str2 = "\xFF" - bytes2 = [0xFF] - assert_equal bytes2[0], str2.getbyte(0) +def assert_upto(exp, receiver, *args) + act = [] + receiver.upto(*args) { |v| act << v } + assert_equal exp, act end assert('String#dump') do - ("\1" * 100).dump # should not raise an exception - regress #1210 - "\0".inspect == "\"\\000\"" and - "foo".dump == "\"foo\"" + assert_equal("\"\\x00\"", "\0".dump) + assert_equal("\"foo\"", "foo".dump) + assert_equal('"\xe3\x82\x8b"', "る".dump) + assert_nothing_raised { ("\1" * 100).dump } # regress #1210 end assert('String#strip') do s = " abc " - "".strip == "" and " \t\r\n\f\v".strip == "" and - "\0a\0".strip == "\0a" and - "abc".strip == "abc" and - " abc".strip == "abc" and - "abc ".strip == "abc" and - " abc ".strip == "abc" and - s == " abc " + assert_equal("abc", s.strip) + assert_equal(" abc ", s) + assert_equal("", "".strip) + assert_equal("", " \t\r\n\f\v".strip) + assert_equal("\0a", "\0a\0".strip) + assert_equal("abc", "abc".strip) + assert_equal("abc", " abc".strip) + assert_equal("abc", "abc ".strip) end assert('String#lstrip') do s = " abc " - s.lstrip - "".lstrip == "" and " \t\r\n\f\v".lstrip == "" and - "\0a\0".lstrip == "\0a\0" and - "abc".lstrip == "abc" and - " abc".lstrip == "abc" and - "abc ".lstrip == "abc " and - " abc ".lstrip == "abc " and - s == " abc " + assert_equal("abc ", s.lstrip) + assert_equal(" abc ", s) + assert_equal("", "".lstrip) + assert_equal("", " \t\r\n\f\v".lstrip) + 
assert_equal("\0a\0", "\0a\0".lstrip) + assert_equal("abc", "abc".lstrip) + assert_equal("abc", " abc".lstrip) + assert_equal("abc ", "abc ".lstrip) end assert('String#rstrip') do s = " abc " - s.rstrip - "".rstrip == "" and " \t\r\n\f\v".rstrip == "" and - "\0a\0".rstrip == "\0a" and - "abc".rstrip == "abc" and - " abc".rstrip == " abc" and - "abc ".rstrip == "abc" and - " abc ".rstrip == " abc" and - s == " abc " + assert_equal(" abc", s.rstrip) + assert_equal(" abc ", s) + assert_equal("", "".rstrip) + assert_equal("", " \t\r\n\f\v".rstrip) + assert_equal("\0a", "\0a\0".rstrip) + assert_equal("abc", "abc".rstrip) + assert_equal(" abc", " abc".rstrip) + assert_equal("abc", "abc ".rstrip) end assert('String#strip!') do s = " abc " t = "abc" - s.strip! == "abc" and s == "abc" and t.strip! == nil + assert_equal("abc", s.strip!) + assert_equal("abc", s) + assert_nil(t.strip!) + assert_equal("abc", t) end assert('String#lstrip!') do s = " abc " t = "abc " - s.lstrip! == "abc " and s == "abc " and t.lstrip! == nil + assert_equal("abc ", s.lstrip!) + assert_equal("abc ", s) + assert_nil(t.lstrip!) + assert_equal("abc ", t) end assert('String#rstrip!') do s = " abc " t = " abc" - s.rstrip! == " abc" and s == " abc" and t.rstrip! == nil + assert_equal(" abc", s.rstrip!) + assert_equal(" abc", s) + assert_nil(t.rstrip!) + assert_equal(" abc", t) end assert('String#swapcase') do @@ -85,12 +93,17 @@ assert('String#swapcase!') do end assert('String#concat') do - s = "Hello " - s.concat "World!" - t = "Hello " - t << "World!" 
- assert_equal "Hello World!", t - assert_equal "Hello World!", s + assert_equal "Hello World!", "Hello " << "World" << 33 + assert_equal "Hello World!", "Hello ".concat("World").concat(33) + assert_raise(TypeError) { "".concat(Object.new) } + + if UTF8STRING + assert_equal "H«", "H" << 0xab + assert_equal "Hは", "H" << 12399 + else + assert_equal "H\xab", "H" << 0xab + assert_raise(RangeError) { "H" << 12399 } + end end assert('String#casecmp') do @@ -98,11 +111,69 @@ assert('String#casecmp') do assert_equal 0, "aBcDeF".casecmp("abcdef") assert_equal(-1, "abcdef".casecmp("abcdefg")) assert_equal 0, "abcdef".casecmp("ABCDEF") - o = Object.new - def o.to_str - "ABCDEF" - end - assert_equal 0, "abcdef".casecmp(o) +end + +assert('String#count') do + s = "abccdeff123" + assert_equal 0, s.count("") + assert_equal 1, s.count("a") + assert_equal 2, s.count("ab") + assert_equal 9, s.count("^c") + assert_equal 8, s.count("a-z") + assert_equal 4, s.count("a0-9") +end + +assert('String#tr') do + assert_equal "ABC", "abc".tr('a-z', 'A-Z') + assert_equal "hippo", "hello".tr('el', 'ip') + assert_equal "Ruby", "Lisp".tr("Lisp", "Ruby") + assert_equal "*e**o", "hello".tr('^aeiou', '*') + assert_equal "heo", "hello".tr('l', '') +end + +assert('String#tr!') do + s = "abcdefghijklmnopqR" + assert_equal "ab12222hijklmnopqR", s.tr!("cdefg", "12") + assert_equal "ab12222hijklmnopqR", s +end + +assert('String#tr_s') do + assert_equal "hero", "hello".tr_s('l', 'r') + assert_equal "h*o", "hello".tr_s('el', '*') + assert_equal "hhxo", "hello".tr_s('el', 'hx') +end + +assert('String#tr_s!') do + s = "hello" + assert_equal "hero", s.tr_s!('l', 'r') + assert_equal "hero", s + assert_nil s.tr_s!('l', 'r') +end + +assert('String#squeeze') do + assert_equal "yelow mon", "yellow moon".squeeze + assert_equal " now is the", " now is the".squeeze(" ") + assert_equal "puters shot balls", "putters shoot balls".squeeze("m-z") +end + +assert('String#squeeze!') do + s = " now is the" + assert_equal " now 
is the", s.squeeze!(" ") + assert_equal " now is the", s +end + +assert('String#delete') do + assert_equal "he", "hello".delete("lo") + assert_equal "hll", "hello".delete("aeiou") + assert_equal "ll", "hello".delete("^l") + assert_equal "ho", "hello".delete("ej-m") +end + +assert('String#delete!') do + s = "hello" + assert_equal "he", s.delete!("lo") + assert_equal "he", s + assert_nil s.delete!("lz") end assert('String#start_with?') do @@ -162,10 +233,6 @@ assert('String#oct') do assert_equal (-8), "-10".oct end -assert('String#chr') do - assert_equal "a", "abcde".chr -end - assert('String#lines') do assert_equal ["Hel\n", "lo\n", "World!"], "Hel\nlo\nWorld!".lines assert_equal ["Hel\n", "lo\n", "World!\n"], "Hel\nlo\nWorld!\n".lines @@ -358,8 +425,8 @@ assert('String#succ') do assert_equal "-b-", a a = "-z-"; a.succ! assert_equal "-aa-", a - a = "あa"; a.succ! - assert_equal "あb", a + a = "あb"; a.succ! + assert_equal "あc", a a = "あaz"; a.succ! assert_equal "あba", a end @@ -370,3 +437,279 @@ assert('String#next') do a = "00"; a.next! 
assert_equal "01", a end + +assert('String#insert') do + assert_equal "Xabcd", "abcd".insert(0, 'X') + assert_equal "abcXd", "abcd".insert(3, 'X') + assert_equal "abcdX", "abcd".insert(4, 'X') + assert_equal "abXcd", "abcd".insert(-3, 'X') + assert_equal "abcdX", "abcd".insert(-1, 'X') + assert_raise(IndexError) { "abcd".insert(5, 'X') } + assert_raise(IndexError) { "abcd".insert(-6, 'X') } + + a = "abcd" + a.insert(0, 'X') + assert_equal "Xabcd", a +end + +assert('String#prepend') do + a = "world" + assert_equal "hello world", a.prepend("hello ") + assert_equal "hello world", a +end + +assert('String#ljust') do + assert_equal "hello", "hello".ljust(4) + assert_equal "hello ", "hello".ljust(20) + assert_equal 20, "hello".ljust(20).length + assert_equal "hello123412341234123", "hello".ljust(20, '1234') + assert_equal "hello", "hello".ljust(-3) +end + +assert('String#rjust') do + assert_equal "hello", "hello".rjust(4) + assert_equal " hello", "hello".rjust(20) + assert_equal 20, "hello".rjust(20).length + assert_equal "123412341234123hello", "hello".rjust(20, '1234') + assert_equal "hello", "hello".rjust(-3) +end + +assert('String#center') do + assert_equal "hello", "hello".center(4) + assert_equal " hello ", "hello".center(20) + assert_equal 20, "hello".center(20).length + assert_equal "1231231hello12312312", "hello".center(20, '123') + assert_equal "hello", "hello".center(-3) +end + +if UTF8STRING + assert('String#ljust with UTF8') do + assert_equal "helloん ", "helloん".ljust(20) + assert_equal "helloó ", "helloó".ljust(34) + assert_equal 34, "helloó".ljust(34).length + assert_equal "helloんんんんんんんんんんんんんん", "hello".ljust(19, 'ん') + assert_equal "helloんんんんんんんんんんんんんんん", "hello".ljust(20, 'ん') + end + + assert('String#rjust with UTF8') do + assert_equal " helloん", "helloん".rjust(20) + assert_equal " helloó", "helloó".rjust(34) + # assert_equal 34, "helloó".rjust(34).length + assert_equal "んんんんんんんんんんんんんんhello", "hello".rjust(19, 'ん') + assert_equal "んんんんんんんんんんんんんんんhello", 
"hello".rjust(20, 'ん') + end + + assert('UTF8 byte counting') do + ret = ' ' + ret[-6..-1] = "helloó" + assert_equal 34, ret.length + end +end + +assert('String#ljust should not change string') do + a = "hello" + a.ljust(20) + assert_equal "hello", a +end + +assert('String#rjust should not change string') do + a = "hello" + a.rjust(20) + assert_equal "hello", a +end + +assert('String#ljust should raise on zero width padding') do + assert_raise(ArgumentError) { "foo".ljust(10, '') } +end + +assert('String#rjust should raise on zero width padding') do + assert_raise(ArgumentError) { "foo".rjust(10, '') } +end + +assert('String#upto') do + assert_upto %w(a8 a9 b0 b1 b2 b3 b4 b5 b6), "a8", "b6" + assert_upto ["9", "10", "11"], "9", "11" + assert_upto [], "25", "5" + assert_upto ["07", "08", "09", "10", "11"], "07", "11" + assert_upto ["9", ":", ";", "<", "=", ">", "?", "@", "A"], "9", "A" + + if UTF8STRING + assert_upto %w(あ ぃ い ぅ う ぇ え ぉ お), "あ", "お" + end + + a = "aa" + start = "aa" + count = 0 + assert_equal("aa", a.upto("zz") {|s| + assert_equal(start, s) + start.succ! + count += 1 + }) + assert_equal(676, count) + + a = "a" + start = "a" + count = 0 + assert_equal("a", a.upto("a") {|s| + assert_equal(start, s) + start.succ! + count += 1 + }) + assert_equal(1, count) + + a = "a" + start = "a" + count = 0 + assert_equal("a", a.upto("b", true) {|s| + assert_equal(start, s) + start.succ! + count += 1 + }) + assert_equal(1, count) + + a = "0" + start = "0" + count = 0 + assert_equal("0", a.upto("0") {|s| + assert_equal(start, s) + start.succ! + count += 1 + }) + assert_equal(1, count) + + a = "0" + start = "0" + count = 0 + assert_equal("0", a.upto("-1") {|s| + assert_equal(start, s) + start.succ! + count += 1 + }) + assert_equal(0, count) + + a = "-1" + start = "-1" + count = 0 + assert_equal("-1", a.upto("-2") {|s| + assert_equal(start, s) + start.succ! 
+ count += 1 + }) + assert_equal(2, count) + + assert_raise(TypeError) { "a".upto(:c) {} } +end + +assert('String#ord') do + got = "hello!".split('').map {|x| x.ord} + expect = [104, 101, 108, 108, 111, 33] + unless UTF8STRING + got << "\xff".ord + expect << 0xff + end + assert_equal expect, got +end + +assert('String#ord(UTF-8)') do + got = "こんにちは世界!".split('').map {|x| x.ord} + expect = [0x3053,0x3093,0x306b,0x3061,0x306f,0x4e16,0x754c,0x21] + assert_equal expect, got +end if UTF8STRING + +assert('String#chr') do + assert_equal "a", "abcde".chr + assert_equal "h", "hello!".chr + assert_equal "", "".chr +end + +assert('String#chr(UTF-8)') do + assert_equal "こ", "こんにちは世界!".chr +end if UTF8STRING + +assert('String#chars') do + expect = ["h", "e", "l", "l", "o", "!"] + assert_equal expect, "hello!".chars + s = "" + "hello!".chars do |x| + s += x + end + assert_equal "hello!", s +end + +assert('String#chars(UTF-8)') do + expect = ['こ', 'ん', 'に', 'ち', 'は', '世', '界', '!'] + assert_equal expect, "こんにちは世界!".chars + s = "" + "こんにちは世界!".chars do |x| + s += x + end + assert_equal "こんにちは世界!", s +end if UTF8STRING + +assert('String#each_char') do + chars = [] + "hello!".each_char do |x| + chars << x + end + assert_equal ["h", "e", "l", "l", "o", "!"], chars +end + +assert('String#each_char(UTF-8)') do + chars = [] + "こんにちは世界!".each_char do |x| + chars << x + end + assert_equal ["こ", "ん", "に", "ち", "は", "世", "界", "!"], chars +end if UTF8STRING + +assert('String#codepoints') do + expect = [104, 101, 108, 108, 111, 33] + assert_equal expect, "hello!".codepoints + cp = [] + "hello!".codepoints do |x| + cp << x + end + assert_equal expect, cp +end + +assert('String#codepoints(UTF-8)') do + expect = [12371, 12435, 12395, 12385, 12399, 19990, 30028, 33] + assert_equal expect, "こんにちは世界!".codepoints + cp = [] + "こんにちは世界!".codepoints do |x| + cp << x + end + assert_equal expect, cp +end if UTF8STRING + +assert('String#each_codepoint') do + expect = [104, 101, 108, 108, 111, 33] + cp = 
[] + "hello!".each_codepoint do |x| + cp << x + end + assert_equal expect, cp +end + +assert('String#each_codepoint(UTF-8)') do + expect = [12371, 12435, 12395, 12385, 12399, 19990, 30028, 33] + cp = [] + "こんにちは世界!".each_codepoint do |x| + cp << x + end + assert_equal expect, cp +end if UTF8STRING + +assert('String#delete_prefix') do + assert_equal "llo", "hello".delete_prefix("he") + assert_equal "hello", "hello".delete_prefix("llo") + assert_equal "llo", "hello".delete_prefix!("he") + assert_nil "hello".delete_prefix!("llo") +end + +assert('String#delete_suffix') do + assert_equal "he", "hello".delete_suffix("llo") + assert_equal "hello", "hello".delete_suffix("he") + assert_equal "he", "hello".delete_suffix!("llo") + assert_nil "hello".delete_suffix!("he") +end |
