summary refs log tree commit diff homepage
path: root/mrbgems/mruby-string-ext
diff options
context:
space:
mode:
Diffstat (limited to 'mrbgems/mruby-string-ext')
-rw-r--r-- mrbgems/mruby-string-ext/mrbgem.rake | 2
-rw-r--r-- mrbgems/mruby-string-ext/mrblib/string.rb | 295
-rw-r--r-- mrbgems/mruby-string-ext/src/string.c | 1022
-rw-r--r-- mrbgems/mruby-string-ext/test/numeric.rb | 29
-rw-r--r-- mrbgems/mruby-string-ext/test/range.rb | 26
-rw-r--r-- mrbgems/mruby-string-ext/test/string.rb | 453
6 files changed, 1687 insertions, 140 deletions
diff --git a/mrbgems/mruby-string-ext/mrbgem.rake b/mrbgems/mruby-string-ext/mrbgem.rake
index 688589933..f2df5a783 100644
--- a/mrbgems/mruby-string-ext/mrbgem.rake
+++ b/mrbgems/mruby-string-ext/mrbgem.rake
@@ -1,5 +1,5 @@
MRuby::Gem::Specification.new('mruby-string-ext') do |spec|
spec.license = 'MIT'
spec.author = 'mruby developers'
- spec.summary = 'extensional String class'
+ spec.summary = 'String class extension'
end
diff --git a/mrbgems/mruby-string-ext/mrblib/string.rb b/mrbgems/mruby-string-ext/mrblib/string.rb
index 34744cc38..2b3071567 100644
--- a/mrbgems/mruby-string-ext/mrblib/string.rb
+++ b/mrbgems/mruby-string-ext/mrblib/string.rb
@@ -26,7 +26,7 @@ class String
def lstrip
a = 0
z = self.size - 1
- a += 1 while " \f\n\r\t\v".include?(self[a]) and a <= z
+ a += 1 while a <= z and " \f\n\r\t\v".include?(self[a])
(z >= 0) ? self[a..z] : ""
end
@@ -43,7 +43,7 @@ class String
def rstrip
a = 0
z = self.size - 1
- z -= 1 while " \f\n\r\t\v\0".include?(self[z]) and a <= z
+ z -= 1 while a <= z and " \f\n\r\t\v\0".include?(self[z])
(z >= 0) ? self[a..z] : ""
end
@@ -59,8 +59,8 @@ class String
def strip
a = 0
z = self.size - 1
- a += 1 while " \f\n\r\t\v".include?(self[a]) and a <= z
- z -= 1 while " \f\n\r\t\v\0".include?(self[z]) and a <= z
+ a += 1 while a <= z and " \f\n\r\t\v".include?(self[a])
+ z -= 1 while a <= z and " \f\n\r\t\v\0".include?(self[z])
(z >= 0) ? self[a..z] : ""
end
@@ -76,6 +76,7 @@ class String
# "hello".lstrip! #=> nil
#
def lstrip!
+ raise FrozenError, "can't modify frozen String" if frozen?
s = self.lstrip
(s == self) ? nil : self.replace(s)
end
@@ -92,6 +93,7 @@ class String
# "hello".rstrip! #=> nil
#
def rstrip!
+ raise FrozenError, "can't modify frozen String" if frozen?
s = self.rstrip
(s == self) ? nil : self.replace(s)
end
@@ -104,6 +106,7 @@ class String
# <code>nil</code> if <i>str</i> was not altered.
#
def strip!
+ raise FrozenError, "can't modify frozen String" if frozen?
s = self.strip
(s == self) ? nil : self.replace(s)
end
@@ -120,9 +123,22 @@ class String
# "abcdef".casecmp("ABCDEF") #=> 0
#
def casecmp(str)
- self.downcase <=> str.to_str.downcase
+ self.downcase <=> str.__to_str.downcase
rescue NoMethodError
- raise TypeError, "no implicit conversion of #{str.class} into String"
+ nil
+ end
+
+ ##
+ # call-seq:
+ # str.casecmp?(other_str) -> true, false, or nil
+ #
+ # Returns true if str and other_str are equal after case folding,
+ # false if they are not equal, and nil if other_str is not a string.
+
+ def casecmp?(str)
+   # casecmp answers nil when +str+ cannot be compared as a String; pass
+   # that through, otherwise report whether the comparison was a tie.
+   order = casecmp(str)
+   order.nil? ? nil : order == 0
end
def partition(sep)
@@ -132,7 +148,7 @@ class String
m = n + sep.size
[ slice(0, n), sep, slice(m, size - m) ]
else
- [ self, "", "" ]
+ [ self[0..-1], "", "" ]
end
end
@@ -164,9 +180,10 @@ class String
# string #=> "thsa sting"
#
def slice!(arg1, arg2=nil)
- raise "wrong number of arguments (for 1..2)" if arg1 == nil && arg2 == nil
+ raise FrozenError, "can't modify frozen String" if frozen?
+ raise "wrong number of arguments (for 1..2)" if arg1.nil? && arg2.nil?
- if arg1 != nil && arg2 != nil
+ if !arg1.nil? && !arg2.nil?
idx = arg1
idx += self.size if arg1 < 0
if idx >= 0 && idx <= self.size && arg2 > 0
@@ -188,7 +205,7 @@ class String
else
idx = arg1
idx += self.size if arg1 < 0
- validated = true if idx >=0 && arg1 < self.size
+ validated = true if idx >=0 && arg1 < self.size
end
if validated
str = self[arg1]
@@ -196,8 +213,8 @@ class String
return nil
end
end
- unless str == nil || str == ""
- if arg1 != nil && arg2 !=nil
+ unless str.nil? || str == ""
+ if !arg1.nil? && !arg2.nil?
idx = arg1 >= 0 ? arg1 : self.size+arg1
str2 = self[0...idx] + self[idx+arg2..-1].to_s
else
@@ -207,14 +224,264 @@ class String
str2 = self[0...idx] + self[idx2+1..-1].to_s
elsif arg1.kind_of?(String)
idx = self.index(arg1)
- str2 = self[0...idx] + self[idx+arg1.size..-1] unless idx == nil
+ str2 = self[0...idx] + self[idx+arg1.size..-1] unless idx.nil?
else
idx = arg1 >= 0 ? arg1 : self.size+arg1
str2 = self[0...idx] + self[idx+1..-1].to_s
end
end
- self.replace(str2) unless str2 == nil
+ self.replace(str2) unless str2.nil?
end
str
end
+
+ ##
+ # call-seq:
+ # str.insert(index, other_str) -> str
+ #
+ # Inserts <i>other_str</i> before the character at the given
+ # <i>index</i>, modifying <i>str</i>. Negative indices count from the
+ # end of the string, and insert <em>after</em> the given character.
+ # The intent is to insert <i>other_str</i> so that it starts at the given
+ # <i>index</i>.
+ #
+ # "abcd".insert(0, 'X') #=> "Xabcd"
+ # "abcd".insert(3, 'X') #=> "abcXd"
+ # "abcd".insert(4, 'X') #=> "abcdX"
+ # "abcd".insert(-3, 'X') #=> "abXcd"
+ # "abcd".insert(-1, 'X') #=> "abcdX"
+ #
+ def insert(idx, str)
+ if idx == -1
+ return self << str
+ elsif idx < 0
+ idx += 1
+ end
+ self[idx, 0] = str
+ self
+ end
+
+ ##
+ # call-seq:
+ # str.ljust(integer, padstr=' ') -> new_str
+ #
+ # If <i>integer</i> is greater than the length of <i>str</i>, returns a new
+ # <code>String</code> of length <i>integer</i> with <i>str</i> left justified
+ # and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
+ #
+ # "hello".ljust(4) #=> "hello"
+ # "hello".ljust(20) #=> "hello "
+ # "hello".ljust(20, '1234') #=> "hello123412341234123"
+ def ljust(idx, padstr = ' ')
+ raise ArgumentError, 'zero width padding' if padstr == ''
+ return self if idx <= self.size
+ pad_repetitions = idx / padstr.size
+ padding = (padstr * pad_repetitions)[0, idx-self.size]
+ self + padding
+ end
+
+ ##
+ # call-seq:
+ # str.rjust(integer, padstr=' ') -> new_str
+ #
+ # If <i>integer</i> is greater than the length of <i>str</i>, returns a new
+ # <code>String</code> of length <i>integer</i> with <i>str</i> right justified
+ # and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
+ #
+ # "hello".rjust(4) #=> "hello"
+ # "hello".rjust(20) #=> " hello"
+ # "hello".rjust(20, '1234') #=> "123412341234123hello"
+ def rjust(idx, padstr = ' ')
+ raise ArgumentError, 'zero width padding' if padstr == ''
+ return self if idx <= self.size
+ pad_repetitions = idx / padstr.size
+ padding = (padstr * pad_repetitions)[0, idx-self.size]
+ padding + self
+ end
+
+ ##
+ # call-seq:
+ # str.center(width, padstr=' ') -> new_str
+ #
+ # Centers +str+ in +width+. If +width+ is greater than the length of +str+,
+ # returns a new String of length +width+ with +str+ centered and padded with
+ # +padstr+; otherwise, returns +str+.
+ #
+ # "hello".center(4) #=> "hello"
+ # "hello".center(20) #=> " hello "
+ # "hello".center(20, '123') #=> "1231231hello12312312"
+ def center(width, padstr = ' ')
+ raise ArgumentError, 'zero width padding' if padstr == ''
+ return self if width <= self.size
+ width -= self.size
+ pad1 = width / 2
+ pad2 = width - pad1
+ (padstr*pad1)[0,pad1] + self + (padstr*pad2)[0,pad2]
+ end
+
+ def chars(&block)
+ if block_given?
+ self.split('').each do |i|
+ block.call(i)
+ end
+ self
+ else
+ self.split('')
+ end
+ end
+
+ ##
+ # Call the given block for each character of
+ # +self+.
+ def each_char(&block)
+ return to_enum :each_char unless block
+ pos = 0
+ while pos < self.size
+ block.call(self[pos])
+ pos += 1
+ end
+ self
+ end
+
+ def codepoints(&block)
+ if block_given?
+ self.split('').each do|x|
+ block.call(x.ord)
+ end
+ self
+ else
+ self.split('').map{|x| x.ord}
+ end
+ end
+ alias each_codepoint codepoints
+
+ ##
+ # call-seq:
+ # str.prepend(other_str) -> str
+ #
+ # Prepend---Prepend the given string to <i>str</i>.
+ #
+ # a = "world"
+ # a.prepend("hello ") #=> "hello world"
+ # a #=> "hello world"
+ def prepend(arg)
+ self[0, 0] = arg # assigning to the zero-length slice at offset 0 splices arg in front
+ self # return the (modified) receiver
+ end
+
+ ##
+ # call-seq:
+ # string.lines -> array of string
+ # string.lines {|s| block} -> array of string
+ #
+ # Returns strings per line;
+ #
+ # a = "abc\ndef"
+ # a.lines #=> ["abc\n", "def"]
+ #
+ # If a block is given, each line is also passed to the block in order.
+ def lines(&blk)
+ lines = self.__lines
+ if blk
+ lines.each do |line|
+ blk.call(line)
+ end
+ end
+ lines
+ end
+
+ ##
+ # call-seq:
+ # str.upto(other_str, exclusive=false) {|s| block } -> str
+ # str.upto(other_str, exclusive=false) -> an_enumerator
+ #
+ # Iterates through successive values, starting at <i>str</i> and
+ # ending at <i>other_str</i> inclusive, passing each value in turn to
+ # the block. The <code>String#succ</code> method is used to generate
+ # each value. If optional second argument exclusive is omitted or is false,
+ # the last value will be included; otherwise it will be excluded.
+ #
+ # If no block is given, an enumerator is returned instead.
+ #
+ # "a8".upto("b6") {|s| print s, ' ' }
+ # for s in "a8".."b6"
+ # print s, ' '
+ # end
+ #
+ # <em>produces:</em>
+ #
+ # a8 a9 b0 b1 b2 b3 b4 b5 b6
+ # a8 a9 b0 b1 b2 b3 b4 b5 b6
+ #
+ # If <i>str</i> and <i>other_str</i> contain only ASCII numeric characters,
+ # both are recognized as decimal numbers. In addition, the width of
+ # the string (e.g. leading zeros) is handled appropriately.
+ #
+ # "9".upto("11").to_a #=> ["9", "10", "11"]
+ # "25".upto("5").to_a #=> []
+ # "07".upto("11").to_a #=> ["07", "08", "09", "10", "11"]
+ def upto(max, exclusive=false, &block)
+ return to_enum(:upto, max, exclusive) unless block
+ raise TypeError, "no implicit conversion of #{max.class} into String" unless max.kind_of? String
+
+ len = self.length
+ maxlen = max.length
+ # single character
+ if len == 1 and maxlen == 1
+ c = self.ord
+ e = max.ord
+ while c <= e
+ break if exclusive and c == e
+ yield c.chr(__ENCODING__)
+ c += 1
+ end
+ return self
+ end
+ # both edges are all digits
+ bi = self.to_i(10)
+ ei = max.to_i(10)
+ if (bi > 0 or bi == "0"*len) and (ei > 0 or ei == "0"*maxlen)
+ while bi <= ei
+ break if exclusive and bi == ei
+ s = bi.to_s
+ s = s.rjust(len, "0") if s.length < len
+ yield s
+ bi += 1
+ end
+ return self
+ end
+ bs = self
+ while true
+ n = (bs <=> max)
+ break if n > 0
+ break if exclusive and n == 0
+ yield bs
+ break if n == 0
+ bsiz = bs.size
+ break if bsiz > max.size || bsiz == 0
+ bs = bs.succ
+ end
+ self
+ end
+
+ def __upto_endless(&block)
+ len = self.length
+ # both edges are all digits
+ bi = self.to_i(10)
+ if bi > 0 or bi == "0"*len
+ while true
+ s = bi.to_s
+ s = s.rjust(len, "0") if s.length < len
+ yield s
+ bi += 1
+ end
+ return self
+ end
+ bs = self
+ while true
+ yield bs
+ bs = bs.succ
+ end
+ self
+ end
end
diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c
index 9db3589c7..158cb5193 100644
--- a/mrbgems/mruby-string-ext/src/string.c
+++ b/mrbgems/mruby-string-ext/src/string.c
@@ -1,23 +1,94 @@
-#include <ctype.h>
#include <string.h>
-#include "mruby.h"
-#include "mruby/array.h"
-#include "mruby/class.h"
-#include "mruby/string.h"
+#include <mruby.h>
+#include <mruby/array.h>
+#include <mruby/class.h>
+#include <mruby/string.h>
+#include <mruby/range.h>
+
+#define ENC_ASCII_8BIT "ASCII-8BIT"
+#define ENC_BINARY "BINARY"
+#define ENC_UTF8 "UTF-8"
+
+#define ENC_COMP_P(enc, enc_lit) \
+ str_casecmp_p(RSTRING_PTR(enc), RSTRING_LEN(enc), enc_lit, sizeof(enc_lit"")-1)
+
+#ifdef MRB_NO_FLOAT
+# define mrb_float_p(o) FALSE
+#endif
+
+static mrb_bool
+str_casecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2)
+{
+  mrb_int i;
+
+  /* buffers of different length can never be equal */
+  if (len1 != len2) return FALSE;
+  /* compare byte by byte, folding case only when the raw bytes differ */
+  for (i = 0; i < len1; i++) {
+    if (s1[i] != s2[i] && TOUPPER(s1[i]) != TOUPPER(s2[i])) return FALSE;
+  }
+  return TRUE;
+}
static mrb_value
-mrb_str_getbyte(mrb_state *mrb, mrb_value str)
+int_chr_binary(mrb_state *mrb, mrb_value num)
{
- mrb_int pos;
- mrb_get_args(mrb, "i", &pos);
+ mrb_int cp = mrb_as_int(mrb, num);
+ char c;
+ mrb_value str;
- if (pos < 0)
- pos += RSTRING_LEN(str);
- if (pos < 0 || RSTRING_LEN(str) <= pos)
- return mrb_nil_value();
+ if (cp < 0 || 0xff < cp) {
+ mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num);
+ }
+ c = (char)cp;
+ str = mrb_str_new(mrb, &c, 1);
+ RSTR_SET_ASCII_FLAG(mrb_str_ptr(str));
+ return str;
+}
+
+#ifdef MRB_UTF8_STRING
+static mrb_value
+int_chr_utf8(mrb_state *mrb, mrb_value num)
+{
+  /* use the same integer conversion as int_chr_binary() */
+  mrb_int cp = mrb_as_int(mrb, num);
+  char utf8[4];
+  mrb_int len;
+  mrb_value str;
+  uint32_t ascii_flag = 0;
- return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[pos]);
+  /* only code points U+0000..U+10FFFF can be encoded */
+  if (cp < 0 || 0x10FFFF < cp) {
+    mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num);
+  }
+  if (cp < 0x80) {
+    /* one byte; the result is pure ASCII, so remember to flag it */
+    utf8[0] = (char)cp;
+    len = 1;
+    ascii_flag = MRB_STR_ASCII;
+  }
+  else if (cp < 0x800) {
+    /* two bytes: 110xxxxx 10xxxxxx */
+    utf8[0] = (char)(0xC0 | (cp >> 6));
+    utf8[1] = (char)(0x80 | (cp & 0x3F));
+    len = 2;
+  }
+  else if (cp < 0x10000) {
+    /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
+    utf8[0] = (char)(0xE0 | (cp >> 12));
+    utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
+    utf8[2] = (char)(0x80 | ( cp & 0x3F));
+    len = 3;
+  }
+  else {
+    /* four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+    utf8[0] = (char)(0xF0 | (cp >> 18));
+    utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
+    utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
+    utf8[3] = (char)(0x80 | ( cp & 0x3F));
+    len = 4;
+  }
+  str = mrb_str_new(mrb, utf8, len);
+  mrb_str_ptr(str)->flags |= ascii_flag;
+  return str;
}
+#endif
/*
* call-seq:
@@ -83,18 +154,27 @@ mrb_str_swapcase(mrb_state *mrb, mrb_value self)
*
* Append---Concatenates the given object to <i>str</i>. If the object is a
* <code>Integer</code>, it is considered as a codepoint, and is converted
- * to a character before concatenation.
+ * to a character before concatenation
+ * (equivalent to <code>str.concat(integer.chr(__ENCODING__))</code>).
*
* a = "hello "
* a << "world" #=> "hello world"
* a.concat(33) #=> "hello world!"
*/
static mrb_value
-mrb_str_concat2(mrb_state *mrb, mrb_value self)
+mrb_str_concat_m(mrb_state *mrb, mrb_value self)
{
- mrb_value str;
- mrb_get_args(mrb, "S", &str);
- mrb_str_concat(mrb, self, str);
+ mrb_value str = mrb_get_arg1(mrb);
+
+ if (mrb_integer_p(str) || mrb_float_p(str))
+#ifdef MRB_UTF8_STRING
+ str = int_chr_utf8(mrb, str);
+#else
+ str = int_chr_binary(mrb, str);
+#endif
+ else
+ mrb_ensure_string_type(mrb, str);
+ mrb_str_cat_str(mrb, self, str);
return self;
}
@@ -114,14 +194,15 @@ mrb_str_concat2(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_start_with(mrb_state *mrb, mrb_value self)
{
- mrb_value *argv, sub;
+ const mrb_value *argv;
+ mrb_value sub;
mrb_int argc, i;
mrb_get_args(mrb, "*", &argv, &argc);
for (i = 0; i < argc; i++) {
size_t len_l, len_r;
int ai = mrb_gc_arena_save(mrb);
- sub = mrb_string_type(mrb, argv[i]);
+ sub = mrb_ensure_string_type(mrb, argv[i]);
mrb_gc_arena_restore(mrb, ai);
len_l = RSTRING_LEN(self);
len_r = RSTRING_LEN(sub);
@@ -143,14 +224,15 @@ mrb_str_start_with(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_end_with(mrb_state *mrb, mrb_value self)
{
- mrb_value *argv, sub;
+ const mrb_value *argv;
+ mrb_value sub;
mrb_int argc, i;
mrb_get_args(mrb, "*", &argv, &argc);
for (i = 0; i < argc; i++) {
size_t len_l, len_r;
int ai = mrb_gc_arena_save(mrb);
- sub = mrb_string_type(mrb, argv[i]);
+ sub = mrb_ensure_string_type(mrb, argv[i]);
mrb_gc_arena_restore(mrb, ai);
len_l = RSTRING_LEN(self);
len_r = RSTRING_LEN(sub);
@@ -165,6 +247,591 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self)
return mrb_false_value();
}
+enum tr_pattern_type {
+ TR_UNINITIALIZED = 0, /* node not filled in yet (state of a zero-initialized struct) */
+ TR_IN_ORDER = 1, /* literal run of characters taken from the pattern string */
+ TR_RANGE = 2, /* <ch>-<ch> range */
+};
+
+/*
+ #tr Pattern syntax
+
+ <syntax> ::= (<pattern>)* | '^' (<pattern>)*
+ <pattern> ::= <in order> | <range>
+ <in order> ::= (<ch>)+
+ <range> ::= <ch> '-' <ch>
+*/
+struct tr_pattern {
+ uint8_t type; // enum tr_pattern_type -- 0:uninitialized, 1:in-order, 2:range
+ mrb_bool flag_reverse : 1; // set by the parser when the pattern starts with '^'
+ mrb_bool flag_on_heap : 1; // node was allocated by the parser; tr_free_pattern() frees only these
+ uint16_t n; // number of characters this node covers
+ union {
+ uint16_t start_pos; // in-order: offset of the run inside the pattern string
+ char ch[2]; // range: first and last character
+ } val;
+ struct tr_pattern *next; // next node in the list, or NULL
+};
+
+#define STATIC_TR_PATTERN { 0 }
+
+static inline void
+tr_free_pattern(mrb_state *mrb, struct tr_pattern *pat)
+{
+  struct tr_pattern *next;
+
+  /* Walk the list, releasing only the nodes that live on the heap. */
+  for (; pat != NULL; pat = next) {
+    next = pat->next;   /* read the link before the node may be released */
+    if (pat->flag_on_heap) {
+      mrb_free(mrb, pat);
+    }
+  }
+}
+
+/*
+ * Parse a tr-style pattern string into a linked list of pattern nodes.
+ *
+ * ret must point to caller-provided storage; it becomes the first node when
+ * its type is still TR_UNINITIALIZED, and every further node is allocated
+ * with mrb_malloc_simple() and flagged flag_on_heap.  A leading '^' negates
+ * the pattern only when flag_reverse_enable is set and the pattern is at
+ * least two bytes long.  Returns the head of the list.  On allocation
+ * failure or an over-long pattern the nodes built so far are released
+ * before the exception is raised.
+ */
+static struct tr_pattern*
+tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_pattern, mrb_bool flag_reverse_enable)
+{
+  const char *pattern = RSTRING_PTR(v_pattern);
+  mrb_int pattern_length = RSTRING_LEN(v_pattern);
+  mrb_bool flag_reverse = FALSE;
+  struct tr_pattern *pat1;
+  mrb_int i = 0;
+
+  if (flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') {
+    flag_reverse = TRUE;
+    i++;
+  }
+
+  while (i < pattern_length) {
+    /* the first node reuses the caller's storage, later ones are allocated */
+    mrb_bool const ret_uninit = (ret->type == TR_UNINITIALIZED);
+    pat1 = ret_uninit
+      ? ret
+      : (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern));
+    /* is range pattern ? */
+    if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') {
+      if (pat1 == NULL && ret) {
+      nomem:
+        tr_free_pattern(mrb, ret);
+        mrb_exc_raise(mrb, mrb_obj_value(mrb->nomem_err));
+        return NULL;            /* not reached */
+      }
+      pat1->type = TR_RANGE;
+      pat1->flag_reverse = flag_reverse;
+      pat1->flag_on_heap = !ret_uninit;
+      pat1->n = pattern[i+2] - pattern[i] + 1;
+      pat1->next = NULL;
+      pat1->val.ch[0] = pattern[i];
+      pat1->val.ch[1] = pattern[i+2];
+      i += 3;
+    }
+    else {
+      /* in order pattern. */
+      mrb_int start_pos = i++;
+      mrb_int len;
+
+      /* extend the run until the next range starts or the pattern ends */
+      while (i < pattern_length) {
+        if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-')
+          break;
+        i++;
+      }
+
+      len = i - start_pos;
+      /* both values are stored in 16-bit fields; refuse instead of truncating */
+      if (len > UINT16_MAX || start_pos > UINT16_MAX) {
+        /* release the not-yet-linked node and the list built so far */
+        if (pat1 != NULL && !ret_uninit) {
+          mrb_free(mrb, pat1);
+        }
+        tr_free_pattern(mrb, ret);
+        mrb_raise(mrb, E_ARGUMENT_ERROR, "tr pattern too long (max 65535)");
+      }
+      if (pat1 == NULL && ret) {
+        goto nomem;
+      }
+      pat1->type = TR_IN_ORDER;
+      pat1->flag_reverse = flag_reverse;
+      pat1->flag_on_heap = !ret_uninit;
+      pat1->n = (uint16_t)len;
+      pat1->next = NULL;
+      pat1->val.start_pos = (uint16_t)start_pos;
+    }
+
+    if (ret == NULL || ret_uninit) {
+      ret = pat1;
+    }
+    else {
+      /* append the new node to the end of the list */
+      struct tr_pattern *p = ret;
+      while (p->next != NULL) {
+        p = p->next;
+      }
+      p->next = pat1;
+    }
+  }
+
+  return ret;
+}
+
+/*
+ * Look ch up in the pattern list and return its position counted across
+ * all nodes, or -1 when it is absent.  When several positions match, the
+ * last one wins.  For a negated pattern the answer is inverted: absent
+ * characters yield MRB_INT_MAX and present ones yield -1.
+ */
+static inline mrb_int
+tr_find_character(const struct tr_pattern *pat, const char *pat_str, int ch)
+{
+  const struct tr_pattern *node;
+  mrb_int found = -1;
+  mrb_int offset = 0;      /* characters covered by the nodes already visited */
+  mrb_int negated = pat ? pat->flag_reverse : 0;
+
+  for (node = pat; node != NULL; node = node->next) {
+    switch (node->type) {
+    case TR_IN_ORDER: {
+      int k;
+      for (k = 0; k < node->n; k++) {
+        if (pat_str[node->val.start_pos + k] == ch) found = offset + k;
+      }
+      break;
+    }
+    case TR_RANGE:
+      if (node->val.ch[0] <= ch && ch <= node->val.ch[1])
+        found = offset + ch - node->val.ch[0];
+      break;
+    default:
+      mrb_assert(node->type == TR_UNINITIALIZED);
+      break;
+    }
+    offset += node->n;
+  }
+
+  if (negated) {
+    return (found < 0) ? MRB_INT_MAX : -1;
+  }
+  return found;
+}
+
+static inline mrb_int
+tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th) /* n_th-th character of the pattern list; the last one if n_th is past the end; -1 if there is none */
+{
+ mrb_int n_sum = 0; /* characters covered by the nodes already visited */
+
+ while (pat != NULL) {
+ if (n_th < (n_sum + pat->n)) { /* n_th falls inside this node */
+ mrb_int i = (n_th - n_sum); /* offset within this node */
+
+ switch (pat->type) {
+ case TR_IN_ORDER:
+ return pat_str[pat->val.start_pos + i];
+ case TR_RANGE:
+ return pat->val.ch[0]+i;
+ case TR_UNINITIALIZED:
+ return -1; /* empty pattern: nothing to return */
+ }
+ }
+ if (pat->next == NULL) { /* ran off the end: answer with the last character of the last node */
+ switch (pat->type) {
+ case TR_IN_ORDER:
+ return pat_str[pat->val.start_pos + pat->n - 1];
+ case TR_RANGE:
+ return pat->val.ch[1];
+ case TR_UNINITIALIZED:
+ return -1;
+ }
+ }
+ n_sum += pat->n;
+ pat = pat->next;
+ }
+
+ return -1; /* pat was NULL */
+}
+
+static inline void
+tr_bitmap_set(uint8_t bitmap[32], uint8_t ch)
+{
+  /* one bit per byte value: byte ch/8 of the map, bit ch%8 within it */
+  bitmap[ch >> 3] |= (1 << (ch & 7));
+}
+
+static inline mrb_bool
+tr_bitmap_detect(uint8_t bitmap[32], uint8_t ch)
+{
+  /* test the bit that tr_bitmap_set() would have set for ch */
+  return (bitmap[ch >> 3] & (1 << (ch & 7))) ? TRUE : FALSE;
+}
+
+/*
+ * Compile a parsed pattern into a 256-bit bitmap with one bit per byte
+ * value.  pstr is the pattern string the in-order nodes index into.  For a
+ * negated pattern the finished bitmap is inverted.
+ */
+static void
+tr_compile_pattern(const struct tr_pattern *pat, mrb_value pstr, uint8_t bitmap[32])
+{
+  const char *pattern = RSTRING_PTR(pstr);
+  mrb_int flag_reverse = pat ? pat->flag_reverse : 0;
+  int i;
+
+  for (i=0; i<32; i++) {
+    bitmap[i] = 0;
+  }
+  while (pat != NULL) {
+    if (pat->type == TR_IN_ORDER) {
+      for (i = 0; i < pat->n; i++) {
+        tr_bitmap_set(bitmap, pattern[pat->val.start_pos + i]);
+      }
+    }
+    else if (pat->type == TR_RANGE) {
+      /* a range c1-c2 includes both ends, so the upper bound is set too */
+      for (i = pat->val.ch[0]; i <= pat->val.ch[1]; i++) {
+        tr_bitmap_set(bitmap, i);
+      }
+    }
+    else {
+      mrb_assert(pat->type == TR_UNINITIALIZED);
+    }
+    pat = pat->next;
+  }
+
+  if (flag_reverse) {
+    for (i=0; i<32; i++) {
+      bitmap[i] ^= 0xff;
+    }
+  }
+}
+
+/*
+ * Translate str in place: every character matched by pattern p1 is replaced
+ * by the character at the same position in pattern p2 (p2 is padded with
+ * its last character; an empty p2 deletes the matched characters).  With
+ * squeeze set, a run of translated characters that map to the same
+ * replacement collapses into one.  Returns TRUE when at least one character
+ * matched p1.
+ */
+static mrb_bool
+str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze)
+{
+  struct tr_pattern pat = STATIC_TR_PATTERN;
+  struct tr_pattern rep_storage = STATIC_TR_PATTERN;
+  char *s;
+  mrb_int len;
+  mrb_int i;     /* read position */
+  mrb_int j;     /* write position; falls behind i once characters are dropped */
+  mrb_bool flag_changed = FALSE;
+  mrb_int lastch = -1;
+  struct tr_pattern *rep;
+
+  mrb_str_modify(mrb, mrb_str_ptr(str));
+  tr_parse_pattern(mrb, &pat, p1, TRUE);
+  rep = tr_parse_pattern(mrb, &rep_storage, p2, FALSE);
+  s = RSTRING_PTR(str);
+  len = RSTRING_LEN(str);
+
+  for (i=j=0; i<len; i++,j++) {
+    mrb_int n = tr_find_character(&pat, RSTRING_PTR(p1), s[i]);
+
+    if (i>j) s[j] = s[i];
+    if (n < 0) {
+      /* an untranslated character ends the current run of replacements */
+      lastch = -1;
+      continue;
+    }
+    flag_changed = TRUE;
+    if (rep == NULL) {
+      j--;
+    }
+    else {
+      mrb_int c = tr_get_character(rep, RSTRING_PTR(p2), n);
+
+      if (c < 0 || (squeeze && c == lastch)) {
+        j--;
+        continue;
+      }
+      /* replacements are limited to the ASCII region (0x00..0x7f) */
+      if (c > 0x7f) {
+        /* release the pattern nodes before the exception unwinds */
+        tr_free_pattern(mrb, &pat);
+        tr_free_pattern(mrb, rep);
+        mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%i) out of range", c);
+      }
+      lastch = c;
+      /* store at the write position, which differs from i after deletions */
+      s[j] = (char)c;
+    }
+  }
+
+  tr_free_pattern(mrb, &pat);
+  tr_free_pattern(mrb, rep);
+
+  if (flag_changed) {
+    RSTR_SET_LEN(RSTRING(str), j);
+    RSTRING_PTR(str)[j] = 0;
+  }
+  return flag_changed;
+}
+
+/*
+ * call-seq:
+ * str.tr(from_str, to_str) => new_str
+ *
+ * Returns a copy of str with the characters in from_str replaced by the
+ * corresponding characters in to_str. If to_str is shorter than from_str,
+ * it is padded with its last character in order to maintain the
+ * correspondence.
+ *
+ * "hello".tr('el', 'ip') #=> "hippo"
+ * "hello".tr('aeiou', '*') #=> "h*ll*"
+ * "hello".tr('aeiou', 'AA*') #=> "hAll*"
+ *
+ * Both strings may use the c1-c2 notation to denote ranges of characters,
+ * and from_str may start with a ^, which denotes all characters except
+ * those listed.
+ *
+ * "hello".tr('a-y', 'b-z') #=> "ifmmp"
+ * "hello".tr('^aeiou', '*') #=> "*e**o"
+ *
+ * The backslash character \ can be used to escape ^ or - and is otherwise
+ * ignored unless it appears at the end of a range or the end of the
+ * from_str or to_str:
+ *
+ *
+ * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld"
+ * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld"
+ *
+ * "hello\r\nworld".tr("\r", "") #=> "hello\nworld"
+ * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold"
+ * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld"
+ *
+ * "X['\\b']".tr("X\\", "") #=> "['b']"
+ * "X['\\b']".tr("X-\\]", "") #=> "'b'"
+ *
+ * Note: conversion is effective only in ASCII region.
+ */
+static mrb_value
+mrb_str_tr(mrb_state *mrb, mrb_value str)
+{
+  mrb_value from, to;
+  mrb_value copy;
+
+  mrb_get_args(mrb, "SS", &from, &to);
+  /* translate a duplicate so the receiver itself is not modified */
+  copy = mrb_str_dup(mrb, str);
+  str_tr(mrb, copy, from, to, FALSE);
+  return copy;
+}
+
+/*
+ * call-seq:
+ * str.tr!(from_str, to_str) -> str or nil
+ *
+ * Translates str in place, using the same rules as String#tr.
+ * Returns str, or nil if no changes were made.
+ */
+static mrb_value
+mrb_str_tr_bang(mrb_state *mrb, mrb_value str)
+{
+  mrb_value from, to;
+
+  mrb_get_args(mrb, "SS", &from, &to);
+  /* nil signals that the receiver was left as it was */
+  return str_tr(mrb, str, from, to, FALSE) ? str : mrb_nil_value();
+}
+
+/*
+ * call-seq:
+ * str.tr_s(from_str, to_str) -> new_str
+ *
+ * Processes a copy of str as described under String#tr, then removes
+ * duplicate characters in regions that were affected by the translation.
+ *
+ * "hello".tr_s('l', 'r') #=> "hero"
+ * "hello".tr_s('el', '*') #=> "h*o"
+ * "hello".tr_s('el', 'hx') #=> "hhxo"
+ */
+static mrb_value
+mrb_str_tr_s(mrb_state *mrb, mrb_value str)
+{
+  mrb_value from, to;
+  mrb_value copy;
+
+  mrb_get_args(mrb, "SS", &from, &to);
+  /* translate-and-squeeze a duplicate so the receiver is not modified */
+  copy = mrb_str_dup(mrb, str);
+  str_tr(mrb, copy, from, to, TRUE);
+  return copy;
+}
+
+/*
+ * call-seq:
+ * str.tr_s!(from_str, to_str) -> str or nil
+ *
+ * Performs String#tr_s processing on str in place, returning
+ * str, or nil if no changes were made.
+ */
+static mrb_value
+mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str)
+{
+  mrb_value from, to;
+
+  mrb_get_args(mrb, "SS", &from, &to);
+  /* nil signals that the receiver was left as it was */
+  return str_tr(mrb, str, from, to, TRUE) ? str : mrb_nil_value();
+}
+
+/*
+ * Collapse runs of identical characters in str in place.  With a pattern,
+ * only characters selected by it are squeezed; with nil, every character
+ * whose char value is non-negative is a candidate.  Returns TRUE when
+ * something was removed.
+ */
+static mrb_bool
+str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat)
+{
+  struct tr_pattern pat_storage = STATIC_TR_PATTERN;
+  mrb_bool const use_set = !mrb_nil_p(v_pat);
+  uint8_t bitmap[32];
+  mrb_bool flag_changed = FALSE;
+  mrb_int lastch = -1;
+  mrb_int src, dst;
+  mrb_int len;
+  char *s;
+
+  mrb_str_modify(mrb, mrb_str_ptr(str));
+  if (use_set) {
+    /* the parsed list is only needed until the bitmap has been built */
+    struct tr_pattern *pat = tr_parse_pattern(mrb, &pat_storage, v_pat, TRUE);
+    tr_compile_pattern(pat, v_pat, bitmap);
+    tr_free_pattern(mrb, pat);
+  }
+  s = RSTRING_PTR(str);
+  len = RSTRING_LEN(str);
+
+  for (src=dst=0; src<len; src++,dst++) {
+    mrb_bool candidate;
+
+    if (src>dst) s[dst] = s[src];
+    candidate = use_set ? tr_bitmap_detect(bitmap, s[src]) : (s[src] >= 0);
+    if (candidate && s[src] == lastch) {
+      /* repeat of the previous character: drop it from the output */
+      flag_changed = TRUE;
+      dst--;
+    }
+    lastch = s[src];
+  }
+
+  if (flag_changed) {
+    RSTR_SET_LEN(RSTRING(str), dst);
+    RSTRING_PTR(str)[dst] = 0;
+  }
+  return flag_changed;
+}
+
+/*
+ * call-seq:
+ * str.squeeze([other_str]) -> new_str
+ *
+ * Builds a set of characters from the other_str
+ * parameter(s) using the procedure described for String#count. Returns a
+ * new string where runs of the same character that occur in this set are
+ * replaced by a single character. If no arguments are given, all runs of
+ * identical characters are replaced by a single character.
+ *
+ * "yellow moon".squeeze #=> "yelow mon"
+ * " now is the".squeeze(" ") #=> " now is the"
+ * "putters shoot balls".squeeze("m-z") #=> "puters shot balls"
+ */
+static mrb_value
+mrb_str_squeeze(mrb_state *mrb, mrb_value str)
+{
+  mrb_value selector = mrb_nil_value();   /* stays nil when no argument is given */
+  mrb_value copy;
+
+  mrb_get_args(mrb, "|S", &selector);
+  /* squeeze a duplicate so the receiver itself is not modified */
+  copy = mrb_str_dup(mrb, str);
+  str_squeeze(mrb, copy, selector);
+  return copy;
+}
+
+/*
+ * call-seq:
+ * str.squeeze!([other_str]) -> str or nil
+ *
+ * Squeezes str in place, returning either str, or nil if no
+ * changes were made.
+ */
+static mrb_value
+mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str)
+{
+  mrb_value selector = mrb_nil_value();   /* stays nil when no argument is given */
+
+  mrb_get_args(mrb, "|S", &selector);
+  /* nil signals that the receiver was left as it was */
+  return str_squeeze(mrb, str, selector) ? str : mrb_nil_value();
+}
+
+/*
+ * Remove from str, in place, every character selected by the pattern
+ * v_pat.  Returns TRUE when at least one character was removed.
+ */
+static mrb_bool
+str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat)
+{
+  struct tr_pattern pat = STATIC_TR_PATTERN;
+  uint8_t bitmap[32];
+  mrb_bool flag_changed = FALSE;
+  mrb_int src, dst;
+  mrb_int len;
+  char *s;
+
+  mrb_str_modify(mrb, mrb_str_ptr(str));
+  /* the parsed list is only needed until the bitmap has been built */
+  tr_parse_pattern(mrb, &pat, v_pat, TRUE);
+  tr_compile_pattern(&pat, v_pat, bitmap);
+  tr_free_pattern(mrb, &pat);
+
+  s = RSTRING_PTR(str);
+  len = RSTRING_LEN(str);
+
+  for (src=dst=0; src<len; src++,dst++) {
+    if (src>dst) s[dst] = s[src];
+    if (tr_bitmap_detect(bitmap, s[src])) {
+      /* selected character: let the next one overwrite this slot */
+      flag_changed = TRUE;
+      dst--;
+    }
+  }
+  if (flag_changed) {
+    RSTR_SET_LEN(RSTRING(str), dst);
+    RSTRING_PTR(str)[dst] = 0;
+  }
+  return flag_changed;
+}
+
+static mrb_value
+mrb_str_delete(mrb_state *mrb, mrb_value str)
+{
+  mrb_value selector;
+  mrb_value copy;
+
+  mrb_get_args(mrb, "S", &selector);
+  /* delete from a duplicate so the receiver itself is not modified */
+  copy = mrb_str_dup(mrb, str);
+  str_delete(mrb, copy, selector);
+  return copy;
+}
+
+static mrb_value
+mrb_str_delete_bang(mrb_state *mrb, mrb_value str)
+{
+  mrb_value selector;
+
+  mrb_get_args(mrb, "S", &selector);
+  /* nil signals that the receiver was left as it was */
+  return str_delete(mrb, str, selector) ? str : mrb_nil_value();
+}
+
+/*
+ * call-seq:
+ * str.count([other_str]) -> integer
+ *
+ * Each other_str parameter defines a set of characters to count. The
+ * intersection of these sets defines the characters to count in str. Any
+ * other_str that starts with a caret ^ is negated. The sequence c1-c2
+ * means all characters between c1 and c2. The backslash character \ can
+ * be used to escape ^ or - and is otherwise ignored unless it appears at
+ * the end of a sequence or the end of a other_str.
+ */
+static mrb_value
+mrb_str_count(mrb_state *mrb, mrb_value str)
+{
+  struct tr_pattern pat = STATIC_TR_PATTERN;
+  mrb_value v_pat = mrb_nil_value();
+  uint8_t bitmap[32];
+  const char *p, *end;
+  mrb_int count = 0;
+
+  mrb_get_args(mrb, "S", &v_pat);
+  /* the parsed list is only needed until the bitmap has been built */
+  tr_parse_pattern(mrb, &pat, v_pat, TRUE);
+  tr_compile_pattern(&pat, v_pat, bitmap);
+  tr_free_pattern(mrb, &pat);
+
+  /* count the bytes of the receiver that the bitmap selects */
+  p = RSTRING_PTR(str);
+  end = p + RSTRING_LEN(str);
+  for (; p < end; p++) {
+    if (tr_bitmap_detect(bitmap, *p)) count++;
+  }
+  return mrb_fixnum_value(count);
+}
+
static mrb_value
mrb_str_hex(mrb_state *mrb, mrb_value self)
{
@@ -194,49 +861,40 @@ mrb_str_chr(mrb_state *mrb, mrb_value self)
/*
* call-seq:
- * string.lines -> array of string
+ * int.chr([encoding]) -> string
*
- * Returns strings per line;
+ * Returns a string containing the character represented by the +int+'s value
+ * according to +encoding+. +"ASCII-8BIT"+ (+"BINARY"+) and +"UTF-8"+ (only
+ * with +MRB_UTF8_STRING+) can be specified as +encoding+ (default is
+ * +"ASCII-8BIT"+).
*
- * a = "abc\ndef"
- * a.lines #=> ["abc\n", "def"]
+ * 65.chr #=> "A"
+ * 230.chr #=> "\xE6"
+ * 230.chr("ASCII-8BIT") #=> "\xE6"
+ * 230.chr("UTF-8") #=> "\u00E6"
*/
static mrb_value
-mrb_str_lines(mrb_state *mrb, mrb_value self)
+mrb_int_chr(mrb_state *mrb, mrb_value num)
{
- mrb_value result;
- mrb_value blk;
- int ai;
- mrb_int len;
- mrb_value arg;
- char *p = RSTRING_PTR(self), *t;
- char *e = p + RSTRING_LEN(self);
-
- mrb_get_args(mrb, "&", &blk);
+ mrb_value enc;
+ mrb_bool enc_given;
- result = mrb_ary_new(mrb);
-
- if (!mrb_nil_p(blk)) {
- while (p < e) {
- t = p;
- while (p < e && *p != '\n') p++;
- if (*p == '\n') p++;
- len = (mrb_int) (p - t);
- arg = mrb_str_new(mrb, t, len);
- mrb_yield_argv(mrb, blk, 1, &arg);
- }
- return self;
+ mrb_get_args(mrb, "|S?", &enc, &enc_given);
+ if (!enc_given ||
+ ENC_COMP_P(enc, ENC_ASCII_8BIT) ||
+ ENC_COMP_P(enc, ENC_BINARY)) {
+ return int_chr_binary(mrb, num);
}
- while (p < e) {
- ai = mrb_gc_arena_save(mrb);
- t = p;
- while (p < e && *p != '\n') p++;
- if (*p == '\n') p++;
- len = (mrb_int) (p - t);
- mrb_ary_push(mrb, result, mrb_str_new(mrb, t, len));
- mrb_gc_arena_restore(mrb, ai);
+#ifdef MRB_UTF8_STRING
+ else if (ENC_COMP_P(enc, ENC_UTF8)) {
+ return int_chr_utf8(mrb, num);
}
- return result;
+#endif
+ else {
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %v", enc);
+ }
+ /* not reached */
+ return mrb_nil_value();
}
/*
@@ -253,9 +911,9 @@ mrb_str_succ_bang(mrb_state *mrb, mrb_value self)
{
mrb_value result;
unsigned char *p, *e, *b, *t;
- char *prepend;
+ const char *prepend;
struct RString *s = mrb_str_ptr(self);
- size_t l;
+ mrb_int l;
if (RSTRING_LEN(self) == 0)
return self;
@@ -275,7 +933,8 @@ mrb_str_succ_bang(mrb_state *mrb, mrb_value self)
if (e < b) {
e = p + l - 1;
result = mrb_str_new_lit(mrb, "");
- } else {
+ }
+ else {
// find leading letter of the ascii/number
b = e;
while (b > p) {
@@ -293,7 +952,8 @@ mrb_str_succ_bang(mrb_state *mrb, mrb_value self)
if (*e == 0xff) {
mrb_str_cat_lit(mrb, result, "\x01");
(*e) = 0;
- } else
+ }
+ else
(*e)++;
break;
}
@@ -301,13 +961,16 @@ mrb_str_succ_bang(mrb_state *mrb, mrb_value self)
if (*e == '9') {
if (e == b) prepend = "1";
*e = '0';
- } else if (*e == 'z') {
+ }
+ else if (*e == 'z') {
if (e == b) prepend = "a";
*e = 'a';
- } else if (*e == 'Z') {
+ }
+ else if (*e == 'Z') {
if (e == b) prepend = "A";
*e = 'A';
- } else {
+ }
+ else {
(*e)++;
break;
}
@@ -331,27 +994,246 @@ mrb_str_succ(mrb_state *mrb, mrb_value self)
return str;
}
+#ifdef MRB_UTF8_STRING
+/*
+ * Byte length of a UTF-8 sequence, indexed by its first byte:
+ * 1 for 0x00-0x7f, 2 for 0xc0-0xdf, 3 for 0xe0-0xef, 4 for 0xf0-0xf4,
+ * and 0 for bytes that do not start a sequence here (0x80-0xbf, 0xf5-0xff).
+ */
+static const char utf8len_codepage_zero[256] =
+{
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,
+};
+
+/*
+ * Decode the UTF-8 sequence that starts at p and return its code point.
+ *
+ * The first byte p[0] is returned unchanged when it is ASCII, when
+ * utf8len_codepage_zero gives it no multi-byte length, or when one of
+ * the expected continuation bytes (10xxxxxx) is missing.
+ *
+ * NOTE(review): continuation bytes p[1]..p[len-1] are read without any
+ * length argument; presumably callers rely on the buffer being
+ * NUL-terminated -- confirm.
+ */
+static mrb_int
+utf8code(unsigned char* p)
+{
+ mrb_int len;
+
+ if (p[0] < 0x80)
+ return p[0];
+
+ len = utf8len_codepage_zero[p[0]];
+ /* each nesting level validates one more continuation byte */
+ if (len > 1 && (p[1] & 0xc0) == 0x80) {
+ if (len == 2)
+ return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f);
+ if ((p[2] & 0xc0) == 0x80) {
+ if (len == 3)
+ return ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6)
+ + (p[2] & 0x3f);
+ if ((p[3] & 0xc0) == 0x80) {
+ if (len == 4)
+ return ((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12)
+ + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f);
+ /* utf8len_codepage_zero holds no 5 or 6 entries, so the len == 5
+ and len == 6 cases below are never taken with the current table */
+ if ((p[4] & 0xc0) == 0x80) {
+ if (len == 5)
+ return ((p[0] & 0x03) << 24) + ((p[1] & 0x3f) << 18)
+ + ((p[2] & 0x3f) << 12) + ((p[3] & 0x3f) << 6)
+ + (p[4] & 0x3f);
+ if ((p[5] & 0xc0) == 0x80 && len == 6)
+ return ((p[0] & 0x01) << 30) + ((p[1] & 0x3f) << 24)
+ + ((p[2] & 0x3f) << 18) + ((p[3] & 0x3f) << 12)
+ + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f);
+ }
+ }
+ }
+ }
+ /* malformed or unsupported sequence: fall back to the raw first byte */
+ return p[0];
+}
+
+/*
+ * String#ord (MRB_UTF8_STRING build): returns the code point decoded by
+ * utf8code() from the start of the string as a fixnum.
+ * Raises ArgumentError ("empty string") when the string is empty.
+ */
+static mrb_value
+mrb_str_ord(mrb_state* mrb, mrb_value str)
+{
+ if (RSTRING_LEN(str) == 0)
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string");
+ return mrb_fixnum_value(utf8code((unsigned char*) RSTRING_PTR(str)));
+}
+#else
+/*
+ * String#ord (build without MRB_UTF8_STRING): returns the first byte of
+ * the string, read as an unsigned char, as a fixnum.
+ * Raises ArgumentError ("empty string") when the string is empty.
+ */
+static mrb_value
+mrb_str_ord(mrb_state* mrb, mrb_value str)
+{
+ if (RSTRING_LEN(str) == 0)
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string");
+ return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[0]);
+}
+#endif
+
+/*
+ * call-seq:
+ * str.delete_prefix!(prefix) -> self or nil
+ *
+ * Deletes leading <code>prefix</code> from <i>str</i>, returning
+ * <code>nil</code> if no change was made.
+ *
+ * "hello".delete_prefix!("hel") #=> "lo"
+ * "hello".delete_prefix!("llo") #=> nil
+ *
+ * NOTE(review): an empty prefix passes both checks below (plen == 0), so
+ * self is returned rather than nil even though nothing was removed.
+ */
+static mrb_value
+mrb_str_del_prefix_bang(mrb_state *mrb, mrb_value self)
+{
+ mrb_int plen, slen;
+ const char *ptr;
+ char *s;
+ struct RString *str = RSTRING(self);
+
+ mrb_get_args(mrb, "s", &ptr, &plen);
+ slen = RSTR_LEN(str);
+ /* a prefix longer than the string can never match */
+ if (plen > slen) return mrb_nil_value();
+ s = RSTR_PTR(str);
+ if (memcmp(s, ptr, plen) != 0) return mrb_nil_value();
+ if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) {
+ /* unfrozen shared string: drop the prefix by advancing the pointer, no copy */
+ str->as.heap.ptr += plen;
+ }
+ else {
+ mrb_str_modify(mrb, str);
+ /* re-read the pointer after mrb_str_modify() before shifting the tail down */
+ s = RSTR_PTR(str);
+ memmove(s, s+plen, slen-plen);
+ }
+ RSTR_SET_LEN(str, slen-plen);
+ return self;
+}
+
+/*
+ * call-seq:
+ * str.delete_prefix(prefix) -> new_str
+ *
+ * Returns a copy of <i>str</i> with leading <code>prefix</code> deleted.
+ * When <i>str</i> does not start with <code>prefix</code> an unchanged
+ * duplicate is returned.
+ *
+ * "hello".delete_prefix("hel") #=> "lo"
+ * "hello".delete_prefix("llo") #=> "hello"
+ */
+static mrb_value
+mrb_str_del_prefix(mrb_state *mrb, mrb_value self)
+{
+ mrb_int plen, slen;
+ const char *ptr;
+
+ mrb_get_args(mrb, "s", &ptr, &plen);
+ slen = RSTRING_LEN(self);
+ /* a prefix longer than the string can never match */
+ if (plen > slen) return mrb_str_dup(mrb, self);
+ if (memcmp(RSTRING_PTR(self), ptr, plen) != 0)
+ return mrb_str_dup(mrb, self);
+ /* match: keep everything after the first plen bytes */
+ return mrb_str_substr(mrb, self, plen, slen-plen);
+}
+
+/*
+ * call-seq:
+ * str.delete_suffix!(suffix) -> self or nil
+ *
+ * Deletes trailing <code>suffix</code> from <i>str</i>, returning
+ * <code>nil</code> if no change was made.
+ *
+ * "hello".delete_suffix!("llo") #=> "he"
+ * "hello".delete_suffix!("hel") #=> nil
+ *
+ * NOTE(review): an empty suffix passes both checks below (plen == 0), so
+ * self is returned rather than nil even though nothing was removed.
+ */
+static mrb_value
+mrb_str_del_suffix_bang(mrb_state *mrb, mrb_value self)
+{
+ mrb_int plen, slen;
+ const char *ptr;
+ char *s;
+ struct RString *str = RSTRING(self);
+
+ mrb_get_args(mrb, "s", &ptr, &plen);
+ slen = RSTR_LEN(str);
+ /* a suffix longer than the string can never match */
+ if (plen > slen) return mrb_nil_value();
+ s = RSTR_PTR(str);
+ /* compare against the last plen bytes of the string */
+ if (memcmp(s+slen-plen, ptr, plen) != 0) return mrb_nil_value();
+ if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) {
+ /* no need to modify string */
+ }
+ else {
+ mrb_str_modify(mrb, str);
+ }
+ /* removing a suffix only shortens the recorded length; no bytes move */
+ RSTR_SET_LEN(str, slen-plen);
+ return self;
+}
+
+/*
+ * call-seq:
+ * str.delete_suffix(suffix) -> new_str
+ *
+ * Returns a copy of <i>str</i> with trailing <code>suffix</code> deleted.
+ * When <i>str</i> does not end with <code>suffix</code> an unchanged
+ * duplicate is returned.
+ *
+ * "hello".delete_suffix("llo") #=> "he"
+ * "hello".delete_suffix("hel") #=> "hello"
+ */
+static mrb_value
+mrb_str_del_suffix(mrb_state *mrb, mrb_value self)
+{
+ mrb_int plen, slen;
+ const char *ptr;
+
+ mrb_get_args(mrb, "s", &ptr, &plen);
+ slen = RSTRING_LEN(self);
+ /* a suffix longer than the string can never match */
+ if (plen > slen) return mrb_str_dup(mrb, self);
+ /* compare against the last plen bytes of the string */
+ if (memcmp(RSTRING_PTR(self)+slen-plen, ptr, plen) != 0)
+ return mrb_str_dup(mrb, self);
+ /* match: keep the first slen-plen bytes */
+ return mrb_str_substr(mrb, self, 0, slen-plen);
+}
+
+/*
+ * Splits the receiver at '\n' and returns an array of lines; each line
+ * keeps its trailing newline, and a final line without a newline is
+ * included as-is. An empty string yields an empty array.
+ * Registered as String#__lines.
+ */
+static mrb_value
+mrb_str_lines(mrb_state *mrb, mrb_value self)
+{
+ mrb_value result;
+ int ai;
+ mrb_int len;
+ char *b = RSTRING_PTR(self);
+ char *p = b, *t;
+ char *e = b + RSTRING_LEN(self);
+
+ result = mrb_ary_new(mrb);
+ ai = mrb_gc_arena_save(mrb);
+ while (p < e) {
+ t = p;
+ while (p < e && *p != '\n') p++;
+ /* consume the newline only while still inside the string: when p == e
+ the last line has no terminator and *p lies outside the string's
+ length, so it must not be read or counted */
+ if (p < e && *p == '\n') p++;
+ len = (mrb_int) (p - t);
+ mrb_ary_push(mrb, result, mrb_str_new(mrb, t, len));
+ /* each line is already referenced by result; release its arena slot */
+ mrb_gc_arena_restore(mrb, ai);
+ }
+ return result;
+}
+
+/*
+ * Gem entry point: registers the C implementations of the String
+ * extension methods on mrb->string_class, plus Integer#chr.
+ */
void
mrb_mruby_string_ext_gem_init(mrb_state* mrb)
{
struct RClass * s = mrb->string_class;
mrb_define_method(mrb, s, "dump", mrb_str_dump, MRB_ARGS_NONE());
- mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1));
mrb_define_method(mrb, s, "swapcase!", mrb_str_swapcase_bang, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE());
- mrb_define_method(mrb, s, "concat", mrb_str_concat2, MRB_ARGS_REQ(1));
- mrb_define_method(mrb, s, "<<", mrb_str_concat2, MRB_ARGS_REQ(1));
+ mrb_define_method(mrb, s, "concat", mrb_str_concat_m, MRB_ARGS_REQ(1));
+ mrb_define_method(mrb, s, "<<", mrb_str_concat_m, MRB_ARGS_REQ(1));
+ mrb_define_method(mrb, s, "count", mrb_str_count, MRB_ARGS_REQ(1));
+ mrb_define_method(mrb, s, "tr", mrb_str_tr, MRB_ARGS_REQ(2));
+ mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2));
+ mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2));
+ mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2));
+ mrb_define_method(mrb, s, "squeeze", mrb_str_squeeze, MRB_ARGS_OPT(1));
+ mrb_define_method(mrb, s, "squeeze!", mrb_str_squeeze_bang, MRB_ARGS_OPT(1));
+ mrb_define_method(mrb, s, "delete", mrb_str_delete, MRB_ARGS_REQ(1));
+ mrb_define_method(mrb, s, "delete!", mrb_str_delete_bang, MRB_ARGS_REQ(1));
mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST());
mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST());
mrb_define_method(mrb, s, "hex", mrb_str_hex, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "oct", mrb_str_oct, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE());
- mrb_define_method(mrb, s, "lines", mrb_str_lines, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "succ", mrb_str_succ, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "succ!", mrb_str_succ_bang, MRB_ARGS_NONE());
- mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next"), mrb_intern_lit(mrb, "succ"));
- mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next!"), mrb_intern_lit(mrb, "succ!"));
+ /* next / next! are bound to the same C functions as succ / succ! */
+ mrb_define_method(mrb, s, "next", mrb_str_succ, MRB_ARGS_NONE());
+ mrb_define_method(mrb, s, "next!", mrb_str_succ_bang, MRB_ARGS_NONE());
+ mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE());
+ mrb_define_method(mrb, s, "delete_prefix!", mrb_str_del_prefix_bang, MRB_ARGS_REQ(1));
+ mrb_define_method(mrb, s, "delete_prefix", mrb_str_del_prefix, MRB_ARGS_REQ(1));
+ mrb_define_method(mrb, s, "delete_suffix!", mrb_str_del_suffix_bang, MRB_ARGS_REQ(1));
+ mrb_define_method(mrb, s, "delete_suffix", mrb_str_del_suffix, MRB_ARGS_REQ(1));
+
+ /* the C line splitter is exposed as __lines rather than lines */
+ mrb_define_method(mrb, s, "__lines", mrb_str_lines, MRB_ARGS_NONE());
+
+ mrb_define_method(mrb, mrb_class_get(mrb, "Integer"), "chr", mrb_int_chr, MRB_ARGS_OPT(1));
}
void
diff --git a/mrbgems/mruby-string-ext/test/numeric.rb b/mrbgems/mruby-string-ext/test/numeric.rb
new file mode 100644
index 000000000..dfcb9ebf4
--- /dev/null
+++ b/mrbgems/mruby-string-ext/test/numeric.rb
@@ -0,0 +1,29 @@
+# coding: utf-8
+
+assert('Integer#chr') do
+ # Without an encoding argument the binary (single byte) conversion is used.
+ assert_equal("A", 65.chr)
+ assert_equal("B", 0x42.chr)
+ assert_equal("\xab", 171.chr)
+ assert_raise(RangeError) { -1.chr }
+ assert_raise(RangeError) { 256.chr }
+
+ # Encoding names are matched case-insensitively and must be strings.
+ assert_equal("A", 65.chr("ASCII-8BIT"))
+ assert_equal("B", 0x42.chr("BINARY"))
+ assert_equal("\xab", 171.chr("ascii-8bit"))
+ assert_raise(RangeError) { -1.chr("binary") }
+ assert_raise(RangeError) { 256.chr("Ascii-8bit") }
+ assert_raise(ArgumentError) { 65.chr("ASCII") }
+ assert_raise(ArgumentError) { 65.chr("ASCII-8BIT", 2) }
+ assert_raise(TypeError) { 65.chr(:BINARY) }
+
+ if __ENCODING__ == "ASCII-8BIT"
+ assert_raise(ArgumentError) { 65.chr("UTF-8") }
+ else
+ assert_equal("A", 65.chr("UTF-8"))
+ assert_equal("B", 0x42.chr("UTF-8"))
+ assert_equal("«", 171.chr("utf-8"))
+ assert_equal("あ", 12354.chr("Utf-8"))
+ assert_raise(RangeError) { -1.chr("utf-8") }
+ # Call chr("UTF-8") directly: a bare 0x110000.chr already raises
+ # RangeError on the binary path, which would hide the UTF-8 range check.
+ assert_raise(RangeError) { 0x110000.chr("UTF-8") }
+ end
+end
diff --git a/mrbgems/mruby-string-ext/test/range.rb b/mrbgems/mruby-string-ext/test/range.rb
new file mode 100644
index 000000000..80c286850
--- /dev/null
+++ b/mrbgems/mruby-string-ext/test/range.rb
@@ -0,0 +1,26 @@
+assert('Range#max') do
+ # returns the maximum value in the range when called with no arguments
+ assert_equal 'l', ('f'..'l').max
+ assert_equal 'e', ('a'...'f').max
+
+ # returns nil when the endpoint is less than the start point
+ assert_equal nil, ('z'..'l').max
+end
+
+assert('Range#max given a block') do
+ # returns nil when the endpoint is less than the start point
+ assert_equal nil, (('z'..'l').max { |x, y| x <=> y })
+end
+
+assert('Range#min') do
+ # returns the minimum value in the range when called with no arguments
+ assert_equal 'f', ('f'..'l').min
+
+ # returns nil when the start point is greater than the endpoint
+ assert_equal nil, ('z'..'l').min
+end
+
+assert('Range#min given a block') do
+ # returns nil when the start point is greater than the endpoint
+ assert_equal nil, (('z'..'l').min { |x, y| x <=> y })
+end
diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb
index eba666e13..7be673aa6 100644
--- a/mrbgems/mruby-string-ext/test/string.rb
+++ b/mrbgems/mruby-string-ext/test/string.rb
@@ -1,75 +1,83 @@
+# coding: utf-8
##
# String(Ext) Test
-assert('String#getbyte') do
- str1 = "hello"
- bytes1 = [104, 101, 108, 108, 111]
- assert_equal bytes1[0], str1.getbyte(0)
- assert_equal bytes1[-1], str1.getbyte(-1)
- assert_equal bytes1[6], str1.getbyte(6)
+UTF8STRING = __ENCODING__ == "UTF-8"
- str2 = "\xFF"
- bytes2 = [0xFF]
- assert_equal bytes2[0], str2.getbyte(0)
+def assert_upto(exp, receiver, *args)
+ act = []
+ receiver.upto(*args) { |v| act << v }
+ assert_equal exp, act
end
assert('String#dump') do
- ("\1" * 100).dump # should not raise an exception - regress #1210
- "\0".inspect == "\"\\000\"" and
- "foo".dump == "\"foo\""
+ assert_equal("\"\\x00\"", "\0".dump)
+ assert_equal("\"foo\"", "foo".dump)
+ assert_equal('"\xe3\x82\x8b"', "る".dump)
+ assert_nothing_raised { ("\1" * 100).dump } # regress #1210
end
assert('String#strip') do
s = " abc "
- "".strip == "" and " \t\r\n\f\v".strip == "" and
- "\0a\0".strip == "\0a" and
- "abc".strip == "abc" and
- " abc".strip == "abc" and
- "abc ".strip == "abc" and
- " abc ".strip == "abc" and
- s == " abc "
+ assert_equal("abc", s.strip)
+ assert_equal(" abc ", s)
+ assert_equal("", "".strip)
+ assert_equal("", " \t\r\n\f\v".strip)
+ assert_equal("\0a", "\0a\0".strip)
+ assert_equal("abc", "abc".strip)
+ assert_equal("abc", " abc".strip)
+ assert_equal("abc", "abc ".strip)
end
assert('String#lstrip') do
s = " abc "
- s.lstrip
- "".lstrip == "" and " \t\r\n\f\v".lstrip == "" and
- "\0a\0".lstrip == "\0a\0" and
- "abc".lstrip == "abc" and
- " abc".lstrip == "abc" and
- "abc ".lstrip == "abc " and
- " abc ".lstrip == "abc " and
- s == " abc "
+ assert_equal("abc ", s.lstrip)
+ assert_equal(" abc ", s)
+ assert_equal("", "".lstrip)
+ assert_equal("", " \t\r\n\f\v".lstrip)
+ assert_equal("\0a\0", "\0a\0".lstrip)
+ assert_equal("abc", "abc".lstrip)
+ assert_equal("abc", " abc".lstrip)
+ assert_equal("abc ", "abc ".lstrip)
end
assert('String#rstrip') do
s = " abc "
- s.rstrip
- "".rstrip == "" and " \t\r\n\f\v".rstrip == "" and
- "\0a\0".rstrip == "\0a" and
- "abc".rstrip == "abc" and
- " abc".rstrip == " abc" and
- "abc ".rstrip == "abc" and
- " abc ".rstrip == " abc" and
- s == " abc "
+ assert_equal(" abc", s.rstrip)
+ assert_equal(" abc ", s)
+ assert_equal("", "".rstrip)
+ assert_equal("", " \t\r\n\f\v".rstrip)
+ assert_equal("\0a", "\0a\0".rstrip)
+ assert_equal("abc", "abc".rstrip)
+ assert_equal(" abc", " abc".rstrip)
+ assert_equal("abc", "abc ".rstrip)
end
assert('String#strip!') do
s = " abc "
t = "abc"
- s.strip! == "abc" and s == "abc" and t.strip! == nil
+ assert_equal("abc", s.strip!)
+ assert_equal("abc", s)
+ assert_nil(t.strip!)
+ assert_equal("abc", t)
end
assert('String#lstrip!') do
s = " abc "
t = "abc "
- s.lstrip! == "abc " and s == "abc " and t.lstrip! == nil
+ assert_equal("abc ", s.lstrip!)
+ assert_equal("abc ", s)
+ assert_nil(t.lstrip!)
+ assert_equal("abc ", t)
end
assert('String#rstrip!') do
s = " abc "
t = " abc"
- s.rstrip! == " abc" and s == " abc" and t.rstrip! == nil
+ assert_equal(" abc", s.rstrip!)
+ assert_equal(" abc", s)
+ assert_nil(t.rstrip!)
+ assert_equal(" abc", t)
end
assert('String#swapcase') do
@@ -85,12 +93,17 @@ assert('String#swapcase!') do
end
assert('String#concat') do
- s = "Hello "
- s.concat "World!"
- t = "Hello "
- t << "World!"
- assert_equal "Hello World!", t
- assert_equal "Hello World!", s
+ assert_equal "Hello World!", "Hello " << "World" << 33
+ assert_equal "Hello World!", "Hello ".concat("World").concat(33)
+ assert_raise(TypeError) { "".concat(Object.new) }
+
+ if UTF8STRING
+ assert_equal "H«", "H" << 0xab
+ assert_equal "Hは", "H" << 12399
+ else
+ assert_equal "H\xab", "H" << 0xab
+ assert_raise(RangeError) { "H" << 12399 }
+ end
end
assert('String#casecmp') do
@@ -98,11 +111,69 @@ assert('String#casecmp') do
assert_equal 0, "aBcDeF".casecmp("abcdef")
assert_equal(-1, "abcdef".casecmp("abcdefg"))
assert_equal 0, "abcdef".casecmp("ABCDEF")
- o = Object.new
- def o.to_str
- "ABCDEF"
- end
- assert_equal 0, "abcdef".casecmp(o)
+end
+
+assert('String#count') do
+ s = "abccdeff123"
+ assert_equal 0, s.count("")
+ assert_equal 1, s.count("a")
+ assert_equal 2, s.count("ab")
+ assert_equal 9, s.count("^c")
+ assert_equal 8, s.count("a-z")
+ assert_equal 4, s.count("a0-9")
+end
+
+assert('String#tr') do
+ assert_equal "ABC", "abc".tr('a-z', 'A-Z')
+ assert_equal "hippo", "hello".tr('el', 'ip')
+ assert_equal "Ruby", "Lisp".tr("Lisp", "Ruby")
+ assert_equal "*e**o", "hello".tr('^aeiou', '*')
+ assert_equal "heo", "hello".tr('l', '')
+end
+
+assert('String#tr!') do
+ s = "abcdefghijklmnopqR"
+ assert_equal "ab12222hijklmnopqR", s.tr!("cdefg", "12")
+ assert_equal "ab12222hijklmnopqR", s
+end
+
+assert('String#tr_s') do
+ assert_equal "hero", "hello".tr_s('l', 'r')
+ assert_equal "h*o", "hello".tr_s('el', '*')
+ assert_equal "hhxo", "hello".tr_s('el', 'hx')
+end
+
+assert('String#tr_s!') do
+ s = "hello"
+ assert_equal "hero", s.tr_s!('l', 'r')
+ assert_equal "hero", s
+ assert_nil s.tr_s!('l', 'r')
+end
+
+assert('String#squeeze') do
+ assert_equal "yelow mon", "yellow moon".squeeze
+ assert_equal " now is the", " now is the".squeeze(" ")
+ assert_equal "puters shot balls", "putters shoot balls".squeeze("m-z")
+end
+
+assert('String#squeeze!') do
+ s = " now is the"
+ assert_equal " now is the", s.squeeze!(" ")
+ assert_equal " now is the", s
+end
+
+assert('String#delete') do
+ assert_equal "he", "hello".delete("lo")
+ assert_equal "hll", "hello".delete("aeiou")
+ assert_equal "ll", "hello".delete("^l")
+ assert_equal "ho", "hello".delete("ej-m")
+end
+
+assert('String#delete!') do
+ s = "hello"
+ assert_equal "he", s.delete!("lo")
+ assert_equal "he", s
+ assert_nil s.delete!("lz")
end
assert('String#start_with?') do
@@ -162,10 +233,6 @@ assert('String#oct') do
assert_equal (-8), "-10".oct
end
-assert('String#chr') do
- assert_equal "a", "abcde".chr
-end
-
assert('String#lines') do
assert_equal ["Hel\n", "lo\n", "World!"], "Hel\nlo\nWorld!".lines
assert_equal ["Hel\n", "lo\n", "World!\n"], "Hel\nlo\nWorld!\n".lines
@@ -358,8 +425,8 @@ assert('String#succ') do
assert_equal "-b-", a
a = "-z-"; a.succ!
assert_equal "-aa-", a
- a = "あa"; a.succ!
- assert_equal "あb", a
+ a = "あb"; a.succ!
+ assert_equal "あc", a
a = "あaz"; a.succ!
assert_equal "あba", a
end
@@ -370,3 +437,279 @@ assert('String#next') do
a = "00"; a.next!
assert_equal "01", a
end
+
+assert('String#insert') do
+ assert_equal "Xabcd", "abcd".insert(0, 'X')
+ assert_equal "abcXd", "abcd".insert(3, 'X')
+ assert_equal "abcdX", "abcd".insert(4, 'X')
+ assert_equal "abXcd", "abcd".insert(-3, 'X')
+ assert_equal "abcdX", "abcd".insert(-1, 'X')
+ assert_raise(IndexError) { "abcd".insert(5, 'X') }
+ assert_raise(IndexError) { "abcd".insert(-6, 'X') }
+
+ a = "abcd"
+ a.insert(0, 'X')
+ assert_equal "Xabcd", a
+end
+
+assert('String#prepend') do
+ a = "world"
+ assert_equal "hello world", a.prepend("hello ")
+ assert_equal "hello world", a
+end
+
+assert('String#ljust') do
+ assert_equal "hello", "hello".ljust(4)
+ assert_equal "hello ", "hello".ljust(20)
+ assert_equal 20, "hello".ljust(20).length
+ assert_equal "hello123412341234123", "hello".ljust(20, '1234')
+ assert_equal "hello", "hello".ljust(-3)
+end
+
+assert('String#rjust') do
+ assert_equal "hello", "hello".rjust(4)
+ assert_equal " hello", "hello".rjust(20)
+ assert_equal 20, "hello".rjust(20).length
+ assert_equal "123412341234123hello", "hello".rjust(20, '1234')
+ assert_equal "hello", "hello".rjust(-3)
+end
+
+assert('String#center') do
+ assert_equal "hello", "hello".center(4)
+ assert_equal " hello ", "hello".center(20)
+ assert_equal 20, "hello".center(20).length
+ assert_equal "1231231hello12312312", "hello".center(20, '123')
+ assert_equal "hello", "hello".center(-3)
+end
+
+if UTF8STRING
+ assert('String#ljust with UTF8') do
+ assert_equal "helloん ", "helloん".ljust(20)
+ assert_equal "helloó ", "helloó".ljust(34)
+ assert_equal 34, "helloó".ljust(34).length
+ assert_equal "helloんんんんんんんんんんんんんん", "hello".ljust(19, 'ん')
+ assert_equal "helloんんんんんんんんんんんんんんん", "hello".ljust(20, 'ん')
+ end
+
+ assert('String#rjust with UTF8') do
+ assert_equal " helloん", "helloん".rjust(20)
+ assert_equal " helloó", "helloó".rjust(34)
+ # assert_equal 34, "helloó".rjust(34).length
+ assert_equal "んんんんんんんんんんんんんんhello", "hello".rjust(19, 'ん')
+ assert_equal "んんんんんんんんんんんんんんんhello", "hello".rjust(20, 'ん')
+ end
+
+ assert('UTF8 byte counting') do
+ ret = ' '
+ ret[-6..-1] = "helloó"
+ assert_equal 34, ret.length
+ end
+end
+
+assert('String#ljust should not change string') do
+ a = "hello"
+ a.ljust(20)
+ assert_equal "hello", a
+end
+
+assert('String#rjust should not change string') do
+ a = "hello"
+ a.rjust(20)
+ assert_equal "hello", a
+end
+
+assert('String#ljust should raise on zero width padding') do
+ assert_raise(ArgumentError) { "foo".ljust(10, '') }
+end
+
+assert('String#rjust should raise on zero width padding') do
+ assert_raise(ArgumentError) { "foo".rjust(10, '') }
+end
+
+assert('String#upto') do
+ assert_upto %w(a8 a9 b0 b1 b2 b3 b4 b5 b6), "a8", "b6"
+ assert_upto ["9", "10", "11"], "9", "11"
+ assert_upto [], "25", "5"
+ assert_upto ["07", "08", "09", "10", "11"], "07", "11"
+ assert_upto ["9", ":", ";", "<", "=", ">", "?", "@", "A"], "9", "A"
+
+ if UTF8STRING
+ assert_upto %w(あ ぃ い ぅ う ぇ え ぉ お), "あ", "お"
+ end
+
+ a = "aa"
+ start = "aa"
+ count = 0
+ assert_equal("aa", a.upto("zz") {|s|
+ assert_equal(start, s)
+ start.succ!
+ count += 1
+ })
+ assert_equal(676, count)
+
+ a = "a"
+ start = "a"
+ count = 0
+ assert_equal("a", a.upto("a") {|s|
+ assert_equal(start, s)
+ start.succ!
+ count += 1
+ })
+ assert_equal(1, count)
+
+ a = "a"
+ start = "a"
+ count = 0
+ assert_equal("a", a.upto("b", true) {|s|
+ assert_equal(start, s)
+ start.succ!
+ count += 1
+ })
+ assert_equal(1, count)
+
+ a = "0"
+ start = "0"
+ count = 0
+ assert_equal("0", a.upto("0") {|s|
+ assert_equal(start, s)
+ start.succ!
+ count += 1
+ })
+ assert_equal(1, count)
+
+ a = "0"
+ start = "0"
+ count = 0
+ assert_equal("0", a.upto("-1") {|s|
+ assert_equal(start, s)
+ start.succ!
+ count += 1
+ })
+ assert_equal(0, count)
+
+ a = "-1"
+ start = "-1"
+ count = 0
+ assert_equal("-1", a.upto("-2") {|s|
+ assert_equal(start, s)
+ start.succ!
+ count += 1
+ })
+ assert_equal(2, count)
+
+ assert_raise(TypeError) { "a".upto(:c) {} }
+end
+
+assert('String#ord') do
+ got = "hello!".split('').map {|x| x.ord}
+ expect = [104, 101, 108, 108, 111, 33]
+ unless UTF8STRING
+ got << "\xff".ord
+ expect << 0xff
+ end
+ assert_equal expect, got
+end
+
+assert('String#ord(UTF-8)') do
+ got = "こんにちは世界!".split('').map {|x| x.ord}
+ expect = [0x3053,0x3093,0x306b,0x3061,0x306f,0x4e16,0x754c,0x21]
+ assert_equal expect, got
+end if UTF8STRING
+
+assert('String#chr') do
+ assert_equal "a", "abcde".chr
+ assert_equal "h", "hello!".chr
+ assert_equal "", "".chr
+end
+
+assert('String#chr(UTF-8)') do
+ assert_equal "こ", "こんにちは世界!".chr
+end if UTF8STRING
+
+assert('String#chars') do
+ expect = ["h", "e", "l", "l", "o", "!"]
+ assert_equal expect, "hello!".chars
+ s = ""
+ "hello!".chars do |x|
+ s += x
+ end
+ assert_equal "hello!", s
+end
+
+assert('String#chars(UTF-8)') do
+ expect = ['こ', 'ん', 'に', 'ち', 'は', '世', '界', '!']
+ assert_equal expect, "こんにちは世界!".chars
+ s = ""
+ "こんにちは世界!".chars do |x|
+ s += x
+ end
+ assert_equal "こんにちは世界!", s
+end if UTF8STRING
+
+assert('String#each_char') do
+ chars = []
+ "hello!".each_char do |x|
+ chars << x
+ end
+ assert_equal ["h", "e", "l", "l", "o", "!"], chars
+end
+
+assert('String#each_char(UTF-8)') do
+ chars = []
+ "こんにちは世界!".each_char do |x|
+ chars << x
+ end
+ assert_equal ["こ", "ん", "に", "ち", "は", "世", "界", "!"], chars
+end if UTF8STRING
+
+assert('String#codepoints') do
+ expect = [104, 101, 108, 108, 111, 33]
+ assert_equal expect, "hello!".codepoints
+ cp = []
+ "hello!".codepoints do |x|
+ cp << x
+ end
+ assert_equal expect, cp
+end
+
+assert('String#codepoints(UTF-8)') do
+ expect = [12371, 12435, 12395, 12385, 12399, 19990, 30028, 33]
+ assert_equal expect, "こんにちは世界!".codepoints
+ cp = []
+ "こんにちは世界!".codepoints do |x|
+ cp << x
+ end
+ assert_equal expect, cp
+end if UTF8STRING
+
+assert('String#each_codepoint') do
+ expect = [104, 101, 108, 108, 111, 33]
+ cp = []
+ "hello!".each_codepoint do |x|
+ cp << x
+ end
+ assert_equal expect, cp
+end
+
+assert('String#each_codepoint(UTF-8)') do
+ expect = [12371, 12435, 12395, 12385, 12399, 19990, 30028, 33]
+ cp = []
+ "こんにちは世界!".each_codepoint do |x|
+ cp << x
+ end
+ assert_equal expect, cp
+end if UTF8STRING
+
+assert('String#delete_prefix') do
+ assert_equal "llo", "hello".delete_prefix("he")
+ assert_equal "hello", "hello".delete_prefix("llo")
+ assert_equal "llo", "hello".delete_prefix!("he")
+ assert_nil "hello".delete_prefix!("llo")
+end
+
+assert('String#delete_suffix') do
+ assert_equal "he", "hello".delete_suffix("llo")
+ assert_equal "hello", "hello".delete_suffix("he")
+ assert_equal "he", "hello".delete_suffix!("llo")
+ assert_nil "hello".delete_suffix!("he")
+end