diff options
| author | Yukihiro "Matz" Matsumoto <[email protected]> | 2019-07-17 10:35:41 +0900 |
|---|---|---|
| committer | GitHub <[email protected]> | 2019-07-17 10:35:41 +0900 |
| commit | d605b72c1d6fa4564a0a5e88535504b6850463b5 (patch) | |
| tree | 774fc0de56002abb3bb2b1c3387ff08f91876d17 /mrbgems/mruby-string-ext | |
| parent | 2af92d0ebcbeca6d3d85a27c8193273080a63090 (diff) | |
| parent | 9af3b7c6258de327218dd04e69d76ae68caf17b1 (diff) | |
| download | mruby-d605b72c1d6fa4564a0a5e88535504b6850463b5.tar.gz mruby-d605b72c1d6fa4564a0a5e88535504b6850463b5.zip | |
Merge branch 'master' into i110/inspect-recursion
Diffstat (limited to 'mrbgems/mruby-string-ext')
| -rw-r--r-- | mrbgems/mruby-string-ext/mrblib/string.rb | 33 | ||||
| -rw-r--r-- | mrbgems/mruby-string-ext/src/string.c | 663 | ||||
| -rw-r--r-- | mrbgems/mruby-string-ext/test/range.rb | 26 | ||||
| -rw-r--r-- | mrbgems/mruby-string-ext/test/string.rb | 226 |
4 files changed, 818 insertions, 130 deletions
diff --git a/mrbgems/mruby-string-ext/mrblib/string.rb b/mrbgems/mruby-string-ext/mrblib/string.rb index 27ca30610..fdaf2f960 100644 --- a/mrbgems/mruby-string-ext/mrblib/string.rb +++ b/mrbgems/mruby-string-ext/mrblib/string.rb @@ -1,25 +1,6 @@ class String ## - # call-seq: - # String.try_convert(obj) -> string or nil - # - # Try to convert <i>obj</i> into a String, using to_str method. - # Returns converted string or nil if <i>obj</i> cannot be converted - # for any reason. - # - # String.try_convert("str") #=> "str" - # String.try_convert(/re/) #=> nil - # - def self.try_convert(obj) - if obj.respond_to?(:to_str) - obj.to_str - else - nil - end - end - - ## # call-seq: # string.clear -> string # @@ -112,7 +93,7 @@ class String # "hello".rstrip! #=> nil # def rstrip! - raise RuntimeError, "can't modify frozen String" if frozen? + raise FrozenError, "can't modify frozen String" if frozen? s = self.rstrip (s == self) ? nil : self.replace(s) end @@ -142,7 +123,7 @@ class String # "abcdef".casecmp("ABCDEF") #=> 0 # def casecmp(str) - self.downcase <=> str.to_str.downcase + self.downcase <=> str.__to_str.downcase rescue NoMethodError nil end @@ -329,11 +310,15 @@ class String end end + ## + # Call the given block for each character of + # +self+. def each_char(&block) return to_enum :each_char unless block - - split('').each do |i| - block.call(i) + pos = 0 + while pos < self.size + block.call(self[pos]) + pos += 1 end self end diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 4cab49094..50a4e5582 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -29,7 +29,7 @@ mrb_str_setbyte(mrb_state *mrb, mrb_value str) len = RSTRING_LEN(str); if (pos < -len || len <= pos) - mrb_raisef(mrb, E_INDEX_ERROR, "index %S is out of array", mrb_fixnum_value(pos)); + mrb_raisef(mrb, E_INDEX_ERROR, "index %S out of string", mrb_fixnum_value(pos)); if (pos < 0) pos += len; @@ -42,44 +42,32 @@ mrb_str_setbyte(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_byteslice(mrb_state *mrb, mrb_value str) { - mrb_value a1; - mrb_int len; + mrb_value a1, a2; + mrb_int str_len = RSTRING_LEN(str), beg, len; + mrb_bool empty = TRUE; - if (mrb_get_argc(mrb) == 2) { - mrb_int pos; - mrb_get_args(mrb, "ii", &pos, &len); - return mrb_str_substr(mrb, str, pos, len); + if (mrb_get_args(mrb, "o|o", &a1, &a2) == 2) { + beg = mrb_fixnum(mrb_to_int(mrb, a1)); + len = mrb_fixnum(mrb_to_int(mrb, a2)); + goto subseq; } - mrb_get_args(mrb, "o|i", &a1, &len); - switch (mrb_type(a1)) { - case MRB_TT_RANGE: - { - mrb_int beg; - - len = RSTRING_LEN(str); - switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) { - case 0: /* not range */ - break; - case 1: /* range */ - return mrb_str_substr(mrb, str, beg, len); - case 2: /* out of range */ - mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1); - break; - } - return mrb_nil_value(); + if (mrb_type(a1) == MRB_TT_RANGE) { + if (mrb_range_beg_len(mrb, a1, &beg, &len, str_len, TRUE) == MRB_RANGE_OK) { + goto subseq; } -#ifndef MRB_WITHOUT_FLOAT - case MRB_TT_FLOAT: - a1 = mrb_fixnum_value((mrb_int)mrb_float(a1)); - /* fall through */ -#endif - case MRB_TT_FIXNUM: - return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1); - default: - mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument"); + return mrb_nil_value(); + } + + beg = mrb_fixnum(mrb_to_int(mrb, a1)); + len = 1; + empty = FALSE; +subseq: + if (mrb_str_beg_len(str_len, &beg, &len) && (empty || len != 0)) { + return mrb_str_byte_subseq(mrb, str, beg, len); + } + else { + return mrb_nil_value(); } - /* not reached */ - return mrb_nil_value(); } /* @@ -163,7 +151,7 @@ mrb_str_concat_m(mrb_state *mrb, mrb_value self) if (mrb_fixnum_p(str)) str = mrb_fixnum_chr(mrb, str); else - str = mrb_string_type(mrb, str); + str = mrb_ensure_string_type(mrb, str); mrb_str_concat(mrb, self, str); return self; } @@ -191,7 +179,7 @@ mrb_str_start_with(mrb_state *mrb, mrb_value self) for (i = 0; i < argc; i++) { size_t len_l, len_r; int ai = mrb_gc_arena_save(mrb); - sub = mrb_string_type(mrb, argv[i]); + sub = mrb_ensure_string_type(mrb, argv[i]); mrb_gc_arena_restore(mrb, ai); len_l = RSTRING_LEN(self); len_r = RSTRING_LEN(sub); @@ -220,7 +208,7 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self) for (i = 0; i < argc; i++) { size_t len_l, len_r; int ai = mrb_gc_arena_save(mrb); - sub = mrb_string_type(mrb, argv[i]); + sub = mrb_ensure_string_type(mrb, argv[i]); mrb_gc_arena_restore(mrb, ai); len_l = RSTRING_LEN(self); len_r = RSTRING_LEN(sub); @@ -235,6 +223,592 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self) return mrb_false_value(); } +enum tr_pattern_type { + TR_UNINITIALIZED = 0, + TR_IN_ORDER = 1, + TR_RANGE = 2, +}; + +/* + #tr Pattern syntax + + <syntax> ::= (<pattern>)* | '^' (<pattern>)* + <pattern> ::= <in order> | <range> + <in order> ::= (<ch>)+ + <range> ::= <ch> '-' <ch> +*/ +struct tr_pattern { + uint8_t type; // 1:in-order, 2:range + mrb_bool flag_reverse : 1; + mrb_bool flag_on_heap : 1; + uint16_t n; + union { + uint16_t start_pos; + char ch[2]; + } val; + struct tr_pattern *next; +}; + +#define STATIC_TR_PATTERN { 0 } + +static inline void +tr_free_pattern(mrb_state *mrb, struct tr_pattern *pat) +{ + while (pat) { + struct tr_pattern *p = pat->next; + if (pat->flag_on_heap) { + mrb_free(mrb, pat); + } + pat = p; + } +} + +static struct tr_pattern* +tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_pattern, mrb_bool flag_reverse_enable) +{ + const char *pattern = RSTRING_PTR(v_pattern); + mrb_int pattern_length = RSTRING_LEN(v_pattern); + mrb_bool flag_reverse = FALSE; + struct tr_pattern *pat1; + mrb_int i = 0; + + if(flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') { + flag_reverse = TRUE; + i++; + } + + while (i < pattern_length) { + /* is range pattern ? */ + mrb_bool const ret_uninit = (ret->type == TR_UNINITIALIZED); + pat1 = ret_uninit + ? ret + : (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern)); + if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') { + if (pat1 == NULL && ret) { + nomem: + tr_free_pattern(mrb, ret); + mrb_exc_raise(mrb, mrb_obj_value(mrb->nomem_err)); + return NULL; /* not reached */ + } + pat1->type = TR_RANGE; + pat1->flag_reverse = flag_reverse; + pat1->flag_on_heap = !ret_uninit; + pat1->n = pattern[i+2] - pattern[i] + 1; + pat1->next = NULL; + pat1->val.ch[0] = pattern[i]; + pat1->val.ch[1] = pattern[i+2]; + i += 3; + } + else { + /* in order pattern. */ + mrb_int start_pos = i++; + mrb_int len; + + while (i < pattern_length) { + if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') + break; + i++; + } + + len = i - start_pos; + if (len > UINT16_MAX) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "tr pattern too long (max 65536)"); + } + if (pat1 == NULL && ret) { + goto nomem; + } + pat1->type = TR_IN_ORDER; + pat1->flag_reverse = flag_reverse; + pat1->flag_on_heap = !ret_uninit; + pat1->n = len; + pat1->next = NULL; + pat1->val.start_pos = start_pos; + } + + if (ret == NULL || ret_uninit) { + ret = pat1; + } + else { + struct tr_pattern *p = ret; + while (p->next != NULL) { + p = p->next; + } + p->next = pat1; + } + } + + return ret; +} + +static inline mrb_int +tr_find_character(const struct tr_pattern *pat, const char *pat_str, int ch) +{ + mrb_int ret = -1; + mrb_int n_sum = 0; + mrb_int flag_reverse = pat ? pat->flag_reverse : 0; + + while (pat != NULL) { + if (pat->type == TR_IN_ORDER) { + int i; + for (i = 0; i < pat->n; i++) { + if (pat_str[pat->val.start_pos + i] == ch) ret = n_sum + i; + } + } + else if (pat->type == TR_RANGE) { + if (pat->val.ch[0] <= ch && ch <= pat->val.ch[1]) + ret = n_sum + ch - pat->val.ch[0]; + } + else { + mrb_assert(pat->type == TR_UNINITIALIZED); + } + n_sum += pat->n; + pat = pat->next; + } + + if (flag_reverse) { + return (ret < 0) ? MRB_INT_MAX : -1; + } + return ret; +} + +static inline mrb_int +tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th) +{ + mrb_int n_sum = 0; + + while (pat != NULL) { + if (n_th < (n_sum + pat->n)) { + mrb_int i = (n_th - n_sum); + + switch (pat->type) { + case TR_IN_ORDER: + return pat_str[pat->val.start_pos + i]; + case TR_RANGE: + return pat->val.ch[0]+i; + case TR_UNINITIALIZED: + return -1; + } + } + if (pat->next == NULL) { + switch (pat->type) { + case TR_IN_ORDER: + return pat_str[pat->val.start_pos + pat->n - 1]; + case TR_RANGE: + return pat->val.ch[1]; + case TR_UNINITIALIZED: + return -1; + } + } + n_sum += pat->n; + pat = pat->next; + } + + return -1; +} + +static inline void +tr_bitmap_set(uint8_t bitmap[32], uint8_t ch) +{ + uint8_t idx1 = ch / 8; + uint8_t idx2 = ch % 8; + bitmap[idx1] |= (1<<idx2); +} + +static inline mrb_bool +tr_bitmap_detect(uint8_t bitmap[32], uint8_t ch) +{ + uint8_t idx1 = ch / 8; + uint8_t idx2 = ch % 8; + if (bitmap[idx1] & (1<<idx2)) + return TRUE; + return FALSE; +} + +/* compile patter to bitmap */ +static void +tr_compile_pattern(const struct tr_pattern *pat, mrb_value pstr, uint8_t bitmap[32]) +{ + const char *pattern = RSTRING_PTR(pstr); + mrb_int flag_reverse = pat ? pat->flag_reverse : 0; + int i; + + for (i=0; i<32; i++) { + bitmap[i] = 0; + } + while (pat != NULL) { + if (pat->type == TR_IN_ORDER) { + for (i = 0; i < pat->n; i++) { + tr_bitmap_set(bitmap, pattern[pat->val.start_pos + i]); + } + } + else if (pat->type == TR_RANGE) { + for (i = pat->val.ch[0]; i < pat->val.ch[1]; i++) { + tr_bitmap_set(bitmap, i); + } + } + else { + mrb_assert(pat->type == TR_UNINITIALIZED); + } + pat = pat->next; + } + + if (flag_reverse) { + for (i=0; i<32; i++) { + bitmap[i] ^= 0xff; + } + } +} + +static mrb_bool +str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze) +{ + struct tr_pattern pat = STATIC_TR_PATTERN; + struct tr_pattern rep_storage = STATIC_TR_PATTERN; + char *s; + mrb_int len; + mrb_int i; + mrb_int j; + mrb_bool flag_changed = FALSE; + mrb_int lastch = -1; + struct tr_pattern *rep; + + mrb_str_modify(mrb, mrb_str_ptr(str)); + tr_parse_pattern(mrb, &pat, p1, TRUE); + rep = tr_parse_pattern(mrb, &rep_storage, p2, FALSE); + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + + for (i=j=0; i<len; i++,j++) { + mrb_int n = tr_find_character(&pat, RSTRING_PTR(p1), s[i]); + + if (i>j) s[j] = s[i]; + if (n >= 0) { + flag_changed = TRUE; + if (rep == NULL) { + j--; + } + else { + mrb_int c = tr_get_character(rep, RSTRING_PTR(p2), n); + + if (c < 0 || (squeeze && c == lastch)) { + j--; + continue; + } + if (c > 0x80) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%S) out of range", + mrb_fixnum_value((mrb_int)c)); + } + lastch = c; + s[i] = (char)c; + } + } + } + + tr_free_pattern(mrb, &pat); + tr_free_pattern(mrb, rep); + + if (flag_changed) { + RSTR_SET_LEN(RSTRING(str), j); + RSTRING_PTR(str)[j] = 0; + } + return flag_changed; +} + +/* + * call-seq: + * str.tr(from_str, to_str) => new_str + * + * Returns a copy of str with the characters in from_str replaced by the + * corresponding characters in to_str. If to_str is shorter than from_str, + * it is padded with its last character in order to maintain the + * correspondence. + * + * "hello".tr('el', 'ip') #=> "hippo" + * "hello".tr('aeiou', '*') #=> "h*ll*" + * "hello".tr('aeiou', 'AA*') #=> "hAll*" + * + * Both strings may use the c1-c2 notation to denote ranges of characters, + * and from_str may start with a ^, which denotes all characters except + * those listed. + * + * "hello".tr('a-y', 'b-z') #=> "ifmmp" + * "hello".tr('^aeiou', '*') #=> "*e**o" + * + * The backslash character \ can be used to escape ^ or - and is otherwise + * ignored unless it appears at the end of a range or the end of the + * from_str or to_str: + * + * + * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld" + * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld" + * + * "hello\r\nworld".tr("\r", "") #=> "hello\nworld" + * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold" + * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld" + * + * "X['\\b']".tr("X\\", "") #=> "['b']" + * "X['\\b']".tr("X-\\]", "") #=> "'b'" + * + * Note: conversion is effective only in ASCII region. + */ +static mrb_value +mrb_str_tr(mrb_state *mrb, mrb_value str) +{ + mrb_value dup; + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + dup = mrb_str_dup(mrb, str); + str_tr(mrb, dup, p1, p2, FALSE); + return dup; +} + +/* + * call-seq: + * str.tr!(from_str, to_str) -> str or nil + * + * Translates str in place, using the same rules as String#tr. + * Returns str, or nil if no changes were made. + */ +static mrb_value +mrb_str_tr_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + if (str_tr(mrb, str, p1, p2, FALSE)) { + return str; + } + return mrb_nil_value(); +} + +/* + * call-seq: + * str.tr_s(from_str, to_str) -> new_str + * + * Processes a copy of str as described under String#tr, then removes + * duplicate characters in regions that were affected by the translation. + * + * "hello".tr_s('l', 'r') #=> "hero" + * "hello".tr_s('el', '*') #=> "h*o" + * "hello".tr_s('el', 'hx') #=> "hhxo" + */ +static mrb_value +mrb_str_tr_s(mrb_state *mrb, mrb_value str) +{ + mrb_value dup; + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + dup = mrb_str_dup(mrb, str); + str_tr(mrb, dup, p1, p2, TRUE); + return dup; +} + +/* + * call-seq: + * str.tr_s!(from_str, to_str) -> str or nil + * + * Performs String#tr_s processing on str in place, returning + * str, or nil if no changes were made. + */ +static mrb_value +mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value p1, p2; + + mrb_get_args(mrb, "SS", &p1, &p2); + if (str_tr(mrb, str, p1, p2, TRUE)) { + return str; + } + return mrb_nil_value(); +} + +static mrb_bool +str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) +{ + struct tr_pattern pat_storage = STATIC_TR_PATTERN; + struct tr_pattern *pat = NULL; + mrb_int i, j; + char *s; + mrb_int len; + mrb_bool flag_changed = FALSE; + mrb_int lastch = -1; + uint8_t bitmap[32]; + + mrb_str_modify(mrb, mrb_str_ptr(str)); + if (!mrb_nil_p(v_pat)) { + pat = tr_parse_pattern(mrb, &pat_storage, v_pat, TRUE); + tr_compile_pattern(pat, v_pat, bitmap); + tr_free_pattern(mrb, pat); + } + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + + if (pat) { + for (i=j=0; i<len; i++,j++) { + if (i>j) s[j] = s[i]; + if (tr_bitmap_detect(bitmap, s[i]) && s[i] == lastch) { + flag_changed = TRUE; + j--; + } + lastch = s[i]; + } + } + else { + for (i=j=0; i<len; i++,j++) { + if (i>j) s[j] = s[i]; + if (s[i] >= 0 && s[i] == lastch) { + flag_changed = TRUE; + j--; + } + lastch = s[i]; + } + } + + if (flag_changed) { + RSTR_SET_LEN(RSTRING(str), j); + RSTRING_PTR(str)[j] = 0; + } + return flag_changed; +} + +/* + * call-seq: + * str.squeeze([other_str]) -> new_str + * + * Builds a set of characters from the other_str + * parameter(s) using the procedure described for String#count. Returns a + * new string where runs of the same character that occur in this set are + * replaced by a single character. If no arguments are given, all runs of + * identical characters are replaced by a single character. + * + * "yellow moon".squeeze #=> "yelow mon" + * " now is the".squeeze(" ") #=> " now is the" + * "putters shoot balls".squeeze("m-z") #=> "puters shot balls" + */ +static mrb_value +mrb_str_squeeze(mrb_state *mrb, mrb_value str) +{ + mrb_value pat = mrb_nil_value(); + mrb_value dup; + + mrb_get_args(mrb, "|S", &pat); + dup = mrb_str_dup(mrb, str); + str_squeeze(mrb, dup, pat); + return dup; +} + +/* + * call-seq: + * str.squeeze!([other_str]) -> str or nil + * + * Squeezes str in place, returning either str, or nil if no + * changes were made. + */ +static mrb_value +mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value pat = mrb_nil_value(); + + mrb_get_args(mrb, "|S", &pat); + if (str_squeeze(mrb, str, pat)) { + return str; + } + return mrb_nil_value(); +} + +static mrb_bool +str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) +{ + struct tr_pattern pat = STATIC_TR_PATTERN; + mrb_int i, j; + char *s; + mrb_int len; + mrb_bool flag_changed = FALSE; + uint8_t bitmap[32]; + + mrb_str_modify(mrb, mrb_str_ptr(str)); + tr_parse_pattern(mrb, &pat, v_pat, TRUE); + tr_compile_pattern(&pat, v_pat, bitmap); + tr_free_pattern(mrb, &pat); + + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + + for (i=j=0; i<len; i++,j++) { + if (i>j) s[j] = s[i]; + if (tr_bitmap_detect(bitmap, s[i])) { + flag_changed = TRUE; + j--; + } + } + if (flag_changed) { + RSTR_SET_LEN(RSTRING(str), j); + RSTRING_PTR(str)[j] = 0; + } + return flag_changed; +} + +static mrb_value +mrb_str_delete(mrb_state *mrb, mrb_value str) +{ + mrb_value pat; + mrb_value dup; + + mrb_get_args(mrb, "S", &pat); + dup = mrb_str_dup(mrb, str); + str_delete(mrb, dup, pat); + return dup; +} + +static mrb_value +mrb_str_delete_bang(mrb_state *mrb, mrb_value str) +{ + mrb_value pat; + + mrb_get_args(mrb, "S", &pat); + if (str_delete(mrb, str, pat)) { + return str; + } + return mrb_nil_value(); +} + +/* + * call_seq: + * str.count([other_str]) -> integer + * + * Each other_str parameter defines a set of characters to count. The + * intersection of these sets defines the characters to count in str. Any + * other_str that starts with a caret ^ is negated. The sequence c1-c2 + * means all characters between c1 and c2. The backslash character \ can + * be used to escape ^ or - and is otherwise ignored unless it appears at + * the end of a sequence or the end of a other_str. + */ +static mrb_value +mrb_str_count(mrb_state *mrb, mrb_value str) +{ + mrb_value v_pat = mrb_nil_value(); + mrb_int i; + char *s; + mrb_int len; + mrb_int count = 0; + struct tr_pattern pat = STATIC_TR_PATTERN; + uint8_t bitmap[32]; + + mrb_get_args(mrb, "S", &v_pat); + tr_parse_pattern(mrb, &pat, v_pat, TRUE); + tr_compile_pattern(&pat, v_pat, bitmap); + tr_free_pattern(mrb, &pat); + + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + for (i = 0; i < len; i++) { + if (tr_bitmap_detect(bitmap, s[i])) count++; + } + return mrb_fixnum_value(count); +} + static mrb_value mrb_str_hex(mrb_state *mrb, mrb_value self) { @@ -620,6 +1194,15 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "concat", mrb_str_concat_m, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "<<", mrb_str_concat_m, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "count", mrb_str_count, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "tr", mrb_str_tr, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "squeeze", mrb_str_squeeze, MRB_ARGS_OPT(1)); + mrb_define_method(mrb, s, "squeeze!", mrb_str_squeeze_bang, MRB_ARGS_OPT(1)); + mrb_define_method(mrb, s, "delete", mrb_str_delete, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "delete!", mrb_str_delete_bang, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST()); mrb_define_method(mrb, s, "hex", mrb_str_hex, MRB_ARGS_NONE()); @@ -627,8 +1210,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "succ", mrb_str_succ, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "succ!", mrb_str_succ_bang, MRB_ARGS_NONE()); - mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next"), mrb_intern_lit(mrb, "succ")); - mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next!"), mrb_intern_lit(mrb, "succ!")); + mrb_define_alias(mrb, s, "next", "succ"); + mrb_define_alias(mrb, s, "next!", "succ!"); mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "delete_prefix!", mrb_str_del_prefix_bang, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "delete_prefix", mrb_str_del_prefix, MRB_ARGS_REQ(1)); diff --git a/mrbgems/mruby-string-ext/test/range.rb b/mrbgems/mruby-string-ext/test/range.rb new file mode 100644 index 000000000..80c286850 --- /dev/null +++ b/mrbgems/mruby-string-ext/test/range.rb @@ -0,0 +1,26 @@ +assert('Range#max') do + # returns the maximum value in the range when called with no arguments + assert_equal 'l', ('f'..'l').max + assert_equal 'e', ('a'...'f').max + + # returns nil when the endpoint is less than the start point + assert_equal nil, ('z'..'l').max +end + +assert('Range#max given a block') do + # returns nil when the endpoint is less than the start point + assert_equal nil, (('z'..'l').max { |x, y| x <=> y }) +end + +assert('Range#min') do + # returns the minimum value in the range when called with no arguments + assert_equal 'f', ('f'..'l').min + + # returns nil when the start point is greater than the endpoint + assert_equal nil, ('z'..'l').min +end + +assert('Range#min given a block') do + # returns nil when the start point is greater than the endpoint + assert_equal nil, (('z'..'l').min { |x, y| x <=> y }) +end diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index b6146fb90..9a324c46d 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -2,14 +2,7 @@ ## # String(Ext) Test -UTF8STRING = ("\343\201\202".size == 1) - -assert('String.try_convert') do - assert_nil String.try_convert(nil) - assert_nil String.try_convert(:foo) - assert_equal "", String.try_convert("") - assert_equal "1,2,3", String.try_convert("1,2,3") -end +UTF8STRING = __ENCODING__ == "UTF-8" assert('String#getbyte') do str1 = "hello" @@ -31,83 +24,132 @@ assert('String#setbyte') do assert_equal("Hello", str1) end -assert("String#setbyte raises IndexError if arg conversion resizes String") do - $s = "01234\n" - class Tmp - def to_i - $s.chomp! '' - 95 - end - end - tmp = Tmp.new - assert_raise(IndexError) { $s.setbyte(5, tmp) } -end - assert('String#byteslice') do str1 = "hello" + str2 = "\u3042ab" # "\xE3\x81\x82ab" + + assert_equal("h", str1.byteslice(0)) assert_equal("e", str1.byteslice(1)) + assert_equal(nil, str1.byteslice(5)) assert_equal("o", str1.byteslice(-1)) + assert_equal(nil, str1.byteslice(-6)) + assert_equal("\xE3", str2.byteslice(0)) + assert_equal("\x81", str2.byteslice(1)) + assert_equal(nil, str2.byteslice(5)) + assert_equal("b", str2.byteslice(-1)) + assert_equal(nil, str2.byteslice(-6)) + + assert_equal("", str1.byteslice(0, 0)) + assert_equal(str1, str1.byteslice(0, 6)) + assert_equal("el", str1.byteslice(1, 2)) + assert_equal("", str1.byteslice(5, 1)) + assert_equal("o", str1.byteslice(-1, 6)) + assert_equal(nil, str1.byteslice(-6, 1)) + assert_equal(nil, str1.byteslice(0, -1)) + assert_equal("", str2.byteslice(0, 0)) + assert_equal(str2, str2.byteslice(0, 6)) + assert_equal("\x81\x82", str2.byteslice(1, 2)) + assert_equal("", str2.byteslice(5, 1)) + assert_equal("b", str2.byteslice(-1, 6)) + assert_equal(nil, str2.byteslice(-6, 1)) + assert_equal(nil, str2.byteslice(0, -1)) + assert_equal("ell", str1.byteslice(1..3)) assert_equal("el", str1.byteslice(1...3)) + assert_equal("h", str1.byteslice(0..0)) + assert_equal("", str1.byteslice(5..0)) + assert_equal("o", str1.byteslice(4..5)) + assert_equal(nil, str1.byteslice(6..0)) + assert_equal("", str1.byteslice(-1..0)) + assert_equal("llo", str1.byteslice(-3..5)) + assert_equal("\x81\x82a", str2.byteslice(1..3)) + assert_equal("\x81\x82", str2.byteslice(1...3)) + assert_equal("\xE3", str2.byteslice(0..0)) + assert_equal("", str2.byteslice(5..0)) + assert_equal("b", str2.byteslice(4..5)) + assert_equal(nil, str2.byteslice(6..0)) + assert_equal("", str2.byteslice(-1..0)) + assert_equal("\x82ab", str2.byteslice(-3..5)) + + assert_raise(ArgumentError) { str1.byteslice } + assert_raise(ArgumentError) { str1.byteslice(1, 2, 3) } + assert_raise(TypeError) { str1.byteslice("1") } + assert_raise(TypeError) { str1.byteslice("1", 2) } + assert_raise(TypeError) { str1.byteslice(1, "2") } + assert_raise(TypeError) { str1.byteslice(1..2, 3) } + + skip unless Object.const_defined?(:Float) + assert_equal("o", str1.byteslice(4.0)) + assert_equal("\x82ab", str2.byteslice(2.0, 3.0)) end assert('String#dump') do - ("\1" * 100).dump # should not raise an exception - regress #1210 - "\0".inspect == "\"\\000\"" and - "foo".dump == "\"foo\"" + assert_equal("\"\\x00\"", "\0".dump) + assert_equal("\"foo\"", "foo".dump) + assert_nothing_raised { ("\1" * 100).dump } # regress #1210 end assert('String#strip') do s = " abc " - "".strip == "" and " \t\r\n\f\v".strip == "" and - "\0a\0".strip == "\0a" and - "abc".strip == "abc" and - " abc".strip == "abc" and - "abc ".strip == "abc" and - " abc ".strip == "abc" and - s == " abc " + assert_equal("abc", s.strip) + assert_equal(" abc ", s) + assert_equal("", "".strip) + assert_equal("", " \t\r\n\f\v".strip) + assert_equal("\0a", "\0a\0".strip) + assert_equal("abc", "abc".strip) + assert_equal("abc", " abc".strip) + assert_equal("abc", "abc ".strip) end assert('String#lstrip') do s = " abc " - s.lstrip - "".lstrip == "" and " \t\r\n\f\v".lstrip == "" and - "\0a\0".lstrip == "\0a\0" and - "abc".lstrip == "abc" and - " abc".lstrip == "abc" and - "abc ".lstrip == "abc " and - " abc ".lstrip == "abc " and - s == " abc " + assert_equal("abc ", s.lstrip) + assert_equal(" abc ", s) + assert_equal("", "".lstrip) + assert_equal("", " \t\r\n\f\v".lstrip) + assert_equal("\0a\0", "\0a\0".lstrip) + assert_equal("abc", "abc".lstrip) + assert_equal("abc", " abc".lstrip) + assert_equal("abc ", "abc ".lstrip) end assert('String#rstrip') do s = " abc " - s.rstrip - "".rstrip == "" and " \t\r\n\f\v".rstrip == "" and - "\0a\0".rstrip == "\0a" and - "abc".rstrip == "abc" and - " abc".rstrip == " abc" and - "abc ".rstrip == "abc" and - " abc ".rstrip == " abc" and - s == " abc " + assert_equal(" abc", s.rstrip) + assert_equal(" abc ", s) + assert_equal("", "".rstrip) + assert_equal("", " \t\r\n\f\v".rstrip) + assert_equal("\0a", "\0a\0".rstrip) + assert_equal("abc", "abc".rstrip) + assert_equal(" abc", " abc".rstrip) + assert_equal("abc", "abc ".rstrip) end assert('String#strip!') do s = " abc " t = "abc" - s.strip! == "abc" and s == "abc" and t.strip! == nil + assert_equal("abc", s.strip!) + assert_equal("abc", s) + assert_nil(t.strip!) + assert_equal("abc", t) end assert('String#lstrip!') do s = " abc " t = "abc " - s.lstrip! == "abc " and s == "abc " and t.lstrip! == nil + assert_equal("abc ", s.lstrip!) + assert_equal("abc ", s) + assert_nil(t.lstrip!) + assert_equal("abc ", t) end assert('String#rstrip!') do s = " abc " t = " abc" - s.rstrip! == " abc" and s == " abc" and t.rstrip! == nil + assert_equal(" abc", s.rstrip!) + assert_equal(" abc", s) + assert_nil(t.rstrip!) + assert_equal(" abc", t) end assert('String#swapcase') do @@ -126,12 +168,6 @@ assert('String#concat') do assert_equal "Hello World!", "Hello " << "World" << 33 assert_equal "Hello World!", "Hello ".concat("World").concat(33) - o = Object.new - def o.to_str - "to_str" - end - assert_equal "hi to_str", "hi " << o - assert_raise(TypeError) { "".concat(Object.new) } end @@ -140,11 +176,69 @@ assert('String#casecmp') do assert_equal 0, "aBcDeF".casecmp("abcdef") assert_equal(-1, "abcdef".casecmp("abcdefg")) assert_equal 0, "abcdef".casecmp("ABCDEF") - o = Object.new - def o.to_str - "ABCDEF" - end - assert_equal 0, "abcdef".casecmp(o) +end + +assert('String#count') do + s = "abccdeff123" + assert_equal 0, s.count("") + assert_equal 1, s.count("a") + assert_equal 2, s.count("ab") + assert_equal 9, s.count("^c") + assert_equal 8, s.count("a-z") + assert_equal 4, s.count("a0-9") +end + +assert('String#tr') do + assert_equal "ABC", "abc".tr('a-z', 'A-Z') + assert_equal "hippo", "hello".tr('el', 'ip') + assert_equal "Ruby", "Lisp".tr("Lisp", "Ruby") + assert_equal "*e**o", "hello".tr('^aeiou', '*') + assert_equal "heo", "hello".tr('l', '') +end + +assert('String#tr!') do + s = "abcdefghijklmnopqR" + assert_equal "ab12222hijklmnopqR", s.tr!("cdefg", "12") + assert_equal "ab12222hijklmnopqR", s +end + +assert('String#tr_s') do + assert_equal "hero", "hello".tr_s('l', 'r') + assert_equal "h*o", "hello".tr_s('el', '*') + assert_equal "hhxo", "hello".tr_s('el', 'hx') +end + +assert('String#tr_s!') do + s = "hello" + assert_equal "hero", s.tr_s!('l', 'r') + assert_equal "hero", s + assert_nil s.tr_s!('l', 'r') +end + +assert('String#squeeze') do + assert_equal "yelow mon", "yellow moon".squeeze + assert_equal " now is the", " now is the".squeeze(" ") + assert_equal "puters shot balls", "putters shoot balls".squeeze("m-z") +end + +assert('String#squeeze!') do + s = " now is the" + assert_equal " now is the", s.squeeze!(" ") + assert_equal " now is the", s +end + +assert('String#delete') do + assert_equal "he", "hello".delete("lo") + assert_equal "hll", "hello".delete("aeiou") + assert_equal "ll", "hello".delete("^l") + assert_equal "ho", "hello".delete("ej-m") +end + +assert('String#delete!') do + s = "hello" + assert_equal "he", s.delete!("lo") + assert_equal "he", s + assert_nil s.delete!("lz") end assert('String#start_with?') do @@ -614,19 +708,19 @@ assert('String#chars(UTF-8)') do end if UTF8STRING assert('String#each_char') do - s = "" + chars = [] "hello!".each_char do |x| - s += x + chars << x end - assert_equal "hello!", s + assert_equal ["h", "e", "l", "l", "o", "!"], chars end assert('String#each_char(UTF-8)') do - s = "" + chars = [] "こんにちは世界!".each_char do |x| - s += x + chars << x end - assert_equal "こんにちは世界!", s + assert_equal ["こ", "ん", "に", "ち", "は", "世", "界", "!"], chars end if UTF8STRING assert('String#codepoints') do |
