From 270131253f62d806ea480ef4793e0b39cd068ee4 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Sat, 27 Apr 2019 12:50:02 +0900 Subject: Remove duplicated `String#each_char` --- mrbgems/mruby-string-ext/test/string.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 44ca1fde2..02777e594 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -657,19 +657,19 @@ assert('String#chars(UTF-8)') do end if UTF8STRING assert('String#each_char') do - s = "" + chars = [] "hello!".each_char do |x| - s += x + chars << x end - assert_equal "hello!", s + assert_equal ["h", "e", "l", "l", "o", "!"], chars end assert('String#each_char(UTF-8)') do - s = "" + chars = [] "こんにちは世界!".each_char do |x| - s += x + chars << x end - assert_equal "こんにちは世界!", s + assert_equal ["こ", "ん", "に", "ち", "は", "世", "界", "!"], chars end if UTF8STRING assert('String#codepoints') do -- cgit v1.2.3 From 75df13a97334c162b2cf743c3e37c4933a4b0d1c Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Tue, 25 Jun 2019 22:58:21 +0900 Subject: Fix `String#byteslice` with `MRB_UTF8_STRING` and some edge cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Example: $ bin/mruby -e ' p "あa".byteslice(1) p "bar".byteslice(3) p "bar".byteslice(4..0) ' Before this patch: "a" "" RangeError (4..0 out of range) After this patch (same as Ruby): "\x81" nil nil --- include/mruby/string.h | 3 ++ mrbgems/mruby-string-ext/src/string.c | 58 +++++++++++++-------------------- mrbgems/mruby-string-ext/test/string.rb | 51 +++++++++++++++++++++++++++++ src/string.c | 49 ++++++++++++++-------------- 4 files changed, 102 insertions(+), 59 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/include/mruby/string.h b/include/mruby/string.h index 22445f654..b563541cb 100644 --- a/include/mruby/string.h +++ b/include/mruby/string.h @@ -438,6 +438,9 @@ mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str); #define mrb_str_buf_cat(mrb, str, ptr, len) mrb_str_cat(mrb, str, ptr, len) #define mrb_str_buf_append(mrb, str, str2) mrb_str_cat_str(mrb, str, str2) +mrb_bool mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp); +mrb_value mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len); + #ifdef MRB_UTF8_STRING mrb_int mrb_utf8_len(const char *str, mrb_int byte_len); #endif diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index d9ebb7392..50a4e5582 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -42,44 +42,32 @@ mrb_str_setbyte(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_byteslice(mrb_state *mrb, mrb_value str) { - mrb_value a1; - mrb_int len; - - if (mrb_get_argc(mrb) == 2) { - mrb_int pos; - mrb_get_args(mrb, "ii", &pos, &len); - return mrb_str_substr(mrb, str, pos, len); + mrb_value a1, a2; + mrb_int str_len = RSTRING_LEN(str), beg, len; + mrb_bool empty = TRUE; + + if (mrb_get_args(mrb, "o|o", &a1, &a2) == 2) { + beg = mrb_fixnum(mrb_to_int(mrb, a1)); + len = mrb_fixnum(mrb_to_int(mrb, a2)); + goto subseq; } - mrb_get_args(mrb, "o|i", &a1, &len); - switch (mrb_type(a1)) { - case MRB_TT_RANGE: - { - mrb_int beg; - - len = RSTRING_LEN(str); - switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) { - case MRB_RANGE_TYPE_MISMATCH: - break; - case MRB_RANGE_OK: - return mrb_str_substr(mrb, str, beg, len); - case MRB_RANGE_OUT: - mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1); - break; - } - return mrb_nil_value(); + if (mrb_type(a1) == MRB_TT_RANGE) { + if (mrb_range_beg_len(mrb, a1, &beg, &len, str_len, TRUE) == MRB_RANGE_OK) { + goto subseq; } -#ifndef MRB_WITHOUT_FLOAT - case MRB_TT_FLOAT: - a1 = mrb_fixnum_value((mrb_int)mrb_float(a1)); - /* fall through */ -#endif - case MRB_TT_FIXNUM: - return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1); - default: - mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument"); + return mrb_nil_value(); + } + + beg = mrb_fixnum(mrb_to_int(mrb, a1)); + len = 1; + empty = FALSE; +subseq: + if (mrb_str_beg_len(str_len, &beg, &len) && (empty || len != 0)) { + return mrb_str_byte_subseq(mrb, str, beg, len); + } + else { + return mrb_nil_value(); } - /* not reached */ - return mrb_nil_value(); } /* diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 02777e594..bf633bcef 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -26,10 +26,61 @@ end assert('String#byteslice') do str1 = "hello" + str2 = "\u3042ab" # "\xE3\x81\x82ab" + + assert_equal("h", str1.byteslice(0)) assert_equal("e", str1.byteslice(1)) + assert_equal(nil, str1.byteslice(5)) assert_equal("o", str1.byteslice(-1)) + assert_equal(nil, str1.byteslice(-6)) + assert_equal("\xE3", str2.byteslice(0)) + assert_equal("\x81", str2.byteslice(1)) + assert_equal(nil, str2.byteslice(5)) + assert_equal("b", str2.byteslice(-1)) + assert_equal(nil, str2.byteslice(-6)) + + assert_equal("", str1.byteslice(0, 0)) + assert_equal(str1, str1.byteslice(0, 6)) + assert_equal("el", str1.byteslice(1, 2)) + assert_equal("", str1.byteslice(5, 1)) + assert_equal("o", str1.byteslice(-1, 6)) + assert_equal(nil, str1.byteslice(-6, 1)) + assert_equal(nil, str1.byteslice(0, -1)) + assert_equal("", str2.byteslice(0, 0)) + assert_equal(str2, str2.byteslice(0, 6)) + assert_equal("\x81\x82", str2.byteslice(1, 2)) + assert_equal("", str2.byteslice(5, 1)) + assert_equal("b", str2.byteslice(-1, 6)) + assert_equal(nil, str2.byteslice(-6, 1)) + assert_equal(nil, str2.byteslice(0, -1)) + assert_equal("ell", str1.byteslice(1..3)) assert_equal("el", str1.byteslice(1...3)) + assert_equal("h", str1.byteslice(0..0)) + assert_equal("", str1.byteslice(5..0)) + assert_equal("o", str1.byteslice(4..5)) + assert_equal(nil, str1.byteslice(6..0)) + assert_equal("", str1.byteslice(-1..0)) + assert_equal("llo", str1.byteslice(-3..5)) + assert_equal("\x81\x82a", str2.byteslice(1..3)) + assert_equal("\x81\x82", str2.byteslice(1...3)) + assert_equal("\xE3", str2.byteslice(0..0)) + assert_equal("", str2.byteslice(5..0)) + assert_equal("b", str2.byteslice(4..5)) + assert_equal(nil, str2.byteslice(6..0)) + assert_equal("", str2.byteslice(-1..0)) + assert_equal("\x82ab", str2.byteslice(-3..5)) + + assert_raise(ArgumentError) { str1.byteslice } + assert_raise(ArgumentError) { str1.byteslice(1, 2, 3) } + assert_raise(TypeError) { str1.byteslice("1") } + assert_raise(TypeError) { str1.byteslice("1", 2) } + assert_raise(TypeError) { str1.byteslice(1, "2") } + assert_raise(TypeError) { str1.byteslice(1..2, 3) } + + skip unless Object.const_defined?(:Float) + assert_equal("o", str1.byteslice(4.0)) + assert_equal("\x82ab", str2.byteslice(2.0, 3.0)) end assert('String#dump') do diff --git a/src/string.c b/src/string.c index ed58c484b..f5fb936a6 100644 --- a/src/string.c +++ b/src/string.c @@ -410,8 +410,8 @@ str_make_shared(mrb_state *mrb, struct RString *orig, struct RString *s) } } -static mrb_value -byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +mrb_value +mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { struct RString *orig, *s; @@ -434,32 +434,33 @@ str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) beg = chars2bytes(str, 0, beg); len = chars2bytes(str, beg, len); - return byte_subseq(mrb, str, beg, len); + return mrb_str_byte_subseq(mrb, str, beg, len); } #else -#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len) +#define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len) #endif -static mrb_value -str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +mrb_bool +mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp) { - mrb_int clen = RSTRING_CHAR_LEN(str); - - if (len < 0) return mrb_nil_value(); - if (clen == 0) { - len = 0; + if (str_len < *begp || *lenp < 0) return FALSE; + if (*begp < 0) { + *begp += str_len; + if (*begp < 0) return FALSE; } - if (beg > clen) return mrb_nil_value(); - if (beg < 0) { - beg += clen; - if (beg < 0) return mrb_nil_value(); + if (*lenp > str_len - *begp) + *lenp = str_len - *begp; + if (*lenp <= 0) { + *lenp = 0; } - if (len > clen - beg) - len = clen - beg; - if (len <= 0) { - len = 0; - } - return str_subseq(mrb, str, beg, len); + return TRUE; +} + +static mrb_value +str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) +{ + return mrb_str_beg_len(RSTRING_CHAR_LEN(str), &beg, &len) ? + str_subseq(mrb, str, beg, len) : mrb_nil_value(); } MRB_API mrb_int @@ -1917,7 +1918,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) } } else if (ISSPACE(c)) { - mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg)); + mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, beg, end-beg)); mrb_gc_arena_restore(mrb, ai); skip = TRUE; beg = idx; @@ -1942,7 +1943,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) else { end = chars2bytes(str, idx, 1); } - mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end)); + mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, idx, end)); mrb_gc_arena_restore(mrb, ai); idx += end + pat_len; if (lim_p && lim <= ++i) break; @@ -1954,7 +1955,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) tmp = mrb_str_new_empty(mrb, str); } else { - tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); + tmp = mrb_str_byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); } mrb_ary_push(mrb, result, tmp); } -- cgit v1.2.3 From bc3176da630e3e055d58aa065ff897aec66df280 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Fri, 28 Jun 2019 19:26:29 +0900 Subject: Use `__ENCODING__` in tests It cannot be used for `String#size` test if judging whether or not `MRB_UTF8_STRING` is defined by result of `String#size`. --- mrbgems/mruby-string-ext/test/string.rb | 2 +- mrbgems/mruby-symbol-ext/test/symbol.rb | 2 +- test/t/string.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index bf633bcef..9a324c46d 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -2,7 +2,7 @@ ## # String(Ext) Test -UTF8STRING = ("\343\201\202".size == 1) +UTF8STRING = __ENCODING__ == "UTF-8" assert('String#getbyte') do str1 = "hello" diff --git a/mrbgems/mruby-symbol-ext/test/symbol.rb b/mrbgems/mruby-symbol-ext/test/symbol.rb index 61ecad247..db686e5f4 100644 --- a/mrbgems/mruby-symbol-ext/test/symbol.rb +++ b/mrbgems/mruby-symbol-ext/test/symbol.rb @@ -14,7 +14,7 @@ end assert("Symbol##{n}") do assert_equal 5, :hello.__send__(n) assert_equal 4, :"aA\0b".__send__(n) - if "あ".size == 1 # enable MRB_UTF8_STRING? + if __ENCODING__ == "UTF-8" assert_equal 8, :"こんにちは世界!".__send__(n) assert_equal 4, :"aあ\0b".__send__(n) else diff --git a/test/t/string.rb b/test/t/string.rb index 7ef236dbe..81699f17e 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -2,7 +2,7 @@ ## # String ISO Test -UTF8STRING = ("\343\201\202".size == 1) +UTF8STRING = __ENCODING__ == "UTF-8" assert('String', '15.2.10') do assert_equal Class, String.class -- cgit v1.2.3 From 8ab846b5b92dcbe7c9ad927af99e6f06143b18af Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Fri, 19 Jul 2019 21:34:25 +0900 Subject: Refine `String#chr` test and separate `Fixnum#chr` test --- mrbgems/mruby-string-ext/test/fixnum.rb | 3 +++ mrbgems/mruby-string-ext/test/string.rb | 9 +++------ 2 files changed, 6 insertions(+), 6 deletions(-) create mode 100644 mrbgems/mruby-string-ext/test/fixnum.rb (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/test/fixnum.rb b/mrbgems/mruby-string-ext/test/fixnum.rb new file mode 100644 index 000000000..9036b1a06 --- /dev/null +++ b/mrbgems/mruby-string-ext/test/fixnum.rb @@ -0,0 +1,3 @@ +assert('Fixnum#chr') do + assert_equal "a", 97.chr +end diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 9a324c46d..2eb35f840 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -298,12 +298,6 @@ assert('String#oct') do assert_equal (-8), "-10".oct end -assert('String#chr') do - assert_equal "a", "abcde".chr - # test Fixnum#chr as well - assert_equal "a", 97.chr -end - assert('String#lines') do assert_equal ["Hel\n", "lo\n", "World!"], "Hel\nlo\nWorld!".lines assert_equal ["Hel\n", "lo\n", "World!\n"], "Hel\nlo\nWorld!\n".lines @@ -681,8 +675,11 @@ assert('String#ord(UTF-8)') do end if UTF8STRING assert('String#chr') do + assert_equal "a", "abcde".chr assert_equal "h", "hello!".chr + assert_equal "", "".chr end + assert('String#chr(UTF-8)') do assert_equal "こ", "こんにちは世界!".chr end if UTF8STRING -- cgit v1.2.3 From e86aa61f203ec1589d37798ceb8b40385c7f85e0 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Tue, 23 Jul 2019 20:16:46 +0900 Subject: Add encoding argument to `Integral#chr` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, `Integral#chr` in mruby changes behavior by `MRB_UTF8_STRING` setting. before this patch: $ bin/mruby -e 'p 171.chr' #=> "\xab" (`MRB_UTF8_STRING` is disabled) $ bin/mruby -e 'p 171.chr' #=> "«" (`MRB_UTF8_STRING` is enabled) This behavior is incompatible with Ruby, and a little inconvenient because it can't be interpreted as ASCII-8BIT with `MRB_UTF8_STRING`, I think. So add encoding argument according to Ruby. after this patch: $ bin/mruby -e 'p 171.chr' #=> "\xab" $ bin/mruby -e 'p 171.chr("ASCII-8BIT")' #=> "\xab" $ bin/mruby -e 'p 171.chr("UTF-8")' #=> "«" Allow only `String` for encoding because mruby doesn't have `Encoding` class, and `"ASCII-8BIT"` (`"BINARY"`) and `"UTF-8"` (only with `MRB_UTF8_STRING`) are valid value (default is `"ASCII-8BIT"`). --- mrbgems/mruby-string-ext/mrblib/string.rb | 2 +- mrbgems/mruby-string-ext/src/string.c | 166 ++++++++++++++++++++++-------- mrbgems/mruby-string-ext/test/numeric.rb | 24 +++++ mrbgems/mruby-string-ext/test/string.rb | 9 +- 4 files changed, 156 insertions(+), 45 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/mrblib/string.rb b/mrbgems/mruby-string-ext/mrblib/string.rb index fdaf2f960..e57d75355 100644 --- a/mrbgems/mruby-string-ext/mrblib/string.rb +++ b/mrbgems/mruby-string-ext/mrblib/string.rb @@ -414,7 +414,7 @@ class String e = max.ord while c <= e break if exclusive and c == e - yield c.chr + yield c.chr(__ENCODING__) c += 1 end return self diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index ab9919650..036a0a463 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -5,6 +5,91 @@ #include #include +#define ENC_ASCII_8BIT "ASCII-8BIT" +#define ENC_BINARY "BINARY" +#define ENC_UTF8 "UTF-8" + +#define ENC_COMP_P(enc, enc_lit) \ + str_casecmp_p(RSTRING_PTR(enc), RSTRING_LEN(enc), enc_lit, sizeof(enc_lit"")-1) + +#ifdef MRB_WITHOUT_FLOAT +# define mrb_float_p(o) FALSE +#endif + +static mrb_bool +str_casecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2) +{ + const char *e1, *e2; + + if (len1 != len2) return FALSE; + e1 = s1 + len1; + e2 = s2 + len2; + while (s1 < e1 && s2 < e2) { + if (*s1 != *s2 && TOUPPER(*s1) != TOUPPER(*s2)) return FALSE; + ++s1; + ++s2; + } + return TRUE; +} + +static mrb_value +int_chr_binary(mrb_state *mrb, mrb_value num) +{ + mrb_int cp = mrb_int(mrb, num); + char c; + mrb_value str; + + if (cp < 0 || 0xff < cp) { + mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num); + } + c = (char)cp; + str = mrb_str_new(mrb, &c, 1); + RSTR_SET_ASCII_FLAG(mrb_str_ptr(str)); + return str; +} + +#ifdef MRB_UTF8_STRING +static mrb_value +int_chr_utf8(mrb_state *mrb, mrb_value num) +{ + mrb_int cp = mrb_int(mrb, num); + char utf8[4]; + mrb_int len; + mrb_value str; + uint32_t ascii_flag = 0; + + if (cp < 0 || 0x10FFFF < cp) { + mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num); + } + if (cp < 0x80) { + utf8[0] = (char)cp; + len = 1; + ascii_flag = MRB_STR_ASCII; + } + else if (cp < 0x800) { + utf8[0] = (char)(0xC0 | (cp >> 6)); + utf8[1] = (char)(0x80 | (cp & 0x3F)); + len = 2; + } + else if (cp < 0x10000) { + utf8[0] = (char)(0xE0 | (cp >> 12)); + utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F)); + utf8[2] = (char)(0x80 | ( cp & 0x3F)); + len = 3; + } + else { + utf8[0] = (char)(0xF0 | (cp >> 18)); + utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); + utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F)); + utf8[3] = (char)(0x80 | ( cp & 0x3F)); + len = 4; + } + str = mrb_str_new(mrb, utf8, len); + mrb_str_ptr(str)->flags |= ascii_flag; + return str; +} +#endif + static mrb_value mrb_str_getbyte(mrb_state *mrb, mrb_value str) { @@ -125,8 +210,6 @@ mrb_str_swapcase(mrb_state *mrb, mrb_value self) return str; } -static mrb_value mrb_int_chr(mrb_state *mrb, mrb_value num); - /* * call-seq: * str << integer -> str @@ -136,7 +219,8 @@ static mrb_value mrb_int_chr(mrb_state *mrb, mrb_value num); * * Append---Concatenates the given object to str. If the object is a * Integer, it is considered as a codepoint, and is converted - * to a character before concatenation. + * to a character before concatenation + * (equivalent to str.concat(integer.chr(__ENCODING__))). * * a = "hello " * a << "world" #=> "hello world" @@ -148,8 +232,12 @@ mrb_str_concat_m(mrb_state *mrb, mrb_value self) mrb_value str; mrb_get_args(mrb, "o", &str); - if (mrb_fixnum_p(str)) - str = mrb_int_chr(mrb, str); + if (mrb_fixnum_p(str) || mrb_float_p(str)) +#ifdef MRB_UTF8_STRING + str = int_chr_utf8(mrb, str); +#else + str = int_chr_binary(mrb, str); +#endif else str = mrb_ensure_string_type(mrb, str); mrb_str_concat(mrb, self, str); @@ -800,7 +888,7 @@ mrb_str_count(mrb_state *mrb, mrb_value str) tr_parse_pattern(mrb, &pat, v_pat, TRUE); tr_compile_pattern(&pat, v_pat, bitmap); tr_free_pattern(mrb, &pat); - + s = RSTRING_PTR(str); len = RSTRING_LEN(str); for (i = 0; i < len; i++) { @@ -836,49 +924,40 @@ mrb_str_chr(mrb_state *mrb, mrb_value self) return mrb_str_substr(mrb, self, 0, 1); } +/* + * call-seq: + * int.chr([encoding]) -> string + * + * Returns a string containing the character represented by the +int+'s value + * according to +encoding+. +"ASCII-8BIT"+ (+"BINARY"+) and +"UTF-8"+ (only + * with +MRB_UTF8_STRING+) can be specified as +encoding+ (default is + * +"ASCII-8BIT"+). + * + * 65.chr #=> "A" + * 230.chr #=> "\xE6" + * 230.chr("ASCII-8BIT") #=> "\xE6" + * 230.chr("UTF-8") #=> "\u00E6" + */ static mrb_value mrb_int_chr(mrb_state *mrb, mrb_value num) { - mrb_int cp = mrb_fixnum(num); -#ifdef MRB_UTF8_STRING - char utf8[4]; - mrb_int len; - - if (cp < 0 || 0x10FFFF < cp) { - mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num); - } - if (cp < 0x80) { - utf8[0] = (char)cp; - len = 1; + mrb_value enc; + mrb_bool enc_given; + + mrb_get_args(mrb, "|S?", &enc, &enc_given); + if (!enc_given || + ENC_COMP_P(enc, ENC_ASCII_8BIT) || + ENC_COMP_P(enc, ENC_BINARY)) { + return int_chr_binary(mrb, num); } - else if (cp < 0x800) { - utf8[0] = (char)(0xC0 | (cp >> 6)); - utf8[1] = (char)(0x80 | (cp & 0x3F)); - len = 2; - } - else if (cp < 0x10000) { - utf8[0] = (char)(0xE0 | (cp >> 12)); - utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F)); - utf8[2] = (char)(0x80 | ( cp & 0x3F)); - len = 3; +#ifdef MRB_UTF8_STRING + else if (ENC_COMP_P(enc, ENC_UTF8)) { + return int_chr_utf8(mrb, num); } +#endif else { - utf8[0] = (char)(0xF0 | (cp >> 18)); - utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); - utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F)); - utf8[3] = (char)(0x80 | ( cp & 0x3F)); - len = 4; - } - return mrb_str_new(mrb, utf8, len); -#else - char c; - - if (cp < 0 || 0xff < cp) { - mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num); + mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %S", enc); } - c = (char)cp; - return mrb_str_new(mrb, &c, 1); -#endif } /* @@ -1219,7 +1298,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "delete_suffix", mrb_str_del_suffix, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "__lines", mrb_str_lines, MRB_ARGS_NONE()); - mrb_define_method(mrb, mrb_module_get(mrb, "Integral"), "chr", mrb_int_chr, MRB_ARGS_NONE()); + + mrb_define_method(mrb, mrb_module_get(mrb, "Integral"), "chr", mrb_int_chr, MRB_ARGS_OPT(1)); } void diff --git a/mrbgems/mruby-string-ext/test/numeric.rb b/mrbgems/mruby-string-ext/test/numeric.rb index cae562fc1..dfcb9ebf4 100644 --- a/mrbgems/mruby-string-ext/test/numeric.rb +++ b/mrbgems/mruby-string-ext/test/numeric.rb @@ -1,5 +1,29 @@ +# coding: utf-8 + assert('Integer#chr') do assert_equal("A", 65.chr) assert_equal("B", 0x42.chr) + assert_equal("\xab", 171.chr) assert_raise(RangeError) { -1.chr } + assert_raise(RangeError) { 256.chr } + + assert_equal("A", 65.chr("ASCII-8BIT")) + assert_equal("B", 0x42.chr("BINARY")) + assert_equal("\xab", 171.chr("ascii-8bit")) + assert_raise(RangeError) { -1.chr("binary") } + assert_raise(RangeError) { 256.chr("Ascii-8bit") } + assert_raise(ArgumentError) { 65.chr("ASCII") } + assert_raise(ArgumentError) { 65.chr("ASCII-8BIT", 2) } + assert_raise(TypeError) { 65.chr(:BINARY) } + + if __ENCODING__ == "ASCII-8BIT" + assert_raise(ArgumentError) { 65.chr("UTF-8") } + else + assert_equal("A", 65.chr("UTF-8")) + assert_equal("B", 0x42.chr("UTF-8")) + assert_equal("«", 171.chr("utf-8")) + assert_equal("あ", 12354.chr("Utf-8")) + assert_raise(RangeError) { -1.chr("utf-8") } + assert_raise(RangeError) { 0x110000.chr.chr("UTF-8") } + end end diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 2eb35f840..8f1d25f29 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -167,8 +167,15 @@ end assert('String#concat') do assert_equal "Hello World!", "Hello " << "World" << 33 assert_equal "Hello World!", "Hello ".concat("World").concat(33) - assert_raise(TypeError) { "".concat(Object.new) } + + if UTF8STRING + assert_equal "H«", "H" << 0xab + assert_equal "Hは", "H" << 12399 + else + assert_equal "H\xab", "H" << 0xab + assert_raise(RangeError) { "H" << 12399 } + end end assert('String#casecmp') do -- cgit v1.2.3 From fab781cf8b3cb0b6cf76b5e8669a4a5f602d2004 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Wed, 11 Sep 2019 10:35:52 +0900 Subject: Drop test dependency from `mruby-string-ext` to `mruby-enumerator` --- mrbgems/mruby-string-ext/mrbgem.rake | 1 - mrbgems/mruby-string-ext/test/string.rb | 23 ++++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/mrbgem.rake b/mrbgems/mruby-string-ext/mrbgem.rake index 9812f2cc9..f2df5a783 100644 --- a/mrbgems/mruby-string-ext/mrbgem.rake +++ b/mrbgems/mruby-string-ext/mrbgem.rake @@ -2,5 +2,4 @@ MRuby::Gem::Specification.new('mruby-string-ext') do |spec| spec.license = 'MIT' spec.author = 'mruby developers' spec.summary = 'String class extension' - spec.add_test_dependency 'mruby-enumerator', core: 'mruby-enumerator' end diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 8f1d25f29..edbeb02d7 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -4,6 +4,12 @@ UTF8STRING = __ENCODING__ == "UTF-8" +def assert_upto(exp, receiver, *args) + act = [] + receiver.upto(*args) { |v| act << v } + assert_equal exp, act +end + assert('String#getbyte') do str1 = "hello" bytes1 = [104, 101, 108, 108, 111] @@ -591,16 +597,15 @@ assert('String#rjust should raise on zero width padding') do end assert('String#upto') do - assert_equal %w(a8 a9 b0 b1 b2 b3 b4 b5 b6), "a8".upto("b6").to_a - assert_equal ["9", "10", "11"], "9".upto("11").to_a - assert_equal [], "25".upto("5").to_a - assert_equal ["07", "08", "09", "10", "11"], "07".upto("11").to_a + assert_upto %w(a8 a9 b0 b1 b2 b3 b4 b5 b6), "a8", "b6" + assert_upto ["9", "10", "11"], "9", "11" + assert_upto [], "25", "5" + assert_upto ["07", "08", "09", "10", "11"], "07", "11" + assert_upto ["9", ":", ";", "<", "=", ">", "?", "@", "A"], "9", "A" -if UTF8STRING - assert_equal ["あ", "ぃ", "い", "ぅ", "う", "ぇ", "え", "ぉ", "お"], "あ".upto("お").to_a -end - - assert_equal ["9", ":", ";", "<", "=", ">", "?", "@", "A"], "9".upto("A").to_a + if UTF8STRING + assert_upto %w(あ ぃ い ぅ う ぇ え ぉ お), "あ", "お" + end a = "aa" start = "aa" -- cgit v1.2.3 From 04b098d000c129b1efde98904b9a1b411b32a46a Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 11 Sep 2019 18:49:08 +0900 Subject: Move tests related to `getbyte`, `setbyte`, byteslice` to core. --- mrbgems/mruby-string-ext/test/string.rb | 79 --------------------------------- test/t/string.rb | 79 +++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 79 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index edbeb02d7..6914fe31d 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -10,85 +10,6 @@ def assert_upto(exp, receiver, *args) assert_equal exp, act end -assert('String#getbyte') do - str1 = "hello" - bytes1 = [104, 101, 108, 108, 111] - assert_equal bytes1[0], str1.getbyte(0) - assert_equal bytes1[-1], str1.getbyte(-1) - assert_equal bytes1[6], str1.getbyte(6) - - str2 = "\xFF" - bytes2 = [0xFF] - assert_equal bytes2[0], str2.getbyte(0) -end - -assert('String#setbyte') do - str1 = "hello" - h = "H".getbyte(0) - str1.setbyte(0, h) - assert_equal(h, str1.getbyte(0)) - assert_equal("Hello", str1) -end - -assert('String#byteslice') do - str1 = "hello" - str2 = "\u3042ab" # "\xE3\x81\x82ab" - - assert_equal("h", str1.byteslice(0)) - assert_equal("e", str1.byteslice(1)) - assert_equal(nil, str1.byteslice(5)) - assert_equal("o", str1.byteslice(-1)) - assert_equal(nil, str1.byteslice(-6)) - assert_equal("\xE3", str2.byteslice(0)) - assert_equal("\x81", str2.byteslice(1)) - assert_equal(nil, str2.byteslice(5)) - assert_equal("b", str2.byteslice(-1)) - assert_equal(nil, str2.byteslice(-6)) - - assert_equal("", str1.byteslice(0, 0)) - assert_equal(str1, str1.byteslice(0, 6)) - assert_equal("el", str1.byteslice(1, 2)) - assert_equal("", str1.byteslice(5, 1)) - assert_equal("o", str1.byteslice(-1, 6)) - assert_equal(nil, str1.byteslice(-6, 1)) - assert_equal(nil, str1.byteslice(0, -1)) - assert_equal("", str2.byteslice(0, 0)) - assert_equal(str2, str2.byteslice(0, 6)) - assert_equal("\x81\x82", str2.byteslice(1, 2)) - assert_equal("", str2.byteslice(5, 1)) - assert_equal("b", str2.byteslice(-1, 6)) - assert_equal(nil, str2.byteslice(-6, 1)) - assert_equal(nil, str2.byteslice(0, -1)) - - assert_equal("ell", str1.byteslice(1..3)) - assert_equal("el", str1.byteslice(1...3)) - assert_equal("h", str1.byteslice(0..0)) - assert_equal("", str1.byteslice(5..0)) - assert_equal("o", str1.byteslice(4..5)) - assert_equal(nil, str1.byteslice(6..0)) - assert_equal("", str1.byteslice(-1..0)) - assert_equal("llo", str1.byteslice(-3..5)) - assert_equal("\x81\x82a", str2.byteslice(1..3)) - assert_equal("\x81\x82", str2.byteslice(1...3)) - assert_equal("\xE3", str2.byteslice(0..0)) - assert_equal("", str2.byteslice(5..0)) - assert_equal("b", str2.byteslice(4..5)) - assert_equal(nil, str2.byteslice(6..0)) - assert_equal("", str2.byteslice(-1..0)) - assert_equal("\x82ab", str2.byteslice(-3..5)) - - assert_raise(ArgumentError) { str1.byteslice } - assert_raise(ArgumentError) { str1.byteslice(1, 2, 3) } - assert_raise(TypeError) { str1.byteslice("1") } - assert_raise(TypeError) { str1.byteslice("1", 2) } - assert_raise(TypeError) { str1.byteslice(1, "2") } - assert_raise(TypeError) { str1.byteslice(1..2, 3) } - - skip unless Object.const_defined?(:Float) - assert_equal("o", str1.byteslice(4.0)) - assert_equal("\x82ab", str2.byteslice(2.0, 3.0)) -end - assert('String#dump') do assert_equal("\"\\x00\"", "\0".dump) assert_equal("\"foo\"", "foo".dump) diff --git a/test/t/string.rb b/test/t/string.rb index 7e3c327b1..c820bfa92 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -784,3 +784,82 @@ assert('String literal concatenation') do assert_equal 3, ('A' "B" 'C').size assert_equal 4, (%(A) "B#{?C}" "D").size end + +assert('String#getbyte') do + str1 = "hello" + bytes1 = [104, 101, 108, 108, 111] + assert_equal bytes1[0], str1.getbyte(0) + assert_equal bytes1[-1], str1.getbyte(-1) + assert_equal bytes1[6], str1.getbyte(6) + + str2 = "\xFF" + bytes2 = [0xFF] + assert_equal bytes2[0], str2.getbyte(0) +end + +assert('String#setbyte') do + str1 = "hello" + h = "H".getbyte(0) + str1.setbyte(0, h) + assert_equal(h, str1.getbyte(0)) + assert_equal("Hello", str1) +end + +assert('String#byteslice') do + str1 = "hello" + str2 = "\u3042ab" # "\xE3\x81\x82ab" + + assert_equal("h", str1.byteslice(0)) + assert_equal("e", str1.byteslice(1)) + assert_equal(nil, str1.byteslice(5)) + assert_equal("o", str1.byteslice(-1)) + assert_equal(nil, str1.byteslice(-6)) + assert_equal("\xE3", str2.byteslice(0)) + assert_equal("\x81", str2.byteslice(1)) + assert_equal(nil, str2.byteslice(5)) + assert_equal("b", str2.byteslice(-1)) + assert_equal(nil, str2.byteslice(-6)) + + assert_equal("", str1.byteslice(0, 0)) + assert_equal(str1, str1.byteslice(0, 6)) + assert_equal("el", str1.byteslice(1, 2)) + assert_equal("", str1.byteslice(5, 1)) + assert_equal("o", str1.byteslice(-1, 6)) + assert_equal(nil, str1.byteslice(-6, 1)) + assert_equal(nil, str1.byteslice(0, -1)) + assert_equal("", str2.byteslice(0, 0)) + assert_equal(str2, str2.byteslice(0, 6)) + assert_equal("\x81\x82", str2.byteslice(1, 2)) + assert_equal("", str2.byteslice(5, 1)) + assert_equal("b", str2.byteslice(-1, 6)) + assert_equal(nil, str2.byteslice(-6, 1)) + assert_equal(nil, str2.byteslice(0, -1)) + + assert_equal("ell", str1.byteslice(1..3)) + assert_equal("el", str1.byteslice(1...3)) + assert_equal("h", str1.byteslice(0..0)) + assert_equal("", str1.byteslice(5..0)) + assert_equal("o", str1.byteslice(4..5)) + assert_equal(nil, str1.byteslice(6..0)) + assert_equal("", str1.byteslice(-1..0)) + assert_equal("llo", str1.byteslice(-3..5)) + assert_equal("\x81\x82a", str2.byteslice(1..3)) + assert_equal("\x81\x82", str2.byteslice(1...3)) + assert_equal("\xE3", str2.byteslice(0..0)) + assert_equal("", str2.byteslice(5..0)) + assert_equal("b", str2.byteslice(4..5)) + assert_equal(nil, str2.byteslice(6..0)) + assert_equal("", str2.byteslice(-1..0)) + assert_equal("\x82ab", str2.byteslice(-3..5)) + + assert_raise(ArgumentError) { str1.byteslice } + assert_raise(ArgumentError) { str1.byteslice(1, 2, 3) } + assert_raise(TypeError) { str1.byteslice("1") } + assert_raise(TypeError) { str1.byteslice("1", 2) } + assert_raise(TypeError) { str1.byteslice(1, "2") } + assert_raise(TypeError) { str1.byteslice(1..2, 3) } + + skip unless Object.const_defined?(:Float) + assert_equal("o", str1.byteslice(4.0)) + assert_equal("\x82ab", str2.byteslice(2.0, 3.0)) +end -- cgit v1.2.3 From 7ce5d3394706723a82d337641f960c58649e0134 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Thu, 10 Oct 2019 19:50:48 +0900 Subject: Integrate `mrb_str_inspect` and `mrb_str_dump` --- mrbgems/mruby-string-ext/test/string.rb | 1 + src/string.c | 261 ++++++++++---------------------- test/t/string.rb | 12 +- 3 files changed, 90 insertions(+), 184 deletions(-) (limited to 'mrbgems/mruby-string-ext/test/string.rb') diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb index 6914fe31d..3f11c00a0 100644 --- a/mrbgems/mruby-string-ext/test/string.rb +++ b/mrbgems/mruby-string-ext/test/string.rb @@ -13,6 +13,7 @@ end assert('String#dump') do assert_equal("\"\\x00\"", "\0".dump) assert_equal("\"foo\"", "foo".dump) + assert_equal('"\xe3\x82\x8b"', "る".dump) assert_nothing_raised { ("\1" * 100).dump } # regress #1210 end diff --git a/src/string.c b/src/string.c index 1428ea780..a45dee11e 100644 --- a/src/string.c +++ b/src/string.c @@ -1318,6 +1318,84 @@ str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb return src; } +#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */ +#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{')) + +static mrb_value +str_escape(mrb_state *mrb, mrb_value str, mrb_bool inspect) +{ + const char *p, *pend; + char buf[CHAR_ESC_LEN + 1]; + mrb_value result = mrb_str_new_lit(mrb, "\""); +#ifdef MRB_UTF8_STRING + uint32_t ascii_flag = MRB_STR_ASCII; +#endif + + p = RSTRING_PTR(str); pend = RSTRING_END(str); + for (;p < pend; p++) { + unsigned char c, cc; +#ifdef MRB_UTF8_STRING + if (inspect) { + mrb_int clen = utf8len(p, pend); + if (clen > 1) { + mrb_int i; + + for (i=0; iflags |= ascii_flag; + mrb_str_ptr(result)->flags |= ascii_flag; +#endif + + return result; +} + static void mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_value replace) { @@ -2574,8 +2652,6 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self) return str; } -#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{')) - /* * call-seq: * str.dump -> new_str @@ -2586,113 +2662,7 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self) mrb_value mrb_str_dump(mrb_state *mrb, mrb_value str) { - mrb_int len; - const char *p, *pend; - char *q; - struct RString *result; - - len = 2; /* "" */ - p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); - while (p < pend) { - unsigned char c = *p++; - switch (c) { - case '"': case '\\': - case '\n': case '\r': - case '\t': case '\f': - case '\013': case '\010': case '\007': case '\033': - len += 2; - break; - - case '#': - len += IS_EVSTR(p, pend) ? 2 : 1; - break; - - default: - if (ISPRINT(c)) { - len++; - } - else { - len += 4; /* \NNN */ - } - break; - } - } - - result = str_new(mrb, 0, len); - str_with_class(result, str); - p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); - q = RSTR_PTR(result); - *q++ = '"'; - while (p < pend) { - unsigned char c = *p++; - - switch (c) { - case '"': - case '\\': - *q++ = '\\'; - *q++ = c; - break; - - case '\n': - *q++ = '\\'; - *q++ = 'n'; - break; - - case '\r': - *q++ = '\\'; - *q++ = 'r'; - break; - - case '\t': - *q++ = '\\'; - *q++ = 't'; - break; - - case '\f': - *q++ = '\\'; - *q++ = 'f'; - break; - - case '\013': - *q++ = '\\'; - *q++ = 'v'; - break; - - case '\010': - *q++ = '\\'; - *q++ = 'b'; - break; - - case '\007': - *q++ = '\\'; - *q++ = 'a'; - break; - - case '\033': - *q++ = '\\'; - *q++ = 'e'; - break; - - case '#': - if (IS_EVSTR(p, pend)) *q++ = '\\'; - *q++ = '#'; - break; - - default: - if (ISPRINT(c)) { - *q++ = c; - } - else { - *q++ = '\\'; - *q++ = 'x'; - q[1] = mrb_digitmap[c % 16]; c /= 16; - q[0] = mrb_digitmap[c % 16]; - q += 2; - } - } - } - *q = '"'; - return mrb_obj_value(result); + return str_escape(mrb, str, FALSE); } MRB_API mrb_value @@ -2762,8 +2732,6 @@ mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2) return mrb_str_cat_str(mrb, str1, str2); } -#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */ - /* * call-seq: * str.inspect -> string @@ -2778,76 +2746,7 @@ mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2) mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str) { - const char *p, *pend; - char buf[CHAR_ESC_LEN + 1]; - mrb_value result = mrb_str_new_lit(mrb, "\""); -#ifdef MRB_UTF8_STRING - uint32_t ascii_flag = MRB_STR_ASCII; -#endif - - p = RSTRING_PTR(str); pend = RSTRING_END(str); - for (;p < pend; p++) { - unsigned char c, cc; -#ifdef MRB_UTF8_STRING - mrb_int clen; - - clen = utf8len(p, pend); - if (clen > 1) { - mrb_int i; - - for (i=0; iflags |= ascii_flag; - mrb_str_ptr(result)->flags |= ascii_flag; -#endif - - return result; + return str_escape(mrb, str, TRUE); } /* diff --git a/test/t/string.rb b/test/t/string.rb index e1ff48312..65ad13103 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -748,12 +748,18 @@ assert('String#upcase!', '15.2.10.5.43') do end assert('String#inspect', '15.2.10.5.46') do + assert_equal "\"\\x00\"", "\0".inspect + assert_equal "\"foo\"", "foo".inspect + if UTF8STRING + assert_equal '"る"', "る".inspect + else + assert_equal '"\xe3\x82\x8b"', "る".inspect + end + # should not raise an exception - regress #1210 assert_nothing_raised do - ("\1" * 100).inspect + ("\1" * 100).inspect end - - assert_equal "\"\\x00\"", "\0".inspect end # Not ISO specified -- cgit v1.2.3