summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Yukihiro "Matz" Matsumoto <[email protected]>, 2019-02-04 21:11:06 +0900
committer: GitHub <[email protected]>, 2019-02-04 21:11:06 +0900
commit: db2545cb3c61f55702be2abe8836b5190effa9c5 (patch)
tree: 4b3cfd9dbed6bfb7eac0e92b524d5dd02cd284ab
parent: 5fd9f68b2735d6c104c9945e374ff28309af7a39 (diff)
parent: 69fd1a592560d321061790c94f93532db93dccb9 (diff)
download: mruby-db2545cb3c61f55702be2abe8836b5190effa9c5.tar.gz
mruby-db2545cb3c61f55702be2abe8836b5190effa9c5.zip
Merge pull request #4260 from shuujii/fix-symbol-size-with-mrb_utf8_string
Fix `Symbol#size` for multi-byte characters with `MRB_UTF8_STRING`
-rw-r--r-- include/mruby/string.h | 4
-rw-r--r-- mrbgems/mruby-symbol-ext/src/symbol.c | 7
-rw-r--r-- mrbgems/mruby-symbol-ext/test/symbol.rb | 15
-rw-r--r-- src/string.c | 35
4 files changed, 45 insertions, 16 deletions
diff --git a/include/mruby/string.h b/include/mruby/string.h
index 3fe8295ff..6fe0556b0 100644
--- a/include/mruby/string.h
+++ b/include/mruby/string.h
@@ -439,6 +439,10 @@ void mrb_regexp_check(mrb_state *mrb, mrb_value obj);
#define mrb_str_buf_cat(mrb, str, ptr, len) mrb_str_cat(mrb, str, ptr, len)
#define mrb_str_buf_append(mrb, str, str2) mrb_str_cat_str(mrb, str, str2)
+#ifdef MRB_UTF8_STRING
+mrb_int mrb_utf8_len(const char *str, mrb_int byte_len);
+#endif
+
MRB_END_DECL
#endif /* MRUBY_STRING_H */
diff --git a/mrbgems/mruby-symbol-ext/src/symbol.c b/mrbgems/mruby-symbol-ext/src/symbol.c
index a992dbfce..215226502 100644
--- a/mrbgems/mruby-symbol-ext/src/symbol.c
+++ b/mrbgems/mruby-symbol-ext/src/symbol.c
@@ -1,6 +1,7 @@
#include <mruby.h>
#include <mruby/khash.h>
#include <mruby/array.h>
+#include <mruby/string.h>
typedef struct symbol_name {
size_t len;
@@ -45,7 +46,13 @@ static mrb_value
mrb_sym_length(mrb_state *mrb, mrb_value self)
{
mrb_int len;
+#ifdef MRB_UTF8_STRING
+ mrb_int byte_len;
+ const char *name = mrb_sym2name_len(mrb, mrb_symbol(self), &byte_len);
+ len = mrb_utf8_len(name, byte_len);
+#else
mrb_sym2name_len(mrb, mrb_symbol(self), &len);
+#endif
return mrb_fixnum_value(len);
}
diff --git a/mrbgems/mruby-symbol-ext/test/symbol.rb b/mrbgems/mruby-symbol-ext/test/symbol.rb
index 63c1bd826..2c7a62b0c 100644
--- a/mrbgems/mruby-symbol-ext/test/symbol.rb
+++ b/mrbgems/mruby-symbol-ext/test/symbol.rb
@@ -7,9 +7,18 @@ assert('Symbol.all_symbols') do
assert_equal foo, symbols
end
-assert("Symbol#length") do
- assert_equal 5, :hello.size
- assert_equal 5, :mruby.length
+%w[size length].each do |n|
+ assert("Symbol##{n}") do
+ assert_equal 5, :hello.__send__(n)
+ assert_equal 4, :"aA\0b".__send__(n)
+ if "あ".size == 1 # enable MRB_UTF8_STRING?
+ assert_equal 8, :"こんにちは世界!".__send__(n)
+ assert_equal 4, :"aあ\0b".__send__(n)
+ else
+ assert_equal 22, :"こんにちは世界!".__send__(n)
+ assert_equal 6, :"aあ\0b".__send__(n)
+ end
+ end
end
assert("Symbol#capitalize") do
diff --git a/src/string.c b/src/string.c
index 52b869eca..148e2fee2 100644
--- a/src/string.c
+++ b/src/string.c
@@ -238,27 +238,36 @@ utf8len(const char* p, const char* e)
return len;
}
-static mrb_int
-utf8_strlen(mrb_value str, mrb_int len)
+mrb_int
+mrb_utf8_len(const char *str, mrb_int byte_len)
{
mrb_int total = 0;
- char* p = RSTRING_PTR(str);
- char* e = p;
- if (RSTRING(str)->flags & MRB_STR_NO_UTF) {
- return RSTRING_LEN(str);
- }
- e += len < 0 ? RSTRING_LEN(str) : len;
- while (p<e) {
+ const char *p = str;
+ const char *e = p + byte_len;
+
+ while (p < e) {
p += utf8len(p, e);
total++;
}
- if (RSTRING_LEN(str) == total) {
- RSTRING(str)->flags |= MRB_STR_NO_UTF;
- }
return total;
}
-#define RSTRING_CHAR_LEN(s) utf8_strlen(s, -1)
+static mrb_int
+utf8_strlen(mrb_value str)
+{
+ mrb_int byte_len = RSTRING_LEN(str);
+
+ if (RSTRING(str)->flags & MRB_STR_NO_UTF) {
+ return byte_len;
+ }
+ else {
+ mrb_int utf8_len = mrb_utf8_len(RSTRING_PTR(str), byte_len);
+ if (byte_len == utf8_len) RSTRING(str)->flags |= MRB_STR_NO_UTF;
+ return utf8_len;
+ }
+}
+
+#define RSTRING_CHAR_LEN(s) utf8_strlen(s)
/* map character index to byte offset index */
static mrb_int