summary | refs | log | tree | commit | diff | homepage
path: root/mrbgems/mruby-string-utf8
diff options
context:
space:
mode:
author chasonr <[email protected]> 2014-02-26 20:18:26 -0500
committer chasonr <[email protected]> 2014-02-26 20:18:26 -0500
commit 5e5ee841a1c6aee13c7683b233c26d7db79f9c2f (patch)
tree f5645e1ab9415881efd5e3adef5a60f616d300cb /mrbgems/mruby-string-utf8
parent 313f6b599796e84da07b850bf2b5d341bf2c156a (diff)
download mruby-5e5ee841a1c6aee13c7683b233c26d7db79f9c2f.tar.gz
mruby-5e5ee841a1c6aee13c7683b233c26d7db79f9c2f.zip
Implement sprintf("%c") for UTF-8.
* sprintf("%c") is changed to accept a string for which String#size returns 1, even if it is longer than one byte, and to convert a Fixnum via Fixnum#chr (possibly returning more than one byte). Thus, if the UTF-8 gem is in use, a character will be understood as a single UTF-8 character.
* The change to sprintf depends on the implementation of Fixnum#chr added to mrbgems/mruby-string-utf8/src/string.c. This should work with any other gem that implements a multibyte encoding, as long as it implements String#size and Fixnum#chr as appropriate.
Diffstat (limited to 'mrbgems/mruby-string-utf8')
-rw-r--r-- mrbgems/mruby-string-utf8/src/string.c 37
1 files changed, 37 insertions, 0 deletions
diff --git a/mrbgems/mruby-string-utf8/src/string.c b/mrbgems/mruby-string-utf8/src/string.c
index 2dd848c86..4f3833944 100644
--- a/mrbgems/mruby-string-utf8/src/string.c
+++ b/mrbgems/mruby-string-utf8/src/string.c
@@ -279,6 +279,41 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str)
return mrb_str_reverse_bang(mrb, mrb_str_dup(mrb, str));
}
+/*
+ * Fixnum#chr for the UTF-8 string gem.
+ *
+ * Treats the receiver as a Unicode code point, encodes it as a UTF-8
+ * sequence of one to four bytes, and returns that sequence as a new
+ * String built with mrb_str_new().
+ *
+ * Raises RangeError when the value is negative, greater than 0x10FFFF,
+ * or a UTF-16 surrogate (0xD800..0xDFFF).  Surrogates are not valid
+ * Unicode scalar values and RFC 3629 forbids encoding them, so emitting
+ * them would hand callers an ill-formed UTF-8 string.
+ */
+static mrb_value
+mrb_fixnum_chr(mrb_state *mrb, mrb_value num)
+{
+  mrb_int cp = mrb_fixnum(num);
+  char utf8[4];
+  int len;
+
+  /* Reject everything that is not a Unicode scalar value.  The code
+   * below relies on mrb_raisef() not returning (presumably it unwinds
+   * through the VM's exception mechanism). */
+  if (cp < 0 || 0x10FFFF < cp || (0xD800 <= cp && cp <= 0xDFFF)) {
+    mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
+  }
+  if (cp < 0x80) {
+    /* 0xxxxxxx */
+    utf8[0] = (char)cp;
+    len = 1;
+  }
+  else if (cp < 0x800) {
+    /* 110xxxxx 10xxxxxx */
+    utf8[0] = (char)(0xC0 | (cp >> 6));
+    utf8[1] = (char)(0x80 | (cp & 0x3F));
+    len = 2;
+  }
+  else if (cp < 0x10000) {
+    /* 1110xxxx 10xxxxxx 10xxxxxx */
+    utf8[0] = (char)(0xE0 |  (cp >> 12));
+    utf8[1] = (char)(0x80 | ((cp >>  6) & 0x3F));
+    utf8[2] = (char)(0x80 | ( cp        & 0x3F));
+    len = 3;
+  }
+  else {
+    /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+    utf8[0] = (char)(0xF0 |  (cp >> 18));
+    utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
+    utf8[2] = (char)(0x80 | ((cp >>  6) & 0x3F));
+    utf8[3] = (char)(0x80 | ( cp        & 0x3F));
+    len = 4;
+  }
+  return mrb_str_new(mrb, utf8, len);
+}
+
void
mrb_mruby_string_utf8_gem_init(mrb_state* mrb)
{
@@ -290,6 +325,8 @@ mrb_mruby_string_utf8_gem_init(mrb_state* mrb)
mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY());
mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE());
+
+ mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE());
}
void