summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorchasonr <[email protected]>2014-02-26 20:18:26 -0500
committerchasonr <[email protected]>2014-02-26 20:18:26 -0500
commit5e5ee841a1c6aee13c7683b233c26d7db79f9c2f (patch)
treef5645e1ab9415881efd5e3adef5a60f616d300cb
parent313f6b599796e84da07b850bf2b5d341bf2c156a (diff)
downloadmruby-5e5ee841a1c6aee13c7683b233c26d7db79f9c2f.tar.gz
mruby-5e5ee841a1c6aee13c7683b233c26d7db79f9c2f.zip
Implement sprintf("%c") for UTF-8.
* sprintf("%c") is changed to accept a string for which String#size returns 1, even if it is longer than one byte, and to convert a Fixnum via Fixnum#chr (possibly returning more than one byte). Thus, if the UTF-8 gem is in use, a character will be understood as a single UTF-8 character.
* The change to sprintf depends on the implementation of Fixnum#chr added to mrbgems/mruby-string-utf8/src/string.c. This should work with any other gem that implements a multibyte encoding, as long as it implements String#size and Fixnum#chr as appropriate.
-rw-r--r--mrbgems/mruby-sprintf/src/sprintf.c21
-rw-r--r--mrbgems/mruby-string-utf8/src/string.c37
2 files changed, 47 insertions, 11 deletions
diff --git a/mrbgems/mruby-sprintf/src/sprintf.c b/mrbgems/mruby-sprintf/src/sprintf.c
index b20cbe1df..5b255d28e 100644
--- a/mrbgems/mruby-sprintf/src/sprintf.c
+++ b/mrbgems/mruby-sprintf/src/sprintf.c
@@ -666,38 +666,37 @@ retry:
case 'c': {
mrb_value val = GETARG();
mrb_value tmp;
- unsigned int c;
+ char *c;
tmp = mrb_check_string_type(mrb, val);
if (!mrb_nil_p(tmp)) {
- if (RSTRING_LEN(tmp) != 1 ) {
+ if (mrb_fixnum(mrb_funcall(mrb, tmp, "size", 0)) != 1 ) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "%c requires a character");
}
- c = RSTRING_PTR(tmp)[0];
- n = 1;
}
- else {
- c = mrb_fixnum(val);
- n = 1;
+ else if (mrb_fixnum_p(val)) {
+ tmp = mrb_funcall(mrb, val, "chr", 0);
}
- if (n <= 0) {
+ else {
mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character");
}
+ c = RSTRING_PTR(tmp);
+ n = RSTRING_LEN(tmp);
if (!(flags & FWIDTH)) {
CHECK(n);
- buf[blen] = c;
+ memcpy(buf+blen, c, n);
blen += n;
}
else if ((flags & FMINUS)) {
CHECK(n);
- buf[blen] = c;
+ memcpy(buf+blen, c, n);
blen += n;
FILL(' ', width-1);
}
else {
FILL(' ', width-1);
CHECK(n);
- buf[blen] = c;
+ memcpy(buf+blen, c, n);
blen += n;
}
}
diff --git a/mrbgems/mruby-string-utf8/src/string.c b/mrbgems/mruby-string-utf8/src/string.c
index 2dd848c86..4f3833944 100644
--- a/mrbgems/mruby-string-utf8/src/string.c
+++ b/mrbgems/mruby-string-utf8/src/string.c
@@ -279,6 +279,41 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str)
return mrb_str_reverse_bang(mrb, mrb_str_dup(mrb, str));
}
+static mrb_value
+mrb_fixnum_chr(mrb_state *mrb, mrb_value num)
+{
+ mrb_int cp = mrb_fixnum(num);
+ char utf8[4];
+ int len;
+
+ if (cp < 0 || 0x10FFFF < cp) {
+ mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
+ }
+ if (cp < 0x80) {
+ utf8[0] = (char)cp;
+ len = 1;
+ }
+ else if (cp < 0x800) {
+ utf8[0] = (char)(0xC0 | (cp >> 6));
+ utf8[1] = (char)(0x80 | (cp & 0x3F));
+ len = 2;
+ }
+ else if (cp < 0x10000) {
+ utf8[0] = (char)(0xE0 | (cp >> 12));
+ utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
+ utf8[2] = (char)(0x80 | ( cp & 0x3F));
+ len = 3;
+ }
+ else {
+ utf8[0] = (char)(0xF0 | (cp >> 18));
+ utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
+ utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
+ utf8[3] = (char)(0x80 | ( cp & 0x3F));
+ len = 4;
+ }
+ return mrb_str_new(mrb, utf8, len);
+}
+
void
mrb_mruby_string_utf8_gem_init(mrb_state* mrb)
{
@@ -290,6 +325,8 @@ mrb_mruby_string_utf8_gem_init(mrb_state* mrb)
mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY());
mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE());
+
+ mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE());
}
void