diff options
| author | chasonr <[email protected]> | 2014-02-26 20:18:26 -0500 |
|---|---|---|
| committer | chasonr <[email protected]> | 2014-02-26 20:18:26 -0500 |
| commit | 5e5ee841a1c6aee13c7683b233c26d7db79f9c2f (patch) | |
| tree | f5645e1ab9415881efd5e3adef5a60f616d300cb | |
| parent | 313f6b599796e84da07b850bf2b5d341bf2c156a (diff) | |
| download | mruby-5e5ee841a1c6aee13c7683b233c26d7db79f9c2f.tar.gz mruby-5e5ee841a1c6aee13c7683b233c26d7db79f9c2f.zip | |
Implement sprintf("%c") for UTF-8.
* sprintf("%c") is changed to accept a string for which String#size returns
1, even if it is longer than one byte, and to convert a Fixnum via
Fixnum#chr (possibly returning more than one byte). Thus, if the UTF-8
gem is in use, a character will be understood as a single UTF-8 character.
* The change to sprintf depends on the implementation of Fixnum#chr added
to mrbgems/mruby-string-utf8/src/string.c.
This should work with any other gem that implements a multibyte encoding, as
long as it implements String#size and Fixnum#chr as appropriate.
| -rw-r--r-- | mrbgems/mruby-sprintf/src/sprintf.c | 21 | ||||
| -rw-r--r-- | mrbgems/mruby-string-utf8/src/string.c | 37 |
2 files changed, 47 insertions, 11 deletions
```diff
diff --git a/mrbgems/mruby-sprintf/src/sprintf.c b/mrbgems/mruby-sprintf/src/sprintf.c
index b20cbe1df..5b255d28e 100644
--- a/mrbgems/mruby-sprintf/src/sprintf.c
+++ b/mrbgems/mruby-sprintf/src/sprintf.c
@@ -666,38 +666,37 @@ retry:
       case 'c': {
         mrb_value val = GETARG();
         mrb_value tmp;
-        unsigned int c;
+        char *c;
 
         tmp = mrb_check_string_type(mrb, val);
         if (!mrb_nil_p(tmp)) {
-          if (RSTRING_LEN(tmp) != 1 ) {
+          if (mrb_fixnum(mrb_funcall(mrb, tmp, "size", 0)) != 1 ) {
             mrb_raise(mrb, E_ARGUMENT_ERROR, "%c requires a character");
           }
-          c = RSTRING_PTR(tmp)[0];
-          n = 1;
         }
-        else {
-          c = mrb_fixnum(val);
-          n = 1;
+        else if (mrb_fixnum_p(val)) {
+          tmp = mrb_funcall(mrb, val, "chr", 0);
         }
-        if (n <= 0) {
+        else {
           mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character");
         }
+        c = RSTRING_PTR(tmp);
+        n = RSTRING_LEN(tmp);
         if (!(flags & FWIDTH)) {
           CHECK(n);
-          buf[blen] = c;
+          memcpy(buf+blen, c, n);
           blen += n;
         }
         else if ((flags & FMINUS)) {
           CHECK(n);
-          buf[blen] = c;
+          memcpy(buf+blen, c, n);
           blen += n;
           FILL(' ', width-1);
         }
         else {
           FILL(' ', width-1);
           CHECK(n);
-          buf[blen] = c;
+          memcpy(buf+blen, c, n);
           blen += n;
         }
       }
diff --git a/mrbgems/mruby-string-utf8/src/string.c b/mrbgems/mruby-string-utf8/src/string.c
index 2dd848c86..4f3833944 100644
--- a/mrbgems/mruby-string-utf8/src/string.c
+++ b/mrbgems/mruby-string-utf8/src/string.c
@@ -279,6 +279,41 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str)
   return mrb_str_reverse_bang(mrb, mrb_str_dup(mrb, str));
 }
 
+static mrb_value
+mrb_fixnum_chr(mrb_state *mrb, mrb_value num)
+{
+  mrb_int cp = mrb_fixnum(num);
+  char utf8[4];
+  int len;
+
+  if (cp < 0 || 0x10FFFF < cp) {
+    mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
+  }
+  if (cp < 0x80) {
+    utf8[0] = (char)cp;
+    len = 1;
+  }
+  else if (cp < 0x800) {
+    utf8[0] = (char)(0xC0 | (cp >> 6));
+    utf8[1] = (char)(0x80 | (cp & 0x3F));
+    len = 2;
+  }
+  else if (cp < 0x10000) {
+    utf8[0] = (char)(0xE0 | (cp >> 12));
+    utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
+    utf8[2] = (char)(0x80 | ( cp & 0x3F));
+    len = 3;
+  }
+  else {
+    utf8[0] = (char)(0xF0 | (cp >> 18));
+    utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
+    utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
+    utf8[3] = (char)(0x80 | ( cp & 0x3F));
+    len = 4;
+  }
+  return mrb_str_new(mrb, utf8, len);
+}
+
 void
 mrb_mruby_string_utf8_gem_init(mrb_state* mrb)
 {
@@ -290,6 +325,8 @@ mrb_mruby_string_utf8_gem_init(mrb_state* mrb)
   mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY());
   mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE());
   mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE());
+
+  mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE());
 }
 
 void
```
