diff options
Diffstat (limited to 'mrbgems/mruby-string-ext/src')
| -rw-r--r-- | mrbgems/mruby-string-ext/src/string.c | 475 |
1 file changed, 288 insertions, 187 deletions
diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index 9e35b18a9..f5f46c082 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -5,82 +5,90 @@ #include <mruby/string.h> #include <mruby/range.h> -static mrb_value -mrb_str_getbyte(mrb_state *mrb, mrb_value str) -{ - mrb_int pos; - mrb_get_args(mrb, "i", &pos); +#define ENC_ASCII_8BIT "ASCII-8BIT" +#define ENC_BINARY "BINARY" +#define ENC_UTF8 "UTF-8" - if (pos < 0) - pos += RSTRING_LEN(str); - if (pos < 0 || RSTRING_LEN(str) <= pos) - return mrb_nil_value(); +#define ENC_COMP_P(enc, enc_lit) \ + str_casecmp_p(RSTRING_PTR(enc), RSTRING_LEN(enc), enc_lit, sizeof(enc_lit"")-1) - return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[pos]); +#ifdef MRB_NO_FLOAT +# define mrb_float_p(o) FALSE +#endif + +static mrb_bool +str_casecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2) +{ + const char *e1, *e2; + + if (len1 != len2) return FALSE; + e1 = s1 + len1; + e2 = s2 + len2; + while (s1 < e1 && s2 < e2) { + if (*s1 != *s2 && TOUPPER(*s1) != TOUPPER(*s2)) return FALSE; + ++s1; + ++s2; + } + return TRUE; } static mrb_value -mrb_str_setbyte(mrb_state *mrb, mrb_value str) +int_chr_binary(mrb_state *mrb, mrb_value num) { - mrb_int pos, byte; - mrb_int len; - - mrb_get_args(mrb, "ii", &pos, &byte); - - len = RSTRING_LEN(str); - if (pos < -len || len <= pos) - mrb_raisef(mrb, E_INDEX_ERROR, "index %S is out of array", mrb_fixnum_value(pos)); - if (pos < 0) - pos += len; + mrb_int cp = mrb_as_int(mrb, num); + char c; + mrb_value str; - mrb_str_modify(mrb, mrb_str_ptr(str)); - byte &= 0xff; - RSTRING_PTR(str)[pos] = (unsigned char)byte; - return mrb_fixnum_value((unsigned char)byte); + if (cp < 0 || 0xff < cp) { + mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num); + } + c = (char)cp; + str = mrb_str_new(mrb, &c, 1); + RSTR_SET_ASCII_FLAG(mrb_str_ptr(str)); + return str; } +#ifdef MRB_UTF8_STRING static mrb_value 
-mrb_str_byteslice(mrb_state *mrb, mrb_value str) +int_chr_utf8(mrb_state *mrb, mrb_value num) { - mrb_value a1; + mrb_int cp = mrb_int(mrb, num); + char utf8[4]; mrb_int len; + mrb_value str; + uint32_t ascii_flag = 0; - if (mrb_get_argc(mrb) == 2) { - mrb_int pos; - mrb_get_args(mrb, "ii", &pos, &len); - return mrb_str_substr(mrb, str, pos, len); + if (cp < 0 || 0x10FFFF < cp) { + mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num); } - mrb_get_args(mrb, "o|i", &a1, &len); - switch (mrb_type(a1)) { - case MRB_TT_RANGE: - { - mrb_int beg; - - len = RSTRING_LEN(str); - switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) { - case 0: /* not range */ - break; - case 1: /* range */ - return mrb_str_substr(mrb, str, beg, len); - case 2: /* out of range */ - mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1); - break; - } - return mrb_nil_value(); - } -#ifndef MRB_WITHOUT_FLOAT - case MRB_TT_FLOAT: - a1 = mrb_fixnum_value((mrb_int)mrb_float(a1)); - /* fall through */ -#endif - case MRB_TT_FIXNUM: - return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1); - default: - mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument"); + if (cp < 0x80) { + utf8[0] = (char)cp; + len = 1; + ascii_flag = MRB_STR_ASCII; } - /* not reached */ - return mrb_nil_value(); + else if (cp < 0x800) { + utf8[0] = (char)(0xC0 | (cp >> 6)); + utf8[1] = (char)(0x80 | (cp & 0x3F)); + len = 2; + } + else if (cp < 0x10000) { + utf8[0] = (char)(0xE0 | (cp >> 12)); + utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F)); + utf8[2] = (char)(0x80 | ( cp & 0x3F)); + len = 3; + } + else { + utf8[0] = (char)(0xF0 | (cp >> 18)); + utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); + utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F)); + utf8[3] = (char)(0x80 | ( cp & 0x3F)); + len = 4; + } + str = mrb_str_new(mrb, utf8, len); + mrb_str_ptr(str)->flags |= ascii_flag; + return str; } +#endif /* * call-seq: @@ -137,8 +145,6 @@ mrb_str_swapcase(mrb_state *mrb, mrb_value self) return str; } -static mrb_value 
mrb_fixnum_chr(mrb_state *mrb, mrb_value num); - /* * call-seq: * str << integer -> str @@ -148,7 +154,8 @@ static mrb_value mrb_fixnum_chr(mrb_state *mrb, mrb_value num); * * Append---Concatenates the given object to <i>str</i>. If the object is a * <code>Integer</code>, it is considered as a codepoint, and is converted - * to a character before concatenation. + * to a character before concatenation + * (equivalent to <code>str.concat(integer.chr(__ENCODING__))</code>). * * a = "hello " * a << "world" #=> "hello world" @@ -157,14 +164,17 @@ static mrb_value mrb_fixnum_chr(mrb_state *mrb, mrb_value num); static mrb_value mrb_str_concat_m(mrb_state *mrb, mrb_value self) { - mrb_value str; + mrb_value str = mrb_get_arg1(mrb); - mrb_get_args(mrb, "o", &str); - if (mrb_fixnum_p(str)) - str = mrb_fixnum_chr(mrb, str); + if (mrb_integer_p(str) || mrb_float_p(str)) +#ifdef MRB_UTF8_STRING + str = int_chr_utf8(mrb, str); +#else + str = int_chr_binary(mrb, str); +#endif else - str = mrb_string_type(mrb, str); - mrb_str_concat(mrb, self, str); + mrb_ensure_string_type(mrb, str); + mrb_str_cat_str(mrb, self, str); return self; } @@ -184,14 +194,15 @@ mrb_str_concat_m(mrb_state *mrb, mrb_value self) static mrb_value mrb_str_start_with(mrb_state *mrb, mrb_value self) { - mrb_value *argv, sub; + const mrb_value *argv; + mrb_value sub; mrb_int argc, i; mrb_get_args(mrb, "*", &argv, &argc); for (i = 0; i < argc; i++) { size_t len_l, len_r; int ai = mrb_gc_arena_save(mrb); - sub = mrb_string_type(mrb, argv[i]); + sub = mrb_ensure_string_type(mrb, argv[i]); mrb_gc_arena_restore(mrb, ai); len_l = RSTRING_LEN(self); len_r = RSTRING_LEN(sub); @@ -213,14 +224,15 @@ mrb_str_start_with(mrb_state *mrb, mrb_value self) static mrb_value mrb_str_end_with(mrb_state *mrb, mrb_value self) { - mrb_value *argv, sub; + const mrb_value *argv; + mrb_value sub; mrb_int argc, i; mrb_get_args(mrb, "*", &argv, &argc); for (i = 0; i < argc; i++) { size_t len_l, len_r; int ai = mrb_gc_arena_save(mrb); - 
sub = mrb_string_type(mrb, argv[i]); + sub = mrb_ensure_string_type(mrb, argv[i]); mrb_gc_arena_restore(mrb, ai); len_l = RSTRING_LEN(self); len_r = RSTRING_LEN(sub); @@ -235,6 +247,12 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self) return mrb_false_value(); } +enum tr_pattern_type { + TR_UNINITIALIZED = 0, + TR_IN_ORDER = 1, + TR_RANGE = 2, +}; + /* #tr Pattern syntax @@ -244,19 +262,27 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self) <range> ::= <ch> '-' <ch> */ struct tr_pattern { - uint8_t type; // 1:in-order, 2:range - mrb_bool flag_reverse; - int16_t n; + uint8_t type; // 1:in-order, 2:range + mrb_bool flag_reverse : 1; + mrb_bool flag_on_heap : 1; + uint16_t n; + union { + uint16_t start_pos; + char ch[2]; + } val; struct tr_pattern *next; - char ch[]; }; -static void +#define STATIC_TR_PATTERN { 0 } + +static inline void tr_free_pattern(mrb_state *mrb, struct tr_pattern *pat) { while (pat) { struct tr_pattern *p = pat->next; - mrb_free(mrb, pat); + if (pat->flag_on_heap) { + mrb_free(mrb, pat); + } pat = p; } } @@ -265,10 +291,10 @@ static struct tr_pattern* tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_pattern, mrb_bool flag_reverse_enable) { const char *pattern = RSTRING_PTR(v_pattern); - int pattern_length = RSTRING_LEN(v_pattern); + mrb_int pattern_length = RSTRING_LEN(v_pattern); mrb_bool flag_reverse = FALSE; struct tr_pattern *pat1; - int i = 0; + mrb_int i = 0; if(flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') { flag_reverse = TRUE; @@ -277,46 +303,53 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte while (i < pattern_length) { /* is range pattern ? */ + mrb_bool const ret_uninit = (ret->type == TR_UNINITIALIZED); + pat1 = ret_uninit + ? 
ret + : (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern)); if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') { - pat1 = (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern) + 2); if (pat1 == NULL && ret) { nomem: tr_free_pattern(mrb, ret); mrb_exc_raise(mrb, mrb_obj_value(mrb->nomem_err)); return NULL; /* not reached */ } - pat1->type = 2; + pat1->type = TR_RANGE; pat1->flag_reverse = flag_reverse; + pat1->flag_on_heap = !ret_uninit; pat1->n = pattern[i+2] - pattern[i] + 1; pat1->next = NULL; - pat1->ch[0] = pattern[i]; - pat1->ch[1] = pattern[i+2]; + pat1->val.ch[0] = pattern[i]; + pat1->val.ch[1] = pattern[i+2]; i += 3; } else { /* in order pattern. */ - int start_pos = i++; - int len; + mrb_int start_pos = i++; + mrb_int len; while (i < pattern_length) { - if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') + if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') break; - i++; + i++; } len = i - start_pos; - pat1 = (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern) + len); + if (len > UINT16_MAX) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "tr pattern too long (max 65535)"); + } if (pat1 == NULL && ret) { goto nomem; } - pat1->type = 1; + pat1->type = TR_IN_ORDER; pat1->flag_reverse = flag_reverse; - pat1->n = len; + pat1->flag_on_heap = !ret_uninit; + pat1->n = (uint16_t)len; pat1->next = NULL; - memcpy(pat1->ch, &pattern[start_pos], len); + pat1->val.start_pos = (uint16_t)start_pos; } - if (ret == NULL) { + if (ret == NULL || ret_uninit) { ret = pat1; } else { @@ -331,23 +364,26 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte return ret; } -static mrb_int -tr_find_character(const struct tr_pattern *pat, int ch) +static inline mrb_int +tr_find_character(const struct tr_pattern *pat, const char *pat_str, int ch) { mrb_int ret = -1; mrb_int n_sum = 0; mrb_int flag_reverse = pat ? 
pat->flag_reverse : 0; while (pat != NULL) { - if (pat->type == 1) { /* pat->type == 1 in-order */ + if (pat->type == TR_IN_ORDER) { int i; for (i = 0; i < pat->n; i++) { - if (pat->ch[i] == ch) ret = n_sum + i; + if (pat_str[pat->val.start_pos + i] == ch) ret = n_sum + i; } } - else { /* pat->type == 2 range */ - if (pat->ch[0] <= ch && ch <= pat->ch[1]) - ret = n_sum + ch - pat->ch[0]; + else if (pat->type == TR_RANGE) { + if (pat->val.ch[0] <= ch && ch <= pat->val.ch[1]) + ret = n_sum + ch - pat->val.ch[0]; + } + else { + mrb_assert(pat->type == TR_UNINITIALIZED); } n_sum += pat->n; pat = pat->next; @@ -359,17 +395,33 @@ tr_find_character(const struct tr_pattern *pat, int ch) return ret; } -static mrb_int -tr_get_character(const struct tr_pattern *pat, mrb_int n_th) +static inline mrb_int +tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th) { mrb_int n_sum = 0; + while (pat != NULL) { if (n_th < (n_sum + pat->n)) { mrb_int i = (n_th - n_sum); - return (pat->type == 1) ? pat->ch[i] :pat->ch[0] + i; + + switch (pat->type) { + case TR_IN_ORDER: + return pat_str[pat->val.start_pos + i]; + case TR_RANGE: + return pat->val.ch[0]+i; + case TR_UNINITIALIZED: + return -1; + } } if (pat->next == NULL) { - return (pat->type == 1) ? 
pat->ch[pat->n - 1] : pat->ch[1]; + switch (pat->type) { + case TR_IN_ORDER: + return pat_str[pat->val.start_pos + pat->n - 1]; + case TR_RANGE: + return pat->val.ch[1]; + case TR_UNINITIALIZED: + return -1; + } } n_sum += pat->n; pat = pat->next; @@ -378,51 +430,105 @@ tr_get_character(const struct tr_pattern *pat, mrb_int n_th) return -1; } +static inline void +tr_bitmap_set(uint8_t bitmap[32], uint8_t ch) +{ + uint8_t idx1 = ch / 8; + uint8_t idx2 = ch % 8; + bitmap[idx1] |= (1<<idx2); +} + +static inline mrb_bool +tr_bitmap_detect(uint8_t bitmap[32], uint8_t ch) +{ + uint8_t idx1 = ch / 8; + uint8_t idx2 = ch % 8; + if (bitmap[idx1] & (1<<idx2)) + return TRUE; + return FALSE; +} + +/* compile pattern to bitmap */ +static void +tr_compile_pattern(const struct tr_pattern *pat, mrb_value pstr, uint8_t bitmap[32]) +{ + const char *pattern = RSTRING_PTR(pstr); + mrb_int flag_reverse = pat ? pat->flag_reverse : 0; + int i; + + for (i=0; i<32; i++) { + bitmap[i] = 0; + } + while (pat != NULL) { + if (pat->type == TR_IN_ORDER) { + for (i = 0; i < pat->n; i++) { + tr_bitmap_set(bitmap, pattern[pat->val.start_pos + i]); + } + } + else if (pat->type == TR_RANGE) { + for (i = pat->val.ch[0]; i < pat->val.ch[1]; i++) { + tr_bitmap_set(bitmap, i); + } + } + else { + mrb_assert(pat->type == TR_UNINITIALIZED); + } + pat = pat->next; + } + + if (flag_reverse) { + for (i=0; i<32; i++) { + bitmap[i] ^= 0xff; + } + } +} + static mrb_bool str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze) { - struct tr_pattern *pat; - struct tr_pattern *rep; + struct tr_pattern pat = STATIC_TR_PATTERN; + struct tr_pattern rep_storage = STATIC_TR_PATTERN; char *s; mrb_int len; mrb_int i; mrb_int j; mrb_bool flag_changed = FALSE; mrb_int lastch = -1; + struct tr_pattern *rep; mrb_str_modify(mrb, mrb_str_ptr(str)); - pat = tr_parse_pattern(mrb, NULL, p1, TRUE); - rep = tr_parse_pattern(mrb, NULL, p2, FALSE); + tr_parse_pattern(mrb, &pat, p1, TRUE); + rep = 
tr_parse_pattern(mrb, &rep_storage, p2, FALSE); s = RSTRING_PTR(str); len = RSTRING_LEN(str); for (i=j=0; i<len; i++,j++) { - mrb_int n = tr_find_character(pat, s[i]); + mrb_int n = tr_find_character(&pat, RSTRING_PTR(p1), s[i]); if (i>j) s[j] = s[i]; if (n >= 0) { flag_changed = TRUE; if (rep == NULL) { - j--; + j--; } else { - mrb_int c = tr_get_character(rep, n); + mrb_int c = tr_get_character(rep, RSTRING_PTR(p2), n); - if (squeeze && c == lastch) { + if (c < 0 || (squeeze && c == lastch)) { j--; continue; } - if (c < 0 || c > 0x80) { - mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%S) out of range", - mrb_fixnum_value((mrb_int)c)); + if (c > 0x80) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%i) out of range", c); } - lastch = s[i] = c; + lastch = c; + s[i] = (char)c; } } } - tr_free_pattern(mrb, pat); - if (rep) tr_free_pattern(mrb, rep); + tr_free_pattern(mrb, &pat); + tr_free_pattern(mrb, rep); if (flag_changed) { RSTR_SET_LEN(RSTRING(str), j); @@ -544,26 +650,28 @@ mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str) static mrb_bool str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) { + struct tr_pattern pat_storage = STATIC_TR_PATTERN; struct tr_pattern *pat = NULL; mrb_int i, j; char *s; mrb_int len; mrb_bool flag_changed = FALSE; mrb_int lastch = -1; + uint8_t bitmap[32]; mrb_str_modify(mrb, mrb_str_ptr(str)); if (!mrb_nil_p(v_pat)) { - pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); + pat = tr_parse_pattern(mrb, &pat_storage, v_pat, TRUE); + tr_compile_pattern(pat, v_pat, bitmap); + tr_free_pattern(mrb, pat); } s = RSTRING_PTR(str); len = RSTRING_LEN(str); if (pat) { for (i=j=0; i<len; i++,j++) { - mrb_int n = tr_find_character(pat, s[i]); - if (i>j) s[j] = s[i]; - if (n >= 0 && s[i] == lastch) { + if (tr_bitmap_detect(bitmap, s[i]) && s[i] == lastch) { flag_changed = TRUE; j--; } @@ -580,7 +688,6 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) lastch = s[i]; } } - tr_free_pattern(mrb, pat); if (flag_changed) { 
RSTR_SET_LEN(RSTRING(str), j); @@ -637,27 +744,28 @@ mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str) static mrb_bool str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) { - struct tr_pattern *pat = NULL; + struct tr_pattern pat = STATIC_TR_PATTERN; mrb_int i, j; char *s; mrb_int len; mrb_bool flag_changed = FALSE; + uint8_t bitmap[32]; mrb_str_modify(mrb, mrb_str_ptr(str)); - pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); + tr_parse_pattern(mrb, &pat, v_pat, TRUE); + tr_compile_pattern(&pat, v_pat, bitmap); + tr_free_pattern(mrb, &pat); + s = RSTRING_PTR(str); len = RSTRING_LEN(str); for (i=j=0; i<len; i++,j++) { - mrb_int n = tr_find_character(pat, s[i]); - if (i>j) s[j] = s[i]; - if (n >= 0) { + if (tr_bitmap_detect(bitmap, s[i])) { flag_changed = TRUE; j--; } } - tr_free_pattern(mrb, pat); if (flag_changed) { RSTR_SET_LEN(RSTRING(str), j); RSTRING_PTR(str)[j] = 0; @@ -704,22 +812,23 @@ static mrb_value mrb_str_count(mrb_state *mrb, mrb_value str) { mrb_value v_pat = mrb_nil_value(); - struct tr_pattern *pat = NULL; mrb_int i; char *s; mrb_int len; mrb_int count = 0; + struct tr_pattern pat = STATIC_TR_PATTERN; + uint8_t bitmap[32]; mrb_get_args(mrb, "S", &v_pat); - pat = tr_parse_pattern(mrb, pat, v_pat, TRUE); + tr_parse_pattern(mrb, &pat, v_pat, TRUE); + tr_compile_pattern(&pat, v_pat, bitmap); + tr_free_pattern(mrb, &pat); + s = RSTRING_PTR(str); len = RSTRING_LEN(str); for (i = 0; i < len; i++) { - mrb_int n = tr_find_character(pat, s[i]); - - if (n >= 0) count++; + if (tr_bitmap_detect(bitmap, s[i])) count++; } - tr_free_pattern(mrb, pat); return mrb_fixnum_value(count); } @@ -750,49 +859,42 @@ mrb_str_chr(mrb_state *mrb, mrb_value self) return mrb_str_substr(mrb, self, 0, 1); } +/* + * call-seq: + * int.chr([encoding]) -> string + * + * Returns a string containing the character represented by the +int+'s value + * according to +encoding+. 
+"ASCII-8BIT"+ (+"BINARY"+) and +"UTF-8"+ (only + * with +MRB_UTF8_STRING+) can be specified as +encoding+ (default is + * +"ASCII-8BIT"+). + * + * 65.chr #=> "A" + * 230.chr #=> "\xE6" + * 230.chr("ASCII-8BIT") #=> "\xE6" + * 230.chr("UTF-8") #=> "\u00E6" + */ static mrb_value -mrb_fixnum_chr(mrb_state *mrb, mrb_value num) +mrb_int_chr(mrb_state *mrb, mrb_value num) { - mrb_int cp = mrb_fixnum(num); -#ifdef MRB_UTF8_STRING - char utf8[4]; - mrb_int len; - - if (cp < 0 || 0x10FFFF < cp) { - mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num); - } - if (cp < 0x80) { - utf8[0] = (char)cp; - len = 1; + mrb_value enc; + mrb_bool enc_given; + + mrb_get_args(mrb, "|S?", &enc, &enc_given); + if (!enc_given || + ENC_COMP_P(enc, ENC_ASCII_8BIT) || + ENC_COMP_P(enc, ENC_BINARY)) { + return int_chr_binary(mrb, num); } - else if (cp < 0x800) { - utf8[0] = (char)(0xC0 | (cp >> 6)); - utf8[1] = (char)(0x80 | (cp & 0x3F)); - len = 2; - } - else if (cp < 0x10000) { - utf8[0] = (char)(0xE0 | (cp >> 12)); - utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F)); - utf8[2] = (char)(0x80 | ( cp & 0x3F)); - len = 3; +#ifdef MRB_UTF8_STRING + else if (ENC_COMP_P(enc, ENC_UTF8)) { + return int_chr_utf8(mrb, num); } +#endif else { - utf8[0] = (char)(0xF0 | (cp >> 18)); - utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); - utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F)); - utf8[3] = (char)(0x80 | ( cp & 0x3F)); - len = 4; + mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %v", enc); } - return mrb_str_new(mrb, utf8, len); -#else - char c; - - if (cp < 0 || 0xff < cp) { - mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num); - } - c = (char)cp; - return mrb_str_new(mrb, &c, 1); -#endif + /* not reached */ + return mrb_nil_value(); } /* @@ -972,7 +1074,8 @@ static mrb_value mrb_str_del_prefix_bang(mrb_state *mrb, mrb_value self) { mrb_int plen, slen; - char *ptr, *s; + const char *ptr; + char *s; struct RString *str = RSTRING(self); mrb_get_args(mrb, "s", &ptr, &plen); @@ -980,7 
+1083,7 @@ mrb_str_del_prefix_bang(mrb_state *mrb, mrb_value self) if (plen > slen) return mrb_nil_value(); s = RSTR_PTR(str); if (memcmp(s, ptr, plen) != 0) return mrb_nil_value(); - if (!MRB_FROZEN_P(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) { + if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) { str->as.heap.ptr += plen; } else { @@ -1005,7 +1108,7 @@ static mrb_value mrb_str_del_prefix(mrb_state *mrb, mrb_value self) { mrb_int plen, slen; - char *ptr; + const char *ptr; mrb_get_args(mrb, "s", &ptr, &plen); slen = RSTRING_LEN(self); @@ -1029,7 +1132,8 @@ static mrb_value mrb_str_del_suffix_bang(mrb_state *mrb, mrb_value self) { mrb_int plen, slen; - char *ptr, *s; + const char *ptr; + char *s; struct RString *str = RSTRING(self); mrb_get_args(mrb, "s", &ptr, &plen); @@ -1037,7 +1141,7 @@ mrb_str_del_suffix_bang(mrb_state *mrb, mrb_value self) if (plen > slen) return mrb_nil_value(); s = RSTR_PTR(str); if (memcmp(s+slen-plen, ptr, plen) != 0) return mrb_nil_value(); - if (!MRB_FROZEN_P(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) { + if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) { /* no need to modify string */ } else { @@ -1060,7 +1164,7 @@ static mrb_value mrb_str_del_suffix(mrb_state *mrb, mrb_value self) { mrb_int plen, slen; - char *ptr; + const char *ptr; mrb_get_args(mrb, "s", &ptr, &plen); slen = RSTRING_LEN(self); @@ -1080,8 +1184,7 @@ mrb_str_lines(mrb_state *mrb, mrb_value self) char *p = b, *t; char *e = b + RSTRING_LEN(self); - mrb_get_args(mrb, ""); - + mrb->c->ci->mid = 0; result = mrb_ary_new(mrb); ai = mrb_gc_arena_save(mrb); while (p < e) { @@ -1101,9 +1204,6 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) struct RClass * s = mrb->string_class; mrb_define_method(mrb, s, "dump", mrb_str_dump, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2)); - mrb_define_method(mrb, 
s, "byteslice", mrb_str_byteslice, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); mrb_define_method(mrb, s, "swapcase!", mrb_str_swapcase_bang, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "concat", mrb_str_concat_m, MRB_ARGS_REQ(1)); @@ -1124,8 +1224,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "succ", mrb_str_succ, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "succ!", mrb_str_succ_bang, MRB_ARGS_NONE()); - mrb_define_alias(mrb, s, "next", "succ"); - mrb_define_alias(mrb, s, "next!", "succ!"); + mrb_define_method(mrb, s, "next", mrb_str_succ, MRB_ARGS_NONE()); + mrb_define_method(mrb, s, "next!", mrb_str_succ_bang, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "delete_prefix!", mrb_str_del_prefix_bang, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "delete_prefix", mrb_str_del_prefix, MRB_ARGS_REQ(1)); @@ -1133,7 +1233,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "delete_suffix", mrb_str_del_suffix, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "__lines", mrb_str_lines, MRB_ARGS_NONE()); - mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE()); + + mrb_define_method(mrb, mrb_class_get(mrb, "Integer"), "chr", mrb_int_chr, MRB_ARGS_OPT(1)); } void |
