summary refs log tree commit diff homepage
path: root/mrbgems/mruby-string-ext/src
diff options
context:
space:
mode:
Diffstat (limited to 'mrbgems/mruby-string-ext/src')
-rw-r--r-- mrbgems/mruby-string-ext/src/string.c 475
1 files changed, 288 insertions, 187 deletions
diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c
index 9e35b18a9..f5f46c082 100644
--- a/mrbgems/mruby-string-ext/src/string.c
+++ b/mrbgems/mruby-string-ext/src/string.c
@@ -5,82 +5,90 @@
#include <mruby/string.h>
#include <mruby/range.h>
-static mrb_value
-mrb_str_getbyte(mrb_state *mrb, mrb_value str)
-{
- mrb_int pos;
- mrb_get_args(mrb, "i", &pos);
+#define ENC_ASCII_8BIT "ASCII-8BIT"
+#define ENC_BINARY "BINARY"
+#define ENC_UTF8 "UTF-8"
- if (pos < 0)
- pos += RSTRING_LEN(str);
- if (pos < 0 || RSTRING_LEN(str) <= pos)
- return mrb_nil_value();
+#define ENC_COMP_P(enc, enc_lit) \
+ str_casecmp_p(RSTRING_PTR(enc), RSTRING_LEN(enc), enc_lit, sizeof(enc_lit"")-1)
- return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[pos]);
+#ifdef MRB_NO_FLOAT
+# define mrb_float_p(o) FALSE
+#endif
+
+static mrb_bool
+str_casecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2)
+{
+  /* case-insensitive (via TOUPPER) equality of two length-delimited buffers */
+  mrb_int k;
+
+  if (len1 != len2) return FALSE;  /* different lengths never match */
+  for (k = 0; k < len1; k++) {
+    /* identical bytes match outright; otherwise compare upper-cased forms */
+    if (s1[k] == s2[k]) continue;
+    if (TOUPPER(s1[k]) != TOUPPER(s2[k])) return FALSE;
+  }
+  /* every byte pair matched */
+  return TRUE;
}
static mrb_value
-mrb_str_setbyte(mrb_state *mrb, mrb_value str)
+int_chr_binary(mrb_state *mrb, mrb_value num) /* num -> one-byte String; raises E_RANGE_ERROR outside 0..0xff */
{
- mrb_int pos, byte;
- mrb_int len;
-
- mrb_get_args(mrb, "ii", &pos, &byte);
-
- len = RSTRING_LEN(str);
- if (pos < -len || len <= pos)
- mrb_raisef(mrb, E_INDEX_ERROR, "index %S is out of array", mrb_fixnum_value(pos));
- if (pos < 0)
- pos += len;
+ mrb_int cp = mrb_as_int(mrb, num); /* value of num as an mrb_int */
+ char c;
+ mrb_value str;
- mrb_str_modify(mrb, mrb_str_ptr(str));
- byte &= 0xff;
- RSTRING_PTR(str)[pos] = (unsigned char)byte;
- return mrb_fixnum_value((unsigned char)byte);
+ if (cp < 0 || 0xff < cp) { /* only a single byte value is representable */
+ mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num);
+ }
+ c = (char)cp;
+ str = mrb_str_new(mrb, &c, 1); /* new string holding exactly the one byte */
+ RSTR_SET_ASCII_FLAG(mrb_str_ptr(str)); /* NOTE(review): set for every value, 0x80..0xff included -- confirm the flag's meaning */
+ return str;
}
+#ifdef MRB_UTF8_STRING
static mrb_value
-mrb_str_byteslice(mrb_state *mrb, mrb_value str)
+int_chr_utf8(mrb_state *mrb, mrb_value num) /* num (code point) -> String with its 1..4 byte UTF-8 form; raises E_RANGE_ERROR outside 0..0x10FFFF */
{
- mrb_value a1;
+ mrb_int cp = mrb_as_int(mrb, num); /* same conversion helper as int_chr_binary */
+ char utf8[4];
mrb_int len;
+ mrb_value str;
+ uint32_t ascii_flag = 0; /* becomes MRB_STR_ASCII only for a one-byte result */
- if (mrb_get_argc(mrb) == 2) {
- mrb_int pos;
- mrb_get_args(mrb, "ii", &pos, &len);
- return mrb_str_substr(mrb, str, pos, len);
+ if (cp < 0 || 0x10FFFF < cp) {
+ mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num);
}
- mrb_get_args(mrb, "o|i", &a1, &len);
- switch (mrb_type(a1)) {
- case MRB_TT_RANGE:
- {
- mrb_int beg;
-
- len = RSTRING_LEN(str);
- switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) {
- case 0: /* not range */
- break;
- case 1: /* range */
- return mrb_str_substr(mrb, str, beg, len);
- case 2: /* out of range */
- mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1);
- break;
- }
- return mrb_nil_value();
- }
-#ifndef MRB_WITHOUT_FLOAT
- case MRB_TT_FLOAT:
- a1 = mrb_fixnum_value((mrb_int)mrb_float(a1));
- /* fall through */
-#endif
- case MRB_TT_FIXNUM:
- return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1);
- default:
- mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument");
+ if (cp < 0x80) { /* 1 byte: 0xxxxxxx */
+ utf8[0] = (char)cp;
+ len = 1;
+ ascii_flag = MRB_STR_ASCII;
}
- /* not reached */
- return mrb_nil_value();
+ else if (cp < 0x800) { /* 2 bytes: 110xxxxx 10xxxxxx */
+ utf8[0] = (char)(0xC0 | (cp >> 6));
+ utf8[1] = (char)(0x80 | (cp & 0x3F));
+ len = 2;
+ }
+ else if (cp < 0x10000) { /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
+ utf8[0] = (char)(0xE0 | (cp >> 12));
+ utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
+ utf8[2] = (char)(0x80 | ( cp & 0x3F));
+ len = 3;
+ }
+ else { /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+ utf8[0] = (char)(0xF0 | (cp >> 18));
+ utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
+ utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
+ utf8[3] = (char)(0x80 | ( cp & 0x3F));
+ len = 4;
+ }
+ str = mrb_str_new(mrb, utf8, len);
+ mrb_str_ptr(str)->flags |= ascii_flag;
+ return str;
}
+#endif
/*
* call-seq:
@@ -137,8 +145,6 @@ mrb_str_swapcase(mrb_state *mrb, mrb_value self)
return str;
}
-static mrb_value mrb_fixnum_chr(mrb_state *mrb, mrb_value num);
-
/*
* call-seq:
* str << integer -> str
@@ -148,7 +154,8 @@ static mrb_value mrb_fixnum_chr(mrb_state *mrb, mrb_value num);
*
* Append---Concatenates the given object to <i>str</i>. If the object is a
* <code>Integer</code>, it is considered as a codepoint, and is converted
- * to a character before concatenation.
+ * to a character before concatenation
+ * (equivalent to <code>str.concat(integer.chr(__ENCODING__))</code>).
*
* a = "hello "
* a << "world" #=> "hello world"
@@ -157,14 +164,17 @@ static mrb_value mrb_fixnum_chr(mrb_state *mrb, mrb_value num);
static mrb_value
mrb_str_concat_m(mrb_state *mrb, mrb_value self)
{
- mrb_value str;
+ mrb_value str = mrb_get_arg1(mrb); /* the single argument to append */
- mrb_get_args(mrb, "o", &str);
- if (mrb_fixnum_p(str))
- str = mrb_fixnum_chr(mrb, str);
+ if (mrb_integer_p(str) || mrb_float_p(str)) /* numeric argument: convert it to a character first */
+#ifdef MRB_UTF8_STRING
+ str = int_chr_utf8(mrb, str);
+#else
+ str = int_chr_binary(mrb, str);
+#endif
else
- str = mrb_string_type(mrb, str);
- mrb_str_concat(mrb, self, str);
+ mrb_ensure_string_type(mrb, str); /* result unused; presumably raises for non-strings -- TODO confirm */
+ mrb_str_cat_str(mrb, self, str); /* append str to self */
return self;
}
@@ -184,14 +194,15 @@ mrb_str_concat_m(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_start_with(mrb_state *mrb, mrb_value self)
{
- mrb_value *argv, sub;
+ const mrb_value *argv;
+ mrb_value sub;
mrb_int argc, i;
mrb_get_args(mrb, "*", &argv, &argc);
for (i = 0; i < argc; i++) {
size_t len_l, len_r;
int ai = mrb_gc_arena_save(mrb);
- sub = mrb_string_type(mrb, argv[i]);
+ sub = mrb_ensure_string_type(mrb, argv[i]);
mrb_gc_arena_restore(mrb, ai);
len_l = RSTRING_LEN(self);
len_r = RSTRING_LEN(sub);
@@ -213,14 +224,15 @@ mrb_str_start_with(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_end_with(mrb_state *mrb, mrb_value self)
{
- mrb_value *argv, sub;
+ const mrb_value *argv;
+ mrb_value sub;
mrb_int argc, i;
mrb_get_args(mrb, "*", &argv, &argc);
for (i = 0; i < argc; i++) {
size_t len_l, len_r;
int ai = mrb_gc_arena_save(mrb);
- sub = mrb_string_type(mrb, argv[i]);
+ sub = mrb_ensure_string_type(mrb, argv[i]);
mrb_gc_arena_restore(mrb, ai);
len_l = RSTRING_LEN(self);
len_r = RSTRING_LEN(sub);
@@ -235,6 +247,12 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self)
return mrb_false_value();
}
+enum tr_pattern_type {
+  TR_UNINITIALIZED = 0,  /* node produced by a zero initializer, not yet filled in */
+  TR_IN_ORDER,           /* = 1: run of literal pattern characters */
+  TR_RANGE               /* = 2: "<ch>-<ch>" character range */
+};
+
/*
#tr Pattern syntax
@@ -244,19 +262,27 @@ mrb_str_end_with(mrb_state *mrb, mrb_value self)
<range> ::= <ch> '-' <ch>
*/
struct tr_pattern {
- uint8_t type; // 1:in-order, 2:range
- mrb_bool flag_reverse;
- int16_t n;
+ uint8_t type; // enum tr_pattern_type -- 0:uninitialized, 1:in-order, 2:range
+ mrb_bool flag_reverse : 1; // parser saw a leading '^' on the pattern
+ mrb_bool flag_on_heap : 1; // node came from mrb_malloc_simple(); only such nodes are freed
+ uint16_t n; // number of characters this node covers
+ union {
+ uint16_t start_pos; // in-order: offset of the run inside the pattern string
+ char ch[2]; // range: first and last character
+ } val;
struct tr_pattern *next;
- char ch[];
};
-static void
+#define STATIC_TR_PATTERN { 0 } /* zero initializer: type 0 (TR_UNINITIALIZED), flag_on_heap 0 */
+
+static inline void /* walks the pattern list, releasing the nodes that were heap-allocated */
tr_free_pattern(mrb_state *mrb, struct tr_pattern *pat)
{
while (pat) {
struct tr_pattern *p = pat->next;
- mrb_free(mrb, pat);
+ if (pat->flag_on_heap) { /* a node not marked on-heap (e.g. a caller-owned head) is left alone */
+ mrb_free(mrb, pat);
+ }
pat = p;
}
}
@@ -265,10 +291,10 @@ static struct tr_pattern*
tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_pattern, mrb_bool flag_reverse_enable)
{
const char *pattern = RSTRING_PTR(v_pattern);
- int pattern_length = RSTRING_LEN(v_pattern);
+ mrb_int pattern_length = RSTRING_LEN(v_pattern);
mrb_bool flag_reverse = FALSE;
struct tr_pattern *pat1;
- int i = 0;
+ mrb_int i = 0;
if(flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') {
flag_reverse = TRUE;
@@ -277,46 +303,53 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte
while (i < pattern_length) {
/* is range pattern ? */
+ mrb_bool const ret_uninit = (ret->type == TR_UNINITIALIZED);
+ pat1 = ret_uninit
+ ? ret
+ : (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern));
if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') {
- pat1 = (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern) + 2);
if (pat1 == NULL && ret) {
nomem:
tr_free_pattern(mrb, ret);
mrb_exc_raise(mrb, mrb_obj_value(mrb->nomem_err));
return NULL; /* not reached */
}
- pat1->type = 2;
+ pat1->type = TR_RANGE;
pat1->flag_reverse = flag_reverse;
+ pat1->flag_on_heap = !ret_uninit;
pat1->n = pattern[i+2] - pattern[i] + 1;
pat1->next = NULL;
- pat1->ch[0] = pattern[i];
- pat1->ch[1] = pattern[i+2];
+ pat1->val.ch[0] = pattern[i];
+ pat1->val.ch[1] = pattern[i+2];
i += 3;
}
else {
/* in order pattern. */
- int start_pos = i++;
- int len;
+ mrb_int start_pos = i++;
+ mrb_int len;
while (i < pattern_length) {
- if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-')
+ if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-')
break;
- i++;
+ i++;
}
len = i - start_pos;
- pat1 = (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern) + len);
+ if (len > UINT16_MAX) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "tr pattern too long (max 65535)");
+ }
if (pat1 == NULL && ret) {
goto nomem;
}
- pat1->type = 1;
+ pat1->type = TR_IN_ORDER;
pat1->flag_reverse = flag_reverse;
- pat1->n = len;
+ pat1->flag_on_heap = !ret_uninit;
+ pat1->n = (uint16_t)len;
pat1->next = NULL;
- memcpy(pat1->ch, &pattern[start_pos], len);
+ pat1->val.start_pos = (uint16_t)start_pos;
}
- if (ret == NULL) {
+ if (ret == NULL || ret_uninit) {
ret = pat1;
}
else {
@@ -331,23 +364,26 @@ tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_patte
return ret;
}
-static mrb_int
-tr_find_character(const struct tr_pattern *pat, int ch)
+static inline mrb_int
+tr_find_character(const struct tr_pattern *pat, const char *pat_str, int ch)
{
mrb_int ret = -1;
mrb_int n_sum = 0;
mrb_int flag_reverse = pat ? pat->flag_reverse : 0;
while (pat != NULL) {
- if (pat->type == 1) { /* pat->type == 1 in-order */
+ if (pat->type == TR_IN_ORDER) {
int i;
for (i = 0; i < pat->n; i++) {
- if (pat->ch[i] == ch) ret = n_sum + i;
+ if (pat_str[pat->val.start_pos + i] == ch) ret = n_sum + i;
}
}
- else { /* pat->type == 2 range */
- if (pat->ch[0] <= ch && ch <= pat->ch[1])
- ret = n_sum + ch - pat->ch[0];
+ else if (pat->type == TR_RANGE) {
+ if (pat->val.ch[0] <= ch && ch <= pat->val.ch[1])
+ ret = n_sum + ch - pat->val.ch[0];
+ }
+ else {
+ mrb_assert(pat->type == TR_UNINITIALIZED);
}
n_sum += pat->n;
pat = pat->next;
@@ -359,17 +395,33 @@ tr_find_character(const struct tr_pattern *pat, int ch)
return ret;
}
-static mrb_int
-tr_get_character(const struct tr_pattern *pat, mrb_int n_th)
+static inline mrb_int
+tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th)
{
mrb_int n_sum = 0;
+
while (pat != NULL) {
if (n_th < (n_sum + pat->n)) {
mrb_int i = (n_th - n_sum);
- return (pat->type == 1) ? pat->ch[i] :pat->ch[0] + i;
+
+ switch (pat->type) {
+ case TR_IN_ORDER:
+ return pat_str[pat->val.start_pos + i];
+ case TR_RANGE:
+ return pat->val.ch[0]+i;
+ case TR_UNINITIALIZED:
+ return -1;
+ }
}
if (pat->next == NULL) {
- return (pat->type == 1) ? pat->ch[pat->n - 1] : pat->ch[1];
+ switch (pat->type) {
+ case TR_IN_ORDER:
+ return pat_str[pat->val.start_pos + pat->n - 1];
+ case TR_RANGE:
+ return pat->val.ch[1];
+ case TR_UNINITIALIZED:
+ return -1;
+ }
}
n_sum += pat->n;
pat = pat->next;
@@ -378,51 +430,105 @@ tr_get_character(const struct tr_pattern *pat, mrb_int n_th)
return -1;
}
+static inline void
+tr_bitmap_set(uint8_t bitmap[32], uint8_t ch)
+{
+  /* mark byte value ch: byte ch>>3 of the map, bit ch&7 within that byte */
+  const uint8_t mask = (uint8_t)(1u << (ch & 7));
+  bitmap[ch >> 3] |= mask;
+}
+
+static inline mrb_bool
+tr_bitmap_detect(uint8_t bitmap[32], uint8_t ch)
+{
+  /* test the bit for byte value ch: byte ch>>3 of the map, bit ch&7 */
+  const uint8_t byte = bitmap[ch >> 3];
+  const uint8_t bit = (uint8_t)(ch & 7);
+
+  return ((byte >> bit) & 1) ? TRUE : FALSE;
+}
+
+/* compile pattern into a 256-bit bitmap: bit N is set when byte value N is selected */
+static void
+tr_compile_pattern(const struct tr_pattern *pat, mrb_value pstr, uint8_t bitmap[32])
+{
+  const char *pattern = RSTRING_PTR(pstr);  /* in-order nodes index into this string */
+  mrb_int flag_reverse = pat ? pat->flag_reverse : 0;  /* taken from the head node only */
+  int i;
+
+  for (i=0; i<32; i++) {  /* start from an empty set */
+    bitmap[i] = 0;
+  }
+  while (pat != NULL) {
+    if (pat->type == TR_IN_ORDER) {
+      for (i = 0; i < pat->n; i++) {
+        tr_bitmap_set(bitmap, pattern[pat->val.start_pos + i]);
+      }
+    }
+    else if (pat->type == TR_RANGE) {
+      for (i = pat->val.ch[0]; i <= pat->val.ch[1]; i++) {  /* end is inclusive, as in tr_find_character */
+        tr_bitmap_set(bitmap, i);
+      }
+    }
+    else {
+      mrb_assert(pat->type == TR_UNINITIALIZED);  /* empty pattern: nothing to add */
+    }
+    pat = pat->next;
+  }
+
+  if (flag_reverse) {  /* '^' pattern: select the complement */
+    for (i=0; i<32; i++) {
+      bitmap[i] ^= 0xff;
+    }
+  }
+}
+
static mrb_bool
str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze)
{
- struct tr_pattern *pat;
- struct tr_pattern *rep;
+ struct tr_pattern pat = STATIC_TR_PATTERN;
+ struct tr_pattern rep_storage = STATIC_TR_PATTERN;
char *s;
mrb_int len;
mrb_int i;
mrb_int j;
mrb_bool flag_changed = FALSE;
mrb_int lastch = -1;
+ struct tr_pattern *rep;
mrb_str_modify(mrb, mrb_str_ptr(str));
- pat = tr_parse_pattern(mrb, NULL, p1, TRUE);
- rep = tr_parse_pattern(mrb, NULL, p2, FALSE);
+ tr_parse_pattern(mrb, &pat, p1, TRUE);
+ rep = tr_parse_pattern(mrb, &rep_storage, p2, FALSE);
s = RSTRING_PTR(str);
len = RSTRING_LEN(str);
for (i=j=0; i<len; i++,j++) {
- mrb_int n = tr_find_character(pat, s[i]);
+ mrb_int n = tr_find_character(&pat, RSTRING_PTR(p1), s[i]);
if (i>j) s[j] = s[i];
if (n >= 0) {
flag_changed = TRUE;
if (rep == NULL) {
- j--;
+ j--;
}
else {
- mrb_int c = tr_get_character(rep, n);
+ mrb_int c = tr_get_character(rep, RSTRING_PTR(p2), n);
- if (squeeze && c == lastch) {
+ if (c < 0 || (squeeze && c == lastch)) {
j--;
continue;
}
- if (c < 0 || c > 0x80) {
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%S) out of range",
- mrb_fixnum_value((mrb_int)c));
+ if (c > 0x80) {
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%i) out of range", c);
}
- lastch = s[i] = c;
+ lastch = c;
+ s[i] = (char)c;
}
}
}
- tr_free_pattern(mrb, pat);
- if (rep) tr_free_pattern(mrb, rep);
+ tr_free_pattern(mrb, &pat);
+ tr_free_pattern(mrb, rep);
if (flag_changed) {
RSTR_SET_LEN(RSTRING(str), j);
@@ -544,26 +650,28 @@ mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str)
static mrb_bool
str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat)
{
+ struct tr_pattern pat_storage = STATIC_TR_PATTERN;
struct tr_pattern *pat = NULL;
mrb_int i, j;
char *s;
mrb_int len;
mrb_bool flag_changed = FALSE;
mrb_int lastch = -1;
+ uint8_t bitmap[32];
mrb_str_modify(mrb, mrb_str_ptr(str));
if (!mrb_nil_p(v_pat)) {
- pat = tr_parse_pattern(mrb, pat, v_pat, TRUE);
+ pat = tr_parse_pattern(mrb, &pat_storage, v_pat, TRUE);
+ tr_compile_pattern(pat, v_pat, bitmap);
+ tr_free_pattern(mrb, pat);
}
s = RSTRING_PTR(str);
len = RSTRING_LEN(str);
if (pat) {
for (i=j=0; i<len; i++,j++) {
- mrb_int n = tr_find_character(pat, s[i]);
-
if (i>j) s[j] = s[i];
- if (n >= 0 && s[i] == lastch) {
+ if (tr_bitmap_detect(bitmap, s[i]) && s[i] == lastch) {
flag_changed = TRUE;
j--;
}
@@ -580,7 +688,6 @@ str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat)
lastch = s[i];
}
}
- tr_free_pattern(mrb, pat);
if (flag_changed) {
RSTR_SET_LEN(RSTRING(str), j);
@@ -637,27 +744,28 @@ mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str)
static mrb_bool
str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat)
{
- struct tr_pattern *pat = NULL;
+ struct tr_pattern pat = STATIC_TR_PATTERN;
mrb_int i, j;
char *s;
mrb_int len;
mrb_bool flag_changed = FALSE;
+ uint8_t bitmap[32];
mrb_str_modify(mrb, mrb_str_ptr(str));
- pat = tr_parse_pattern(mrb, pat, v_pat, TRUE);
+ tr_parse_pattern(mrb, &pat, v_pat, TRUE);
+ tr_compile_pattern(&pat, v_pat, bitmap);
+ tr_free_pattern(mrb, &pat);
+
s = RSTRING_PTR(str);
len = RSTRING_LEN(str);
for (i=j=0; i<len; i++,j++) {
- mrb_int n = tr_find_character(pat, s[i]);
-
if (i>j) s[j] = s[i];
- if (n >= 0) {
+ if (tr_bitmap_detect(bitmap, s[i])) {
flag_changed = TRUE;
j--;
}
}
- tr_free_pattern(mrb, pat);
if (flag_changed) {
RSTR_SET_LEN(RSTRING(str), j);
RSTRING_PTR(str)[j] = 0;
@@ -704,22 +812,23 @@ static mrb_value
mrb_str_count(mrb_state *mrb, mrb_value str)
{
mrb_value v_pat = mrb_nil_value();
- struct tr_pattern *pat = NULL;
mrb_int i;
char *s;
mrb_int len;
mrb_int count = 0;
+ struct tr_pattern pat = STATIC_TR_PATTERN;
+ uint8_t bitmap[32];
mrb_get_args(mrb, "S", &v_pat);
- pat = tr_parse_pattern(mrb, pat, v_pat, TRUE);
+ tr_parse_pattern(mrb, &pat, v_pat, TRUE);
+ tr_compile_pattern(&pat, v_pat, bitmap);
+ tr_free_pattern(mrb, &pat);
+
s = RSTRING_PTR(str);
len = RSTRING_LEN(str);
for (i = 0; i < len; i++) {
- mrb_int n = tr_find_character(pat, s[i]);
-
- if (n >= 0) count++;
+ if (tr_bitmap_detect(bitmap, s[i])) count++;
}
- tr_free_pattern(mrb, pat);
return mrb_fixnum_value(count);
}
@@ -750,49 +859,42 @@ mrb_str_chr(mrb_state *mrb, mrb_value self)
return mrb_str_substr(mrb, self, 0, 1);
}
+/*
+ * call-seq:
+ * int.chr([encoding]) -> string
+ *
+ * Returns a string containing the character represented by the +int+'s value
+ * according to +encoding+. +"ASCII-8BIT"+ (+"BINARY"+) and +"UTF-8"+ (only
+ * with +MRB_UTF8_STRING+) can be specified as +encoding+ (default is
+ * +"ASCII-8BIT"+).
+ *
+ * 65.chr #=> "A"
+ * 230.chr #=> "\xE6"
+ * 230.chr("ASCII-8BIT") #=> "\xE6"
+ * 230.chr("UTF-8") #=> "\u00E6"
+ */
static mrb_value
-mrb_fixnum_chr(mrb_state *mrb, mrb_value num)
+mrb_int_chr(mrb_state *mrb, mrb_value num) /* picks the conversion helper from the optional encoding-name argument */
{
- mrb_int cp = mrb_fixnum(num);
-#ifdef MRB_UTF8_STRING
- char utf8[4];
- mrb_int len;
-
- if (cp < 0 || 0x10FFFF < cp) {
- mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
- }
- if (cp < 0x80) {
- utf8[0] = (char)cp;
- len = 1;
+ mrb_value enc;
+ mrb_bool enc_given; /* enc is only consulted when this is true */
+
+ mrb_get_args(mrb, "|S?", &enc, &enc_given);
+ if (!enc_given || /* no argument: binary conversion */
+ ENC_COMP_P(enc, ENC_ASCII_8BIT) ||
+ ENC_COMP_P(enc, ENC_BINARY)) {
+ return int_chr_binary(mrb, num);
}
- else if (cp < 0x800) {
- utf8[0] = (char)(0xC0 | (cp >> 6));
- utf8[1] = (char)(0x80 | (cp & 0x3F));
- len = 2;
- }
- else if (cp < 0x10000) {
- utf8[0] = (char)(0xE0 | (cp >> 12));
- utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
- utf8[2] = (char)(0x80 | ( cp & 0x3F));
- len = 3;
+#ifdef MRB_UTF8_STRING
+ else if (ENC_COMP_P(enc, ENC_UTF8)) { /* "UTF-8" is accepted only in MRB_UTF8_STRING builds */
+ return int_chr_utf8(mrb, num);
}
+#endif
else {
- utf8[0] = (char)(0xF0 | (cp >> 18));
- utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
- utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
- utf8[3] = (char)(0x80 | ( cp & 0x3F));
- len = 4;
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %v", enc);
}
- return mrb_str_new(mrb, utf8, len);
-#else
- char c;
-
- if (cp < 0 || 0xff < cp) {
- mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
- }
- c = (char)cp;
- return mrb_str_new(mrb, &c, 1);
-#endif
+ /* not reached */
+ return mrb_nil_value();
}
/*
@@ -972,7 +1074,8 @@ static mrb_value
mrb_str_del_prefix_bang(mrb_state *mrb, mrb_value self)
{
mrb_int plen, slen;
- char *ptr, *s;
+ const char *ptr;
+ char *s;
struct RString *str = RSTRING(self);
mrb_get_args(mrb, "s", &ptr, &plen);
@@ -980,7 +1083,7 @@ mrb_str_del_prefix_bang(mrb_state *mrb, mrb_value self)
if (plen > slen) return mrb_nil_value();
s = RSTR_PTR(str);
if (memcmp(s, ptr, plen) != 0) return mrb_nil_value();
- if (!MRB_FROZEN_P(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) {
+ if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) {
str->as.heap.ptr += plen;
}
else {
@@ -1005,7 +1108,7 @@ static mrb_value
mrb_str_del_prefix(mrb_state *mrb, mrb_value self)
{
mrb_int plen, slen;
- char *ptr;
+ const char *ptr;
mrb_get_args(mrb, "s", &ptr, &plen);
slen = RSTRING_LEN(self);
@@ -1029,7 +1132,8 @@ static mrb_value
mrb_str_del_suffix_bang(mrb_state *mrb, mrb_value self)
{
mrb_int plen, slen;
- char *ptr, *s;
+ const char *ptr;
+ char *s;
struct RString *str = RSTRING(self);
mrb_get_args(mrb, "s", &ptr, &plen);
@@ -1037,7 +1141,7 @@ mrb_str_del_suffix_bang(mrb_state *mrb, mrb_value self)
if (plen > slen) return mrb_nil_value();
s = RSTR_PTR(str);
if (memcmp(s+slen-plen, ptr, plen) != 0) return mrb_nil_value();
- if (!MRB_FROZEN_P(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) {
+ if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) {
/* no need to modify string */
}
else {
@@ -1060,7 +1164,7 @@ static mrb_value
mrb_str_del_suffix(mrb_state *mrb, mrb_value self)
{
mrb_int plen, slen;
- char *ptr;
+ const char *ptr;
mrb_get_args(mrb, "s", &ptr, &plen);
slen = RSTRING_LEN(self);
@@ -1080,8 +1184,7 @@ mrb_str_lines(mrb_state *mrb, mrb_value self)
char *p = b, *t;
char *e = b + RSTRING_LEN(self);
- mrb_get_args(mrb, "");
-
+ mrb->c->ci->mid = 0;
result = mrb_ary_new(mrb);
ai = mrb_gc_arena_save(mrb);
while (p < e) {
@@ -1101,9 +1204,6 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb)
struct RClass * s = mrb->string_class;
mrb_define_method(mrb, s, "dump", mrb_str_dump, MRB_ARGS_NONE());
- mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1));
- mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2));
- mrb_define_method(mrb, s, "byteslice", mrb_str_byteslice, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1));
mrb_define_method(mrb, s, "swapcase!", mrb_str_swapcase_bang, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "concat", mrb_str_concat_m, MRB_ARGS_REQ(1));
@@ -1124,8 +1224,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb)
mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "succ", mrb_str_succ, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "succ!", mrb_str_succ_bang, MRB_ARGS_NONE());
- mrb_define_alias(mrb, s, "next", "succ");
- mrb_define_alias(mrb, s, "next!", "succ!");
+ mrb_define_method(mrb, s, "next", mrb_str_succ, MRB_ARGS_NONE());
+ mrb_define_method(mrb, s, "next!", mrb_str_succ_bang, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "delete_prefix!", mrb_str_del_prefix_bang, MRB_ARGS_REQ(1));
mrb_define_method(mrb, s, "delete_prefix", mrb_str_del_prefix, MRB_ARGS_REQ(1));
@@ -1133,7 +1233,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb)
mrb_define_method(mrb, s, "delete_suffix", mrb_str_del_suffix, MRB_ARGS_REQ(1));
mrb_define_method(mrb, s, "__lines", mrb_str_lines, MRB_ARGS_NONE());
- mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE());
+
+ mrb_define_method(mrb, mrb_class_get(mrb, "Integer"), "chr", mrb_int_chr, MRB_ARGS_OPT(1));
}
void