diff options
Diffstat (limited to 'src/string.c')
| -rw-r--r-- | src/string.c | 235 |
1 files changed, 110 insertions, 125 deletions
diff --git a/src/string.c b/src/string.c index e3a359b49..0a28f2565 100644 --- a/src/string.c +++ b/src/string.c @@ -4,7 +4,6 @@ ** See Copyright Notice in mruby.h */ -#include <ctype.h> #include <float.h> #include <limits.h> #include <stddef.h> @@ -29,7 +28,7 @@ typedef struct mrb_shared_string { static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2); static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len); -mrb_int +MRB_API mrb_int mrb_str_strlen(mrb_state *mrb, struct RString *s) { mrb_int i, max = RSTR_LEN(s); @@ -76,7 +75,7 @@ str_decref(mrb_state *mrb, mrb_shared_string *shared) } } -void +MRB_API void mrb_str_modify(mrb_state *mrb, struct RString *s) { if (RSTR_SHARED_P(s)) { @@ -120,7 +119,7 @@ mrb_str_modify(mrb_state *mrb, struct RString *s) } } -mrb_value +MRB_API mrb_value mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len) { mrb_int slen; @@ -141,14 +140,34 @@ mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len) #define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class)) static struct RString* +str_new_static(mrb_state *mrb, const char *p, size_t len) +{ + struct RString *s; + + if (len >= MRB_INT_MAX) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); + } + s = mrb_obj_alloc_string(mrb); + s->as.heap.len = len; + s->as.heap.aux.capa = 0; /* nofree */ + s->as.heap.ptr = (char *)p; + s->flags = MRB_STR_NOFREE; + + return s; +} + +static struct RString* str_new(mrb_state *mrb, const char *p, size_t len) { struct RString *s; + if (p && mrb_ro_data_p(p)) { + return str_new_static(mrb, p, len); + } s = mrb_obj_alloc_string(mrb); if (len < RSTRING_EMBED_LEN_MAX) { RSTR_SET_EMBED_FLAG(s); - RSTR_SET_EMBED_LEN(s,len); + RSTR_SET_EMBED_LEN(s, len); if (p) { memcpy(s->as.ary, p, len); } @@ -167,7 +186,7 @@ str_new(mrb_state *mrb, const char *p, size_t len) return s; } -static void +static inline void str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj) { s->c = mrb_str_ptr(obj)->c; @@ -186,7 +205,7 @@ mrb_str_new_empty(mrb_state *mrb, mrb_value str) # define MRB_STR_BUF_MIN_SIZE 128 #endif -mrb_value +MRB_API mrb_value mrb_str_buf_new(mrb_state *mrb, size_t capa) { struct RString *s; @@ -248,7 +267,7 @@ str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len) RSTR_PTR(s)[total] = '\0'; /* sentinel */ } -mrb_value +MRB_API mrb_value mrb_str_new(mrb_state *mrb, const char *p, size_t len) { return mrb_obj_value(str_new(mrb, p, len)); @@ -261,7 +280,7 @@ mrb_str_new(mrb_state *mrb, const char *p, size_t len) * Returns a new string object containing a copy of <i>str</i>. */ -mrb_value +MRB_API mrb_value mrb_str_new_cstr(mrb_state *mrb, const char *p) { struct RString *s; @@ -279,19 +298,10 @@ mrb_str_new_cstr(mrb_state *mrb, const char *p) return mrb_obj_value(s); } -mrb_value +MRB_API mrb_value mrb_str_new_static(mrb_state *mrb, const char *p, size_t len) { - struct RString *s; - - if (len >= MRB_INT_MAX) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); - } - s = mrb_obj_alloc_string(mrb); - s->as.heap.len = len; - s->as.heap.aux.capa = 0; /* nofree */ - s->as.heap.ptr = (char *)p; - s->flags = MRB_STR_NOFREE; + struct RString *s = str_new_static(mrb, p, len); return mrb_obj_value(s); } @@ -306,7 +316,7 @@ mrb_gc_free_str(mrb_state *mrb, struct RString *str) mrb_free(mrb, str->as.heap.ptr); } -char * +MRB_API char* mrb_str_to_cstr(mrb_state *mrb, mrb_value str0) { struct RString *s; @@ -366,7 +376,7 @@ str_make_shared(mrb_state *mrb, struct RString *s) * * Returns a new string object containing a copy of <i>str</i>. */ -void +MRB_API void mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other) { struct RString *s1 = mrb_str_ptr(self), *s2; @@ -393,7 +403,7 @@ mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other) * * Returns a new string object containing a copy of <i>str</i>. */ -mrb_value +MRB_API mrb_value mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b) { struct RString *s = mrb_str_ptr(a); @@ -424,28 +434,15 @@ mrb_str_plus_m(mrb_state *mrb, mrb_value self) return mrb_str_plus(mrb, self, str); } -/* - * call-seq: - * len = strlen(String("abcd")) - * - * Returns a new string object containing a copy of <i>str</i>. - */ -static mrb_value -mrb_str_bytesize(mrb_state *mrb, mrb_value self) -{ - struct RString *s = mrb_str_ptr(self); - return mrb_fixnum_value(RSTR_LEN(s)); -} - /* 15.2.10.5.26 */ /* 15.2.10.5.33 */ /* * call-seq: * len = strlen(String("abcd")) * - * Returns a new string object containing a copy of <i>str</i>. + * Returns the length of string. */ -mrb_value +static mrb_value mrb_str_size(mrb_state *mrb, mrb_value self) { struct RString *s = mrb_str_ptr(self); @@ -506,7 +503,7 @@ mrb_str_times(mrb_state *mrb, mrb_value self) * = 0 * < -1 */ -int +MRB_API int mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2) { mrb_int len; @@ -592,7 +589,7 @@ str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2) return FALSE; } -mrb_bool +MRB_API mrb_bool mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2) { if (mrb_immediate_p(str2)) return FALSE; @@ -628,7 +625,7 @@ mrb_str_equal_m(mrb_state *mrb, mrb_value str1) return mrb_bool_value(mrb_str_equal(mrb, str1, str2)); } /* ---------------------------------- */ -mrb_value +MRB_API mrb_value mrb_str_to_str(mrb_state *mrb, mrb_value str) { mrb_value s; @@ -643,25 +640,24 @@ mrb_str_to_str(mrb_state *mrb, mrb_value str) return str; } -char * +MRB_API const char* mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr) { mrb_value str = mrb_str_to_str(mrb, ptr); return RSTRING_PTR(str); } -static mrb_value -noregexp(mrb_state *mrb, mrb_value self) +void +mrb_noregexp(mrb_state *mrb, mrb_value self) { mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented"); - return mrb_nil_value(); } -static void -regexp_check(mrb_state *mrb, mrb_value obj) +void +mrb_regexp_check(mrb_state *mrb, mrb_value obj) { if (mrb_regexp_p(mrb, obj)) { - noregexp(mrb, obj); + mrb_noregexp(mrb, obj); } } @@ -736,7 +732,7 @@ mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset) return pos + offset; } -mrb_value +MRB_API mrb_value mrb_str_dup(mrb_state *mrb, mrb_value str) { struct RString *s = mrb_str_ptr(str); @@ -751,7 +747,7 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx) { mrb_int idx; - regexp_check(mrb, indx); + mrb_regexp_check(mrb, indx); switch (mrb_type(indx)) { case MRB_TT_FIXNUM: idx = mrb_fixnum(indx); @@ -779,7 +775,12 @@ num_index: return mrb_nil_value(); } } + case MRB_TT_FLOAT: default: + indx = mrb_Integer(mrb, indx); + if (mrb_nil_p(indx)) { + mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum"); + } idx = mrb_fixnum(indx); goto num_index; } @@ -816,6 +817,7 @@ num_index: * * a = "hello there" * a[1] #=> 101(1.8.7) "e"(1.9.2) + * a[1.1] #=> "e"(1.9.2) * a[1,3] #=> "ell" * a[1..3] #=> "ell" * a[-3,2] #=> "er" @@ -833,7 +835,7 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str) argc = mrb_get_args(mrb, "o|o", &a1, &a2); if (argc == 2) { - regexp_check(mrb, a1); + mrb_regexp_check(mrb, a1); return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2)); } if (argc != 1) { @@ -1172,7 +1174,7 @@ mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) return mrb_obj_value(s); } -mrb_value +MRB_API mrb_value mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { if (len < 0) return mrb_nil_value(); @@ -1297,7 +1299,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) else sub = mrb_nil_value(); } - regexp_check(mrb, sub); + mrb_regexp_check(mrb, sub); if (pos < 0) { pos += RSTRING_LEN(str); if (pos < 0) { @@ -1307,7 +1309,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) switch (mrb_type(sub)) { case MRB_TT_FIXNUM: { - int c = mrb_fixnum(sub); + mrb_int c = mrb_fixnum(sub); mrb_int len = RSTRING_LEN(str); unsigned char *p = (unsigned char*)RSTRING_PTR(str); @@ -1435,13 +1437,13 @@ mrb_str_init(mrb_state *mrb, mrb_value self) * * 'cat and dog'.to_sym #=> :"cat and dog" */ -mrb_value +MRB_API mrb_value mrb_str_intern(mrb_state *mrb, mrb_value self) { return mrb_symbol_value(mrb_intern_str(mrb, self)); } /* ---------------------------------- */ -mrb_value +MRB_API mrb_value mrb_obj_as_string(mrb_state *mrb, mrb_value obj) { mrb_value str; @@ -1455,7 +1457,7 @@ mrb_obj_as_string(mrb_state *mrb, mrb_value obj) return str; } -mrb_value +MRB_API mrb_value mrb_ptr_to_str(mrb_state *mrb, void *p) { struct RString *p_str; @@ -1485,13 +1487,13 @@ mrb_ptr_to_str(mrb_state *mrb, void *p) return mrb_obj_value(p_str); } -mrb_value +MRB_API mrb_value mrb_string_type(mrb_state *mrb, mrb_value str) { return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); } -mrb_value +MRB_API mrb_value mrb_check_string_type(mrb_state *mrb, mrb_value str) { return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); @@ -1636,7 +1638,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) if (pos < 0) { pos += len; if (pos < 0) { - regexp_check(mrb, sub); + mrb_regexp_check(mrb, sub); return mrb_nil_value(); } } @@ -1649,12 +1651,11 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) else sub = mrb_nil_value(); } - regexp_check(mrb, sub); + mrb_regexp_check(mrb, sub); switch (mrb_type(sub)) { case MRB_TT_FIXNUM: { - int c = mrb_fixnum(sub); - mrb_int len = RSTRING_LEN(str); + mrb_int c = mrb_fixnum(sub); unsigned char *p = (unsigned char*)RSTRING_PTR(str); for (pos=len-1;pos>=0;pos--) { @@ -1757,29 +1758,28 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) } } else { - noregexp(mrb, str); + mrb_noregexp(mrb, str); } } result = mrb_ary_new(mrb); beg = 0; if (split_type == awk) { - char *ptr = RSTRING_PTR(str); - char *eptr = RSTRING_END(str); - char *bptr = ptr; mrb_bool skip = TRUE; + mrb_int idx = 0; + mrb_int str_len = RSTRING_LEN(str); unsigned int c; + int ai = mrb_gc_arena_save(mrb); - end = beg; - while (ptr < eptr) { - int ai = mrb_gc_arena_save(mrb); - c = (unsigned char)*ptr++; + idx = end = beg; + while (idx < str_len) { + c = (unsigned char)RSTRING_PTR(str)[idx++]; if (skip) { if (ISSPACE(c)) { - beg = ptr - bptr; + beg = idx; } else { - end = ptr - bptr; + end = idx; skip = FALSE; if (lim_p && lim <= i) break; } @@ -1788,45 +1788,36 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg)); mrb_gc_arena_restore(mrb, ai); skip = TRUE; - beg = ptr - bptr; + beg = idx; if (lim_p) ++i; } else { - end = ptr - bptr; + end = idx; } } } else if (split_type == string) { - char *ptr = RSTRING_PTR(str); /* s->as.ary */ - char *temp = ptr; - char *eptr = RSTRING_END(str); - mrb_int slen = RSTRING_LEN(spat); - - if (slen == 0) { - int ai = mrb_gc_arena_save(mrb); - while (ptr < eptr) { - mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr-temp, 1)); - mrb_gc_arena_restore(mrb, ai); - ptr++; - if (lim_p && lim <= ++i) break; - } - } - else { - char *sptr = RSTRING_PTR(spat); - int ai = mrb_gc_arena_save(mrb); - - while (ptr < eptr && - (end = mrb_memsearch(sptr, slen, ptr, eptr - ptr)) >= 0) { - mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end)); - mrb_gc_arena_restore(mrb, ai); - ptr += end + slen; - if (lim_p && lim <= ++i) break; + mrb_int str_len = RSTRING_LEN(str); + mrb_int pat_len = RSTRING_LEN(spat); + mrb_int idx = 0; + int ai = mrb_gc_arena_save(mrb); + + while (idx < str_len) { + if (pat_len > 0) { + end = mrb_memsearch(RSTRING_PTR(spat), pat_len, RSTRING_PTR(str)+idx, str_len - idx); + if (end < 0) break; + } else { + end = 1; } + mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, idx, end)); + mrb_gc_arena_restore(mrb, ai); + idx += end + pat_len; + if (lim_p && lim <= ++i) break; } - beg = ptr - temp; + beg = idx; } else { - noregexp(mrb, str); + mrb_noregexp(mrb, str); } if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) { if (RSTRING_LEN(str) == beg) { @@ -1847,7 +1838,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) return result; } -mrb_value +MRB_API mrb_value mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck) { const char *p; @@ -1856,13 +1847,10 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck) unsigned long n = 0; mrb_int val; -#undef ISDIGIT -#define ISDIGIT(c) ('0' <= (c) && (c) <= '9') #define conv_digit(c) \ - (!ISASCII(c) ? -1 : \ - isdigit(c) ? ((c) - '0') : \ - islower(c) ? ((c) - 'a' + 10) : \ - isupper(c) ? ((c) - 'A' + 10) : \ + (ISDIGIT(c) ? ((c) - '0') : \ + ISLOWER(c) ? ((c) - 'a' + 10) : \ + ISUPPER(c) ? ((c) - 'A' + 10) : \ -1) if (!str) { @@ -1994,7 +1982,7 @@ bad: return mrb_fixnum_value(0); } -char * +MRB_API const char* mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr) { struct RString *ps = mrb_str_ptr(*ptr); @@ -2008,10 +1996,10 @@ mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr) return p; } -mrb_value +MRB_API mrb_value mrb_str_to_inum(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck) { - char *s; + const char *s; mrb_int len; str = mrb_str_to_str(mrb, str); @@ -2064,16 +2052,13 @@ mrb_str_to_i(mrb_state *mrb, mrb_value self) return mrb_str_to_inum(mrb, self, base, FALSE); } -double +MRB_API double mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck) { char *end; double d; enum {max_width = 20}; -#define OutOfRange() (((w = end - p) > max_width) ? \ - (w = max_width, ellipsis = "...") : \ - (w = (int)(end - p), ellipsis = "")) if (!p) return 0.0; while (ISSPACE(*p)) p++; @@ -2130,7 +2115,7 @@ bad: return d; } -double +MRB_API double mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck) { char *s; @@ -2360,26 +2345,26 @@ mrb_str_dump(mrb_state *mrb, mrb_value str) return mrb_obj_value(result); } -mrb_value +MRB_API mrb_value mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len) { str_buf_cat(mrb, mrb_str_ptr(str), ptr, len); return str; } -mrb_value +MRB_API mrb_value mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr) { return mrb_str_cat(mrb, str, ptr, strlen(ptr)); } -mrb_value +MRB_API mrb_value mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2) { return mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2)); } -mrb_value +MRB_API mrb_value mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2) { str2 = mrb_str_to_str(mrb, str2); @@ -2486,7 +2471,7 @@ mrb_init_string(mrb_state *mrb) s = mrb->string_class = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */ MRB_SET_INSTANCE_TT(s, MRB_TT_STRING); - mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, MRB_ARGS_NONE()); + mrb_define_method(mrb, s, "bytesize", mrb_str_size, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */ mrb_define_method(mrb, s, "==", mrb_str_equal_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */ @@ -2494,7 +2479,7 @@ mrb_init_string(mrb_state *mrb) mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */ mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */ mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */ - mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.8 */ + mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */ mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */ mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */ mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_REQ(1)); /* 15.2.10.5.11 */ @@ -2504,7 +2489,7 @@ mrb_init_string(mrb_state *mrb) mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */ mrb_define_method(mrb, s, "eql?", mrb_str_eql, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */ - mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.20 */ + mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_NONE()); /* 15.2.10.5.20 */ mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */ mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ANY()); /* 15.2.10.5.22 */ mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */ @@ -2524,8 +2509,8 @@ mrb_init_string(mrb_state *mrb) mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */ mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */ - mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_REQ(1)); /* 15.2.10.5.42 */ - mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.43 */ + mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */ + mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */ mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */ mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE()); } |
