diff options
Diffstat (limited to 'src/string.c')
| -rw-r--r-- | src/string.c | 242 |
1 files changed, 131 insertions, 111 deletions
diff --git a/src/string.c b/src/string.c index cfef5730c..09777ac69 100644 --- a/src/string.c +++ b/src/string.c @@ -6,6 +6,8 @@ #include "mruby.h" +#include <stddef.h> +#include <stdlib.h> #include <string.h> #include "mruby/string.h" #include "mruby/class.h" @@ -20,19 +22,13 @@ const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2); -static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len); +static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len); #define RESIZE_CAPA(s,capacity) do {\ s->ptr = (char *)mrb_realloc(mrb, s->ptr, (capacity)+1);\ s->aux.capa = capacity;\ } while (0) -static const char* -_obj_classname(mrb_state *mrb, mrb_value obj) -{ - return mrb_class_name(mrb, mrb_obj_class(mrb, obj)); -} - void mrb_str_decref(mrb_state *mrb, mrb_shared_string *shared) { @@ -56,11 +52,11 @@ str_modify(mrb_state *mrb, struct RString *s) } else { char *ptr, *p; - long len; + mrb_int len; p = s->ptr; len = s->len; - ptr = (char *)mrb_malloc(mrb, len+1); + ptr = (char *)mrb_malloc(mrb, (size_t)len + 1); if (p) { memcpy(ptr, p, len); } @@ -104,21 +100,6 @@ str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len) #define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class)) -static struct RString* -str_alloc(mrb_state *mrb, struct RClass *c) -{ - struct RString* s; - - s = mrb_obj_alloc_string(mrb); - - s->c = c; - s->ptr = 0; - s->len = 0; - s->aux.capa = 0; - - return s; -} - /* char offset to byte offset */ int mrb_str_offset(mrb_state *mrb, mrb_value str, int pos) @@ -129,8 +110,9 @@ mrb_str_offset(mrb_state *mrb, mrb_value str, int pos) static struct RString* str_new(mrb_state *mrb, const char *p, int len) { - struct RString *s = str_alloc(mrb, mrb->string_class); + struct RString *s; + s = mrb_obj_alloc_string(mrb); s->len = len; s->aux.capa = len; s->ptr = (char *)mrb_malloc(mrb, len+1); @@ -156,6 +138,10 @@ mrb_str_new_empty(mrb_state *mrb, mrb_value str) return mrb_obj_value(s); } +#ifndef MRB_STR_BUF_MIN_SIZE +# define MRB_STR_BUF_MIN_SIZE 128 +#endif + mrb_value mrb_str_buf_new(mrb_state *mrb, int capa) { @@ -163,8 +149,8 @@ mrb_str_buf_new(mrb_state *mrb, int capa) s = mrb_obj_alloc_string(mrb); - if (capa < STR_BUF_MIN_SIZE) { - capa = STR_BUF_MIN_SIZE; + if (capa < MRB_STR_BUF_MIN_SIZE) { + capa = MRB_STR_BUF_MIN_SIZE; } s->len = 0; s->aux.capa = capa; @@ -175,9 +161,11 @@ mrb_str_buf_new(mrb_state *mrb, int capa) } static void -str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, int len) +str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len) { - long capa, total, off = -1; + mrb_int capa; + mrb_int total; + ptrdiff_t off = -1; str_modify(mrb, s); if (ptr >= s->ptr && ptr <= s->ptr + s->len) { @@ -185,13 +173,13 @@ str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, int len) } if (len == 0) return; capa = s->aux.capa; - if (s->len >= INT_MAX - len) { + if (s->len >= MRB_INT_MAX - len) { mrb_raise(mrb, E_ARGUMENT_ERROR, "string sizes too big"); } total = s->len+len; if (capa <= total) { while (total > capa) { - if (capa + 1 >= INT_MAX / 2) { + if (capa + 1 >= MRB_INT_MAX / 2) { capa = (total + 4095) / 4096; break; } @@ -208,7 +196,7 @@ str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, int len) } mrb_value -mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, int len) +mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len) { if (len == 0) return str; str_buf_cat(mrb, mrb_str_ptr(str), ptr, len); @@ -216,7 +204,7 @@ mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, int len) } mrb_value -mrb_str_new(mrb_state *mrb, const char *p, int len) +mrb_str_new(mrb_state *mrb, const char *p, size_t len) { struct RString *s; @@ -224,17 +212,6 @@ mrb_str_new(mrb_state *mrb, const char *p, int len) return mrb_obj_value(s); } -mrb_value -mrb_str_new2(mrb_state *mrb, const char *ptr) -{ - struct RString *s; - if (!ptr) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "NULL pointer given"); - } - s = str_new(mrb, ptr, strlen(ptr)); - return mrb_obj_value(s); -} - /* * call-seq: (Caution! NULL string) * String.new(str="") => new_str @@ -246,11 +223,23 @@ mrb_value mrb_str_new_cstr(mrb_state *mrb, const char *p) { struct RString *s; - int len = strlen(p); + size_t len; + + if (p) { + len = strlen(p); + if ((mrb_int)len < 0) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big"); + } + } + else { + len = 0; + } s = mrb_obj_alloc_string(mrb); s->ptr = (char *)mrb_malloc(mrb, len+1); - memcpy(s->ptr, p, len); + if (p) { + memcpy(s->ptr, p, len); + } s->ptr[len] = 0; s->len = len; s->aux.capa = len; @@ -263,6 +252,10 @@ mrb_str_to_cstr(mrb_state *mrb, mrb_value str0) { mrb_value str; + if (!mrb_string_p(str0)) { + mrb_raise(mrb, E_TYPE_ERROR, "expected String"); + } + str = mrb_str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0)); if (strlen(RSTRING_PTR(str)) != RSTRING_LEN(str)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte"); @@ -302,7 +295,7 @@ mrb_str_literal(mrb_state *mrb, mrb_value str) struct RString *s, *orig; mrb_shared_string *shared; - s = str_alloc(mrb, mrb->string_class); + s = mrb_obj_alloc_string(mrb); orig = mrb_str_ptr(str); if (!(orig->flags & MRB_STR_SHARED)) { str_make_shared(mrb, mrb_str_ptr(str)); @@ -311,6 +304,7 @@ mrb_str_literal(mrb_state *mrb, mrb_value str) shared->refcnt++; s->ptr = shared->ptr; s->len = shared->len; + s->aux.capa = 0; s->aux.shared = shared; s->flags |= MRB_STR_SHARED; @@ -446,7 +440,7 @@ mrb_str_times(mrb_state *mrb, mrb_value self) if (times < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument"); } - if (times && INT_MAX/times < RSTRING_LEN(self)) { + if (times && MRB_INT_MAX / times < RSTRING_LEN(self)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big"); } @@ -532,10 +526,10 @@ mrb_str_cmp_m(mrb_state *mrb, mrb_value str1) mrb_get_args(mrb, "o", &str2); if (!mrb_string_p(str2)) { - if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "to_s"))) { + if (!mrb_respond_to(mrb, str2, mrb_intern2(mrb, "to_s", 4))) { return mrb_nil_value(); } - else if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "<=>"))) { + else if (!mrb_respond_to(mrb, str2, mrb_intern2(mrb, "<=>", 3))) { return mrb_nil_value(); } else { @@ -557,8 +551,9 @@ mrb_str_cmp_m(mrb_state *mrb, mrb_value str1) static int str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2) { - const long len = RSTRING_LEN(str1); + const size_t len = RSTRING_LEN(str1); + /* assert(SIZE_MAX >= MRB_INT_MAX) */ if (len != RSTRING_LEN(str2)) return FALSE; if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) return TRUE; @@ -571,7 +566,7 @@ mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2) if (mrb_obj_equal(mrb, str1, str2)) return TRUE; if (!mrb_string_p(str2)) { if (mrb_nil_p(str2)) return FALSE; - if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "to_str"))) { + if (!mrb_respond_to(mrb, str2, mrb_intern2(mrb, "to_str", 6))) { return FALSE; } str2 = mrb_funcall(mrb, str2, "to_str", 0); @@ -595,11 +590,12 @@ static mrb_value mrb_str_equal_m(mrb_state *mrb, mrb_value str1) { mrb_value str2; + mrb_bool equal_p; mrb_get_args(mrb, "o", &str2); - if (mrb_str_equal(mrb, str1, str2)) - return mrb_true_value(); - return mrb_false_value(); + equal_p = mrb_str_equal(mrb, str1, str2); + + return mrb_bool_value(equal_p); } /* ---------------------------------- */ mrb_value @@ -657,8 +653,8 @@ mrb_str_match(mrb_state *mrb, mrb_value self/* x */) return mrb_nil_value(); } -static inline long -mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n) +static inline mrb_int +mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n) { const unsigned char *x = xs, *xe = xs + m; const unsigned char *y = ys; @@ -677,8 +673,8 @@ mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long return -1; } -static int -mrb_memsearch(const void *x0, int m, const void *y0, int n) +static mrb_int +mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n) { const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0; @@ -705,7 +701,8 @@ mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset) { mrb_int pos; char *s, *sptr; - int len, slen; + mrb_int len, slen; + len = RSTRING_LEN(str); slen = RSTRING_LEN(sub); if (offset < 0) { @@ -739,9 +736,9 @@ mrb_str_dup(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx) { - long idx; + mrb_int idx; - if (!strcmp(_obj_classname(mrb, indx), REGEXP_CLASS)) { + if (!strcmp(mrb_obj_classname(mrb, indx), REGEXP_CLASS)) { mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); } switch (mrb_type(indx)) { @@ -920,7 +917,8 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) mrb_value rs; mrb_int newline; char *p, *pp; - long len, rslen; + mrb_int rslen; + mrb_int len; struct RString *s = mrb_str_ptr(str); str_modify(mrb, s); @@ -1131,9 +1129,7 @@ mrb_str_empty_p(mrb_state *mrb, mrb_value self) { struct RString *s = mrb_str_ptr(self); - if (s->len == 0) - return mrb_true_value(); - return mrb_false_value(); + return mrb_bool_value(s->len == 0); } /* 15.2.10.5.17 */ @@ -1147,17 +1143,16 @@ static mrb_value mrb_str_eql(mrb_state *mrb, mrb_value self) { mrb_value str2; + mrb_bool eql_p; mrb_get_args(mrb, "o", &str2); - if (mrb_type(str2) != MRB_TT_STRING) - return mrb_false_value(); - if (str_eql(mrb, self, str2)) - return mrb_true_value(); - return mrb_false_value(); + eql_p = (mrb_type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2); + + return mrb_bool_value(eql_p); } static mrb_value -mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len) +mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { struct RString *orig, *s; mrb_shared_string *shared; @@ -1176,7 +1171,7 @@ mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len) } mrb_value -mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, int len) +mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { mrb_value str2; @@ -1267,7 +1262,7 @@ mrb_str_hash(mrb_state *mrb, mrb_value str) { /* 1-8-7 */ struct RString *s = mrb_str_ptr(str); - long len = s->len; + mrb_int len = s->len; char *p = s->ptr; mrb_int key = 0; @@ -1311,18 +1306,20 @@ mrb_str_include(mrb_state *mrb, mrb_value self) { mrb_int i; mrb_value str2; + mrb_bool include_p; mrb_get_args(mrb, "o", &str2); if (mrb_type(str2) == MRB_TT_FIXNUM) { - if (memchr(RSTRING_PTR(self), mrb_fixnum(str2), RSTRING_LEN(self))) - return mrb_true_value(); - return mrb_false_value(); + include_p = memchr(RSTRING_PTR(self), mrb_fixnum(str2), RSTRING_LEN(self)); + } + else { + mrb_string_value(mrb, &str2); + i = mrb_str_index(mrb, self, str2, 0); + + include_p = (i != -1); } - mrb_string_value(mrb, &str2); - i = mrb_str_index(mrb, self, str2, 0); - if (i == -1) return mrb_false_value(); - return mrb_true_value(); + return mrb_bool_value(include_p); } /* 15.2.10.5.22 */ @@ -1381,7 +1378,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) switch (mrb_type(sub)) { case MRB_TT_FIXNUM: { int c = mrb_fixnum(sub); - long len = RSTRING_LEN(str); + mrb_int len = RSTRING_LEN(str); unsigned char *p = (unsigned char*)RSTRING_PTR(str); for (;pos<len;pos++) { @@ -1396,7 +1393,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) tmp = mrb_check_string_type(mrb, sub); if (mrb_nil_p(tmp)) { mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %s given", - _obj_classname(mrb, sub)); + mrb_obj_classname(mrb, sub)); } sub = tmp; } @@ -1404,10 +1401,10 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) case MRB_TT_STRING: pos = mrb_str_index(mrb, str, sub, pos); break; - } + } - if (pos == -1) return mrb_nil_value(); - return mrb_fixnum_value(pos); + if (pos == -1) return mrb_nil_value(); + return mrb_fixnum_value(pos); } #define STR_REPLACE_SHARED_MIN 10 @@ -1532,6 +1529,36 @@ mrb_obj_as_string(mrb_state *mrb, mrb_value obj) } mrb_value +mrb_ptr_to_str(mrb_state *mrb, void *p) +{ + struct RString *p_str; + char *p1; + char *p2; + intptr_t n = (intptr_t)p; + + p_str = str_new(mrb, NULL, 2 + sizeof(uintptr_t) * CHAR_BIT / 4); + p1 = p_str->ptr; + *p1++ = '0'; + *p1++ = 'x'; + p2 = p1; + + do { + *p2++ = mrb_digitmap[n % 16]; + n /= 16; + } while (n > 0); + *p2 = '\0'; + p_str->len = (mrb_int)(p2 - p_str->ptr); + + while (p1 < p2) { + const char c = *p1; + *p1++ = *--p2; + *p2 = c; + } + + return mrb_obj_value(p_str); +} + +mrb_value mrb_check_string_type(mrb_state *mrb, mrb_value str) { return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); @@ -1637,7 +1664,7 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) char *s, *sbeg, *t; struct RString *ps = mrb_str_ptr(str); struct RString *psub = mrb_str_ptr(sub); - long len = psub->len; + mrb_int len = psub->len; /* substring longer than string */ if (ps->len < len) return -1; @@ -1661,15 +1688,6 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) } } -#ifdef INCLUDE_ENCODING -/* byte offset to char offset */ -int -mrb_str_sublen(mrb_state *mrb, mrb_value str, long pos) -{ - return pos; -} -#endif //INCLUDE_ENCODING - /* 15.2.10.5.31 */ /* * call-seq: @@ -1728,7 +1746,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) switch (mrb_type(sub)) { case MRB_TT_FIXNUM: { int c = mrb_fixnum(sub); - long len = RSTRING_LEN(str); + mrb_int len = RSTRING_LEN(str); unsigned char *p = (unsigned char*)RSTRING_PTR(str); for (pos=len;pos>=0;pos--) { @@ -1865,7 +1883,9 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) int argc; mrb_value spat = mrb_nil_value(); enum {awk, string, regexp} split_type = string; - long beg, end, i = 0, lim_p; + long i = 0, lim_p; + mrb_int beg; + mrb_int end; mrb_int lim = 0; mrb_value result, tmp; @@ -1934,7 +1954,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) char *ptr = RSTRING_PTR(str); char *temp = ptr; char *eptr = RSTRING_END(str); - long slen = RSTRING_LEN(spat); + mrb_int slen = RSTRING_LEN(spat); if (slen == 0) { int ai = mrb_gc_arena_save(mrb); @@ -1972,7 +1992,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) mrb_ary_push(mrb, result, tmp); } if (!lim_p && lim == 0) { - long len; + mrb_int len; while ((len = RARRAY_LEN(result)) > 0 && (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0)) mrb_ary_pop(mrb, result); @@ -2054,7 +2074,8 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck) char *end; char sign = 1; int c; - unsigned long val; + unsigned long n; + mrb_int val; #undef ISDIGIT #define ISDIGIT(c) ('0' <= (c) && (c) <= '9') @@ -2158,19 +2179,18 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck) return mrb_fixnum_value(0); } - val = strtoul((char*)str, &end, base); - + n = strtoul((char*)str, &end, base); + if (n > MRB_INT_MAX) { + mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%s) too big for integer", str); + } + val = n; if (badcheck) { if (end == str) goto bad; /* no number */ while (*end && ISSPACE(*end)) end++; if (*end) goto bad; /* trailing garbage */ } - if (sign) return mrb_fixnum_value(val); - else { - long result = -(long)val; - return mrb_fixnum_value(result); - } + return mrb_fixnum_value(sign ? val : -val); bad: mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%s)", str); /* not reached */ @@ -2440,7 +2460,7 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self) mrb_value mrb_str_dump(mrb_state *mrb, mrb_value str) { - long len; + mrb_int len; const char *p, *pend; char *q; struct RString *result; @@ -2542,7 +2562,7 @@ mrb_str_dump(mrb_state *mrb, mrb_value str) } mrb_value -mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len) +mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, mrb_int len) { if (len < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)"); @@ -2552,7 +2572,7 @@ mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len) } mrb_value -mrb_str_cat2(mrb_state *mrb, mrb_value str, const char *ptr) +mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr) { return mrb_str_cat(mrb, str, ptr, strlen(ptr)); } |
