diff options
Diffstat (limited to 'src/string.c')
| -rw-r--r-- | src/string.c | 209 |
1 files changed, 132 insertions, 77 deletions
diff --git a/src/string.c b/src/string.c index 5a0a6a233..44e3c9069 100644 --- a/src/string.c +++ b/src/string.c @@ -10,6 +10,7 @@ #ifndef MRB_WITHOUT_FLOAT #include <float.h> +#include <math.h> #endif #include <limits.h> #include <stddef.h> @@ -300,8 +301,8 @@ static const char utf8len_codepage[256] = 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1, }; -static mrb_int -utf8len(const char* p, const char* e) +mrb_int +mrb_utf8len(const char* p, const char* e) { mrb_int len; mrb_int i; @@ -317,14 +318,14 @@ utf8len(const char* p, const char* e) } mrb_int -mrb_utf8_len(const char *str, mrb_int byte_len) +mrb_utf8_strlen(const char *str, mrb_int byte_len) { mrb_int total = 0; const char *p = str; const char *e = p + byte_len; while (p < e) { - p += utf8len(p, e); + p += mrb_utf8len(p, e); total++; } return total; @@ -340,7 +341,7 @@ utf8_strlen(mrb_value str) return byte_len; } else { - mrb_int utf8_len = mrb_utf8_len(RSTR_PTR(s), byte_len); + mrb_int utf8_len = mrb_utf8_strlen(RSTR_PTR(s), byte_len); if (byte_len == utf8_len) RSTR_SET_ASCII_FLAG(s); return utf8_len; } @@ -361,7 +362,7 @@ chars2bytes(mrb_value s, mrb_int off, mrb_int idx) const char *e = RSTRING_END(s); for (b=i=0; p<e && i<idx; i++) { - n = utf8len(p, e); + n = mrb_utf8len(p, e); b += n; p += n; } @@ -378,7 +379,7 @@ bytes2chars(char *p, mrb_int len, mrb_int bi) mrb_int i; for (i = 0; p < pivot; i ++) { - p += utf8len(p, e); + p += mrb_utf8len(p, e); } if (p != pivot) return -1; return i; @@ -399,7 +400,7 @@ char_adjust(const char *beg, const char *end, const char *ptr) while (p > beg) { p --; if ((*p & 0xc0) != 0x80) { - int clen = utf8len(p, end); + int clen = mrb_utf8len(p, end); if (clen > ptr - p) return p; break; } @@ -462,10 +463,10 @@ str_index_str_by_char_search(mrb_state *mrb, const char *p, const char *pend, co } pivot = p + qstable[(unsigned char)p[slen - 1]]; - if (pivot > pend || pivot < p /* overflowed */) { return -1; } + if (pivot >= pend || pivot < p /* overflowed */) { return -1; } do { - p += utf8len(p, pend); + p += mrb_utf8len(p, pend); off ++; } while (p < pivot); } @@ -484,7 +485,7 @@ str_index_str_by_char(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) for (; pos > 0; pos --) { if (pend - p < 1) { return -1; } - p += utf8len(p, pend); + p += mrb_utf8len(p, pend); } if (slen < 1) { return off; } @@ -503,25 +504,45 @@ str_index_str_by_char(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) #define str_index_str_by_char(mrb, str, sub, pos) str_index_str(mrb, str, sub, pos) #endif +#ifndef MRB_QS_SHORT_STRING_LENGTH +#define MRB_QS_SHORT_STRING_LENGTH 2048 +#endif + static inline mrb_int mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n) { - const unsigned char *x = xs, *xe = xs + m; - const unsigned char *y = ys; - int i; - ptrdiff_t qstable[256]; + if (n + m < MRB_QS_SHORT_STRING_LENGTH) { + const unsigned char *y = ys; + const unsigned char *ye = ys+n-m+1; - /* Preprocessing */ - for (i = 0; i < 256; ++i) - qstable[i] = m + 1; - for (; x < xe; ++x) - qstable[*x] = xe - x; - /* Searching */ - for (; y + m <= ys + n; y += *(qstable + y[m])) { - if (*xs == *y && memcmp(xs, y, m) == 0) - return (mrb_int)(y - ys); + for (;;) { + y = (const unsigned char*)memchr(y, xs[0], (size_t)(ye-y)); + if (y == NULL) return -1; + if (memcmp(xs, y, m) == 0) { + return (mrb_int)(y - ys); + } + y++; + } + return -1; + } + else { + const unsigned char *x = xs, *xe = xs + m; + const unsigned char *y = ys; + int i; + ptrdiff_t qstable[256]; + + /* Preprocessing */ + for (i = 0; i < 256; ++i) + qstable[i] = m + 1; + for (; x < xe; ++x) + qstable[*x] = xe - x; + /* Searching */ + for (; y + m <= ys + n; y += *(qstable + y[m])) { + if (*xs == *y && memcmp(xs, y, m) == 0) + return (mrb_int)(y - ys); + } + return -1; } - return -1; } static mrb_int @@ -553,7 +574,7 @@ str_share(mrb_state *mrb, struct RString *orig, struct RString *s) size_t len = (size_t)orig->as.heap.len; mrb_assert(!RSTR_EMBED_P(orig)); - if (RSTR_NOFREE_P(orig) || RSTR_POOL_P(orig)) { + if (RSTR_NOFREE_P(orig)) { str_init_nofree(s, orig->as.heap.ptr, len); } else if (RSTR_SHARED_P(orig)) { @@ -562,7 +583,7 @@ str_share(mrb_state *mrb, struct RString *orig, struct RString *s) else if (RSTR_FSHARED_P(orig)) { str_init_fshared(orig, s, orig->as.heap.aux.fshared); } - else if (mrb_frozen_p(orig)) { + else if (mrb_frozen_p(orig) && !RSTR_POOL_P(orig)) { str_init_fshared(orig, s, orig); } else { @@ -576,12 +597,9 @@ str_share(mrb_state *mrb, struct RString *orig, struct RString *s) } mrb_value -mrb_str_pool(mrb_state *mrb, mrb_value str) +mrb_str_pool(mrb_state *mrb, const char *p, mrb_int len, mrb_bool nofree) { struct RString *s = (struct RString *)mrb_malloc(mrb, sizeof(struct RString)); - struct RString *orig = mrb_str_ptr(str); - const char *p = RSTR_PTR(orig); - size_t len = (size_t)RSTR_LEN(orig); s->tt = MRB_TT_STRING; s->c = mrb->string_class; @@ -590,7 +608,7 @@ mrb_str_pool(mrb_state *mrb, mrb_value str) if (RSTR_EMBEDDABLE_P(len)) { str_init_embed(s, p, len); } - else if (RSTR_NOFREE_P(orig)) { + else if (nofree) { str_init_nofree(s, p, len); } else { @@ -1095,7 +1113,6 @@ mrb_str_equal_m(mrb_state *mrb, mrb_value str1) return mrb_bool_value(mrb_str_equal(mrb, str1, str2)); } /* ---------------------------------- */ -mrb_value mrb_mod_to_s(mrb_state *mrb, mrb_value klass); MRB_API mrb_value mrb_str_to_str(mrb_state *mrb, mrb_value str) @@ -1345,7 +1362,7 @@ str_escape(mrb_state *mrb, mrb_value str, mrb_bool inspect) unsigned char c, cc; #ifdef MRB_UTF8_STRING if (inspect) { - mrb_int clen = utf8len(p, pend); + mrb_int clen = mrb_utf8len(p, pend); if (clen > 1) { mrb_int i; @@ -1648,7 +1665,7 @@ mrb_str_chop_bang(mrb_state *mrb, mrb_value str) const char* t = RSTR_PTR(s), *p = t; const char* e = p + RSTR_LEN(s); while (p<e) { - mrb_int clen = utf8len(p, e); + mrb_int clen = mrb_utf8len(p, e); if (p + clen>=e) break; p += clen; } @@ -2020,7 +2037,7 @@ mrb_str_reverse_bang(mrb_state *mrb, mrb_value str) p = RSTR_PTR(s); e = p + RSTR_LEN(s); while (p<e) { - mrb_int clen = utf8len(p, e); + mrb_int clen = mrb_utf8len(p, e); str_reverse(p, p + clen - 1); p += clen; } @@ -2243,7 +2260,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) return result; } -static mrb_value +mrb_value mrb_str_len_to_inum(mrb_state *mrb, const char *str, mrb_int len, mrb_int base, int badcheck) { const char *p = str; @@ -2492,20 +2509,78 @@ mrb_str_to_i(mrb_state *mrb, mrb_value self) } #ifndef MRB_WITHOUT_FLOAT -MRB_API double -mrb_cstr_to_dbl(mrb_state *mrb, const char * s, mrb_bool badcheck) +double +mrb_str_len_to_dbl(mrb_state *mrb, const char *s, size_t len, mrb_bool badcheck) { - const char *p = s; + char buf[DBL_DIG * 4 + 20]; + const char *p = s, *p2; + const char *pend = p + len; char *end; - char buf[DBL_DIG * 4 + 10]; + char *n; + char prev = 0; double d; + mrb_bool dot = FALSE; if (!p) return 0.0; - while (ISSPACE(*p)) p++; - - if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { - return 0.0; + while (p<pend && ISSPACE(*p)) p++; + p2 = p; + + if (pend - p > 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { + mrb_value x; + + if (!badcheck) return 0.0; + x = mrb_str_len_to_inum(mrb, p, pend-p, 0, badcheck); + if (mrb_fixnum_p(x)) + d = (double)mrb_fixnum(x); + else /* if (mrb_float_p(x)) */ + d = mrb_float(x); + return d; } + while (p < pend) { + if (!*p) { + if (badcheck) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte"); + /* not reached */ + } + pend = p; + p = p2; + goto nocopy; + } + if (!badcheck && *p == ' ') { + pend = p; + p = p2; + goto nocopy; + } + if (*p == '_') break; + p++; + } + p = p2; + n = buf; + while (p < pend) { + char c = *p++; + if (c == '.') dot = TRUE; + if (c == '_') { + /* remove an underscore between digits */ + if (n == buf || !ISDIGIT(prev) || p == pend) { + if (badcheck) goto bad; + break; + } + } + else if (badcheck && prev == '_' && !ISDIGIT(c)) goto bad; + else { + const char *bend = buf+sizeof(buf)-1; + if (n==bend) { /* buffer overflow */ + if (dot) break; /* cut off remaining fractions */ + return INFINITY; + } + *n++ = c; + } + prev = c; + } + *n = '\0'; + p = buf; + pend = n; +nocopy: d = mrb_float_read(p, &end); if (p == end) { if (badcheck) { @@ -2515,44 +2590,24 @@ bad: } return d; } - if (*end) { - char *n = buf; - char *e = buf + sizeof(buf) - 1; - char prev = 0; - - while (p < end && n < e) prev = *n++ = *p++; - while (*p) { - if (*p == '_') { - /* remove an underscore between digits */ - if (n == buf || !ISDIGIT(prev) || (++p, !ISDIGIT(*p))) { - if (badcheck) goto bad; - break; - } - } - prev = *p++; - if (n < e) *n++ = prev; - } - *n = '\0'; - p = buf; - - if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { - return 0.0; - } - - d = mrb_float_read(p, &end); - if (badcheck) { - if (!end || p == end) goto bad; - while (*end && ISSPACE(*end)) end++; - if (*end) goto bad; - } + if (badcheck) { + if (!end || p == end) goto bad; + while (end<pend && ISSPACE(*end)) end++; + if (end<pend) goto bad; } return d; } MRB_API double +mrb_cstr_to_dbl(mrb_state *mrb, const char *s, mrb_bool badcheck) +{ + return mrb_str_len_to_dbl(mrb, s, strlen(s), badcheck); +} + +MRB_API double mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck) { - return mrb_cstr_to_dbl(mrb, RSTRING_CSTR(mrb, str), badcheck); + return mrb_str_len_to_dbl(mrb, RSTRING_PTR(str), RSTRING_LEN(str), badcheck); } /* 15.2.10.5.39 */ @@ -2704,7 +2759,7 @@ mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len) MRB_API mrb_value mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr) { - return mrb_str_cat(mrb, str, ptr, strlen(ptr)); + return mrb_str_cat(mrb, str, ptr, ptr ? strlen(ptr) : 0); } MRB_API mrb_value |
