summaryrefslogtreecommitdiffhomepage
path: root/src/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/string.c')
-rw-r--r--src/string.c209
1 files changed, 132 insertions, 77 deletions
diff --git a/src/string.c b/src/string.c
index 5a0a6a233..44e3c9069 100644
--- a/src/string.c
+++ b/src/string.c
@@ -10,6 +10,7 @@
#ifndef MRB_WITHOUT_FLOAT
#include <float.h>
+#include <math.h>
#endif
#include <limits.h>
#include <stddef.h>
@@ -300,8 +301,8 @@ static const char utf8len_codepage[256] =
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1,
};
-static mrb_int
-utf8len(const char* p, const char* e)
+mrb_int
+mrb_utf8len(const char* p, const char* e)
{
mrb_int len;
mrb_int i;
@@ -317,14 +318,14 @@ utf8len(const char* p, const char* e)
}
mrb_int
-mrb_utf8_len(const char *str, mrb_int byte_len)
+mrb_utf8_strlen(const char *str, mrb_int byte_len)
{
mrb_int total = 0;
const char *p = str;
const char *e = p + byte_len;
while (p < e) {
- p += utf8len(p, e);
+ p += mrb_utf8len(p, e);
total++;
}
return total;
@@ -340,7 +341,7 @@ utf8_strlen(mrb_value str)
return byte_len;
}
else {
- mrb_int utf8_len = mrb_utf8_len(RSTR_PTR(s), byte_len);
+ mrb_int utf8_len = mrb_utf8_strlen(RSTR_PTR(s), byte_len);
if (byte_len == utf8_len) RSTR_SET_ASCII_FLAG(s);
return utf8_len;
}
@@ -361,7 +362,7 @@ chars2bytes(mrb_value s, mrb_int off, mrb_int idx)
const char *e = RSTRING_END(s);
for (b=i=0; p<e && i<idx; i++) {
- n = utf8len(p, e);
+ n = mrb_utf8len(p, e);
b += n;
p += n;
}
@@ -378,7 +379,7 @@ bytes2chars(char *p, mrb_int len, mrb_int bi)
mrb_int i;
for (i = 0; p < pivot; i ++) {
- p += utf8len(p, e);
+ p += mrb_utf8len(p, e);
}
if (p != pivot) return -1;
return i;
@@ -399,7 +400,7 @@ char_adjust(const char *beg, const char *end, const char *ptr)
while (p > beg) {
p --;
if ((*p & 0xc0) != 0x80) {
- int clen = utf8len(p, end);
+ int clen = mrb_utf8len(p, end);
if (clen > ptr - p) return p;
break;
}
@@ -462,10 +463,10 @@ str_index_str_by_char_search(mrb_state *mrb, const char *p, const char *pend, co
}
pivot = p + qstable[(unsigned char)p[slen - 1]];
- if (pivot > pend || pivot < p /* overflowed */) { return -1; }
+ if (pivot >= pend || pivot < p /* overflowed */) { return -1; }
do {
- p += utf8len(p, pend);
+ p += mrb_utf8len(p, pend);
off ++;
} while (p < pivot);
}
@@ -484,7 +485,7 @@ str_index_str_by_char(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
for (; pos > 0; pos --) {
if (pend - p < 1) { return -1; }
- p += utf8len(p, pend);
+ p += mrb_utf8len(p, pend);
}
if (slen < 1) { return off; }
@@ -503,25 +504,45 @@ str_index_str_by_char(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
#define str_index_str_by_char(mrb, str, sub, pos) str_index_str(mrb, str, sub, pos)
#endif
+#ifndef MRB_QS_SHORT_STRING_LENGTH
+#define MRB_QS_SHORT_STRING_LENGTH 2048
+#endif
+
static inline mrb_int
mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
{
- const unsigned char *x = xs, *xe = xs + m;
- const unsigned char *y = ys;
- int i;
- ptrdiff_t qstable[256];
+ if (n + m < MRB_QS_SHORT_STRING_LENGTH) {
+ const unsigned char *y = ys;
+ const unsigned char *ye = ys+n-m+1;
- /* Preprocessing */
- for (i = 0; i < 256; ++i)
- qstable[i] = m + 1;
- for (; x < xe; ++x)
- qstable[*x] = xe - x;
- /* Searching */
- for (; y + m <= ys + n; y += *(qstable + y[m])) {
- if (*xs == *y && memcmp(xs, y, m) == 0)
- return (mrb_int)(y - ys);
+ for (;;) {
+ y = (const unsigned char*)memchr(y, xs[0], (size_t)(ye-y));
+ if (y == NULL) return -1;
+ if (memcmp(xs, y, m) == 0) {
+ return (mrb_int)(y - ys);
+ }
+ y++;
+ }
+ return -1;
+ }
+ else {
+ const unsigned char *x = xs, *xe = xs + m;
+ const unsigned char *y = ys;
+ int i;
+ ptrdiff_t qstable[256];
+
+ /* Preprocessing */
+ for (i = 0; i < 256; ++i)
+ qstable[i] = m + 1;
+ for (; x < xe; ++x)
+ qstable[*x] = xe - x;
+ /* Searching */
+ for (; y + m <= ys + n; y += *(qstable + y[m])) {
+ if (*xs == *y && memcmp(xs, y, m) == 0)
+ return (mrb_int)(y - ys);
+ }
+ return -1;
}
- return -1;
}
static mrb_int
@@ -553,7 +574,7 @@ str_share(mrb_state *mrb, struct RString *orig, struct RString *s)
size_t len = (size_t)orig->as.heap.len;
mrb_assert(!RSTR_EMBED_P(orig));
- if (RSTR_NOFREE_P(orig) || RSTR_POOL_P(orig)) {
+ if (RSTR_NOFREE_P(orig)) {
str_init_nofree(s, orig->as.heap.ptr, len);
}
else if (RSTR_SHARED_P(orig)) {
@@ -562,7 +583,7 @@ str_share(mrb_state *mrb, struct RString *orig, struct RString *s)
else if (RSTR_FSHARED_P(orig)) {
str_init_fshared(orig, s, orig->as.heap.aux.fshared);
}
- else if (mrb_frozen_p(orig)) {
+ else if (mrb_frozen_p(orig) && !RSTR_POOL_P(orig)) {
str_init_fshared(orig, s, orig);
}
else {
@@ -576,12 +597,9 @@ str_share(mrb_state *mrb, struct RString *orig, struct RString *s)
}
mrb_value
-mrb_str_pool(mrb_state *mrb, mrb_value str)
+mrb_str_pool(mrb_state *mrb, const char *p, mrb_int len, mrb_bool nofree)
{
struct RString *s = (struct RString *)mrb_malloc(mrb, sizeof(struct RString));
- struct RString *orig = mrb_str_ptr(str);
- const char *p = RSTR_PTR(orig);
- size_t len = (size_t)RSTR_LEN(orig);
s->tt = MRB_TT_STRING;
s->c = mrb->string_class;
@@ -590,7 +608,7 @@ mrb_str_pool(mrb_state *mrb, mrb_value str)
if (RSTR_EMBEDDABLE_P(len)) {
str_init_embed(s, p, len);
}
- else if (RSTR_NOFREE_P(orig)) {
+ else if (nofree) {
str_init_nofree(s, p, len);
}
else {
@@ -1095,7 +1113,6 @@ mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
return mrb_bool_value(mrb_str_equal(mrb, str1, str2));
}
/* ---------------------------------- */
-mrb_value mrb_mod_to_s(mrb_state *mrb, mrb_value klass);
MRB_API mrb_value
mrb_str_to_str(mrb_state *mrb, mrb_value str)
@@ -1345,7 +1362,7 @@ str_escape(mrb_state *mrb, mrb_value str, mrb_bool inspect)
unsigned char c, cc;
#ifdef MRB_UTF8_STRING
if (inspect) {
- mrb_int clen = utf8len(p, pend);
+ mrb_int clen = mrb_utf8len(p, pend);
if (clen > 1) {
mrb_int i;
@@ -1648,7 +1665,7 @@ mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
const char* t = RSTR_PTR(s), *p = t;
const char* e = p + RSTR_LEN(s);
while (p<e) {
- mrb_int clen = utf8len(p, e);
+ mrb_int clen = mrb_utf8len(p, e);
if (p + clen>=e) break;
p += clen;
}
@@ -2020,7 +2037,7 @@ mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
p = RSTR_PTR(s);
e = p + RSTR_LEN(s);
while (p<e) {
- mrb_int clen = utf8len(p, e);
+ mrb_int clen = mrb_utf8len(p, e);
str_reverse(p, p + clen - 1);
p += clen;
}
@@ -2243,7 +2260,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
return result;
}
-static mrb_value
+mrb_value
mrb_str_len_to_inum(mrb_state *mrb, const char *str, mrb_int len, mrb_int base, int badcheck)
{
const char *p = str;
@@ -2492,20 +2509,78 @@ mrb_str_to_i(mrb_state *mrb, mrb_value self)
}
#ifndef MRB_WITHOUT_FLOAT
-MRB_API double
-mrb_cstr_to_dbl(mrb_state *mrb, const char * s, mrb_bool badcheck)
+double
+mrb_str_len_to_dbl(mrb_state *mrb, const char *s, size_t len, mrb_bool badcheck)
{
- const char *p = s;
+ char buf[DBL_DIG * 4 + 20];
+ const char *p = s, *p2;
+ const char *pend = p + len;
char *end;
- char buf[DBL_DIG * 4 + 10];
+ char *n;
+ char prev = 0;
double d;
+ mrb_bool dot = FALSE;
if (!p) return 0.0;
- while (ISSPACE(*p)) p++;
-
- if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
- return 0.0;
+ while (p<pend && ISSPACE(*p)) p++;
+ p2 = p;
+
+ if (pend - p > 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+ mrb_value x;
+
+ if (!badcheck) return 0.0;
+ x = mrb_str_len_to_inum(mrb, p, pend-p, 0, badcheck);
+ if (mrb_fixnum_p(x))
+ d = (double)mrb_fixnum(x);
+ else /* if (mrb_float_p(x)) */
+ d = mrb_float(x);
+ return d;
}
+ while (p < pend) {
+ if (!*p) {
+ if (badcheck) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
+ /* not reached */
+ }
+ pend = p;
+ p = p2;
+ goto nocopy;
+ }
+ if (!badcheck && *p == ' ') {
+ pend = p;
+ p = p2;
+ goto nocopy;
+ }
+ if (*p == '_') break;
+ p++;
+ }
+ p = p2;
+ n = buf;
+ while (p < pend) {
+ char c = *p++;
+ if (c == '.') dot = TRUE;
+ if (c == '_') {
+ /* remove an underscore between digits */
+ if (n == buf || !ISDIGIT(prev) || p == pend) {
+ if (badcheck) goto bad;
+ break;
+ }
+ }
+ else if (badcheck && prev == '_' && !ISDIGIT(c)) goto bad;
+ else {
+ const char *bend = buf+sizeof(buf)-1;
+ if (n==bend) { /* buffer overflow */
+ if (dot) break; /* cut off remaining fractions */
+ return INFINITY;
+ }
+ *n++ = c;
+ }
+ prev = c;
+ }
+ *n = '\0';
+ p = buf;
+ pend = n;
+nocopy:
d = mrb_float_read(p, &end);
if (p == end) {
if (badcheck) {
@@ -2515,44 +2590,24 @@ bad:
}
return d;
}
- if (*end) {
- char *n = buf;
- char *e = buf + sizeof(buf) - 1;
- char prev = 0;
-
- while (p < end && n < e) prev = *n++ = *p++;
- while (*p) {
- if (*p == '_') {
- /* remove an underscore between digits */
- if (n == buf || !ISDIGIT(prev) || (++p, !ISDIGIT(*p))) {
- if (badcheck) goto bad;
- break;
- }
- }
- prev = *p++;
- if (n < e) *n++ = prev;
- }
- *n = '\0';
- p = buf;
-
- if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
- return 0.0;
- }
-
- d = mrb_float_read(p, &end);
- if (badcheck) {
- if (!end || p == end) goto bad;
- while (*end && ISSPACE(*end)) end++;
- if (*end) goto bad;
- }
+ if (badcheck) {
+ if (!end || p == end) goto bad;
+ while (end<pend && ISSPACE(*end)) end++;
+ if (end<pend) goto bad;
}
return d;
}
MRB_API double
+mrb_cstr_to_dbl(mrb_state *mrb, const char *s, mrb_bool badcheck)
+{
+ return mrb_str_len_to_dbl(mrb, s, strlen(s), badcheck);
+}
+
+MRB_API double
mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck)
{
- return mrb_cstr_to_dbl(mrb, RSTRING_CSTR(mrb, str), badcheck);
+ return mrb_str_len_to_dbl(mrb, RSTRING_PTR(str), RSTRING_LEN(str), badcheck);
}
/* 15.2.10.5.39 */
@@ -2704,7 +2759,7 @@ mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
MRB_API mrb_value
mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr)
{
- return mrb_str_cat(mrb, str, ptr, strlen(ptr));
+ return mrb_str_cat(mrb, str, ptr, ptr ? strlen(ptr) : 0);
}
MRB_API mrb_value