summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- mrbgems/mruby-string-ext/test/string.rb | 1
-rw-r--r-- src/string.c | 261
-rw-r--r-- test/t/string.rb | 12
3 files changed, 90 insertions, 184 deletions
diff --git a/mrbgems/mruby-string-ext/test/string.rb b/mrbgems/mruby-string-ext/test/string.rb
index 6914fe31d..3f11c00a0 100644
--- a/mrbgems/mruby-string-ext/test/string.rb
+++ b/mrbgems/mruby-string-ext/test/string.rb
@@ -13,6 +13,7 @@ end
assert('String#dump') do
assert_equal("\"\\x00\"", "\0".dump)
assert_equal("\"foo\"", "foo".dump)
+ assert_equal('"\xe3\x82\x8b"', "る".dump)
assert_nothing_raised { ("\1" * 100).dump } # regress #1210
end
diff --git a/src/string.c b/src/string.c
index 1428ea780..a45dee11e 100644
--- a/src/string.c
+++ b/src/string.c
@@ -1318,6 +1318,84 @@ str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb
return src;
}
+#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
+#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
+
+/*
+ * Escape `str` into a new double-quoted string; shared by String#dump and
+ * String#inspect. Under MRB_UTF8_STRING, `inspect` copies multibyte
+ * characters found by utf8len() verbatim instead of hex-escaping them.
+ */
+static mrb_value
+str_escape(mrb_state *mrb, mrb_value str, mrb_bool inspect)
+{
+  const char *p, *pend;
+  char buf[CHAR_ESC_LEN + 1];
+  mrb_value result = mrb_str_new_lit(mrb, "\"");
+#ifdef MRB_UTF8_STRING
+  uint32_t ascii_flag = MRB_STR_ASCII;
+#endif
+
+  p = RSTRING_PTR(str); pend = RSTRING_END(str);
+  for (;p < pend; p++) {
+    unsigned char c, cc;
+#ifdef MRB_UTF8_STRING
+    if (inspect) {
+      mrb_int clen = utf8len(p, pend);
+      if (clen > 1) {
+        mrb_str_cat(mrb, result, p, clen);
+        p += clen-1;
+        ascii_flag = 0;
+        continue;
+      }
+    }
+#endif
+    c = *p;
+    if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p+1, pend))) {
+      buf[0] = '\\'; buf[1] = c;
+      mrb_str_cat(mrb, result, buf, 2);
+      continue;
+    }
+    if (ISPRINT(c)) {
+      buf[0] = c;
+      mrb_str_cat(mrb, result, buf, 1);
+      continue;
+    }
+    switch (c) {
+    case '\n': cc = 'n'; break;
+    case '\r': cc = 'r'; break;
+    case '\t': cc = 't'; break;
+    case '\f': cc = 'f'; break;
+    case '\013': cc = 'v'; break;
+    case '\010': cc = 'b'; break;
+    case '\007': cc = 'a'; break;
+    case 033: cc = 'e'; break;
+    default: cc = 0; break;
+    }
+    if (cc) {
+      buf[0] = '\\';
+      buf[1] = (char)cc;
+      mrb_str_cat(mrb, result, buf, 2);
+    }
+    else {
+      buf[0] = '\\';
+      buf[1] = 'x';
+      buf[3] = mrb_digitmap[c % 16]; c /= 16;
+      buf[2] = mrb_digitmap[c % 16];
+      mrb_str_cat(mrb, result, buf, 4);
+    }
+  }
+  mrb_str_cat_lit(mrb, result, "\"");
+#ifdef MRB_UTF8_STRING
+  /* Only the inspect pass looked for multibyte characters, so only then may
+   * the receiver be marked ASCII; dump output itself is always pure ASCII. */
+  if (inspect) mrb_str_ptr(str)->flags |= ascii_flag;
+  mrb_str_ptr(result)->flags |= ascii_flag;
+#endif
+
+  return result;
+}
+
static void
mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_value replace)
{
@@ -2574,8 +2652,6 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self)
return str;
}
-#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
-
/*
* call-seq:
* str.dump -> new_str
@@ -2586,113 +2662,7 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self)
mrb_value
mrb_str_dump(mrb_state *mrb, mrb_value str)
{
- mrb_int len;
- const char *p, *pend;
- char *q;
- struct RString *result;
-
- len = 2; /* "" */
- p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
- while (p < pend) {
- unsigned char c = *p++;
- switch (c) {
- case '"': case '\\':
- case '\n': case '\r':
- case '\t': case '\f':
- case '\013': case '\010': case '\007': case '\033':
- len += 2;
- break;
-
- case '#':
- len += IS_EVSTR(p, pend) ? 2 : 1;
- break;
-
- default:
- if (ISPRINT(c)) {
- len++;
- }
- else {
- len += 4; /* \NNN */
- }
- break;
- }
- }
-
- result = str_new(mrb, 0, len);
- str_with_class(result, str);
- p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
- q = RSTR_PTR(result);
- *q++ = '"';
- while (p < pend) {
- unsigned char c = *p++;
-
- switch (c) {
- case '"':
- case '\\':
- *q++ = '\\';
- *q++ = c;
- break;
-
- case '\n':
- *q++ = '\\';
- *q++ = 'n';
- break;
-
- case '\r':
- *q++ = '\\';
- *q++ = 'r';
- break;
-
- case '\t':
- *q++ = '\\';
- *q++ = 't';
- break;
-
- case '\f':
- *q++ = '\\';
- *q++ = 'f';
- break;
-
- case '\013':
- *q++ = '\\';
- *q++ = 'v';
- break;
-
- case '\010':
- *q++ = '\\';
- *q++ = 'b';
- break;
-
- case '\007':
- *q++ = '\\';
- *q++ = 'a';
- break;
-
- case '\033':
- *q++ = '\\';
- *q++ = 'e';
- break;
-
- case '#':
- if (IS_EVSTR(p, pend)) *q++ = '\\';
- *q++ = '#';
- break;
-
- default:
- if (ISPRINT(c)) {
- *q++ = c;
- }
- else {
- *q++ = '\\';
- *q++ = 'x';
- q[1] = mrb_digitmap[c % 16]; c /= 16;
- q[0] = mrb_digitmap[c % 16];
- q += 2;
- }
- }
- }
- *q = '"';
- return mrb_obj_value(result);
+ return str_escape(mrb, str, FALSE);
}
MRB_API mrb_value
@@ -2762,8 +2732,6 @@ mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2)
return mrb_str_cat_str(mrb, str1, str2);
}
-#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
-
/*
* call-seq:
* str.inspect -> string
@@ -2778,76 +2746,7 @@ mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2)
mrb_value
mrb_str_inspect(mrb_state *mrb, mrb_value str)
{
- const char *p, *pend;
- char buf[CHAR_ESC_LEN + 1];
- mrb_value result = mrb_str_new_lit(mrb, "\"");
-#ifdef MRB_UTF8_STRING
- uint32_t ascii_flag = MRB_STR_ASCII;
-#endif
-
- p = RSTRING_PTR(str); pend = RSTRING_END(str);
- for (;p < pend; p++) {
- unsigned char c, cc;
-#ifdef MRB_UTF8_STRING
- mrb_int clen;
-
- clen = utf8len(p, pend);
- if (clen > 1) {
- mrb_int i;
-
- for (i=0; i<clen; i++) {
- buf[i] = p[i];
- }
- mrb_str_cat(mrb, result, buf, clen);
- p += clen-1;
- ascii_flag = 0;
- continue;
- }
-#endif
- c = *p;
- if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p+1, pend))) {
- buf[0] = '\\'; buf[1] = c;
- mrb_str_cat(mrb, result, buf, 2);
- continue;
- }
- if (ISPRINT(c)) {
- buf[0] = c;
- mrb_str_cat(mrb, result, buf, 1);
- continue;
- }
- switch (c) {
- case '\n': cc = 'n'; break;
- case '\r': cc = 'r'; break;
- case '\t': cc = 't'; break;
- case '\f': cc = 'f'; break;
- case '\013': cc = 'v'; break;
- case '\010': cc = 'b'; break;
- case '\007': cc = 'a'; break;
- case 033: cc = 'e'; break;
- default: cc = 0; break;
- }
- if (cc) {
- buf[0] = '\\';
- buf[1] = (char)cc;
- mrb_str_cat(mrb, result, buf, 2);
- continue;
- }
- else {
- buf[0] = '\\';
- buf[1] = 'x';
- buf[3] = mrb_digitmap[c % 16]; c /= 16;
- buf[2] = mrb_digitmap[c % 16];
- mrb_str_cat(mrb, result, buf, 4);
- continue;
- }
- }
- mrb_str_cat_lit(mrb, result, "\"");
-#ifdef MRB_UTF8_STRING
- mrb_str_ptr(str)->flags |= ascii_flag;
- mrb_str_ptr(result)->flags |= ascii_flag;
-#endif
-
- return result;
+ return str_escape(mrb, str, TRUE);
}
/*
diff --git a/test/t/string.rb b/test/t/string.rb
index e1ff48312..65ad13103 100644
--- a/test/t/string.rb
+++ b/test/t/string.rb
@@ -748,12 +748,18 @@ assert('String#upcase!', '15.2.10.5.43') do
end
assert('String#inspect', '15.2.10.5.46') do
+ assert_equal "\"\\x00\"", "\0".inspect
+ assert_equal "\"foo\"", "foo".inspect
+ if UTF8STRING
+ assert_equal '"る"', "る".inspect
+ else
+ assert_equal '"\xe3\x82\x8b"', "る".inspect
+ end
+
# should not raise an exception - regress #1210
assert_nothing_raised do
- ("\1" * 100).inspect
+ ("\1" * 100).inspect
end
-
- assert_equal "\"\\x00\"", "\0".inspect
end
# Not ISO specified