summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Yukihiro Matsumoto <[email protected]> 2012-05-31 15:32:38 +0900
committer Yukihiro Matsumoto <[email protected]> 2012-05-31 15:32:38 +0900
commit 64fc4ac332eab0be7704cf6f7ec5a96c523c0ed9 (patch)
tree 00bca09773b7584fd2b56c371fe6159550c38b6f
parent 0d8adaaaa16859342a37e3bf6832a8717c54f27c (diff)
download mruby-64fc4ac332eab0be7704cf6f7ec5a96c523c0ed9.tar.gz
mruby-64fc4ac332eab0be7704cf6f7ec5a96c523c0ed9.zip
resolve conflict
-rw-r--r-- include/mruby/string.h 23
-rw-r--r-- src/class.c 8
-rw-r--r-- src/encoding.c 1685
-rw-r--r-- src/encoding.h 9
-rw-r--r-- src/gc.c 4
-rw-r--r-- src/init.c 2
-rw-r--r-- src/object.c 13
-rw-r--r-- src/re.c 792
-rw-r--r-- src/sprintf.c 33
-rw-r--r-- src/string.c 2379
-rw-r--r-- src/symbol.c 148
11 files changed, 331 insertions, 4765 deletions
diff --git a/include/mruby/string.h b/include/mruby/string.h
index 08dd306e8..fca28cf72 100644
--- a/include/mruby/string.h
+++ b/include/mruby/string.h
@@ -30,34 +30,15 @@ extern const char mrb_digitmap[];
struct RString {
MRUBY_OBJECT_HEADER;
int len;
- union {
- int capa;
- struct RString *shared;
- } aux;
+ int capa;
char *buf;
};
-extern struct SCOPE {
- struct RBasic super;
- mrb_sym *local_tbl;
- mrb_value *local_vars;
- int flags;
-} *ruby_scope;
-
-struct RVarmap {
- struct RBasic super;
- mrb_sym id;
- mrb_value val;
- struct RVarmap *next;
-};
-extern struct RVarmap *ruby_dyna_vars;
-
#define mrb_str_ptr(s) ((struct RString*)((s).value.p))
#define RSTRING(s) ((struct RString*)((s).value.p))
#define RSTRING_PTR(s) (RSTRING(s)->buf)
#define RSTRING_LEN(s) (RSTRING(s)->len)
-#define RSTRING_CAPA(s) (RSTRING(s)->aux.capa)
-#define RSTRING_SHARED(s) (RSTRING(s)->aux.shared)
+#define RSTRING_CAPA(s) (RSTRING(s)->capa)
#define RSTRING_END(s) (RSTRING(s)->buf + RSTRING(s)->len)
#define MRB_STR_SHARED 256
diff --git a/src/class.c b/src/class.c
index f96922f4b..f9a6154c5 100644
--- a/src/class.c
+++ b/src/class.c
@@ -17,12 +17,6 @@
#include "mruby/khash.h"
-#ifdef INCLUDE_REGEXP
- #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
-#else
- #define mrb_usascii_str_new2 mrb_str_new_cstr
-#endif
-
KHASH_MAP_INIT_INT(mt, struct RProc*);
KHASH_MAP_INIT_INT(iv, mrb_value);
@@ -1052,7 +1046,7 @@ mrb_mod_to_s(mrb_state *mrb, mrb_value klass)
{
//if (FL_TEST(klass, FL_SINGLETON)) {
if (mrb_type(klass) == MRB_TT_SCLASS) {
- mrb_value s = mrb_usascii_str_new2(mrb, "#<");
+ mrb_value s = mrb_str_new_cstr(mrb, "#<");
mrb_value v = mrb_iv_get(mrb, klass, mrb_intern(mrb, "__attached__"));
mrb_str_cat2(mrb, s, "Class:");
diff --git a/src/encoding.c b/src/encoding.c
deleted file mode 100644
index 8e4257829..000000000
--- a/src/encoding.c
+++ /dev/null
@@ -1,1685 +0,0 @@
-/*
-** encoding.c - Encoding class
-**
-** See Copyright Notice in mruby.h
-*/
-
-#include "mruby.h"
-#ifdef INCLUDE_ENCODING
-#include <ctype.h>
-#ifndef NO_LOCALE_CHARMAP
-#ifdef __CYGWIN__
-#include <windows.h>
-#endif
-#ifdef HAVE_LANGINFO_H
-#include <langinfo.h>
-#endif
-#endif
-
-#define USE_UPPER_CASE_TABLE
-
-#include <ctype.h>
-#include <stdio.h>
-#include "regenc.h"
-#include "regint.h"
-#include "encoding.h"
-#include "st.h"
-#include <string.h>
-#include "mruby/numeric.h"
-#include "mruby/string.h"
-#include "mruby/array.h"
-#include "mruby/variable.h"
-#include "mruby/hash.h"
-
-#define pprintf printf
-#define mrb_warning printf
-#define mrb_bug printf
-#ifndef INT_MAX
-#define INT_MAX 2147483647
-#endif
-#define mrb_isascii(c) ((unsigned long)(c) < 128)
-#define OBJ_FREEZE(a)
-static mrb_sym id_encoding;
-//mrb_value mrb_cEncoding;
-static mrb_value mrb_encoding_list;
-
-struct mrb_encoding_entry {
- const char *name;
- mrb_encoding *enc;
- mrb_encoding *base;
-};
-
-static struct {
- struct mrb_encoding_entry *list;
- int count;
- int size;
- st_table *names;
-} enc_table;
-
-void mrb_enc_init(mrb_state *mrb);
-
-enum {
- ENCINDEX_ASCII,
- ENCINDEX_UTF_8,
- ENCINDEX_US_ASCII,
- ENCINDEX_BUILTIN_MAX
-};
-#define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
-#define ENCODING_NAMELEN_MAX 63
-#define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
-#define STRCASECMP(s1, s2) (st_strcasecmp(s1, s2))
-
-//#define BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & T_MASK)
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-#ifndef TRUE
-#define TRUE 1
-#endif
-
-#ifndef OTHER
-#define OTHER 2
-#endif
-
-#define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
-
-static const struct mrb_data_type encoding_data_type = {
- "encoding", 0,
-};
-#define is_data_encoding(obj) (DATA_TYPE(obj) == &encoding_data_type)
-
-// RUBY_IMMEDIATE_MASK = 0x03,
-//#define IMMEDIATE_MASK RUBY_IMMEDIATE_MASK
-//#define IMMEDIATE_P(x) ((VALUE)(x) & IMMEDIATE_MASK)
-//#define SPECIAL_CONST_P(x) (IMMEDIATE_P(x) || !RTEST(x))
-
-static mrb_value
-enc_new(mrb_state *mrb, mrb_encoding *encoding)
-{
- return mrb_obj_value(Data_Wrap_Struct(mrb, ENCODE_CLASS, &encoding_data_type, encoding));
-}
-
-#define enc_autoload_p(enc) (!mrb_enc_mbmaxlen(enc))
-
-#define UNSPECIFIED_ENCODING INT_MAX
-
-
-static mrb_value
-mrb_enc_from_encoding_index(mrb_state *mrb, int idx)
-{
- mrb_value list, enc;
-
- if (mrb_nil_p(list = mrb_encoding_list)) {
- mrb_bug("mrb_enc_from_encoding_index(%d): no mrb_encoding_list", idx);
- }
- enc = mrb_ary_ref(mrb, list, idx);//mrb_ary_entry(list, idx);
- if (mrb_nil_p(enc)) {
- mrb_bug("mrb_enc_from_encoding_index(%d): not created yet", idx);
- }
- return enc;
-}
-
-mrb_value
-mrb_enc_from_encoding(mrb_state *mrb, mrb_encoding *encoding)
-{
- int idx;
- if (!encoding) return mrb_nil_value();
- idx = ENC_TO_ENCINDEX(encoding);
- return mrb_enc_from_encoding_index(mrb, idx);
-}
-
-static int enc_autoload(mrb_state *mrb, mrb_encoding *enc);
-static int
-check_encoding(mrb_state *mrb, mrb_encoding *enc)
-{
- int index = mrb_enc_to_index(enc);
- if (mrb_enc_from_index(mrb, index) != enc)
- return -1;
- if (enc_autoload_p(enc)) {
- index = enc_autoload(mrb, enc);
- }
- return index;
-}
-
-static int
-enc_check_encoding(mrb_state *mrb, mrb_value obj)
-{
- if (SPECIAL_CONST_P(obj) || !is_data_encoding(obj)) {
- return -1;
- }
- return check_encoding(mrb, RDATA(obj)->data);
-}
-
-static int
-must_encoding(mrb_state *mrb, mrb_value enc)
-{
- int index = enc_check_encoding(mrb, enc);
- if (index < 0) {
- mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type %s (expected Encoding)",
- mrb_obj_classname(mrb, enc));
- }
- return index;
-}
-
-int
-mrb_to_encoding_index(mrb_state *mrb, mrb_value enc)
-{
- int idx;
-
- idx = enc_check_encoding(mrb, enc);
- if (idx >= 0) {
- return idx;
- }
- else if (mrb_nil_p(enc = mrb_check_string_type(mrb, enc))) {
- return -1;
- }
- if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) {
- return -1;
- }
- //return mrb_enc_find_index(StringValueCStr(enc));
- return mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc));
-
-}
-
-static mrb_encoding *
-to_encoding(mrb_state *mrb, mrb_value enc)
-{
- int idx;
-
- //StringValue(enc);
- mrb_string_value(mrb, &enc);
-
- if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid name encoding (non ASCII)");
- }
- //idx = mrb_enc_find_index(StringValueCStr(enc));
- idx = mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc));
- if (idx < 0) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %s", RSTRING_PTR(enc));
- }
- return mrb_enc_from_index(mrb, idx);
-}
-
-mrb_encoding *
-mrb_to_encoding(mrb_state *mrb, mrb_value enc)
-{
- if (enc_check_encoding(mrb, enc) >= 0) return RDATA(enc)->data;
- return to_encoding(mrb, enc);
-}
-
-static int
-enc_table_expand(int newsize)
-{
- struct mrb_encoding_entry *ent;
- int count = newsize;
-
- if (enc_table.size >= newsize) return newsize;
- newsize = (newsize + 7) / 8 * 8;
- ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
- if (!ent) return -1;
- memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
- enc_table.list = ent;
- enc_table.size = newsize;
- return count;
-}
-
-static int
-enc_register_at(mrb_state *mrb, int index, const char *name, mrb_encoding *encoding)
-{
- struct mrb_encoding_entry *ent = &enc_table.list[index];
- mrb_value list;
- mrb_value ref_ary;
-
- if (!valid_encoding_name_p(name)) return -1;
- if (!ent->name) {
- ent->name = name = strdup(name);
- }
- else if (STRCASECMP(name, ent->name)) {
- return -1;
- }
- if (!ent->enc) {
- ent->enc = xmalloc(sizeof(mrb_encoding));
- }
- if (encoding) {
- *ent->enc = *encoding;
- }
- else {
- memset(ent->enc, 0, sizeof(*ent->enc));
- }
- encoding = ent->enc;
- encoding->name = name;
- encoding->ruby_encoding_index = index;
- st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
- list = mrb_encoding_list;
- //if (list && mrb_nil_p((mrb_ary_ref(mrb, list, index)))) {
- if (list.tt) {
- ref_ary = mrb_ary_ref(mrb, list, index);
- if mrb_nil_p(ref_ary) {
- /* initialize encoding data */
- mrb_ary_set(mrb, list, index, enc_new(mrb, encoding));//rb_ary_store(list, index, enc_new(encoding));
- }
- }
- return index;
-}
-
-
-static int
-enc_register(mrb_state *mrb, const char *name, mrb_encoding *encoding)
-{
- int index = enc_table.count;
-
- if ((index = enc_table_expand(index + 1)) < 0) return -1;
- enc_table.count = index;
- return enc_register_at(mrb, index - 1, name, encoding);
-}
-
-static void set_encoding_const(mrb_state *, const char*, mrb_encoding*);
-int mrb_enc_registered(const char*);
-
-static void
-enc_check_duplication(mrb_state *mrb, const char *name)
-{
- if (mrb_enc_registered(name) >= 0) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "encoding %s is already registered", name);
- }
-}
-static mrb_encoding*
-set_base_encoding(int index, mrb_encoding *base)
-{
- mrb_encoding *enc = enc_table.list[index].enc;
-
- enc_table.list[index].base = base;
- if (mrb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
- return enc;
-}
-
-int
-mrb_enc_replicate(mrb_state *mrb, const char *name, mrb_encoding *encoding)
-{
- int idx;
-
- enc_check_duplication(mrb, name);
- idx = enc_register(mrb, name, encoding);
- set_base_encoding(idx, encoding);
- set_encoding_const(mrb, name, mrb_enc_from_index(mrb, idx));
- return idx;
-}
-
-/* 15.2.40.2.17 */
-/*
- * call-seq:
- * enc.replicate(name) -> encoding
- *
- * Returns a replicated encoding of _enc_ whose name is _name_.
- * The new encoding should have the same byte structure of _enc_.
- * If _name_ is used by another encoding, raise ArgumentError.
- *
- */
-static mrb_value
-enc_replicate(mrb_state *mrb, mrb_value encoding)
-{
- mrb_value name;
- mrb_get_args(mrb, "o", &name);
- return mrb_enc_from_encoding_index(mrb,
- //mrb_enc_replicate(mrb, StringValueCStr(name),
- mrb_enc_replicate(mrb, mrb_string_value_cstr(mrb, &name),
- mrb_to_encoding(mrb, encoding)));
-}
-static int
-enc_replicate_with_index(mrb_state *mrb, const char *name, mrb_encoding *origenc, int idx)
-{
- if (idx < 0) {
- idx = enc_register(mrb, name, origenc);
- }
- else {
- idx = enc_register_at(mrb, idx, name, origenc);
- }
- if (idx >= 0) {
- set_base_encoding(idx, origenc);
- set_encoding_const(mrb, name, mrb_enc_from_index(mrb, idx));
- }
- return idx;
-}
-int
-mrb_encdb_replicate(mrb_state *mrb, const char *name, const char *orig)
-{
- int origidx = mrb_enc_registered(orig);
- int idx = mrb_enc_registered(name);
-
- if (origidx < 0) {
- origidx = enc_register(mrb, orig, 0);
- }
- return enc_replicate_with_index(mrb, name, mrb_enc_from_index(mrb, origidx), idx);
-}
-int
-mrb_define_dummy_encoding(mrb_state *mrb, const char *name)
-{
- int index = mrb_enc_replicate(mrb, name, mrb_ascii8bit_encoding(mrb));
- mrb_encoding *enc = enc_table.list[index].enc;
-
- ENC_SET_DUMMY(enc);
- return index;
-}
-
-int
-mrb_encdb_dummy(mrb_state *mrb, const char *name)
-{
- int index = enc_replicate_with_index(mrb, name, mrb_ascii8bit_encoding(mrb),
- mrb_enc_registered(name));
- mrb_encoding *enc = enc_table.list[index].enc;
-
- ENC_SET_DUMMY(enc);
- return index;
-}
-
-/* 15.2.40.2.13 */
-/*
- * call-seq:
- * enc.dummy? -> true or false
- *
- * Returns true for dummy encodings.
- * A dummy encoding is an encoding for which character handling is not properly
- * implemented.
- * It is used for stateful encodings.
- *
- * Encoding::ISO_2022_JP.dummy? #=> true
- * Encoding::UTF_8.dummy? #=> false
- *
- */
-static mrb_value
-enc_dummy_p(mrb_state *mrb, mrb_value enc)
-{
- return ENC_DUMMY_P(enc_table.list[must_encoding(mrb, enc)].enc) ? mrb_true_value() : mrb_false_value();
-}
-
-/* 15.2.40.2.12 */
-/*
- * call-seq:
- * enc.ascii_compatible? -> true or false
- *
- * Returns whether ASCII-compatible or not.
- *
- * Encoding::UTF_8.ascii_compatible? #=> true
- * Encoding::UTF_16BE.ascii_compatible? #=> false
- *
- */
-static mrb_value
-enc_ascii_compatible_p(mrb_state *mrb, mrb_value enc)
-{
- return mrb_enc_asciicompat(mrb, enc_table.list[must_encoding(mrb, enc)].enc) ? mrb_true_value() : mrb_false_value();
-}
-
-static const char *
-enc_alias_internal(const char *alias, int idx)
-{
- alias = strdup(alias);
- st_insert(enc_table.names, (st_data_t)alias, (st_data_t)idx);
- return alias;
-}
-
-/*
- * Returns 1 when the encoding is Unicode series other than UTF-7 else 0.
- */
-int
-mrb_enc_unicode_p(mrb_encoding *enc)
-{
- const char *name = mrb_enc_name(enc);
- return name[0] == 'U' && name[1] == 'T' && name[2] == 'F' && name[4] != '7';
-}
-
-extern mrb_encoding OnigEncodingUTF_8;
-extern mrb_encoding OnigEncodingUS_ASCII;
-
-void
-mrb_enc_init(mrb_state *mrb)
-{
- enc_table_expand(ENCODING_COUNT + 1);
- if (!enc_table.names) {
- enc_table.names = st_init_strcasetable();
- }
-#define ENC_REGISTER(enc) enc_register_at(mrb, ENCINDEX_##enc, mrb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
- ENC_REGISTER(ASCII);
- ENC_REGISTER(UTF_8);
- ENC_REGISTER(US_ASCII);
-#undef ENC_REGISTER
- enc_table.count = ENCINDEX_BUILTIN_MAX;
-}
-
-mrb_encoding *
-mrb_enc_from_index(mrb_state *mrb, int index)
-{
- if (!enc_table.list) {
- mrb_enc_init(mrb);
- }
- if (index < 0 || enc_table.count <= index) {
- return 0;
- }
- return enc_table.list[index].enc;
-}
-
-int
-mrb_enc_registered(const char *name)
-{
- st_data_t idx = 0;
-
- if (!name) return -1;
- if (!enc_table.list) return -1;
- if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
- return (int)idx;
- }
- return -1;
-}
-
-mrb_value
-mrb_require_safe(mrb_value fname, int safe)
-{
- mrb_value result = mrb_nil_value();
- return result;
-}
-static int
-load_encoding(const char *name)
-{
- mrb_value enclib;// = mrb_sprintf("enc/%s.so", name);
- //mrb_value verbose;// = ruby_verbose;
- //mrb_value debug;// = ruby_debug;
- //mrb_value loaded;
- char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
- int idx;
-
- while (s < e) {
- if (!ISALNUM(*s)) *s = '_';
- else if (ISUPPER(*s)) *s = TOLOWER(*s);
- ++s;
- }
- OBJ_FREEZE(enclib);
- //ruby_verbose = mrb_false_value();
- //ruby_debug = mrb_false_value();
- //loaded = mrb_protect(require_enc, enclib, 0);
- //ruby_verbose = verbose;
- //ruby_debug = debug;
- //rb_set_errinfo(mrb_nil_value());
- //if (mrb_nil_p(loaded)) return -1;
- if ((idx = mrb_enc_registered(name)) < 0) return -1;
- if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
- return idx;
-}
-
-static int
-enc_autoload(mrb_state *mrb, mrb_encoding *enc)
-{
- int i;
- mrb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
-
- if (base) {
- i = 0;
- do {
- if (i >= enc_table.count) return -1;
- } while (enc_table.list[i].enc != base && (++i, 1));
- if (enc_autoload_p(base)) {
- if (enc_autoload(mrb, base) < 0) return -1;
- }
- i = ENC_TO_ENCINDEX(enc);
- enc_register_at(mrb, i, mrb_enc_name(enc), base);
- }
- else {
- i = load_encoding(mrb_enc_name(enc));
- }
- return i;
-}
-
-int
-mrb_enc_find_index(mrb_state *mrb, const char *name)
-{
- int i = mrb_enc_registered(name);
- mrb_encoding *enc;
-
- if (i < 0) {
- i = load_encoding(name);
- }
- else if (!(enc = mrb_enc_from_index(mrb, i))) {
- if (i != UNSPECIFIED_ENCODING) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "encoding %s is not registered", name);
- }
- }
- else if (enc_autoload_p(enc)) {
- if (enc_autoload(mrb, enc) < 0) {
- //mrb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
- printf("failed to load encoding (%s); use ASCII-8BIT instead",
- name);
- return 0;
- }
- }
- return i;
-}
-
-mrb_encoding *
-mrb_enc_find(mrb_state *mrb, const char *name)
-{
- int idx = mrb_enc_find_index(mrb, name);
- if (idx < 0) idx = 0;
- return mrb_enc_from_index(mrb, idx);
-}
-
-static inline int
-enc_capable(mrb_value obj)
-{
- if (SPECIAL_CONST_P(obj)) return (mrb_type(obj) == MRB_TT_SYMBOL);
- switch (mrb_type(obj)/*BUILTIN_TYPE(obj)*/) {
- case MRB_TT_STRING:
- case MRB_TT_REGEX:
- case MRB_TT_FILE:
- return TRUE;
- case MRB_TT_DATA:
- if (is_data_encoding(obj)) return TRUE;
- default:
- return FALSE;
- }
-}
-
-mrb_sym
-mrb_id_encoding(mrb_state *mrb)
-{
- //CONST_ID(id_encoding, "encoding");
- id_encoding = mrb_intern(mrb, "encoding");
- return id_encoding;
-}
-
-int
-mrb_enc_get_index(mrb_state *mrb, mrb_value obj)
-{
- int i = -1;
- mrb_value tmp;
- struct RString *ps;
-
- if (SPECIAL_CONST_P(obj)) {
- if (mrb_type(obj) != MRB_TT_SYMBOL) return -1;
- //obj = mrb_id2str(SYM2ID(obj));
- obj = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, SYM2ID(obj)));
- }
- switch (mrb_type(obj)/*BUILTIN_TYPE(obj)*/) {
- as_default:
- default:
- case MRB_TT_STRING:
- case MRB_TT_REGEX:
- i = (int)ENCODING_GET_INLINED(obj);
- ps = mrb_str_ptr(obj);
- if (i == ENCODING_INLINE_MAX) {
- mrb_value iv;
-
- //iv = rb_ivar_get(obj, mrb_id_encoding(mrb));
- iv = mrb_iv_get(mrb, obj, mrb_id_encoding(mrb));
- i = mrb_fixnum(iv);
- }
- break;
-
- case MRB_TT_FILE:
- tmp = mrb_funcall(mrb, obj, "internal_encoding", 0, 0);
- if (mrb_nil_p(tmp)) obj = mrb_funcall(mrb, obj, "external_encoding", 0, 0);
- else obj = tmp;
- if (mrb_nil_p(obj)) break;
- case MRB_TT_DATA:
- if (is_data_encoding(obj)) {
- i = enc_check_encoding(mrb, obj);
- }
- else {
- goto as_default;
- }
- break;
- }
- return i;
-}
-
-void
-mrb_enc_set_index(mrb_state *mrb, mrb_value obj, int idx)
-{
- if (idx < ENCODING_INLINE_MAX) {
- ENCODING_SET_INLINED(obj, idx);
- return;
- }
- ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX);
- //mrb_ivar_set(obj, mrb_id_encoding(mrb), INT2NUM(idx));
- mrb_iv_set(mrb, obj, mrb_id_encoding(mrb), mrb_fixnum_value(idx));
- return;
-}
-
-mrb_value
-mrb_enc_associate_index(mrb_state *mrb, mrb_value obj, int idx)
-{
-/* enc_check_capable(obj);*/
- if (mrb_enc_get_index(mrb, obj) == idx)
- return obj;
- if (SPECIAL_CONST_P(obj)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "cannot set encoding");
- }
- if (!ENC_CODERANGE_ASCIIONLY(obj) ||
- !mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx))) {
- ENC_CODERANGE_CLEAR(obj);
- }
- mrb_enc_set_index(mrb, obj, idx);
- return obj;
-}
-
-mrb_value
-mrb_enc_associate(mrb_state *mrb, mrb_value obj, mrb_encoding *enc)
-{
- return mrb_enc_associate_index(mrb, obj, mrb_enc_to_index(enc));
-}
-
-mrb_encoding*
-mrb_enc_get(mrb_state *mrb, mrb_value obj)
-{
- return mrb_enc_from_index(mrb, mrb_enc_get_index(mrb, obj));
-}
-
-mrb_encoding*
-mrb_enc_check(mrb_state *mrb, mrb_value str1, mrb_value str2)
-{
- mrb_encoding *enc = mrb_enc_compatible(mrb, str1, str2);
- if (!enc)
- mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s",
- mrb_enc_name(mrb_enc_get(mrb, str1)),
- mrb_enc_name(mrb_enc_get(mrb, str2)));
- return enc;
-}
-
-mrb_encoding*
-mrb_enc_compatible(mrb_state *mrb, mrb_value str1, mrb_value str2)
-{
- int idx1, idx2;
- mrb_encoding *enc1, *enc2;
-
- idx1 = mrb_enc_get_index(mrb, str1);
- idx2 = mrb_enc_get_index(mrb, str2);
-
- if (idx1 < 0 || idx2 < 0)
- return 0;
-
- if (idx1 == idx2) {
- return mrb_enc_from_index(mrb, idx1);
- }
- enc1 = mrb_enc_from_index(mrb, idx1);
- enc2 = mrb_enc_from_index(mrb, idx2);
-
- if (mrb_type(str2) == MRB_TT_STRING && RSTRING_LEN(str2) == 0)
- //return (idx1 == ENCINDEX_US_ASCII && mrb_enc_asciicompat(mrb, enc2)) ? enc2 : enc1;
- return enc1;
- if (mrb_type(str1) == MRB_TT_STRING && RSTRING_LEN(str1) == 0)
- //return (idx2 == ENCINDEX_US_ASCII && mrb_enc_asciicompat(mrb, enc1)) ? enc1 : enc2;
- return enc2;
- if (!mrb_enc_asciicompat(mrb, enc1) || !mrb_enc_asciicompat(mrb, enc2)) {
- return 0;
- }
-
- /* objects whose encoding is the same of contents */
- //if (mrb_type(str2)/*BUILTIN_TYPE(str2)*/ != MRB_TT_STRING && idx2 == ENCINDEX_US_ASCII)
- //return enc1;
- //if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ != MRB_TT_STRING && idx1 == ENCINDEX_US_ASCII)
- //return enc2;
-
- if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ != MRB_TT_STRING) {
- mrb_value tmp = str1;
- int idx0 = idx1;
- str1 = str2;
- str2 = tmp;
- idx1 = idx2;
- idx2 = idx0;
- }
- if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ == MRB_TT_STRING) {
- int cr1, cr2;
-
- cr1 = mrb_enc_str_coderange(mrb, str1);
- if (mrb_type(str2)/*BUILTIN_TYPE(str2)*/ == MRB_TT_STRING) {
- cr2 = mrb_enc_str_coderange(mrb, str2);
- if (cr1 != cr2) {
- /* may need to handle ENC_CODERANGE_BROKEN */
- if (cr1 == ENC_CODERANGE_7BIT) return enc2;
- if (cr2 == ENC_CODERANGE_7BIT) return enc1;
- }
- if (cr2 == ENC_CODERANGE_7BIT) {
- if (idx1 == ENCINDEX_ASCII) return enc2;
- return enc1;
- }
- }
- if (cr1 == ENC_CODERANGE_7BIT)
- return enc2;
- }
- return 0;
-}
-
-void
-mrb_enc_copy(mrb_state *mrb, mrb_value obj1, mrb_value obj2)
-{
- mrb_enc_associate_index(mrb, obj1, mrb_enc_get_index(mrb, obj2));
-}
-
-
-/*
- * call-seq:
- * obj.encoding -> encoding
- *
- * Returns the Encoding object that represents the encoding of obj.
- */
-
-mrb_value
-mrb_obj_encoding(mrb_state *mrb, mrb_value obj)
-{
- mrb_encoding *enc = mrb_enc_get(mrb, obj);
- if (!enc) {
- mrb_raise(mrb, E_TYPE_ERROR, "unknown encoding");
- }
- return mrb_enc_from_encoding(mrb, enc);
-}
-
-int
-mrb_enc_fast_mbclen(const char *p, const char *e, mrb_encoding *enc)
-{
- return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
-}
-
-int
-mrb_enc_mbclen(const char *p, const char *e, mrb_encoding *enc)
-{
- int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
- if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
- return MBCLEN_CHARFOUND_LEN(n);
- else {
- int min = mrb_enc_mbminlen(enc);
- return min <= e-p ? min : (int)(e-p);
- }
-}
-
-int
-mrb_enc_precise_mbclen(const char *p, const char *e, mrb_encoding *enc)
-{
- int n;
- if (e <= p)
- return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
- n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
- if (e-p < n)
- return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
- return n;
-}
-
-int
-mrb_enc_ascget(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc)
-{
- unsigned int c, l;
- if (e <= p)
- return -1;
- if (mrb_enc_asciicompat(mrb, enc)) {
- c = (unsigned char)*p;
- if (!ISASCII(c))
- return -1;
- if (len) *len = 1;
- return c;
- }
- l = mrb_enc_precise_mbclen(p, e, enc);
- if (!MBCLEN_CHARFOUND_P(l))
- return -1;
- c = mrb_enc_mbc_to_codepoint(p, e, enc);
- if (!mrb_enc_isascii(c, enc))
- return -1;
- if (len) *len = l;
- return c;
-}
-
-unsigned int
-mrb_enc_codepoint_len(mrb_state *mrb, const char *p, const char *e, int *len_p, mrb_encoding *enc)
-{
- int r;
- if (e <= p)
- mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string");
- r = mrb_enc_precise_mbclen(p, e, enc);
- if (MBCLEN_CHARFOUND_P(r)) {
- if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
- return mrb_enc_mbc_to_codepoint(p, e, enc);
- }
- else
- mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(enc));
- return 0;
-}
-
-#undef mrb_enc_codepoint
-unsigned int
-mrb_enc_codepoint(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc)
-{
- return mrb_enc_codepoint_len(mrb, p, e, 0, enc);
-}
-
-int
-mrb_enc_codelen(mrb_state *mrb, int c, mrb_encoding *enc)
-{
- int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
- if (n == 0) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid codepoint 0x%x in %s", c, mrb_enc_name(enc));
- }
- return n;
-}
-
-int
-mrb_enc_toupper(int c, mrb_encoding *enc)
-{
- return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_UPPER_CASE(c):(c));
-}
-
-int
-mrb_enc_tolower(int c, mrb_encoding *enc)
-{
- return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_LOWER_CASE(c):(c));
-}
-
-/* 15.2.40.2.14 */
-/*
- * call-seq:
- * enc.inspect -> string
- *
- * Returns a string which represents the encoding for programmers.
- *
- * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
- * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
- */
-static mrb_value
-enc_inspect(mrb_state *mrb, mrb_value self)
-{
- mrb_value str;
- //mrb_value str = mrb_sprintf("#<%s:%s%s>", mrb_obj_classname(mrb, self),
- // mrb_enc_name((mrb_encoding*)(DATA_PTR(self))),
- // (mrb_fixnum(enc_dummy_p(mrb, self)) ? " (dummy)" : ""));
- char buf[256];
- sprintf(buf, "#<%s:%s%s>", mrb_obj_classname(mrb, self),
- mrb_enc_name((mrb_encoding*)(DATA_PTR(self))),
- (mrb_enc_dummy_p((mrb_encoding*)(DATA_PTR(self))) ? " (dummy)" : ""));
- str = mrb_str_new(mrb, buf, strlen(buf));
- ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT);
- return str;
-}
-
-/* 15.2.40.2.15 */
-/* 15.2.40.2.18 */
-/*
- * call-seq:
- * enc.name -> string
- *
- * Returns the name of the encoding.
- *
- * Encoding::UTF_8.name #=> "UTF-8"
- */
-static mrb_value
-enc_name(mrb_state *mrb, mrb_value self)
-{
- return mrb_usascii_str_new2(mrb, mrb_enc_name((mrb_encoding*)DATA_PTR(self)));
-}
-
-struct fn_arg {
- mrb_state *mrb;
- enum st_retval (*func)(ANYARGS);
- void *a;
-};
-
-static enum st_retval
-fn_i(st_data_t key, st_data_t val, st_data_t arg) {
- struct fn_arg *a = (struct fn_arg*)arg;
-
- return (*a->func)(a->mrb, key, val, a->a);
-}
-
-static int
-st_foreachNew(mrb_state *mrb, st_table *tbl, enum st_retval (*func)(ANYARGS), void *a)
-{
- struct fn_arg arg = {
- mrb,
- func,
- a,
- };
-
- return st_foreach(tbl, fn_i, (st_data_t)&arg);
-}
-
-static enum st_retval
-enc_names_i(mrb_state *mrb, st_data_t name, st_data_t idx, st_data_t args)
-{
- mrb_value *arg = (mrb_value*)args;
- int iargs = mrb_fixnum(arg[0]);
- //if ((int)idx == (int)arg[0]) {
- if ((int)idx == iargs) {
- mrb_value str = mrb_usascii_str_new2(mrb, (char*)name);
- //OBJ_FREEZE(str);
- mrb_ary_push(mrb, arg[1], str);
- }
- return ST_CONTINUE;
-}
-
-/* 15.2.40.2.16 */
-/*
- * call-seq:
- * enc.names -> array
- *
- * Returns the list of name and aliases of the encoding.
- *
- * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"]
- */
-static mrb_value
-enc_names(mrb_state *mrb, mrb_value self)
-{
- mrb_value args[2];
-
- args[0] = mrb_fixnum_value(mrb_to_encoding_index(mrb, self));
- args[1] = mrb_ary_new_capa(mrb, 0);//mrb_ary_new2(0);
- st_foreachNew(mrb, enc_table.names, enc_names_i, args);
- return args[1];
-}
-
-/* 15.2.40.2.8 */
-/*
- * call-seq:
- * Encoding.list -> [enc1, enc2, ...]
- *
- * Returns the list of loaded encodings.
- *
- * Encoding.list
- * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
- * #<Encoding:ISO-2022-JP (dummy)>]
- *
- * Encoding.find("US-ASCII")
- * #=> #<Encoding:US-ASCII>
- *
- * Encoding.list
- * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
- * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
- *
- */
-static mrb_value
-enc_list(mrb_state *mrb, mrb_value klass)
-{
- struct RArray *ar = (struct RArray*)mrb_encoding_list.value.p;
- mrb_value ary = mrb_ary_new_capa(mrb, 0);//mrb_ary_new2(0);
- //mrb_ary_replace_m(mrb, ary/*, mmrb_encoding_list*/);
- mrb_ary_replace(mrb, mrb_ary_ptr(ary), ar->buf, enc_table.count);
- return ary;
-}
-
-/* 15.2.40.2.7 */
-/*
- * call-seq:
- * Encoding.find(string) -> enc
- * Encoding.find(symbol) -> enc
- *
- * Search the encoding with specified <i>name</i>.
- * <i>name</i> should be a string or symbol.
- *
- * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
- * Encoding.find(:Shift_JIS) #=> #<Encoding:Shift_JIS>
- *
- * Names which this method accept are encoding names and aliases
- * including following special aliases
- *
- * "external":: default external encoding
- * "internal":: default internal encoding
- * "locale":: locale encoding
- * "filesystem":: filesystem encoding
- *
- * An ArgumentError is raised when no encoding with <i>name</i>.
- * Only <code>Encoding.find("internal")</code> however returns nil
- * when no encoding named "internal", in other words, when Ruby has no
- * default internal encoding.
- */
-static mrb_value
-enc_find(mrb_state *mrb, mrb_value klass)
-{
- mrb_value enc;
-
- mrb_get_args(mrb, "o", &enc);
- return mrb_enc_from_encoding(mrb, to_encoding(mrb, enc));
-}
-
-/* 15.2.40.2.2 */
-/*
- * call-seq:
- * Encoding.compatible?(str1, str2) -> enc or nil
- *
- * Checks the compatibility of two strings.
- * If they are compatible, means concatenatable,
- * returns an encoding which the concatenated string will be.
- * If they are not compatible, nil is returned.
- *
- * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
- * #=> #<Encoding:ISO-8859-1>
- *
- * Encoding.compatible?(
- * "\xa1".force_encoding("iso-8859-1"),
- * "\xa1\xa1".force_encoding("euc-jp"))
- * #=> nil
- *
- */
-static mrb_value
-enc_compatible_p(mrb_state *mrb, mrb_value klass)
-{
- mrb_value str1;
- mrb_value str2;
- mrb_encoding *enc;
-
- mrb_get_args(mrb, "oo", &str1, &str2);
- if (!enc_capable(str1)) return mrb_nil_value();
- if (!enc_capable(str2)) return mrb_nil_value();
- enc = mrb_enc_compatible(mrb, str1, str2);
- if (!enc) return mrb_nil_value();
- return mrb_enc_from_encoding(mrb, enc);
-}
-
-/* 15.2.40.2.19 */
-/* :nodoc: */
-static mrb_value
-enc_dump(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self)
-{
- //mrb_scan_args(argc, argv, "01", 0);
- return enc_name(mrb, self);
-}
-
-/* 15.2.40.2.11 */
-/* :nodoc: */
-static mrb_value
-enc_load(mrb_state *mrb, mrb_value klass)
-{
- mrb_value str;
-
- mrb_get_args(mrb, "o", &str);
- return enc_find(mrb, str);
-}
-
-mrb_encoding *
-mrb_ascii8bit_encoding(mrb_state *mrb)
-{
- if (!enc_table.list) {
- mrb_enc_init(mrb);
- }
- return enc_table.list[ENCINDEX_ASCII].enc;
-}
-
-int
-mrb_ascii8bit_encindex(void)
-{
- return ENCINDEX_ASCII;
-}
-
-mrb_encoding *
-mrb_utf8_encoding(mrb_state *mrb)
-{
- if (!enc_table.list) {
- mrb_enc_init(mrb);
- }
- return enc_table.list[ENCINDEX_UTF_8].enc;
-}
-
-int
-mrb_utf8_encindex(void)
-{
- return ENCINDEX_UTF_8;
-}
-
-mrb_encoding *
-mrb_usascii_encoding(mrb_state *mrb)
-{
- if (!enc_table.list) {
- mrb_enc_init(mrb);
- }
- return enc_table.list[ENCINDEX_US_ASCII].enc;
-}
-
-int
-mrb_usascii_encindex(void)
-{
- return ENCINDEX_US_ASCII;
-}
-
-int
-mrb_locale_encindex(mrb_state *mrb)
-{
- mrb_value charmap = mrb_locale_charmap(mrb, mrb_obj_value(ENCODE_CLASS));
- int idx;
-
- if (mrb_nil_p(charmap))
- idx = mrb_usascii_encindex();
- //else if ((idx = mrb_enc_find_index(StringValueCStr(charmap))) < 0)
- else if ((idx = mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &charmap))) < 0)
- idx = mrb_ascii8bit_encindex();
-
- if (mrb_enc_registered("locale") < 0) enc_alias_internal("locale", idx);
-
- return idx;
-}
-
-mrb_encoding *
-mrb_locale_encoding(mrb_state *mrb)
-{
- return mrb_enc_from_index(mrb, mrb_locale_encindex(mrb));
-}
-
-static int
-enc_set_filesystem_encoding(mrb_state *mrb)
-{
- int idx;
-#if defined NO_LOCALE_CHARMAP
- idx = mrb_enc_to_index(mrb_default_external_encoding(mrb));
-#elif defined _WIN32 || defined __CYGWIN__
- char cp[sizeof(int) * 8 / 3 + 4];
- //snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? GetACP() : GetOEMCP());
- idx = mrb_enc_find_index(mrb, cp);
- if (idx < 0) idx = mrb_ascii8bit_encindex();
-#else
- idx = mrb_enc_to_index(mrb_default_external_encoding(mrb));
-#endif
-
- enc_alias_internal("filesystem", idx);
- return idx;
-}
-
-int
-mrb_filesystem_encindex(void)
-{
- int idx = mrb_enc_registered("filesystem");
- if (idx < 0)
- idx = mrb_ascii8bit_encindex();
- return idx;
-}
-
-mrb_encoding *
-mrb_filesystem_encoding(mrb_state *mrb)
-{
- return mrb_enc_from_index(mrb, mrb_filesystem_encindex());
-}
-
-struct default_encoding {
- int index; /* -2 => not yet set, -1 => nil */
- mrb_encoding *enc;
-};
-
-static struct default_encoding default_external = {0};
-
-static int
-enc_set_default_encoding(mrb_state *mrb, struct default_encoding *def, mrb_value encoding, const char *name)
-{
- int overridden = FALSE;
-
- if (def->index != -2)
- /* Already set */
- overridden = TRUE;
-
- if (mrb_nil_p(encoding)) {
- def->index = -1;
- def->enc = 0;
- st_insert(enc_table.names, (st_data_t)strdup(name),
- (st_data_t)UNSPECIFIED_ENCODING);
- }
- else {
- def->index = mrb_enc_to_index(mrb_to_encoding(mrb, encoding));
- def->enc = 0;
- enc_alias_internal(name, def->index);
- }
-
- if (def == &default_external)
- enc_set_filesystem_encoding(mrb);
-
- return overridden;
-}
-
-mrb_encoding *
-mrb_default_external_encoding(mrb_state *mrb)
-{
- if (default_external.enc) return default_external.enc;
-
- if (default_external.index >= 0) {
- default_external.enc = mrb_enc_from_index(mrb, default_external.index);
- return default_external.enc;
- }
- else {
- return mrb_locale_encoding(mrb);
- }
-}
-
-mrb_value
-mrb_enc_default_external(mrb_state *mrb)
-{
- return mrb_enc_from_encoding(mrb, mrb_default_external_encoding(mrb));
-}
-
-/* 15.2.40.2.3 */
-/*
- * call-seq:
- * Encoding.default_external -> enc
- *
- * Returns default external encoding.
- *
- * It is initialized by the locale or -E option.
- */
-static mrb_value
-get_default_external(mrb_state *mrb, mrb_value klass)
-{
- return mrb_enc_default_external(mrb);
-}
-
-void
-mrb_enc_set_default_external(mrb_state *mrb, mrb_value encoding)
-{
- if (mrb_nil_p(encoding)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "default external can not be nil");
- }
- enc_set_default_encoding(mrb, &default_external, encoding,
- "external");
-}
-
-/* 15.2.40.2.4 */
-/*
- * call-seq:
- * Encoding.default_external = enc
- *
- * Sets default external encoding.
- */
-static mrb_value
-set_default_external(mrb_state *mrb, mrb_value klass)
-{
- mrb_value encoding;
-
- mrb_get_args(mrb, "o", &encoding);
- mrb_warning("setting Encoding.default_external");
- mrb_enc_set_default_external(mrb, encoding);
- return encoding;
-}
-
-static struct default_encoding default_internal = {-2};
-
-mrb_encoding *
-mrb_default_internal_encoding(mrb_state *mrb)
-{
- if (!default_internal.enc && default_internal.index >= 0) {
- default_internal.enc = mrb_enc_from_index(mrb, default_internal.index);
- }
- return default_internal.enc; /* can be NULL */
-}
-
-mrb_value
-mrb_enc_default_internal(mrb_state *mrb)
-{
- /* Note: These functions cope with default_internal not being set */
- return mrb_enc_from_encoding(mrb, mrb_default_internal_encoding(mrb));
-}
-
-/* 15.2.40.2.5 */
-/*
- * call-seq:
- * Encoding.default_internal -> enc
- *
- * Returns default internal encoding.
- *
- * It is initialized by the source internal_encoding or -E option.
- */
-static mrb_value
-get_default_internal(mrb_state *mrb, mrb_value klass)
-{
- return mrb_enc_default_internal(mrb);
-}
-
-void
-mrb_enc_set_default_internal(mrb_state *mrb, mrb_value encoding)
-{
- enc_set_default_encoding(mrb, &default_internal, encoding,
- "internal");
-}
-
-/* 15.2.40.2.6 */
-/*
- * call-seq:
- * Encoding.default_internal = enc or nil
- *
- * Sets default internal encoding.
- * Or removes default internal encoding when passed nil.
- */
-static mrb_value
-set_default_internal(mrb_state *mrb, mrb_value klass)
-{
- mrb_value encoding;
-
- mrb_get_args(mrb, "o", &encoding);
- mrb_warning("setting Encoding.default_internal");
- mrb_enc_set_default_internal(mrb, encoding);
- return encoding;
-}
-
-#define digit(x) ((x) >= '0' && (x) <= '9')
-#ifndef _MSC_VER
-#define strstart(s, n) (strncasecmp(s, n, strlen(n)) == 0)
-#else
-#define strstart(s, n) (_stricmp(s, n) == 0)
-#endif
-#define C_CODESET "US-ASCII" /* Return this as the encoding of the
- * C/POSIX locale. Could as well one day
- * become "UTF-8". */
-#if defined _WIN32 || defined __CYGWIN__
-#define JA_CODESET "Windows-31J"
-#else
-#define JA_CODESET "EUC-JP"
-#endif
-
-static char buf[16];
-
-const char *
-nl_langinfo_codeset(void)
-{
- const char *l, *p;
- int n;
-
- if (((l = getenv("LC_ALL")) && *l) ||
- ((l = getenv("LC_CTYPE")) && *l) ||
- ((l = getenv("LANG")) && *l)) {
- /* check standardized locales */
- if (!strcmp(l, "C") || !strcmp(l, "POSIX"))
- return C_CODESET;
- /* check for encoding name fragment */
- p = strchr(l, '.');
- if (!p++) p = l;
- if (strstart(p, "UTF"))
- return "UTF-8";
- if ((n = 5, strstart(p, "8859-")) || (n = 9, strstart(p, "ISO-8859-"))) {
- if (digit(p[n])) {
- p += n;
- memcpy(buf, "ISO-8859-\0\0", 12);
- buf[9] = *p++;
- if (digit(*p)) buf[10] = *p++;
- return buf;
- }
- }
- if (strstart(p, "KOI8-R")) return "KOI8-R";
- if (strstart(p, "KOI8-U")) return "KOI8-U";
- if (strstart(p, "620")) return "TIS-620";
- if (strstart(p, "2312")) return "GB2312";
- if (strstart(p, "HKSCS")) return "Big5HKSCS"; /* no MIME charset */
- if (strstart(p, "BIG5")) return "Big5";
- if (strstart(p, "GBK")) return "GBK"; /* no MIME charset */
- if (strstart(p, "18030")) return "GB18030"; /* no MIME charset */
- if (strstart(p, "Shift_JIS") || strstart(p, "SJIS")) return "Windows-31J";
- /* check for conclusive modifier */
- if (strstart(p, "euro")) return "ISO-8859-15";
- /* check for language (and perhaps country) codes */
- if (strstart(l, "zh_TW")) return "Big5";
- if (strstart(l, "zh_HK")) return "Big5HKSCS"; /* no MIME charset */
- if (strstart(l, "zh")) return "GB2312";
- if (strstart(l, "ja")) return JA_CODESET;
- if (strstart(l, "ko")) return "EUC-KR";
- if (strstart(l, "ru")) return "KOI8-R";
- if (strstart(l, "uk")) return "KOI8-U";
- if (strstart(l, "pl") || strstart(l, "hr") ||
- strstart(l, "hu") || strstart(l, "cs") ||
- strstart(l, "sk") || strstart(l, "sl")) return "ISO-8859-2";
- if (strstart(l, "eo") || strstart(l, "mt")) return "ISO-8859-3";
- if (strstart(l, "el")) return "ISO-8859-7";
- if (strstart(l, "he")) return "ISO-8859-8";
- if (strstart(l, "tr")) return "ISO-8859-9";
- if (strstart(l, "th")) return "TIS-620"; /* or ISO-8859-11 */
- if (strstart(l, "lt")) return "ISO-8859-13";
- if (strstart(l, "cy")) return "ISO-8859-14";
- if (strstart(l, "ro")) return "ISO-8859-2"; /* or ISO-8859-16 */
- if (strstart(l, "am") || strstart(l, "vi")) return "UTF-8";
- /* Send me further rules if you like, but don't forget that we are
- * *only* interested in locale naming conventions on platforms
- * that do not already provide an nl_langinfo(CODESET) implementation. */
- }
- return NULL;
-}
-
-/* 15.2.40.2.9 */
-/*
- * call-seq:
- * Encoding.locale_charmap -> string
- *
- * Returns the locale charmap name.
- *
- * Debian GNU/Linux
- * LANG=C
- * Encoding.locale_charmap #=> "ANSI_X3.4-1968"
- * LANG=ja_JP.EUC-JP
- * Encoding.locale_charmap #=> "EUC-JP"
- *
- * SunOS 5
- * LANG=C
- * Encoding.locale_charmap #=> "646"
- * LANG=ja
- * Encoding.locale_charmap #=> "eucJP"
- *
- * The result is highly platform dependent.
- * So Encoding.find(Encoding.locale_charmap) may cause an error.
- * If you need some encoding object even for unknown locale,
- * Encoding.find("locale") can be used.
- *
- */
-mrb_value
-mrb_locale_charmap(mrb_state *mrb, mrb_value klass)
-{
-#if defined NO_LOCALE_CHARMAP
- return mrb_usascii_str_new2(mrb, "ASCII-8BIT");
-#elif defined _WIN32 || defined __CYGWIN__
- const char *nl_langinfo_codeset(void);
- const char *codeset = nl_langinfo_codeset();
- char cp[sizeof(int) * 3 + 4];
- if (!codeset) {
- //snprintf(cp, sizeof(cp), "CP%d", GetConsoleCP());
- codeset = cp;
- }
- return mrb_usascii_str_new2(mrb, codeset);
-#elif defined HAVE_LANGINFO_H
- char *codeset;
- codeset = nl_langinfo(CODESET);
- return mrb_usascii_str_new2(mrb, codeset);
-#else
- return mrb_nil_value();
-#endif
-}
-static void
-set_encoding_const(mrb_state *mrb, const char *name, mrb_encoding *enc)
-{
- mrb_value encoding = mrb_enc_from_encoding(mrb, enc);
- char *s = (char*)name;
- int haslower = 0, hasupper = 0, valid = 0;
-
- if (ISDIGIT(*s)) return;
- if (ISUPPER(*s)) {
- hasupper = 1;
- while (*++s && (ISALNUM(*s) || *s == '_')) {
- if (ISLOWER(*s)) haslower = 1;
- }
- }
- if (!*s) {
- if (s - name > ENCODING_NAMELEN_MAX) return;
- valid = 1;
- //mrb_define_const(mrb_cEncoding, name, encoding);
- mrb_define_const(mrb, ENCODE_CLASS, name, encoding);
- }
- if (!valid || haslower) {
- size_t len = s - name;
- if (len > ENCODING_NAMELEN_MAX) return;
- if (!haslower || !hasupper) {
- do {
- if (ISLOWER(*s)) haslower = 1;
- if (ISUPPER(*s)) hasupper = 1;
- } while (*++s && (!haslower || !hasupper));
- len = s - name;
- }
- len += strlen(s);
- if (len++ > ENCODING_NAMELEN_MAX) return;
- //MEMCPY(s = ALLOCA_N(char, len), name, char, len);
- memcpy(s = mrb_malloc(mrb, len), name, len);
- name = s;
- if (!valid) {
- if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
- for (; *s; ++s) {
- if (!ISALNUM(*s)) *s = '_';
- }
- if (hasupper) {
- mrb_define_const(mrb, ENCODE_CLASS, name, encoding);
- }
- }
- if (haslower) {
- for (s = (char*)name; *s; ++s) {
- if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
- }
- mrb_define_const(mrb, ENCODE_CLASS, name, encoding);
- }
- }
-}
-static enum st_retval
-mrb_enc_name_list_i(mrb_state *mrb, st_data_t name, st_data_t idx, mrb_value *arg)
-{
- mrb_value ary = *arg;
- mrb_value str = mrb_usascii_str_new2(mrb, (char*)name);
- //OBJ_FREEZE(str);
- mrb_ary_push(mrb, ary, str);
- return ST_CONTINUE;
-}
-
-/* 15.2.40.2.10 */
-/*
- * call-seq:
- * Encoding.name_list -> ["enc1", "enc2", ...]
- *
- * Returns the list of available encoding names.
- *
- * Encoding.name_list
- * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
- * "ISO-8859-1", "Shift_JIS", "EUC-JP",
- * "Windows-31J",
- * "BINARY", "CP932", "eucJP"]
- *
- */
-
-static mrb_value
-mrb_enc_name_list(mrb_state *mrb, mrb_value klass)
-{
- mrb_value ary = mrb_ary_new_capa(mrb, enc_table.names->num_entries);//mrb_ary_new2(enc_table.names->num_entries);
- st_foreachNew(mrb, enc_table.names, mrb_enc_name_list_i, &ary);
- return ary;
-}
-
-static enum st_retval
-mrb_enc_aliases_enc_i(mrb_state *mrb, st_data_t name, st_data_t orig, st_data_t arg)
-{
- mrb_value *p = (mrb_value*)arg;
- mrb_value aliases = p[0], ary = p[1];
- int idx = (int)orig;
- mrb_value key, str = mrb_ary_ref(mrb, ary, idx);//mrb_ary_entry(ary, idx);
-
- if (mrb_nil_p(str)) {
- mrb_encoding *enc = mrb_enc_from_index(mrb, idx);
-
- if (!enc) return ST_CONTINUE;
- if (STRCASECMP((char*)name, mrb_enc_name(enc)) == 0) {
- return ST_CONTINUE;
- }
- str = mrb_usascii_str_new2(mrb, mrb_enc_name(enc));
- OBJ_FREEZE(str);
- mrb_ary_set(mrb, ary, idx, str);//rb_ary_store(ary, idx, str);
- }
- key = mrb_usascii_str_new2(mrb, (char*)name);
- OBJ_FREEZE(key);
- mrb_hash_set(mrb, aliases, key, str);
- return ST_CONTINUE;
-}
-
-/* 15.2.40.2.1 */
-/*
- * call-seq:
- * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
- *
- * Returns the hash of available encoding alias and original encoding name.
- *
- * Encoding.aliases
- * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1986"=>"US-ASCII",
- * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
- *
- */
-
-static mrb_value
-mrb_enc_aliases(mrb_state *mrb, mrb_value klass)
-{
- mrb_value aliases[2];
- aliases[0] = mrb_hash_new_capa(mrb, 0);
- aliases[1] = mrb_ary_new(mrb);
- st_foreachNew(mrb, enc_table.names, mrb_enc_aliases_enc_i, aliases);
- return aliases[0];
-}
-
-void
-mrb_init_encoding(mrb_state *mrb)
-{
-#undef mrb_intern
-#define mrb_intern(str) mrb_intern_const(str)
- mrb_value list;
- int i;
- struct RClass *s;
-
- s = mrb_define_class(mrb, "Encoding", mrb->object_class);
- //mrb_undef_alloc_func(mrb_cEncoding);
- //mrb_undef_method(CLASS_OF(mrb_cEncoding), "new");
- mrb_define_class_method(mrb, s, "aliases", mrb_enc_aliases, ARGS_NONE()); /* 15.2.40.2.1 */
- mrb_define_class_method(mrb, s, "compatible?", enc_compatible_p, ARGS_REQ(2)); /* 15.2.40.2.2 */
- mrb_define_class_method(mrb, s, "default_external", get_default_external, ARGS_NONE()); /* 15.2.40.2.3 */
- mrb_define_class_method(mrb, s, "default_external=", set_default_external, ARGS_REQ(1)); /* 15.2.40.2.4 */
- mrb_define_class_method(mrb, s, "default_internal", get_default_internal, ARGS_NONE()); /* 15.2.40.2.5 */
- mrb_define_class_method(mrb, s, "default_internal=", set_default_internal, ARGS_REQ(1)); /* 15.2.40.2.6 */
- mrb_define_class_method(mrb, s, "find", enc_find, ARGS_REQ(1)); /* 15.2.40.2.7 */
- mrb_define_class_method(mrb, s, "list", enc_list, ARGS_NONE()); /* 15.2.40.2.8 */
- mrb_define_class_method(mrb, s, "locale_charmap", mrb_locale_charmap, ARGS_NONE()); /* 15.2.40.2.9 */
- mrb_define_class_method(mrb, s, "name_list", mrb_enc_name_list, ARGS_NONE()); /* 15.2.40.2.10 */
- mrb_define_class_method(mrb, s, "_load", enc_load, ARGS_REQ(1)); /* 15.2.40.2.11 */
- mrb_define_method(mrb, s, "ascii_compatible?", enc_ascii_compatible_p, ARGS_NONE()); /* 15.2.40.2.12 */
- mrb_define_method(mrb, s, "dummy?", enc_dummy_p, ARGS_NONE()); /* 15.2.40.2.13 */
- mrb_define_method(mrb, s, "inspect", enc_inspect, ARGS_NONE()); /* 15.2.40.2.14 */
- mrb_define_method(mrb, s, "name", enc_name, ARGS_NONE()); /* 15.2.40.2.15 */
- mrb_define_method(mrb, s, "names", enc_names, ARGS_NONE()); /* 15.2.40.2.16 */
- mrb_define_method(mrb, s, "replicate", enc_replicate, ARGS_REQ(1)); /* 15.2.40.2.17 */
- mrb_define_method(mrb, s, "to_s", enc_name, ARGS_NONE()); /* 15.2.40.2.18 */
- mrb_define_method(mrb, s, "_dump", enc_dump, ARGS_ANY()); /* 15.2.40.2.19 */
-
-/* add kusuda --> */
- if (!enc_table.list) {
- mrb_enc_init(mrb);
- }
-/* add kusuda --< */
- list = mrb_ary_new_capa(mrb, enc_table.count);//mrb_ary_new2(enc_table.count);
- RBASIC(list)->c = 0;
- mrb_encoding_list = list;
- //mrb_gc_register_mark_object(list);
-
- for (i = 0; i < enc_table.count; ++i) {
- mrb_ary_push(mrb, list, enc_new(mrb, enc_table.list[i].enc));
- }
-}
-
-/* locale insensitive functions */
-
-#define ctype_test(c, ctype) \
- (mrb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), ctype))
-
-int mrb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); }
-int mrb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); }
-int mrb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); }
-int mrb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); }
-int mrb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); }
-int mrb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); }
-int mrb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); }
-int mrb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); }
-int mrb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); }
-int mrb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); }
-int mrb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); }
-int mrb_isxdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_XDIGIT); }
-
-int
-mrb_tolower(int c)
-{
- return mrb_isascii(c) ? ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) : c;
-}
-
-int
-mrb_toupper(int c)
-{
- return mrb_isascii(c) ? ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) : c;
-}
-#endif //INCLUDE_ENCODING
diff --git a/src/encoding.h b/src/encoding.h
index c6c470644..1312fb947 100644
--- a/src/encoding.h
+++ b/src/encoding.h
@@ -174,11 +174,7 @@ int mrb_enc_codelen(mrb_state *mrb, int code, mrb_encoding *enc);
#endif //INCLUDE_ENCODING
/* code,ptr,encoding -> write buf */
-#ifdef INCLUDE_ENCODING
-#define mrb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)(buf))
-#else
-#define mrb_enc_mbcput(c,buf,enc) *(buf) = (char)(c)
-#endif //INCLUDE_ENCODING
+#define mrb_enc_mbcput(c,buf,enc) ((*(buf) = (char)(c)),1)
/* start, ptr, end, encoding -> prev_char */
#define mrb_enc_prev_char(s,p,e,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e))
@@ -232,9 +228,6 @@ mrb_value mrb_enc_default_internal(mrb_state *mrb);
void mrb_enc_set_default_external(mrb_state *mrb, mrb_value encoding);
void mrb_enc_set_default_internal(mrb_state *mrb, mrb_value encoding);
mrb_value mrb_locale_charmap(mrb_state *mrb, mrb_value klass);
-#ifdef INCLUDE_ENCODING
-int mrb_memsearch(mrb_state *mrb, const void*,int,const void*,int,mrb_encoding*);
-#endif //INCLUDE_ENCODING
mrb_value mrb_usascii_str_new_cstr(mrb_state *mrb, const char *ptr);
int mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, int unicode_p);
diff --git a/src/gc.c b/src/gc.c
index 999a1a7d2..536dbbbe6 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -266,7 +266,7 @@ mrb_obj_alloc(mrb_state *mrb, enum mrb_vtype ttype, struct RClass *cls)
mrb->live++;
if (mrb->arena_idx > MRB_ARENA_SIZE) {
/* arena overflow error */
- mrb->arena_idx = MRB_ARENA_SIZE - 2; /* force room in arena */
+ mrb->arena_idx = MRB_ARENA_SIZE - 4; /* force room in arena */
mrb_raise(mrb, mrb->eRuntimeError_class, "arena overflow error");
}
mrb->arena[mrb->arena_idx++] = p;
@@ -360,12 +360,14 @@ gc_mark_children(mrb_state *mrb, struct RBasic *obj)
case MRB_TT_STRING:
{
+#if 0
struct RString *s = (struct RString*)obj;
while (s->flags & MRB_STR_SHARED) {
s = s->aux.shared;
if (!s) break;
}
+#endif
}
break;
diff --git a/src/init.c b/src/init.c
index 5aab8d6ae..17ce24313 100644
--- a/src/init.c
+++ b/src/init.c
@@ -20,7 +20,6 @@ void mrb_init_proc(mrb_state*);
void mrb_init_range(mrb_state*);
void mrb_init_string(mrb_state*);
void mrb_init_regexp(mrb_state*);
-void mrb_init_encoding(mrb_state*);
void mrb_init_exception(mrb_state*);
void mrb_init_time(mrb_state*);
void mrb_init_io(mrb_state*);
@@ -54,7 +53,6 @@ mrb_init_core(mrb_state *mrb)
mrb_init_gc(mrb);
#ifdef INCLUDE_REGEXP
mrb_init_regexp(mrb);
- mrb_init_encoding(mrb);
#endif
mrb_init_exception(mrb);
mrb_init_print(mrb);
diff --git a/src/object.c b/src/object.c
index 1d84909ec..4dc900feb 100644
--- a/src/object.c
+++ b/src/object.c
@@ -11,13 +11,6 @@
#include "mruby/class.h"
#include "mruby/numeric.h"
-#ifdef INCLUDE_REGEXP
- #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
-#else
- #define mrb_usascii_str_new2 mrb_str_new_cstr
- #define mrb_usascii_str_new mrb_str_new
-#endif
-
#ifndef FALSE
#define FALSE 0
#endif
@@ -106,7 +99,7 @@ mrb_true(mrb_state *mrb, mrb_value obj)
static mrb_value
nil_to_s(mrb_state *mrb, mrb_value obj)
{
- return mrb_usascii_str_new(mrb, 0, 0);
+ return mrb_str_new(mrb, 0, 0);
}
/***********************************************************************
@@ -166,7 +159,7 @@ true_xor(mrb_state *mrb, mrb_value obj)
static mrb_value
true_to_s(mrb_state *mrb, mrb_value obj)
{
- return mrb_usascii_str_new2(mrb, "true");
+ return mrb_str_new_cstr(mrb, "true");
}
/* 15.2.5.3.4 */
@@ -279,7 +272,7 @@ false_or(mrb_state *mrb, mrb_value obj)
static mrb_value
false_to_s(mrb_state *mrb, mrb_value obj)
{
- return mrb_usascii_str_new2(mrb, "false");
+ return mrb_str_new_cstr(mrb, "false");
}
void
diff --git a/src/re.c b/src/re.c
index aea60ec17..86b0469d3 100644
--- a/src/re.c
+++ b/src/re.c
@@ -7,16 +7,11 @@
#include "mruby.h"
#include <string.h>
#include "mruby/string.h"
-#include "mruby/khash.h"
#include "encoding.h"
#include "re.h"
-#include "mruby/numeric.h"
-#include "mruby/range.h"
#include "mruby/array.h"
#include "regint.h"
#include "mruby/class.h"
-#include "mruby/hash.h"
-#include "mruby/variable.h"
#include "error.h"
#ifdef INCLUDE_REGEXP
@@ -54,13 +49,10 @@ unsigned long ruby_scan_oct(const char*, size_t, size_t*);
unsigned long ruby_scan_hex(const char*, size_t, size_t*);
static mrb_value mrb_match_to_a(mrb_state *mrb, mrb_value match);
-static mrb_value mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc,
- mrb_encoding **fixed_enc, onig_errmsg_buffer err);
-static void mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len,
- mrb_encoding *enc, mrb_encoding *resenc);
+static mrb_value mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, onig_errmsg_buffer err);
+static void mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len);
static char * option_to_str(char str[4], int options);
-static mrb_value reg_cache;
//static int may_need_recompile;
//static int reg_kcode = DEFAULT_KCODE;
/* ------------------------------------------------------------------------- */
@@ -94,22 +86,20 @@ mrb_reg_s_new_instance(mrb_state *mrb, /*int argc, mrb_value *argv, */mrb_value
re->usecnt = 0;
return mrb_funcall_argv(mrb, mrb_obj_value(re), "initialize", argc, argv);
}
-//#define mrb_enc_mbcput(a,b,c) a
+
mrb_value
mrb_reg_quote(mrb_state *mrb, mrb_value str)
{
- mrb_encoding *enc = mrb_enc_get(mrb, str);
char *s, *send, *t;
mrb_value tmp;
- int c,clen;
- int ascii_only = mrb_enc_str_asciionly_p(mrb, str);
+ int c;
s = RSTRING_PTR(str);
send = s + RSTRING_LEN(str);
while (s < send) {
- c = mrb_enc_ascget(mrb, s, send, &clen, enc);
+ c = *s;
if (c == -1) {
- s += mbclen(s, send, enc);
+ s += send - s;
continue;
}
switch (c) {
@@ -121,38 +111,29 @@ mrb_reg_quote(mrb_state *mrb, mrb_value str)
case '\t': case '\f': case '\n': case '\r':
goto meta_found;
}
- s += clen;
+ s++;
}
//tmp = mrb_str_new3(str);
tmp = mrb_str_new(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
- if (ascii_only) {
- mrb_enc_associate(mrb, tmp, mrb_usascii_encoding(mrb));
- }
return tmp;
meta_found:
tmp = mrb_str_new(mrb, 0, RSTRING_LEN(str)*2);
- if (ascii_only) {
- mrb_enc_associate(mrb, tmp, mrb_usascii_encoding(mrb));
- }
- else {
- mrb_enc_copy(mrb, tmp, str);
- }
t = RSTRING_PTR(tmp);
/* copy upto metacharacter */
memcpy(t, RSTRING_PTR(str), s - RSTRING_PTR(str));
t += s - RSTRING_PTR(str);
while (s < send) {
- c = mrb_enc_ascget(mrb, s, send, &clen, enc);
+ c = *s;
if (c == -1) {
- int n = mbclen(s, send, enc);
+ int n = send - s;
while (n--)
*t++ = *s++;
continue;
}
- s += clen;
+ s++;
switch (c) {
case '[': case ']': case '{': case '}':
case '(': case ')': case '|': case '-':
@@ -263,7 +244,7 @@ mrb_reg_nth_match(mrb_state *mrb, mrb_int nth, mrb_value match)
if (start == -1) return mrb_nil_value();
end = m->rmatch->regs.end[nth];
len = end - start;
- str = mrb_str_substr(mrb, mrb_obj_value(m->str), start, len);
+ str = mrb_str_subseq(mrb, mrb_obj_value(m->str), start, len);
return str;
}
@@ -379,75 +360,13 @@ mrb_reg_options(mrb_state *mrb, mrb_value re)
return options;
}
-static void
-reg_enc_error(mrb_state *mrb, mrb_value re, mrb_value str)
-{
- mrb_raise(mrb, E_ENCODING_ERROR,
- "incompatible encoding regexp match (%s regexp with %s string)",
- mrb_enc_name(mrb_enc_get(mrb, re)),
- mrb_enc_name(mrb_enc_get(mrb, str)));
-}
-
-static int
-mrb_reg_fixed_encoding_p(mrb_value re)
-{
- /*if (FL_TEST(re, KCODE_FIXED))
- return Qtrue;
- else */
- return 0/*Qfalse*/;
-}
-
-static mrb_encoding*
-mrb_reg_prepare_enc(mrb_state *mrb, mrb_value re, mrb_value str, int warn)
-{
- mrb_encoding *enc = 0;
-
- if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_BROKEN) {
- mrb_raise(mrb, E_ARGUMENT_ERROR,
- "invalid byte sequence in %s",
- mrb_enc_name(mrb_enc_get(mrb, str)));
- }
-
- mrb_reg_check(mrb, re);
- enc = mrb_enc_get(mrb, str);
- if (!mrb_enc_str_asciicompat_p(mrb, str)) {
- if (RREGEXP(re)->ptr->enc != enc) {
- reg_enc_error(mrb, re, str);
- }
- }
- else if (mrb_reg_fixed_encoding_p(re)) {
- if (RREGEXP(re)->ptr->enc != enc &&
- (!mrb_enc_asciicompat(mrb, RREGEXP(re)->ptr->enc) ||
- mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT)) {
- reg_enc_error(mrb, re, str);
- }
- enc = RREGEXP(re)->ptr->enc;
- }
- if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) &&
- enc != mrb_ascii8bit_encoding(mrb) &&
- mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT) {
- mrb_warn("regexp match /.../n against to %s string",
- mrb_enc_name(enc));
- }
- return enc;
-}
-
static mrb_value
mrb_reg_desc(mrb_state *mrb, const char *s, long len, mrb_value re)
{
- mrb_encoding *enc = mrb_enc_get(mrb, re);
mrb_value str = mrb_str_new_cstr(mrb, "/");//mrb_str_buf_new2("/");
- mrb_encoding *resenc = mrb_default_internal_encoding(mrb);
- if (resenc == NULL) resenc = mrb_default_external_encoding(mrb);
- if (re.tt && mrb_enc_asciicompat(mrb, enc)) {
- mrb_enc_copy(mrb, str, re);
- }
- else {
- mrb_enc_associate(mrb, str, mrb_usascii_encoding(mrb));
- }
- mrb_reg_expr_str(mrb, str, s, len, enc, resenc);
- mrb_str_buf_cat(mrb, str, "/", strlen("/"));//mrb_str_buf_cat2(str, "/");
+ mrb_reg_expr_str(mrb, str, s, len);
+ mrb_str_buf_cat(mrb, str, "/", strlen("/"));
if (re.tt) {
char opts[4];
mrb_reg_check(mrb, re);
@@ -476,18 +395,14 @@ mrb_reg_prepare_re(mrb_state *mrb, mrb_value re, mrb_value str)
OnigErrorInfo einfo;
const char *pattern;
mrb_value unescaped;
- mrb_encoding *fixed_enc = 0;
- mrb_encoding *enc = mrb_reg_prepare_enc(mrb, re, str, 1);
-
- if (reg->enc == enc) return reg;
+ mrb_encoding *enc = mrb_ascii8bit_encoding(mrb);
mrb_reg_check(mrb, re);
reg = RREGEXP(re)->ptr;
pattern = RREGEXP_SRC_PTR(re);
unescaped = mrb_reg_preprocess(mrb,
- pattern, pattern + RREGEXP(re)->src->len, enc,
- &fixed_enc, err);
+ pattern, pattern + RREGEXP(re)->src->len, err);
if (mrb_nil_p(unescaped)) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "regexp preprocess failed: %s", err);
@@ -675,18 +590,6 @@ ruby_scan_hex(const char *start, size_t len, size_t *retlen)
return retval;
}
-static int
-check_unicode_range(unsigned long code, onig_errmsg_buffer err)
-{
- if ((0xd800 <= code && code <= 0xdfff) || /* Surrogates */
- 0x10ffff < code) {
- //errcpy(err, "invalid Unicode range");
- printf("invalid Unicode range");
- return -1;
- }
- return 0;
-}
-
#define BYTEWIDTH 8
int
@@ -735,59 +638,6 @@ mrb_uv_to_utf8(mrb_state *mrb, char buf[6], unsigned long uv)
return 0;
}
-static int
-append_utf8(mrb_state *mrb, unsigned long uv,
- mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err)
-{
- if (check_unicode_range(uv, err) != 0)
- return -1;
- if (uv < 0x80) {
- char escbuf[5];
- snprintf(escbuf, sizeof(escbuf), "\\x%02X", (int)uv);
- mrb_str_buf_cat(mrb, buf, escbuf, 4);
- }
- else {
- int len;
- char utf8buf[6];
- len = mrb_uv_to_utf8(mrb, utf8buf, uv);
- mrb_str_buf_cat(mrb, buf, utf8buf, len);
-
- if (*encp == 0)
- *encp = mrb_utf8_encoding(mrb);
- else if (*encp != mrb_utf8_encoding(mrb)) {
- //errcpy(err, "UTF-8 character in non UTF-8 regexp");
- printf("UTF-8 character in non UTF-8 regexp");
- return -1;
- }
- }
- return 0;
-}
-
-static int
-unescape_unicode_bmp(mrb_state *mrb, const char **pp, const char *end,
- mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err)
-{
- const char *p = *pp;
- size_t len;
- unsigned long code;
-
- if (end < p+4) {
- //errcpy(err, "invalid Unicode escape");
- printf("invalid Unicode escape");
- return -1;
- }
- code = ruby_scan_hex(p, 4, &len);
- if (len != 4) {
- //errcpy(err, "invalid Unicode escape");
- printf("invalid Unicode escape");
- return -1;
- }
- if (append_utf8(mrb, code, buf, encp, err) != 0)
- return -1;
- *pp = p + 4;
- return 0;
-}
-
unsigned long
ruby_scan_oct(const char *start, size_t len, size_t *retlen)
{
@@ -802,400 +652,29 @@ ruby_scan_oct(const char *start, size_t len, size_t *retlen)
return retval;
}
-static int
-read_escaped_byte(const char **pp, const char *end, onig_errmsg_buffer err)
-{
- const char *p = *pp;
- int code;
- int meta_prefix = 0, ctrl_prefix = 0;
- size_t len;
-
- if (p == end || *p++ != '\\') {
- //errcpy(err, "too short escaped multibyte character");
- printf("too short escaped multibyte character");
- return -1;
- }
-
-again:
- if (p == end) {
- //errcpy(err, "too short escape sequence");
- printf("too short escape sequence");
- return -1;
- }
- switch (*p++) {
- case '\\': code = '\\'; break;
- case 'n': code = '\n'; break;
- case 't': code = '\t'; break;
- case 'r': code = '\r'; break;
- case 'f': code = '\f'; break;
- case 'v': code = '\013'; break;
- case 'a': code = '\007'; break;
- case 'e': code = '\033'; break;
-
- /* \OOO */
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- p--;
- code = scan_oct(p, end < p+3 ? end-p : 3, &len);
- p += len;
- break;
-
- case 'x': /* \xHH */
- code = scan_hex(p, end < p+2 ? end-p : 2, &len);
- if (len < 1) {
- //errcpy(err, "invalid hex escape");
- printf("invalid hex escape");
- return -1;
- }
- p += len;
- break;
-
- case 'M': /* \M-X, \M-\C-X, \M-\cX */
- if (meta_prefix) {
- //errcpy(err, "duplicate meta escape");
- printf("duplicate meta escape");
- return -1;
- }
- meta_prefix = 1;
- if (p+1 < end && *p++ == '-' && (*p & 0x80) == 0) {
- if (*p == '\\') {
- p++;
- goto again;
- }
- else {
- code = *p++;
- break;
- }
- }
- //errcpy(err, "too short meta escape");
- printf("too short meta escape");
- return -1;
-
- case 'C': /* \C-X, \C-\M-X */
- if (p == end || *p++ != '-') {
- //errcpy(err, "too short control escape");
- printf("too short control escape");
- return -1;
- }
- case 'c': /* \cX, \c\M-X */
- if (ctrl_prefix) {
- //errcpy(err, "duplicate control escape");
- printf("duplicate control escape");
- return -1;
- }
- ctrl_prefix = 1;
- if (p < end && (*p & 0x80) == 0) {
- if (*p == '\\') {
- p++;
- goto again;
- }
- else {
- code = *p++;
- break;
- }
- }
- //errcpy(err, "too short control escape");
- printf("too short control escape");
- return -1;
-
- default:
- //errcpy(err, "unexpected escape sequence");
- printf("unexpected escape sequence");
- return -1;
- }
- if (code < 0 || 0xff < code) {
- //errcpy(err, "invalid escape code");
- printf("invalid escape code");
- return -1;
- }
-
- if (ctrl_prefix)
- code &= 0x1f;
- if (meta_prefix)
- code |= 0x80;
-
- *pp = p;
- return code;
-}
-
-static int
-unescape_escaped_nonascii(mrb_state *mrb, const char **pp, const char *end, mrb_encoding *enc,
- mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err)
-{
- const char *p = *pp;
- int chmaxlen = mrb_enc_mbmaxlen(enc);
- //char *chbuf = ALLOCA_N(char, chmaxlen);
- char *chbuf = mrb_malloc(mrb, chmaxlen);
- int chlen = 0;
- int byte;
- int l;
-
- memset(chbuf, 0, chmaxlen);
-
- byte = read_escaped_byte(&p, end, err);
- if (byte == -1) {
- return -1;
- }
-
- chbuf[chlen++] = byte;
- while (chlen < chmaxlen &&
- MBCLEN_NEEDMORE_P(mrb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
- byte = read_escaped_byte(&p, end, err);
- if (byte == -1) {
- return -1;
- }
- chbuf[chlen++] = byte;
- }
-
- l = mrb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
- if (MBCLEN_INVALID_P(l)) {
- //errcpy(err, "invalid multibyte escape");
- printf("invalid multibyte escape");
- return -1;
- }
- if (1 < chlen || (chbuf[0] & 0x80)) {
- mrb_str_buf_cat(mrb, buf, chbuf, chlen);
-
- if (*encp == 0)
- *encp = enc;
- else if (*encp != enc) {
- //errcpy(err, "escaped non ASCII character in UTF-8 regexp");
- printf("escaped non ASCII character in UTF-8 regexp");
- return -1;
- }
- }
- else {
- char escbuf[5];
- snprintf(escbuf, sizeof(escbuf), "\\x%02X", chbuf[0]&0xff);
- mrb_str_buf_cat(mrb, buf, escbuf, 4);
- }
- *pp = p;
- return 0;
-}
-
-static int
-unescape_unicode_list(mrb_state *mrb, const char **pp, const char *end,
- mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err)
-{
- const char *p = *pp;
- int has_unicode = 0;
- unsigned long code;
- size_t len;
-
- while (p < end && ISSPACE(*p)) p++;
-
- while (1) {
- code = ruby_scan_hex(p, end-p, &len);
- if (len == 0)
- break;
- if (6 < len) { /* max 10FFFF */
- //errcpy(err, "invalid Unicode range");
- printf("invalid Unicode range");
- return -1;
- }
- p += len;
- if (append_utf8(mrb, code, buf, encp, err) != 0)
- return -1;
- has_unicode = 1;
-
- while (p < end && ISSPACE(*p)) p++;
- }
-
- if (has_unicode == 0) {
- //errcpy(err, "invalid Unicode list");
- printf("invalid Unicode list");
- return -1;
- }
-
- *pp = p;
-
- return 0;
-}
-
-static int
-unescape_nonascii(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc,
- mrb_value buf, mrb_encoding **encp, int *has_property,
- onig_errmsg_buffer err)
-{
- char c;
- char smallbuf[2];
-
- while (p < end) {
- int chlen = mrb_enc_precise_mbclen(p, end, enc);
- if (!MBCLEN_CHARFOUND_P(chlen)) {
- //errcpy(err, "invalid multibyte character");
- printf("invalid multibyte character");
- return -1;
- }
- chlen = MBCLEN_CHARFOUND_LEN(chlen);
- if (1 < chlen || (*p & 0x80)) {
- mrb_str_buf_cat(mrb, buf, p, chlen);
- p += chlen;
- if (*encp == 0)
- *encp = enc;
- else if (*encp != enc) {
- //errcpy(err, "non ASCII character in UTF-8 regexp");
- printf("non ASCII character in UTF-8 regexp");
- return -1;
- }
- continue;
- }
-
- switch (c = *p++) {
- case '\\':
- if (p == end) {
- //errcpy(err, "too short escape sequence");
- printf("too short escape sequence");
- return -1;
- }
- switch (c = *p++) {
- case '1': case '2': case '3':
- case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */
- {
- size_t octlen;
- if (ruby_scan_oct(p-1, end-(p-1), &octlen) <= 0177) {
- /* backref or 7bit octal.
- no need to unescape anyway.
- re-escaping may break backref */
- goto escape_asis;
- }
- }
- /* xxx: How about more than 199 subexpressions? */
-
- case '0': /* \0, \0O, \0OO */
-
- case 'x': /* \xHH */
- case 'c': /* \cX, \c\M-X */
- case 'C': /* \C-X, \C-\M-X */
- case 'M': /* \M-X, \M-\C-X, \M-\cX */
- p = p-2;
- if (unescape_escaped_nonascii(mrb, &p, end, enc, buf, encp, err) != 0)
- return -1;
- break;
-
- case 'u':
- if (p == end) {
- //errcpy(err, "too short escape sequence");
- printf("too short escape sequence");
- return -1;
- }
- if (*p == '{') {
- /* \u{H HH HHH HHHH HHHHH HHHHHH ...} */
- p++;
- if (unescape_unicode_list(mrb, &p, end, buf, encp, err) != 0)
- return -1;
- if (p == end || *p++ != '}') {
- //errcpy(err, "invalid Unicode list");
- printf("invalid Unicode list");
- return -1;
- }
- break;
- }
- else {
- /* \uHHHH */
- if (unescape_unicode_bmp(mrb, &p, end, buf, encp, err) != 0)
- return -1;
- break;
- }
-
- case 'p': /* \p{Hiragana} */
- case 'P':
- if (!*encp) {
- *has_property = 1;
- }
- goto escape_asis;
-
- default: /* \n, \\, \d, \9, etc. */
-escape_asis:
- smallbuf[0] = '\\';
- smallbuf[1] = c;
- mrb_str_buf_cat(mrb, buf, smallbuf, 2);
- break;
- }
- break;
-
- default:
- mrb_str_buf_cat(mrb, buf, &c, 1);
- break;
- }
- }
-
- return 0;
-}
-
-
static mrb_value
-mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc,
- mrb_encoding **fixed_enc, onig_errmsg_buffer err)
+mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, onig_errmsg_buffer err)
{
- mrb_value buf;
- int has_property = 0;
-
- //buf = mrb_str_buf_new(0);
- buf = mrb_str_buf_new(mrb, 0);
-
- if (mrb_enc_asciicompat(mrb, enc))
- *fixed_enc = 0;
- else {
- *fixed_enc = enc;
- mrb_enc_associate(mrb, buf, enc);
- }
-
- if (unescape_nonascii(mrb, p, end, enc, buf, fixed_enc, &has_property, err) != 0)
- return mrb_nil_value();
-
- if (has_property && !*fixed_enc) {
- *fixed_enc = enc;
- }
-
- if (*fixed_enc) {
- mrb_enc_associate(mrb, buf, *fixed_enc);
- }
-
- return buf;
+ return mrb_nil_value();
}
static int
-mrb_reg_initialize(mrb_state *mrb, mrb_value obj, const char *s, long len, mrb_encoding *enc,
+mrb_reg_initialize(mrb_state *mrb, mrb_value obj, const char *s, long len,
int options, onig_errmsg_buffer err,
const char *sourcefile, int sourceline)
{
struct RRegexp *re = RREGEXP(obj);
mrb_value unescaped;
- mrb_encoding *fixed_enc = 0;
- mrb_encoding *a_enc = mrb_ascii8bit_encoding(mrb);
+ mrb_encoding *enc = mrb_ascii8bit_encoding(mrb);
if (re->ptr)
mrb_raise(mrb, E_TYPE_ERROR, "already initialized regexp");
re->ptr = 0;
- if (mrb_enc_dummy_p(enc)) {
- //errcpy(err, "can't make regexp with dummy encoding");
- printf("can't make regexp with dummy encoding");
- return -1;
- }
-
- unescaped = mrb_reg_preprocess(mrb, s, s+len, enc, &fixed_enc, err);
+ unescaped = mrb_reg_preprocess(mrb, s, s+len, err);
if (mrb_nil_p(unescaped))
return -1;
- if (fixed_enc) {
- if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
- (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
- //errcpy(err, "incompatible character encoding");
- printf("incompatible character encoding");
- return -1;
- }
- if (fixed_enc != a_enc) {
- options |= ARG_ENCODING_FIXED;
- enc = fixed_enc;
- }
- }
- else if (!(options & ARG_ENCODING_FIXED)) {
- enc = mrb_usascii_encoding(mrb);
- }
-
- mrb_enc_associate(mrb, mrb_obj_value(re), enc);
- if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
+ if ((options & ARG_ENCODING_FIXED)) {
//re->basic.flags |= KCODE_FIXED;
re->flags|= KCODE_FIXED;
}
@@ -1207,7 +686,7 @@ mrb_reg_initialize(mrb_state *mrb, mrb_value obj, const char *s, long len, mrb_e
options & ARG_REG_OPTION_MASK, err,
sourcefile, sourceline);
if (!re->ptr) return -1;
- re->src = mrb_str_ptr(mrb_enc_str_new(mrb, s, len, enc));
+ re->src = mrb_str_ptr(mrb_str_new(mrb, s, len));
return 0;
}
@@ -1217,8 +696,8 @@ mrb_reg_initialize_str(mrb_state *mrb, mrb_value obj, mrb_value str, int options
const char *sourcefile, int sourceline)
{
int ret;
- mrb_encoding *enc = mrb_enc_get(mrb, str);
+#if 0
if (options & ARG_ENCODING_NONE) {
mrb_encoding *ascii8bit = mrb_ascii8bit_encoding(mrb);
if (enc != ascii8bit) {
@@ -1230,8 +709,9 @@ mrb_reg_initialize_str(mrb_state *mrb, mrb_value obj, mrb_value str, int options
enc = ascii8bit;
}
}
+#endif
- ret = mrb_reg_initialize(mrb, obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
+ ret = mrb_reg_initialize(mrb, obj, RSTRING_PTR(str), RSTRING_LEN(str),
options, err, sourcefile, sourceline);
return ret;
@@ -1267,7 +747,6 @@ mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value se
onig_errmsg_buffer err = "";
int flags = 0;
mrb_value str;
- mrb_encoding *enc;
const char *ptr;
long len;
@@ -1286,10 +765,7 @@ mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value se
flags = mrb_reg_options(mrb, re);
ptr = RREGEXP_SRC_PTR(re);
len = RREGEXP_SRC_LEN(re);
- enc = mrb_enc_get(mrb, re);
- if (mrb_reg_initialize(mrb, self, ptr, len, enc, flags, err, NULL, 0)) {
- /*str = mrb_enc_str_new(mrb, ptr, len, enc);
- mrb_reg_raise_str(str, flags, err);*/
+ if (mrb_reg_initialize(mrb, self, ptr, len, flags, err, NULL, 0)) {
printf("mrb_reg_raise_str(str, flags, err);");
}
}
@@ -1298,12 +774,10 @@ mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value se
if (mrb_type(argv[1]) == MRB_TT_FIXNUM) flags = mrb_fixnum(argv[1]);
else if (mrb_test(argv[1])) flags = ONIG_OPTION_IGNORECASE;
}
- enc = 0;
if (argc == 3 && !mrb_nil_p(argv[2])) {
//char *kcode = StringValuePtr(argv[2]);
char *kcode = mrb_string_value_ptr(mrb, argv[2]);
if (kcode[0] == 'n' || kcode[0] == 'N') {
- enc = mrb_ascii8bit_encoding(mrb);
flags |= ARG_ENCODING_NONE;
}
else {
@@ -1314,9 +788,7 @@ mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value se
str = argv[0];
//ptr = StringValuePtr(str);
ptr = mrb_string_value_ptr(mrb, str);
- if (enc
- ? mrb_reg_initialize(mrb, self, ptr, RSTRING_LEN(str), enc, flags, err, NULL, 0)
- : mrb_reg_initialize_str(mrb, self, str, flags, err, NULL, 0)) {
+ if (mrb_reg_initialize_str(mrb, self, str, flags, err, NULL, 0)) {
//mrb_reg_raise_str(str, flags, err);
}
}
@@ -1346,7 +818,7 @@ mrb_reg_init_copy(mrb_state *mrb, mrb_value re/*, mrb_value copy*/)
mrb_reg_check(mrb, copy);
s = RREGEXP_SRC_PTR(copy);
len = RREGEXP_SRC_LEN(copy);
- if (mrb_reg_initialize(mrb, re, s, len, mrb_enc_get(mrb, copy), mrb_reg_options(mrb, copy),
+ if (mrb_reg_initialize(mrb, re, s, len, mrb_reg_options(mrb, copy),
err, 0/*NULL*/, 0) != 0) {
mrb_reg_raise(mrb, s, len, err, re);
}
@@ -1628,7 +1100,7 @@ mrb_reg_source(mrb_state *mrb, mrb_value re)
mrb_value str;
mrb_reg_check(mrb, re);
- str = mrb_enc_str_new(mrb, RREGEXP_SRC_PTR(re),RREGEXP_SRC_LEN(re), mrb_enc_get(mrb, re));
+ str = mrb_str_new(mrb, RREGEXP_SRC_PTR(re),RREGEXP_SRC_LEN(re));
return str;
}
@@ -1757,23 +1229,12 @@ typedef struct {
long char_pos;
} pair_t;
-static int
-pair_byte_cmp(const void *pair1, const void *pair2)
-{
- long diff = ((pair_t*)pair1)->byte_pos - ((pair_t*)pair2)->byte_pos;
- return diff ? diff > 0 ? 1 : -1 : 0;
-}
-
static void
update_char_offset(mrb_state *mrb, mrb_value match)
{
struct rmatch *rm = RMATCH(match)->rmatch;
struct re_registers *regs;
- int i, num_regs, num_pos;
- long c;
- char *s, *p, *q;
- mrb_encoding *enc;
- pair_t *pairs;
+ int i, num_regs;
if (rm->char_offset_updated)
return;
@@ -1787,55 +1248,12 @@ update_char_offset(mrb_state *mrb, mrb_value match)
rm->char_offset_num_allocated = num_regs;
}
- enc = mrb_enc_get(mrb, mrb_obj_value(RMATCH(match)->str));
- if (mrb_enc_mbmaxlen(enc) == 1) {
- for (i = 0; i < num_regs; i++) {
- rm->char_offset[i].beg = BEG(i);
- rm->char_offset[i].end = END(i);
- }
- rm->char_offset_updated = 1;
- return;
- }
-
- //pairs = ALLOCA_N(pair_t, num_regs*2);
- pairs = mrb_malloc(mrb, sizeof(pair_t)*num_regs*2);
-
- num_pos = 0;
for (i = 0; i < num_regs; i++) {
- if (BEG(i) < 0)
- continue;
- pairs[num_pos++].byte_pos = BEG(i);
- pairs[num_pos++].byte_pos = END(i);
- }
- qsort(pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
-
- s = p = RMATCH(match)->str->buf;
- c = 0;
- for (i = 0; i < num_pos; i++) {
- q = s + pairs[i].byte_pos;
- c += mrb_enc_strlen(p, q, enc);
- pairs[i].char_pos = c;
- p = q;
- }
-
- for (i = 0; i < num_regs; i++) {
- pair_t key, *found;
- if (BEG(i) < 0) {
- rm->char_offset[i].beg = -1;
- rm->char_offset[i].end = -1;
- continue;
- }
-
- key.byte_pos = BEG(i);
- found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
- rm->char_offset[i].beg = found->char_pos;
-
- key.byte_pos = END(i);
- found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
- rm->char_offset[i].end = found->char_pos;
+ rm->char_offset[i].beg = BEG(i);
+ rm->char_offset[i].end = END(i);
}
-
rm->char_offset_updated = 1;
+ return;
}
/* 15.2.16.3.2 */
@@ -2235,49 +1653,36 @@ option_to_str(char str[4], int options)
#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
static void
-mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len,
- mrb_encoding *enc, mrb_encoding *resenc)
+mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len)
{
const char *p, *pend;
int need_escape = 0;
- int c, clen;
+ int c;
p = s; pend = p + len;
- if (mrb_enc_asciicompat(mrb, enc)) {
- while (p < pend) {
- c = mrb_enc_ascget(mrb, p, pend, &clen, enc);
- if (c == -1) {
- if (enc == resenc) {
- p += mbclen(p, pend, enc);
- }
- else {
- need_escape = 1;
- break;
- }
- }
- else if (c != '/' && mrb_enc_isprint(c, enc)) {
- p += clen;
- }
- else {
- need_escape = 1;
- break;
- }
+ while (p < pend) {
+ c = *p;
+ if (c == -1) {
+ p += pend - p;
+ }
+ else if (c != '/' && ISPRINT(c)) {
+ p++;
+ }
+ else {
+ need_escape = 1;
+ break;
}
- }
- else {
- need_escape = 1;
}
if (!need_escape) {
mrb_str_buf_cat(mrb, str, s, len);
}
else {
- int unicode_p = mrb_enc_unicode_p(enc);
p = s;
while (p<pend) {
- c = mrb_enc_ascget(mrb, p, pend, &clen, enc);
- if (c == '\\' && p+clen < pend) {
- int n = clen + mbclen(p+clen, pend, enc);
+ c = *p;
+ if (c == '\\' && p+1 < pend) {
+ int n = 1 + pend - (p+1);
mrb_str_buf_cat(mrb, str, p, n);
p += n;
continue;
@@ -2285,38 +1690,21 @@ mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len,
else if (c == '/') {
char c = '\\';
mrb_str_buf_cat(mrb, str, &c, 1);
- mrb_str_buf_cat(mrb, str, p, clen);
- }
- else if (c == -1) {
- clen = mrb_enc_precise_mbclen(p, pend, enc);
- if (!MBCLEN_CHARFOUND_P(clen)) {
- c = (unsigned char)*p;
- clen = 1;
- goto hex;
- }
- if (resenc) {
- unsigned int c = mrb_enc_mbc_to_codepoint(p, pend, enc);
- mrb_str_buf_cat_escaped_char(mrb, str, c, unicode_p);
- }
- else {
- clen = MBCLEN_CHARFOUND_LEN(clen);
- mrb_str_buf_cat(mrb, str, p, clen);
- }
+ mrb_str_buf_cat(mrb, str, p, 1);
}
- else if (mrb_enc_isprint(c, enc)) {
- mrb_str_buf_cat(mrb, str, p, clen);
+ else if (ISPRINT(c)) {
+ mrb_str_buf_cat(mrb, str, p, 1);
}
- else if (!mrb_enc_isspace(c, enc)) {
+ else if (!ISSPACE(c)) {
char b[8];
- hex:
snprintf(b, sizeof(b), "\\x%02X", c);
mrb_str_buf_cat(mrb, str, b, 4);
}
else {
- mrb_str_buf_cat(mrb, str, p, clen);
+ mrb_str_buf_cat(mrb, str, p, 1);
}
- p += clen;
+ p++;
}
}
}
@@ -2355,7 +1743,6 @@ mrb_reg_to_s(mrb_state *mrb, mrb_value re)
mrb_reg_check(mrb, re);
memset(optbuf, 0, 5);
- mrb_enc_copy(mrb, str, re);
options = RREGEXP(re)->ptr->options;
ptr = (UChar*)RREGEXP_SRC_PTR(re);
len = RREGEXP_SRC_LEN(re);
@@ -2399,7 +1786,7 @@ again:
++ptr;
len -= 2;
- err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT,
+ err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT,
enc, OnigDefaultSyntax, NULL);
onig_free(rp);
}
@@ -2419,9 +1806,8 @@ again:
}
mrb_str_buf_cat(mrb, str, ":", strlen(":"));
- mrb_reg_expr_str(mrb, str, (char*)ptr, len, enc, NULL);
+ mrb_reg_expr_str(mrb, str, (char*)ptr, len);
mrb_str_buf_cat(mrb, str, ")", strlen(")"));
- mrb_enc_copy(mrb, str, re);
return str;
}
@@ -2663,8 +2049,6 @@ mrb_init_regexp(mrb_state *mrb)
mrb_define_const(mrb, s, "MULTILINE", mrb_fixnum_value(ONIG_OPTION_MULTILINE));
mrb_define_const(mrb, s, "FIXEDENCODING", mrb_fixnum_value(ARG_ENCODING_FIXED));
- //mrb_global_variable(&reg_cache);
-
s = mrb_define_class(mrb, "MatchData", mrb->object_class);
//mrb_undef_method(CLASS_OF(rb_cMatch), "new");
@@ -2705,27 +2089,23 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers
{
mrb_value val;
char *p, *s, *e;
- int no, clen;
- mrb_encoding *str_enc = mrb_enc_get(mrb, str);
- mrb_encoding *src_enc = mrb_enc_get(mrb, src);
- int acompat = mrb_enc_asciicompat(mrb, str_enc);
-#define ASCGET(mrb,s,e,cl) (acompat ? (*cl=1,ISASCII(s[0])?s[0]:-1) : mrb_enc_ascget(mrb, s, e, cl, str_enc))
struct RString *ps = mrb_str_ptr(str);
+ int no;
val.tt = 0;
p = s = ps->buf;
e = s + ps->len;
while (s < e) {
- int c = ASCGET(mrb, s, e, &clen);
+ int c = *s;
char *ss;
if (c == -1) {
- s += mbclen(s, e, str_enc);
+ s += e - s;
continue;
}
ss = s;
- s += clen;
+ s++;
if (c != '\\' || s == e) continue;
@@ -2733,16 +2113,16 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers
if (!val.tt) {
val = mrb_str_buf_new(mrb, ss-p);
}
- mrb_enc_str_buf_cat(mrb, val, p, ss-p, str_enc);
+ mrb_str_buf_cat(mrb, val, p, ss-p);
- c = ASCGET(mrb, s, e, &clen);
+ c = *s;
if (c == -1) {
- s += mbclen(s, e, str_enc);
- mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc);
+ s += e - s;
+ mrb_str_buf_cat(mrb, val, ss, s-ss);
p = s;
continue;
}
- s += clen;
+ s++;
p = s;
switch (c) {
@@ -2757,18 +2137,18 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers
break;
case 'k':
- if (s < e && ASCGET(mrb, s, e, &clen) == '<') {
+ if (s < e && *s == '<') {
char *name, *name_end;
- name_end = name = s + clen;
+ name_end = name = s + 1;
while (name_end < e) {
- c = ASCGET(mrb, name_end, e, &clen);
+ c = *name_end;
if (c == '>') break;
- name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
+ name_end += c == -1 ? e - name_end : 1;
}
if (name_end < e) {
no = name_to_backref_number(mrb, regs, RREGEXP(regexp), name, name_end);
- p = s = name_end + clen;
+ p = s = name_end + 1;
break;
}
else {
@@ -2776,7 +2156,7 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers
}
}
- mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc);
+ mrb_str_buf_cat(mrb, val, ss, s-ss);
continue;
case '0':
@@ -2785,11 +2165,11 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers
break;
case '`':
- mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src), BEG(0), src_enc);
+ mrb_str_buf_cat(mrb, val, RSTRING_PTR(src), BEG(0));
continue;
case '\'':
- mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc);
+ mrb_str_buf_cat(mrb, val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0));
continue;
case '+':
@@ -2799,31 +2179,29 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers
break;
case '\\':
- mrb_enc_str_buf_cat(mrb, val, s-clen, clen, str_enc);
+ mrb_str_buf_cat(mrb, val, s-1, 1);
continue;
default:
- mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc);
+ mrb_str_buf_cat(mrb, val, ss, s-ss);
continue;
}
if (no >= 0) {
if (no >= regs->num_regs) continue;
if (BEG(no) == -1) continue;
- mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc);
+ mrb_str_buf_cat(mrb, val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no));
}
} /* while (s < e) { */
if (!val.tt) return str;
if (p < e) {
- mrb_enc_str_buf_cat(mrb, val, p, e-p, str_enc);
+ mrb_str_buf_cat(mrb, val, p, e-p);
}
return val;
}
-//#define NEW_NODE(t,a0,a1,a2) mrb_node_newnode((t),(int)(a0),(int)(a1),(int)(a2))
-//#define NEW_IF(c,t,e) NEW_NODE(NODE_IF,c,t,e)
static inline NODE *
lfp_svar_place(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp)
{
@@ -3038,9 +2416,6 @@ mrb_memsearch(mrb_state *mrb, const void *x0, int m, const void *y0, int n, mrb_
}
return -1;
}
- else if (enc == mrb_utf8_encoding(mrb)) {
- return mrb_memsearch_qs_utf8(x0, m, y0, n);
- }
else {
return mrb_memsearch_qs(x0, m, y0, n);
}
@@ -3077,12 +2452,7 @@ mrb_reg_new_str(mrb_state *mrb, mrb_value s, int options)
mrb_value
mrb_reg_regcomp(mrb_state *mrb, mrb_value str)
{
- mrb_value save_str = str;
- if (reg_cache.tt && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str)
- && ENCODING_GET(mrb, reg_cache) == ENCODING_GET(mrb, str)
- && memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
- return reg_cache;
- return reg_cache = mrb_reg_new_str(mrb, save_str, 0);
+ return mrb_reg_new_str(mrb, str, 0);
}
int
@@ -3143,7 +2513,7 @@ is_special_global_name(const char *m, const char *e, mrb_encoding *enc)
++m;
if (m < e && is_identchar(m, e, enc)) {
if (!ISASCII(*m)) mb = 1;
- m += mrb_enc_mbclen(m, e, enc);
+ m += e - m;
}
break;
default:
@@ -3228,7 +2598,7 @@ mrb_enc_symname2_p(const char *name, long len, mrb_encoding *enc)
id:
if (m >= e || (*m != '_' && !mrb_enc_isalpha(*m, enc) && ISASCII(*m)))
return FALSE;
- while (m < e && is_identchar(m, e, enc)) m += mrb_enc_mbclen(m, e, enc);
+ while (m < e && is_identchar(m, e, enc)) m += e - m;
if (localid) {
switch (*m) {
case '!': case '?': case '=': ++m;
diff --git a/src/sprintf.c b/src/sprintf.c
index dc9b83dec..b7c5e02fd 100644
--- a/src/sprintf.c
+++ b/src/sprintf.c
@@ -668,44 +668,37 @@ retry:
mrb_value tmp;
unsigned int c;
int n;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = mrb_enc_get(mrb, fmt);
-#endif //INCLUDE_ENCODING
tmp = mrb_check_string_type(mrb, val);
if (!mrb_nil_p(tmp)) {
if (RSTRING_LEN(tmp) != 1 ) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "%%c requires a character");
}
-#ifdef INCLUDE_ENCODING
- c = mrb_enc_codepoint_len(mrb, RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
-#else
c = RSTRING_PTR(tmp)[0];
n = 1;
-#endif //INCLUDE_ENCODING
}
else {
c = mrb_fixnum(val);
- n = mrb_enc_codelen(mrb, c, enc);
+ n = 1;
}
if (n <= 0) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character");
}
if (!(flags & FWIDTH)) {
CHECK(n);
- mrb_enc_mbcput(c, &buf[blen], enc);
+ buf[blen] = c;
blen += n;
}
else if ((flags & FMINUS)) {
CHECK(n);
- mrb_enc_mbcput(c, &buf[blen], enc);
+ buf[blen] = c;
blen += n;
FILL(' ', width-1);
}
else {
FILL(' ', width-1);
CHECK(n);
- mrb_enc_mbcput(c, &buf[blen], enc);
+ buf[blen] = c;
blen += n;
}
}
@@ -717,25 +710,18 @@ format_s:
{
mrb_value arg = GETARG();
long len, slen;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = mrb_enc_get(mrb, fmt);
-#endif //INCLUDE_ENCODING
if (*p == 'p') arg = mrb_inspect(mrb, arg);
str = mrb_obj_as_string(mrb, arg);
len = RSTRING_LEN(str);
- mrb_str_set_len(mrb, result, blen);
+ RSTRING_LEN(result) = blen;
if (flags&(FPREC|FWIDTH)) {
slen = RSTRING_LEN(str);
if (slen < 0) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence");
}
if ((flags&FPREC) && (prec < slen)) {
-#ifdef INCLUDE_ENCODING
- char *p = mrb_enc_nth(mrb, RSTRING_PTR(str), RSTRING_END(str),prec, enc);
-#else
char *p = RSTRING_PTR(str) + prec;
-#endif //INCLUDE_ENCODING
slen = prec;
len = p - RSTRING_PTR(str);
}
@@ -757,12 +743,10 @@ format_s:
buf[blen++] = ' ';
}
}
- mrb_enc_associate(mrb, result, enc);
break;
}
}
PUSH(RSTRING_PTR(str), len);
- mrb_enc_associate(mrb, result, enc);
}
break;
@@ -915,15 +899,8 @@ bin_retry:
if (*p == 'X') {
char *pp = s;
int c;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = mrb_enc_get(mrb, fmt);
-#endif //INCLUDE_ENCODING
while ((c = (int)(unsigned char)*pp) != 0) {
-#ifdef INCLUDE_ENCODING
- *pp = mrb_enc_toupper(c, enc);
-#else
*pp = toupper(c);
-#endif //INCLUDE_ENCODING
pp++;
}
}
diff --git a/src/string.c b/src/string.c
index 22e3dad60..257286144 100644
--- a/src/string.c
+++ b/src/string.c
@@ -9,13 +9,12 @@
#include <stdarg.h>
#include <string.h>
#include "mruby/string.h"
+#include <ctype.h>
#include "mruby/numeric.h"
#include "mruby/range.h"
-#include <ctype.h>
#include "mruby/array.h"
#include "mruby/class.h"
#include "mruby/variable.h"
-#include "mruby/hash.h"
#include <stdio.h>
#include "re.h"
#ifdef INCLUDE_REGEXP
@@ -23,8 +22,6 @@
#include "st.h"
#endif //INCLUDE_REGEXP
-#define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
-
#ifndef FALSE
#define FALSE 0
#endif
@@ -38,33 +35,11 @@ const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
#ifdef INCLUDE_REGEXP
static mrb_value get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote);
#endif //INCLUDE_REGEXP
-#ifdef INCLUDE_ENCODING
-static void mrb_enc_cr_str_copy_for_substr(mrb_state *mrb, mrb_value dest, mrb_value src);
-#else
-#define mrb_enc_cr_str_copy_for_substr(mrb, dest, src)
-#endif //INCLUDE_ENCODING
static mrb_value str_replace(mrb_state *mrb, mrb_value str, mrb_value str2);
-#ifdef INCLUDE_ENCODING
-static long str_strlen(mrb_state *mrb, mrb_value str, mrb_encoding *enc);
-#endif //INCLUDE_ENCODING
-#ifdef INCLUDE_ENCODING
-#define is_ascii_string(mrb, str) (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT)
-#define is_broken_string(mrb, str) (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_BROKEN)
-#define STR_ENC_GET(mrb, str) mrb_enc_from_index(mrb, ENCODING_GET(mrb, str))
-#endif //INCLUDE_ENCODING
-
-void
-mrb_str_set_len(mrb_state *mrb, mrb_value str, long len)
-{
- mrb_str_modify(mrb, str);
- RSTRING_LEN(str) = len;
- RSTRING_PTR(str)[len] = '\0';
-}
#define RESIZE_CAPA(str,capacity) do {\
RSTRING(str)->buf = mrb_realloc(mrb, RSTRING(str)->buf, (capacity)+1);\
- if (!MRB_STR_NOCAPA_P(str))\
- RSTRING_CAPA(str) = capacity;\
+ RSTRING_CAPA(str) = capacity;\
} while (0)
#define STR_SET_LEN(str, n) do { \
@@ -75,86 +50,18 @@ mrb_str_set_len(mrb_state *mrb, mrb_value str, long len)
RSTRING(str)->len--;\
} while (0)
-#ifdef INCLUDE_ENCODING
-static mrb_value mrb_enc_cr_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len,
- int ptr_encindex, int ptr_cr, int *ptr_cr_ret);
-#endif //INCLUDE_ENCODING
-
-#ifdef INCLUDE_ENCODING
-mrb_value
-mrb_usascii_str_new_cstr(mrb_state *mrb, const char *ptr)
-{
- mrb_value str = mrb_str_new_cstr(mrb, ptr);//mrb_str_new2(ptr);
- ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT);
- return str;
-}
-
-mrb_value
-mrb_external_str_new_with_enc(mrb_state *mrb, const char *ptr, long len, mrb_encoding *eenc)
-{
- mrb_value str;
-
- str = mrb_str_new(mrb, ptr, len);
- if (eenc == mrb_usascii_encoding(mrb) &&
- mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT) {
- mrb_enc_associate(mrb, str, mrb_ascii8bit_encoding(mrb));
- return str;
- }
- mrb_enc_associate(mrb, str, eenc);
- return mrb_str_conv_enc(mrb, str, eenc, mrb_default_internal_encoding(mrb));
-}
-
-mrb_value
-mrb_locale_str_new(mrb_state *mrb, const char *ptr, long len)
-{
- return mrb_external_str_new_with_enc(mrb, ptr, len, mrb_locale_encoding(mrb));
-}
-
-mrb_value
-mrb_str_buf_cat_ascii(mrb_state *mrb, mrb_value str, const char *ptr)
-{
- /* ptr must reference NUL terminated ASCII string. */
- int encindex = ENCODING_GET(mrb, str);
- mrb_encoding *enc = mrb_enc_from_index(mrb, encindex);
- if (mrb_enc_asciicompat(mrb, enc)) {
- return mrb_enc_cr_str_buf_cat(mrb, str, ptr, strlen(ptr),
- encindex, ENC_CODERANGE_7BIT, 0);
- }
- else {
- //char *buf = ALLOCA_N(char, mrb_enc_mbmaxlen(enc));
- char *buf = mrb_malloc(mrb, mrb_enc_mbmaxlen(enc));
- while (*ptr) {
- unsigned int c = (unsigned char)*ptr;
- int len = mrb_enc_codelen(mrb, c, enc);
- mrb_enc_mbcput(c, buf, enc);
- mrb_enc_cr_str_buf_cat(mrb, str, buf, len,
- encindex, ENC_CODERANGE_VALID, 0);
- ptr++;
- }
- return str;
- }
-}
-
-mrb_value
-mrb_filesystem_str_new_cstr(mrb_state *mrb, const char *ptr)
-{
- return mrb_external_str_new_with_enc(mrb, ptr, strlen(ptr), mrb_filesystem_encoding(mrb));
-}
-#endif //INCLUDE_ENCODING
-
mrb_value
mrb_str_resize(mrb_state *mrb, mrb_value str, size_t len)
{
size_t slen;
- mrb_str_modify(mrb, str);
slen = RSTRING_LEN(str);
if (len != slen) {
if (slen < len || slen -len > 1024) {
RSTRING_PTR(str) = mrb_realloc(mrb, RSTRING_PTR(str), len+1);
}
if (!MRB_STR_NOCAPA_P(str)) {
- RSTRING(str)->aux.capa = len;
+ RSTRING_CAPA(str) = len;
}
RSTRING(str)->len = len;
RSTRING(str)->buf[len] = '\0'; /* sentinel */
@@ -162,16 +69,6 @@ mrb_str_resize(mrb_state *mrb, mrb_value str, size_t len)
return str;
}
-#ifdef INCLUDE_ENCODING
-mrb_value
-mrb_usascii_str_new(mrb_state *mrb, const char *ptr, long len)
-{
- mrb_value str = mrb_str_new(mrb, ptr, len);
- ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT);
- return str;
-}
-#endif //INCLUDE_ENCODING
-
static inline void
str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len)
{
@@ -182,36 +79,6 @@ str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len)
}
}
-#ifdef INCLUDE_ENCODING
-static inline int
-single_byte_optimizable(mrb_state *mrb, mrb_value str)
-{
- mrb_encoding *enc;
- /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
- if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
- return 1;
-
- enc = STR_ENC_GET(mrb, str);
- if (mrb_enc_mbmaxlen(enc) == 1)
- return 1;
-
- /* Conservative. Possibly single byte.
- * "\xa1" in Shift_JIS for example. */
- return 0;
-}
-
-static inline const char *
-search_nonascii(const char *p, const char *e)
-{
- while (p < e) {
- if (!ISASCII(*p))
- return p;
- p++;
- }
- return NULL;
-}
-#endif //INCLUDE_ENCODING
-
static inline void
str_modifiable(mrb_value str)
{
@@ -226,71 +93,6 @@ str_independent(mrb_value str)
return 0;
}
-#ifdef INCLUDE_ENCODING
-static inline void
-str_enc_copy(mrb_state *mrb, mrb_value str1, mrb_value str2)
-{
- mrb_enc_set_index(mrb, str1, ENCODING_GET(mrb, str2));
-}
-
-static inline long
-enc_strlen(const char *p, const char *e, mrb_encoding *enc, int cr)
-{
- long c;
- const char *q;
-
- if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) {
- return (e - p + mrb_enc_mbminlen(enc) - 1) / mrb_enc_mbminlen(enc);
- }
- else if (mrb_enc_asciicompat(mrb, enc)) {
- c = 0;
- if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) {
- while (p < e) {
- if (ISASCII(*p)) {
- q = search_nonascii(p, e);
- if (!q)
- return c + (e - p);
- c += q - p;
- p = q;
- }
- p += mrb_enc_fast_mbclen(p, e, enc);
- c++;
- }
- }
- else {
- while (p < e) {
- if (ISASCII(*p)) {
- q = search_nonascii(p, e);
- if (!q)
- return c + (e - p);
- c += q - p;
- p = q;
- }
- p += mrb_enc_mbclen(p, e, enc);
- c++;
- }
- }
- return c;
- }
-
- for (c=0; p<e; c++) {
- p += mrb_enc_mbclen(p, e, enc);
- }
- return c;
-}
-
-size_t
-mrb_str_capacity(mrb_value str)
-{
- if (MRB_STR_NOCAPA_P(str)) {
- return RSTRING_LEN(str);
- }
- else {
- return RSTRING_CAPA(str);
- }
-}
-#endif //INCLUDE_ENCODING
-
#define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
static inline mrb_value
@@ -299,204 +101,28 @@ str_alloc(mrb_state *mrb)
struct RString* s;
s = mrb_obj_alloc_string(mrb);
- //NEWOBJ(str, struct RString);
- //OBJSETUP(str, klass, T_STRING);
s->buf = 0;
s->len = 0;
- s->aux.capa = 0;
+ s->capa = 0;
return mrb_obj_value(s);
}
-#ifdef INCLUDE_ENCODING
-long
-mrb_enc_strlen(const char *p, const char *e, mrb_encoding *enc)
-{
- return enc_strlen(p, e, enc, ENC_CODERANGE_UNKNOWN);
-}
-#endif //INCLUDE_ENCODING
-
-static void
-str_make_independent(mrb_state *mrb, mrb_value str)
-{
- char *ptr;
- long len = RSTRING_LEN(str);
-
- ptr = mrb_malloc(mrb, sizeof(char)*(len+1));
- if (RSTRING_PTR(str)) {
- memcpy(ptr, RSTRING_PTR(str), len);
- }
- ptr[len] = 0;
- RSTRING(str)->buf = ptr;
- RSTRING(str)->len = len;
- RSTRING(str)->aux.capa = len;
- MRB_STR_UNSET_NOCAPA(str);
-}
-
-#ifdef INCLUDE_ENCODING
-static int
-coderange_scan(const char *p, long len, mrb_encoding *enc)
-{
- const char *e = p + len;
-
- if (mrb_enc_to_index(enc) == 0) {
- /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
- p = search_nonascii(p, e);
- return p ? ENC_CODERANGE_VALID : ENC_CODERANGE_7BIT;
- }
-
- if (mrb_enc_asciicompat(mrb, enc)) {
- p = search_nonascii(p, e);
- if (!p) {
- return ENC_CODERANGE_7BIT;
- }
- while (p < e) {
- int ret = mrb_enc_precise_mbclen(p, e, enc);
- if (!MBCLEN_CHARFOUND_P(ret)) {
- return ENC_CODERANGE_BROKEN;
- }
- p += MBCLEN_CHARFOUND_LEN(ret);
- if (p < e) {
- p = search_nonascii(p, e);
- if (!p) {
- return ENC_CODERANGE_VALID;
- }
- }
- }
- if (e < p) {
- return ENC_CODERANGE_BROKEN;
- }
- return ENC_CODERANGE_VALID;
- }
-
- while (p < e) {
- int ret = mrb_enc_precise_mbclen(p, e, enc);
-
- if (!MBCLEN_CHARFOUND_P(ret)) {
- return ENC_CODERANGE_BROKEN;
- }
- p += MBCLEN_CHARFOUND_LEN(ret);
- }
- if (e < p) {
- return ENC_CODERANGE_BROKEN;
- }
- return ENC_CODERANGE_VALID;
-}
-
-int
-mrb_enc_str_coderange(mrb_state *mrb, mrb_value str)
-{
- int cr = ENC_CODERANGE(str);
-
- if (cr == ENC_CODERANGE_UNKNOWN) {
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
- cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
- ENC_CODERANGE_SET(str, cr);
- }
- return cr;
-}
-
-char*
-mrb_enc_nth(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc)
-{
- if (mrb_enc_mbmaxlen(enc) == 1) {
- p += nth;
- }
- else if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) {
- p += nth * mrb_enc_mbmaxlen(enc);
- }
- else if (mrb_enc_asciicompat(mrb, enc)) {
- const char *p2, *e2;
- int n;
-
- while (p < e && 0 < nth) {
- e2 = p + nth;
- if (e < e2)
- return (char*)e;
- if (ISASCII(*p)) {
- p2 = search_nonascii(p, e2);
- if (!p2)
- return (char*)e2;
- nth -= p2 - p;
- p = p2;
- }
- n = mrb_enc_mbclen(p, e, enc);
- p += n;
- nth--;
- }
- if (nth != 0)
- return (char*)e;
- return (char*)p;
- }
- else {
- while (p<e && nth--) {
- p += mrb_enc_mbclen(p, e, enc);
- }
- }
- if (p > e) p = e;
- return (char*)p;
-}
-
-static char*
-str_nth(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc, int singlebyte)
-{
- if (singlebyte)
- p += nth;
- else {
- p = mrb_enc_nth(mrb, p, e, nth, enc);
- }
- if (!p) return 0;
- if (p > e) p = e;
- return (char*)p;
-}
-
/* char offset to byte offset */
-static long
-str_offset(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc, int singlebyte)
-{
- const char *pp = str_nth(mrb, p, e, nth, enc, singlebyte);
- if (!pp) return e - p;
- return pp - p;
-}
-
long
mrb_str_offset(mrb_state *mrb, mrb_value str, long pos)
{
- return str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos,
- STR_ENC_GET(mrb, str), single_byte_optimizable(mrb, str));
+ return pos;
}
-static void
-mrb_enc_cr_str_exact_copy(mrb_state *mrb, mrb_value dest, mrb_value src)
-{
- str_enc_copy(mrb, dest, src);
- ENC_CODERANGE_SET(dest, ENC_CODERANGE(src));
-}
-#else
-#define mrb_enc_cr_str_exact_copy(mrb, dest, src)
-#endif //INCLUDE_ENCODING
-
-mrb_value
-str_new4(mrb_state *mrb, mrb_value str)
+static mrb_value
+str_dup(mrb_state *mrb, mrb_value str)
{
- mrb_value str2;
-
- str2 = mrb_obj_value(mrb_obj_alloc_string(mrb));
- RSTRING(str2)->len = RSTRING_LEN(str);
- RSTRING(str2)->buf = RSTRING_PTR(str);
+ /* should return shared string */
+ struct RString *s = mrb_str_ptr(str);
- if (MRB_STR_SHARED_P(str)) {
- struct RString *shared = RSTRING_SHARED(str);
- FL_SET(str2, MRB_STR_SHARED);
- RSTRING_SHARED(str2) = shared;
- }
- else {
- FL_SET(str, MRB_STR_SHARED);
- RSTRING_SHARED(str) = mrb_str_ptr(str2);
- }
- mrb_enc_cr_str_exact_copy(mrb, str2, str);
- return str2;
+ return mrb_str_new(mrb, s->buf, s->len);
}
static mrb_value
@@ -506,11 +132,6 @@ str_new(mrb_state *mrb, enum mrb_vtype ttype, const char *p, size_t len)
//str = str_alloc(mrb);
str = mrb_str_buf_new(mrb, len);
-#ifdef INCLUDE_ENCODING
- if (len == 0) {
- ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
- }
-#endif //INCLUDE_ENCODING
if (p) {
memcpy(RSTRING_PTR(str), p, len);
}
@@ -519,19 +140,16 @@ str_new(mrb_state *mrb, enum mrb_vtype ttype, const char *p, size_t len)
return str;
}
-mrb_value
+static mrb_value
mrb_str_new_with_class(mrb_state *mrb, mrb_value obj, const char *ptr, long len)
{
return str_new(mrb, mrb_type(obj), ptr, len);
}
-#define mrb_str_new5 mrb_str_new_with_class
-
static mrb_value
str_new_empty(mrb_state *mrb, mrb_value str)
{
- mrb_value v = mrb_str_new5(mrb, str, 0, 0);
- return v;
+ return mrb_str_new_with_class(mrb, str, 0, 0);
}
mrb_value
@@ -545,7 +163,7 @@ mrb_str_buf_new(mrb_state *mrb, size_t capa)
capa = STR_BUF_MIN_SIZE;
}
s->len = 0;
- s->aux.capa = capa;
+ s->capa = capa;
s->buf = mrb_malloc(mrb, capa+1);
s->buf[0] = '\0';
@@ -560,7 +178,6 @@ str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
if (ptr >= RSTRING_PTR(str) && ptr <= RSTRING_END(str)) {
off = ptr - RSTRING_PTR(str);
}
- mrb_str_modify(mrb, str);
if (len == 0) return mrb_fixnum_value(0);
capa = RSTRING_CAPA(str);
if (RSTRING_LEN(str) >= LONG_MAX - len) {
@@ -615,7 +232,7 @@ mrb_str_new(mrb_state *mrb, const char *p, size_t len)
memcpy(s->buf, p, len);
}
s->len = len;
- s->aux.capa = len;
+ s->capa = len;
s->buf[len] ='\0';
return mrb_obj_value(s);
}
@@ -627,23 +244,9 @@ mrb_str_new2(mrb_state *mrb, const char *ptr)
if (!ptr) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "NULL pointer given");
}
-#ifdef INCLUDE_ENCODING
- return mrb_usascii_str_new2(mrb, ptr);
-#else
return mrb_str_new(mrb, ptr, strlen(ptr));
-#endif //INCLUDE_ENCODING
}
-#ifdef INCLUDE_ENCODING
-mrb_value
-mrb_enc_str_new(mrb_state *mrb, const char *ptr, long len, mrb_encoding *enc)
-{
- mrb_value str = mrb_str_new(mrb, ptr, len);
- mrb_enc_associate(mrb, str, enc);
- return str;
-}
-#endif //INCLUDE_ENCODING
-
/*
* call-seq: (Caution! NULL string)
* String.new(str="") => new_str
@@ -662,7 +265,7 @@ mrb_str_new_cstr(mrb_state *mrb, const char *p)
memcpy(s->buf, p, len);
s->buf[len] = 0;
s->len = len;
- s->aux.capa = len;
+ s->capa = len;
return mrb_obj_value(s);
}
@@ -715,8 +318,8 @@ mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
s2 = mrb_str_ptr(other);
len = s1->len + s2->len;
- if (s1->aux.capa < len) {
- s1->aux.capa = len;
+ if (s1->capa < len) {
+ s1->capa = len;
s1->buf = mrb_realloc(mrb, s1->buf, len+1);
}
memcpy(s1->buf+s1->len, s2->buf, s2->len);
@@ -757,30 +360,7 @@ mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
static mrb_value
mrb_str_plus_m(mrb_state *mrb, mrb_value self)
{
- mrb_value str3;
- mrb_value str2;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
-
- //mrb_get_args(mrb, "s", &p, &len);
- mrb_get_args(mrb, "o", &str2);
-
- mrb_string_value(mrb, &str2);
-#ifdef INCLUDE_ENCODING
- enc = mrb_enc_check(mrb, self, str2);
-#endif //INCLUDE_ENCODING
- str3 = mrb_str_new(mrb, 0, RSTRING_LEN(self)+RSTRING_LEN(str2));
- memcpy(RSTRING_PTR(str3), RSTRING_PTR(self), RSTRING_LEN(self));
- memcpy(RSTRING_PTR(str3) + RSTRING_LEN(self),
- RSTRING_PTR(str2), RSTRING_LEN(str2));
- RSTRING_PTR(str3)[RSTRING_LEN(str3)] = '\0';
-#ifdef INCLUDE_ENCODING
- ENCODING_CODERANGE_SET(mrb, str3, mrb_enc_to_index(enc),
- ENC_CODERANGE_AND(ENC_CODERANGE(self), ENC_CODERANGE(str2)));
-#endif //INCLUDE_ENCODING
-
- return str3;
+ return mrb_nil_value();
}
/*
@@ -792,9 +372,7 @@ mrb_str_plus_m(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_bytesize(mrb_state *mrb, mrb_value self)
{
- struct RString *s = mrb_str_ptr(self);
-
- return mrb_fixnum_value(s->len);
+ return mrb_nil_value();
}
/* 15.2.10.5.26 */
@@ -808,26 +386,10 @@ mrb_str_bytesize(mrb_state *mrb, mrb_value self)
mrb_value
mrb_str_size(mrb_state *mrb, mrb_value self)
{
-#ifdef INCLUDE_ENCODING
- long len;
-
- len = str_strlen(mrb, self, STR_ENC_GET(mrb, self));
- return mrb_fixnum_value(len);
-#else
- return mrb_str_bytesize(mrb, self);
-#endif //INCLUDE_ENCODING
+ return mrb_fixnum_value(RSTRING_LEN(self));
}
-void
-mrb_str_modify(mrb_state *mrb, mrb_value str)
-{
- if (!str_independent(str))
- str_make_independent(mrb, str);
-}
-
-
/* 15.2.10.5.1 */
-
/*
* call-seq:
* str * integer => new_str
@@ -840,35 +402,7 @@ mrb_str_modify(mrb_state *mrb, mrb_value str)
static mrb_value
mrb_str_times(mrb_state *mrb, mrb_value self)
{
- mrb_value str2;
- mrb_int n,len,times;
- char *ptr2;
-
- mrb_get_args(mrb, "i", &times);
-
- if (times < 0) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
- }
- if (times && INT32_MAX/times < RSTRING_LEN(self)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
- }
-
- str2 = mrb_str_new5(mrb, self, 0, len = RSTRING_LEN(self)*times);
- ptr2 = RSTRING_PTR(str2);
- if (len > 0) {
- n = RSTRING_LEN(self);
- memcpy(ptr2, RSTRING_PTR(self), n);
- while (n <= len/2) {
- memcpy(ptr2 + n, ptr2, n);
- n *= 2;
- }
- memcpy(ptr2 + n, ptr2, len-n);
- }
- ptr2[RSTRING_LEN(str2)] = '\0';
-
- mrb_enc_cr_str_copy_for_substr(mrb, str2, self);
-
- return str2;
+ return mrb_nil_value();
}
/* -------------------------------------------------------------- */
@@ -930,73 +464,8 @@ mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
static mrb_value
mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
{
- mrb_value str2;
- mrb_int result;
-
- mrb_get_args(mrb, "o", &str2);
- if (mrb_type(str2) != MRB_TT_STRING) {
- if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "to_s"))) {
- return mrb_nil_value();
- }
- else if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "<=>"))) {
- return mrb_nil_value();
- }
- else
- {
- mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1);
-
- if (mrb_nil_p(tmp)) return mrb_nil_value();
- if (!mrb_fixnum(tmp)) {
- return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp);
- }
- result = -mrb_fixnum(tmp);
- }
- }
- else {
- result = mrb_str_cmp(mrb, str1, str2);
- }
- return mrb_fixnum_value(result);
-}
-
-#ifdef INCLUDE_ENCODING
-int
-mrb_str_comparable(mrb_state *mrb, mrb_value str1, mrb_value str2)
-{
- int idx1, idx2;
- int rc1, rc2;
-
- if (RSTRING_LEN(str1) == 0) return TRUE;
- if (RSTRING_LEN(str2) == 0) return TRUE;
- idx1 = ENCODING_GET(mrb, str1);
- idx2 = ENCODING_GET(mrb, str2);
- if (idx1 == idx2) return TRUE;
- rc1 = mrb_enc_str_coderange(mrb, str1);
- rc2 = mrb_enc_str_coderange(mrb, str2);
- if (rc1 == ENC_CODERANGE_7BIT) {
- if (rc2 == ENC_CODERANGE_7BIT) return TRUE;
- if (mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx2)))
- return TRUE;
- }
- if (rc2 == ENC_CODERANGE_7BIT) {
- if (mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx1)))
- return TRUE;
- }
- return FALSE;
-}
-
-int
-mrb_str_hash_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
-{
- long len;
-
- if (!mrb_str_comparable(mrb, str1, str2)) return 1;
- if (RSTRING_LEN(str1) == (len = RSTRING_LEN(str2)) &&
- memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) {
- return 0;
- }
- return 1;
+ return mrb_nil_value();
}
-#endif //INCLUDE_ENCODING
static int
str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
@@ -1004,9 +473,6 @@ str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
const long len = RSTRING_LEN(str1);
if (len != RSTRING_LEN(str2)) return FALSE;
-#ifdef INCLUDE_ENCODING
- if (!mrb_str_comparable(mrb, str1, str2)) return FALSE;
-#endif //INCLUDE_ENCODING
if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0)
return TRUE;
return FALSE;
@@ -1100,202 +566,9 @@ mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)
static mrb_value
mrb_str_match(mrb_state *mrb, mrb_value self/* x */)
{
- mrb_value y;
-
- mrb_get_args(mrb, "o", &y);
- switch (mrb_type(y)) {
- case MRB_TT_STRING:
- mrb_raise(mrb, E_TYPE_ERROR, "type mismatch: String given");
- case MRB_TT_REGEX:
-#ifdef INCLUDE_REGEXP
- return mrb_reg_match_str(mrb, y, self);
-#else
- mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //INCLUDE_REGEXP
- default:
- if (mrb_respond_to(mrb, y, mrb_intern(mrb, "=~"))) {
- return mrb_funcall(mrb, y, "=~", 1, self);
- }
- else {
- return mrb_nil_value();
- }
- }
-}
-/* ---------------------------------- */
-mrb_value
-mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, int len)
-{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
-#endif //INCLUDE_ENCODING
- mrb_value str2;
-#ifdef INCLUDE_ENCODING
- char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
-#else
- char *p, *s = RSTRING_PTR(str);
-#endif //INCLUDE_ENCODING
-
- if (len < 0) return mrb_nil_value();
- if (!RSTRING_LEN(str)) {
- len = 0;
- }
-#ifdef INCLUDE_ENCODING
- if (single_byte_optimizable(mrb, str)) {
-#endif //INCLUDE_ENCODING
- if (beg > RSTRING_LEN(str)) return mrb_nil_value();
- if (beg < 0) {
- beg += RSTRING_LEN(str);
- if (beg < 0) return mrb_nil_value();
- }
- if (beg + len > RSTRING_LEN(str))
- len = RSTRING_LEN(str) - beg;
- if (len <= 0) {
- len = 0;
- p = 0;
- }
- else
- p = s + beg;
-#ifdef INCLUDE_ENCODING
- goto sub;
- }
- if (beg < 0) {
- if (len > -beg) len = -beg;
- if (-beg * mrb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
- beg = -beg;
- while (beg-- > len && (e = mrb_enc_prev_char(s, e, e, enc)) != 0);
- p = e;
- if (!p) return mrb_nil_value();
- while (len-- > 0 && (p = mrb_enc_prev_char(s, p, e, enc)) != 0);
- if (!p) return mrb_nil_value();
- len = e - p;
- goto sub;
- }
- else {
- beg += str_strlen(mrb, str, enc);
- if (beg < 0) return mrb_nil_value();
- }
- }
- else if (beg > 0 && beg > str_strlen(mrb, str, enc)) {
- return mrb_nil_value();
- }
- if (len == 0) {
- p = 0;
- }
- else if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) {
- int char_sz = mrb_enc_mbmaxlen(enc);
-
- p = s + beg * char_sz;
- if (p > e) {
- p = e;
- len = 0;
- }
- else if (len * char_sz > e - p)
- len = e - p;
- else
- len *= char_sz;
- }
- else if ((p = str_nth(mrb, s, e, beg, enc, 0)) == e) {
- len = 0;
- }
- else {
- len = str_offset(mrb, p, e, len, enc, 0);
- }
-sub:
-#endif //INCLUDE_ENCODING
- if (len > STR_BUF_MIN_SIZE && beg + len == RSTRING_LEN(str)) {
-#ifdef INCLUDE_ENCODING
- str2 = mrb_str_new4(mrb, str);
- str2 = str_new3(mrb, mrb_obj_class(mrb, str2), str2);
-#else
- str2 = mrb_str_new(mrb, s, RSTRING_LEN(str));
-#endif //INCLUDE_ENCODING
- RSTRING(str2)->buf += RSTRING(str2)->len - len;
- RSTRING(str2)->len = len;
- }
- else {
- str2 = mrb_str_new5(mrb, str, p, len);
- mrb_enc_cr_str_copy_for_substr(mrb, str2, str);
- }
-
- return str2;
-}
-
-#ifdef INCLUDE_REGEXP
-static mrb_value
-mrb_str_subpat(mrb_state *mrb, mrb_value str, mrb_value re, mrb_int backref)
-{
- if (mrb_reg_search(mrb, re, str, 0, 0) >= 0) {
- mrb_value match = mrb_backref_get(mrb);
- int nth = mrb_reg_backref_number(mrb, match, mrb_fixnum_value(backref));
- return mrb_reg_nth_match(mrb, nth, mrb_backref_get(mrb));
- }
return mrb_nil_value();
}
-#endif //INCLUDE_REGEXP
-/* --- 1-8-7parse.c --> */
-
-#ifdef INCLUDE_ENCODING
-long
-mrb_enc_strlen_cr(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc, int *cr)
-{
- long c;
- const char *q;
- int ret;
-
- *cr = 0;
- if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) {
- return (e - p + mrb_enc_mbminlen(enc) - 1) / mrb_enc_mbminlen(enc);
- }
- else if (mrb_enc_asciicompat(mrb, enc)) {
- c = 0;
- while (p < e) {
- if (ISASCII(*p)) {
- q = search_nonascii(p, e);
- if (!q) {
- if (!*cr) *cr = ENC_CODERANGE_7BIT;
- return c + (e - p);
- }
- c += q - p;
- p = q;
- }
- ret = mrb_enc_precise_mbclen(p, e, enc);
- if (MBCLEN_CHARFOUND_P(ret)) {
- *cr |= ENC_CODERANGE_VALID;
- p += MBCLEN_CHARFOUND_LEN(ret);
- }
- else {
- *cr = ENC_CODERANGE_BROKEN;
- p++;
- }
- c++;
- }
- if (!*cr) *cr = ENC_CODERANGE_7BIT;
- return c;
- }
-
- for (c=0; p<e; c++) {
- ret = mrb_enc_precise_mbclen(p, e, enc);
- if (MBCLEN_CHARFOUND_P(ret)) {
- *cr |= ENC_CODERANGE_VALID;
- p += MBCLEN_CHARFOUND_LEN(ret);
- }
- else {
- *cr = ENC_CODERANGE_BROKEN;
- if (p + mrb_enc_mbminlen(enc) <= e)
- p += mrb_enc_mbminlen(enc);
- else
- p = e;
- }
- }
- if (!*cr) *cr = ENC_CODERANGE_7BIT;
- return c;
-}
-#endif //INCLUDE_ENCODING
-
-/* --- 1-8-7parse.c --< */
-
-#ifndef INCLUDE_ENCODING
static inline long
mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
@@ -1308,7 +581,7 @@ mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long
qstable[i] = m + 1;
for (; x < xe; ++x)
qstable[*x] = xe - x;
- /* Searching */
+ /* Searching */
for (; y + m <= ys + n; y += *(qstable + y[m])) {
if (*xs == *y && memcmp(xs, y, m) == 0)
return y - ys;
@@ -1316,7 +589,7 @@ mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long
return -1;
}
-int
+static int
mrb_memsearch(const void *x0, int m, const void *y0, int n)
{
const unsigned char *x = x0, *y = y0;
@@ -1328,7 +601,7 @@ mrb_memsearch(const void *x0, int m, const void *y0, int n)
else if (m < 1) {
return 0;
}
- else if (m == 1) {
+ else if (m == 1) {
const unsigned char *ys = y, *ye = ys + n;
for (; y < ye; ++y) {
if (*x == *y)
@@ -1338,60 +611,22 @@ mrb_memsearch(const void *x0, int m, const void *y0, int n)
}
return mrb_memsearch_qs(x0, m, y0, n);
}
-#endif //INCLUDE_ENCODING
-
-/* --- 1-8-7parse.c --< */
-#ifdef INCLUDE_ENCODING
-static long
-str_strlen(mrb_state *mrb, mrb_value str, mrb_encoding *enc)
-{
- const char *p, *e;
- long n;
- int cr;
-
- if (single_byte_optimizable(mrb, str)) return RSTRING_LEN(str);
- if (!enc) enc = STR_ENC_GET(mrb, str);
- p = RSTRING_PTR(str);
- e = RSTRING_END(str);
- cr = ENC_CODERANGE(str);
- n = mrb_enc_strlen_cr(mrb, p, e, enc, &cr);
- if (cr) {
- ENC_CODERANGE_SET(str, cr);
- }
- return n;
-}
-#endif //INCLUDE_ENCODING
static mrb_int
mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
{
mrb_int pos;
- char *s, *sptr, *e;
+ char *s, *sptr;
int len, slen;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-
- enc = mrb_enc_check(mrb, str, sub);
- if (is_broken_string(mrb, sub)) {
- return -1;
- }
- len = str_strlen(mrb, str, enc);
- slen = str_strlen(mrb, sub, enc);
-#else
len = RSTRING_LEN(str);
slen = RSTRING_LEN(sub);
-#endif //INCLUDE_ENCODING
if (offset < 0) {
offset += len;
if (offset < 0) return -1;
}
if (len - offset < slen) return -1;
s = RSTRING_PTR(str);
- e = s + RSTRING_LEN(str);
if (offset) {
-#ifdef INCLUDE_ENCODING
- offset = str_offset(mrb, s, RSTRING_END(str), offset, enc, single_byte_optimizable(mrb, str));
-#endif //INCLUDE_ENCODING
s += offset;
}
if (slen == 0) return offset;
@@ -1399,21 +634,9 @@ mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
sptr = RSTRING_PTR(sub);
slen = RSTRING_LEN(sub);
len = RSTRING_LEN(str) - offset;
-#ifdef INCLUDE_ENCODING
- for (;;) {
- char *t;
- pos = mrb_memsearch(mrb, sptr, slen, s, len, enc);
- if (pos < 0) return pos;
- t = mrb_enc_right_char_head(s, s+pos, e, enc);
- if (t == s + pos) break;
- if ((len -= t - s) <= 0) return -1;
- offset += t - s;
- s = t;
- }
-#else
pos = mrb_memsearch(sptr, slen, s, len);
+
if (pos < 0) return pos;
-#endif //INCLUDE_ENCODING
return pos + offset;
}
@@ -1430,7 +653,7 @@ mrb_str_dup(mrb_state *mrb, mrb_value str)
dup->buf[s->len] = 0;
}
dup->len = s->len;
- dup->aux.capa = s->len;
+ dup->capa = s->len;
return mrb_obj_value(dup);
}
@@ -1444,7 +667,7 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
idx = mrb_fixnum(indx);
num_index:
- str = mrb_str_substr(mrb, str, idx, 1);
+ str = mrb_str_subseq(mrb, str, idx, 1);
if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
return str;
@@ -1467,18 +690,14 @@ num_index:
mrb_int beg, len;
mrb_value tmp;
-#ifdef INCLUDE_ENCODING
- len = str_strlen(mrb, str, STR_ENC_GET(mrb, str));
-#else
len = RSTRING_LEN(str);
-#endif //INCLUDE_ENCODING
switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, 0)) {
case 0/*FLASE*/:
break;
case 2/*OTHER*/:
return mrb_nil_value();
default:
- tmp = mrb_str_substr(mrb, str, beg, len);
+ tmp = mrb_str_subseq(mrb, str, beg, len);
return tmp;
}
}
@@ -1539,12 +758,12 @@ num_index:
static mrb_value
mrb_str_aref_m(mrb_state *mrb, mrb_value str)
{
+ mrb_value a1, a2;
int argc;
- mrb_value *argv;
- mrb_get_args(mrb, "*", &argv, &argc);
+ argc = mrb_get_args(mrb, "o|o", &a1, &a2);
if (argc == 2) {
- if (mrb_type(argv[0]) == MRB_TT_REGEX) {
+ if (mrb_type(a1) == MRB_TT_REGEX) {
#ifdef INCLUDE_REGEXP
return mrb_str_subpat(mrb, str, argv[0], mrb_fixnum(argv[1]));
#else
@@ -1552,38 +771,14 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str)
return mrb_nil_value();
#endif //INCLUDE_REGEXP
}
- return mrb_str_substr(mrb, str, mrb_fixnum(argv[0]), mrb_fixnum(argv[1]));
+ return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2));
}
if (argc != 1) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 1)", argc);
}
- return mrb_str_aref(mrb, str, argv[0]);
+ return mrb_str_aref(mrb, str, a1);
}
-#ifdef INCLUDE_ENCODING
-/* As mrb_str_modify(), but don't clear coderange */
-static void
-str_modify_keep_cr(mrb_state *mrb, mrb_value str)
-{
- if (!str_independent(str))
- str_make_independent(mrb, str);
- if (ENC_CODERANGE(str) == ENC_CODERANGE_BROKEN)
- /* Force re-scan later */
- ENC_CODERANGE_CLEAR(str);
-}
-
-static void
-mrb_str_check_dummy_enc(mrb_state *mrb, mrb_encoding *enc)
-{
- if (mrb_enc_dummy_p(enc)) {
- mrb_raise(mrb, E_ENCODING_ERROR, "incompatible encoding with this operation: %s",
- mrb_enc_name(enc));
- }
-}
-#else
-#define str_modify_keep_cr(mrb, str) mrb_str_modify((mrb), (str))
-#endif //INCLUDE_ENCODING
-
/* 15.2.10.5.8 */
/*
* call-seq:
@@ -1600,51 +795,6 @@ mrb_str_check_dummy_enc(mrb_state *mrb, mrb_encoding *enc)
static mrb_value
mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
- char *s, *send;
- int modify = 0;
-#ifdef INCLUDE_ENCODING
- unsigned int c;
- int n;
-#endif //INCLUDE_ENCODING
-
- str_modify_keep_cr(mrb, str);
-#ifdef INCLUDE_ENCODING
- enc = STR_ENC_GET(mrb, str);
- mrb_str_check_dummy_enc(mrb, enc);
-#endif //INCLUDE_ENCODING
- if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return mrb_nil_value();
- s = RSTRING_PTR(str); send = RSTRING_END(str);
-#ifdef INCLUDE_ENCODING
- c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
- if (mrb_enc_islower(c, enc)) {
- mrb_enc_mbcput(mrb_enc_toupper(c, enc), s, enc);
- modify = 1;
- }
- s += n;
- while (s < send) {
- c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
- if (mrb_enc_isupper(c, enc)) {
- mrb_enc_mbcput(mrb_enc_tolower(c, enc), s, enc);
- modify = 1;
- }
- s += n;
- }
-#else
- if (ISLOWER(*s)) {
- *s = toupper(*s);
- modify = 1;
- }
- while (++s < send) {
- if (ISUPPER(*s)) {
- *s = tolower(*s);
- modify = 1;
- }
- }
-#endif //INCLUDE_ENCODING
- if (modify) return str;
return mrb_nil_value();
}
@@ -1681,113 +831,6 @@ mrb_str_capitalize(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
{
- mrb_value *argv;
- int argc;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
- mrb_value rs;
- mrb_int newline;
- char *p, *pp, *e;
- long len, rslen;
-
- str_modify_keep_cr(mrb, str);
- len = RSTRING_LEN(str);
- if (len == 0) return mrb_nil_value();
- p = RSTRING_PTR(str);
- e = p + len;
- //if (mrb_scan_args(argc, argv, "01", &rs) == 0) {
- mrb_get_args(mrb, "*", &argv, &argc);
- if (argc == 0) {
- rs = mrb_str_new2(mrb, "\n");
-smart_chomp:
-#ifdef INCLUDE_ENCODING
- enc = mrb_enc_get(mrb, str);
- if (mrb_enc_mbminlen(enc) > 1) {
- pp = mrb_enc_left_char_head(p, e-mrb_enc_mbminlen(enc), e, enc);
- if (mrb_enc_is_newline(pp, e, enc)) {
- e = pp;
- }
- pp = e - mrb_enc_mbminlen(enc);
- if (pp >= p) {
- pp = mrb_enc_left_char_head(p, pp, e, enc);
- if (mrb_enc_ascget(mrb, pp, e, 0, enc) == '\r') {
- e = pp;
- }
- }
- if (e == RSTRING_END(str)) {
- return mrb_nil_value();
- }
- len = e - RSTRING_PTR(str);
- STR_SET_LEN(str, len);
- }
- else {
-#endif //INCLUDE_ENCODING
- if (RSTRING_PTR(str)[len-1] == '\n') {
- STR_DEC_LEN(str);
- if (RSTRING_LEN(str) > 0 &&
- RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
- STR_DEC_LEN(str);
- }
- }
- else if (RSTRING_PTR(str)[len-1] == '\r') {
- STR_DEC_LEN(str);
- }
- else {
- return mrb_nil_value();
- }
-#ifdef INCLUDE_ENCODING
- }
-#endif //INCLUDE_ENCODING
- RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
- return str;
- }
- rs = argv[0];
- if (mrb_nil_p(rs)) return mrb_nil_value();
- //StringValue(rs);
- mrb_string_value(mrb, &rs);
- rslen = RSTRING_LEN(rs);
- if (rslen == 0) {
- while (len>0 && p[len-1] == '\n') {
- len--;
- if (len>0 && p[len-1] == '\r')
- len--;
- }
- if (len < RSTRING_LEN(str)) {
- STR_SET_LEN(str, len);
- RSTRING_PTR(str)[len] = '\0';
- return str;
- }
- return mrb_nil_value();
- }
- if (rslen > len) return mrb_nil_value();
- newline = RSTRING_PTR(rs)[rslen-1];
- if (rslen == 1 && newline == '\n')
- goto smart_chomp;
-
-#ifdef INCLUDE_ENCODING
- enc = mrb_enc_check(mrb, str, rs);
- if (is_broken_string(mrb, rs)) {
- return mrb_nil_value();
- }
- pp = e - rslen;
-#else
- pp = p + len - rslen;
-#endif //INCLUDE_ENCODING
- if (p[len-1] == newline &&
- (rslen <= 1 ||
- memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
-#ifdef INCLUDE_ENCODING
- if (mrb_enc_left_char_head(p, pp, e, enc) != pp)
- return mrb_nil_value();
- if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
- ENC_CODERANGE_CLEAR(str);
- }
-#endif //INCLUDE_ENCODING
- STR_SET_LEN(str, RSTRING_LEN(str) - rslen);
- RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
- return str;
- }
return mrb_nil_value();
}
@@ -1820,26 +863,6 @@ mrb_str_chomp(mrb_state *mrb, mrb_value self)
return str;
}
-#ifdef INCLUDE_ENCODING
-static long
-chopped_length(mrb_state *mrb, mrb_value str)
-{
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
- const char *p, *p2, *beg, *end;
-
- beg = RSTRING_PTR(str);
- end = beg + RSTRING_LEN(str);
- if (beg > end) return 0;
- p = mrb_enc_prev_char(beg, end, end, enc);
- if (!p) return 0;
- if (p > beg && mrb_enc_ascget(mrb, p, end, 0, enc) == '\n') {
- p2 = mrb_enc_prev_char(beg, p, end, enc);
- if (p2 && mrb_enc_ascget(mrb, p2, end, 0, enc) == '\r') p = p2;
- }
- return p - beg;
-}
-#endif //INCLUDE_ENCODING
-
/* 15.2.10.5.12 */
/*
* call-seq:
@@ -1852,12 +875,7 @@ chopped_length(mrb_state *mrb, mrb_value str)
static mrb_value
mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
{
- str_modify_keep_cr(mrb, str);
if (RSTRING_LEN(str) > 0) {
-#ifdef INCLUDE_ENCODING
- long len;
- len = chopped_length(mrb, str);
-#else
size_t len;
len = RSTRING_LEN(str) - 1;
if (RSTRING_PTR(str)[len] == '\n') {
@@ -1866,14 +884,8 @@ mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
len--;
}
}
-#endif //INCLUDE_ENCODING
STR_SET_LEN(str, len);
RSTRING_PTR(str)[len] = '\0';
-#ifdef INCLUDE_ENCODING
- if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
- ENC_CODERANGE_CLEAR(str);
- }
-#endif //INCLUDE_ENCODING
return str;
}
return mrb_nil_value();
@@ -1900,13 +912,8 @@ static mrb_value
mrb_str_chop(mrb_state *mrb, mrb_value self)
{
mrb_value str;
-#ifdef INCLUDE_ENCODING
- str = mrb_str_new5(mrb, self, RSTRING_PTR(self), chopped_length(mrb, self));
- mrb_enc_cr_str_copy_for_substr(mrb, str, self);
-#else
str = mrb_str_dup(mrb, self);
mrb_str_chop_bang(mrb, str);
-#endif //INCLUDE_ENCODING
return str;
}
@@ -1921,63 +928,6 @@ mrb_str_chop(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
- char *s, *send;
- int modify = 0;
-
- str_modify_keep_cr(mrb, str);
-#ifdef INCLUDE_ENCODING
- enc = STR_ENC_GET(mrb, str);
- mrb_str_check_dummy_enc(mrb, enc);
-#endif //INCLUDE_ENCODING
- s = RSTRING_PTR(str); send = RSTRING_END(str);
-#ifdef INCLUDE_ENCODING
- if (single_byte_optimizable(mrb, str)) {
-#endif //INCLUDE_ENCODING
- while (s < send) {
- unsigned int c = *(unsigned char*)s;
-
-#ifdef INCLUDE_ENCODING
- if (mrb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
-#else
- if ('A' <= c && c <= 'Z') {
-#endif //INCLUDE_ENCODING
- *s = 'a' + (c - 'A');
- modify = 1;
- }
- s++;
- }
-#ifdef INCLUDE_ENCODING
- }
- else {
- int ascompat = mrb_enc_asciicompat(mrb, enc);
-
- while (s < send) {
- unsigned int c;
- int n;
-
- if (ascompat && (c = *(unsigned char*)s) < 0x80) {
- if (mrb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
- *s = 'a' + (c - 'A');
- modify = 1;
- }
- s++;
- }
- else {
- c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
- if (mrb_enc_isupper(c, enc)) {
- /* assuming toupper returns codepoint with same size */
- mrb_enc_mbcput(mrb_enc_tolower(c, enc), s, enc);
- modify = 1;
- }
- s += n;
- }
- }
- }
-#endif //INCLUDE_ENCODING
- if (modify) return str;
return mrb_nil_value();
}
@@ -2037,62 +987,7 @@ mrb_str_downcase(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_each_line(mrb_state *mrb, mrb_value str)
{
- mrb_value rs;
- int newline;
- struct RString *ps = mrb_str_ptr(str);
- char *p = ps->buf, *pend = p + ps->len, *s;
- char *ptr = p;
- long len = ps->len, rslen;
- mrb_value line;
- struct RString *prs;
- mrb_value *argv, b;
- int argc;
-
- //if (mrb_scan_args(argc, argv, "01", &rs) == 0) {
- mrb_get_args(mrb, "*&", &argv, &argc, &b);
- if (argc > 0) {
- rs = argv[0];
- } else {
- rs = mrb_str_new2(mrb, "\n");
- }
- /*RETURN_ENUMERATOR(str, argc, argv);*/
- if (mrb_nil_p(rs)) {
- mrb_yield(mrb, b, str);
- return str;
- }
- //StringValue(rs);
- mrb_string_value(mrb, &rs);
- prs = mrb_str_ptr(rs);
- rslen = prs->len;
- if (rslen == 0) {
- newline = '\n';
- }
- else {
- newline = prs->buf[rslen-1];
- }
-
- for (s = p, p += rslen; p < pend; p++) {
- if (rslen == 0 && *p == '\n') {
- if (*++p != '\n') continue;
- while (*p == '\n') p++;
- }
- if (ps->buf < p && p[-1] == newline &&
- (rslen <= 1 ||
- memcmp(prs->buf, p-rslen, rslen) == 0)) {
- line = mrb_str_new5(mrb, str, s, p - s);
- mrb_yield(mrb, b, line);
- str_mod_check(mrb, str, ptr, len);
- s = p;
- }
- }
-
- if (s != pend) {
- if (p > pend) p = pend;
- line = mrb_str_new5(mrb, str, s, p - s);
- mrb_yield(mrb, b, line);
- }
-
- return str;
+ return mrb_nil_value();
}
/* 15.2.10.5.16 */
@@ -2106,7 +1001,7 @@ mrb_str_each_line(mrb_state *mrb, mrb_value str)
* "".empty? #=> true
*/
static mrb_value
-mrb_str_empty(mrb_state *mrb, mrb_value self)
+mrb_str_empty_p(mrb_state *mrb, mrb_value self)
{
struct RString *s = mrb_str_ptr(self);
@@ -2135,265 +1030,63 @@ mrb_str_eql(mrb_state *mrb, mrb_value self)
return mrb_false_value();
}
-#ifdef INCLUDE_ENCODING
-static void
-mrb_enc_cr_str_copy_for_substr(mrb_state *mrb, mrb_value dest, mrb_value src)
-{
- /* this function is designed for copying encoding and coderange
- * from src to new string "dest" which is made from the part of src.
- */
- str_enc_copy(mrb, dest, src);
- switch (ENC_CODERANGE(src)) {
- case ENC_CODERANGE_7BIT:
- ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT);
- break;
- case ENC_CODERANGE_VALID:
- if (!mrb_enc_asciicompat(mrb, STR_ENC_GET(mrb, src)) ||
- search_nonascii(RSTRING_PTR(dest), RSTRING_END(dest)))
- ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID);
- else
- ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT);
- break;
- default:
- if (RSTRING_LEN(dest) == 0) {
- if (!mrb_enc_asciicompat(mrb, STR_ENC_GET(mrb, src)))
- ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID);
- else
- ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT);
- }
- break;
- }
-}
-#endif //INCLUDE_ENCODING
-
-static mrb_value
-str_replace_shared(mrb_state *mrb, mrb_value str2, mrb_value str)
-{
- str = mrb_str_new_frozen(mrb, str);
- RSTRING(str2)->len = RSTRING_LEN(str);
- RSTRING(str2)->buf = RSTRING_PTR(str);
- RSTRING_SHARED(str2) = mrb_str_ptr(str);
- FL_SET(str2, MRB_STR_SHARED);
- mrb_enc_cr_str_exact_copy(mrb, str2, str);
-
- return str2;
-}
-
-static mrb_value
-str_new_shared(mrb_state *mrb, struct RClass* klass, mrb_value str)
-{
- return str_replace_shared(mrb, str_alloc(mrb), str);
-}
-
-mrb_value
-str_new3(mrb_state *mrb, struct RClass* klass, mrb_value str)
-{
- return str_new_shared(mrb, klass, str);
-}
-
-mrb_value
-mrb_str_new_shared(mrb_state *mrb, mrb_value str)
-{
- mrb_value str2 = str_new3(mrb, mrb_obj_class(mrb, str), str);
-
- return str2;
-}
-
mrb_value
mrb_str_new_frozen(mrb_state *mrb, mrb_value orig)
{
- struct RClass* klass;
- mrb_value str;
-
- klass = mrb_obj_class(mrb, orig);
-
- if (MRB_STR_SHARED_P(orig) && RSTRING_SHARED(orig)) {
- long ofs;
- ofs = RSTRING_LEN(str) - RSTRING_SHARED(orig)->len;
-#ifdef INCLUDE_ENCODING
- if ((ofs > 0) || (klass != RBASIC(str)->c) ||
- ENCODING_GET(mrb, str) != ENCODING_GET(mrb, orig)) {
-#else
- if ((ofs > 0) || (klass != RBASIC(str)->c)) {
-#endif //INCLUDE_ENCODING
- str = str_new3(mrb, klass, str);
- RSTRING_PTR(str) += ofs;
- RSTRING_LEN(str) -= ofs;
- mrb_enc_cr_str_exact_copy(mrb, str, orig);
- }
- }
- else {
- str = str_new4(mrb, orig);
- }
- return str;
-}
-
-mrb_value
-mrb_str_drop_bytes(mrb_state *mrb, mrb_value str, long len)
-{
- char *ptr = RSTRING_PTR(str);
- long olen = RSTRING_LEN(str), nlen;
-
- str_modifiable(str);
- if (len > olen) len = olen;
- nlen = olen - len;
- if (!MRB_STR_SHARED_P(str)) mrb_str_new4(mrb, str);
- ptr = RSTRING(str)->buf += len;
- RSTRING(str)->len = nlen;
- ptr[nlen] = 0;
- //ENC_CODERANGE_CLEAR(str);
- return str;
+ return str_dup(mrb, orig);
}
mrb_value
mrb_str_subseq(mrb_state *mrb, mrb_value str, long beg, long len)
{
mrb_value str2;
- if (RSTRING_LEN(str) == beg + len &&
- STR_BUF_MIN_SIZE < len) {
- str2 = mrb_str_new_shared(mrb, mrb_str_new_frozen(mrb, str));
- mrb_str_drop_bytes(mrb, str2, beg);
- }
- else {
- str2 = mrb_str_new5(mrb, str, RSTRING_PTR(str)+beg, len);
- }
- mrb_enc_cr_str_copy_for_substr(mrb, str2, str);
+ str2 = mrb_str_new_with_class(mrb, str, RSTRING_PTR(str)+beg, len);
return str2;
}
-#ifdef INCLUDE_ENCODING
-int
-mrb_enc_str_asciionly_p(mrb_state *mrb, mrb_value str)
+mrb_value
+mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, int len)
{
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
-
- if (!mrb_enc_asciicompat(mrb, enc))
- return 0/*FALSE*/;
- else if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT)
- return 1/*TRUE*/;
- return 0/*FALSE*/;
-}
+ mrb_value str2;
+ char *p, *s = RSTRING_PTR(str);
-static mrb_value
-mrb_enc_cr_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len,
- int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
-{
- int str_encindex = ENCODING_GET(mrb, str);
- int res_encindex;
- int str_cr, res_cr;
- int str_a8 = ENCODING_IS_ASCII8BIT(str);
- int ptr_a8 = ptr_encindex == 0;
-
- str_cr = ENC_CODERANGE(str);
-
- if (str_encindex == ptr_encindex) {
- if (str_cr == ENC_CODERANGE_UNKNOWN ||
- (ptr_a8 && str_cr != ENC_CODERANGE_7BIT)) {
- ptr_cr = ENC_CODERANGE_UNKNOWN;
- }
- else if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
- ptr_cr = coderange_scan(ptr, len, mrb_enc_from_index(mrb, ptr_encindex));
- }
+ if (len < 0) return mrb_nil_value();
+ if (!RSTRING_LEN(str)) {
+ len = 0;
}
- else {
- mrb_encoding *str_enc = mrb_enc_from_index(mrb, str_encindex);
- mrb_encoding *ptr_enc = mrb_enc_from_index(mrb, ptr_encindex);
- if (!mrb_enc_asciicompat(mrb, str_enc) || !mrb_enc_asciicompat(mrb, ptr_enc)) {
- if (len == 0)
- return str;
- if (RSTRING_LEN(str) == 0) {
- mrb_str_buf_cat(mrb, str, ptr, len);
- ENCODING_CODERANGE_SET(mrb, str, ptr_encindex, ptr_cr);
- return str;
- }
- goto incompatible;
- }
- if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
- ptr_cr = coderange_scan(ptr, len, ptr_enc);
- }
- if (str_cr == ENC_CODERANGE_UNKNOWN) {
- if (str_a8 || ptr_cr != ENC_CODERANGE_7BIT) {
- str_cr = mrb_enc_str_coderange(mrb, str);
- }
- }
+ if (beg > RSTRING_LEN(str)) return mrb_nil_value();
+ if (beg < 0) {
+ beg += RSTRING_LEN(str);
+ if (beg < 0) return mrb_nil_value();
}
- if (ptr_cr_ret)
- *ptr_cr_ret = ptr_cr;
-
- if (str_encindex != ptr_encindex &&
- str_cr != ENC_CODERANGE_7BIT &&
- ptr_cr != ENC_CODERANGE_7BIT) {
-incompatible:
- mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s",
- mrb_enc_name(mrb_enc_from_index(mrb, str_encindex)),
- mrb_enc_name(mrb_enc_from_index(mrb, ptr_encindex)));
+ if (beg + len > RSTRING_LEN(str))
+ len = RSTRING_LEN(str) - beg;
+ if (len <= 0) {
+ len = 0;
+ p = 0;
}
+ else
+ p = s + beg;
- if (str_cr == ENC_CODERANGE_UNKNOWN) {
- res_encindex = str_encindex;
- res_cr = ENC_CODERANGE_UNKNOWN;
- }
- else if (str_cr == ENC_CODERANGE_7BIT) {
- if (ptr_cr == ENC_CODERANGE_7BIT) {
- res_encindex = !str_a8 ? str_encindex : ptr_encindex;
- res_cr = ENC_CODERANGE_7BIT;
- }
- else {
- res_encindex = ptr_encindex;
- res_cr = ptr_cr;
- }
- }
- else if (str_cr == ENC_CODERANGE_VALID) {
- res_encindex = str_encindex;
- if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID)
- res_cr = str_cr;
- else
- res_cr = ptr_cr;
+ if (len > STR_BUF_MIN_SIZE && beg + len == RSTRING_LEN(str)) {
+ str2 = mrb_str_new(mrb, s, RSTRING_LEN(str));
+ RSTRING(str2)->buf += RSTRING(str2)->len - len;
+ RSTRING(str2)->len = len;
}
- else { /* str_cr == ENC_CODERANGE_BROKEN */
- res_encindex = str_encindex;
- res_cr = str_cr;
- if (0 < len) res_cr = ENC_CODERANGE_UNKNOWN;
+ else {
+ str2 = mrb_str_new_with_class(mrb, str, p, len);
}
- if (len < 0) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
- }
- str_buf_cat(mrb, str, ptr, len);
- ENCODING_CODERANGE_SET(mrb, str, res_encindex, res_cr);
- return str;
-}
-
-mrb_value
-mrb_enc_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len, mrb_encoding *ptr_enc)
-{
- return mrb_enc_cr_str_buf_cat(mrb, str, ptr, len,
- mrb_enc_to_index(ptr_enc), ENC_CODERANGE_UNKNOWN, NULL);
+ return str2;
}
mrb_value
mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2)
{
- int str2_cr;
-
- str2_cr = ENC_CODERANGE(str2);
-
- mrb_enc_cr_str_buf_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2),
- ENCODING_GET(mrb, str2), str2_cr, &str2_cr);
-
- ENC_CODERANGE_SET(str2, str2_cr);
-
- return str;
-}
-#else
-mrb_value
-mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2)
-{
mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2));
return str;
}
-#endif //INCLUDE_ENCODING
static inline void
str_discard(mrb_state *mrb, mrb_value str)
@@ -2406,38 +1099,6 @@ str_discard(mrb_state *mrb, mrb_value str)
}
}
-void
-mrb_str_shared_replace(mrb_state *mrb, mrb_value str, mrb_value str2)
-{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
- int cr;
-#endif //INCLUDE_ENCODING
-
- if (mrb_obj_equal(mrb, str, str2)) return;
-#ifdef INCLUDE_ENCODING
- enc = STR_ENC_GET(mrb, str2);
- cr = ENC_CODERANGE(str2);
-#endif //INCLUDE_ENCODING
- str_discard(mrb, str);
- MRB_STR_UNSET_NOCAPA(str);
- RSTRING_PTR(str) = RSTRING_PTR(str2);
- RSTRING_LEN(str) = RSTRING_LEN(str2);
- if (MRB_STR_NOCAPA_P(str2)) {
- FL_SET(str, RBASIC(str2)->flags & MRB_STR_NOCAPA);
- RSTRING_SHARED(str) = RSTRING_SHARED(str2);
- }
- else {
- RSTRING_CAPA(str) = RSTRING_CAPA(str2);
- }
-
- MRB_STR_UNSET_NOCAPA(str2); /* abandon str2 */
- RSTRING_PTR(str2)[0] = 0;
- RSTRING_LEN(str2) = 0;
- mrb_enc_associate(mrb, str, enc);
- ENC_CODERANGE_SET(str, cr);
-}
-
#ifdef INCLUDE_REGEXP
static mrb_value
str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
@@ -2450,7 +1111,6 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
mrb_int beg0, end0;
mrb_int offset, blen, len, last;
char *sp, *cp;
- mrb_encoding *str_enc;
mrb_get_args(mrb, "*", &argv, &argc);
switch (argc) {
@@ -2478,7 +1138,6 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
dest = mrb_str_buf_new(mrb, blen);
sp = RSTRING_PTR(str);
cp = sp;
- str_enc = STR_ENC_GET(mrb, str);
do {
n++;
@@ -2490,7 +1149,7 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
len = beg - offset; /* copy pre-match substr */
if (len) {
- mrb_enc_str_buf_cat(mrb, dest, cp, len, str_enc);
+ mrb_str_buf_cat(mrb, dest, cp, len);
}
mrb_str_buf_append(mrb, dest, val);
@@ -2503,8 +1162,8 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
* in order to prevent infinite loops.
*/
if (RSTRING_LEN(str) <= end0) break;
- len = mrb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
- mrb_enc_str_buf_cat(mrb, dest, RSTRING_PTR(str)+end0, len, str_enc);
+ len = RSTRING_LEN(str)-end0;
+ mrb_str_buf_cat(mrb, dest, RSTRING_PTR(str)+end0, len);
offset = end0 + len;
}
cp = RSTRING_PTR(str) + offset;
@@ -2512,17 +1171,10 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
beg = mrb_reg_search(mrb, pat, str, offset, 0);
} while (beg >= 0);
if (RSTRING_LEN(str) > offset) {
- mrb_enc_str_buf_cat(mrb, dest, cp, RSTRING_LEN(str) - offset, str_enc);
+ mrb_str_buf_cat(mrb, dest, cp, RSTRING_LEN(str) - offset);
}
mrb_reg_search(mrb, pat, str, last, 0);
- if (bang) {
- mrb_str_shared_replace(mrb, str, dest);
- }
- else {
- RBASIC(dest)->c = mrb_obj_class(mrb, str);
- str = dest;
- }
-
+ RBASIC(dest)->c = mrb_obj_class(mrb, str);
return str;
}
@@ -2578,7 +1230,6 @@ mrb_str_gsub(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_gsub_bang(mrb_state *mrb, mrb_value self)
{
- str_modify_keep_cr(mrb, self);
//return str_gsub(argc, argv, self, 1);
return str_gsub(mrb, self, 1);
}
@@ -2694,18 +1345,10 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
}
if (pos < 0) {
-#ifdef INCLUDE_ENCODING
- pos += str_strlen(mrb, str, STR_ENC_GET(mrb, str));
-#else
pos += RSTRING_LEN(str);
-#endif //INCLUDE_ENCODING
if (pos < 0) {
if (mrb_type(sub) == MRB_TT_REGEX) {
-#ifdef INCLUDE_REGEXP
- mrb_backref_set(mrb, mrb_nil_value());
-#else
- mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //INCLUDE_REGEXP
+ mrb_raise(mrb, E_TYPE_ERROR, "Regexp class not supported");
}
return mrb_nil_value();
}
@@ -2714,11 +1357,9 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
switch (mrb_type(sub)) {
case MRB_TT_REGEX:
#ifdef INCLUDE_REGEXP
- if (pos > str_strlen(mrb, str, STR_ENC_GET(mrb, str)))
+ if (pos > RSTRING_LEN(str))
return mrb_nil_value();
- pos = str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos,
- mrb_enc_check(mrb, str, sub), single_byte_optimizable(mrb, str));
-
+ pos = mrb_str_offset(mrb, str, pos);
pos = mrb_reg_search(mrb, sub, str, pos, 0);
pos = mrb_str_sublen(mrb, str, pos);
#else
@@ -2750,9 +1391,6 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
/* fall through */
case MRB_TT_STRING:
pos = mrb_str_index(mrb, str, sub, pos);
-#ifdef INCLUDE_ENCODING
- pos = mrb_str_sublen(mrb, str, pos);
-#endif //INCLUDE_ENCODING
break;
}
@@ -2763,22 +1401,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
static mrb_value
str_replace(mrb_state *mrb, mrb_value str, mrb_value str2)
{
- long len;
-
- len = RSTRING_LEN(str2);
- if (MRB_STR_SHARED_P(str2)) {
- struct RString *shared = RSTRING_SHARED(str2);
- RSTRING_LEN(str) = len;
- RSTRING_PTR(str) = shared->buf;
- FL_SET(str, MRB_STR_SHARED);
- RSTRING_SHARED(str) = shared;
- }
- else {
- str_replace_shared(mrb, str, str2);
- }
-
- mrb_enc_cr_str_exact_copy(mrb, str, str2);
- return str;
+ return mrb_nil_value();
}
/* 15.2.10.5.24 */
@@ -2825,33 +1448,10 @@ mrb_str_init(mrb_state *mrb, mrb_value self)
return self;
}
-#ifdef INCLUDE_ENCODING
-mrb_sym
-mrb_intern3(mrb_state *mrb, const char *name, long len, mrb_encoding *enc)
-{
- return mrb_intern(mrb, name);
-}
-#endif //INCLUDE_ENCODING
-
mrb_sym
mrb_intern_str(mrb_state *mrb, mrb_value str)
{
- mrb_sym id;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-
- if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT) {
- enc = mrb_usascii_encoding(mrb);
- }
- else {
- enc = mrb_enc_get(mrb, str);
- }
- id = mrb_intern3(mrb, RSTRING_PTR(str), RSTRING_LEN(str), enc);
-#else
- id = mrb_intern(mrb, RSTRING_PTR(str));
-#endif //INCLUDE_ENCODING
- str = RB_GC_GUARD(str);
- return id;
+ return mrb_intern(mrb, RSTRING_PTR(str));
}
/* 15.2.10.5.25 */
@@ -2984,66 +1584,19 @@ mrb_str_match_m(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_reverse(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
- mrb_value rev;
- char *s, *e, *p;
-#ifdef INCLUDE_ENCODING
- int single = 1;
-#endif //INCLUDE_ENCODING
+ mrb_value obj;
+ char *s, *e, *p;
- if (RSTRING_LEN(str) <= 1) return mrb_str_dup(mrb, str);
-#ifdef INCLUDE_ENCODING
- enc = STR_ENC_GET(mrb, str);
-#endif //INCLUDE_ENCODING
- rev = mrb_str_new5(mrb, str, 0, RSTRING_LEN(str));
- s = RSTRING_PTR(str); e = RSTRING_END(str);
- p = RSTRING_END(rev);
+ if (RSTRING(str)->len <= 1) return str_dup(mrb, str);
- if (RSTRING_LEN(str) > 1) {
-#ifdef INCLUDE_ENCODING
- if (single_byte_optimizable(mrb, str)) {
-#endif //INCLUDE_ENCODING
- while (s < e) {
- *--p = *s++;
- }
-#ifdef INCLUDE_ENCODING
- }
- else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID) {
- while (s < e) {
- int clen = mrb_enc_fast_mbclen(s, e, enc);
-
- if (clen > 1 || (*s & 0x80)) single = 0;
- p -= clen;
- memcpy(p, s, clen);
- s += clen;
- }
- }
- else {
- while (s < e) {
- int clen = mrb_enc_mbclen(s, e, enc);
+ obj = mrb_str_new_with_class(mrb, str, 0, RSTRING(str)->len);
+ s = RSTRING(str)->buf; e = s + RSTRING(str)->len - 1;
+ p = RSTRING(obj)->buf;
- if (clen > 1 || (*s & 0x80)) single = 0;
- p -= clen;
- memcpy(p, s, clen);
- s += clen;
- }
- }
- }
- STR_SET_LEN(rev, RSTRING_LEN(str));
- if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) {
- if (single) {
- ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
- }
- else {
- ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
- }
-#endif //INCLUDE_ENCODING
- }
- mrb_enc_cr_str_copy_for_substr(mrb, rev, str);
-
- return rev;
+ while (e >= s) {
+ *p++ = *e--;
+ }
+ return obj;
}
/* 15.2.10.5.30 */
@@ -3056,29 +1609,18 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str)
static mrb_value
mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- if (RSTRING_LEN(str) > 1) {
- if (single_byte_optimizable(mrb, str)) {
-#endif //INCLUDE_ENCODING
- char *s, *e, c;
- str_modify_keep_cr(mrb, str);
- s = RSTRING_PTR(str);
- e = RSTRING_END(str) - 1;
- while (s < e) {
- c = *s;
- *s++ = *e;
- *e-- = c;
- }
-#ifdef INCLUDE_ENCODING
- }
- else {
- mrb_str_shared_replace(mrb, str, mrb_str_reverse(mrb, str));
+ char *s, *e;
+ char c;
+
+ if (RSTRING(str)->len > 1) {
+ s = RSTRING(str)->buf;
+ e = s + RSTRING(str)->len - 1;
+ while (s < e) {
+ c = *s;
+ *s++ = *e;
+ *e-- = c;
}
}
- else {
- str_modify_keep_cr(mrb, str);
- }
-#endif //INCLUDE_ENCODING
return str;
}
@@ -3135,12 +1677,7 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
size_t
mrb_str_sublen(mrb_state *mrb, mrb_value str, long pos)
{
- if (single_byte_optimizable(mrb, str) || pos < 0)
- return pos;
- else {
- char *p = RSTRING_PTR(str);
- return enc_strlen(p, p + pos, STR_ENC_GET(mrb, str), ENC_CODERANGE(str));
- }
+ return pos;
}
#endif //INCLUDE_ENCODING
@@ -3170,14 +1707,8 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
int argc;
mrb_value sub;
mrb_value vpos;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
- int pos, len = str_strlen(mrb, str, enc);
-#else
int pos, len = RSTRING_LEN(str);
-#endif //INCLUDE_ENCODING
- //if (mrb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
mrb_get_args(mrb, "*", &argv, &argc);
if (argc == 2) {
sub = argv[0];
@@ -3209,9 +1740,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
switch (mrb_type(sub)) {
case MRB_TT_REGEX:
#ifdef INCLUDE_REGEXP
- pos = str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos,
- STR_ENC_GET(mrb, str), single_byte_optimizable(mrb, str));
-
+ pos = mrb_str_offset(mrb, str, pos);
if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
pos = mrb_reg_search(mrb, sub, str, pos, 1);
pos = mrb_str_sublen(mrb, str, pos);
@@ -3269,12 +1798,11 @@ scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start)
pmatch = mrb_match_ptr(match);
regs = &pmatch->rmatch->regs;
if (regs->beg[0] == regs->end[0]) {
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
/*
* Always consume at least one character of the input string
*/
if (ps->len > regs->end[0])
- *start = regs->end[0] + mrb_enc_fast_mbclen(RSTRING_PTR(str)+regs->end[0],RSTRING_END(str), enc);
+ *start = regs->end[0] + RSTRING_LEN(str)-regs->end[0];
else
*start = regs->end[0] + 1;
}
@@ -3426,16 +1954,11 @@ static const char isspacetable[256] = {
* "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
*/
-//static mrb_value
-//mrb_str_split_m(int argc, mrb_value *argv, mrb_value str)
static mrb_value
mrb_str_split_m(mrb_state *mrb, mrb_value str)
{
mrb_value *argv;
int argc;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
mrb_value spat;
mrb_value limit;
enum {awk, string, regexp} split_type;
@@ -3462,21 +1985,12 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
i = 1;
}
-#ifdef INCLUDE_ENCODING
- enc = STR_ENC_GET(mrb, str);
-#endif //INCLUDE_ENCODING
- //if (mrb_nil_p(spat)) {
if (argc == 0) {
-// spat = mrb_nil_value();
-// goto fs_set;
split_type = awk;
}
else {
//fs_set:
if (mrb_type(spat) == MRB_TT_STRING) {
-#ifdef INCLUDE_REGEXP
- mrb_encoding *enc2 = STR_ENC_GET(mrb, spat);
-#endif //INCLUDE_REGEXP
split_type = string;
#ifdef INCLUDE_REGEXP
if (RSTRING_LEN(spat) == 0) {
@@ -3484,20 +1998,13 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
spat = mrb_reg_regcomp(mrb, spat);
split_type = regexp;
}
- else if (mrb_enc_asciicompat(mrb, enc2) == 1) {
+ else {
#endif //INCLUDE_REGEXP
if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
split_type = awk;
}
#ifdef INCLUDE_REGEXP
}
- else {
- int l;
- if (mrb_enc_ascget(mrb, RSTRING_PTR(spat), RSTRING_END(spat), &l, enc2) == ' ' &&
- RSTRING_LEN(spat) == l) {
- split_type = awk;
- }
- }
#endif //INCLUDE_REGEXP
}
else {
@@ -3520,89 +2027,28 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
unsigned int c;
end = beg;
-#ifdef INCLUDE_ENCODING
- if (is_ascii_string(mrb, str)) {
-#endif //INCLUDE_ENCODING
- while (ptr < eptr) {
- c = (unsigned char)*ptr++;
- if (skip) {
- if (ascii_isspace(c)) {
- beg = ptr - bptr;
- }
- else {
- end = ptr - bptr;
- skip = 0;
- if (!mrb_nil_p(limit) && lim <= i) break;
- }
- }
- else if (ascii_isspace(c)) {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
- skip = 1;
- beg = ptr - bptr;
- if (!mrb_nil_p(limit)) ++i;
- }
- else {
- end = ptr - bptr;
- }
- }
-#ifdef INCLUDE_ENCODING
- }
- else {
- while (ptr < eptr) {
- int n;
-
- c = mrb_enc_codepoint_len(mrb, ptr, eptr, &n, enc);
- ptr += n;
- if (skip) {
- if (mrb_isspace(c)) {
- beg = ptr - bptr;
- }
- else {
- end = ptr - bptr;
- skip = 0;
- if (!mrb_nil_p(limit) && lim <= i) break;
- }
- }
- else if (mrb_isspace(c)) {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
- skip = 1;
- beg = ptr - bptr;
- if (!mrb_nil_p(limit)) ++i;
- }
- else {
- end = ptr - bptr;
- }
+ while (ptr < eptr) {
+ c = (unsigned char)*ptr++;
+ if (skip) {
+ if (ascii_isspace(c)) {
+ beg = ptr - bptr;
+ }
+ else {
+ end = ptr - bptr;
+ skip = 0;
+ if (!mrb_nil_p(limit) && lim <= i) break;
+ }
+ }
+ else if (ascii_isspace(c)) {
+ mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
+ skip = 1;
+ beg = ptr - bptr;
+ if (!mrb_nil_p(limit)) ++i;
}
- }
- }
- else if (split_type == string) {
- char *ptr = RSTRING_PTR(str);
- char *temp = ptr;
- char *eptr = RSTRING_END(str);
- char *sptr = RSTRING_PTR(spat);
- long slen = RSTRING_LEN(spat);
-
- if (is_broken_string(mrb, str)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(STR_ENC_GET(mrb, str)));
- }
- if (is_broken_string(mrb, spat)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(STR_ENC_GET(mrb, spat)));
- }
- enc = mrb_enc_check(mrb, str, spat);
- while (ptr < eptr &&
- (end = mrb_memsearch(mrb, sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
- /* Check we are at the start of a char */
- char *t = mrb_enc_right_char_head(ptr, ptr + end, eptr, enc);
- if (t != ptr + end) {
- ptr = t;
- continue;
+ else {
+ end = ptr - bptr;
}
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end));
- ptr += end + slen;
- if (!mrb_nil_p(limit) && lim <= ++i) break;
}
- beg = ptr - temp;
-#endif //INCLUDE_ENCODING
}
else {
#ifdef INCLUDE_REGEXP
@@ -3621,17 +2067,14 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
break;
}
else if (last_null == 1) {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg,
- mrb_enc_fast_mbclen(ptr+beg,
- ptr+len,
- enc)));
+ mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, len));
beg = start;
}
else {
if (ptr+start == ptr+len)
start++;
else
- start += mrb_enc_fast_mbclen(ptr+start,ptr+len,enc);
+ start += len;
last_null = 1;
continue;
}
@@ -3696,77 +2139,6 @@ mrb_block_given_p()
static mrb_value
mrb_str_sub_bang(mrb_state *mrb, mrb_value str)
{
- mrb_value *argv;
- int argc;
- mrb_value pat, repl;
- long plen;
-
- mrb_get_args(mrb, "*", &argv, &argc);
- if (argc == 1 && mrb_block_given_p()) {
- /* do nothing */
- }
- else if (argc == 2) {
- repl = argv[1];
- //StringValue(repl);
- mrb_string_value(mrb, &repl);
- }
- else {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2)", argc);
- }
-
- pat = get_pat(mrb, argv[0], 1);
- str_modifiable(str);
- if (mrb_reg_search(mrb, pat, str, 0, 0) >= 0) {
- mrb_encoding *enc;
- int cr = ENC_CODERANGE(str);
- mrb_value match = mrb_backref_get(mrb);
- struct re_registers *regs = RMATCH_REGS(match);
- long beg0 = BEG(0);
- long end0 = END(0);
- char *p, *rp;
- long len, rlen;
-
- repl = mrb_reg_regsub(mrb, repl, str, regs, pat);
- enc = mrb_enc_compatible(mrb, str, repl);
- if (!enc) {
- mrb_encoding *str_enc = STR_ENC_GET(mrb, str);
- p = RSTRING_PTR(str); len = RSTRING_LEN(str);
- if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
- coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
- mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s",
- mrb_enc_name(str_enc),
- mrb_enc_name(STR_ENC_GET(mrb, repl)));
- }
- enc = STR_ENC_GET(mrb, repl);
- }
- mrb_str_modify(mrb, str);
- mrb_enc_associate(mrb, str, enc);
- if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
- int cr2 = ENC_CODERANGE(repl);
- if (cr2 == ENC_CODERANGE_BROKEN ||
- (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
- cr = ENC_CODERANGE_UNKNOWN;
- else
- cr = cr2;
- }
- plen = end0 - beg0;
- rp = RSTRING_PTR(repl); rlen = RSTRING_LEN(repl);
- len = RSTRING_LEN(str);
- if (rlen > plen) {
- RESIZE_CAPA(str, len + rlen - plen);
- }
- p = RSTRING_PTR(str);
- if (rlen != plen) {
- memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
- }
- memcpy(p + beg0, rp, rlen);
- len += rlen - plen;
- STR_SET_LEN(str, len);
- RSTRING_PTR(str)[len] = '\0';
- ENC_CODERANGE_SET(str, cr);
-
- return str;
- }
return mrb_nil_value();
}
#endif //INCLUDE_REGEXP
@@ -4197,66 +2569,18 @@ mrb_str_to_s(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
char *s, *send;
int modify = 0;
-#ifdef INCLUDE_ENCODING
- int n;
-
- str_modify_keep_cr(mrb, str);
- enc = STR_ENC_GET(mrb, str);
- mrb_str_check_dummy_enc(mrb, enc);
- s = RSTRING_PTR(str); send = RSTRING_END(str);
- if (single_byte_optimizable(mrb, str)) {
- while (s < send) {
- unsigned int c = *(unsigned char*)s;
-
- if (mrb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
- *s = 'A' + (c - 'a');
- modify = 1;
- }
- s++;
- }
- }
- else {
- int ascompat = mrb_enc_asciicompat(mrb, enc);
-
- while (s < send) {
- unsigned int c;
- if (ascompat && (c = *(unsigned char*)s) < 0x80) {
- if (mrb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
- *s = 'A' + (c - 'a');
- modify = 1;
- }
- s++;
- }
- else {
- c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
- if (mrb_enc_islower(c, enc)) {
- /* assuming toupper returns codepoint with same size */
- mrb_enc_mbcput(mrb_enc_toupper(c, enc), s, enc);
- modify = 1;
- }
- s += n;
- }
- }
- }
-#else
- mrb_str_modify(mrb, str);
- s = RSTRING_PTR(str); send = RSTRING_END(str);
+ s = RSTRING(str)->buf; send = s + RSTRING(str)->len;
while (s < send) {
- unsigned int c = *(unsigned char*)s;
-
- if ('a' <= c && c <= 'z') {
- *s = 'A' + (c - 'a');
+ if (ISLOWER(*s)) {
+ *s = toupper(*s);
modify = 1;
}
s++;
}
-#endif //INCLUDE_ENCODING
+
if (modify) return str;
return mrb_nil_value();
}
@@ -4282,252 +2606,6 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self)
return str;
}
-/* 15.2.10.5.xx */
-/*
- * call-seq:
- * str.force_encoding(encoding) -> str
- *
- * Changes the encoding to +encoding+ and returns self.
- */
-#ifdef INCLUDE_ENCODING
-static mrb_value
-mrb_str_force_encoding(mrb_state *mrb, mrb_value self)
-{
- mrb_value enc;
-
- mrb_get_args(mrb, "o", &enc);
- str_modifiable(self);
- mrb_enc_associate(mrb, self, mrb_to_encoding(mrb, enc));
- ENC_CODERANGE_CLEAR(self);
- return self;
-}
-
-long
-mrb_str_coderange_scan_restartable(const char *s, const char *e, mrb_encoding *enc, int *cr)
-{
- const char *p = s;
-
- if (*cr == ENC_CODERANGE_BROKEN)
- return e - s;
-
- if (mrb_enc_to_index(enc) == 0) {
- /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
- p = search_nonascii(p, e);
- *cr = (!p && *cr != ENC_CODERANGE_VALID) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
- return e - s;
- }
- else if (mrb_enc_asciicompat(mrb, enc)) {
- p = search_nonascii(p, e);
- if (!p) {
- if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT;
- return e - s;
- }
- while (p < e) {
- int ret = mrb_enc_precise_mbclen(p, e, enc);
- if (!MBCLEN_CHARFOUND_P(ret)) {
- *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN;
- return p - s;
- }
- p += MBCLEN_CHARFOUND_LEN(ret);
- if (p < e) {
- p = search_nonascii(p, e);
- if (!p) {
- *cr = ENC_CODERANGE_VALID;
- return e - s;
- }
- }
- }
- *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID;
- return p - s;
- }
- else {
- while (p < e) {
- int ret = mrb_enc_precise_mbclen(p, e, enc);
- if (!MBCLEN_CHARFOUND_P(ret)) {
- *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN;
- return p - s;
- }
- p += MBCLEN_CHARFOUND_LEN(ret);
- }
- *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID;
- return p - s;
- }
-}
-
-mrb_value
-mrb_str_conv_enc_opts(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to, int ecflags, mrb_value ecopts)
-{
- mrb_econv_t *ec;
- mrb_econv_result_t ret;
- long len;
- mrb_value newstr;
- const unsigned char *sp;
- unsigned char *dp;
-
- if (!to) return str;
- if (from == to) return str;
- if ((mrb_enc_asciicompat(mrb, to) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) ||
- to == mrb_ascii8bit_encoding(mrb)) {
- if (STR_ENC_GET(mrb, str) != to) {
- str = mrb_str_dup(mrb, str);
- mrb_enc_associate(mrb, str, to);
- }
- return str;
- }
-
- len = RSTRING_LEN(str);
- newstr = mrb_str_new(mrb, 0, len);
-
- retry:
- ec = mrb_econv_open_opts(mrb, from->name, to->name, ecflags, ecopts);
- if (!ec) return str;
-
- sp = (unsigned char*)RSTRING_PTR(str);
- dp = (unsigned char*)RSTRING_PTR(newstr);
- ret = mrb_econv_convert(mrb, ec, &sp, (unsigned char*)RSTRING_END(str),
- &dp, (unsigned char*)RSTRING_END(newstr), 0);
- mrb_econv_close(ec);
- switch (ret) {
- case econv_destination_buffer_full:
- /* destination buffer short */
- len = len < 2 ? 2 : len * 2;
- mrb_str_resize(mrb, newstr, len);
- goto retry;
-
- case econv_finished:
- len = dp - (unsigned char*)RSTRING_PTR(newstr);
- mrb_str_set_len(mrb, newstr, len);
- mrb_enc_associate(mrb, newstr, to);
- return newstr;
-
- case econv_invalid_byte_sequence:
- case econv_undefined_conversion:
- case econv_source_buffer_empty:
- case econv_after_output:
- case econv_incomplete_input:
- /* some error, return original */
- return str;
-
- default:
- mrb_bug("Internal Error: Invalid return value mrb_econv_convert.");
- return str;
- }
-}
-
-mrb_value
-mrb_str_conv_enc(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to)
-{
- return mrb_str_conv_enc_opts(mrb, str, from, to, 0, mrb_nil_value());
-}
-#endif //INCLUDE_ENCODING
-
-#ifndef INCLUDE_ENCODING
-#undef SIGN_EXTEND_CHAR
-#if __STDC__
-# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
-#else /* not __STDC__ */
-/* As in Harbison and Steele. */
-# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
-#endif
-#define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_'))
-
-static int
-is_special_global_name(m)
- const char *m;
-{
- switch (*m) {
- case '~': case '*': case '$': case '?': case '!': case '@':
- case '/': case '\\': case ';': case ',': case '.': case '=':
- case ':': case '<': case '>': case '\"':
- case '&': case '`': case '\'': case '+':
- case '0':
- ++m;
- break;
- case '-':
- ++m;
- if (is_identchar(*m)) m += 1;
- break;
- default:
- if (!ISDIGIT(*m)) return 0;
- do ++m; while (ISDIGIT(*m));
- }
- return !*m;
-}
-
-int
-mrb_symname_p(const char *name)
-{
- const char *m = name;
- int localid = FALSE;
-
- if (!m) return FALSE;
- switch (*m) {
- case '\0':
- return FALSE;
-
- case '$':
- if (is_special_global_name(++m)) return TRUE;
- goto id;
-
- case '@':
- if (*++m == '@') ++m;
- goto id;
-
- case '<':
- switch (*++m) {
- case '<': ++m; break;
- case '=': if (*++m == '>') ++m; break;
- default: break;
- }
- break;
-
- case '>':
- switch (*++m) {
- case '>': case '=': ++m; break;
- }
- break;
-
- case '=':
- switch (*++m) {
- case '~': ++m; break;
- case '=': if (*++m == '=') ++m; break;
- default: return FALSE;
- }
- break;
-
- case '*':
- if (*++m == '*') ++m;
- break;
-
- case '+': case '-':
- if (*++m == '@') ++m;
- break;
-
- case '|': case '^': case '&': case '/': case '%': case '~': case '`':
- ++m;
- break;
-
- case '[':
- if (*++m != ']') return FALSE;
- if (*++m == '=') ++m;
- break;
-
- default:
- localid = !ISUPPER(*m);
-id:
- if (*m != '_' && !ISALPHA(*m)) return FALSE;
- while (is_identchar(*m)) m += 1;
- if (localid) {
- switch (*m) {
- case '!': case '?': case '=': ++m;
- }
- }
- break;
- }
- return *m ? FALSE : TRUE;
-}
-#endif //INCLUDE_ENCODING
-
/*
* call-seq:
* str.dump -> new_str
@@ -4538,16 +2616,10 @@ id:
mrb_value
mrb_str_dump(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = mrb_enc_get(mrb, str);
-#endif //INCLUDE_ENCODING
long len;
const char *p, *pend;
- char *q, *qend;
+ char *q;
mrb_value result;
-#ifdef INCLUDE_ENCODING
- int u8 = (enc == mrb_utf8_encoding(mrb));
-#endif //INCLUDE_ENCODING
len = 2; /* "" */
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
@@ -4570,33 +2642,15 @@ mrb_str_dump(mrb_state *mrb, mrb_value str)
len++;
}
else {
-#ifdef INCLUDE_ENCODING
- if (u8) { /* \u{NN} */
- int n = mrb_enc_precise_mbclen(p-1, pend, enc);
- if (MBCLEN_CHARFOUND_P(n-1)) {
- unsigned int cc = mrb_enc_mbc_to_codepoint(p-1, pend, enc);
- while (cc >>= 4) len++;
- len += 5;
- p += MBCLEN_CHARFOUND_LEN(n)-1;
- break;
- }
- }
-#endif //INCLUDE_ENCODING
len += 4; /* \xNN */
}
break;
}
}
-#ifdef INCLUDE_ENCODING
- if (!mrb_enc_asciicompat(mrb, enc)) {
- len += 19; /* ".force_encoding('')" */
- len += strlen(enc->name);
- }
-#endif //INCLUDE_ENCODING
- result = mrb_str_new5(mrb, str, 0, len);
+ result = mrb_str_new_with_class(mrb, str, 0, len);
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
- q = RSTRING_PTR(result); qend = q + len + 1;
+ q = RSTRING_PTR(result);
*q++ = '"';
while (p < pend) {
@@ -4647,35 +2701,11 @@ mrb_str_dump(mrb_state *mrb, mrb_value str)
}
else {
*q++ = '\\';
-#ifdef INCLUDE_ENCODING
- if (u8) {
- int n = mrb_enc_precise_mbclen(p-1, pend, enc) - 1;
- if (MBCLEN_CHARFOUND_P(n)) {
- int cc = mrb_enc_mbc_to_codepoint(p-1, pend, enc);
- p += n;
- snprintf(q, qend-q, "u{%x}", cc);
- q += strlen(q);
- continue;
- }
- }
- snprintf(q, qend-q, "x%02X", c);
-#else
sprintf(q, "%03o", c&0xff);
-#endif //INCLUDE_ENCODING
q += 3;
}
}
*q++ = '"';
-#ifdef INCLUDE_ENCODING
- *q = '\0';
- if (!mrb_enc_asciicompat(mrb, enc)) {
- snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name);
- enc = mrb_ascii8bit_encoding(mrb);
- }
- /* result from dump is ASCII */
- mrb_enc_associate(mrb, result, enc);
- ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT);
-#endif //INCLUDE_ENCODING
return result;
}
@@ -4686,8 +2716,6 @@ mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len)
mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
}
if (0/*STR_ASSOC_P(str)*/) {
- mrb_str_modify(mrb, str);
- //if (STR_EMBED_P(str)) str_make_independent(mrb, str);
mrb_realloc(mrb, RSTRING(str)->buf, RSTRING(str)->len+len+1);
memcpy(RSTRING(str)->buf + RSTRING(str)->len, ptr, len);
RSTRING(str)->len += len;
@@ -4701,18 +2729,13 @@ mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len)
mrb_value
mrb_str_cat2(mrb_state *mrb, mrb_value str, const char *ptr)
{
- return mrb_str_cat(mrb, str, ptr, strlen(ptr));
+ return mrb_str_cat(mrb, str, ptr, strlen(ptr));
}
-mrb_value
+static mrb_value
mrb_str_vcatf(mrb_state *mrb, mrb_value str, const char *fmt, va_list ap)
{
- //mrb_printf_buffer f;
- //mrb_value klass;
-
- //StringValue(str);
mrb_string_value(mrb, &str);
- mrb_str_modify(mrb, str);
mrb_str_resize(mrb, str, (char*)RSTRING_END(str) - RSTRING_PTR(str));
return str;
@@ -4730,12 +2753,6 @@ mrb_str_catf(mrb_state *mrb, mrb_value str, const char *format, ...)
return str;
}
-void
-mrb_lastline_set(mrb_value val)
-{
- //vm_svar_set(0, val);
-}
-
mrb_value
mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2)
{
@@ -4743,69 +2760,7 @@ mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2)
return mrb_str_buf_append(mrb, str, str2);
}
-void
-mrb_str_setter(mrb_state *mrb, mrb_value val, mrb_sym id, mrb_value *var)
-{
- if (!mrb_nil_p(val) && (mrb_type(val) != MRB_TT_STRING)) {
- mrb_raise(mrb, E_TYPE_ERROR, "value of %s must be String", mrb_sym2name(mrb, id));
- }
- *var = val;
-}
-
-#ifdef INCLUDE_ENCODING
-/*
- * call-seq:
- * str.ascii_only? -> true or false
- *
- * Returns true for a string which has only ASCII characters.
- *
- * "abc".force_encoding("UTF-8").ascii_only? #=> true
- * "abc\u{6666}".force_encoding("UTF-8").ascii_only? #=> false
- */
-
-int
-mrb_str_is_ascii_only_p(mrb_state *mrb, mrb_value str)
-{
- int cr = mrb_enc_str_coderange(mrb, str);
-
- return cr == ENC_CODERANGE_7BIT ? TRUE : FALSE;
-}
-
-#endif //INCLUDE_ENCODING
-
#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
-int
-mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, int unicode_p)
-{
- char buf[CHAR_ESC_LEN + 1];
- int l;
-
- if (sizeof(c) > 4) {
- c &= 0xffffffff;
- }
- if (unicode_p) {
- if (c < 0x7F && ISPRINT(c)) {
- snprintf(buf, CHAR_ESC_LEN, "%c", c);
- }
- else if (c < 0x10000) {
- snprintf(buf, CHAR_ESC_LEN, "\\u%04X", c);
- }
- else {
- snprintf(buf, CHAR_ESC_LEN, "\\u{%X}", c);
- }
- }
- else {
- if (c < 0x100) {
- snprintf(buf, CHAR_ESC_LEN, "\\x%02X", c);
- }
- else {
- snprintf(buf, CHAR_ESC_LEN, "\\x{%X}", c);
- }
- }
- l = (int)strlen(buf); /* CHAR_ESC_LEN cannot exceed INT_MAX */
- mrb_str_buf_cat(mrb, result, buf, l);
- return l;
-}
/*
* call-seq:
@@ -4821,24 +2776,9 @@ mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, i
mrb_value
mrb_str_inspect(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
-#endif //INCLUDE_ENCODING
const char *p, *pend, *prev;
char buf[CHAR_ESC_LEN + 1];
-#ifdef INCLUDE_ENCODING
- mrb_value result = mrb_str_buf_new(mrb, 0);
- mrb_encoding *resenc = mrb_default_internal_encoding(mrb);
- int unicode_p = mrb_enc_unicode_p(enc);
- int asciicompat = mrb_enc_asciicompat(mrb, enc);
-
- if (resenc == NULL) resenc = mrb_default_external_encoding(mrb);
- if (!mrb_enc_asciicompat(mrb, resenc)) resenc = mrb_usascii_encoding(mrb);
- mrb_enc_associate(mrb, result, resenc);
- mrb_str_buf_cat(mrb, result, "\"", strlen("\""));
-#else
mrb_value result = mrb_str_new_cstr(mrb, "\"");
-#endif //INCLUDE_ENCODING
p = RSTRING_PTR(str); pend = RSTRING_END(str);
prev = p;
@@ -4846,37 +2786,6 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
unsigned int c, cc;
int n;
-#ifdef INCLUDE_ENCODING
- n = mrb_enc_precise_mbclen(p, pend, enc);
- if (!MBCLEN_CHARFOUND_P(n)) {
- if (p > prev) mrb_str_buf_cat(mrb, result, prev, p - prev);
- n = mrb_enc_mbminlen(enc);
- if (pend < p + n)
- n = (int)(pend - p);
- while (n--) {
- snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
- mrb_str_buf_cat(mrb, result, buf, strlen(buf));
- prev = ++p;
- }
- continue;
- }
- n = MBCLEN_CHARFOUND_LEN(n);
- c = mrb_enc_mbc_to_codepoint(p, pend, enc);
- p += n;
- if (c == '"'|| c == '\\' ||
- (c == '#' &&
- p < pend &&
- MBCLEN_CHARFOUND_P(mrb_enc_precise_mbclen(p,pend,enc)) &&
- (cc = mrb_enc_codepoint(mrb, p, pend, enc),
- (cc == '$' || cc == '@' || cc == '{')))) {
- if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev);
- mrb_str_buf_cat(mrb, result, "\\", strlen("\\")); //str_buf_cat2(result, "\\");
- if (asciicompat || enc == resenc) {
- prev = p - n;
- continue;
- }
- }
-#else
c = *p++;
n = 1;
if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
@@ -4889,7 +2798,6 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
mrb_str_buf_cat(mrb, result, buf, 1);
continue;
}
-#endif //INCLUDE_ENCODING
switch (c) {
case '\n': cc = 'n'; break;
case '\r': cc = 'r'; break;
@@ -4909,20 +2817,10 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
prev = p;
continue;
}
-#ifdef INCLUDE_ENCODING
- if ((enc == resenc && mrb_enc_isprint(c, enc)) ||
- (asciicompat && mrb_enc_isascii(c, enc) && ISPRINT(c))) {
- continue;
- }
-#endif //INCLUDE_ENCODING
else {
if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev);
-#ifdef INCLUDE_ENCODING
- mrb_str_buf_cat_escaped_char(mrb, result, c, unicode_p);
-#else
sprintf(buf, "\\%03o", c & 0377);
mrb_str_buf_cat(mrb, result, buf, strlen(buf));
-#endif //INCLUDE_ENCODING
prev = p;
continue;
}
@@ -4935,21 +2833,6 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
return result;
}
-#ifdef INCLUDE_ENCODING
-int
-sym_printable(mrb_state *mrb, const char *s, const char *send, mrb_encoding *enc)
-{
- while (s < send) {
- int n;
- int c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
-
- if (!mrb_enc_isprint(c, enc)) return FALSE;
- s += n;
- }
- return TRUE;
-}
-#endif //INCLUDE_ENCODING
-
/* ---------------------------*/
void
mrb_init_string(mrb_state *mrb)
@@ -4978,7 +2861,7 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "downcase", mrb_str_downcase, ARGS_NONE()); /* 15.2.10.5.13 */
mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, ARGS_NONE()); /* 15.2.10.5.14 */
mrb_define_method(mrb, s, "each_line", mrb_str_each_line, ARGS_REQ(1)); /* 15.2.10.5.15 */
- mrb_define_method(mrb, s, "empty?", mrb_str_empty, ARGS_NONE()); /* 15.2.10.5.16 */
+ mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, ARGS_NONE()); /* 15.2.10.5.16 */
mrb_define_method(mrb, s, "eql?", mrb_str_eql, ARGS_REQ(1)); /* 15.2.10.5.17 */
#ifdef INCLUDE_REGEXP
mrb_define_method(mrb, s, "gsub", mrb_str_gsub, ARGS_REQ(1)); /* 15.2.10.5.18 */
@@ -5013,9 +2896,5 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "to_sym", mrb_str_intern, ARGS_NONE()); /* 15.2.10.5.41 */
mrb_define_method(mrb, s, "upcase", mrb_str_upcase, ARGS_REQ(1)); /* 15.2.10.5.42 */
mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, ARGS_REQ(1)); /* 15.2.10.5.43 */
-#ifdef INCLUDE_ENCODING
- mrb_define_method(mrb, s, "encoding", mrb_obj_encoding, ARGS_NONE()); /* 15.2.10.5.44(x) */
- mrb_define_method(mrb, s, "force_encoding", mrb_str_force_encoding, ARGS_REQ(1)); /* 15.2.10.5.45(x) */
-#endif
mrb_define_method(mrb, s, "inspect", mrb_str_inspect, ARGS_NONE()); /* 15.2.10.5.46(x) */
}
diff --git a/src/symbol.c b/src/symbol.c
index b4ffc19e6..89e81af0e 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -149,13 +149,7 @@ mrb_sym_to_s(mrb_state *mrb, mrb_value sym)
{
mrb_sym id = SYM2ID(sym);
-#ifdef INCLUDE_REGEXP
- //return str_new3(mrb_cString, mrb_id2str(id));
- return str_new3(mrb, mrb_obj_class(mrb, sym), mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id)));
-#else
- return mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id)); //mrb_str_new2(mrb_id2name(SYM2ID(sym)));
-#endif
-
+ return mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id));
}
/* 15.2.11.3.4 */
@@ -185,42 +179,113 @@ sym_to_sym(mrb_state *mrb, mrb_value sym)
* :fred.inspect #=> ":fred"
*/
+#if __STDC__
+# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
+#else /* not __STDC__ */
+/* As in Harbison and Steele. */
+# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
+#endif
+#define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_'))
+
+/* Nonzero iff m (the text following '$') is exactly one special global name. */
+static int
+is_special_global_name(const char *m)
+{
+  switch (*m) {
+  case '~': case '*': case '$': case '?': case '!': case '@':
+  case '/': case '\\': case ';': case ',': case '.': case '=':
+  case ':': case '<': case '>': case '\"':
+  case '&': case '`': case '\'': case '+':
+  case '0':
+    ++m;                          /* one punctuation character, or a lone '0' */
+    break;
+  case '-':
+    ++m;                          /* '-' optionally followed by one identifier character */
+    if (is_identchar(*m)) m += 1;
+    break;
+  default:
+    if (!ISDIGIT(*m)) return 0;   /* anything else must start with a digit */
+    do ++m; while (ISDIGIT(*m));  /* consume the whole run of digits */
+  }
+  return !*m;                     /* accept only if nothing is left over */
+}
+/* TRUE iff name, read in full, is an operator, identifier or '$'/'@' variable name; else FALSE. */
+static int
+symname_p(const char *name)
+{
+ const char *m = name;
+ int localid = FALSE;
+ /* m scans forward over one candidate name; localid allows a trailing '!', '?' or '='. */
+ if (!m) return FALSE;
+ switch (*m) {
+ case '\0':
+ return FALSE;
+ /* '$': a special global name, otherwise '$' followed by an identifier */
+ case '$':
+ if (is_special_global_name(++m)) return TRUE;
+ goto id;
+ /* "@" or "@@" followed by an identifier */
+ case '@':
+ if (*++m == '@') ++m;
+ goto id;
+ /* "<", "<<", "<=", "<=>" */
+ case '<':
+ switch (*++m) {
+ case '<': ++m; break;
+ case '=': if (*++m == '>') ++m; break;
+ default: break;
+ }
+ break;
+ /* ">", ">>", ">=" */
+ case '>':
+ switch (*++m) {
+ case '>': case '=': ++m; break;
+ }
+ break;
+ /* "=~", "==", "==="; a bare "=" is rejected */
+ case '=':
+ switch (*++m) {
+ case '~': ++m; break;
+ case '=': if (*++m == '=') ++m; break;
+ default: return FALSE;
+ }
+ break;
+ /* "*", "**" */
+ case '*':
+ if (*++m == '*') ++m;
+ break;
+ /* "+", "-", "+@", "-@" */
+ case '+': case '-':
+ if (*++m == '@') ++m;
+ break;
+ /* single-character operators */
+ case '|': case '^': case '&': case '/': case '%': case '~': case '`':
+ ++m;
+ break;
+ /* "[]", "[]=" */
+ case '[':
+ if (*++m != ']') return FALSE;
+ if (*++m == '=') ++m;
+ break;
+ /* plain identifier; a '!', '?' or '=' suffix is allowed unless the first char is ISUPPER */
+ default:
+ localid = !ISUPPER(*m);
+id: /* also entered from the '$' and '@' cases, which leave localid FALSE */
+ if (*m != '_' && !ISALPHA(*m)) return FALSE;
+ while (is_identchar(*m)) m += 1;
+ if (localid) {
+ switch (*m) {
+ case '!': case '?': case '=': ++m;
+ }
+ }
+ break;
+ }
+ return *m ? FALSE : TRUE; /* any unconsumed character means the name is not valid */
+}
+
static mrb_value
sym_inspect(mrb_state *mrb, mrb_value sym)
{
-#ifdef INCLUDE_ENCODING
- #define STR_ENC_GET(mrb, str) mrb_enc_from_index(mrb, ENCODING_GET(mrb, str))
- mrb_value str;
- mrb_sym id = SYM2ID(sym);
- mrb_encoding *enc;
- const char *ptr;
- long len;
- char *dest;
- mrb_encoding *resenc = mrb_default_internal_encoding(mrb);
-
- if (resenc == NULL) resenc = mrb_default_external_encoding(mrb);
- sym = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id));//mrb_id2str(id);
- enc = STR_ENC_GET(mrb, sym);
- ptr = RSTRING_PTR(sym);
- len = RSTRING_LEN(sym);
- if ((resenc != enc && !mrb_str_is_ascii_only_p(mrb, sym)) || len != (long)strlen(ptr) ||
- !mrb_enc_symname_p(ptr, enc) || !sym_printable(mrb, ptr, ptr + len, enc)) {
- str = mrb_str_inspect(mrb, sym);
- len = RSTRING_LEN(str);
- mrb_str_resize(mrb, str, len + 1);
- dest = RSTRING_PTR(str);
- memmove(dest + 1, dest, len);
- dest[0] = ':';
- }
- else {
- char *dest;
- str = mrb_enc_str_new(mrb, 0, len + 1, enc);
- dest = RSTRING_PTR(str);
- dest[0] = ':';
- memcpy(dest + 1, ptr, len);
- }
- return str;
-#else
mrb_value str;
const char *name;
mrb_sym id = SYM2ID(sym);
@@ -229,12 +294,11 @@ sym_inspect(mrb_state *mrb, mrb_value sym)
str = mrb_str_new(mrb, 0, strlen(name)+1);
RSTRING(str)->buf[0] = ':';
strcpy(RSTRING(str)->buf+1, name);
- if (!mrb_symname_p(name)) {
+ if (!symname_p(name)) {
str = mrb_str_dump(mrb, str);
strncpy(RSTRING(str)->buf, ":\"", 2);
}
return str;
-#endif
}