diff options
| author | Yukihiro Matsumoto <[email protected]> | 2012-05-31 15:32:38 +0900 |
|---|---|---|
| committer | Yukihiro Matsumoto <[email protected]> | 2012-05-31 15:32:38 +0900 |
| commit | 64fc4ac332eab0be7704cf6f7ec5a96c523c0ed9 (patch) | |
| tree | 00bca09773b7584fd2b56c371fe6159550c38b6f /src | |
| parent | 0d8adaaaa16859342a37e3bf6832a8717c54f27c (diff) | |
| download | mruby-64fc4ac332eab0be7704cf6f7ec5a96c523c0ed9.tar.gz mruby-64fc4ac332eab0be7704cf6f7ec5a96c523c0ed9.zip | |
resolve conflict
Diffstat (limited to 'src')
| -rw-r--r-- | src/class.c | 8 | ||||
| -rw-r--r-- | src/encoding.c | 1685 | ||||
| -rw-r--r-- | src/encoding.h | 9 | ||||
| -rw-r--r-- | src/gc.c | 4 | ||||
| -rw-r--r-- | src/init.c | 2 | ||||
| -rw-r--r-- | src/object.c | 13 | ||||
| -rw-r--r-- | src/re.c | 792 | ||||
| -rw-r--r-- | src/sprintf.c | 33 | ||||
| -rw-r--r-- | src/string.c | 2379 | ||||
| -rw-r--r-- | src/symbol.c | 148 |
10 files changed, 329 insertions, 4744 deletions
diff --git a/src/class.c b/src/class.c index f96922f4b..f9a6154c5 100644 --- a/src/class.c +++ b/src/class.c @@ -17,12 +17,6 @@ #include "mruby/khash.h" -#ifdef INCLUDE_REGEXP - #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr -#else - #define mrb_usascii_str_new2 mrb_str_new_cstr -#endif - KHASH_MAP_INIT_INT(mt, struct RProc*); KHASH_MAP_INIT_INT(iv, mrb_value); @@ -1052,7 +1046,7 @@ mrb_mod_to_s(mrb_state *mrb, mrb_value klass) { //if (FL_TEST(klass, FL_SINGLETON)) { if (mrb_type(klass) == MRB_TT_SCLASS) { - mrb_value s = mrb_usascii_str_new2(mrb, "#<"); + mrb_value s = mrb_str_new_cstr(mrb, "#<"); mrb_value v = mrb_iv_get(mrb, klass, mrb_intern(mrb, "__attached__")); mrb_str_cat2(mrb, s, "Class:"); diff --git a/src/encoding.c b/src/encoding.c deleted file mode 100644 index 8e4257829..000000000 --- a/src/encoding.c +++ /dev/null @@ -1,1685 +0,0 @@ -/* -** encoding.c - Encoding class -** -** See Copyright Notice in mruby.h -*/ - -#include "mruby.h" -#ifdef INCLUDE_ENCODING -#include <ctype.h> -#ifndef NO_LOCALE_CHARMAP -#ifdef __CYGWIN__ -#include <windows.h> -#endif -#ifdef HAVE_LANGINFO_H -#include <langinfo.h> -#endif -#endif - -#define USE_UPPER_CASE_TABLE - -#include <ctype.h> -#include <stdio.h> -#include "regenc.h" -#include "regint.h" -#include "encoding.h" -#include "st.h" -#include <string.h> -#include "mruby/numeric.h" -#include "mruby/string.h" -#include "mruby/array.h" -#include "mruby/variable.h" -#include "mruby/hash.h" - -#define pprintf printf -#define mrb_warning printf -#define mrb_bug printf -#ifndef INT_MAX -#define INT_MAX 2147483647 -#endif -#define mrb_isascii(c) ((unsigned long)(c) < 128) -#define OBJ_FREEZE(a) -static mrb_sym id_encoding; -//mrb_value mrb_cEncoding; -static mrb_value mrb_encoding_list; - -struct mrb_encoding_entry { - const char *name; - mrb_encoding *enc; - mrb_encoding *base; -}; - -static struct { - struct mrb_encoding_entry *list; - int count; - int size; - st_table *names; -} enc_table; - -void 
mrb_enc_init(mrb_state *mrb); - -enum { - ENCINDEX_ASCII, - ENCINDEX_UTF_8, - ENCINDEX_US_ASCII, - ENCINDEX_BUILTIN_MAX -}; -#define ENCODING_COUNT ENCINDEX_BUILTIN_MAX -#define ENCODING_NAMELEN_MAX 63 -#define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX) -#define STRCASECMP(s1, s2) (st_strcasecmp(s1, s2)) - -//#define BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & T_MASK) -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - -#ifndef OTHER -#define OTHER 2 -#endif - -#define mrb_usascii_str_new2 mrb_usascii_str_new_cstr - -static const struct mrb_data_type encoding_data_type = { - "encoding", 0, -}; -#define is_data_encoding(obj) (DATA_TYPE(obj) == &encoding_data_type) - -// RUBY_IMMEDIATE_MASK = 0x03, -//#define IMMEDIATE_MASK RUBY_IMMEDIATE_MASK -//#define IMMEDIATE_P(x) ((VALUE)(x) & IMMEDIATE_MASK) -//#define SPECIAL_CONST_P(x) (IMMEDIATE_P(x) || !RTEST(x)) - -static mrb_value -enc_new(mrb_state *mrb, mrb_encoding *encoding) -{ - return mrb_obj_value(Data_Wrap_Struct(mrb, ENCODE_CLASS, &encoding_data_type, encoding)); -} - -#define enc_autoload_p(enc) (!mrb_enc_mbmaxlen(enc)) - -#define UNSPECIFIED_ENCODING INT_MAX - - -static mrb_value -mrb_enc_from_encoding_index(mrb_state *mrb, int idx) -{ - mrb_value list, enc; - - if (mrb_nil_p(list = mrb_encoding_list)) { - mrb_bug("mrb_enc_from_encoding_index(%d): no mrb_encoding_list", idx); - } - enc = mrb_ary_ref(mrb, list, idx);//mrb_ary_entry(list, idx); - if (mrb_nil_p(enc)) { - mrb_bug("mrb_enc_from_encoding_index(%d): not created yet", idx); - } - return enc; -} - -mrb_value -mrb_enc_from_encoding(mrb_state *mrb, mrb_encoding *encoding) -{ - int idx; - if (!encoding) return mrb_nil_value(); - idx = ENC_TO_ENCINDEX(encoding); - return mrb_enc_from_encoding_index(mrb, idx); -} - -static int enc_autoload(mrb_state *mrb, mrb_encoding *enc); -static int -check_encoding(mrb_state *mrb, mrb_encoding *enc) -{ - int index = mrb_enc_to_index(enc); - if 
(mrb_enc_from_index(mrb, index) != enc) - return -1; - if (enc_autoload_p(enc)) { - index = enc_autoload(mrb, enc); - } - return index; -} - -static int -enc_check_encoding(mrb_state *mrb, mrb_value obj) -{ - if (SPECIAL_CONST_P(obj) || !is_data_encoding(obj)) { - return -1; - } - return check_encoding(mrb, RDATA(obj)->data); -} - -static int -must_encoding(mrb_state *mrb, mrb_value enc) -{ - int index = enc_check_encoding(mrb, enc); - if (index < 0) { - mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type %s (expected Encoding)", - mrb_obj_classname(mrb, enc)); - } - return index; -} - -int -mrb_to_encoding_index(mrb_state *mrb, mrb_value enc) -{ - int idx; - - idx = enc_check_encoding(mrb, enc); - if (idx >= 0) { - return idx; - } - else if (mrb_nil_p(enc = mrb_check_string_type(mrb, enc))) { - return -1; - } - if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) { - return -1; - } - //return mrb_enc_find_index(StringValueCStr(enc)); - return mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc)); - -} - -static mrb_encoding * -to_encoding(mrb_state *mrb, mrb_value enc) -{ - int idx; - - //StringValue(enc); - mrb_string_value(mrb, &enc); - - if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid name encoding (non ASCII)"); - } - //idx = mrb_enc_find_index(StringValueCStr(enc)); - idx = mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc)); - if (idx < 0) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %s", RSTRING_PTR(enc)); - } - return mrb_enc_from_index(mrb, idx); -} - -mrb_encoding * -mrb_to_encoding(mrb_state *mrb, mrb_value enc) -{ - if (enc_check_encoding(mrb, enc) >= 0) return RDATA(enc)->data; - return to_encoding(mrb, enc); -} - -static int -enc_table_expand(int newsize) -{ - struct mrb_encoding_entry *ent; - int count = newsize; - - if (enc_table.size >= newsize) return newsize; - newsize = (newsize + 7) / 8 * 8; - ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize); - if 
(!ent) return -1; - memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size)); - enc_table.list = ent; - enc_table.size = newsize; - return count; -} - -static int -enc_register_at(mrb_state *mrb, int index, const char *name, mrb_encoding *encoding) -{ - struct mrb_encoding_entry *ent = &enc_table.list[index]; - mrb_value list; - mrb_value ref_ary; - - if (!valid_encoding_name_p(name)) return -1; - if (!ent->name) { - ent->name = name = strdup(name); - } - else if (STRCASECMP(name, ent->name)) { - return -1; - } - if (!ent->enc) { - ent->enc = xmalloc(sizeof(mrb_encoding)); - } - if (encoding) { - *ent->enc = *encoding; - } - else { - memset(ent->enc, 0, sizeof(*ent->enc)); - } - encoding = ent->enc; - encoding->name = name; - encoding->ruby_encoding_index = index; - st_insert(enc_table.names, (st_data_t)name, (st_data_t)index); - list = mrb_encoding_list; - //if (list && mrb_nil_p((mrb_ary_ref(mrb, list, index)))) { - if (list.tt) { - ref_ary = mrb_ary_ref(mrb, list, index); - if mrb_nil_p(ref_ary) { - /* initialize encoding data */ - mrb_ary_set(mrb, list, index, enc_new(mrb, encoding));//rb_ary_store(list, index, enc_new(encoding)); - } - } - return index; -} - - -static int -enc_register(mrb_state *mrb, const char *name, mrb_encoding *encoding) -{ - int index = enc_table.count; - - if ((index = enc_table_expand(index + 1)) < 0) return -1; - enc_table.count = index; - return enc_register_at(mrb, index - 1, name, encoding); -} - -static void set_encoding_const(mrb_state *, const char*, mrb_encoding*); -int mrb_enc_registered(const char*); - -static void -enc_check_duplication(mrb_state *mrb, const char *name) -{ - if (mrb_enc_registered(name) >= 0) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "encoding %s is already registered", name); - } -} -static mrb_encoding* -set_base_encoding(int index, mrb_encoding *base) -{ - mrb_encoding *enc = enc_table.list[index].enc; - - enc_table.list[index].base = base; - if (mrb_enc_dummy_p(base)) ENC_SET_DUMMY(enc); - 
return enc; -} - -int -mrb_enc_replicate(mrb_state *mrb, const char *name, mrb_encoding *encoding) -{ - int idx; - - enc_check_duplication(mrb, name); - idx = enc_register(mrb, name, encoding); - set_base_encoding(idx, encoding); - set_encoding_const(mrb, name, mrb_enc_from_index(mrb, idx)); - return idx; -} - -/* 15.2.40.2.17 */ -/* - * call-seq: - * enc.replicate(name) -> encoding - * - * Returns a replicated encoding of _enc_ whose name is _name_. - * The new encoding should have the same byte structure of _enc_. - * If _name_ is used by another encoding, raise ArgumentError. - * - */ -static mrb_value -enc_replicate(mrb_state *mrb, mrb_value encoding) -{ - mrb_value name; - mrb_get_args(mrb, "o", &name); - return mrb_enc_from_encoding_index(mrb, - //mrb_enc_replicate(mrb, StringValueCStr(name), - mrb_enc_replicate(mrb, mrb_string_value_cstr(mrb, &name), - mrb_to_encoding(mrb, encoding))); -} -static int -enc_replicate_with_index(mrb_state *mrb, const char *name, mrb_encoding *origenc, int idx) -{ - if (idx < 0) { - idx = enc_register(mrb, name, origenc); - } - else { - idx = enc_register_at(mrb, idx, name, origenc); - } - if (idx >= 0) { - set_base_encoding(idx, origenc); - set_encoding_const(mrb, name, mrb_enc_from_index(mrb, idx)); - } - return idx; -} -int -mrb_encdb_replicate(mrb_state *mrb, const char *name, const char *orig) -{ - int origidx = mrb_enc_registered(orig); - int idx = mrb_enc_registered(name); - - if (origidx < 0) { - origidx = enc_register(mrb, orig, 0); - } - return enc_replicate_with_index(mrb, name, mrb_enc_from_index(mrb, origidx), idx); -} -int -mrb_define_dummy_encoding(mrb_state *mrb, const char *name) -{ - int index = mrb_enc_replicate(mrb, name, mrb_ascii8bit_encoding(mrb)); - mrb_encoding *enc = enc_table.list[index].enc; - - ENC_SET_DUMMY(enc); - return index; -} - -int -mrb_encdb_dummy(mrb_state *mrb, const char *name) -{ - int index = enc_replicate_with_index(mrb, name, mrb_ascii8bit_encoding(mrb), - mrb_enc_registered(name)); - 
mrb_encoding *enc = enc_table.list[index].enc; - - ENC_SET_DUMMY(enc); - return index; -} - -/* 15.2.40.2.13 */ -/* - * call-seq: - * enc.dummy? -> true or false - * - * Returns true for dummy encodings. - * A dummy encoding is an encoding for which character handling is not properly - * implemented. - * It is used for stateful encodings. - * - * Encoding::ISO_2022_JP.dummy? #=> true - * Encoding::UTF_8.dummy? #=> false - * - */ -static mrb_value -enc_dummy_p(mrb_state *mrb, mrb_value enc) -{ - return ENC_DUMMY_P(enc_table.list[must_encoding(mrb, enc)].enc) ? mrb_true_value() : mrb_false_value(); -} - -/* 15.2.40.2.12 */ -/* - * call-seq: - * enc.ascii_compatible? -> true or false - * - * Returns whether ASCII-compatible or not. - * - * Encoding::UTF_8.ascii_compatible? #=> true - * Encoding::UTF_16BE.ascii_compatible? #=> false - * - */ -static mrb_value -enc_ascii_compatible_p(mrb_state *mrb, mrb_value enc) -{ - return mrb_enc_asciicompat(mrb, enc_table.list[must_encoding(mrb, enc)].enc) ? mrb_true_value() : mrb_false_value(); -} - -static const char * -enc_alias_internal(const char *alias, int idx) -{ - alias = strdup(alias); - st_insert(enc_table.names, (st_data_t)alias, (st_data_t)idx); - return alias; -} - -/* - * Returns 1 when the encoding is Unicode series other than UTF-7 else 0. 
- */ -int -mrb_enc_unicode_p(mrb_encoding *enc) -{ - const char *name = mrb_enc_name(enc); - return name[0] == 'U' && name[1] == 'T' && name[2] == 'F' && name[4] != '7'; -} - -extern mrb_encoding OnigEncodingUTF_8; -extern mrb_encoding OnigEncodingUS_ASCII; - -void -mrb_enc_init(mrb_state *mrb) -{ - enc_table_expand(ENCODING_COUNT + 1); - if (!enc_table.names) { - enc_table.names = st_init_strcasetable(); - } -#define ENC_REGISTER(enc) enc_register_at(mrb, ENCINDEX_##enc, mrb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc) - ENC_REGISTER(ASCII); - ENC_REGISTER(UTF_8); - ENC_REGISTER(US_ASCII); -#undef ENC_REGISTER - enc_table.count = ENCINDEX_BUILTIN_MAX; -} - -mrb_encoding * -mrb_enc_from_index(mrb_state *mrb, int index) -{ - if (!enc_table.list) { - mrb_enc_init(mrb); - } - if (index < 0 || enc_table.count <= index) { - return 0; - } - return enc_table.list[index].enc; -} - -int -mrb_enc_registered(const char *name) -{ - st_data_t idx = 0; - - if (!name) return -1; - if (!enc_table.list) return -1; - if (st_lookup(enc_table.names, (st_data_t)name, &idx)) { - return (int)idx; - } - return -1; -} - -mrb_value -mrb_require_safe(mrb_value fname, int safe) -{ - mrb_value result = mrb_nil_value(); - return result; -} -static int -load_encoding(const char *name) -{ - mrb_value enclib;// = mrb_sprintf("enc/%s.so", name); - //mrb_value verbose;// = ruby_verbose; - //mrb_value debug;// = ruby_debug; - //mrb_value loaded; - char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3; - int idx; - - while (s < e) { - if (!ISALNUM(*s)) *s = '_'; - else if (ISUPPER(*s)) *s = TOLOWER(*s); - ++s; - } - OBJ_FREEZE(enclib); - //ruby_verbose = mrb_false_value(); - //ruby_debug = mrb_false_value(); - //loaded = mrb_protect(require_enc, enclib, 0); - //ruby_verbose = verbose; - //ruby_debug = debug; - //rb_set_errinfo(mrb_nil_value()); - //if (mrb_nil_p(loaded)) return -1; - if ((idx = mrb_enc_registered(name)) < 0) return -1; - if (enc_autoload_p(enc_table.list[idx].enc)) 
return -1; - return idx; -} - -static int -enc_autoload(mrb_state *mrb, mrb_encoding *enc) -{ - int i; - mrb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base; - - if (base) { - i = 0; - do { - if (i >= enc_table.count) return -1; - } while (enc_table.list[i].enc != base && (++i, 1)); - if (enc_autoload_p(base)) { - if (enc_autoload(mrb, base) < 0) return -1; - } - i = ENC_TO_ENCINDEX(enc); - enc_register_at(mrb, i, mrb_enc_name(enc), base); - } - else { - i = load_encoding(mrb_enc_name(enc)); - } - return i; -} - -int -mrb_enc_find_index(mrb_state *mrb, const char *name) -{ - int i = mrb_enc_registered(name); - mrb_encoding *enc; - - if (i < 0) { - i = load_encoding(name); - } - else if (!(enc = mrb_enc_from_index(mrb, i))) { - if (i != UNSPECIFIED_ENCODING) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "encoding %s is not registered", name); - } - } - else if (enc_autoload_p(enc)) { - if (enc_autoload(mrb, enc) < 0) { - //mrb_warn("failed to load encoding (%s); use ASCII-8BIT instead", - printf("failed to load encoding (%s); use ASCII-8BIT instead", - name); - return 0; - } - } - return i; -} - -mrb_encoding * -mrb_enc_find(mrb_state *mrb, const char *name) -{ - int idx = mrb_enc_find_index(mrb, name); - if (idx < 0) idx = 0; - return mrb_enc_from_index(mrb, idx); -} - -static inline int -enc_capable(mrb_value obj) -{ - if (SPECIAL_CONST_P(obj)) return (mrb_type(obj) == MRB_TT_SYMBOL); - switch (mrb_type(obj)/*BUILTIN_TYPE(obj)*/) { - case MRB_TT_STRING: - case MRB_TT_REGEX: - case MRB_TT_FILE: - return TRUE; - case MRB_TT_DATA: - if (is_data_encoding(obj)) return TRUE; - default: - return FALSE; - } -} - -mrb_sym -mrb_id_encoding(mrb_state *mrb) -{ - //CONST_ID(id_encoding, "encoding"); - id_encoding = mrb_intern(mrb, "encoding"); - return id_encoding; -} - -int -mrb_enc_get_index(mrb_state *mrb, mrb_value obj) -{ - int i = -1; - mrb_value tmp; - struct RString *ps; - - if (SPECIAL_CONST_P(obj)) { - if (mrb_type(obj) != MRB_TT_SYMBOL) return -1; - //obj = 
mrb_id2str(SYM2ID(obj)); - obj = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, SYM2ID(obj))); - } - switch (mrb_type(obj)/*BUILTIN_TYPE(obj)*/) { - as_default: - default: - case MRB_TT_STRING: - case MRB_TT_REGEX: - i = (int)ENCODING_GET_INLINED(obj); - ps = mrb_str_ptr(obj); - if (i == ENCODING_INLINE_MAX) { - mrb_value iv; - - //iv = rb_ivar_get(obj, mrb_id_encoding(mrb)); - iv = mrb_iv_get(mrb, obj, mrb_id_encoding(mrb)); - i = mrb_fixnum(iv); - } - break; - - case MRB_TT_FILE: - tmp = mrb_funcall(mrb, obj, "internal_encoding", 0, 0); - if (mrb_nil_p(tmp)) obj = mrb_funcall(mrb, obj, "external_encoding", 0, 0); - else obj = tmp; - if (mrb_nil_p(obj)) break; - case MRB_TT_DATA: - if (is_data_encoding(obj)) { - i = enc_check_encoding(mrb, obj); - } - else { - goto as_default; - } - break; - } - return i; -} - -void -mrb_enc_set_index(mrb_state *mrb, mrb_value obj, int idx) -{ - if (idx < ENCODING_INLINE_MAX) { - ENCODING_SET_INLINED(obj, idx); - return; - } - ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX); - //mrb_ivar_set(obj, mrb_id_encoding(mrb), INT2NUM(idx)); - mrb_iv_set(mrb, obj, mrb_id_encoding(mrb), mrb_fixnum_value(idx)); - return; -} - -mrb_value -mrb_enc_associate_index(mrb_state *mrb, mrb_value obj, int idx) -{ -/* enc_check_capable(obj);*/ - if (mrb_enc_get_index(mrb, obj) == idx) - return obj; - if (SPECIAL_CONST_P(obj)) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "cannot set encoding"); - } - if (!ENC_CODERANGE_ASCIIONLY(obj) || - !mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx))) { - ENC_CODERANGE_CLEAR(obj); - } - mrb_enc_set_index(mrb, obj, idx); - return obj; -} - -mrb_value -mrb_enc_associate(mrb_state *mrb, mrb_value obj, mrb_encoding *enc) -{ - return mrb_enc_associate_index(mrb, obj, mrb_enc_to_index(enc)); -} - -mrb_encoding* -mrb_enc_get(mrb_state *mrb, mrb_value obj) -{ - return mrb_enc_from_index(mrb, mrb_enc_get_index(mrb, obj)); -} - -mrb_encoding* -mrb_enc_check(mrb_state *mrb, mrb_value str1, mrb_value str2) -{ - mrb_encoding *enc = 
mrb_enc_compatible(mrb, str1, str2); - if (!enc) - mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s", - mrb_enc_name(mrb_enc_get(mrb, str1)), - mrb_enc_name(mrb_enc_get(mrb, str2))); - return enc; -} - -mrb_encoding* -mrb_enc_compatible(mrb_state *mrb, mrb_value str1, mrb_value str2) -{ - int idx1, idx2; - mrb_encoding *enc1, *enc2; - - idx1 = mrb_enc_get_index(mrb, str1); - idx2 = mrb_enc_get_index(mrb, str2); - - if (idx1 < 0 || idx2 < 0) - return 0; - - if (idx1 == idx2) { - return mrb_enc_from_index(mrb, idx1); - } - enc1 = mrb_enc_from_index(mrb, idx1); - enc2 = mrb_enc_from_index(mrb, idx2); - - if (mrb_type(str2) == MRB_TT_STRING && RSTRING_LEN(str2) == 0) - //return (idx1 == ENCINDEX_US_ASCII && mrb_enc_asciicompat(mrb, enc2)) ? enc2 : enc1; - return enc1; - if (mrb_type(str1) == MRB_TT_STRING && RSTRING_LEN(str1) == 0) - //return (idx2 == ENCINDEX_US_ASCII && mrb_enc_asciicompat(mrb, enc1)) ? enc1 : enc2; - return enc2; - if (!mrb_enc_asciicompat(mrb, enc1) || !mrb_enc_asciicompat(mrb, enc2)) { - return 0; - } - - /* objects whose encoding is the same of contents */ - //if (mrb_type(str2)/*BUILTIN_TYPE(str2)*/ != MRB_TT_STRING && idx2 == ENCINDEX_US_ASCII) - //return enc1; - //if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ != MRB_TT_STRING && idx1 == ENCINDEX_US_ASCII) - //return enc2; - - if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ != MRB_TT_STRING) { - mrb_value tmp = str1; - int idx0 = idx1; - str1 = str2; - str2 = tmp; - idx1 = idx2; - idx2 = idx0; - } - if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ == MRB_TT_STRING) { - int cr1, cr2; - - cr1 = mrb_enc_str_coderange(mrb, str1); - if (mrb_type(str2)/*BUILTIN_TYPE(str2)*/ == MRB_TT_STRING) { - cr2 = mrb_enc_str_coderange(mrb, str2); - if (cr1 != cr2) { - /* may need to handle ENC_CODERANGE_BROKEN */ - if (cr1 == ENC_CODERANGE_7BIT) return enc2; - if (cr2 == ENC_CODERANGE_7BIT) return enc1; - } - if (cr2 == ENC_CODERANGE_7BIT) { - if (idx1 == ENCINDEX_ASCII) return enc2; - return enc1; - } 
- } - if (cr1 == ENC_CODERANGE_7BIT) - return enc2; - } - return 0; -} - -void -mrb_enc_copy(mrb_state *mrb, mrb_value obj1, mrb_value obj2) -{ - mrb_enc_associate_index(mrb, obj1, mrb_enc_get_index(mrb, obj2)); -} - - -/* - * call-seq: - * obj.encoding -> encoding - * - * Returns the Encoding object that represents the encoding of obj. - */ - -mrb_value -mrb_obj_encoding(mrb_state *mrb, mrb_value obj) -{ - mrb_encoding *enc = mrb_enc_get(mrb, obj); - if (!enc) { - mrb_raise(mrb, E_TYPE_ERROR, "unknown encoding"); - } - return mrb_enc_from_encoding(mrb, enc); -} - -int -mrb_enc_fast_mbclen(const char *p, const char *e, mrb_encoding *enc) -{ - return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); -} - -int -mrb_enc_mbclen(const char *p, const char *e, mrb_encoding *enc) -{ - int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); - if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p) - return MBCLEN_CHARFOUND_LEN(n); - else { - int min = mrb_enc_mbminlen(enc); - return min <= e-p ? 
min : (int)(e-p); - } -} - -int -mrb_enc_precise_mbclen(const char *p, const char *e, mrb_encoding *enc) -{ - int n; - if (e <= p) - return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1); - n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); - if (e-p < n) - return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p)); - return n; -} - -int -mrb_enc_ascget(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc) -{ - unsigned int c, l; - if (e <= p) - return -1; - if (mrb_enc_asciicompat(mrb, enc)) { - c = (unsigned char)*p; - if (!ISASCII(c)) - return -1; - if (len) *len = 1; - return c; - } - l = mrb_enc_precise_mbclen(p, e, enc); - if (!MBCLEN_CHARFOUND_P(l)) - return -1; - c = mrb_enc_mbc_to_codepoint(p, e, enc); - if (!mrb_enc_isascii(c, enc)) - return -1; - if (len) *len = l; - return c; -} - -unsigned int -mrb_enc_codepoint_len(mrb_state *mrb, const char *p, const char *e, int *len_p, mrb_encoding *enc) -{ - int r; - if (e <= p) - mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string"); - r = mrb_enc_precise_mbclen(p, e, enc); - if (MBCLEN_CHARFOUND_P(r)) { - if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r); - return mrb_enc_mbc_to_codepoint(p, e, enc); - } - else - mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(enc)); - return 0; -} - -#undef mrb_enc_codepoint -unsigned int -mrb_enc_codepoint(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc) -{ - return mrb_enc_codepoint_len(mrb, p, e, 0, enc); -} - -int -mrb_enc_codelen(mrb_state *mrb, int c, mrb_encoding *enc) -{ - int n = ONIGENC_CODE_TO_MBCLEN(enc,c); - if (n == 0) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid codepoint 0x%x in %s", c, mrb_enc_name(enc)); - } - return n; -} - -int -mrb_enc_toupper(int c, mrb_encoding *enc) -{ - return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_UPPER_CASE(c):(c)); -} - -int -mrb_enc_tolower(int c, mrb_encoding *enc) -{ - return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_LOWER_CASE(c):(c)); -} - -/* 15.2.40.2.14 */ -/* 
- * call-seq: - * enc.inspect -> string - * - * Returns a string which represents the encoding for programmers. - * - * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>" - * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>" - */ -static mrb_value -enc_inspect(mrb_state *mrb, mrb_value self) -{ - mrb_value str; - //mrb_value str = mrb_sprintf("#<%s:%s%s>", mrb_obj_classname(mrb, self), - // mrb_enc_name((mrb_encoding*)(DATA_PTR(self))), - // (mrb_fixnum(enc_dummy_p(mrb, self)) ? " (dummy)" : "")); - char buf[256]; - sprintf(buf, "#<%s:%s%s>", mrb_obj_classname(mrb, self), - mrb_enc_name((mrb_encoding*)(DATA_PTR(self))), - (mrb_enc_dummy_p((mrb_encoding*)(DATA_PTR(self))) ? " (dummy)" : "")); - str = mrb_str_new(mrb, buf, strlen(buf)); - ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT); - return str; -} - -/* 15.2.40.2.15 */ -/* 15.2.40.2.18 */ -/* - * call-seq: - * enc.name -> string - * - * Returns the name of the encoding. - * - * Encoding::UTF_8.name #=> "UTF-8" - */ -static mrb_value -enc_name(mrb_state *mrb, mrb_value self) -{ - return mrb_usascii_str_new2(mrb, mrb_enc_name((mrb_encoding*)DATA_PTR(self))); -} - -struct fn_arg { - mrb_state *mrb; - enum st_retval (*func)(ANYARGS); - void *a; -}; - -static enum st_retval -fn_i(st_data_t key, st_data_t val, st_data_t arg) { - struct fn_arg *a = (struct fn_arg*)arg; - - return (*a->func)(a->mrb, key, val, a->a); -} - -static int -st_foreachNew(mrb_state *mrb, st_table *tbl, enum st_retval (*func)(ANYARGS), void *a) -{ - struct fn_arg arg = { - mrb, - func, - a, - }; - - return st_foreach(tbl, fn_i, (st_data_t)&arg); -} - -static enum st_retval -enc_names_i(mrb_state *mrb, st_data_t name, st_data_t idx, st_data_t args) -{ - mrb_value *arg = (mrb_value*)args; - int iargs = mrb_fixnum(arg[0]); - //if ((int)idx == (int)arg[0]) { - if ((int)idx == iargs) { - mrb_value str = mrb_usascii_str_new2(mrb, (char*)name); - //OBJ_FREEZE(str); - mrb_ary_push(mrb, arg[1], str); - } - 
return ST_CONTINUE; -} - -/* 15.2.40.2.16 */ -/* - * call-seq: - * enc.names -> array - * - * Returns the list of name and aliases of the encoding. - * - * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"] - */ -static mrb_value -enc_names(mrb_state *mrb, mrb_value self) -{ - mrb_value args[2]; - - args[0] = mrb_fixnum_value(mrb_to_encoding_index(mrb, self)); - args[1] = mrb_ary_new_capa(mrb, 0);//mrb_ary_new2(0); - st_foreachNew(mrb, enc_table.names, enc_names_i, args); - return args[1]; -} - -/* 15.2.40.2.8 */ -/* - * call-seq: - * Encoding.list -> [enc1, enc2, ...] - * - * Returns the list of loaded encodings. - * - * Encoding.list - * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>, - * #<Encoding:ISO-2022-JP (dummy)>] - * - * Encoding.find("US-ASCII") - * #=> #<Encoding:US-ASCII> - * - * Encoding.list - * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>, - * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>] - * - */ -static mrb_value -enc_list(mrb_state *mrb, mrb_value klass) -{ - struct RArray *ar = (struct RArray*)mrb_encoding_list.value.p; - mrb_value ary = mrb_ary_new_capa(mrb, 0);//mrb_ary_new2(0); - //mrb_ary_replace_m(mrb, ary/*, mmrb_encoding_list*/); - mrb_ary_replace(mrb, mrb_ary_ptr(ary), ar->buf, enc_table.count); - return ary; -} - -/* 15.2.40.2.7 */ -/* - * call-seq: - * Encoding.find(string) -> enc - * Encoding.find(symbol) -> enc - * - * Search the encoding with specified <i>name</i>. - * <i>name</i> should be a string or symbol. - * - * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII> - * Encoding.find(:Shift_JIS) #=> #<Encoding:Shift_JIS> - * - * Names which this method accept are encoding names and aliases - * including following special aliases - * - * "external":: default external encoding - * "internal":: default internal encoding - * "locale":: locale encoding - * "filesystem":: filesystem encoding - * - * An ArgumentError is raised when no encoding with <i>name</i>. 
- * Only <code>Encoding.find("internal")</code> however returns nil - * when no encoding named "internal", in other words, when Ruby has no - * default internal encoding. - */ -static mrb_value -enc_find(mrb_state *mrb, mrb_value klass) -{ - mrb_value enc; - - mrb_get_args(mrb, "o", &enc); - return mrb_enc_from_encoding(mrb, to_encoding(mrb, enc)); -} - -/* 15.2.40.2.2 */ -/* - * call-seq: - * Encoding.compatible?(str1, str2) -> enc or nil - * - * Checks the compatibility of two strings. - * If they are compatible, means concatenatable, - * returns an encoding which the concatenated string will be. - * If they are not compatible, nil is returned. - * - * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b") - * #=> #<Encoding:ISO-8859-1> - * - * Encoding.compatible?( - * "\xa1".force_encoding("iso-8859-1"), - * "\xa1\xa1".force_encoding("euc-jp")) - * #=> nil - * - */ -static mrb_value -enc_compatible_p(mrb_state *mrb, mrb_value klass) -{ - mrb_value str1; - mrb_value str2; - mrb_encoding *enc; - - mrb_get_args(mrb, "oo", &str1, &str2); - if (!enc_capable(str1)) return mrb_nil_value(); - if (!enc_capable(str2)) return mrb_nil_value(); - enc = mrb_enc_compatible(mrb, str1, str2); - if (!enc) return mrb_nil_value(); - return mrb_enc_from_encoding(mrb, enc); -} - -/* 15.2.40.2.19 */ -/* :nodoc: */ -static mrb_value -enc_dump(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self) -{ - //mrb_scan_args(argc, argv, "01", 0); - return enc_name(mrb, self); -} - -/* 15.2.40.2.11 */ -/* :nodoc: */ -static mrb_value -enc_load(mrb_state *mrb, mrb_value klass) -{ - mrb_value str; - - mrb_get_args(mrb, "o", &str); - return enc_find(mrb, str); -} - -mrb_encoding * -mrb_ascii8bit_encoding(mrb_state *mrb) -{ - if (!enc_table.list) { - mrb_enc_init(mrb); - } - return enc_table.list[ENCINDEX_ASCII].enc; -} - -int -mrb_ascii8bit_encindex(void) -{ - return ENCINDEX_ASCII; -} - -mrb_encoding * -mrb_utf8_encoding(mrb_state *mrb) -{ - if (!enc_table.list) { - 
mrb_enc_init(mrb); - } - return enc_table.list[ENCINDEX_UTF_8].enc; -} - -int -mrb_utf8_encindex(void) -{ - return ENCINDEX_UTF_8; -} - -mrb_encoding * -mrb_usascii_encoding(mrb_state *mrb) -{ - if (!enc_table.list) { - mrb_enc_init(mrb); - } - return enc_table.list[ENCINDEX_US_ASCII].enc; -} - -int -mrb_usascii_encindex(void) -{ - return ENCINDEX_US_ASCII; -} - -int -mrb_locale_encindex(mrb_state *mrb) -{ - mrb_value charmap = mrb_locale_charmap(mrb, mrb_obj_value(ENCODE_CLASS)); - int idx; - - if (mrb_nil_p(charmap)) - idx = mrb_usascii_encindex(); - //else if ((idx = mrb_enc_find_index(StringValueCStr(charmap))) < 0) - else if ((idx = mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &charmap))) < 0) - idx = mrb_ascii8bit_encindex(); - - if (mrb_enc_registered("locale") < 0) enc_alias_internal("locale", idx); - - return idx; -} - -mrb_encoding * -mrb_locale_encoding(mrb_state *mrb) -{ - return mrb_enc_from_index(mrb, mrb_locale_encindex(mrb)); -} - -static int -enc_set_filesystem_encoding(mrb_state *mrb) -{ - int idx; -#if defined NO_LOCALE_CHARMAP - idx = mrb_enc_to_index(mrb_default_external_encoding(mrb)); -#elif defined _WIN32 || defined __CYGWIN__ - char cp[sizeof(int) * 8 / 3 + 4]; - //snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? 
GetACP() : GetOEMCP()); - idx = mrb_enc_find_index(mrb, cp); - if (idx < 0) idx = mrb_ascii8bit_encindex(); -#else - idx = mrb_enc_to_index(mrb_default_external_encoding(mrb)); -#endif - - enc_alias_internal("filesystem", idx); - return idx; -} - -int -mrb_filesystem_encindex(void) -{ - int idx = mrb_enc_registered("filesystem"); - if (idx < 0) - idx = mrb_ascii8bit_encindex(); - return idx; -} - -mrb_encoding * -mrb_filesystem_encoding(mrb_state *mrb) -{ - return mrb_enc_from_index(mrb, mrb_filesystem_encindex()); -} - -struct default_encoding { - int index; /* -2 => not yet set, -1 => nil */ - mrb_encoding *enc; -}; - -static struct default_encoding default_external = {0}; - -static int -enc_set_default_encoding(mrb_state *mrb, struct default_encoding *def, mrb_value encoding, const char *name) -{ - int overridden = FALSE; - - if (def->index != -2) - /* Already set */ - overridden = TRUE; - - if (mrb_nil_p(encoding)) { - def->index = -1; - def->enc = 0; - st_insert(enc_table.names, (st_data_t)strdup(name), - (st_data_t)UNSPECIFIED_ENCODING); - } - else { - def->index = mrb_enc_to_index(mrb_to_encoding(mrb, encoding)); - def->enc = 0; - enc_alias_internal(name, def->index); - } - - if (def == &default_external) - enc_set_filesystem_encoding(mrb); - - return overridden; -} - -mrb_encoding * -mrb_default_external_encoding(mrb_state *mrb) -{ - if (default_external.enc) return default_external.enc; - - if (default_external.index >= 0) { - default_external.enc = mrb_enc_from_index(mrb, default_external.index); - return default_external.enc; - } - else { - return mrb_locale_encoding(mrb); - } -} - -mrb_value -mrb_enc_default_external(mrb_state *mrb) -{ - return mrb_enc_from_encoding(mrb, mrb_default_external_encoding(mrb)); -} - -/* 15.2.40.2.3 */ -/* - * call-seq: - * Encoding.default_external -> enc - * - * Returns default external encoding. - * - * It is initialized by the locale or -E option. 
- */ -static mrb_value -get_default_external(mrb_state *mrb, mrb_value klass) -{ - return mrb_enc_default_external(mrb); -} - -void -mrb_enc_set_default_external(mrb_state *mrb, mrb_value encoding) -{ - if (mrb_nil_p(encoding)) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "default external can not be nil"); - } - enc_set_default_encoding(mrb, &default_external, encoding, - "external"); -} - -/* 15.2.40.2.4 */ -/* - * call-seq: - * Encoding.default_external = enc - * - * Sets default external encoding. - */ -static mrb_value -set_default_external(mrb_state *mrb, mrb_value klass) -{ - mrb_value encoding; - - mrb_get_args(mrb, "o", &encoding); - mrb_warning("setting Encoding.default_external"); - mrb_enc_set_default_external(mrb, encoding); - return encoding; -} - -static struct default_encoding default_internal = {-2}; - -mrb_encoding * -mrb_default_internal_encoding(mrb_state *mrb) -{ - if (!default_internal.enc && default_internal.index >= 0) { - default_internal.enc = mrb_enc_from_index(mrb, default_internal.index); - } - return default_internal.enc; /* can be NULL */ -} - -mrb_value -mrb_enc_default_internal(mrb_state *mrb) -{ - /* Note: These functions cope with default_internal not being set */ - return mrb_enc_from_encoding(mrb, mrb_default_internal_encoding(mrb)); -} - -/* 15.2.40.2.5 */ -/* - * call-seq: - * Encoding.default_internal -> enc - * - * Returns default internal encoding. - * - * It is initialized by the source internal_encoding or -E option. - */ -static mrb_value -get_default_internal(mrb_state *mrb, mrb_value klass) -{ - return mrb_enc_default_internal(mrb); -} - -void -mrb_enc_set_default_internal(mrb_state *mrb, mrb_value encoding) -{ - enc_set_default_encoding(mrb, &default_internal, encoding, - "internal"); -} - -/* 15.2.40.2.6 */ -/* - * call-seq: - * Encoding.default_internal = enc or nil - * - * Sets default internal encoding. - * Or removes default internal encoding when passed nil. 
- */ -static mrb_value -set_default_internal(mrb_state *mrb, mrb_value klass) -{ - mrb_value encoding; - - mrb_get_args(mrb, "o", &encoding); - mrb_warning("setting Encoding.default_internal"); - mrb_enc_set_default_internal(mrb, encoding); - return encoding; -} - -#define digit(x) ((x) >= '0' && (x) <= '9') -#ifndef _MSC_VER -#define strstart(s, n) (strncasecmp(s, n, strlen(n)) == 0) -#else -#define strstart(s, n) (_stricmp(s, n) == 0) -#endif -#define C_CODESET "US-ASCII" /* Return this as the encoding of the - * C/POSIX locale. Could as well one day - * become "UTF-8". */ -#if defined _WIN32 || defined __CYGWIN__ -#define JA_CODESET "Windows-31J" -#else -#define JA_CODESET "EUC-JP" -#endif - -static char buf[16]; - -const char * -nl_langinfo_codeset(void) -{ - const char *l, *p; - int n; - - if (((l = getenv("LC_ALL")) && *l) || - ((l = getenv("LC_CTYPE")) && *l) || - ((l = getenv("LANG")) && *l)) { - /* check standardized locales */ - if (!strcmp(l, "C") || !strcmp(l, "POSIX")) - return C_CODESET; - /* check for encoding name fragment */ - p = strchr(l, '.'); - if (!p++) p = l; - if (strstart(p, "UTF")) - return "UTF-8"; - if ((n = 5, strstart(p, "8859-")) || (n = 9, strstart(p, "ISO-8859-"))) { - if (digit(p[n])) { - p += n; - memcpy(buf, "ISO-8859-\0\0", 12); - buf[9] = *p++; - if (digit(*p)) buf[10] = *p++; - return buf; - } - } - if (strstart(p, "KOI8-R")) return "KOI8-R"; - if (strstart(p, "KOI8-U")) return "KOI8-U"; - if (strstart(p, "620")) return "TIS-620"; - if (strstart(p, "2312")) return "GB2312"; - if (strstart(p, "HKSCS")) return "Big5HKSCS"; /* no MIME charset */ - if (strstart(p, "BIG5")) return "Big5"; - if (strstart(p, "GBK")) return "GBK"; /* no MIME charset */ - if (strstart(p, "18030")) return "GB18030"; /* no MIME charset */ - if (strstart(p, "Shift_JIS") || strstart(p, "SJIS")) return "Windows-31J"; - /* check for conclusive modifier */ - if (strstart(p, "euro")) return "ISO-8859-15"; - /* check for language (and perhaps country) codes */ 
- if (strstart(l, "zh_TW")) return "Big5"; - if (strstart(l, "zh_HK")) return "Big5HKSCS"; /* no MIME charset */ - if (strstart(l, "zh")) return "GB2312"; - if (strstart(l, "ja")) return JA_CODESET; - if (strstart(l, "ko")) return "EUC-KR"; - if (strstart(l, "ru")) return "KOI8-R"; - if (strstart(l, "uk")) return "KOI8-U"; - if (strstart(l, "pl") || strstart(l, "hr") || - strstart(l, "hu") || strstart(l, "cs") || - strstart(l, "sk") || strstart(l, "sl")) return "ISO-8859-2"; - if (strstart(l, "eo") || strstart(l, "mt")) return "ISO-8859-3"; - if (strstart(l, "el")) return "ISO-8859-7"; - if (strstart(l, "he")) return "ISO-8859-8"; - if (strstart(l, "tr")) return "ISO-8859-9"; - if (strstart(l, "th")) return "TIS-620"; /* or ISO-8859-11 */ - if (strstart(l, "lt")) return "ISO-8859-13"; - if (strstart(l, "cy")) return "ISO-8859-14"; - if (strstart(l, "ro")) return "ISO-8859-2"; /* or ISO-8859-16 */ - if (strstart(l, "am") || strstart(l, "vi")) return "UTF-8"; - /* Send me further rules if you like, but don't forget that we are - * *only* interested in locale naming conventions on platforms - * that do not already provide an nl_langinfo(CODESET) implementation. */ - } - return NULL; -} - -/* 15.2.40.2.9 */ -/* - * call-seq: - * Encoding.locale_charmap -> string - * - * Returns the locale charmap name. - * - * Debian GNU/Linux - * LANG=C - * Encoding.locale_charmap #=> "ANSI_X3.4-1968" - * LANG=ja_JP.EUC-JP - * Encoding.locale_charmap #=> "EUC-JP" - * - * SunOS 5 - * LANG=C - * Encoding.locale_charmap #=> "646" - * LANG=ja - * Encoding.locale_charmap #=> "eucJP" - * - * The result is highly platform dependent. - * So Encoding.find(Encoding.locale_charmap) may cause an error. - * If you need some encoding object even for unknown locale, - * Encoding.find("locale") can be used. 
- * - */ -mrb_value -mrb_locale_charmap(mrb_state *mrb, mrb_value klass) -{ -#if defined NO_LOCALE_CHARMAP - return mrb_usascii_str_new2(mrb, "ASCII-8BIT"); -#elif defined _WIN32 || defined __CYGWIN__ - const char *nl_langinfo_codeset(void); - const char *codeset = nl_langinfo_codeset(); - char cp[sizeof(int) * 3 + 4]; - if (!codeset) { - //snprintf(cp, sizeof(cp), "CP%d", GetConsoleCP()); - codeset = cp; - } - return mrb_usascii_str_new2(mrb, codeset); -#elif defined HAVE_LANGINFO_H - char *codeset; - codeset = nl_langinfo(CODESET); - return mrb_usascii_str_new2(mrb, codeset); -#else - return mrb_nil_value(); -#endif -} -static void -set_encoding_const(mrb_state *mrb, const char *name, mrb_encoding *enc) -{ - mrb_value encoding = mrb_enc_from_encoding(mrb, enc); - char *s = (char*)name; - int haslower = 0, hasupper = 0, valid = 0; - - if (ISDIGIT(*s)) return; - if (ISUPPER(*s)) { - hasupper = 1; - while (*++s && (ISALNUM(*s) || *s == '_')) { - if (ISLOWER(*s)) haslower = 1; - } - } - if (!*s) { - if (s - name > ENCODING_NAMELEN_MAX) return; - valid = 1; - //mrb_define_const(mrb_cEncoding, name, encoding); - mrb_define_const(mrb, ENCODE_CLASS, name, encoding); - } - if (!valid || haslower) { - size_t len = s - name; - if (len > ENCODING_NAMELEN_MAX) return; - if (!haslower || !hasupper) { - do { - if (ISLOWER(*s)) haslower = 1; - if (ISUPPER(*s)) hasupper = 1; - } while (*++s && (!haslower || !hasupper)); - len = s - name; - } - len += strlen(s); - if (len++ > ENCODING_NAMELEN_MAX) return; - //MEMCPY(s = ALLOCA_N(char, len), name, char, len); - memcpy(s = mrb_malloc(mrb, len), name, len); - name = s; - if (!valid) { - if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s); - for (; *s; ++s) { - if (!ISALNUM(*s)) *s = '_'; - } - if (hasupper) { - mrb_define_const(mrb, ENCODE_CLASS, name, encoding); - } - } - if (haslower) { - for (s = (char*)name; *s; ++s) { - if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s); - } - mrb_define_const(mrb, 
ENCODE_CLASS, name, encoding); - } - } -} -static enum st_retval -mrb_enc_name_list_i(mrb_state *mrb, st_data_t name, st_data_t idx, mrb_value *arg) -{ - mrb_value ary = *arg; - mrb_value str = mrb_usascii_str_new2(mrb, (char*)name); - //OBJ_FREEZE(str); - mrb_ary_push(mrb, ary, str); - return ST_CONTINUE; -} - -/* 15.2.40.2.10 */ -/* - * call-seq: - * Encoding.name_list -> ["enc1", "enc2", ...] - * - * Returns the list of available encoding names. - * - * Encoding.name_list - * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8", - * "ISO-8859-1", "Shift_JIS", "EUC-JP", - * "Windows-31J", - * "BINARY", "CP932", "eucJP"] - * - */ - -static mrb_value -mrb_enc_name_list(mrb_state *mrb, mrb_value klass) -{ - mrb_value ary = mrb_ary_new_capa(mrb, enc_table.names->num_entries);//mrb_ary_new2(enc_table.names->num_entries); - st_foreachNew(mrb, enc_table.names, mrb_enc_name_list_i, &ary); - return ary; -} - -static enum st_retval -mrb_enc_aliases_enc_i(mrb_state *mrb, st_data_t name, st_data_t orig, st_data_t arg) -{ - mrb_value *p = (mrb_value*)arg; - mrb_value aliases = p[0], ary = p[1]; - int idx = (int)orig; - mrb_value key, str = mrb_ary_ref(mrb, ary, idx);//mrb_ary_entry(ary, idx); - - if (mrb_nil_p(str)) { - mrb_encoding *enc = mrb_enc_from_index(mrb, idx); - - if (!enc) return ST_CONTINUE; - if (STRCASECMP((char*)name, mrb_enc_name(enc)) == 0) { - return ST_CONTINUE; - } - str = mrb_usascii_str_new2(mrb, mrb_enc_name(enc)); - OBJ_FREEZE(str); - mrb_ary_set(mrb, ary, idx, str);//rb_ary_store(ary, idx, str); - } - key = mrb_usascii_str_new2(mrb, (char*)name); - OBJ_FREEZE(key); - mrb_hash_set(mrb, aliases, key, str); - return ST_CONTINUE; -} - -/* 15.2.40.2.1 */ -/* - * call-seq: - * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...} - * - * Returns the hash of available encoding alias and original encoding name. 
- * - * Encoding.aliases - * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1986"=>"US-ASCII", - * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"} - * - */ - -static mrb_value -mrb_enc_aliases(mrb_state *mrb, mrb_value klass) -{ - mrb_value aliases[2]; - aliases[0] = mrb_hash_new_capa(mrb, 0); - aliases[1] = mrb_ary_new(mrb); - st_foreachNew(mrb, enc_table.names, mrb_enc_aliases_enc_i, aliases); - return aliases[0]; -} - -void -mrb_init_encoding(mrb_state *mrb) -{ -#undef mrb_intern -#define mrb_intern(str) mrb_intern_const(str) - mrb_value list; - int i; - struct RClass *s; - - s = mrb_define_class(mrb, "Encoding", mrb->object_class); - //mrb_undef_alloc_func(mrb_cEncoding); - //mrb_undef_method(CLASS_OF(mrb_cEncoding), "new"); - mrb_define_class_method(mrb, s, "aliases", mrb_enc_aliases, ARGS_NONE()); /* 15.2.40.2.1 */ - mrb_define_class_method(mrb, s, "compatible?", enc_compatible_p, ARGS_REQ(2)); /* 15.2.40.2.2 */ - mrb_define_class_method(mrb, s, "default_external", get_default_external, ARGS_NONE()); /* 15.2.40.2.3 */ - mrb_define_class_method(mrb, s, "default_external=", set_default_external, ARGS_REQ(1)); /* 15.2.40.2.4 */ - mrb_define_class_method(mrb, s, "default_internal", get_default_internal, ARGS_NONE()); /* 15.2.40.2.5 */ - mrb_define_class_method(mrb, s, "default_internal=", set_default_internal, ARGS_REQ(1)); /* 15.2.40.2.6 */ - mrb_define_class_method(mrb, s, "find", enc_find, ARGS_REQ(1)); /* 15.2.40.2.7 */ - mrb_define_class_method(mrb, s, "list", enc_list, ARGS_NONE()); /* 15.2.40.2.8 */ - mrb_define_class_method(mrb, s, "locale_charmap", mrb_locale_charmap, ARGS_NONE()); /* 15.2.40.2.9 */ - mrb_define_class_method(mrb, s, "name_list", mrb_enc_name_list, ARGS_NONE()); /* 15.2.40.2.10 */ - mrb_define_class_method(mrb, s, "_load", enc_load, ARGS_REQ(1)); /* 15.2.40.2.11 */ - mrb_define_method(mrb, s, "ascii_compatible?", enc_ascii_compatible_p, ARGS_NONE()); /* 15.2.40.2.12 */ - mrb_define_method(mrb, s, "dummy?", 
enc_dummy_p, ARGS_NONE()); /* 15.2.40.2.13 */ - mrb_define_method(mrb, s, "inspect", enc_inspect, ARGS_NONE()); /* 15.2.40.2.14 */ - mrb_define_method(mrb, s, "name", enc_name, ARGS_NONE()); /* 15.2.40.2.15 */ - mrb_define_method(mrb, s, "names", enc_names, ARGS_NONE()); /* 15.2.40.2.16 */ - mrb_define_method(mrb, s, "replicate", enc_replicate, ARGS_REQ(1)); /* 15.2.40.2.17 */ - mrb_define_method(mrb, s, "to_s", enc_name, ARGS_NONE()); /* 15.2.40.2.18 */ - mrb_define_method(mrb, s, "_dump", enc_dump, ARGS_ANY()); /* 15.2.40.2.19 */ - -/* add kusuda --> */ - if (!enc_table.list) { - mrb_enc_init(mrb); - } -/* add kusuda --< */ - list = mrb_ary_new_capa(mrb, enc_table.count);//mrb_ary_new2(enc_table.count); - RBASIC(list)->c = 0; - mrb_encoding_list = list; - //mrb_gc_register_mark_object(list); - - for (i = 0; i < enc_table.count; ++i) { - mrb_ary_push(mrb, list, enc_new(mrb, enc_table.list[i].enc)); - } -} - -/* locale insensitive functions */ - -#define ctype_test(c, ctype) \ - (mrb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), ctype)) - -int mrb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); } -int mrb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); } -int mrb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); } -int mrb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); } -int mrb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); } -int mrb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); } -int mrb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); } -int mrb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); } -int mrb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); } -int mrb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); } -int mrb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); } -int mrb_isxdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_XDIGIT); } - -int -mrb_tolower(int c) -{ - return mrb_isascii(c) ? 
ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) : c; -} - -int -mrb_toupper(int c) -{ - return mrb_isascii(c) ? ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) : c; -} -#endif //INCLUDE_ENCODING diff --git a/src/encoding.h b/src/encoding.h index c6c470644..1312fb947 100644 --- a/src/encoding.h +++ b/src/encoding.h @@ -174,11 +174,7 @@ int mrb_enc_codelen(mrb_state *mrb, int code, mrb_encoding *enc); #endif //INCLUDE_ENCODING /* code,ptr,encoding -> write buf */ -#ifdef INCLUDE_ENCODING -#define mrb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)(buf)) -#else -#define mrb_enc_mbcput(c,buf,enc) *(buf) = (char)(c) -#endif //INCLUDE_ENCODING +#define mrb_enc_mbcput(c,buf,enc) ((*(buf) = (char)(c)),1) /* start, ptr, end, encoding -> prev_char */ #define mrb_enc_prev_char(s,p,e,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e)) @@ -232,9 +228,6 @@ mrb_value mrb_enc_default_internal(mrb_state *mrb); void mrb_enc_set_default_external(mrb_state *mrb, mrb_value encoding); void mrb_enc_set_default_internal(mrb_state *mrb, mrb_value encoding); mrb_value mrb_locale_charmap(mrb_state *mrb, mrb_value klass); -#ifdef INCLUDE_ENCODING -int mrb_memsearch(mrb_state *mrb, const void*,int,const void*,int,mrb_encoding*); -#endif //INCLUDE_ENCODING mrb_value mrb_usascii_str_new_cstr(mrb_state *mrb, const char *ptr); int mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, int unicode_p); @@ -266,7 +266,7 @@ mrb_obj_alloc(mrb_state *mrb, enum mrb_vtype ttype, struct RClass *cls) mrb->live++; if (mrb->arena_idx > MRB_ARENA_SIZE) { /* arena overflow error */ - mrb->arena_idx = MRB_ARENA_SIZE - 2; /* force room in arena */ + mrb->arena_idx = MRB_ARENA_SIZE - 4; /* force room in arena */ mrb_raise(mrb, mrb->eRuntimeError_class, "arena overflow error"); } mrb->arena[mrb->arena_idx++] = p; @@ -360,12 +360,14 @@ gc_mark_children(mrb_state *mrb, struct RBasic *obj) case MRB_TT_STRING: { +#if 0 struct RString *s = (struct RString*)obj; while (s->flags & 
MRB_STR_SHARED) { s = s->aux.shared; if (!s) break; } +#endif } break; diff --git a/src/init.c b/src/init.c index 5aab8d6ae..17ce24313 100644 --- a/src/init.c +++ b/src/init.c @@ -20,7 +20,6 @@ void mrb_init_proc(mrb_state*); void mrb_init_range(mrb_state*); void mrb_init_string(mrb_state*); void mrb_init_regexp(mrb_state*); -void mrb_init_encoding(mrb_state*); void mrb_init_exception(mrb_state*); void mrb_init_time(mrb_state*); void mrb_init_io(mrb_state*); @@ -54,7 +53,6 @@ mrb_init_core(mrb_state *mrb) mrb_init_gc(mrb); #ifdef INCLUDE_REGEXP mrb_init_regexp(mrb); - mrb_init_encoding(mrb); #endif mrb_init_exception(mrb); mrb_init_print(mrb); diff --git a/src/object.c b/src/object.c index 1d84909ec..4dc900feb 100644 --- a/src/object.c +++ b/src/object.c @@ -11,13 +11,6 @@ #include "mruby/class.h" #include "mruby/numeric.h" -#ifdef INCLUDE_REGEXP - #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr -#else - #define mrb_usascii_str_new2 mrb_str_new_cstr - #define mrb_usascii_str_new mrb_str_new -#endif - #ifndef FALSE #define FALSE 0 #endif @@ -106,7 +99,7 @@ mrb_true(mrb_state *mrb, mrb_value obj) static mrb_value nil_to_s(mrb_state *mrb, mrb_value obj) { - return mrb_usascii_str_new(mrb, 0, 0); + return mrb_str_new(mrb, 0, 0); } /*********************************************************************** @@ -166,7 +159,7 @@ true_xor(mrb_state *mrb, mrb_value obj) static mrb_value true_to_s(mrb_state *mrb, mrb_value obj) { - return mrb_usascii_str_new2(mrb, "true"); + return mrb_str_new_cstr(mrb, "true"); } /* 15.2.5.3.4 */ @@ -279,7 +272,7 @@ false_or(mrb_state *mrb, mrb_value obj) static mrb_value false_to_s(mrb_state *mrb, mrb_value obj) { - return mrb_usascii_str_new2(mrb, "false"); + return mrb_str_new_cstr(mrb, "false"); } void @@ -7,16 +7,11 @@ #include "mruby.h" #include <string.h> #include "mruby/string.h" -#include "mruby/khash.h" #include "encoding.h" #include "re.h" -#include "mruby/numeric.h" -#include "mruby/range.h" #include "mruby/array.h" #include 
"regint.h" #include "mruby/class.h" -#include "mruby/hash.h" -#include "mruby/variable.h" #include "error.h" #ifdef INCLUDE_REGEXP @@ -54,13 +49,10 @@ unsigned long ruby_scan_oct(const char*, size_t, size_t*); unsigned long ruby_scan_hex(const char*, size_t, size_t*); static mrb_value mrb_match_to_a(mrb_state *mrb, mrb_value match); -static mrb_value mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc, - mrb_encoding **fixed_enc, onig_errmsg_buffer err); -static void mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len, - mrb_encoding *enc, mrb_encoding *resenc); +static mrb_value mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, onig_errmsg_buffer err); +static void mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len); static char * option_to_str(char str[4], int options); -static mrb_value reg_cache; //static int may_need_recompile; //static int reg_kcode = DEFAULT_KCODE; /* ------------------------------------------------------------------------- */ @@ -94,22 +86,20 @@ mrb_reg_s_new_instance(mrb_state *mrb, /*int argc, mrb_value *argv, */mrb_value re->usecnt = 0; return mrb_funcall_argv(mrb, mrb_obj_value(re), "initialize", argc, argv); } -//#define mrb_enc_mbcput(a,b,c) a + mrb_value mrb_reg_quote(mrb_state *mrb, mrb_value str) { - mrb_encoding *enc = mrb_enc_get(mrb, str); char *s, *send, *t; mrb_value tmp; - int c,clen; - int ascii_only = mrb_enc_str_asciionly_p(mrb, str); + int c; s = RSTRING_PTR(str); send = s + RSTRING_LEN(str); while (s < send) { - c = mrb_enc_ascget(mrb, s, send, &clen, enc); + c = *s; if (c == -1) { - s += mbclen(s, send, enc); + s += send - s; continue; } switch (c) { @@ -121,38 +111,29 @@ mrb_reg_quote(mrb_state *mrb, mrb_value str) case '\t': case '\f': case '\n': case '\r': goto meta_found; } - s += clen; + s++; } //tmp = mrb_str_new3(str); tmp = mrb_str_new(mrb, RSTRING_PTR(str), RSTRING_LEN(str)); - if (ascii_only) { - mrb_enc_associate(mrb, 
tmp, mrb_usascii_encoding(mrb)); - } return tmp; meta_found: tmp = mrb_str_new(mrb, 0, RSTRING_LEN(str)*2); - if (ascii_only) { - mrb_enc_associate(mrb, tmp, mrb_usascii_encoding(mrb)); - } - else { - mrb_enc_copy(mrb, tmp, str); - } t = RSTRING_PTR(tmp); /* copy upto metacharacter */ memcpy(t, RSTRING_PTR(str), s - RSTRING_PTR(str)); t += s - RSTRING_PTR(str); while (s < send) { - c = mrb_enc_ascget(mrb, s, send, &clen, enc); + c = *s; if (c == -1) { - int n = mbclen(s, send, enc); + int n = send - s; while (n--) *t++ = *s++; continue; } - s += clen; + s++; switch (c) { case '[': case ']': case '{': case '}': case '(': case ')': case '|': case '-': @@ -263,7 +244,7 @@ mrb_reg_nth_match(mrb_state *mrb, mrb_int nth, mrb_value match) if (start == -1) return mrb_nil_value(); end = m->rmatch->regs.end[nth]; len = end - start; - str = mrb_str_substr(mrb, mrb_obj_value(m->str), start, len); + str = mrb_str_subseq(mrb, mrb_obj_value(m->str), start, len); return str; } @@ -379,75 +360,13 @@ mrb_reg_options(mrb_state *mrb, mrb_value re) return options; } -static void -reg_enc_error(mrb_state *mrb, mrb_value re, mrb_value str) -{ - mrb_raise(mrb, E_ENCODING_ERROR, - "incompatible encoding regexp match (%s regexp with %s string)", - mrb_enc_name(mrb_enc_get(mrb, re)), - mrb_enc_name(mrb_enc_get(mrb, str))); -} - -static int -mrb_reg_fixed_encoding_p(mrb_value re) -{ - /*if (FL_TEST(re, KCODE_FIXED)) - return Qtrue; - else */ - return 0/*Qfalse*/; -} - -static mrb_encoding* -mrb_reg_prepare_enc(mrb_state *mrb, mrb_value re, mrb_value str, int warn) -{ - mrb_encoding *enc = 0; - - if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_BROKEN) { - mrb_raise(mrb, E_ARGUMENT_ERROR, - "invalid byte sequence in %s", - mrb_enc_name(mrb_enc_get(mrb, str))); - } - - mrb_reg_check(mrb, re); - enc = mrb_enc_get(mrb, str); - if (!mrb_enc_str_asciicompat_p(mrb, str)) { - if (RREGEXP(re)->ptr->enc != enc) { - reg_enc_error(mrb, re, str); - } - } - else if (mrb_reg_fixed_encoding_p(re)) { - if 
(RREGEXP(re)->ptr->enc != enc && - (!mrb_enc_asciicompat(mrb, RREGEXP(re)->ptr->enc) || - mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT)) { - reg_enc_error(mrb, re, str); - } - enc = RREGEXP(re)->ptr->enc; - } - if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) && - enc != mrb_ascii8bit_encoding(mrb) && - mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT) { - mrb_warn("regexp match /.../n against to %s string", - mrb_enc_name(enc)); - } - return enc; -} - static mrb_value mrb_reg_desc(mrb_state *mrb, const char *s, long len, mrb_value re) { - mrb_encoding *enc = mrb_enc_get(mrb, re); mrb_value str = mrb_str_new_cstr(mrb, "/");//mrb_str_buf_new2("/"); - mrb_encoding *resenc = mrb_default_internal_encoding(mrb); - if (resenc == NULL) resenc = mrb_default_external_encoding(mrb); - if (re.tt && mrb_enc_asciicompat(mrb, enc)) { - mrb_enc_copy(mrb, str, re); - } - else { - mrb_enc_associate(mrb, str, mrb_usascii_encoding(mrb)); - } - mrb_reg_expr_str(mrb, str, s, len, enc, resenc); - mrb_str_buf_cat(mrb, str, "/", strlen("/"));//mrb_str_buf_cat2(str, "/"); + mrb_reg_expr_str(mrb, str, s, len); + mrb_str_buf_cat(mrb, str, "/", strlen("/")); if (re.tt) { char opts[4]; mrb_reg_check(mrb, re); @@ -476,18 +395,14 @@ mrb_reg_prepare_re(mrb_state *mrb, mrb_value re, mrb_value str) OnigErrorInfo einfo; const char *pattern; mrb_value unescaped; - mrb_encoding *fixed_enc = 0; - mrb_encoding *enc = mrb_reg_prepare_enc(mrb, re, str, 1); - - if (reg->enc == enc) return reg; + mrb_encoding *enc = mrb_ascii8bit_encoding(mrb); mrb_reg_check(mrb, re); reg = RREGEXP(re)->ptr; pattern = RREGEXP_SRC_PTR(re); unescaped = mrb_reg_preprocess(mrb, - pattern, pattern + RREGEXP(re)->src->len, enc, - &fixed_enc, err); + pattern, pattern + RREGEXP(re)->src->len, err); if (mrb_nil_p(unescaped)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "regexp preprocess failed: %s", err); @@ -675,18 +590,6 @@ ruby_scan_hex(const char *start, size_t len, size_t *retlen) return retval; } -static int 
-check_unicode_range(unsigned long code, onig_errmsg_buffer err) -{ - if ((0xd800 <= code && code <= 0xdfff) || /* Surrogates */ - 0x10ffff < code) { - //errcpy(err, "invalid Unicode range"); - printf("invalid Unicode range"); - return -1; - } - return 0; -} - #define BYTEWIDTH 8 int @@ -735,59 +638,6 @@ mrb_uv_to_utf8(mrb_state *mrb, char buf[6], unsigned long uv) return 0; } -static int -append_utf8(mrb_state *mrb, unsigned long uv, - mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err) -{ - if (check_unicode_range(uv, err) != 0) - return -1; - if (uv < 0x80) { - char escbuf[5]; - snprintf(escbuf, sizeof(escbuf), "\\x%02X", (int)uv); - mrb_str_buf_cat(mrb, buf, escbuf, 4); - } - else { - int len; - char utf8buf[6]; - len = mrb_uv_to_utf8(mrb, utf8buf, uv); - mrb_str_buf_cat(mrb, buf, utf8buf, len); - - if (*encp == 0) - *encp = mrb_utf8_encoding(mrb); - else if (*encp != mrb_utf8_encoding(mrb)) { - //errcpy(err, "UTF-8 character in non UTF-8 regexp"); - printf("UTF-8 character in non UTF-8 regexp"); - return -1; - } - } - return 0; -} - -static int -unescape_unicode_bmp(mrb_state *mrb, const char **pp, const char *end, - mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err) -{ - const char *p = *pp; - size_t len; - unsigned long code; - - if (end < p+4) { - //errcpy(err, "invalid Unicode escape"); - printf("invalid Unicode escape"); - return -1; - } - code = ruby_scan_hex(p, 4, &len); - if (len != 4) { - //errcpy(err, "invalid Unicode escape"); - printf("invalid Unicode escape"); - return -1; - } - if (append_utf8(mrb, code, buf, encp, err) != 0) - return -1; - *pp = p + 4; - return 0; -} - unsigned long ruby_scan_oct(const char *start, size_t len, size_t *retlen) { @@ -802,400 +652,29 @@ ruby_scan_oct(const char *start, size_t len, size_t *retlen) return retval; } -static int -read_escaped_byte(const char **pp, const char *end, onig_errmsg_buffer err) -{ - const char *p = *pp; - int code; - int meta_prefix = 0, ctrl_prefix = 0; - size_t len; - - if 
(p == end || *p++ != '\\') { - //errcpy(err, "too short escaped multibyte character"); - printf("too short escaped multibyte character"); - return -1; - } - -again: - if (p == end) { - //errcpy(err, "too short escape sequence"); - printf("too short escape sequence"); - return -1; - } - switch (*p++) { - case '\\': code = '\\'; break; - case 'n': code = '\n'; break; - case 't': code = '\t'; break; - case 'r': code = '\r'; break; - case 'f': code = '\f'; break; - case 'v': code = '\013'; break; - case 'a': code = '\007'; break; - case 'e': code = '\033'; break; - - /* \OOO */ - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - p--; - code = scan_oct(p, end < p+3 ? end-p : 3, &len); - p += len; - break; - - case 'x': /* \xHH */ - code = scan_hex(p, end < p+2 ? end-p : 2, &len); - if (len < 1) { - //errcpy(err, "invalid hex escape"); - printf("invalid hex escape"); - return -1; - } - p += len; - break; - - case 'M': /* \M-X, \M-\C-X, \M-\cX */ - if (meta_prefix) { - //errcpy(err, "duplicate meta escape"); - printf("duplicate meta escape"); - return -1; - } - meta_prefix = 1; - if (p+1 < end && *p++ == '-' && (*p & 0x80) == 0) { - if (*p == '\\') { - p++; - goto again; - } - else { - code = *p++; - break; - } - } - //errcpy(err, "too short meta escape"); - printf("too short meta escape"); - return -1; - - case 'C': /* \C-X, \C-\M-X */ - if (p == end || *p++ != '-') { - //errcpy(err, "too short control escape"); - printf("too short control escape"); - return -1; - } - case 'c': /* \cX, \c\M-X */ - if (ctrl_prefix) { - //errcpy(err, "duplicate control escape"); - printf("duplicate control escape"); - return -1; - } - ctrl_prefix = 1; - if (p < end && (*p & 0x80) == 0) { - if (*p == '\\') { - p++; - goto again; - } - else { - code = *p++; - break; - } - } - //errcpy(err, "too short control escape"); - printf("too short control escape"); - return -1; - - default: - //errcpy(err, "unexpected escape sequence"); - printf("unexpected escape 
sequence"); - return -1; - } - if (code < 0 || 0xff < code) { - //errcpy(err, "invalid escape code"); - printf("invalid escape code"); - return -1; - } - - if (ctrl_prefix) - code &= 0x1f; - if (meta_prefix) - code |= 0x80; - - *pp = p; - return code; -} - -static int -unescape_escaped_nonascii(mrb_state *mrb, const char **pp, const char *end, mrb_encoding *enc, - mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err) -{ - const char *p = *pp; - int chmaxlen = mrb_enc_mbmaxlen(enc); - //char *chbuf = ALLOCA_N(char, chmaxlen); - char *chbuf = mrb_malloc(mrb, chmaxlen); - int chlen = 0; - int byte; - int l; - - memset(chbuf, 0, chmaxlen); - - byte = read_escaped_byte(&p, end, err); - if (byte == -1) { - return -1; - } - - chbuf[chlen++] = byte; - while (chlen < chmaxlen && - MBCLEN_NEEDMORE_P(mrb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) { - byte = read_escaped_byte(&p, end, err); - if (byte == -1) { - return -1; - } - chbuf[chlen++] = byte; - } - - l = mrb_enc_precise_mbclen(chbuf, chbuf+chlen, enc); - if (MBCLEN_INVALID_P(l)) { - //errcpy(err, "invalid multibyte escape"); - printf("invalid multibyte escape"); - return -1; - } - if (1 < chlen || (chbuf[0] & 0x80)) { - mrb_str_buf_cat(mrb, buf, chbuf, chlen); - - if (*encp == 0) - *encp = enc; - else if (*encp != enc) { - //errcpy(err, "escaped non ASCII character in UTF-8 regexp"); - printf("escaped non ASCII character in UTF-8 regexp"); - return -1; - } - } - else { - char escbuf[5]; - snprintf(escbuf, sizeof(escbuf), "\\x%02X", chbuf[0]&0xff); - mrb_str_buf_cat(mrb, buf, escbuf, 4); - } - *pp = p; - return 0; -} - -static int -unescape_unicode_list(mrb_state *mrb, const char **pp, const char *end, - mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err) -{ - const char *p = *pp; - int has_unicode = 0; - unsigned long code; - size_t len; - - while (p < end && ISSPACE(*p)) p++; - - while (1) { - code = ruby_scan_hex(p, end-p, &len); - if (len == 0) - break; - if (6 < len) { /* max 10FFFF */ - 
//errcpy(err, "invalid Unicode range"); - printf("invalid Unicode range"); - return -1; - } - p += len; - if (append_utf8(mrb, code, buf, encp, err) != 0) - return -1; - has_unicode = 1; - - while (p < end && ISSPACE(*p)) p++; - } - - if (has_unicode == 0) { - //errcpy(err, "invalid Unicode list"); - printf("invalid Unicode list"); - return -1; - } - - *pp = p; - - return 0; -} - -static int -unescape_nonascii(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc, - mrb_value buf, mrb_encoding **encp, int *has_property, - onig_errmsg_buffer err) -{ - char c; - char smallbuf[2]; - - while (p < end) { - int chlen = mrb_enc_precise_mbclen(p, end, enc); - if (!MBCLEN_CHARFOUND_P(chlen)) { - //errcpy(err, "invalid multibyte character"); - printf("invalid multibyte character"); - return -1; - } - chlen = MBCLEN_CHARFOUND_LEN(chlen); - if (1 < chlen || (*p & 0x80)) { - mrb_str_buf_cat(mrb, buf, p, chlen); - p += chlen; - if (*encp == 0) - *encp = enc; - else if (*encp != enc) { - //errcpy(err, "non ASCII character in UTF-8 regexp"); - printf("non ASCII character in UTF-8 regexp"); - return -1; - } - continue; - } - - switch (c = *p++) { - case '\\': - if (p == end) { - //errcpy(err, "too short escape sequence"); - printf("too short escape sequence"); - return -1; - } - switch (c = *p++) { - case '1': case '2': case '3': - case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */ - { - size_t octlen; - if (ruby_scan_oct(p-1, end-(p-1), &octlen) <= 0177) { - /* backref or 7bit octal. - no need to unescape anyway. - re-escaping may break backref */ - goto escape_asis; - } - } - /* xxx: How about more than 199 subexpressions? 
*/ - - case '0': /* \0, \0O, \0OO */ - - case 'x': /* \xHH */ - case 'c': /* \cX, \c\M-X */ - case 'C': /* \C-X, \C-\M-X */ - case 'M': /* \M-X, \M-\C-X, \M-\cX */ - p = p-2; - if (unescape_escaped_nonascii(mrb, &p, end, enc, buf, encp, err) != 0) - return -1; - break; - - case 'u': - if (p == end) { - //errcpy(err, "too short escape sequence"); - printf("too short escape sequence"); - return -1; - } - if (*p == '{') { - /* \u{H HH HHH HHHH HHHHH HHHHHH ...} */ - p++; - if (unescape_unicode_list(mrb, &p, end, buf, encp, err) != 0) - return -1; - if (p == end || *p++ != '}') { - //errcpy(err, "invalid Unicode list"); - printf("invalid Unicode list"); - return -1; - } - break; - } - else { - /* \uHHHH */ - if (unescape_unicode_bmp(mrb, &p, end, buf, encp, err) != 0) - return -1; - break; - } - - case 'p': /* \p{Hiragana} */ - case 'P': - if (!*encp) { - *has_property = 1; - } - goto escape_asis; - - default: /* \n, \\, \d, \9, etc. */ -escape_asis: - smallbuf[0] = '\\'; - smallbuf[1] = c; - mrb_str_buf_cat(mrb, buf, smallbuf, 2); - break; - } - break; - - default: - mrb_str_buf_cat(mrb, buf, &c, 1); - break; - } - } - - return 0; -} - - static mrb_value -mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc, - mrb_encoding **fixed_enc, onig_errmsg_buffer err) +mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, onig_errmsg_buffer err) { - mrb_value buf; - int has_property = 0; - - //buf = mrb_str_buf_new(0); - buf = mrb_str_buf_new(mrb, 0); - - if (mrb_enc_asciicompat(mrb, enc)) - *fixed_enc = 0; - else { - *fixed_enc = enc; - mrb_enc_associate(mrb, buf, enc); - } - - if (unescape_nonascii(mrb, p, end, enc, buf, fixed_enc, &has_property, err) != 0) - return mrb_nil_value(); - - if (has_property && !*fixed_enc) { - *fixed_enc = enc; - } - - if (*fixed_enc) { - mrb_enc_associate(mrb, buf, *fixed_enc); - } - - return buf; + return mrb_nil_value(); } static int -mrb_reg_initialize(mrb_state *mrb, mrb_value obj, const char *s, 
long len, mrb_encoding *enc, +mrb_reg_initialize(mrb_state *mrb, mrb_value obj, const char *s, long len, int options, onig_errmsg_buffer err, const char *sourcefile, int sourceline) { struct RRegexp *re = RREGEXP(obj); mrb_value unescaped; - mrb_encoding *fixed_enc = 0; - mrb_encoding *a_enc = mrb_ascii8bit_encoding(mrb); + mrb_encoding *enc = mrb_ascii8bit_encoding(mrb); if (re->ptr) mrb_raise(mrb, E_TYPE_ERROR, "already initialized regexp"); re->ptr = 0; - if (mrb_enc_dummy_p(enc)) { - //errcpy(err, "can't make regexp with dummy encoding"); - printf("can't make regexp with dummy encoding"); - return -1; - } - - unescaped = mrb_reg_preprocess(mrb, s, s+len, enc, &fixed_enc, err); + unescaped = mrb_reg_preprocess(mrb, s, s+len, err); if (mrb_nil_p(unescaped)) return -1; - if (fixed_enc) { - if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) || - (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) { - //errcpy(err, "incompatible character encoding"); - printf("incompatible character encoding"); - return -1; - } - if (fixed_enc != a_enc) { - options |= ARG_ENCODING_FIXED; - enc = fixed_enc; - } - } - else if (!(options & ARG_ENCODING_FIXED)) { - enc = mrb_usascii_encoding(mrb); - } - - mrb_enc_associate(mrb, mrb_obj_value(re), enc); - if ((options & ARG_ENCODING_FIXED) || fixed_enc) { + if ((options & ARG_ENCODING_FIXED)) { //re->basic.flags |= KCODE_FIXED; re->flags|= KCODE_FIXED; } @@ -1207,7 +686,7 @@ mrb_reg_initialize(mrb_state *mrb, mrb_value obj, const char *s, long len, mrb_e options & ARG_REG_OPTION_MASK, err, sourcefile, sourceline); if (!re->ptr) return -1; - re->src = mrb_str_ptr(mrb_enc_str_new(mrb, s, len, enc)); + re->src = mrb_str_ptr(mrb_str_new(mrb, s, len)); return 0; } @@ -1217,8 +696,8 @@ mrb_reg_initialize_str(mrb_state *mrb, mrb_value obj, mrb_value str, int options const char *sourcefile, int sourceline) { int ret; - mrb_encoding *enc = mrb_enc_get(mrb, str); +#if 0 if (options & ARG_ENCODING_NONE) { mrb_encoding *ascii8bit = 
mrb_ascii8bit_encoding(mrb); if (enc != ascii8bit) { @@ -1230,8 +709,9 @@ mrb_reg_initialize_str(mrb_state *mrb, mrb_value obj, mrb_value str, int options enc = ascii8bit; } } +#endif - ret = mrb_reg_initialize(mrb, obj, RSTRING_PTR(str), RSTRING_LEN(str), enc, + ret = mrb_reg_initialize(mrb, obj, RSTRING_PTR(str), RSTRING_LEN(str), options, err, sourcefile, sourceline); return ret; @@ -1267,7 +747,6 @@ mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value se onig_errmsg_buffer err = ""; int flags = 0; mrb_value str; - mrb_encoding *enc; const char *ptr; long len; @@ -1286,10 +765,7 @@ mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value se flags = mrb_reg_options(mrb, re); ptr = RREGEXP_SRC_PTR(re); len = RREGEXP_SRC_LEN(re); - enc = mrb_enc_get(mrb, re); - if (mrb_reg_initialize(mrb, self, ptr, len, enc, flags, err, NULL, 0)) { - /*str = mrb_enc_str_new(mrb, ptr, len, enc); - mrb_reg_raise_str(str, flags, err);*/ + if (mrb_reg_initialize(mrb, self, ptr, len, flags, err, NULL, 0)) { printf("mrb_reg_raise_str(str, flags, err);"); } } @@ -1298,12 +774,10 @@ mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value se if (mrb_type(argv[1]) == MRB_TT_FIXNUM) flags = mrb_fixnum(argv[1]); else if (mrb_test(argv[1])) flags = ONIG_OPTION_IGNORECASE; } - enc = 0; if (argc == 3 && !mrb_nil_p(argv[2])) { //char *kcode = StringValuePtr(argv[2]); char *kcode = mrb_string_value_ptr(mrb, argv[2]); if (kcode[0] == 'n' || kcode[0] == 'N') { - enc = mrb_ascii8bit_encoding(mrb); flags |= ARG_ENCODING_NONE; } else { @@ -1314,9 +788,7 @@ mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value se str = argv[0]; //ptr = StringValuePtr(str); ptr = mrb_string_value_ptr(mrb, str); - if (enc - ? 
mrb_reg_initialize(mrb, self, ptr, RSTRING_LEN(str), enc, flags, err, NULL, 0) - : mrb_reg_initialize_str(mrb, self, str, flags, err, NULL, 0)) { + if (mrb_reg_initialize_str(mrb, self, str, flags, err, NULL, 0)) { //mrb_reg_raise_str(str, flags, err); } } @@ -1346,7 +818,7 @@ mrb_reg_init_copy(mrb_state *mrb, mrb_value re/*, mrb_value copy*/) mrb_reg_check(mrb, copy); s = RREGEXP_SRC_PTR(copy); len = RREGEXP_SRC_LEN(copy); - if (mrb_reg_initialize(mrb, re, s, len, mrb_enc_get(mrb, copy), mrb_reg_options(mrb, copy), + if (mrb_reg_initialize(mrb, re, s, len, mrb_reg_options(mrb, copy), err, 0/*NULL*/, 0) != 0) { mrb_reg_raise(mrb, s, len, err, re); } @@ -1628,7 +1100,7 @@ mrb_reg_source(mrb_state *mrb, mrb_value re) mrb_value str; mrb_reg_check(mrb, re); - str = mrb_enc_str_new(mrb, RREGEXP_SRC_PTR(re),RREGEXP_SRC_LEN(re), mrb_enc_get(mrb, re)); + str = mrb_str_new(mrb, RREGEXP_SRC_PTR(re),RREGEXP_SRC_LEN(re)); return str; } @@ -1757,23 +1229,12 @@ typedef struct { long char_pos; } pair_t; -static int -pair_byte_cmp(const void *pair1, const void *pair2) -{ - long diff = ((pair_t*)pair1)->byte_pos - ((pair_t*)pair2)->byte_pos; - return diff ? diff > 0 ? 
1 : -1 : 0; -} - static void update_char_offset(mrb_state *mrb, mrb_value match) { struct rmatch *rm = RMATCH(match)->rmatch; struct re_registers *regs; - int i, num_regs, num_pos; - long c; - char *s, *p, *q; - mrb_encoding *enc; - pair_t *pairs; + int i, num_regs; if (rm->char_offset_updated) return; @@ -1787,55 +1248,12 @@ update_char_offset(mrb_state *mrb, mrb_value match) rm->char_offset_num_allocated = num_regs; } - enc = mrb_enc_get(mrb, mrb_obj_value(RMATCH(match)->str)); - if (mrb_enc_mbmaxlen(enc) == 1) { - for (i = 0; i < num_regs; i++) { - rm->char_offset[i].beg = BEG(i); - rm->char_offset[i].end = END(i); - } - rm->char_offset_updated = 1; - return; - } - - //pairs = ALLOCA_N(pair_t, num_regs*2); - pairs = mrb_malloc(mrb, sizeof(pair_t)*num_regs*2); - - num_pos = 0; for (i = 0; i < num_regs; i++) { - if (BEG(i) < 0) - continue; - pairs[num_pos++].byte_pos = BEG(i); - pairs[num_pos++].byte_pos = END(i); - } - qsort(pairs, num_pos, sizeof(pair_t), pair_byte_cmp); - - s = p = RMATCH(match)->str->buf; - c = 0; - for (i = 0; i < num_pos; i++) { - q = s + pairs[i].byte_pos; - c += mrb_enc_strlen(p, q, enc); - pairs[i].char_pos = c; - p = q; - } - - for (i = 0; i < num_regs; i++) { - pair_t key, *found; - if (BEG(i) < 0) { - rm->char_offset[i].beg = -1; - rm->char_offset[i].end = -1; - continue; - } - - key.byte_pos = BEG(i); - found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp); - rm->char_offset[i].beg = found->char_pos; - - key.byte_pos = END(i); - found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp); - rm->char_offset[i].end = found->char_pos; + rm->char_offset[i].beg = BEG(i); + rm->char_offset[i].end = END(i); } - rm->char_offset_updated = 1; + return; } /* 15.2.16.3.2 */ @@ -2235,49 +1653,36 @@ option_to_str(char str[4], int options) #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */ static void -mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len, - mrb_encoding *enc, 
mrb_encoding *resenc) +mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len) { const char *p, *pend; int need_escape = 0; - int c, clen; + int c; p = s; pend = p + len; - if (mrb_enc_asciicompat(mrb, enc)) { - while (p < pend) { - c = mrb_enc_ascget(mrb, p, pend, &clen, enc); - if (c == -1) { - if (enc == resenc) { - p += mbclen(p, pend, enc); - } - else { - need_escape = 1; - break; - } - } - else if (c != '/' && mrb_enc_isprint(c, enc)) { - p += clen; - } - else { - need_escape = 1; - break; - } + while (p < pend) { + c = *p; + if (c == -1) { + p += pend - p; + } + else if (c != '/' && ISPRINT(c)) { + p++; + } + else { + need_escape = 1; + break; } - } - else { - need_escape = 1; } if (!need_escape) { mrb_str_buf_cat(mrb, str, s, len); } else { - int unicode_p = mrb_enc_unicode_p(enc); p = s; while (p<pend) { - c = mrb_enc_ascget(mrb, p, pend, &clen, enc); - if (c == '\\' && p+clen < pend) { - int n = clen + mbclen(p+clen, pend, enc); + c = *p; + if (c == '\\' && p+1 < pend) { + int n = 1 + pend - (p+1); mrb_str_buf_cat(mrb, str, p, n); p += n; continue; @@ -2285,38 +1690,21 @@ mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len, else if (c == '/') { char c = '\\'; mrb_str_buf_cat(mrb, str, &c, 1); - mrb_str_buf_cat(mrb, str, p, clen); - } - else if (c == -1) { - clen = mrb_enc_precise_mbclen(p, pend, enc); - if (!MBCLEN_CHARFOUND_P(clen)) { - c = (unsigned char)*p; - clen = 1; - goto hex; - } - if (resenc) { - unsigned int c = mrb_enc_mbc_to_codepoint(p, pend, enc); - mrb_str_buf_cat_escaped_char(mrb, str, c, unicode_p); - } - else { - clen = MBCLEN_CHARFOUND_LEN(clen); - mrb_str_buf_cat(mrb, str, p, clen); - } + mrb_str_buf_cat(mrb, str, p, 1); } - else if (mrb_enc_isprint(c, enc)) { - mrb_str_buf_cat(mrb, str, p, clen); + else if (ISPRINT(c)) { + mrb_str_buf_cat(mrb, str, p, 1); } - else if (!mrb_enc_isspace(c, enc)) { + else if (!ISSPACE(c)) { char b[8]; - hex: snprintf(b, sizeof(b), "\\x%02X", c); mrb_str_buf_cat(mrb, 
str, b, 4); } else { - mrb_str_buf_cat(mrb, str, p, clen); + mrb_str_buf_cat(mrb, str, p, 1); } - p += clen; + p++; } } } @@ -2355,7 +1743,6 @@ mrb_reg_to_s(mrb_state *mrb, mrb_value re) mrb_reg_check(mrb, re); memset(optbuf, 0, 5); - mrb_enc_copy(mrb, str, re); options = RREGEXP(re)->ptr->options; ptr = (UChar*)RREGEXP_SRC_PTR(re); len = RREGEXP_SRC_LEN(re); @@ -2399,7 +1786,7 @@ again: ++ptr; len -= 2; - err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT, + err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, NULL); onig_free(rp); } @@ -2419,9 +1806,8 @@ again: } mrb_str_buf_cat(mrb, str, ":", strlen(":")); - mrb_reg_expr_str(mrb, str, (char*)ptr, len, enc, NULL); + mrb_reg_expr_str(mrb, str, (char*)ptr, len); mrb_str_buf_cat(mrb, str, ")", strlen(")")); - mrb_enc_copy(mrb, str, re); return str; } @@ -2663,8 +2049,6 @@ mrb_init_regexp(mrb_state *mrb) mrb_define_const(mrb, s, "MULTILINE", mrb_fixnum_value(ONIG_OPTION_MULTILINE)); mrb_define_const(mrb, s, "FIXEDENCODING", mrb_fixnum_value(ARG_ENCODING_FIXED)); - //mrb_global_variable(&reg_cache); - s = mrb_define_class(mrb, "MatchData", mrb->object_class); //mrb_undef_method(CLASS_OF(rb_cMatch), "new"); @@ -2705,27 +2089,23 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers { mrb_value val; char *p, *s, *e; - int no, clen; - mrb_encoding *str_enc = mrb_enc_get(mrb, str); - mrb_encoding *src_enc = mrb_enc_get(mrb, src); - int acompat = mrb_enc_asciicompat(mrb, str_enc); -#define ASCGET(mrb,s,e,cl) (acompat ? 
(*cl=1,ISASCII(s[0])?s[0]:-1) : mrb_enc_ascget(mrb, s, e, cl, str_enc)) struct RString *ps = mrb_str_ptr(str); + int no; val.tt = 0; p = s = ps->buf; e = s + ps->len; while (s < e) { - int c = ASCGET(mrb, s, e, &clen); + int c = *s; char *ss; if (c == -1) { - s += mbclen(s, e, str_enc); + s += e - s; continue; } ss = s; - s += clen; + s++; if (c != '\\' || s == e) continue; @@ -2733,16 +2113,16 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers if (!val.tt) { val = mrb_str_buf_new(mrb, ss-p); } - mrb_enc_str_buf_cat(mrb, val, p, ss-p, str_enc); + mrb_str_buf_cat(mrb, val, p, ss-p); - c = ASCGET(mrb, s, e, &clen); + c = *s; if (c == -1) { - s += mbclen(s, e, str_enc); - mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc); + s += e - s; + mrb_str_buf_cat(mrb, val, ss, s-ss); p = s; continue; } - s += clen; + s++; p = s; switch (c) { @@ -2757,18 +2137,18 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers break; case 'k': - if (s < e && ASCGET(mrb, s, e, &clen) == '<') { + if (s < e && *s == '<') { char *name, *name_end; - name_end = name = s + clen; + name_end = name = s + 1; while (name_end < e) { - c = ASCGET(mrb, name_end, e, &clen); + c = *name_end; if (c == '>') break; - name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen; + name_end += c == -1 ? 
e - name_end : 1; } if (name_end < e) { no = name_to_backref_number(mrb, regs, RREGEXP(regexp), name, name_end); - p = s = name_end + clen; + p = s = name_end + 1; break; } else { @@ -2776,7 +2156,7 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers } } - mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc); + mrb_str_buf_cat(mrb, val, ss, s-ss); continue; case '0': @@ -2785,11 +2165,11 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers break; case '`': - mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src), BEG(0), src_enc); + mrb_str_buf_cat(mrb, val, RSTRING_PTR(src), BEG(0)); continue; case '\'': - mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc); + mrb_str_buf_cat(mrb, val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0)); continue; case '+': @@ -2799,31 +2179,29 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers break; case '\\': - mrb_enc_str_buf_cat(mrb, val, s-clen, clen, str_enc); + mrb_str_buf_cat(mrb, val, s-1, 1); continue; default: - mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc); + mrb_str_buf_cat(mrb, val, ss, s-ss); continue; } if (no >= 0) { if (no >= regs->num_regs) continue; if (BEG(no) == -1) continue; - mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc); + mrb_str_buf_cat(mrb, val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no)); } } /* while (s < e) { */ if (!val.tt) return str; if (p < e) { - mrb_enc_str_buf_cat(mrb, val, p, e-p, str_enc); + mrb_str_buf_cat(mrb, val, p, e-p); } return val; } -//#define NEW_NODE(t,a0,a1,a2) mrb_node_newnode((t),(int)(a0),(int)(a1),(int)(a2)) -//#define NEW_IF(c,t,e) NEW_NODE(NODE_IF,c,t,e) static inline NODE * lfp_svar_place(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp) { @@ -3038,9 +2416,6 @@ mrb_memsearch(mrb_state *mrb, const void *x0, int m, const void *y0, int n, mrb_ } return -1; } - else if (enc == mrb_utf8_encoding(mrb)) { - return 
mrb_memsearch_qs_utf8(x0, m, y0, n); - } else { return mrb_memsearch_qs(x0, m, y0, n); } @@ -3077,12 +2452,7 @@ mrb_reg_new_str(mrb_state *mrb, mrb_value s, int options) mrb_value mrb_reg_regcomp(mrb_state *mrb, mrb_value str) { - mrb_value save_str = str; - if (reg_cache.tt && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str) - && ENCODING_GET(mrb, reg_cache) == ENCODING_GET(mrb, str) - && memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0) - return reg_cache; - return reg_cache = mrb_reg_new_str(mrb, save_str, 0); + return mrb_reg_new_str(mrb, str, 0); } int @@ -3143,7 +2513,7 @@ is_special_global_name(const char *m, const char *e, mrb_encoding *enc) ++m; if (m < e && is_identchar(m, e, enc)) { if (!ISASCII(*m)) mb = 1; - m += mrb_enc_mbclen(m, e, enc); + m += e - m; } break; default: @@ -3228,7 +2598,7 @@ mrb_enc_symname2_p(const char *name, long len, mrb_encoding *enc) id: if (m >= e || (*m != '_' && !mrb_enc_isalpha(*m, enc) && ISASCII(*m))) return FALSE; - while (m < e && is_identchar(m, e, enc)) m += mrb_enc_mbclen(m, e, enc); + while (m < e && is_identchar(m, e, enc)) m += e - m; if (localid) { switch (*m) { case '!': case '?': case '=': ++m; diff --git a/src/sprintf.c b/src/sprintf.c index dc9b83dec..b7c5e02fd 100644 --- a/src/sprintf.c +++ b/src/sprintf.c @@ -668,44 +668,37 @@ retry: mrb_value tmp; unsigned int c; int n; -#ifdef INCLUDE_ENCODING - mrb_encoding *enc = mrb_enc_get(mrb, fmt); -#endif //INCLUDE_ENCODING tmp = mrb_check_string_type(mrb, val); if (!mrb_nil_p(tmp)) { if (RSTRING_LEN(tmp) != 1 ) { mrb_raise(mrb, E_ARGUMENT_ERROR, "%%c requires a character"); } -#ifdef INCLUDE_ENCODING - c = mrb_enc_codepoint_len(mrb, RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc); -#else c = RSTRING_PTR(tmp)[0]; n = 1; -#endif //INCLUDE_ENCODING } else { c = mrb_fixnum(val); - n = mrb_enc_codelen(mrb, c, enc); + n = 1; } if (n <= 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character"); } if (!(flags & FWIDTH)) { CHECK(n); - 
mrb_enc_mbcput(c, &buf[blen], enc); + buf[blen] = c; blen += n; } else if ((flags & FMINUS)) { CHECK(n); - mrb_enc_mbcput(c, &buf[blen], enc); + buf[blen] = c; blen += n; FILL(' ', width-1); } else { FILL(' ', width-1); CHECK(n); - mrb_enc_mbcput(c, &buf[blen], enc); + buf[blen] = c; blen += n; } } @@ -717,25 +710,18 @@ format_s: { mrb_value arg = GETARG(); long len, slen; -#ifdef INCLUDE_ENCODING - mrb_encoding *enc = mrb_enc_get(mrb, fmt); -#endif //INCLUDE_ENCODING if (*p == 'p') arg = mrb_inspect(mrb, arg); str = mrb_obj_as_string(mrb, arg); len = RSTRING_LEN(str); - mrb_str_set_len(mrb, result, blen); + RSTRING_LEN(result) = blen; if (flags&(FPREC|FWIDTH)) { slen = RSTRING_LEN(str); if (slen < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence"); } if ((flags&FPREC) && (prec < slen)) { -#ifdef INCLUDE_ENCODING - char *p = mrb_enc_nth(mrb, RSTRING_PTR(str), RSTRING_END(str),prec, enc); -#else char *p = RSTRING_PTR(str) + prec; -#endif //INCLUDE_ENCODING slen = prec; len = p - RSTRING_PTR(str); } @@ -757,12 +743,10 @@ format_s: buf[blen++] = ' '; } } - mrb_enc_associate(mrb, result, enc); break; } } PUSH(RSTRING_PTR(str), len); - mrb_enc_associate(mrb, result, enc); } break; @@ -915,15 +899,8 @@ bin_retry: if (*p == 'X') { char *pp = s; int c; -#ifdef INCLUDE_ENCODING - mrb_encoding *enc = mrb_enc_get(mrb, fmt); -#endif //INCLUDE_ENCODING while ((c = (int)(unsigned char)*pp) != 0) { -#ifdef INCLUDE_ENCODING - *pp = mrb_enc_toupper(c, enc); -#else *pp = toupper(c); -#endif //INCLUDE_ENCODING pp++; } } diff --git a/src/string.c b/src/string.c index 22e3dad60..257286144 100644 --- a/src/string.c +++ b/src/string.c @@ -9,13 +9,12 @@ #include <stdarg.h> #include <string.h> #include "mruby/string.h" +#include <ctype.h> #include "mruby/numeric.h" #include "mruby/range.h" -#include <ctype.h> #include "mruby/array.h" #include "mruby/class.h" #include "mruby/variable.h" -#include "mruby/hash.h" #include <stdio.h> #include "re.h" #ifdef INCLUDE_REGEXP @@ 
-23,8 +22,6 @@ #include "st.h" #endif //INCLUDE_REGEXP -#define mrb_usascii_str_new2 mrb_usascii_str_new_cstr - #ifndef FALSE #define FALSE 0 #endif @@ -38,33 +35,11 @@ const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; #ifdef INCLUDE_REGEXP static mrb_value get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote); #endif //INCLUDE_REGEXP -#ifdef INCLUDE_ENCODING -static void mrb_enc_cr_str_copy_for_substr(mrb_state *mrb, mrb_value dest, mrb_value src); -#else -#define mrb_enc_cr_str_copy_for_substr(mrb, dest, src) -#endif //INCLUDE_ENCODING static mrb_value str_replace(mrb_state *mrb, mrb_value str, mrb_value str2); -#ifdef INCLUDE_ENCODING -static long str_strlen(mrb_state *mrb, mrb_value str, mrb_encoding *enc); -#endif //INCLUDE_ENCODING -#ifdef INCLUDE_ENCODING -#define is_ascii_string(mrb, str) (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT) -#define is_broken_string(mrb, str) (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_BROKEN) -#define STR_ENC_GET(mrb, str) mrb_enc_from_index(mrb, ENCODING_GET(mrb, str)) -#endif //INCLUDE_ENCODING - -void -mrb_str_set_len(mrb_state *mrb, mrb_value str, long len) -{ - mrb_str_modify(mrb, str); - RSTRING_LEN(str) = len; - RSTRING_PTR(str)[len] = '\0'; -} #define RESIZE_CAPA(str,capacity) do {\ RSTRING(str)->buf = mrb_realloc(mrb, RSTRING(str)->buf, (capacity)+1);\ - if (!MRB_STR_NOCAPA_P(str))\ - RSTRING_CAPA(str) = capacity;\ + RSTRING_CAPA(str) = capacity;\ } while (0) #define STR_SET_LEN(str, n) do { \ @@ -75,86 +50,18 @@ mrb_str_set_len(mrb_state *mrb, mrb_value str, long len) RSTRING(str)->len--;\ } while (0) -#ifdef INCLUDE_ENCODING -static mrb_value mrb_enc_cr_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len, - int ptr_encindex, int ptr_cr, int *ptr_cr_ret); -#endif //INCLUDE_ENCODING - -#ifdef INCLUDE_ENCODING -mrb_value -mrb_usascii_str_new_cstr(mrb_state *mrb, const char *ptr) -{ - mrb_value str = mrb_str_new_cstr(mrb, ptr);//mrb_str_new2(ptr); - 
ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT); - return str; -} - -mrb_value -mrb_external_str_new_with_enc(mrb_state *mrb, const char *ptr, long len, mrb_encoding *eenc) -{ - mrb_value str; - - str = mrb_str_new(mrb, ptr, len); - if (eenc == mrb_usascii_encoding(mrb) && - mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT) { - mrb_enc_associate(mrb, str, mrb_ascii8bit_encoding(mrb)); - return str; - } - mrb_enc_associate(mrb, str, eenc); - return mrb_str_conv_enc(mrb, str, eenc, mrb_default_internal_encoding(mrb)); -} - -mrb_value -mrb_locale_str_new(mrb_state *mrb, const char *ptr, long len) -{ - return mrb_external_str_new_with_enc(mrb, ptr, len, mrb_locale_encoding(mrb)); -} - -mrb_value -mrb_str_buf_cat_ascii(mrb_state *mrb, mrb_value str, const char *ptr) -{ - /* ptr must reference NUL terminated ASCII string. */ - int encindex = ENCODING_GET(mrb, str); - mrb_encoding *enc = mrb_enc_from_index(mrb, encindex); - if (mrb_enc_asciicompat(mrb, enc)) { - return mrb_enc_cr_str_buf_cat(mrb, str, ptr, strlen(ptr), - encindex, ENC_CODERANGE_7BIT, 0); - } - else { - //char *buf = ALLOCA_N(char, mrb_enc_mbmaxlen(enc)); - char *buf = mrb_malloc(mrb, mrb_enc_mbmaxlen(enc)); - while (*ptr) { - unsigned int c = (unsigned char)*ptr; - int len = mrb_enc_codelen(mrb, c, enc); - mrb_enc_mbcput(c, buf, enc); - mrb_enc_cr_str_buf_cat(mrb, str, buf, len, - encindex, ENC_CODERANGE_VALID, 0); - ptr++; - } - return str; - } -} - -mrb_value -mrb_filesystem_str_new_cstr(mrb_state *mrb, const char *ptr) -{ - return mrb_external_str_new_with_enc(mrb, ptr, strlen(ptr), mrb_filesystem_encoding(mrb)); -} -#endif //INCLUDE_ENCODING - mrb_value mrb_str_resize(mrb_state *mrb, mrb_value str, size_t len) { size_t slen; - mrb_str_modify(mrb, str); slen = RSTRING_LEN(str); if (len != slen) { if (slen < len || slen -len > 1024) { RSTRING_PTR(str) = mrb_realloc(mrb, RSTRING_PTR(str), len+1); } if (!MRB_STR_NOCAPA_P(str)) { - RSTRING(str)->aux.capa = len; + 
RSTRING_CAPA(str) = len; } RSTRING(str)->len = len; RSTRING(str)->buf[len] = '\0'; /* sentinel */ @@ -162,16 +69,6 @@ mrb_str_resize(mrb_state *mrb, mrb_value str, size_t len) return str; } -#ifdef INCLUDE_ENCODING -mrb_value -mrb_usascii_str_new(mrb_state *mrb, const char *ptr, long len) -{ - mrb_value str = mrb_str_new(mrb, ptr, len); - ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT); - return str; -} -#endif //INCLUDE_ENCODING - static inline void str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len) { @@ -182,36 +79,6 @@ str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len) } } -#ifdef INCLUDE_ENCODING -static inline int -single_byte_optimizable(mrb_state *mrb, mrb_value str) -{ - mrb_encoding *enc; - /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */ - if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) - return 1; - - enc = STR_ENC_GET(mrb, str); - if (mrb_enc_mbmaxlen(enc) == 1) - return 1; - - /* Conservative. Possibly single byte. - * "\xa1" in Shift_JIS for example. 
*/ - return 0; -} - -static inline const char * -search_nonascii(const char *p, const char *e) -{ - while (p < e) { - if (!ISASCII(*p)) - return p; - p++; - } - return NULL; -} -#endif //INCLUDE_ENCODING - static inline void str_modifiable(mrb_value str) { @@ -226,71 +93,6 @@ str_independent(mrb_value str) return 0; } -#ifdef INCLUDE_ENCODING -static inline void -str_enc_copy(mrb_state *mrb, mrb_value str1, mrb_value str2) -{ - mrb_enc_set_index(mrb, str1, ENCODING_GET(mrb, str2)); -} - -static inline long -enc_strlen(const char *p, const char *e, mrb_encoding *enc, int cr) -{ - long c; - const char *q; - - if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) { - return (e - p + mrb_enc_mbminlen(enc) - 1) / mrb_enc_mbminlen(enc); - } - else if (mrb_enc_asciicompat(mrb, enc)) { - c = 0; - if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) { - while (p < e) { - if (ISASCII(*p)) { - q = search_nonascii(p, e); - if (!q) - return c + (e - p); - c += q - p; - p = q; - } - p += mrb_enc_fast_mbclen(p, e, enc); - c++; - } - } - else { - while (p < e) { - if (ISASCII(*p)) { - q = search_nonascii(p, e); - if (!q) - return c + (e - p); - c += q - p; - p = q; - } - p += mrb_enc_mbclen(p, e, enc); - c++; - } - } - return c; - } - - for (c=0; p<e; c++) { - p += mrb_enc_mbclen(p, e, enc); - } - return c; -} - -size_t -mrb_str_capacity(mrb_value str) -{ - if (MRB_STR_NOCAPA_P(str)) { - return RSTRING_LEN(str); - } - else { - return RSTRING_CAPA(str); - } -} -#endif //INCLUDE_ENCODING - #define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class)) static inline mrb_value @@ -299,204 +101,28 @@ str_alloc(mrb_state *mrb) struct RString* s; s = mrb_obj_alloc_string(mrb); - //NEWOBJ(str, struct RString); - //OBJSETUP(str, klass, T_STRING); s->buf = 0; s->len = 0; - s->aux.capa = 0; + s->capa = 0; return mrb_obj_value(s); } -#ifdef INCLUDE_ENCODING -long -mrb_enc_strlen(const char *p, const char *e, mrb_encoding *enc) -{ - return 
enc_strlen(p, e, enc, ENC_CODERANGE_UNKNOWN); -} -#endif //INCLUDE_ENCODING - -static void -str_make_independent(mrb_state *mrb, mrb_value str) -{ - char *ptr; - long len = RSTRING_LEN(str); - - ptr = mrb_malloc(mrb, sizeof(char)*(len+1)); - if (RSTRING_PTR(str)) { - memcpy(ptr, RSTRING_PTR(str), len); - } - ptr[len] = 0; - RSTRING(str)->buf = ptr; - RSTRING(str)->len = len; - RSTRING(str)->aux.capa = len; - MRB_STR_UNSET_NOCAPA(str); -} - -#ifdef INCLUDE_ENCODING -static int -coderange_scan(const char *p, long len, mrb_encoding *enc) -{ - const char *e = p + len; - - if (mrb_enc_to_index(enc) == 0) { - /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */ - p = search_nonascii(p, e); - return p ? ENC_CODERANGE_VALID : ENC_CODERANGE_7BIT; - } - - if (mrb_enc_asciicompat(mrb, enc)) { - p = search_nonascii(p, e); - if (!p) { - return ENC_CODERANGE_7BIT; - } - while (p < e) { - int ret = mrb_enc_precise_mbclen(p, e, enc); - if (!MBCLEN_CHARFOUND_P(ret)) { - return ENC_CODERANGE_BROKEN; - } - p += MBCLEN_CHARFOUND_LEN(ret); - if (p < e) { - p = search_nonascii(p, e); - if (!p) { - return ENC_CODERANGE_VALID; - } - } - } - if (e < p) { - return ENC_CODERANGE_BROKEN; - } - return ENC_CODERANGE_VALID; - } - - while (p < e) { - int ret = mrb_enc_precise_mbclen(p, e, enc); - - if (!MBCLEN_CHARFOUND_P(ret)) { - return ENC_CODERANGE_BROKEN; - } - p += MBCLEN_CHARFOUND_LEN(ret); - } - if (e < p) { - return ENC_CODERANGE_BROKEN; - } - return ENC_CODERANGE_VALID; -} - -int -mrb_enc_str_coderange(mrb_state *mrb, mrb_value str) -{ - int cr = ENC_CODERANGE(str); - - if (cr == ENC_CODERANGE_UNKNOWN) { - mrb_encoding *enc = STR_ENC_GET(mrb, str); - cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc); - ENC_CODERANGE_SET(str, cr); - } - return cr; -} - -char* -mrb_enc_nth(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc) -{ - if (mrb_enc_mbmaxlen(enc) == 1) { - p += nth; - } - else if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) { - p += nth 
* mrb_enc_mbmaxlen(enc); - } - else if (mrb_enc_asciicompat(mrb, enc)) { - const char *p2, *e2; - int n; - - while (p < e && 0 < nth) { - e2 = p + nth; - if (e < e2) - return (char*)e; - if (ISASCII(*p)) { - p2 = search_nonascii(p, e2); - if (!p2) - return (char*)e2; - nth -= p2 - p; - p = p2; - } - n = mrb_enc_mbclen(p, e, enc); - p += n; - nth--; - } - if (nth != 0) - return (char*)e; - return (char*)p; - } - else { - while (p<e && nth--) { - p += mrb_enc_mbclen(p, e, enc); - } - } - if (p > e) p = e; - return (char*)p; -} - -static char* -str_nth(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc, int singlebyte) -{ - if (singlebyte) - p += nth; - else { - p = mrb_enc_nth(mrb, p, e, nth, enc); - } - if (!p) return 0; - if (p > e) p = e; - return (char*)p; -} - /* char offset to byte offset */ -static long -str_offset(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc, int singlebyte) -{ - const char *pp = str_nth(mrb, p, e, nth, enc, singlebyte); - if (!pp) return e - p; - return pp - p; -} - long mrb_str_offset(mrb_state *mrb, mrb_value str, long pos) { - return str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos, - STR_ENC_GET(mrb, str), single_byte_optimizable(mrb, str)); + return pos; } -static void -mrb_enc_cr_str_exact_copy(mrb_state *mrb, mrb_value dest, mrb_value src) -{ - str_enc_copy(mrb, dest, src); - ENC_CODERANGE_SET(dest, ENC_CODERANGE(src)); -} -#else -#define mrb_enc_cr_str_exact_copy(mrb, dest, src) -#endif //INCLUDE_ENCODING - -mrb_value -str_new4(mrb_state *mrb, mrb_value str) +static mrb_value +str_dup(mrb_state *mrb, mrb_value str) { - mrb_value str2; - - str2 = mrb_obj_value(mrb_obj_alloc_string(mrb)); - RSTRING(str2)->len = RSTRING_LEN(str); - RSTRING(str2)->buf = RSTRING_PTR(str); + /* should return shared string */ + struct RString *s = mrb_str_ptr(str); - if (MRB_STR_SHARED_P(str)) { - struct RString *shared = RSTRING_SHARED(str); - FL_SET(str2, MRB_STR_SHARED); - RSTRING_SHARED(str2) = 
shared; - } - else { - FL_SET(str, MRB_STR_SHARED); - RSTRING_SHARED(str) = mrb_str_ptr(str2); - } - mrb_enc_cr_str_exact_copy(mrb, str2, str); - return str2; + return mrb_str_new(mrb, s->buf, s->len); } static mrb_value @@ -506,11 +132,6 @@ str_new(mrb_state *mrb, enum mrb_vtype ttype, const char *p, size_t len) //str = str_alloc(mrb); str = mrb_str_buf_new(mrb, len); -#ifdef INCLUDE_ENCODING - if (len == 0) { - ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); - } -#endif //INCLUDE_ENCODING if (p) { memcpy(RSTRING_PTR(str), p, len); } @@ -519,19 +140,16 @@ str_new(mrb_state *mrb, enum mrb_vtype ttype, const char *p, size_t len) return str; } -mrb_value +static mrb_value mrb_str_new_with_class(mrb_state *mrb, mrb_value obj, const char *ptr, long len) { return str_new(mrb, mrb_type(obj), ptr, len); } -#define mrb_str_new5 mrb_str_new_with_class - static mrb_value str_new_empty(mrb_state *mrb, mrb_value str) { - mrb_value v = mrb_str_new5(mrb, str, 0, 0); - return v; + return mrb_str_new_with_class(mrb, str, 0, 0); } mrb_value @@ -545,7 +163,7 @@ mrb_str_buf_new(mrb_state *mrb, size_t capa) capa = STR_BUF_MIN_SIZE; } s->len = 0; - s->aux.capa = capa; + s->capa = capa; s->buf = mrb_malloc(mrb, capa+1); s->buf[0] = '\0'; @@ -560,7 +178,6 @@ str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len) if (ptr >= RSTRING_PTR(str) && ptr <= RSTRING_END(str)) { off = ptr - RSTRING_PTR(str); } - mrb_str_modify(mrb, str); if (len == 0) return mrb_fixnum_value(0); capa = RSTRING_CAPA(str); if (RSTRING_LEN(str) >= LONG_MAX - len) { @@ -615,7 +232,7 @@ mrb_str_new(mrb_state *mrb, const char *p, size_t len) memcpy(s->buf, p, len); } s->len = len; - s->aux.capa = len; + s->capa = len; s->buf[len] ='\0'; return mrb_obj_value(s); } @@ -627,23 +244,9 @@ mrb_str_new2(mrb_state *mrb, const char *ptr) if (!ptr) { mrb_raise(mrb, E_ARGUMENT_ERROR, "NULL pointer given"); } -#ifdef INCLUDE_ENCODING - return mrb_usascii_str_new2(mrb, ptr); -#else return mrb_str_new(mrb, ptr, 
strlen(ptr)); -#endif //INCLUDE_ENCODING } -#ifdef INCLUDE_ENCODING -mrb_value -mrb_enc_str_new(mrb_state *mrb, const char *ptr, long len, mrb_encoding *enc) -{ - mrb_value str = mrb_str_new(mrb, ptr, len); - mrb_enc_associate(mrb, str, enc); - return str; -} -#endif //INCLUDE_ENCODING - /* * call-seq: (Caution! NULL string) * String.new(str="") => new_str @@ -662,7 +265,7 @@ mrb_str_new_cstr(mrb_state *mrb, const char *p) memcpy(s->buf, p, len); s->buf[len] = 0; s->len = len; - s->aux.capa = len; + s->capa = len; return mrb_obj_value(s); } @@ -715,8 +318,8 @@ mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other) s2 = mrb_str_ptr(other); len = s1->len + s2->len; - if (s1->aux.capa < len) { - s1->aux.capa = len; + if (s1->capa < len) { + s1->capa = len; s1->buf = mrb_realloc(mrb, s1->buf, len+1); } memcpy(s1->buf+s1->len, s2->buf, s2->len); @@ -757,30 +360,7 @@ mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b) static mrb_value mrb_str_plus_m(mrb_state *mrb, mrb_value self) { - mrb_value str3; - mrb_value str2; -#ifdef INCLUDE_ENCODING - mrb_encoding *enc; -#endif //INCLUDE_ENCODING - - //mrb_get_args(mrb, "s", &p, &len); - mrb_get_args(mrb, "o", &str2); - - mrb_string_value(mrb, &str2); -#ifdef INCLUDE_ENCODING - enc = mrb_enc_check(mrb, self, str2); -#endif //INCLUDE_ENCODING - str3 = mrb_str_new(mrb, 0, RSTRING_LEN(self)+RSTRING_LEN(str2)); - memcpy(RSTRING_PTR(str3), RSTRING_PTR(self), RSTRING_LEN(self)); - memcpy(RSTRING_PTR(str3) + RSTRING_LEN(self), - RSTRING_PTR(str2), RSTRING_LEN(str2)); - RSTRING_PTR(str3)[RSTRING_LEN(str3)] = '\0'; -#ifdef INCLUDE_ENCODING - ENCODING_CODERANGE_SET(mrb, str3, mrb_enc_to_index(enc), - ENC_CODERANGE_AND(ENC_CODERANGE(self), ENC_CODERANGE(str2))); -#endif //INCLUDE_ENCODING - - return str3; + return mrb_nil_value(); } /* @@ -792,9 +372,7 @@ mrb_str_plus_m(mrb_state *mrb, mrb_value self) static mrb_value mrb_str_bytesize(mrb_state *mrb, mrb_value self) { - struct RString *s = mrb_str_ptr(self); - - return 
mrb_fixnum_value(s->len); + return mrb_nil_value(); } /* 15.2.10.5.26 */ @@ -808,26 +386,10 @@ mrb_str_bytesize(mrb_state *mrb, mrb_value self) mrb_value mrb_str_size(mrb_state *mrb, mrb_value self) { -#ifdef INCLUDE_ENCODING - long len; - - len = str_strlen(mrb, self, STR_ENC_GET(mrb, self)); - return mrb_fixnum_value(len); -#else - return mrb_str_bytesize(mrb, self); -#endif //INCLUDE_ENCODING + return mrb_fixnum_value(RSTRING_LEN(self)); } -void -mrb_str_modify(mrb_state *mrb, mrb_value str) -{ - if (!str_independent(str)) - str_make_independent(mrb, str); -} - - /* 15.2.10.5.1 */ - /* * call-seq: * str * integer => new_str @@ -840,35 +402,7 @@ mrb_str_modify(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_times(mrb_state *mrb, mrb_value self) { - mrb_value str2; - mrb_int n,len,times; - char *ptr2; - - mrb_get_args(mrb, "i", ×); - - if (times < 0) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument"); - } - if (times && INT32_MAX/times < RSTRING_LEN(self)) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big"); - } - - str2 = mrb_str_new5(mrb, self, 0, len = RSTRING_LEN(self)*times); - ptr2 = RSTRING_PTR(str2); - if (len > 0) { - n = RSTRING_LEN(self); - memcpy(ptr2, RSTRING_PTR(self), n); - while (n <= len/2) { - memcpy(ptr2 + n, ptr2, n); - n *= 2; - } - memcpy(ptr2 + n, ptr2, len-n); - } - ptr2[RSTRING_LEN(str2)] = '\0'; - - mrb_enc_cr_str_copy_for_substr(mrb, str2, self); - - return str2; + return mrb_nil_value(); } /* -------------------------------------------------------------- */ @@ -930,73 +464,8 @@ mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2) static mrb_value mrb_str_cmp_m(mrb_state *mrb, mrb_value str1) { - mrb_value str2; - mrb_int result; - - mrb_get_args(mrb, "o", &str2); - if (mrb_type(str2) != MRB_TT_STRING) { - if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "to_s"))) { - return mrb_nil_value(); - } - else if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "<=>"))) { - return mrb_nil_value(); - } - else - { - mrb_value 
tmp = mrb_funcall(mrb, str2, "<=>", 1, str1); - - if (mrb_nil_p(tmp)) return mrb_nil_value(); - if (!mrb_fixnum(tmp)) { - return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp); - } - result = -mrb_fixnum(tmp); - } - } - else { - result = mrb_str_cmp(mrb, str1, str2); - } - return mrb_fixnum_value(result); -} - -#ifdef INCLUDE_ENCODING -int -mrb_str_comparable(mrb_state *mrb, mrb_value str1, mrb_value str2) -{ - int idx1, idx2; - int rc1, rc2; - - if (RSTRING_LEN(str1) == 0) return TRUE; - if (RSTRING_LEN(str2) == 0) return TRUE; - idx1 = ENCODING_GET(mrb, str1); - idx2 = ENCODING_GET(mrb, str2); - if (idx1 == idx2) return TRUE; - rc1 = mrb_enc_str_coderange(mrb, str1); - rc2 = mrb_enc_str_coderange(mrb, str2); - if (rc1 == ENC_CODERANGE_7BIT) { - if (rc2 == ENC_CODERANGE_7BIT) return TRUE; - if (mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx2))) - return TRUE; - } - if (rc2 == ENC_CODERANGE_7BIT) { - if (mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx1))) - return TRUE; - } - return FALSE; -} - -int -mrb_str_hash_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2) -{ - long len; - - if (!mrb_str_comparable(mrb, str1, str2)) return 1; - if (RSTRING_LEN(str1) == (len = RSTRING_LEN(str2)) && - memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) { - return 0; - } - return 1; + return mrb_nil_value(); } -#endif //INCLUDE_ENCODING static int str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2) @@ -1004,9 +473,6 @@ str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2) const long len = RSTRING_LEN(str1); if (len != RSTRING_LEN(str2)) return FALSE; -#ifdef INCLUDE_ENCODING - if (!mrb_str_comparable(mrb, str1, str2)) return FALSE; -#endif //INCLUDE_ENCODING if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) return TRUE; return FALSE; @@ -1100,202 +566,9 @@ mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr) static mrb_value mrb_str_match(mrb_state *mrb, mrb_value self/* x */) { - mrb_value y; - - mrb_get_args(mrb, "o", 
&y); - switch (mrb_type(y)) { - case MRB_TT_STRING: - mrb_raise(mrb, E_TYPE_ERROR, "type mismatch: String given"); - case MRB_TT_REGEX: -#ifdef INCLUDE_REGEXP - return mrb_reg_match_str(mrb, y, self); -#else - mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); -#endif //INCLUDE_REGEXP - default: - if (mrb_respond_to(mrb, y, mrb_intern(mrb, "=~"))) { - return mrb_funcall(mrb, y, "=~", 1, self); - } - else { - return mrb_nil_value(); - } - } -} -/* ---------------------------------- */ -mrb_value -mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, int len) -{ -#ifdef INCLUDE_ENCODING - mrb_encoding *enc = STR_ENC_GET(mrb, str); -#endif //INCLUDE_ENCODING - mrb_value str2; -#ifdef INCLUDE_ENCODING - char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str); -#else - char *p, *s = RSTRING_PTR(str); -#endif //INCLUDE_ENCODING - - if (len < 0) return mrb_nil_value(); - if (!RSTRING_LEN(str)) { - len = 0; - } -#ifdef INCLUDE_ENCODING - if (single_byte_optimizable(mrb, str)) { -#endif //INCLUDE_ENCODING - if (beg > RSTRING_LEN(str)) return mrb_nil_value(); - if (beg < 0) { - beg += RSTRING_LEN(str); - if (beg < 0) return mrb_nil_value(); - } - if (beg + len > RSTRING_LEN(str)) - len = RSTRING_LEN(str) - beg; - if (len <= 0) { - len = 0; - p = 0; - } - else - p = s + beg; -#ifdef INCLUDE_ENCODING - goto sub; - } - if (beg < 0) { - if (len > -beg) len = -beg; - if (-beg * mrb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) { - beg = -beg; - while (beg-- > len && (e = mrb_enc_prev_char(s, e, e, enc)) != 0); - p = e; - if (!p) return mrb_nil_value(); - while (len-- > 0 && (p = mrb_enc_prev_char(s, p, e, enc)) != 0); - if (!p) return mrb_nil_value(); - len = e - p; - goto sub; - } - else { - beg += str_strlen(mrb, str, enc); - if (beg < 0) return mrb_nil_value(); - } - } - else if (beg > 0 && beg > str_strlen(mrb, str, enc)) { - return mrb_nil_value(); - } - if (len == 0) { - p = 0; - } - else if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) { - int char_sz = 
mrb_enc_mbmaxlen(enc); - - p = s + beg * char_sz; - if (p > e) { - p = e; - len = 0; - } - else if (len * char_sz > e - p) - len = e - p; - else - len *= char_sz; - } - else if ((p = str_nth(mrb, s, e, beg, enc, 0)) == e) { - len = 0; - } - else { - len = str_offset(mrb, p, e, len, enc, 0); - } -sub: -#endif //INCLUDE_ENCODING - if (len > STR_BUF_MIN_SIZE && beg + len == RSTRING_LEN(str)) { -#ifdef INCLUDE_ENCODING - str2 = mrb_str_new4(mrb, str); - str2 = str_new3(mrb, mrb_obj_class(mrb, str2), str2); -#else - str2 = mrb_str_new(mrb, s, RSTRING_LEN(str)); -#endif //INCLUDE_ENCODING - RSTRING(str2)->buf += RSTRING(str2)->len - len; - RSTRING(str2)->len = len; - } - else { - str2 = mrb_str_new5(mrb, str, p, len); - mrb_enc_cr_str_copy_for_substr(mrb, str2, str); - } - - return str2; -} - -#ifdef INCLUDE_REGEXP -static mrb_value -mrb_str_subpat(mrb_state *mrb, mrb_value str, mrb_value re, mrb_int backref) -{ - if (mrb_reg_search(mrb, re, str, 0, 0) >= 0) { - mrb_value match = mrb_backref_get(mrb); - int nth = mrb_reg_backref_number(mrb, match, mrb_fixnum_value(backref)); - return mrb_reg_nth_match(mrb, nth, mrb_backref_get(mrb)); - } return mrb_nil_value(); } -#endif //INCLUDE_REGEXP -/* --- 1-8-7parse.c --> */ - -#ifdef INCLUDE_ENCODING -long -mrb_enc_strlen_cr(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc, int *cr) -{ - long c; - const char *q; - int ret; - - *cr = 0; - if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) { - return (e - p + mrb_enc_mbminlen(enc) - 1) / mrb_enc_mbminlen(enc); - } - else if (mrb_enc_asciicompat(mrb, enc)) { - c = 0; - while (p < e) { - if (ISASCII(*p)) { - q = search_nonascii(p, e); - if (!q) { - if (!*cr) *cr = ENC_CODERANGE_7BIT; - return c + (e - p); - } - c += q - p; - p = q; - } - ret = mrb_enc_precise_mbclen(p, e, enc); - if (MBCLEN_CHARFOUND_P(ret)) { - *cr |= ENC_CODERANGE_VALID; - p += MBCLEN_CHARFOUND_LEN(ret); - } - else { - *cr = ENC_CODERANGE_BROKEN; - p++; - } - c++; - } - if (!*cr) *cr = 
ENC_CODERANGE_7BIT; - return c; - } - - for (c=0; p<e; c++) { - ret = mrb_enc_precise_mbclen(p, e, enc); - if (MBCLEN_CHARFOUND_P(ret)) { - *cr |= ENC_CODERANGE_VALID; - p += MBCLEN_CHARFOUND_LEN(ret); - } - else { - *cr = ENC_CODERANGE_BROKEN; - if (p + mrb_enc_mbminlen(enc) <= e) - p += mrb_enc_mbminlen(enc); - else - p = e; - } - } - if (!*cr) *cr = ENC_CODERANGE_7BIT; - return c; -} -#endif //INCLUDE_ENCODING - -/* --- 1-8-7parse.c --< */ - -#ifndef INCLUDE_ENCODING static inline long mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n) { @@ -1308,7 +581,7 @@ mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long qstable[i] = m + 1; for (; x < xe; ++x) qstable[*x] = xe - x; - /* Searching */ + /* Searching */ for (; y + m <= ys + n; y += *(qstable + y[m])) { if (*xs == *y && memcmp(xs, y, m) == 0) return y - ys; @@ -1316,7 +589,7 @@ mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long return -1; } -int +static int mrb_memsearch(const void *x0, int m, const void *y0, int n) { const unsigned char *x = x0, *y = y0; @@ -1328,7 +601,7 @@ mrb_memsearch(const void *x0, int m, const void *y0, int n) else if (m < 1) { return 0; } - else if (m == 1) { + else if (m == 1) { const unsigned char *ys = y, *ye = ys + n; for (; y < ye; ++y) { if (*x == *y) @@ -1338,60 +611,22 @@ mrb_memsearch(const void *x0, int m, const void *y0, int n) } return mrb_memsearch_qs(x0, m, y0, n); } -#endif //INCLUDE_ENCODING - -/* --- 1-8-7parse.c --< */ -#ifdef INCLUDE_ENCODING -static long -str_strlen(mrb_state *mrb, mrb_value str, mrb_encoding *enc) -{ - const char *p, *e; - long n; - int cr; - - if (single_byte_optimizable(mrb, str)) return RSTRING_LEN(str); - if (!enc) enc = STR_ENC_GET(mrb, str); - p = RSTRING_PTR(str); - e = RSTRING_END(str); - cr = ENC_CODERANGE(str); - n = mrb_enc_strlen_cr(mrb, p, e, enc, &cr); - if (cr) { - ENC_CODERANGE_SET(str, cr); - } - return n; -} -#endif //INCLUDE_ENCODING static 
mrb_int mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset) { mrb_int pos; - char *s, *sptr, *e; + char *s, *sptr; int len, slen; -#ifdef INCLUDE_ENCODING - mrb_encoding *enc; - - enc = mrb_enc_check(mrb, str, sub); - if (is_broken_string(mrb, sub)) { - return -1; - } - len = str_strlen(mrb, str, enc); - slen = str_strlen(mrb, sub, enc); -#else len = RSTRING_LEN(str); slen = RSTRING_LEN(sub); -#endif //INCLUDE_ENCODING if (offset < 0) { offset += len; if (offset < 0) return -1; } if (len - offset < slen) return -1; s = RSTRING_PTR(str); - e = s + RSTRING_LEN(str); if (offset) { -#ifdef INCLUDE_ENCODING - offset = str_offset(mrb, s, RSTRING_END(str), offset, enc, single_byte_optimizable(mrb, str)); -#endif //INCLUDE_ENCODING s += offset; } if (slen == 0) return offset; @@ -1399,21 +634,9 @@ mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset) sptr = RSTRING_PTR(sub); slen = RSTRING_LEN(sub); len = RSTRING_LEN(str) - offset; -#ifdef INCLUDE_ENCODING - for (;;) { - char *t; - pos = mrb_memsearch(mrb, sptr, slen, s, len, enc); - if (pos < 0) return pos; - t = mrb_enc_right_char_head(s, s+pos, e, enc); - if (t == s + pos) break; - if ((len -= t - s) <= 0) return -1; - offset += t - s; - s = t; - } -#else pos = mrb_memsearch(sptr, slen, s, len); + if (pos < 0) return pos; -#endif //INCLUDE_ENCODING return pos + offset; } @@ -1430,7 +653,7 @@ mrb_str_dup(mrb_state *mrb, mrb_value str) dup->buf[s->len] = 0; } dup->len = s->len; - dup->aux.capa = s->len; + dup->capa = s->len; return mrb_obj_value(dup); } @@ -1444,7 +667,7 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx) idx = mrb_fixnum(indx); num_index: - str = mrb_str_substr(mrb, str, idx, 1); + str = mrb_str_subseq(mrb, str, idx, 1); if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value(); return str; @@ -1467,18 +690,14 @@ num_index: mrb_int beg, len; mrb_value tmp; -#ifdef INCLUDE_ENCODING - len = str_strlen(mrb, str, STR_ENC_GET(mrb, str)); 
-#else len = RSTRING_LEN(str); -#endif //INCLUDE_ENCODING switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, 0)) { case 0/*FLASE*/: break; case 2/*OTHER*/: return mrb_nil_value(); default: - tmp = mrb_str_substr(mrb, str, beg, len); + tmp = mrb_str_subseq(mrb, str, beg, len); return tmp; } } @@ -1539,12 +758,12 @@ num_index: static mrb_value mrb_str_aref_m(mrb_state *mrb, mrb_value str) { + mrb_value a1, a2; int argc; - mrb_value *argv; - mrb_get_args(mrb, "*", &argv, &argc); + argc = mrb_get_args(mrb, "o|o", &a1, &a2); if (argc == 2) { - if (mrb_type(argv[0]) == MRB_TT_REGEX) { + if (mrb_type(a1) == MRB_TT_REGEX) { #ifdef INCLUDE_REGEXP return mrb_str_subpat(mrb, str, argv[0], mrb_fixnum(argv[1])); #else @@ -1552,38 +771,14 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str) return mrb_nil_value(); #endif //INCLUDE_REGEXP } - return mrb_str_substr(mrb, str, mrb_fixnum(argv[0]), mrb_fixnum(argv[1])); + return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2)); } if (argc != 1) { mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 1)", argc); } - return mrb_str_aref(mrb, str, argv[0]); + return mrb_str_aref(mrb, str, a1); } -#ifdef INCLUDE_ENCODING -/* As mrb_str_modify(), but don't clear coderange */ -static void -str_modify_keep_cr(mrb_state *mrb, mrb_value str) -{ - if (!str_independent(str)) - str_make_independent(mrb, str); - if (ENC_CODERANGE(str) == ENC_CODERANGE_BROKEN) - /* Force re-scan later */ - ENC_CODERANGE_CLEAR(str); -} - -static void -mrb_str_check_dummy_enc(mrb_state *mrb, mrb_encoding *enc) -{ - if (mrb_enc_dummy_p(enc)) { - mrb_raise(mrb, E_ENCODING_ERROR, "incompatible encoding with this operation: %s", - mrb_enc_name(enc)); - } -} -#else -#define str_modify_keep_cr(mrb, str) mrb_str_modify((mrb), (str)) -#endif //INCLUDE_ENCODING - /* 15.2.10.5.8 */ /* * call-seq: @@ -1600,51 +795,6 @@ mrb_str_check_dummy_enc(mrb_state *mrb, mrb_encoding *enc) static mrb_value mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str) { 
-#ifdef INCLUDE_ENCODING - mrb_encoding *enc; -#endif //INCLUDE_ENCODING - char *s, *send; - int modify = 0; -#ifdef INCLUDE_ENCODING - unsigned int c; - int n; -#endif //INCLUDE_ENCODING - - str_modify_keep_cr(mrb, str); -#ifdef INCLUDE_ENCODING - enc = STR_ENC_GET(mrb, str); - mrb_str_check_dummy_enc(mrb, enc); -#endif //INCLUDE_ENCODING - if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return mrb_nil_value(); - s = RSTRING_PTR(str); send = RSTRING_END(str); -#ifdef INCLUDE_ENCODING - c = mrb_enc_codepoint_len(mrb, s, send, &n, enc); - if (mrb_enc_islower(c, enc)) { - mrb_enc_mbcput(mrb_enc_toupper(c, enc), s, enc); - modify = 1; - } - s += n; - while (s < send) { - c = mrb_enc_codepoint_len(mrb, s, send, &n, enc); - if (mrb_enc_isupper(c, enc)) { - mrb_enc_mbcput(mrb_enc_tolower(c, enc), s, enc); - modify = 1; - } - s += n; - } -#else - if (ISLOWER(*s)) { - *s = toupper(*s); - modify = 1; - } - while (++s < send) { - if (ISUPPER(*s)) { - *s = tolower(*s); - modify = 1; - } - } -#endif //INCLUDE_ENCODING - if (modify) return str; return mrb_nil_value(); } @@ -1681,113 +831,6 @@ mrb_str_capitalize(mrb_state *mrb, mrb_value self) static mrb_value mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) { - mrb_value *argv; - int argc; -#ifdef INCLUDE_ENCODING - mrb_encoding *enc; -#endif //INCLUDE_ENCODING - mrb_value rs; - mrb_int newline; - char *p, *pp, *e; - long len, rslen; - - str_modify_keep_cr(mrb, str); - len = RSTRING_LEN(str); - if (len == 0) return mrb_nil_value(); - p = RSTRING_PTR(str); - e = p + len; - //if (mrb_scan_args(argc, argv, "01", &rs) == 0) { - mrb_get_args(mrb, "*", &argv, &argc); - if (argc == 0) { - rs = mrb_str_new2(mrb, "\n"); -smart_chomp: -#ifdef INCLUDE_ENCODING - enc = mrb_enc_get(mrb, str); - if (mrb_enc_mbminlen(enc) > 1) { - pp = mrb_enc_left_char_head(p, e-mrb_enc_mbminlen(enc), e, enc); - if (mrb_enc_is_newline(pp, e, enc)) { - e = pp; - } - pp = e - mrb_enc_mbminlen(enc); - if (pp >= p) { - pp = mrb_enc_left_char_head(p, pp, e, 
enc); - if (mrb_enc_ascget(mrb, pp, e, 0, enc) == '\r') { - e = pp; - } - } - if (e == RSTRING_END(str)) { - return mrb_nil_value(); - } - len = e - RSTRING_PTR(str); - STR_SET_LEN(str, len); - } - else { -#endif //INCLUDE_ENCODING - if (RSTRING_PTR(str)[len-1] == '\n') { - STR_DEC_LEN(str); - if (RSTRING_LEN(str) > 0 && - RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') { - STR_DEC_LEN(str); - } - } - else if (RSTRING_PTR(str)[len-1] == '\r') { - STR_DEC_LEN(str); - } - else { - return mrb_nil_value(); - } -#ifdef INCLUDE_ENCODING - } -#endif //INCLUDE_ENCODING - RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0'; - return str; - } - rs = argv[0]; - if (mrb_nil_p(rs)) return mrb_nil_value(); - //StringValue(rs); - mrb_string_value(mrb, &rs); - rslen = RSTRING_LEN(rs); - if (rslen == 0) { - while (len>0 && p[len-1] == '\n') { - len--; - if (len>0 && p[len-1] == '\r') - len--; - } - if (len < RSTRING_LEN(str)) { - STR_SET_LEN(str, len); - RSTRING_PTR(str)[len] = '\0'; - return str; - } - return mrb_nil_value(); - } - if (rslen > len) return mrb_nil_value(); - newline = RSTRING_PTR(rs)[rslen-1]; - if (rslen == 1 && newline == '\n') - goto smart_chomp; - -#ifdef INCLUDE_ENCODING - enc = mrb_enc_check(mrb, str, rs); - if (is_broken_string(mrb, rs)) { - return mrb_nil_value(); - } - pp = e - rslen; -#else - pp = p + len - rslen; -#endif //INCLUDE_ENCODING - if (p[len-1] == newline && - (rslen <= 1 || - memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) { -#ifdef INCLUDE_ENCODING - if (mrb_enc_left_char_head(p, pp, e, enc) != pp) - return mrb_nil_value(); - if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) { - ENC_CODERANGE_CLEAR(str); - } -#endif //INCLUDE_ENCODING - STR_SET_LEN(str, RSTRING_LEN(str) - rslen); - RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0'; - return str; - } return mrb_nil_value(); } @@ -1820,26 +863,6 @@ mrb_str_chomp(mrb_state *mrb, mrb_value self) return str; } -#ifdef INCLUDE_ENCODING -static long -chopped_length(mrb_state *mrb, mrb_value str) -{ - mrb_encoding *enc = 
STR_ENC_GET(mrb, str); - const char *p, *p2, *beg, *end; - - beg = RSTRING_PTR(str); - end = beg + RSTRING_LEN(str); - if (beg > end) return 0; - p = mrb_enc_prev_char(beg, end, end, enc); - if (!p) return 0; - if (p > beg && mrb_enc_ascget(mrb, p, end, 0, enc) == '\n') { - p2 = mrb_enc_prev_char(beg, p, end, enc); - if (p2 && mrb_enc_ascget(mrb, p2, end, 0, enc) == '\r') p = p2; - } - return p - beg; -} -#endif //INCLUDE_ENCODING - /* 15.2.10.5.12 */ /* * call-seq: @@ -1852,12 +875,7 @@ chopped_length(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_chop_bang(mrb_state *mrb, mrb_value str) { - str_modify_keep_cr(mrb, str); if (RSTRING_LEN(str) > 0) { -#ifdef INCLUDE_ENCODING - long len; - len = chopped_length(mrb, str); -#else size_t len; len = RSTRING_LEN(str) - 1; if (RSTRING_PTR(str)[len] == '\n') { @@ -1866,14 +884,8 @@ mrb_str_chop_bang(mrb_state *mrb, mrb_value str) len--; } } -#endif //INCLUDE_ENCODING STR_SET_LEN(str, len); RSTRING_PTR(str)[len] = '\0'; -#ifdef INCLUDE_ENCODING - if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) { - ENC_CODERANGE_CLEAR(str); - } -#endif //INCLUDE_ENCODING return str; } return mrb_nil_value(); @@ -1900,13 +912,8 @@ static mrb_value mrb_str_chop(mrb_state *mrb, mrb_value self) { mrb_value str; -#ifdef INCLUDE_ENCODING - str = mrb_str_new5(mrb, self, RSTRING_PTR(self), chopped_length(mrb, self)); - mrb_enc_cr_str_copy_for_substr(mrb, str, self); -#else str = mrb_str_dup(mrb, self); mrb_str_chop_bang(mrb, str); -#endif //INCLUDE_ENCODING return str; } @@ -1921,63 +928,6 @@ mrb_str_chop(mrb_state *mrb, mrb_value self) static mrb_value mrb_str_downcase_bang(mrb_state *mrb, mrb_value str) { -#ifdef INCLUDE_ENCODING - mrb_encoding *enc; -#endif //INCLUDE_ENCODING - char *s, *send; - int modify = 0; - - str_modify_keep_cr(mrb, str); -#ifdef INCLUDE_ENCODING - enc = STR_ENC_GET(mrb, str); - mrb_str_check_dummy_enc(mrb, enc); -#endif //INCLUDE_ENCODING - s = RSTRING_PTR(str); send = RSTRING_END(str); -#ifdef INCLUDE_ENCODING - if 
(single_byte_optimizable(mrb, str)) { -#endif //INCLUDE_ENCODING - while (s < send) { - unsigned int c = *(unsigned char*)s; - -#ifdef INCLUDE_ENCODING - if (mrb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') { -#else - if ('A' <= c && c <= 'Z') { -#endif //INCLUDE_ENCODING - *s = 'a' + (c - 'A'); - modify = 1; - } - s++; - } -#ifdef INCLUDE_ENCODING - } - else { - int ascompat = mrb_enc_asciicompat(mrb, enc); - - while (s < send) { - unsigned int c; - int n; - - if (ascompat && (c = *(unsigned char*)s) < 0x80) { - if (mrb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') { - *s = 'a' + (c - 'A'); - modify = 1; - } - s++; - } - else { - c = mrb_enc_codepoint_len(mrb, s, send, &n, enc); - if (mrb_enc_isupper(c, enc)) { - /* assuming toupper returns codepoint with same size */ - mrb_enc_mbcput(mrb_enc_tolower(c, enc), s, enc); - modify = 1; - } - s += n; - } - } - } -#endif //INCLUDE_ENCODING - if (modify) return str; return mrb_nil_value(); } @@ -2037,62 +987,7 @@ mrb_str_downcase(mrb_state *mrb, mrb_value self) static mrb_value mrb_str_each_line(mrb_state *mrb, mrb_value str) { - mrb_value rs; - int newline; - struct RString *ps = mrb_str_ptr(str); - char *p = ps->buf, *pend = p + ps->len, *s; - char *ptr = p; - long len = ps->len, rslen; - mrb_value line; - struct RString *prs; - mrb_value *argv, b; - int argc; - - //if (mrb_scan_args(argc, argv, "01", &rs) == 0) { - mrb_get_args(mrb, "*&", &argv, &argc, &b); - if (argc > 0) { - rs = argv[0]; - } else { - rs = mrb_str_new2(mrb, "\n"); - } - /*RETURN_ENUMERATOR(str, argc, argv);*/ - if (mrb_nil_p(rs)) { - mrb_yield(mrb, b, str); - return str; - } - //StringValue(rs); - mrb_string_value(mrb, &rs); - prs = mrb_str_ptr(rs); - rslen = prs->len; - if (rslen == 0) { - newline = '\n'; - } - else { - newline = prs->buf[rslen-1]; - } - - for (s = p, p += rslen; p < pend; p++) { - if (rslen == 0 && *p == '\n') { - if (*++p != '\n') continue; - while (*p == '\n') p++; - } - if (ps->buf < p && p[-1] == newline && - (rslen <= 1 || - 
memcmp(prs->buf, p-rslen, rslen) == 0)) { - line = mrb_str_new5(mrb, str, s, p - s); - mrb_yield(mrb, b, line); - str_mod_check(mrb, str, ptr, len); - s = p; - } - } - - if (s != pend) { - if (p > pend) p = pend; - line = mrb_str_new5(mrb, str, s, p - s); - mrb_yield(mrb, b, line); - } - - return str; + return mrb_nil_value(); } /* 15.2.10.5.16 */ @@ -2106,7 +1001,7 @@ mrb_str_each_line(mrb_state *mrb, mrb_value str) * "".empty? #=> true */ static mrb_value -mrb_str_empty(mrb_state *mrb, mrb_value self) +mrb_str_empty_p(mrb_state *mrb, mrb_value self) { struct RString *s = mrb_str_ptr(self); @@ -2135,265 +1030,63 @@ mrb_str_eql(mrb_state *mrb, mrb_value self) return mrb_false_value(); } -#ifdef INCLUDE_ENCODING -static void -mrb_enc_cr_str_copy_for_substr(mrb_state *mrb, mrb_value dest, mrb_value src) -{ - /* this function is designed for copying encoding and coderange - * from src to new string "dest" which is made from the part of src. - */ - str_enc_copy(mrb, dest, src); - switch (ENC_CODERANGE(src)) { - case ENC_CODERANGE_7BIT: - ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT); - break; - case ENC_CODERANGE_VALID: - if (!mrb_enc_asciicompat(mrb, STR_ENC_GET(mrb, src)) || - search_nonascii(RSTRING_PTR(dest), RSTRING_END(dest))) - ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID); - else - ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT); - break; - default: - if (RSTRING_LEN(dest) == 0) { - if (!mrb_enc_asciicompat(mrb, STR_ENC_GET(mrb, src))) - ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID); - else - ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT); - } - break; - } -} -#endif //INCLUDE_ENCODING - -static mrb_value -str_replace_shared(mrb_state *mrb, mrb_value str2, mrb_value str) -{ - str = mrb_str_new_frozen(mrb, str); - RSTRING(str2)->len = RSTRING_LEN(str); - RSTRING(str2)->buf = RSTRING_PTR(str); - RSTRING_SHARED(str2) = mrb_str_ptr(str); - FL_SET(str2, MRB_STR_SHARED); - mrb_enc_cr_str_exact_copy(mrb, str2, str); - - return str2; -} - -static mrb_value 
-str_new_shared(mrb_state *mrb, struct RClass* klass, mrb_value str) -{ - return str_replace_shared(mrb, str_alloc(mrb), str); -} - -mrb_value -str_new3(mrb_state *mrb, struct RClass* klass, mrb_value str) -{ - return str_new_shared(mrb, klass, str); -} - -mrb_value -mrb_str_new_shared(mrb_state *mrb, mrb_value str) -{ - mrb_value str2 = str_new3(mrb, mrb_obj_class(mrb, str), str); - - return str2; -} - mrb_value mrb_str_new_frozen(mrb_state *mrb, mrb_value orig) { - struct RClass* klass; - mrb_value str; - - klass = mrb_obj_class(mrb, orig); - - if (MRB_STR_SHARED_P(orig) && RSTRING_SHARED(orig)) { - long ofs; - ofs = RSTRING_LEN(str) - RSTRING_SHARED(orig)->len; -#ifdef INCLUDE_ENCODING - if ((ofs > 0) || (klass != RBASIC(str)->c) || - ENCODING_GET(mrb, str) != ENCODING_GET(mrb, orig)) { -#else - if ((ofs > 0) || (klass != RBASIC(str)->c)) { -#endif //INCLUDE_ENCODING - str = str_new3(mrb, klass, str); - RSTRING_PTR(str) += ofs; - RSTRING_LEN(str) -= ofs; - mrb_enc_cr_str_exact_copy(mrb, str, orig); - } - } - else { - str = str_new4(mrb, orig); - } - return str; -} - -mrb_value -mrb_str_drop_bytes(mrb_state *mrb, mrb_value str, long len) -{ - char *ptr = RSTRING_PTR(str); - long olen = RSTRING_LEN(str), nlen; - - str_modifiable(str); - if (len > olen) len = olen; - nlen = olen - len; - if (!MRB_STR_SHARED_P(str)) mrb_str_new4(mrb, str); - ptr = RSTRING(str)->buf += len; - RSTRING(str)->len = nlen; - ptr[nlen] = 0; - //ENC_CODERANGE_CLEAR(str); - return str; + return str_dup(mrb, orig); } mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, long beg, long len) { mrb_value str2; - if (RSTRING_LEN(str) == beg + len && - STR_BUF_MIN_SIZE < len) { - str2 = mrb_str_new_shared(mrb, mrb_str_new_frozen(mrb, str)); - mrb_str_drop_bytes(mrb, str2, beg); - } - else { - str2 = mrb_str_new5(mrb, str, RSTRING_PTR(str)+beg, len); - } - mrb_enc_cr_str_copy_for_substr(mrb, str2, str); + str2 = mrb_str_new_with_class(mrb, str, RSTRING_PTR(str)+beg, len); return str2; } -#ifdef 
INCLUDE_ENCODING -int -mrb_enc_str_asciionly_p(mrb_state *mrb, mrb_value str) +mrb_value +mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, int len) { - mrb_encoding *enc = STR_ENC_GET(mrb, str); - - if (!mrb_enc_asciicompat(mrb, enc)) - return 0/*FALSE*/; - else if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT) - return 1/*TRUE*/; - return 0/*FALSE*/; -} + mrb_value str2; + char *p, *s = RSTRING_PTR(str); -static mrb_value -mrb_enc_cr_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len, - int ptr_encindex, int ptr_cr, int *ptr_cr_ret) -{ - int str_encindex = ENCODING_GET(mrb, str); - int res_encindex; - int str_cr, res_cr; - int str_a8 = ENCODING_IS_ASCII8BIT(str); - int ptr_a8 = ptr_encindex == 0; - - str_cr = ENC_CODERANGE(str); - - if (str_encindex == ptr_encindex) { - if (str_cr == ENC_CODERANGE_UNKNOWN || - (ptr_a8 && str_cr != ENC_CODERANGE_7BIT)) { - ptr_cr = ENC_CODERANGE_UNKNOWN; - } - else if (ptr_cr == ENC_CODERANGE_UNKNOWN) { - ptr_cr = coderange_scan(ptr, len, mrb_enc_from_index(mrb, ptr_encindex)); - } + if (len < 0) return mrb_nil_value(); + if (!RSTRING_LEN(str)) { + len = 0; } - else { - mrb_encoding *str_enc = mrb_enc_from_index(mrb, str_encindex); - mrb_encoding *ptr_enc = mrb_enc_from_index(mrb, ptr_encindex); - if (!mrb_enc_asciicompat(mrb, str_enc) || !mrb_enc_asciicompat(mrb, ptr_enc)) { - if (len == 0) - return str; - if (RSTRING_LEN(str) == 0) { - mrb_str_buf_cat(mrb, str, ptr, len); - ENCODING_CODERANGE_SET(mrb, str, ptr_encindex, ptr_cr); - return str; - } - goto incompatible; - } - if (ptr_cr == ENC_CODERANGE_UNKNOWN) { - ptr_cr = coderange_scan(ptr, len, ptr_enc); - } - if (str_cr == ENC_CODERANGE_UNKNOWN) { - if (str_a8 || ptr_cr != ENC_CODERANGE_7BIT) { - str_cr = mrb_enc_str_coderange(mrb, str); - } - } + if (beg > RSTRING_LEN(str)) return mrb_nil_value(); + if (beg < 0) { + beg += RSTRING_LEN(str); + if (beg < 0) return mrb_nil_value(); } - if (ptr_cr_ret) - *ptr_cr_ret = ptr_cr; - - if 
(str_encindex != ptr_encindex && - str_cr != ENC_CODERANGE_7BIT && - ptr_cr != ENC_CODERANGE_7BIT) { -incompatible: - mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s", - mrb_enc_name(mrb_enc_from_index(mrb, str_encindex)), - mrb_enc_name(mrb_enc_from_index(mrb, ptr_encindex))); + if (beg + len > RSTRING_LEN(str)) + len = RSTRING_LEN(str) - beg; + if (len <= 0) { + len = 0; + p = 0; } + else + p = s + beg; - if (str_cr == ENC_CODERANGE_UNKNOWN) { - res_encindex = str_encindex; - res_cr = ENC_CODERANGE_UNKNOWN; - } - else if (str_cr == ENC_CODERANGE_7BIT) { - if (ptr_cr == ENC_CODERANGE_7BIT) { - res_encindex = !str_a8 ? str_encindex : ptr_encindex; - res_cr = ENC_CODERANGE_7BIT; - } - else { - res_encindex = ptr_encindex; - res_cr = ptr_cr; - } - } - else if (str_cr == ENC_CODERANGE_VALID) { - res_encindex = str_encindex; - if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID) - res_cr = str_cr; - else - res_cr = ptr_cr; + if (len > STR_BUF_MIN_SIZE && beg + len == RSTRING_LEN(str)) { + str2 = mrb_str_new(mrb, s, RSTRING_LEN(str)); + RSTRING(str2)->buf += RSTRING(str2)->len - len; + RSTRING(str2)->len = len; } - else { /* str_cr == ENC_CODERANGE_BROKEN */ - res_encindex = str_encindex; - res_cr = str_cr; - if (0 < len) res_cr = ENC_CODERANGE_UNKNOWN; + else { + str2 = mrb_str_new_with_class(mrb, str, p, len); } - if (len < 0) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)"); - } - str_buf_cat(mrb, str, ptr, len); - ENCODING_CODERANGE_SET(mrb, str, res_encindex, res_cr); - return str; -} - -mrb_value -mrb_enc_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len, mrb_encoding *ptr_enc) -{ - return mrb_enc_cr_str_buf_cat(mrb, str, ptr, len, - mrb_enc_to_index(ptr_enc), ENC_CODERANGE_UNKNOWN, NULL); + return str2; } mrb_value mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2) { - int str2_cr; - - str2_cr = ENC_CODERANGE(str2); - - mrb_enc_cr_str_buf_cat(mrb, str, 
RSTRING_PTR(str2), RSTRING_LEN(str2), - ENCODING_GET(mrb, str2), str2_cr, &str2_cr); - - ENC_CODERANGE_SET(str2, str2_cr); - - return str; -} -#else -mrb_value -mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2) -{ mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2)); return str; } -#endif //INCLUDE_ENCODING static inline void str_discard(mrb_state *mrb, mrb_value str) @@ -2406,38 +1099,6 @@ str_discard(mrb_state *mrb, mrb_value str) } } -void -mrb_str_shared_replace(mrb_state *mrb, mrb_value str, mrb_value str2) -{ -#ifdef INCLUDE_ENCODING - mrb_encoding *enc; - int cr; -#endif //INCLUDE_ENCODING - - if (mrb_obj_equal(mrb, str, str2)) return; -#ifdef INCLUDE_ENCODING - enc = STR_ENC_GET(mrb, str2); - cr = ENC_CODERANGE(str2); -#endif //INCLUDE_ENCODING - str_discard(mrb, str); - MRB_STR_UNSET_NOCAPA(str); - RSTRING_PTR(str) = RSTRING_PTR(str2); - RSTRING_LEN(str) = RSTRING_LEN(str2); - if (MRB_STR_NOCAPA_P(str2)) { - FL_SET(str, RBASIC(str2)->flags & MRB_STR_NOCAPA); - RSTRING_SHARED(str) = RSTRING_SHARED(str2); - } - else { - RSTRING_CAPA(str) = RSTRING_CAPA(str2); - } - - MRB_STR_UNSET_NOCAPA(str2); /* abandon str2 */ - RSTRING_PTR(str2)[0] = 0; - RSTRING_LEN(str2) = 0; - mrb_enc_associate(mrb, str, enc); - ENC_CODERANGE_SET(str, cr); -} - #ifdef INCLUDE_REGEXP static mrb_value str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang) @@ -2450,7 +1111,6 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang) mrb_int beg0, end0; mrb_int offset, blen, len, last; char *sp, *cp; - mrb_encoding *str_enc; mrb_get_args(mrb, "*", &argv, &argc); switch (argc) { @@ -2478,7 +1138,6 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang) dest = mrb_str_buf_new(mrb, blen); sp = RSTRING_PTR(str); cp = sp; - str_enc = STR_ENC_GET(mrb, str); do { n++; @@ -2490,7 +1149,7 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang) len = beg - offset; /* copy pre-match substr */ if (len) { - mrb_enc_str_buf_cat(mrb, dest, cp, len, str_enc); + mrb_str_buf_cat(mrb, 
dest, cp, len); } mrb_str_buf_append(mrb, dest, val); @@ -2503,8 +1162,8 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang) * in order to prevent infinite loops. */ if (RSTRING_LEN(str) <= end0) break; - len = mrb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc); - mrb_enc_str_buf_cat(mrb, dest, RSTRING_PTR(str)+end0, len, str_enc); + len = RSTRING_LEN(str)-end0; + mrb_str_buf_cat(mrb, dest, RSTRING_PTR(str)+end0, len); offset = end0 + len; } cp = RSTRING_PTR(str) + offset; @@ -2512,17 +1171,10 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang) beg = mrb_reg_search(mrb, pat, str, offset, 0); } while (beg >= 0); if (RSTRING_LEN(str) > offset) { - mrb_enc_str_buf_cat(mrb, dest, cp, RSTRING_LEN(str) - offset, str_enc); + mrb_str_buf_cat(mrb, dest, cp, RSTRING_LEN(str) - offset); } mrb_reg_search(mrb, pat, str, last, 0); - if (bang) { - mrb_str_shared_replace(mrb, str, dest); - } - else { - RBASIC(dest)->c = mrb_obj_class(mrb, str); - str = dest; - } - + RBASIC(dest)->c = mrb_obj_class(mrb, str); return str; } @@ -2578,7 +1230,6 @@ mrb_str_gsub(mrb_state *mrb, mrb_value self) static mrb_value mrb_str_gsub_bang(mrb_state *mrb, mrb_value self) { - str_modify_keep_cr(mrb, self); //return str_gsub(argc, argv, self, 1); return str_gsub(mrb, self, 1); } @@ -2694,18 +1345,10 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) } if (pos < 0) { -#ifdef INCLUDE_ENCODING - pos += str_strlen(mrb, str, STR_ENC_GET(mrb, str)); -#else pos += RSTRING_LEN(str); -#endif //INCLUDE_ENCODING if (pos < 0) { if (mrb_type(sub) == MRB_TT_REGEX) { -#ifdef INCLUDE_REGEXP - mrb_backref_set(mrb, mrb_nil_value()); -#else - mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); -#endif //INCLUDE_REGEXP + mrb_raise(mrb, E_TYPE_ERROR, "Regexp class not supported"); } return mrb_nil_value(); } @@ -2714,11 +1357,9 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) switch (mrb_type(sub)) { case MRB_TT_REGEX: #ifdef INCLUDE_REGEXP - if (pos > str_strlen(mrb, str, 
STR_ENC_GET(mrb, str))) + if (pos > RSTRING_LEN(str)) return mrb_nil_value(); - pos = str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos, - mrb_enc_check(mrb, str, sub), single_byte_optimizable(mrb, str)); - + pos = mrb_str_offset(mrb, str, pos); pos = mrb_reg_search(mrb, sub, str, pos, 0); pos = mrb_str_sublen(mrb, str, pos); #else @@ -2750,9 +1391,6 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) /* fall through */ case MRB_TT_STRING: pos = mrb_str_index(mrb, str, sub, pos); -#ifdef INCLUDE_ENCODING - pos = mrb_str_sublen(mrb, str, pos); -#endif //INCLUDE_ENCODING break; } @@ -2763,22 +1401,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) static mrb_value str_replace(mrb_state *mrb, mrb_value str, mrb_value str2) { - long len; - - len = RSTRING_LEN(str2); - if (MRB_STR_SHARED_P(str2)) { - struct RString *shared = RSTRING_SHARED(str2); - RSTRING_LEN(str) = len; - RSTRING_PTR(str) = shared->buf; - FL_SET(str, MRB_STR_SHARED); - RSTRING_SHARED(str) = shared; - } - else { - str_replace_shared(mrb, str, str2); - } - - mrb_enc_cr_str_exact_copy(mrb, str, str2); - return str; + return mrb_nil_value(); } /* 15.2.10.5.24 */ @@ -2825,33 +1448,10 @@ mrb_str_init(mrb_state *mrb, mrb_value self) return self; } -#ifdef INCLUDE_ENCODING -mrb_sym -mrb_intern3(mrb_state *mrb, const char *name, long len, mrb_encoding *enc) -{ - return mrb_intern(mrb, name); -} -#endif //INCLUDE_ENCODING - mrb_sym mrb_intern_str(mrb_state *mrb, mrb_value str) { - mrb_sym id; -#ifdef INCLUDE_ENCODING - mrb_encoding *enc; - - if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT) { - enc = mrb_usascii_encoding(mrb); - } - else { - enc = mrb_enc_get(mrb, str); - } - id = mrb_intern3(mrb, RSTRING_PTR(str), RSTRING_LEN(str), enc); -#else - id = mrb_intern(mrb, RSTRING_PTR(str)); -#endif //INCLUDE_ENCODING - str = RB_GC_GUARD(str); - return id; + return mrb_intern(mrb, RSTRING_PTR(str)); } /* 15.2.10.5.25 */ @@ -2984,66 +1584,19 @@ mrb_str_match_m(mrb_state *mrb, mrb_value self) static 
mrb_value mrb_str_reverse(mrb_state *mrb, mrb_value str) { -#ifdef INCLUDE_ENCODING - mrb_encoding *enc; -#endif //INCLUDE_ENCODING - mrb_value rev; - char *s, *e, *p; -#ifdef INCLUDE_ENCODING - int single = 1; -#endif //INCLUDE_ENCODING + mrb_value obj; + char *s, *e, *p; - if (RSTRING_LEN(str) <= 1) return mrb_str_dup(mrb, str); -#ifdef INCLUDE_ENCODING - enc = STR_ENC_GET(mrb, str); -#endif //INCLUDE_ENCODING - rev = mrb_str_new5(mrb, str, 0, RSTRING_LEN(str)); - s = RSTRING_PTR(str); e = RSTRING_END(str); - p = RSTRING_END(rev); + if (RSTRING(str)->len <= 1) return str_dup(mrb, str); - if (RSTRING_LEN(str) > 1) { -#ifdef INCLUDE_ENCODING - if (single_byte_optimizable(mrb, str)) { -#endif //INCLUDE_ENCODING - while (s < e) { - *--p = *s++; - } -#ifdef INCLUDE_ENCODING - } - else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID) { - while (s < e) { - int clen = mrb_enc_fast_mbclen(s, e, enc); - - if (clen > 1 || (*s & 0x80)) single = 0; - p -= clen; - memcpy(p, s, clen); - s += clen; - } - } - else { - while (s < e) { - int clen = mrb_enc_mbclen(s, e, enc); + obj = mrb_str_new_with_class(mrb, str, 0, RSTRING(str)->len); + s = RSTRING(str)->buf; e = s + RSTRING(str)->len - 1; + p = RSTRING(obj)->buf; - if (clen > 1 || (*s & 0x80)) single = 0; - p -= clen; - memcpy(p, s, clen); - s += clen; - } - } - } - STR_SET_LEN(rev, RSTRING_LEN(str)); - if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) { - if (single) { - ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); - } - else { - ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID); - } -#endif //INCLUDE_ENCODING - } - mrb_enc_cr_str_copy_for_substr(mrb, rev, str); - - return rev; + while (e >= s) { + *p++ = *e--; + } + return obj; } /* 15.2.10.5.30 */ @@ -3056,29 +1609,18 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str) static mrb_value mrb_str_reverse_bang(mrb_state *mrb, mrb_value str) { -#ifdef INCLUDE_ENCODING - if (RSTRING_LEN(str) > 1) { - if (single_byte_optimizable(mrb, str)) { -#endif //INCLUDE_ENCODING - char *s, *e, c; - 
str_modify_keep_cr(mrb, str); - s = RSTRING_PTR(str); - e = RSTRING_END(str) - 1; - while (s < e) { - c = *s; - *s++ = *e; - *e-- = c; - } -#ifdef INCLUDE_ENCODING - } - else { - mrb_str_shared_replace(mrb, str, mrb_str_reverse(mrb, str)); + char *s, *e; + char c; + + if (RSTRING(str)->len > 1) { + s = RSTRING(str)->buf; + e = s + RSTRING(str)->len - 1; + while (s < e) { + c = *s; + *s++ = *e; + *e-- = c; } } - else { - str_modify_keep_cr(mrb, str); - } -#endif //INCLUDE_ENCODING return str; } @@ -3135,12 +1677,7 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) size_t mrb_str_sublen(mrb_state *mrb, mrb_value str, long pos) { - if (single_byte_optimizable(mrb, str) || pos < 0) - return pos; - else { - char *p = RSTRING_PTR(str); - return enc_strlen(p, p + pos, STR_ENC_GET(mrb, str), ENC_CODERANGE(str)); - } + return pos; } #endif //INCLUDE_ENCODING @@ -3170,14 +1707,8 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) int argc; mrb_value sub; mrb_value vpos; -#ifdef INCLUDE_ENCODING - mrb_encoding *enc = STR_ENC_GET(mrb, str); - int pos, len = str_strlen(mrb, str, enc); -#else int pos, len = RSTRING_LEN(str); -#endif //INCLUDE_ENCODING - //if (mrb_scan_args(argc, argv, "11", &sub, &vpos) == 2) { mrb_get_args(mrb, "*", &argv, &argc); if (argc == 2) { sub = argv[0]; @@ -3209,9 +1740,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) switch (mrb_type(sub)) { case MRB_TT_REGEX: #ifdef INCLUDE_REGEXP - pos = str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos, - STR_ENC_GET(mrb, str), single_byte_optimizable(mrb, str)); - + pos = mrb_str_offset(mrb, str, pos); if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) { pos = mrb_reg_search(mrb, sub, str, pos, 1); pos = mrb_str_sublen(mrb, str, pos); @@ -3269,12 +1798,11 @@ scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start) pmatch = mrb_match_ptr(match); regs = &pmatch->rmatch->regs; if (regs->beg[0] == regs->end[0]) { - mrb_encoding *enc = STR_ENC_GET(mrb, str); /* * Always 
consume at least one character of the input string */ if (ps->len > regs->end[0]) - *start = regs->end[0] + mrb_enc_fast_mbclen(RSTRING_PTR(str)+regs->end[0],RSTRING_END(str), enc); + *start = regs->end[0] + RSTRING_LEN(str)-regs->end[0]; else *start = regs->end[0] + 1; } @@ -3426,16 +1954,11 @@ static const char isspacetable[256] = { * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""] */ -//static mrb_value -//mrb_str_split_m(int argc, mrb_value *argv, mrb_value str) static mrb_value mrb_str_split_m(mrb_state *mrb, mrb_value str) { mrb_value *argv; int argc; -#ifdef INCLUDE_ENCODING - mrb_encoding *enc; -#endif //INCLUDE_ENCODING mrb_value spat; mrb_value limit; enum {awk, string, regexp} split_type; @@ -3462,21 +1985,12 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) i = 1; } -#ifdef INCLUDE_ENCODING - enc = STR_ENC_GET(mrb, str); -#endif //INCLUDE_ENCODING - //if (mrb_nil_p(spat)) { if (argc == 0) { -// spat = mrb_nil_value(); -// goto fs_set; split_type = awk; } else { //fs_set: if (mrb_type(spat) == MRB_TT_STRING) { -#ifdef INCLUDE_REGEXP - mrb_encoding *enc2 = STR_ENC_GET(mrb, spat); -#endif //INCLUDE_REGEXP split_type = string; #ifdef INCLUDE_REGEXP if (RSTRING_LEN(spat) == 0) { @@ -3484,20 +1998,13 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) spat = mrb_reg_regcomp(mrb, spat); split_type = regexp; } - else if (mrb_enc_asciicompat(mrb, enc2) == 1) { + else { #endif //INCLUDE_REGEXP if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){ split_type = awk; } #ifdef INCLUDE_REGEXP } - else { - int l; - if (mrb_enc_ascget(mrb, RSTRING_PTR(spat), RSTRING_END(spat), &l, enc2) == ' ' && - RSTRING_LEN(spat) == l) { - split_type = awk; - } - } #endif //INCLUDE_REGEXP } else { @@ -3520,89 +2027,28 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) unsigned int c; end = beg; -#ifdef INCLUDE_ENCODING - if (is_ascii_string(mrb, str)) { -#endif //INCLUDE_ENCODING - while (ptr < eptr) { - c = (unsigned char)*ptr++; - if (skip) { - if 
(ascii_isspace(c)) { - beg = ptr - bptr; - } - else { - end = ptr - bptr; - skip = 0; - if (!mrb_nil_p(limit) && lim <= i) break; - } - } - else if (ascii_isspace(c)) { - mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg)); - skip = 1; - beg = ptr - bptr; - if (!mrb_nil_p(limit)) ++i; - } - else { - end = ptr - bptr; - } - } -#ifdef INCLUDE_ENCODING - } - else { - while (ptr < eptr) { - int n; - - c = mrb_enc_codepoint_len(mrb, ptr, eptr, &n, enc); - ptr += n; - if (skip) { - if (mrb_isspace(c)) { - beg = ptr - bptr; - } - else { - end = ptr - bptr; - skip = 0; - if (!mrb_nil_p(limit) && lim <= i) break; - } - } - else if (mrb_isspace(c)) { - mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg)); - skip = 1; - beg = ptr - bptr; - if (!mrb_nil_p(limit)) ++i; - } - else { - end = ptr - bptr; - } + while (ptr < eptr) { + c = (unsigned char)*ptr++; + if (skip) { + if (ascii_isspace(c)) { + beg = ptr - bptr; + } + else { + end = ptr - bptr; + skip = 0; + if (!mrb_nil_p(limit) && lim <= i) break; + } + } + else if (ascii_isspace(c)) { + mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg)); + skip = 1; + beg = ptr - bptr; + if (!mrb_nil_p(limit)) ++i; } - } - } - else if (split_type == string) { - char *ptr = RSTRING_PTR(str); - char *temp = ptr; - char *eptr = RSTRING_END(str); - char *sptr = RSTRING_PTR(spat); - long slen = RSTRING_LEN(spat); - - if (is_broken_string(mrb, str)) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(STR_ENC_GET(mrb, str))); - } - if (is_broken_string(mrb, spat)) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(STR_ENC_GET(mrb, spat))); - } - enc = mrb_enc_check(mrb, str, spat); - while (ptr < eptr && - (end = mrb_memsearch(mrb, sptr, slen, ptr, eptr - ptr, enc)) >= 0) { - /* Check we are at the start of a char */ - char *t = mrb_enc_right_char_head(ptr, ptr + end, eptr, enc); - if (t != ptr + end) { - ptr = t; - continue; + else { + end = ptr 
- bptr; } - mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end)); - ptr += end + slen; - if (!mrb_nil_p(limit) && lim <= ++i) break; } - beg = ptr - temp; -#endif //INCLUDE_ENCODING } else { #ifdef INCLUDE_REGEXP @@ -3621,17 +2067,14 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) break; } else if (last_null == 1) { - mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, - mrb_enc_fast_mbclen(ptr+beg, - ptr+len, - enc))); + mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, len)); beg = start; } else { if (ptr+start == ptr+len) start++; else - start += mrb_enc_fast_mbclen(ptr+start,ptr+len,enc); + start += len; last_null = 1; continue; } @@ -3696,77 +2139,6 @@ mrb_block_given_p() static mrb_value mrb_str_sub_bang(mrb_state *mrb, mrb_value str) { - mrb_value *argv; - int argc; - mrb_value pat, repl; - long plen; - - mrb_get_args(mrb, "*", &argv, &argc); - if (argc == 1 && mrb_block_given_p()) { - /* do nothing */ - } - else if (argc == 2) { - repl = argv[1]; - //StringValue(repl); - mrb_string_value(mrb, &repl); - } - else { - mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2)", argc); - } - - pat = get_pat(mrb, argv[0], 1); - str_modifiable(str); - if (mrb_reg_search(mrb, pat, str, 0, 0) >= 0) { - mrb_encoding *enc; - int cr = ENC_CODERANGE(str); - mrb_value match = mrb_backref_get(mrb); - struct re_registers *regs = RMATCH_REGS(match); - long beg0 = BEG(0); - long end0 = END(0); - char *p, *rp; - long len, rlen; - - repl = mrb_reg_regsub(mrb, repl, str, regs, pat); - enc = mrb_enc_compatible(mrb, str, repl); - if (!enc) { - mrb_encoding *str_enc = STR_ENC_GET(mrb, str); - p = RSTRING_PTR(str); len = RSTRING_LEN(str); - if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT || - coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) { - mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s", - mrb_enc_name(str_enc), - mrb_enc_name(STR_ENC_GET(mrb, repl))); - } - enc = 
STR_ENC_GET(mrb, repl); - } - mrb_str_modify(mrb, str); - mrb_enc_associate(mrb, str, enc); - if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) { - int cr2 = ENC_CODERANGE(repl); - if (cr2 == ENC_CODERANGE_BROKEN || - (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT)) - cr = ENC_CODERANGE_UNKNOWN; - else - cr = cr2; - } - plen = end0 - beg0; - rp = RSTRING_PTR(repl); rlen = RSTRING_LEN(repl); - len = RSTRING_LEN(str); - if (rlen > plen) { - RESIZE_CAPA(str, len + rlen - plen); - } - p = RSTRING_PTR(str); - if (rlen != plen) { - memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen); - } - memcpy(p + beg0, rp, rlen); - len += rlen - plen; - STR_SET_LEN(str, len); - RSTRING_PTR(str)[len] = '\0'; - ENC_CODERANGE_SET(str, cr); - - return str; - } return mrb_nil_value(); } #endif //INCLUDE_REGEXP @@ -4197,66 +2569,18 @@ mrb_str_to_s(mrb_state *mrb, mrb_value self) static mrb_value mrb_str_upcase_bang(mrb_state *mrb, mrb_value str) { -#ifdef INCLUDE_ENCODING - mrb_encoding *enc; -#endif //INCLUDE_ENCODING char *s, *send; int modify = 0; -#ifdef INCLUDE_ENCODING - int n; - - str_modify_keep_cr(mrb, str); - enc = STR_ENC_GET(mrb, str); - mrb_str_check_dummy_enc(mrb, enc); - s = RSTRING_PTR(str); send = RSTRING_END(str); - if (single_byte_optimizable(mrb, str)) { - while (s < send) { - unsigned int c = *(unsigned char*)s; - - if (mrb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') { - *s = 'A' + (c - 'a'); - modify = 1; - } - s++; - } - } - else { - int ascompat = mrb_enc_asciicompat(mrb, enc); - - while (s < send) { - unsigned int c; - if (ascompat && (c = *(unsigned char*)s) < 0x80) { - if (mrb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') { - *s = 'A' + (c - 'a'); - modify = 1; - } - s++; - } - else { - c = mrb_enc_codepoint_len(mrb, s, send, &n, enc); - if (mrb_enc_islower(c, enc)) { - /* assuming toupper returns codepoint with same size */ - mrb_enc_mbcput(mrb_enc_toupper(c, enc), s, enc); - modify = 1; - } - s += n; - } - } - } -#else - 
mrb_str_modify(mrb, str); - s = RSTRING_PTR(str); send = RSTRING_END(str); + s = RSTRING(str)->buf; send = s + RSTRING(str)->len; while (s < send) { - unsigned int c = *(unsigned char*)s; - - if ('a' <= c && c <= 'z') { - *s = 'A' + (c - 'a'); + if (ISLOWER(*s)) { + *s = toupper(*s); modify = 1; } s++; } -#endif //INCLUDE_ENCODING + if (modify) return str; return mrb_nil_value(); } @@ -4282,252 +2606,6 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self) return str; } -/* 15.2.10.5.xx */ -/* - * call-seq: - * str.force_encoding(encoding) -> str - * - * Changes the encoding to +encoding+ and returns self. - */ -#ifdef INCLUDE_ENCODING -static mrb_value -mrb_str_force_encoding(mrb_state *mrb, mrb_value self) -{ - mrb_value enc; - - mrb_get_args(mrb, "o", &enc); - str_modifiable(self); - mrb_enc_associate(mrb, self, mrb_to_encoding(mrb, enc)); - ENC_CODERANGE_CLEAR(self); - return self; -} - -long -mrb_str_coderange_scan_restartable(const char *s, const char *e, mrb_encoding *enc, int *cr) -{ - const char *p = s; - - if (*cr == ENC_CODERANGE_BROKEN) - return e - s; - - if (mrb_enc_to_index(enc) == 0) { - /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */ - p = search_nonascii(p, e); - *cr = (!p && *cr != ENC_CODERANGE_VALID) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID; - return e - s; - } - else if (mrb_enc_asciicompat(mrb, enc)) { - p = search_nonascii(p, e); - if (!p) { - if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT; - return e - s; - } - while (p < e) { - int ret = mrb_enc_precise_mbclen(p, e, enc); - if (!MBCLEN_CHARFOUND_P(ret)) { - *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN; - return p - s; - } - p += MBCLEN_CHARFOUND_LEN(ret); - if (p < e) { - p = search_nonascii(p, e); - if (!p) { - *cr = ENC_CODERANGE_VALID; - return e - s; - } - } - } - *cr = e < p ? 
ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID; - return p - s; - } - else { - while (p < e) { - int ret = mrb_enc_precise_mbclen(p, e, enc); - if (!MBCLEN_CHARFOUND_P(ret)) { - *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN; - return p - s; - } - p += MBCLEN_CHARFOUND_LEN(ret); - } - *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID; - return p - s; - } -} - -mrb_value -mrb_str_conv_enc_opts(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to, int ecflags, mrb_value ecopts) -{ - mrb_econv_t *ec; - mrb_econv_result_t ret; - long len; - mrb_value newstr; - const unsigned char *sp; - unsigned char *dp; - - if (!to) return str; - if (from == to) return str; - if ((mrb_enc_asciicompat(mrb, to) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) || - to == mrb_ascii8bit_encoding(mrb)) { - if (STR_ENC_GET(mrb, str) != to) { - str = mrb_str_dup(mrb, str); - mrb_enc_associate(mrb, str, to); - } - return str; - } - - len = RSTRING_LEN(str); - newstr = mrb_str_new(mrb, 0, len); - - retry: - ec = mrb_econv_open_opts(mrb, from->name, to->name, ecflags, ecopts); - if (!ec) return str; - - sp = (unsigned char*)RSTRING_PTR(str); - dp = (unsigned char*)RSTRING_PTR(newstr); - ret = mrb_econv_convert(mrb, ec, &sp, (unsigned char*)RSTRING_END(str), - &dp, (unsigned char*)RSTRING_END(newstr), 0); - mrb_econv_close(ec); - switch (ret) { - case econv_destination_buffer_full: - /* destination buffer short */ - len = len < 2 ? 
2 : len * 2; - mrb_str_resize(mrb, newstr, len); - goto retry; - - case econv_finished: - len = dp - (unsigned char*)RSTRING_PTR(newstr); - mrb_str_set_len(mrb, newstr, len); - mrb_enc_associate(mrb, newstr, to); - return newstr; - - case econv_invalid_byte_sequence: - case econv_undefined_conversion: - case econv_source_buffer_empty: - case econv_after_output: - case econv_incomplete_input: - /* some error, return original */ - return str; - - default: - mrb_bug("Internal Error: Invalid return value mrb_econv_convert."); - return str; - } -} - -mrb_value -mrb_str_conv_enc(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to) -{ - return mrb_str_conv_enc_opts(mrb, str, from, to, 0, mrb_nil_value()); -} -#endif //INCLUDE_ENCODING - -#ifndef INCLUDE_ENCODING -#undef SIGN_EXTEND_CHAR -#if __STDC__ -# define SIGN_EXTEND_CHAR(c) ((signed char)(c)) -#else /* not __STDC__ */ -/* As in Harbison and Steele. */ -# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) -#endif -#define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_')) - -static int -is_special_global_name(m) - const char *m; -{ - switch (*m) { - case '~': case '*': case '$': case '?': case '!': case '@': - case '/': case '\\': case ';': case ',': case '.': case '=': - case ':': case '<': case '>': case '\"': - case '&': case '`': case '\'': case '+': - case '0': - ++m; - break; - case '-': - ++m; - if (is_identchar(*m)) m += 1; - break; - default: - if (!ISDIGIT(*m)) return 0; - do ++m; while (ISDIGIT(*m)); - } - return !*m; -} - -int -mrb_symname_p(const char *name) -{ - const char *m = name; - int localid = FALSE; - - if (!m) return FALSE; - switch (*m) { - case '\0': - return FALSE; - - case '$': - if (is_special_global_name(++m)) return TRUE; - goto id; - - case '@': - if (*++m == '@') ++m; - goto id; - - case '<': - switch (*++m) { - case '<': ++m; break; - case '=': if (*++m == '>') ++m; break; - default: break; - } - break; - - case '>': - switch (*++m) { - 
case '>': case '=': ++m; break; - } - break; - - case '=': - switch (*++m) { - case '~': ++m; break; - case '=': if (*++m == '=') ++m; break; - default: return FALSE; - } - break; - - case '*': - if (*++m == '*') ++m; - break; - - case '+': case '-': - if (*++m == '@') ++m; - break; - - case '|': case '^': case '&': case '/': case '%': case '~': case '`': - ++m; - break; - - case '[': - if (*++m != ']') return FALSE; - if (*++m == '=') ++m; - break; - - default: - localid = !ISUPPER(*m); -id: - if (*m != '_' && !ISALPHA(*m)) return FALSE; - while (is_identchar(*m)) m += 1; - if (localid) { - switch (*m) { - case '!': case '?': case '=': ++m; - } - } - break; - } - return *m ? FALSE : TRUE; -} -#endif //INCLUDE_ENCODING - /* * call-seq: * str.dump -> new_str @@ -4538,16 +2616,10 @@ id: mrb_value mrb_str_dump(mrb_state *mrb, mrb_value str) { -#ifdef INCLUDE_ENCODING - mrb_encoding *enc = mrb_enc_get(mrb, str); -#endif //INCLUDE_ENCODING long len; const char *p, *pend; - char *q, *qend; + char *q; mrb_value result; -#ifdef INCLUDE_ENCODING - int u8 = (enc == mrb_utf8_encoding(mrb)); -#endif //INCLUDE_ENCODING len = 2; /* "" */ p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); @@ -4570,33 +2642,15 @@ mrb_str_dump(mrb_state *mrb, mrb_value str) len++; } else { -#ifdef INCLUDE_ENCODING - if (u8) { /* \u{NN} */ - int n = mrb_enc_precise_mbclen(p-1, pend, enc); - if (MBCLEN_CHARFOUND_P(n-1)) { - unsigned int cc = mrb_enc_mbc_to_codepoint(p-1, pend, enc); - while (cc >>= 4) len++; - len += 5; - p += MBCLEN_CHARFOUND_LEN(n)-1; - break; - } - } -#endif //INCLUDE_ENCODING len += 4; /* \xNN */ } break; } } -#ifdef INCLUDE_ENCODING - if (!mrb_enc_asciicompat(mrb, enc)) { - len += 19; /* ".force_encoding('')" */ - len += strlen(enc->name); - } -#endif //INCLUDE_ENCODING - result = mrb_str_new5(mrb, str, 0, len); + result = mrb_str_new_with_class(mrb, str, 0, len); p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); - q = RSTRING_PTR(result); qend = q + len + 1; + q = 
RSTRING_PTR(result); *q++ = '"'; while (p < pend) { @@ -4647,35 +2701,11 @@ mrb_str_dump(mrb_state *mrb, mrb_value str) } else { *q++ = '\\'; -#ifdef INCLUDE_ENCODING - if (u8) { - int n = mrb_enc_precise_mbclen(p-1, pend, enc) - 1; - if (MBCLEN_CHARFOUND_P(n)) { - int cc = mrb_enc_mbc_to_codepoint(p-1, pend, enc); - p += n; - snprintf(q, qend-q, "u{%x}", cc); - q += strlen(q); - continue; - } - } - snprintf(q, qend-q, "x%02X", c); -#else sprintf(q, "%03o", c&0xff); -#endif //INCLUDE_ENCODING q += 3; } } *q++ = '"'; -#ifdef INCLUDE_ENCODING - *q = '\0'; - if (!mrb_enc_asciicompat(mrb, enc)) { - snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name); - enc = mrb_ascii8bit_encoding(mrb); - } - /* result from dump is ASCII */ - mrb_enc_associate(mrb, result, enc); - ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT); -#endif //INCLUDE_ENCODING return result; } @@ -4686,8 +2716,6 @@ mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len) mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)"); } if (0/*STR_ASSOC_P(str)*/) { - mrb_str_modify(mrb, str); - //if (STR_EMBED_P(str)) str_make_independent(mrb, str); mrb_realloc(mrb, RSTRING(str)->buf, RSTRING(str)->len+len+1); memcpy(RSTRING(str)->buf + RSTRING(str)->len, ptr, len); RSTRING(str)->len += len; @@ -4701,18 +2729,13 @@ mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len) mrb_value mrb_str_cat2(mrb_state *mrb, mrb_value str, const char *ptr) { - return mrb_str_cat(mrb, str, ptr, strlen(ptr)); + return mrb_str_cat(mrb, str, ptr, strlen(ptr)); } -mrb_value +static mrb_value mrb_str_vcatf(mrb_state *mrb, mrb_value str, const char *fmt, va_list ap) { - //mrb_printf_buffer f; - //mrb_value klass; - - //StringValue(str); mrb_string_value(mrb, &str); - mrb_str_modify(mrb, str); mrb_str_resize(mrb, str, (char*)RSTRING_END(str) - RSTRING_PTR(str)); return str; @@ -4730,12 +2753,6 @@ mrb_str_catf(mrb_state *mrb, mrb_value str, const char *format, ...) 
return str; } -void -mrb_lastline_set(mrb_value val) -{ - //vm_svar_set(0, val); -} - mrb_value mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2) { @@ -4743,69 +2760,7 @@ mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2) return mrb_str_buf_append(mrb, str, str2); } -void -mrb_str_setter(mrb_state *mrb, mrb_value val, mrb_sym id, mrb_value *var) -{ - if (!mrb_nil_p(val) && (mrb_type(val) != MRB_TT_STRING)) { - mrb_raise(mrb, E_TYPE_ERROR, "value of %s must be String", mrb_sym2name(mrb, id)); - } - *var = val; -} - -#ifdef INCLUDE_ENCODING -/* - * call-seq: - * str.ascii_only? -> true or false - * - * Returns true for a string which has only ASCII characters. - * - * "abc".force_encoding("UTF-8").ascii_only? #=> true - * "abc\u{6666}".force_encoding("UTF-8").ascii_only? #=> false - */ - -int -mrb_str_is_ascii_only_p(mrb_state *mrb, mrb_value str) -{ - int cr = mrb_enc_str_coderange(mrb, str); - - return cr == ENC_CODERANGE_7BIT ? TRUE : FALSE; -} - -#endif //INCLUDE_ENCODING - #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */ -int -mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, int unicode_p) -{ - char buf[CHAR_ESC_LEN + 1]; - int l; - - if (sizeof(c) > 4) { - c &= 0xffffffff; - } - if (unicode_p) { - if (c < 0x7F && ISPRINT(c)) { - snprintf(buf, CHAR_ESC_LEN, "%c", c); - } - else if (c < 0x10000) { - snprintf(buf, CHAR_ESC_LEN, "\\u%04X", c); - } - else { - snprintf(buf, CHAR_ESC_LEN, "\\u{%X}", c); - } - } - else { - if (c < 0x100) { - snprintf(buf, CHAR_ESC_LEN, "\\x%02X", c); - } - else { - snprintf(buf, CHAR_ESC_LEN, "\\x{%X}", c); - } - } - l = (int)strlen(buf); /* CHAR_ESC_LEN cannot exceed INT_MAX */ - mrb_str_buf_cat(mrb, result, buf, l); - return l; -} /* * call-seq: @@ -4821,24 +2776,9 @@ mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, i mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str) { -#ifdef INCLUDE_ENCODING - mrb_encoding *enc = 
STR_ENC_GET(mrb, str); -#endif //INCLUDE_ENCODING const char *p, *pend, *prev; char buf[CHAR_ESC_LEN + 1]; -#ifdef INCLUDE_ENCODING - mrb_value result = mrb_str_buf_new(mrb, 0); - mrb_encoding *resenc = mrb_default_internal_encoding(mrb); - int unicode_p = mrb_enc_unicode_p(enc); - int asciicompat = mrb_enc_asciicompat(mrb, enc); - - if (resenc == NULL) resenc = mrb_default_external_encoding(mrb); - if (!mrb_enc_asciicompat(mrb, resenc)) resenc = mrb_usascii_encoding(mrb); - mrb_enc_associate(mrb, result, resenc); - mrb_str_buf_cat(mrb, result, "\"", strlen("\"")); -#else mrb_value result = mrb_str_new_cstr(mrb, "\""); -#endif //INCLUDE_ENCODING p = RSTRING_PTR(str); pend = RSTRING_END(str); prev = p; @@ -4846,37 +2786,6 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str) unsigned int c, cc; int n; -#ifdef INCLUDE_ENCODING - n = mrb_enc_precise_mbclen(p, pend, enc); - if (!MBCLEN_CHARFOUND_P(n)) { - if (p > prev) mrb_str_buf_cat(mrb, result, prev, p - prev); - n = mrb_enc_mbminlen(enc); - if (pend < p + n) - n = (int)(pend - p); - while (n--) { - snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377); - mrb_str_buf_cat(mrb, result, buf, strlen(buf)); - prev = ++p; - } - continue; - } - n = MBCLEN_CHARFOUND_LEN(n); - c = mrb_enc_mbc_to_codepoint(p, pend, enc); - p += n; - if (c == '"'|| c == '\\' || - (c == '#' && - p < pend && - MBCLEN_CHARFOUND_P(mrb_enc_precise_mbclen(p,pend,enc)) && - (cc = mrb_enc_codepoint(mrb, p, pend, enc), - (cc == '$' || cc == '@' || cc == '{')))) { - if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev); - mrb_str_buf_cat(mrb, result, "\\", strlen("\\")); //str_buf_cat2(result, "\\"); - if (asciicompat || enc == resenc) { - prev = p - n; - continue; - } - } -#else c = *p++; n = 1; if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) { @@ -4889,7 +2798,6 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str) mrb_str_buf_cat(mrb, result, buf, 1); continue; } -#endif //INCLUDE_ENCODING switch (c) { case '\n': cc = 'n'; break; case 
'\r': cc = 'r'; break; @@ -4909,20 +2817,10 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str) prev = p; continue; } -#ifdef INCLUDE_ENCODING - if ((enc == resenc && mrb_enc_isprint(c, enc)) || - (asciicompat && mrb_enc_isascii(c, enc) && ISPRINT(c))) { - continue; - } -#endif //INCLUDE_ENCODING else { if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev); -#ifdef INCLUDE_ENCODING - mrb_str_buf_cat_escaped_char(mrb, result, c, unicode_p); -#else sprintf(buf, "\\%03o", c & 0377); mrb_str_buf_cat(mrb, result, buf, strlen(buf)); -#endif //INCLUDE_ENCODING prev = p; continue; } @@ -4935,21 +2833,6 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str) return result; } -#ifdef INCLUDE_ENCODING -int -sym_printable(mrb_state *mrb, const char *s, const char *send, mrb_encoding *enc) -{ - while (s < send) { - int n; - int c = mrb_enc_codepoint_len(mrb, s, send, &n, enc); - - if (!mrb_enc_isprint(c, enc)) return FALSE; - s += n; - } - return TRUE; -} -#endif //INCLUDE_ENCODING - /* ---------------------------*/ void mrb_init_string(mrb_state *mrb) @@ -4978,7 +2861,7 @@ mrb_init_string(mrb_state *mrb) mrb_define_method(mrb, s, "downcase", mrb_str_downcase, ARGS_NONE()); /* 15.2.10.5.13 */ mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, ARGS_NONE()); /* 15.2.10.5.14 */ mrb_define_method(mrb, s, "each_line", mrb_str_each_line, ARGS_REQ(1)); /* 15.2.10.5.15 */ - mrb_define_method(mrb, s, "empty?", mrb_str_empty, ARGS_NONE()); /* 15.2.10.5.16 */ + mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, ARGS_NONE()); /* 15.2.10.5.16 */ mrb_define_method(mrb, s, "eql?", mrb_str_eql, ARGS_REQ(1)); /* 15.2.10.5.17 */ #ifdef INCLUDE_REGEXP mrb_define_method(mrb, s, "gsub", mrb_str_gsub, ARGS_REQ(1)); /* 15.2.10.5.18 */ @@ -5013,9 +2896,5 @@ mrb_init_string(mrb_state *mrb) mrb_define_method(mrb, s, "to_sym", mrb_str_intern, ARGS_NONE()); /* 15.2.10.5.41 */ mrb_define_method(mrb, s, "upcase", mrb_str_upcase, ARGS_REQ(1)); /* 15.2.10.5.42 */ mrb_define_method(mrb, s, 
"upcase!", mrb_str_upcase_bang, ARGS_REQ(1)); /* 15.2.10.5.43 */ -#ifdef INCLUDE_ENCODING - mrb_define_method(mrb, s, "encoding", mrb_obj_encoding, ARGS_NONE()); /* 15.2.10.5.44(x) */ - mrb_define_method(mrb, s, "force_encoding", mrb_str_force_encoding, ARGS_REQ(1)); /* 15.2.10.5.45(x) */ -#endif mrb_define_method(mrb, s, "inspect", mrb_str_inspect, ARGS_NONE()); /* 15.2.10.5.46(x) */ } diff --git a/src/symbol.c b/src/symbol.c index b4ffc19e6..89e81af0e 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -149,13 +149,7 @@ mrb_sym_to_s(mrb_state *mrb, mrb_value sym) { mrb_sym id = SYM2ID(sym); -#ifdef INCLUDE_REGEXP - //return str_new3(mrb_cString, mrb_id2str(id)); - return str_new3(mrb, mrb_obj_class(mrb, sym), mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id))); -#else - return mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id)); //mrb_str_new2(mrb_id2name(SYM2ID(sym))); -#endif - + return mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id)); } /* 15.2.11.3.4 */ @@ -185,42 +179,113 @@ sym_to_sym(mrb_state *mrb, mrb_value sym) * :fred.inspect #=> ":fred" */ +#if __STDC__ +# define SIGN_EXTEND_CHAR(c) ((signed char)(c)) +#else /* not __STDC__ */ +/* As in Harbison and Steele. 
*/ +# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) +#endif +#define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_')) + +static int +is_special_global_name(m) + const char *m; +{ + switch (*m) { + case '~': case '*': case '$': case '?': case '!': case '@': + case '/': case '\\': case ';': case ',': case '.': case '=': + case ':': case '<': case '>': case '\"': + case '&': case '`': case '\'': case '+': + case '0': + ++m; + break; + case '-': + ++m; + if (is_identchar(*m)) m += 1; + break; + default: + if (!ISDIGIT(*m)) return 0; + do ++m; while (ISDIGIT(*m)); + } + return !*m; +} + +static int +symname_p(const char *name) +{ + const char *m = name; + int localid = FALSE; + + if (!m) return FALSE; + switch (*m) { + case '\0': + return FALSE; + + case '$': + if (is_special_global_name(++m)) return TRUE; + goto id; + + case '@': + if (*++m == '@') ++m; + goto id; + + case '<': + switch (*++m) { + case '<': ++m; break; + case '=': if (*++m == '>') ++m; break; + default: break; + } + break; + + case '>': + switch (*++m) { + case '>': case '=': ++m; break; + } + break; + + case '=': + switch (*++m) { + case '~': ++m; break; + case '=': if (*++m == '=') ++m; break; + default: return FALSE; + } + break; + + case '*': + if (*++m == '*') ++m; + break; + + case '+': case '-': + if (*++m == '@') ++m; + break; + + case '|': case '^': case '&': case '/': case '%': case '~': case '`': + ++m; + break; + + case '[': + if (*++m != ']') return FALSE; + if (*++m == '=') ++m; + break; + + default: + localid = !ISUPPER(*m); +id: + if (*m != '_' && !ISALPHA(*m)) return FALSE; + while (is_identchar(*m)) m += 1; + if (localid) { + switch (*m) { + case '!': case '?': case '=': ++m; + } + } + break; + } + return *m ? 
FALSE : TRUE; +} + static mrb_value sym_inspect(mrb_state *mrb, mrb_value sym) { -#ifdef INCLUDE_ENCODING - #define STR_ENC_GET(mrb, str) mrb_enc_from_index(mrb, ENCODING_GET(mrb, str)) - mrb_value str; - mrb_sym id = SYM2ID(sym); - mrb_encoding *enc; - const char *ptr; - long len; - char *dest; - mrb_encoding *resenc = mrb_default_internal_encoding(mrb); - - if (resenc == NULL) resenc = mrb_default_external_encoding(mrb); - sym = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id));//mrb_id2str(id); - enc = STR_ENC_GET(mrb, sym); - ptr = RSTRING_PTR(sym); - len = RSTRING_LEN(sym); - if ((resenc != enc && !mrb_str_is_ascii_only_p(mrb, sym)) || len != (long)strlen(ptr) || - !mrb_enc_symname_p(ptr, enc) || !sym_printable(mrb, ptr, ptr + len, enc)) { - str = mrb_str_inspect(mrb, sym); - len = RSTRING_LEN(str); - mrb_str_resize(mrb, str, len + 1); - dest = RSTRING_PTR(str); - memmove(dest + 1, dest, len); - dest[0] = ':'; - } - else { - char *dest; - str = mrb_enc_str_new(mrb, 0, len + 1, enc); - dest = RSTRING_PTR(str); - dest[0] = ':'; - memcpy(dest + 1, ptr, len); - } - return str; -#else mrb_value str; const char *name; mrb_sym id = SYM2ID(sym); @@ -229,12 +294,11 @@ sym_inspect(mrb_state *mrb, mrb_value sym) str = mrb_str_new(mrb, 0, strlen(name)+1); RSTRING(str)->buf[0] = ':'; strcpy(RSTRING(str)->buf+1, name); - if (!mrb_symname_p(name)) { + if (!symname_p(name)) { str = mrb_str_dump(mrb, str); strncpy(RSTRING(str)->buf, ":\"", 2); } return str; -#endif } |
