/*
** encoding.c - Encoding class
**
** See Copyright Notice in mruby.h
*/

#include "mruby.h"
#ifdef INCLUDE_ENCODING
/* NOTE(review): the <...> header names of the system includes were missing
 * from these directives; they are filled in from the library calls this file
 * makes and from the surrounding #ifdef guards -- confirm against the build. */
#include <ctype.h>
#ifndef NO_LOCALE_CHARMAP
#ifdef __CYGWIN__
#include <windows.h>
#endif
#ifdef HAVE_LANGINFO_H
#include <langinfo.h>
#endif
#endif

#define USE_UPPER_CASE_TABLE

#include <stdio.h>
#include <stdlib.h>
#include "regenc.h"
#include "regint.h"
#include "encoding.h"
#include "st.h"
#include <string.h>
#include "mruby/numeric.h"
#include "mruby/string.h"
#include "mruby/array.h"
#include "mruby/variable.h"
#include "mruby/hash.h"

/* Diagnostics are routed to plain printf in this port (they return normally). */
#define pprintf printf
#define mrb_warning printf
#define mrb_bug printf

#ifndef INT_MAX
#define INT_MAX 2147483647
#endif

#define mrb_isascii(c) ((unsigned long)(c) < 128)
/* Freezing is a no-op here. */
#define OBJ_FREEZE(a)

static mrb_sym id_encoding;
//mrb_value mrb_cEncoding;
static mrb_value mrb_encoding_list;

/* One slot of the encoding table. */
struct mrb_encoding_entry {
  const char *name;    /* name the slot was registered under */
  mrb_encoding *enc;   /* the table's own copy of the encoding */
  mrb_encoding *base;  /* encoding this slot was replicated from */
};

static struct {
  struct mrb_encoding_entry *list;  /* slots, indexed by encoding index */
  int count;                        /* slots in use */
  int size;                         /* slots allocated */
  st_table *names;                  /* name or alias -> encoding index */
} enc_table;

void mrb_enc_init(mrb_state *mrb);

/* Indexes of the encodings registered by mrb_enc_init. */
enum {
  ENCINDEX_ASCII,
  ENCINDEX_UTF_8,
  ENCINDEX_US_ASCII,
  ENCINDEX_BUILTIN_MAX
};

#define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
#define ENCODING_NAMELEN_MAX 63
#define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
#define STRCASECMP(s1, s2) (st_strcasecmp(s1, s2))

//#define BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & T_MASK)
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
#ifndef OTHER
#define OTHER 2
#endif

#define mrb_usascii_str_new2 mrb_usascii_str_new_cstr

/* Size callback placeholder; always reports 0. */
static size_t
enc_memsize(mrb_state *mrb, const void *p)
{
  return 0;
}

static const struct mrb_data_type encoding_data_type = {
  "encoding", 0,
};
#define is_data_encoding(obj) (DATA_TYPE(obj) == &encoding_data_type)

// RUBY_IMMEDIATE_MASK = 0x03,
//#define IMMEDIATE_MASK RUBY_IMMEDIATE_MASK
//#define IMMEDIATE_P(x) ((VALUE)(x) & IMMEDIATE_MASK)
//#define SPECIAL_CONST_P(x) (IMMEDIATE_P(x) || !RTEST(x))

/* Wraps an mrb_encoding in a data object of mrb->encode_class. */
static mrb_value
enc_new(mrb_state *mrb, mrb_encoding *encoding)
{
  return mrb_obj_value(Data_Wrap_Struct(mrb, mrb->encode_class, &encoding_data_type, encoding));
}

/* An entry whose mbmaxlen is 0 is treated as "still to be loaded". */
#define enc_autoload_p(enc) (!mrb_enc_mbmaxlen(enc))

#define UNSPECIFIED_ENCODING INT_MAX

/*
 * Returns the Encoding object stored at idx in mrb_encoding_list.
 * Reports through mrb_bug and returns nil when the list does not exist;
 * reports (and returns the nil entry) when the slot has not been filled.
 */
static mrb_value
mrb_enc_from_encoding_index(mrb_state *mrb, int idx)
{
  mrb_value list, enc;

  if (mrb_nil_p(list = mrb_encoding_list)) {
    mrb_bug("mrb_enc_from_encoding_index(%d): no mrb_encoding_list", idx);
    /* mrb_bug is printf in this file and returns, so stop here instead of
     * indexing a nil value as if it were an array. */
    return mrb_nil_value();
  }
  enc = mrb_ary_ref(mrb, list, idx);//mrb_ary_entry(list, idx);
  if (mrb_nil_p(enc)) {
    mrb_bug("mrb_enc_from_encoding_index(%d): not created yet", idx);
  }
  return enc;
}

/* Returns the Encoding object for an mrb_encoding, or nil for a NULL pointer. */
mrb_value
mrb_enc_from_encoding(mrb_state *mrb, mrb_encoding *encoding)
{
  int idx;

  if (!encoding) return mrb_nil_value();
  idx = ENC_TO_ENCINDEX(encoding);
  return mrb_enc_from_encoding_index(mrb, idx);
}

static int enc_autoload(mrb_state *mrb, mrb_encoding *enc);

/*
 * Returns the table index of enc, or -1 when enc is not the pointer stored
 * in the table at its own index. Entries flagged by enc_autoload_p are
 * passed through enc_autoload first and that result is returned.
 */
static int
check_encoding(mrb_state *mrb, mrb_encoding *enc)
{
  int index = mrb_enc_to_index(enc);

  if (mrb_enc_from_index(mrb, index) != enc)
    return -1;
  if (enc_autoload_p(enc)) {
    index = enc_autoload(mrb, enc);
  }
  return index;
}

/* Returns the encoding index of an Encoding object, or -1 for anything else. */
static int
enc_check_encoding(mrb_state *mrb, mrb_value obj)
{
  if (SPECIAL_CONST_P(obj) || !is_data_encoding(obj)) {
    return -1;
  }
  return check_encoding(mrb, RDATA(obj)->data);
}

/* Like enc_check_encoding, but raises TypeError instead of returning -1. */
static int
must_encoding(mrb_state *mrb, mrb_value enc)
{
  int index = enc_check_encoding(mrb, enc);

  if (index < 0) {
    mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type %s (expected Encoding)",
              mrb_obj_classname(mrb, enc));
  }
  return index;
}

/*
 * Resolves an Encoding object or an encoding name to an index.
 * Returns -1 when enc is neither an Encoding nor string-like, or when the
 * name string itself is not in an ASCII-compatible encoding.
 */
int
mrb_to_encoding_index(mrb_state *mrb, mrb_value enc)
{
  int idx;

  idx = enc_check_encoding(mrb, enc);
  if (idx >= 0) {
    return idx;
  }
  else if (mrb_nil_p(enc = mrb_check_string_type(mrb, enc))) {
    return -1;
  }
  if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) {
    return -1;
  }
  //return mrb_enc_find_index(StringValueCStr(enc));
  return mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc));
}

/*
 * Looks an encoding up by name. Raises ArgumentError when the name string is
 * not ASCII-compatible or when no encoding of that name is found.
 */
static mrb_encoding *
to_encoding(mrb_state *mrb, mrb_value enc)
{
  int idx;

  //StringValue(enc);
  mrb_string_value(mrb, &enc);
  if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) {
    mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid name encoding (non ASCII)");
  }
  //idx = mrb_enc_find_index(StringValueCStr(enc));
  idx = mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc));
  if (idx < 0) {
    mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %s", RSTRING_PTR(enc));
  }
  return mrb_enc_from_index(mrb, idx);
}

/* Accepts either an Encoding object or an encoding name. */
mrb_encoding *
mrb_to_encoding(mrb_state *mrb, mrb_value enc)
{
  if (enc_check_encoding(mrb, enc) >= 0) return RDATA(enc)->data;
  return to_encoding(mrb, enc);
}

/*
 * Makes sure the table has room for newsize slots (capacity is rounded up
 * to a multiple of 8 and new slots are zeroed).
 * Returns the requested newsize, or -1 when realloc fails; the existing
 * table is left untouched in that case.
 */
static int
enc_table_expand(int newsize)
{
  struct mrb_encoding_entry *ent;
  int count = newsize;

  if (enc_table.size >= newsize) return newsize;
  newsize = (newsize + 7) / 8 * 8;
  ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
  if (!ent) return -1;
  memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
  enc_table.list = ent;
  enc_table.size = newsize;
  return count;
}

/*
 * Fills slot `index` of the table with a copy of `encoding` (or a zeroed
 * encoding when `encoding` is NULL) under `name`, records the name in
 * enc_table.names and, when the Encoding object list exists and has no
 * object for this index yet, creates one.
 * Returns index, or -1 when the name is invalid, differs (ignoring case)
 * from the name already stored in the slot, or cannot be duplicated.
 * The caller must have made sure the slot exists (enc_table_expand).
 */
static int
enc_register_at(mrb_state *mrb, int index, const char *name, mrb_encoding *encoding)
{
  struct mrb_encoding_entry *ent = &enc_table.list[index];
  mrb_value list;
  mrb_value ref_ary;

  if (!valid_encoding_name_p(name)) return -1;
  if (!ent->name) {
    char *copy = strdup(name);

    if (!copy) return -1;  /* out of memory: leave the slot unnamed */
    ent->name = name = copy;
  }
  else if (STRCASECMP(name, ent->name)) {
    return -1;
  }
  if (!ent->enc) {
    ent->enc = xmalloc(sizeof(mrb_encoding));
  }
  if (encoding) {
    *ent->enc = *encoding;
  }
  else {
    memset(ent->enc, 0, sizeof(*ent->enc));
  }
  encoding = ent->enc;
  encoding->name = name;
  encoding->ruby_encoding_index = index;
  st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
  list = mrb_encoding_list;
  //if (list && mrb_nil_p((mrb_ary_ref(mrb, list, index)))) {
  if (list.tt) {
    ref_ary = mrb_ary_ref(mrb, list, index);
    if (mrb_nil_p(ref_ary)) {
      /* initialize encoding data */
      mrb_ary_set(mrb, list, index, enc_new(mrb, encoding));//rb_ary_store(list, index, enc_new(encoding));
    }
  }
  return index;
}

/* Appends a new slot to the table and registers `encoding` there. */
static int
enc_register(mrb_state *mrb, const char *name, mrb_encoding *encoding)
{
  int index = enc_table.count;

  if ((index = enc_table_expand(index + 1)) < 0) return -1;
  enc_table.count = index;
  return enc_register_at(mrb, index - 1, name, encoding);
}

static void set_encoding_const(mrb_state *mrb, const char *, mrb_encoding *);
int mrb_enc_registered(const char *name);

/* Raises ArgumentError when `name` is already present in the name table. */
static void
enc_check_duplication(mrb_state *mrb, const char *name)
{
  if (mrb_enc_registered(name) >= 0) {
    mrb_raise(mrb, E_ARGUMENT_ERROR, "encoding %s is already registered", name);
  }
}

/*
 * Records `base` as the origin of slot `index`; a dummy base makes the slot's
 * encoding dummy too. `index` must be a valid slot. Returns the slot's encoding.
 */
static mrb_encoding*
set_base_encoding(int index, mrb_encoding *base)
{
  mrb_encoding *enc = enc_table.list[index].enc;

  enc_table.list[index].base = base;
  if (mrb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
  return enc;
}

/*
 * Registers a copy of `encoding` under the new name `name` and defines the
 * matching Encoding constant(s).
 * Raises ArgumentError when the name is already registered.
 * Returns the new index, or -1 when registration fails.
 */
int
mrb_enc_replicate(mrb_state *mrb, const char *name, mrb_encoding *encoding)
{
  int idx;

  enc_check_duplication(mrb, name);
  idx = enc_register(mrb, name, encoding);
  /* enc_register reports failure with -1; set_base_encoding would use that
   * value directly as an index into enc_table.list. */
  if (idx < 0) return idx;
  set_base_encoding(idx, encoding);
  set_encoding_const(mrb, name, mrb_enc_from_index(mrb, idx));
  return idx;
}

/* 15.2.40.2.17 */
/*
 * call-seq:
 *   enc.replicate(name) -> encoding
 *
 * Returns a replicated encoding of _enc_ whose name is _name_.
 * The new encoding should have the same byte structure of _enc_.
 * If _name_ is used by another encoding, raise ArgumentError.
* */ static mrb_value enc_replicate(mrb_state *mrb, mrb_value encoding) { mrb_value name; mrb_get_args(mrb, "o", &name); return mrb_enc_from_encoding_index(mrb, //mrb_enc_replicate(mrb, StringValueCStr(name), mrb_enc_replicate(mrb, mrb_string_value_cstr(mrb, &name), mrb_to_encoding(mrb, encoding))); } static int enc_replicate_with_index(mrb_state *mrb, const char *name, mrb_encoding *origenc, int idx) { if (idx < 0) { idx = enc_register(mrb, name, origenc); } else { idx = enc_register_at(mrb, idx, name, origenc); } if (idx >= 0) { set_base_encoding(idx, origenc); set_encoding_const(mrb, name, mrb_enc_from_index(mrb, idx)); } return idx; } int mrb_encdb_replicate(mrb_state *mrb, const char *name, const char *orig) { int origidx = mrb_enc_registered(orig); int idx = mrb_enc_registered(name); if (origidx < 0) { origidx = enc_register(mrb, orig, 0); } return enc_replicate_with_index(mrb, name, mrb_enc_from_index(mrb, origidx), idx); } int mrb_define_dummy_encoding(mrb_state *mrb, const char *name) { int index = mrb_enc_replicate(mrb, name, mrb_ascii8bit_encoding(mrb)); mrb_encoding *enc = enc_table.list[index].enc; ENC_SET_DUMMY(enc); return index; } int mrb_encdb_dummy(mrb_state *mrb, const char *name) { int index = enc_replicate_with_index(mrb, name, mrb_ascii8bit_encoding(mrb), mrb_enc_registered(name)); mrb_encoding *enc = enc_table.list[index].enc; ENC_SET_DUMMY(enc); return index; } /* 15.2.40.2.13 */ /* * call-seq: * enc.dummy? -> true or false * * Returns true for dummy encodings. * A dummy encoding is an encoding for which character handling is not properly * implemented. * It is used for stateful encodings. * * Encoding::ISO_2022_JP.dummy? #=> true * Encoding::UTF_8.dummy? #=> false * */ static mrb_value enc_dummy_p(mrb_state *mrb, mrb_value enc) { return ENC_DUMMY_P(enc_table.list[must_encoding(mrb, enc)].enc) ? mrb_true_value() : mrb_false_value(); } /* 15.2.40.2.12 */ /* * call-seq: * enc.ascii_compatible? 
-> true or false * * Returns whether ASCII-compatible or not. * * Encoding::UTF_8.ascii_compatible? #=> true * Encoding::UTF_16BE.ascii_compatible? #=> false * */ static mrb_value enc_ascii_compatible_p(mrb_state *mrb, mrb_value enc) { return mrb_enc_asciicompat(mrb, enc_table.list[must_encoding(mrb, enc)].enc) ? mrb_true_value() : mrb_false_value(); } static const char * enc_alias_internal(const char *alias, int idx) { alias = strdup(alias); st_insert(enc_table.names, (st_data_t)alias, (st_data_t)idx); return alias; } /* * Returns 1 when the encoding is Unicode series other than UTF-7 else 0. */ int mrb_enc_unicode_p(mrb_encoding *enc) { const char *name = mrb_enc_name(enc); return name[0] == 'U' && name[1] == 'T' && name[2] == 'F' && name[4] != '7'; } extern mrb_encoding OnigEncodingUTF_8; extern mrb_encoding OnigEncodingUS_ASCII; void mrb_enc_init(mrb_state *mrb) { enc_table_expand(ENCODING_COUNT + 1); if (!enc_table.names) { enc_table.names = st_init_strcasetable(); } #define ENC_REGISTER(enc) enc_register_at(mrb, ENCINDEX_##enc, mrb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc) ENC_REGISTER(ASCII); ENC_REGISTER(UTF_8); ENC_REGISTER(US_ASCII); #undef ENC_REGISTER enc_table.count = ENCINDEX_BUILTIN_MAX; } mrb_encoding * mrb_enc_from_index(mrb_state *mrb, int index) { if (!enc_table.list) { mrb_enc_init(mrb); } if (index < 0 || enc_table.count <= index) { return 0; } return enc_table.list[index].enc; } int mrb_enc_registered(const char *name) { st_data_t idx = 0; if (!name) return -1; if (!enc_table.list) return -1; if (st_lookup(enc_table.names, (st_data_t)name, &idx)) { return (int)idx; } return -1; } mrb_value mrb_require_safe(mrb_value fname, int safe) { mrb_value result = mrb_nil_value(); return result; } static int load_encoding(const char *name) { mrb_value enclib;// = mrb_sprintf("enc/%s.so", name); //mrb_value verbose;// = ruby_verbose; //mrb_value debug;// = ruby_debug; //mrb_value loaded; char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) 
- 3; int idx; while (s < e) { if (!ISALNUM(*s)) *s = '_'; else if (ISUPPER(*s)) *s = TOLOWER(*s); ++s; } OBJ_FREEZE(enclib); //ruby_verbose = mrb_false_value(); //ruby_debug = mrb_false_value(); //loaded = mrb_protect(require_enc, enclib, 0); //ruby_verbose = verbose; //ruby_debug = debug; //rb_set_errinfo(mrb_nil_value()); //if (mrb_nil_p(loaded)) return -1; if ((idx = mrb_enc_registered(name)) < 0) return -1; if (enc_autoload_p(enc_table.list[idx].enc)) return -1; return idx; } static int enc_autoload(mrb_state *mrb, mrb_encoding *enc) { int i; mrb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base; if (base) { i = 0; do { if (i >= enc_table.count) return -1; } while (enc_table.list[i].enc != base && (++i, 1)); if (enc_autoload_p(base)) { if (enc_autoload(mrb, base) < 0) return -1; } i = ENC_TO_ENCINDEX(enc); enc_register_at(mrb, i, mrb_enc_name(enc), base); } else { i = load_encoding(mrb_enc_name(enc)); } return i; } int mrb_enc_find_index(mrb_state *mrb, const char *name) { int i = mrb_enc_registered(name); mrb_encoding *enc; if (i < 0) { i = load_encoding(name); } else if (!(enc = mrb_enc_from_index(mrb, i))) { if (i != UNSPECIFIED_ENCODING) { mrb_raise(mrb, E_ARGUMENT_ERROR, "encoding %s is not registered", name); } } else if (enc_autoload_p(enc)) { if (enc_autoload(mrb, enc) < 0) { //mrb_warn("failed to load encoding (%s); use ASCII-8BIT instead", printf("failed to load encoding (%s); use ASCII-8BIT instead", name); return 0; } } return i; } mrb_encoding * mrb_enc_find(mrb_state *mrb, const char *name) { int idx = mrb_enc_find_index(mrb, name); if (idx < 0) idx = 0; return mrb_enc_from_index(mrb, idx); } static inline int enc_capable(mrb_value obj) { if (SPECIAL_CONST_P(obj)) return (mrb_type(obj) == MRB_TT_SYMBOL); switch (mrb_type(obj)/*BUILTIN_TYPE(obj)*/) { case MRB_TT_STRING: case MRB_TT_REGEX: case MRB_TT_FILE: return TRUE; case MRB_TT_DATA: if (is_data_encoding(obj)) return TRUE; default: return FALSE; } } mrb_sym mrb_id_encoding(mrb_state 
*mrb) { //CONST_ID(id_encoding, "encoding"); id_encoding = mrb_intern(mrb, "encoding"); return id_encoding; } int mrb_enc_get_index(mrb_state *mrb, mrb_value obj) { int i = -1; mrb_value tmp; struct RString *ps; if (SPECIAL_CONST_P(obj)) { if (mrb_type(obj) != MRB_TT_SYMBOL) return -1; //obj = mrb_id2str(SYM2ID(obj)); obj = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, SYM2ID(obj))); } switch (mrb_type(obj)/*BUILTIN_TYPE(obj)*/) { as_default: default: case MRB_TT_STRING: case MRB_TT_REGEX: i = (int)ENCODING_GET_INLINED(obj); ps = mrb_str_ptr(obj); if (i == ENCODING_INLINE_MAX) { mrb_value iv; //iv = rb_ivar_get(obj, mrb_id_encoding(mrb)); iv = mrb_iv_get(mrb, obj, mrb_id_encoding(mrb)); i = mrb_fixnum(iv); } break; case MRB_TT_FILE: tmp = mrb_funcall(mrb, obj, "internal_encoding", 0, 0); if (mrb_nil_p(tmp)) obj = mrb_funcall(mrb, obj, "external_encoding", 0, 0); else obj = tmp; if (mrb_nil_p(obj)) break; case MRB_TT_DATA: if (is_data_encoding(obj)) { i = enc_check_encoding(mrb, obj); } else { goto as_default; } break; } return i; } void mrb_enc_set_index(mrb_state *mrb, mrb_value obj, int idx) { if (idx < ENCODING_INLINE_MAX) { ENCODING_SET_INLINED(obj, idx); return; } ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX); //mrb_ivar_set(obj, mrb_id_encoding(mrb), INT2NUM(idx)); mrb_iv_set(mrb, obj, mrb_id_encoding(mrb), mrb_fixnum_value(idx)); return; } mrb_value mrb_enc_associate_index(mrb_state *mrb, mrb_value obj, int idx) { /* enc_check_capable(obj);*/ if (mrb_enc_get_index(mrb, obj) == idx) return obj; if (SPECIAL_CONST_P(obj)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "cannot set encoding"); } if (!ENC_CODERANGE_ASCIIONLY(obj) || !mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx))) { ENC_CODERANGE_CLEAR(obj); } mrb_enc_set_index(mrb, obj, idx); return obj; } mrb_value mrb_enc_associate(mrb_state *mrb, mrb_value obj, mrb_encoding *enc) { return mrb_enc_associate_index(mrb, obj, mrb_enc_to_index(enc)); } mrb_encoding* mrb_enc_get(mrb_state *mrb, mrb_value obj) { return 
mrb_enc_from_index(mrb, mrb_enc_get_index(mrb, obj)); } mrb_encoding* mrb_enc_check(mrb_state *mrb, mrb_value str1, mrb_value str2) { mrb_encoding *enc = mrb_enc_compatible(mrb, str1, str2); if (!enc) mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s", mrb_enc_name(mrb_enc_get(mrb, str1)), mrb_enc_name(mrb_enc_get(mrb, str2))); return enc; } mrb_encoding* mrb_enc_compatible(mrb_state *mrb, mrb_value str1, mrb_value str2) { int idx1, idx2; mrb_encoding *enc1, *enc2; idx1 = mrb_enc_get_index(mrb, str1); idx2 = mrb_enc_get_index(mrb, str2); if (idx1 < 0 || idx2 < 0) return 0; if (idx1 == idx2) { return mrb_enc_from_index(mrb, idx1); } enc1 = mrb_enc_from_index(mrb, idx1); enc2 = mrb_enc_from_index(mrb, idx2); if (mrb_type(str2) == MRB_TT_STRING && RSTRING_LEN(str2) == 0) //return (idx1 == ENCINDEX_US_ASCII && mrb_enc_asciicompat(mrb, enc2)) ? enc2 : enc1; return enc1; if (mrb_type(str1) == MRB_TT_STRING && RSTRING_LEN(str1) == 0) //return (idx2 == ENCINDEX_US_ASCII && mrb_enc_asciicompat(mrb, enc1)) ? 
enc1 : enc2; return enc2; if (!mrb_enc_asciicompat(mrb, enc1) || !mrb_enc_asciicompat(mrb, enc2)) { return 0; } /* objects whose encoding is the same of contents */ //if (mrb_type(str2)/*BUILTIN_TYPE(str2)*/ != MRB_TT_STRING && idx2 == ENCINDEX_US_ASCII) //return enc1; //if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ != MRB_TT_STRING && idx1 == ENCINDEX_US_ASCII) //return enc2; if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ != MRB_TT_STRING) { mrb_value tmp = str1; int idx0 = idx1; str1 = str2; str2 = tmp; idx1 = idx2; idx2 = idx0; } if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ == MRB_TT_STRING) { int cr1, cr2; cr1 = mrb_enc_str_coderange(mrb, str1); if (mrb_type(str2)/*BUILTIN_TYPE(str2)*/ == MRB_TT_STRING) { cr2 = mrb_enc_str_coderange(mrb, str2); if (cr1 != cr2) { /* may need to handle ENC_CODERANGE_BROKEN */ if (cr1 == ENC_CODERANGE_7BIT) return enc2; if (cr2 == ENC_CODERANGE_7BIT) return enc1; } if (cr2 == ENC_CODERANGE_7BIT) { if (idx1 == ENCINDEX_ASCII) return enc2; return enc1; } } if (cr1 == ENC_CODERANGE_7BIT) return enc2; } return 0; } void mrb_enc_copy(mrb_state *mrb, mrb_value obj1, mrb_value obj2) { mrb_enc_associate_index(mrb, obj1, mrb_enc_get_index(mrb, obj2)); } /* * call-seq: * obj.encoding -> encoding * * Returns the Encoding object that represents the encoding of obj. */ mrb_value mrb_obj_encoding(mrb_state *mrb, mrb_value obj) { mrb_encoding *enc = mrb_enc_get(mrb, obj); if (!enc) { mrb_raise(mrb, E_TYPE_ERROR, "unknown encoding"); } return mrb_enc_from_encoding(mrb, enc); } int mrb_enc_fast_mbclen(const char *p, const char *e, mrb_encoding *enc) { return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); } int mrb_enc_mbclen(const char *p, const char *e, mrb_encoding *enc) { int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p) return MBCLEN_CHARFOUND_LEN(n); else { int min = mrb_enc_mbminlen(enc); return min <= e-p ? 
min : (int)(e-p); } } int mrb_enc_precise_mbclen(const char *p, const char *e, mrb_encoding *enc) { int n; if (e <= p) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1); n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); if (e-p < n) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p)); return n; } int mrb_enc_ascget(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc) { unsigned int c, l; if (e <= p) return -1; if (mrb_enc_asciicompat(mrb, enc)) { c = (unsigned char)*p; if (!ISASCII(c)) return -1; if (len) *len = 1; return c; } l = mrb_enc_precise_mbclen(p, e, enc); if (!MBCLEN_CHARFOUND_P(l)) return -1; c = mrb_enc_mbc_to_codepoint(p, e, enc); if (!mrb_enc_isascii(c, enc)) return -1; if (len) *len = l; return c; } unsigned int mrb_enc_codepoint_len(mrb_state *mrb, const char *p, const char *e, int *len_p, mrb_encoding *enc) { int r; if (e <= p) mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string"); r = mrb_enc_precise_mbclen(p, e, enc); if (MBCLEN_CHARFOUND_P(r)) { if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r); return mrb_enc_mbc_to_codepoint(p, e, enc); } else mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(enc)); return 0; } #undef mrb_enc_codepoint unsigned int mrb_enc_codepoint(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc) { return mrb_enc_codepoint_len(mrb, p, e, 0, enc); } int mrb_enc_codelen(mrb_state *mrb, int c, mrb_encoding *enc) { int n = ONIGENC_CODE_TO_MBCLEN(enc,c); if (n == 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid codepoint 0x%x in %s", c, mrb_enc_name(enc)); } return n; } int mrb_enc_toupper(int c, mrb_encoding *enc) { return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_UPPER_CASE(c):(c)); } int mrb_enc_tolower(int c, mrb_encoding *enc) { return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_LOWER_CASE(c):(c)); } /* 15.2.40.2.14 */ /* * call-seq: * enc.inspect -> string * * Returns a string which represents the encoding for programmers. 
* * Encoding::UTF_8.inspect #=> "#" * Encoding::ISO_2022_JP.inspect #=> "#" */ static mrb_value enc_inspect(mrb_state *mrb, mrb_value self) { mrb_value str; //mrb_value str = mrb_sprintf("#<%s:%s%s>", mrb_obj_classname(mrb, self), // mrb_enc_name((mrb_encoding*)(DATA_PTR(self))), // (mrb_fixnum(enc_dummy_p(mrb, self)) ? " (dummy)" : "")); char buf[256]; sprintf(buf, "#<%s:%s%s>", mrb_obj_classname(mrb, self), mrb_enc_name((mrb_encoding*)(DATA_PTR(self))), (mrb_enc_dummy_p((mrb_encoding*)(DATA_PTR(self))) ? " (dummy)" : "")); str = mrb_str_new(mrb, buf, strlen(buf)); ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT); return str; } /* 15.2.40.2.15 */ /* 15.2.40.2.18 */ /* * call-seq: * enc.name -> string * * Returns the name of the encoding. * * Encoding::UTF_8.name #=> "UTF-8" */ static mrb_value enc_name(mrb_state *mrb, mrb_value self) { return mrb_usascii_str_new2(mrb, mrb_enc_name((mrb_encoding*)DATA_PTR(self))); } struct fn_arg { mrb_state *mrb; int (*func)(ANYARGS); void *a; }; static int fn_i(st_data_t key, st_data_t val, st_data_t arg) { struct fn_arg *a = (struct fn_arg*)arg; return (*a->func)(a->mrb, key, val, a->a); } static int st_foreachNew(mrb_state *mrb, st_table *tbl, int (*func)(ANYARGS), void *a) { struct fn_arg arg = { mrb, func, a, }; return st_foreach(tbl, fn_i, (st_data_t)&arg); } static int enc_names_i(mrb_state *mrb, st_data_t name, st_data_t idx, st_data_t args) { mrb_value *arg = (mrb_value *)args; int iargs = mrb_fixnum(arg[0]); //if ((int)idx == (int)arg[0]) { if ((int)idx == iargs) { mrb_value str = mrb_usascii_str_new2(mrb, (char *)name); //OBJ_FREEZE(str); mrb_ary_push(mrb, arg[1], str); } return ST_CONTINUE; } /* 15.2.40.2.16 */ /* * call-seq: * enc.names -> array * * Returns the list of name and aliases of the encoding. 
* * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"] */ static mrb_value enc_names(mrb_state *mrb, mrb_value self) { mrb_value args[2]; args[0] = mrb_fixnum_value(mrb_to_encoding_index(mrb, self)); args[1] = mrb_ary_new_capa(mrb, 0);//mrb_ary_new2(0); st_foreachNew(mrb, enc_table.names, enc_names_i, args); return args[1]; } /* 15.2.40.2.8 */ /* * call-seq: * Encoding.list -> [enc1, enc2, ...] * * Returns the list of loaded encodings. * * Encoding.list * #=> [#, #, * #] * * Encoding.find("US-ASCII") * #=> # * * Encoding.list * #=> [#, #, * #, #] * */ static mrb_value enc_list(mrb_state *mrb, mrb_value klass) { struct RArray *ar = (struct RArray *)mrb_encoding_list.value.p; mrb_value ary = mrb_ary_new_capa(mrb, 0);//mrb_ary_new2(0); //mrb_ary_replace_m(mrb, ary/*, mmrb_encoding_list*/); mrb_ary_replace(mrb, mrb_ary_ptr(ary), ar->buf, enc_table.count); return ary; } /* 15.2.40.2.7 */ /* * call-seq: * Encoding.find(string) -> enc * Encoding.find(symbol) -> enc * * Search the encoding with specified name. * name should be a string or symbol. * * Encoding.find("US-ASCII") #=> # * Encoding.find(:Shift_JIS) #=> # * * Names which this method accept are encoding names and aliases * including following special aliases * * "external":: default external encoding * "internal":: default internal encoding * "locale":: locale encoding * "filesystem":: filesystem encoding * * An ArgumentError is raised when no encoding with name. * Only Encoding.find("internal") however returns nil * when no encoding named "internal", in other words, when Ruby has no * default internal encoding. */ static mrb_value enc_find(mrb_state *mrb, mrb_value klass) { mrb_value enc; mrb_get_args(mrb, "o", &enc); return mrb_enc_from_encoding(mrb, to_encoding(mrb, enc)); } /* 15.2.40.2.2 */ /* * call-seq: * Encoding.compatible?(str1, str2) -> enc or nil * * Checks the compatibility of two strings. 
* If they are compatible, means concatenatable, * returns an encoding which the concatenated string will be. * If they are not compatible, nil is returned. * * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b") * #=> # * * Encoding.compatible?( * "\xa1".force_encoding("iso-8859-1"), * "\xa1\xa1".force_encoding("euc-jp")) * #=> nil * */ static mrb_value enc_compatible_p(mrb_state *mrb, mrb_value klass) { mrb_value str1; mrb_value str2; mrb_encoding *enc; mrb_get_args(mrb, "oo", &str1, &str2); if (!enc_capable(str1)) return mrb_nil_value(); if (!enc_capable(str2)) return mrb_nil_value(); enc = mrb_enc_compatible(mrb, str1, str2); if (!enc) return mrb_nil_value(); return mrb_enc_from_encoding(mrb, enc); } /* 15.2.40.2.19 */ /* :nodoc: */ static mrb_value enc_dump(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self) { //mrb_scan_args(argc, argv, "01", 0); return enc_name(mrb, self); } /* 15.2.40.2.11 */ /* :nodoc: */ static mrb_value enc_load(mrb_state *mrb, mrb_value klass) { mrb_value str; mrb_get_args(mrb, "o", &str); return enc_find(mrb, str); } mrb_encoding * mrb_ascii8bit_encoding(mrb_state *mrb) { if (!enc_table.list) { mrb_enc_init(mrb); } return enc_table.list[ENCINDEX_ASCII].enc; } int mrb_ascii8bit_encindex(void) { return ENCINDEX_ASCII; } mrb_encoding * mrb_utf8_encoding(mrb_state *mrb) { if (!enc_table.list) { mrb_enc_init(mrb); } return enc_table.list[ENCINDEX_UTF_8].enc; } int mrb_utf8_encindex(void) { return ENCINDEX_UTF_8; } mrb_encoding * mrb_usascii_encoding(mrb_state *mrb) { if (!enc_table.list) { mrb_enc_init(mrb); } return enc_table.list[ENCINDEX_US_ASCII].enc; } int mrb_usascii_encindex(void) { return ENCINDEX_US_ASCII; } int mrb_locale_encindex(mrb_state *mrb) { mrb_value charmap = mrb_locale_charmap(mrb, mrb_obj_value(mrb->encode_class)); int idx; if (mrb_nil_p(charmap)) idx = mrb_usascii_encindex(); //else if ((idx = mrb_enc_find_index(StringValueCStr(charmap))) < 0) else if ((idx = mrb_enc_find_index(mrb, 
mrb_string_value_cstr(mrb, &charmap))) < 0) idx = mrb_ascii8bit_encindex(); if (mrb_enc_registered("locale") < 0) enc_alias_internal("locale", idx); return idx; } mrb_encoding * mrb_locale_encoding(mrb_state *mrb) { return mrb_enc_from_index(mrb, mrb_locale_encindex(mrb)); } static int enc_set_filesystem_encoding(mrb_state *mrb) { int idx; #if defined NO_LOCALE_CHARMAP idx = mrb_enc_to_index(mrb_default_external_encoding(mrb)); #elif defined _WIN32 || defined __CYGWIN__ char cp[sizeof(int) * 8 / 3 + 4]; //snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? GetACP() : GetOEMCP()); idx = mrb_enc_find_index(mrb, cp); if (idx < 0) idx = mrb_ascii8bit_encindex(); #else idx = mrb_enc_to_index(mrb_default_external_encoding(mrb)); #endif enc_alias_internal("filesystem", idx); return idx; } int mrb_filesystem_encindex(void) { int idx = mrb_enc_registered("filesystem"); if (idx < 0) idx = mrb_ascii8bit_encindex(); return idx; } mrb_encoding * mrb_filesystem_encoding(mrb_state *mrb) { return mrb_enc_from_index(mrb, mrb_filesystem_encindex()); } struct default_encoding { int index; /* -2 => not yet set, -1 => nil */ mrb_encoding *enc; }; static struct default_encoding default_external = {0}; static int enc_set_default_encoding(mrb_state *mrb, struct default_encoding *def, mrb_value encoding, const char *name) { int overridden = FALSE; if (def->index != -2) /* Already set */ overridden = TRUE; if (mrb_nil_p(encoding)) { def->index = -1; def->enc = 0; st_insert(enc_table.names, (st_data_t)strdup(name), (st_data_t)UNSPECIFIED_ENCODING); } else { def->index = mrb_enc_to_index(mrb_to_encoding(mrb, encoding)); def->enc = 0; enc_alias_internal(name, def->index); } if (def == &default_external) enc_set_filesystem_encoding(mrb); return overridden; } mrb_encoding * mrb_default_external_encoding(mrb_state *mrb) { if (default_external.enc) return default_external.enc; if (default_external.index >= 0) { default_external.enc = mrb_enc_from_index(mrb, default_external.index); return 
default_external.enc; } else { return mrb_locale_encoding(mrb); } } mrb_value mrb_enc_default_external(mrb_state *mrb) { return mrb_enc_from_encoding(mrb, mrb_default_external_encoding(mrb)); } /* 15.2.40.2.3 */ /* * call-seq: * Encoding.default_external -> enc * * Returns default external encoding. * * It is initialized by the locale or -E option. */ static mrb_value get_default_external(mrb_state *mrb, mrb_value klass) { return mrb_enc_default_external(mrb); } void mrb_enc_set_default_external(mrb_state *mrb, mrb_value encoding) { if (mrb_nil_p(encoding)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "default external can not be nil"); } enc_set_default_encoding(mrb, &default_external, encoding, "external"); } /* 15.2.40.2.4 */ /* * call-seq: * Encoding.default_external = enc * * Sets default external encoding. */ static mrb_value set_default_external(mrb_state *mrb, mrb_value klass) { mrb_value encoding; mrb_get_args(mrb, "o", &encoding); mrb_warning("setting Encoding.default_external"); mrb_enc_set_default_external(mrb, encoding); return encoding; } static struct default_encoding default_internal = {-2}; mrb_encoding * mrb_default_internal_encoding(mrb_state *mrb) { if (!default_internal.enc && default_internal.index >= 0) { default_internal.enc = mrb_enc_from_index(mrb, default_internal.index); } return default_internal.enc; /* can be NULL */ } mrb_value mrb_enc_default_internal(mrb_state *mrb) { /* Note: These functions cope with default_internal not being set */ return mrb_enc_from_encoding(mrb, mrb_default_internal_encoding(mrb)); } /* 15.2.40.2.5 */ /* * call-seq: * Encoding.default_internal -> enc * * Returns default internal encoding. * * It is initialized by the source internal_encoding or -E option. 
*/ static mrb_value get_default_internal(mrb_state *mrb, mrb_value klass) { return mrb_enc_default_internal(mrb); } void mrb_enc_set_default_internal(mrb_state *mrb, mrb_value encoding) { enc_set_default_encoding(mrb, &default_internal, encoding, "internal"); } /* 15.2.40.2.6 */ /* * call-seq: * Encoding.default_internal = enc or nil * * Sets default internal encoding. * Or removes default internal encoding when passed nil. */ static mrb_value set_default_internal(mrb_state *mrb, mrb_value klass) { mrb_value encoding; mrb_get_args(mrb, "o", &encoding); mrb_warning("setting Encoding.default_internal"); mrb_enc_set_default_internal(mrb, encoding); return encoding; } #define digit(x) ((x) >= '0' && (x) <= '9') #define strstart(s, n) (strncasecmp(s, n, strlen(n)) == 0) #define C_CODESET "US-ASCII" /* Return this as the encoding of the * C/POSIX locale. Could as well one day * become "UTF-8". */ #if defined _WIN32 || defined __CYGWIN__ #define JA_CODESET "Windows-31J" #else #define JA_CODESET "EUC-JP" #endif static char buf[16]; const char * nl_langinfo_codeset(void) { const char *l, *p; int n; if (((l = getenv("LC_ALL")) && *l) || ((l = getenv("LC_CTYPE")) && *l) || ((l = getenv("LANG")) && *l)) { /* check standardized locales */ if (!strcmp(l, "C") || !strcmp(l, "POSIX")) return C_CODESET; /* check for encoding name fragment */ p = strchr(l, '.'); if (!p++) p = l; if (strstart(p, "UTF")) return "UTF-8"; if ((n = 5, strstart(p, "8859-")) || (n = 9, strstart(p, "ISO-8859-"))) { if (digit(p[n])) { p += n; memcpy(buf, "ISO-8859-\0\0", 12); buf[9] = *p++; if (digit(*p)) buf[10] = *p++; return buf; } } if (strstart(p, "KOI8-R")) return "KOI8-R"; if (strstart(p, "KOI8-U")) return "KOI8-U"; if (strstart(p, "620")) return "TIS-620"; if (strstart(p, "2312")) return "GB2312"; if (strstart(p, "HKSCS")) return "Big5HKSCS"; /* no MIME charset */ if (strstart(p, "BIG5")) return "Big5"; if (strstart(p, "GBK")) return "GBK"; /* no MIME charset */ if (strstart(p, "18030")) return 
"GB18030"; /* no MIME charset */ if (strstart(p, "Shift_JIS") || strstart(p, "SJIS")) return "Windows-31J"; /* check for conclusive modifier */ if (strstart(p, "euro")) return "ISO-8859-15"; /* check for language (and perhaps country) codes */ if (strstart(l, "zh_TW")) return "Big5"; if (strstart(l, "zh_HK")) return "Big5HKSCS"; /* no MIME charset */ if (strstart(l, "zh")) return "GB2312"; if (strstart(l, "ja")) return JA_CODESET; if (strstart(l, "ko")) return "EUC-KR"; if (strstart(l, "ru")) return "KOI8-R"; if (strstart(l, "uk")) return "KOI8-U"; if (strstart(l, "pl") || strstart(l, "hr") || strstart(l, "hu") || strstart(l, "cs") || strstart(l, "sk") || strstart(l, "sl")) return "ISO-8859-2"; if (strstart(l, "eo") || strstart(l, "mt")) return "ISO-8859-3"; if (strstart(l, "el")) return "ISO-8859-7"; if (strstart(l, "he")) return "ISO-8859-8"; if (strstart(l, "tr")) return "ISO-8859-9"; if (strstart(l, "th")) return "TIS-620"; /* or ISO-8859-11 */ if (strstart(l, "lt")) return "ISO-8859-13"; if (strstart(l, "cy")) return "ISO-8859-14"; if (strstart(l, "ro")) return "ISO-8859-2"; /* or ISO-8859-16 */ if (strstart(l, "am") || strstart(l, "vi")) return "UTF-8"; /* Send me further rules if you like, but don't forget that we are * *only* interested in locale naming conventions on platforms * that do not already provide an nl_langinfo(CODESET) implementation. */ } return NULL; } /* 15.2.40.2.9 */ /* * call-seq: * Encoding.locale_charmap -> string * * Returns the locale charmap name. * * Debian GNU/Linux * LANG=C * Encoding.locale_charmap #=> "ANSI_X3.4-1968" * LANG=ja_JP.EUC-JP * Encoding.locale_charmap #=> "EUC-JP" * * SunOS 5 * LANG=C * Encoding.locale_charmap #=> "646" * LANG=ja * Encoding.locale_charmap #=> "eucJP" * * The result is highly platform dependent. * So Encoding.find(Encoding.locale_charmap) may cause an error. * If you need some encoding object even for unknown locale, * Encoding.find("locale") can be used. 
* */ mrb_value mrb_locale_charmap(mrb_state *mrb, mrb_value klass) { #if defined NO_LOCALE_CHARMAP return mrb_usascii_str_new2(mrb, "ASCII-8BIT"); #elif defined _WIN32 || defined __CYGWIN__ const char *nl_langinfo_codeset(void); const char *codeset = nl_langinfo_codeset(); char cp[sizeof(int) * 3 + 4]; if (!codeset) { //snprintf(cp, sizeof(cp), "CP%d", GetConsoleCP()); codeset = cp; } return mrb_usascii_str_new2(mrb, codeset); #elif defined HAVE_LANGINFO_H char *codeset; codeset = nl_langinfo(CODESET); return mrb_usascii_str_new2(mrb, codeset); #else return mrb_nil_value(); #endif } static void set_encoding_const(mrb_state *mrb, const char *name, mrb_encoding *enc) { mrb_value encoding = mrb_enc_from_encoding(mrb, enc); char *s = (char *)name; int haslower = 0, hasupper = 0, valid = 0; if (ISDIGIT(*s)) return; if (ISUPPER(*s)) { hasupper = 1; while (*++s && (ISALNUM(*s) || *s == '_')) { if (ISLOWER(*s)) haslower = 1; } } if (!*s) { if (s - name > ENCODING_NAMELEN_MAX) return; valid = 1; //mrb_define_const(mrb_cEncoding, name, encoding); mrb_define_const(mrb, mrb->encode_class, name, encoding); } if (!valid || haslower) { size_t len = s - name; if (len > ENCODING_NAMELEN_MAX) return; if (!haslower || !hasupper) { do { if (ISLOWER(*s)) haslower = 1; if (ISUPPER(*s)) hasupper = 1; } while (*++s && (!haslower || !hasupper)); len = s - name; } len += strlen(s); if (len++ > ENCODING_NAMELEN_MAX) return; //MEMCPY(s = ALLOCA_N(char, len), name, char, len); memcpy(s = mrb_malloc(mrb, len), name, len); name = s; if (!valid) { if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s); for (; *s; ++s) { if (!ISALNUM(*s)) *s = '_'; } if (hasupper) { mrb_define_const(mrb, mrb->encode_class, name, encoding); } } if (haslower) { for (s = (char *)name; *s; ++s) { if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s); } mrb_define_const(mrb, mrb->encode_class, name, encoding); } } } static int mrb_enc_name_list_i(mrb_state *mrb, st_data_t name, st_data_t idx, 
mrb_value *arg) { mrb_value ary = *arg; mrb_value str = mrb_usascii_str_new2(mrb, (char *)name); //OBJ_FREEZE(str); mrb_ary_push(mrb, ary, str); return ST_CONTINUE; } /* 15.2.40.2.10 */ /* * call-seq: * Encoding.name_list -> ["enc1", "enc2", ...] * * Returns the list of available encoding names. * * Encoding.name_list * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8", * "ISO-8859-1", "Shift_JIS", "EUC-JP", * "Windows-31J", * "BINARY", "CP932", "eucJP"] * */ static mrb_value mrb_enc_name_list(mrb_state *mrb, mrb_value klass) { mrb_value ary = mrb_ary_new_capa(mrb, enc_table.names->num_entries);//mrb_ary_new2(enc_table.names->num_entries); st_foreachNew(mrb, enc_table.names, mrb_enc_name_list_i, &ary); return ary; } static int mrb_enc_aliases_enc_i(mrb_state *mrb, st_data_t name, st_data_t orig, st_data_t arg) { mrb_value *p = (mrb_value *)arg; mrb_value aliases = p[0], ary = p[1]; int idx = (int)orig; mrb_value key, str = mrb_ary_ref(mrb, ary, idx);//mrb_ary_entry(ary, idx); if (mrb_nil_p(str)) { mrb_encoding *enc = mrb_enc_from_index(mrb, idx); if (!enc) return ST_CONTINUE; if (STRCASECMP((char*)name, mrb_enc_name(enc)) == 0) { return ST_CONTINUE; } str = mrb_usascii_str_new2(mrb, mrb_enc_name(enc)); OBJ_FREEZE(str); mrb_ary_set(mrb, ary, idx, str);//rb_ary_store(ary, idx, str); } key = mrb_usascii_str_new2(mrb, (char *)name); OBJ_FREEZE(key); mrb_hash_set(mrb, aliases, key, str); return ST_CONTINUE; } /* 15.2.40.2.1 */ /* * call-seq: * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...} * * Returns the hash of available encoding alias and original encoding name. 
* * Encoding.aliases * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1986"=>"US-ASCII", * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"} * */ static mrb_value mrb_enc_aliases(mrb_state *mrb, mrb_value klass) { mrb_value aliases[2]; aliases[0] = mrb_hash_new_capa(mrb, 0); aliases[1] = mrb_ary_new(mrb); st_foreachNew(mrb, enc_table.names, mrb_enc_aliases_enc_i, aliases); return aliases[0]; } void mrb_init_encoding(mrb_state *mrb) { #undef mrb_intern #define mrb_intern(str) mrb_intern_const(str) mrb_value list; int i; struct RClass *s; s = mrb->encode_class = mrb_define_class(mrb, "Encoding", mrb->object_class); //mrb_undef_alloc_func(mrb_cEncoding); //mrb_undef_method(CLASS_OF(mrb_cEncoding), "new"); mrb_define_class_method(mrb, s, "aliases", mrb_enc_aliases, ARGS_NONE()); /* 15.2.40.2.1 */ mrb_define_class_method(mrb, s, "compatible?", enc_compatible_p, ARGS_REQ(2)); /* 15.2.40.2.2 */ mrb_define_class_method(mrb, s, "default_external", get_default_external, ARGS_NONE()); /* 15.2.40.2.3 */ mrb_define_class_method(mrb, s, "default_external=", set_default_external, ARGS_REQ(1)); /* 15.2.40.2.4 */ mrb_define_class_method(mrb, s, "default_internal", get_default_internal, ARGS_NONE()); /* 15.2.40.2.5 */ mrb_define_class_method(mrb, s, "default_internal=", set_default_internal, ARGS_REQ(1)); /* 15.2.40.2.6 */ mrb_define_class_method(mrb, s, "find", enc_find, ARGS_REQ(1)); /* 15.2.40.2.7 */ mrb_define_class_method(mrb, s, "list", enc_list, ARGS_NONE()); /* 15.2.40.2.8 */ mrb_define_class_method(mrb, s, "locale_charmap", mrb_locale_charmap, ARGS_NONE()); /* 15.2.40.2.9 */ mrb_define_class_method(mrb, s, "name_list", mrb_enc_name_list, ARGS_NONE()); /* 15.2.40.2.10 */ mrb_define_class_method(mrb, s, "_load", enc_load, ARGS_REQ(1)); /* 15.2.40.2.11 */ mrb_define_method(mrb, s, "ascii_compatible?", enc_ascii_compatible_p, ARGS_NONE()); /* 15.2.40.2.12 */ mrb_define_method(mrb, s, "dummy?", enc_dummy_p, ARGS_NONE()); /* 15.2.40.2.13 */ 
mrb_define_method(mrb, s, "inspect", enc_inspect, ARGS_NONE()); /* 15.2.40.2.14 */ mrb_define_method(mrb, s, "name", enc_name, ARGS_NONE()); /* 15.2.40.2.15 */ mrb_define_method(mrb, s, "names", enc_names, ARGS_NONE()); /* 15.2.40.2.16 */ mrb_define_method(mrb, s, "replicate", enc_replicate, ARGS_REQ(1)); /* 15.2.40.2.17 */ mrb_define_method(mrb, s, "to_s", enc_name, ARGS_NONE()); /* 15.2.40.2.18 */ mrb_define_method(mrb, s, "_dump", enc_dump, ARGS_ANY()); /* 15.2.40.2.19 */ /* add kusuda --> */ if (!enc_table.list) { mrb_enc_init(mrb); } /* add kusuda --< */ list = mrb_ary_new_capa(mrb, enc_table.count);//mrb_ary_new2(enc_table.count); RBASIC(list)->c = 0; mrb_encoding_list = list; //mrb_gc_register_mark_object(list); for (i = 0; i < enc_table.count; ++i) { mrb_ary_push(mrb, list, enc_new(mrb, enc_table.list[i].enc)); } } /* locale insensitive functions */ #define ctype_test(c, ctype) \ (mrb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), ctype)) int mrb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); } int mrb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); } int mrb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); } int mrb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); } int mrb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); } int mrb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); } int mrb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); } int mrb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); } int mrb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); } int mrb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); } int mrb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); } int mrb_isxdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_XDIGIT); } int mrb_tolower(int c) { return mrb_isascii(c) ? ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) : c; } int mrb_toupper(int c) { return mrb_isascii(c) ? 
ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) : c; } #endif //INCLUDE_ENCODING