diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-06-04 12:31:26 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-06-04 12:31:26 +0000 |
commit | 4fac3a042baf75b31b7cc7d06ce9052d48ed9d92 (patch) | |
tree | fe952114fed9f49b11fa58533478ef130eddeba7 /ext | |
parent | acb1179cd8edcd48e74178acf7d65e6c71181199 (diff) | |
download | ruby-4fac3a042baf75b31b7cc7d06ce9052d48ed9d92.tar.gz ruby-4fac3a042baf75b31b7cc7d06ce9052d48ed9d92.tar.xz ruby-4fac3a042baf75b31b7cc7d06ce9052d48ed9d92.zip |
* lib/json.rb, lib/json, ext/json, test/json:
import JSON library.
git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@12428 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext')
-rw-r--r-- | ext/json/ext/generator/extconf.h | 3 | ||||
-rw-r--r-- | ext/json/ext/generator/extconf.rb | 9 | ||||
-rwxr-xr-x | ext/json/ext/generator/generator.c | 728 | ||||
-rw-r--r-- | ext/json/ext/generator/unicode.c | 184 | ||||
-rwxr-xr-x | ext/json/ext/generator/unicode.h | 53 | ||||
-rw-r--r-- | ext/json/ext/parser/extconf.h | 3 | ||||
-rw-r--r-- | ext/json/ext/parser/extconf.rb | 9 | ||||
-rw-r--r-- | ext/json/ext/parser/parser.c | 1601 | ||||
-rw-r--r-- | ext/json/ext/parser/parser.rl | 569 | ||||
-rw-r--r-- | ext/json/ext/parser/unicode.c | 156 | ||||
-rwxr-xr-x | ext/json/ext/parser/unicode.h | 58 | ||||
-rw-r--r-- | ext/nkf/nkf-utf8/nkf.c | 198 | ||||
-rw-r--r-- | ext/nkf/nkf-utf8/utf8tbl.c | 247 | ||||
-rw-r--r-- | ext/nkf/nkf-utf8/utf8tbl.h | 3 |
14 files changed, 3751 insertions, 70 deletions
diff --git a/ext/json/ext/generator/extconf.h b/ext/json/ext/generator/extconf.h new file mode 100644 index 000000000..cda0cc8ea --- /dev/null +++ b/ext/json/ext/generator/extconf.h @@ -0,0 +1,3 @@ +#ifndef EXTCONF_H +#define EXTCONF_H +#endif diff --git a/ext/json/ext/generator/extconf.rb b/ext/json/ext/generator/extconf.rb new file mode 100644 index 000000000..db721a92f --- /dev/null +++ b/ext/json/ext/generator/extconf.rb @@ -0,0 +1,9 @@ +require 'mkmf' +require 'rbconfig' + +if CONFIG['CC'] =~ /gcc/ + CONFIG['CC'] += ' -Wall -ggdb' + #CONFIG['CC'] += ' -Wall' +end + +create_makefile 'json/ext/generator' diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c new file mode 100755 index 000000000..60e09355f --- /dev/null +++ b/ext/json/ext/generator/generator.c @@ -0,0 +1,728 @@ +/* vim: set cin et sw=4 ts=4: */ + +#include <string.h> +#include "ruby.h" +#include "st.h" +#include "unicode.h" + +static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject, + mHash, mArray, mInteger, mFloat, mString, mString_Extend, + mTrueClass, mFalseClass, mNilClass, eGeneratorError, + eCircularDatastructure; + +static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before, + i_object_nl, i_array_nl, i_check_circular, i_pack, i_unpack, + i_create_id, i_extend; + +typedef struct JSON_Generator_StateStruct { + VALUE indent; + VALUE space; + VALUE space_before; + VALUE object_nl; + VALUE array_nl; + int check_circular; + VALUE seen; + VALUE memo; + VALUE depth; + int flag; +} JSON_Generator_State; + +#define GET_STATE(self) \ + JSON_Generator_State *state; \ + Data_Get_Struct(self, JSON_Generator_State, state); + +/* + * Document-module: JSON::Ext::Generator + * + * This is the JSON generator implemented as a C extension. It can be + * configured to be used by setting + * + * JSON.generator = JSON::Ext::Generator + * + * with the method generator= in JSON. + * + */ + +static int hash_to_json_state_i(VALUE key, VALUE value, VALUE Vstate) +{ + VALUE json, buf, Vdepth; + GET_STATE(Vstate); + buf = state->memo; + Vdepth = state->depth; + + if (key == Qundef) return ST_CONTINUE; + if (state->flag) { + state->flag = 0; + rb_str_buf_cat2(buf, ","); + if (RSTRING_LEN(state->object_nl)) rb_str_buf_append(buf, state->object_nl); + } + if (RSTRING_LEN(state->object_nl)) { + rb_str_buf_append(buf, rb_str_times(state->indent, Vdepth)); + } + json = rb_funcall(rb_funcall(key, i_to_s, 0), i_to_json, 2, Vstate, Vdepth); + rb_str_buf_append(buf, json); + OBJ_INFECT(buf, json); + if (RSTRING_LEN(state->space_before)) { + rb_str_buf_append(buf, state->space_before); + } + rb_str_buf_cat2(buf, ":"); + if (RSTRING_LEN(state->space)) rb_str_buf_append(buf, state->space); + json = rb_funcall(value, i_to_json, 2, Vstate, Vdepth); + state->flag = 1; + rb_str_buf_append(buf, json); + OBJ_INFECT(buf, json); + state->depth = Vdepth; + state->memo = buf; + return ST_CONTINUE; +} + +inline static VALUE mHash_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth) { + long depth, len = RHASH(self)->tbl->num_entries; + VALUE result; + GET_STATE(Vstate); + + depth = 1 + FIX2LONG(Vdepth); + result = rb_str_buf_new(len); + state->memo = result; + state->depth = LONG2FIX(depth); + state->flag = 0; + rb_str_buf_cat2(result, "{"); + if (RSTRING_LEN(state->object_nl)) rb_str_buf_append(result, state->object_nl); + rb_hash_foreach(self, hash_to_json_state_i, Vstate); + if (RSTRING_LEN(state->object_nl)) rb_str_buf_append(result, state->object_nl); + if (RSTRING_LEN(state->object_nl)) { + rb_str_buf_append(result, rb_str_times(state->indent, Vdepth)); + } + rb_str_buf_cat2(result, "}"); + return result; +} + +static int hash_to_json_i(VALUE key, VALUE value, VALUE buf) +{ + VALUE tmp; + + if (key == Qundef) return ST_CONTINUE; + if (RSTRING_LEN(buf) > 1) rb_str_buf_cat2(buf, ","); + tmp = rb_funcall(rb_funcall(key, i_to_s, 0), i_to_json, 0); + rb_str_buf_append(buf, tmp); + OBJ_INFECT(buf, tmp); + rb_str_buf_cat2(buf, ":"); + tmp = rb_funcall(value, i_to_json, 0); + rb_str_buf_append(buf, tmp); + OBJ_INFECT(buf, tmp); + + return ST_CONTINUE; +} + +/* + * call-seq: to_json(state = nil, depth = 0) + * + * Returns a JSON string containing a JSON object, that is unparsed from + * this Hash instance. + * _state_ is a JSON::State object, that can also be used to configure the + * produced JSON string output further. + * _depth_ is used to find out nesting depth, to indent accordingly. + */ +static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) +{ + VALUE Vstate, Vdepth, result; + long depth; + + rb_scan_args(argc, argv, "02", &Vstate, &Vdepth); + depth = NIL_P(Vdepth) ? 0 : FIX2LONG(Vdepth); + if (NIL_P(Vstate)) { + long len = RHASH(self)->tbl->num_entries; + result = rb_str_buf_new(len); + rb_str_buf_cat2(result, "{"); + rb_hash_foreach(self, hash_to_json_i, result); + rb_str_buf_cat2(result, "}"); + } else { + GET_STATE(Vstate); + if (state->check_circular) { + VALUE self_id = rb_obj_id(self); + if (RTEST(rb_hash_aref(state->seen, self_id))) { + rb_raise(eCircularDatastructure, + "circular data structures not supported!"); + } + rb_hash_aset(state->seen, self_id, Qtrue); + result = mHash_json_transfrom(self, Vstate, LONG2FIX(depth)); + rb_hash_delete(state->seen, self_id); + } else { + result = mHash_json_transfrom(self, Vstate, LONG2FIX(depth)); + } + } + OBJ_INFECT(result, self); + return result; +} + +inline static VALUE mArray_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth) { + long i, len = RARRAY_LEN(self); + VALUE shift, result; + long depth = NIL_P(Vdepth) ? 0 : FIX2LONG(Vdepth); + VALUE delim = rb_str_new2(","); + GET_STATE(Vstate); + + if (state->check_circular) { + VALUE self_id = rb_obj_id(self); + rb_hash_aset(state->seen, self_id, Qtrue); + result = rb_str_buf_new(len); + if (RSTRING_LEN(state->array_nl)) rb_str_append(delim, state->array_nl); + shift = rb_str_times(state->indent, LONG2FIX(depth + 1)); + + rb_str_buf_cat2(result, "["); + rb_str_buf_append(result, state->array_nl); + for (i = 0; i < len; i++) { + VALUE element = RARRAY_PTR(self)[i]; + if (RTEST(rb_hash_aref(state->seen, rb_obj_id(element)))) { + rb_raise(eCircularDatastructure, + "circular data structures not supported!"); + } + OBJ_INFECT(result, element); + if (i > 0) rb_str_buf_append(result, delim); + rb_str_buf_append(result, shift); + rb_str_buf_append(result, rb_funcall(element, i_to_json, 2, Vstate, LONG2FIX(depth + 1))); + } + if (RSTRING_LEN(state->array_nl)) { + rb_str_buf_append(result, state->array_nl); + rb_str_buf_append(result, rb_str_times(state->indent, LONG2FIX(depth))); + } + rb_str_buf_cat2(result, "]"); + rb_hash_delete(state->seen, self_id); + } else { + result = rb_str_buf_new(len); + if (RSTRING_LEN(state->array_nl)) rb_str_append(delim, state->array_nl); + shift = rb_str_times(state->indent, LONG2FIX(depth + 1)); + + rb_str_buf_cat2(result, "["); + rb_str_buf_append(result, state->array_nl); + for (i = 0; i < len; i++) { + VALUE element = RARRAY_PTR(self)[i]; + OBJ_INFECT(result, element); + if (i > 0) rb_str_buf_append(result, delim); + rb_str_buf_append(result, shift); + rb_str_buf_append(result, rb_funcall(element, i_to_json, 2, Vstate, LONG2FIX(depth + 1))); + } + rb_str_buf_append(result, state->array_nl); + if (RSTRING_LEN(state->array_nl)) { + rb_str_buf_append(result, rb_str_times(state->indent, LONG2FIX(depth))); + } + rb_str_buf_cat2(result, "]"); + } + return result; +} + +/* + * call-seq: to_json(state = nil, depth = 0) + * + * Returns a JSON string containing a JSON array, that is unparsed from + * this Array instance. + * _state_ is a JSON::State object, that can also be used to configure the + * produced JSON string output further. + * _depth_ is used to find out nesting depth, to indent accordingly. + */ +static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { + VALUE Vstate, Vdepth, result; + + rb_scan_args(argc, argv, "02", &Vstate, &Vdepth); + if (NIL_P(Vstate)) { + long i, len = RARRAY_LEN(self); + result = rb_str_buf_new(2 + 2 * len); + rb_str_buf_cat2(result, "["); + for (i = 0; i < len; i++) { + VALUE element = RARRAY_PTR(self)[i]; + OBJ_INFECT(result, element); + if (i > 0) rb_str_buf_cat2(result, ","); + rb_str_buf_append(result, rb_funcall(element, i_to_json, 0)); + } + rb_str_buf_cat2(result, "]"); + } else { + result = mArray_json_transfrom(self, Vstate, Vdepth); + } + OBJ_INFECT(result, self); + return result; +} + +/* + * call-seq: to_json(*) + * + * Returns a JSON string representation for this Integer number. + */ +static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) +{ + return rb_funcall(self, i_to_s, 0); +} + +/* + * call-seq: to_json(*) + * + * Returns a JSON string representation for this Float number. + */ +static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self) +{ + return rb_funcall(self, i_to_s, 0); +} + +/* + * call-seq: String.included(modul) + * + * Extends _modul_ with the String::Extend module. + */ +static VALUE mString_included_s(VALUE self, VALUE modul) { + return rb_funcall(modul, i_extend, 1, mString_Extend); +} + +/* + * call-seq: to_json(*) + * + * This string should be encoded with UTF-8 A call to this method + * returns a JSON string encoded with UTF16 big endian characters as + * \u????. + */ +static VALUE mString_to_json(int argc, VALUE *argv, VALUE self) +{ + VALUE result = rb_str_buf_new(RSTRING_LEN(self)); + rb_str_buf_cat2(result, "\""); + JSON_convert_UTF8_to_JSON(result, self, strictConversion); + rb_str_buf_cat2(result, "\""); + return result; +} + +/* + * call-seq: to_json_raw_object() + * + * This method creates a raw object hash, that can be nested into + * other data structures and will be unparsed as a raw string. This + * method should be used, if you want to convert raw strings to JSON + * instead of UTF-8 strings, e. g. binary data. + */ +static VALUE mString_to_json_raw_object(VALUE self) { + VALUE ary; + VALUE result = rb_hash_new(); + rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self))); + ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*")); + rb_hash_aset(result, rb_str_new2("raw"), ary); + return result; +} + +/* + * call-seq: to_json_raw(*args) + * + * This method creates a JSON text from the result of a call to + * to_json_raw_object of this String. + */ +static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) { + VALUE obj = mString_to_json_raw_object(self); + Check_Type(obj, T_HASH); + return mHash_to_json(argc, argv, obj); +} + +/* + * call-seq: json_create(o) + * + * Raw Strings are JSON Objects (the raw bytes are stored in an array for the + * key "raw"). The Ruby String can be created by this module method. + */ +static VALUE mString_Extend_json_create(VALUE self, VALUE o) { + VALUE ary; + Check_Type(o, T_HASH); + ary = rb_hash_aref(o, rb_str_new2("raw")); + return rb_funcall(ary, i_pack, 1, rb_str_new2("C*")); +} + +/* + * call-seq: to_json(state = nil, depth = 0) + * + * Returns a JSON string for true: 'true'. + */ +static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self) +{ + return rb_str_new2("true"); +} + +/* + * call-seq: to_json(state = nil, depth = 0) + * + * Returns a JSON string for false: 'false'. + */ +static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self) +{ + return rb_str_new2("false"); +} + +/* + * call-seq: to_json(state = nil, depth = 0) + * + */ +static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self) +{ + return rb_str_new2("null"); +} + +/* + * call-seq: to_json(*) + * + * Converts this object to a string (calling #to_s), converts + * it to a JSON string, and returns the result. This is a fallback, if no + * special method #to_json was defined for some object. + */ +static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) +{ + VALUE string = rb_funcall(self, i_to_s, 0); + Check_Type(string, T_STRING); + return mString_to_json(argc, argv, string); +} + +/* + * Document-class: JSON::Ext::Generator::State + * + * This class is used to create State instances, that are use to hold data + * while generating a JSON text from a a Ruby data structure. + */ + +static void State_mark(JSON_Generator_State *state) +{ + rb_gc_mark_maybe(state->indent); + rb_gc_mark_maybe(state->space); + rb_gc_mark_maybe(state->space_before); + rb_gc_mark_maybe(state->object_nl); + rb_gc_mark_maybe(state->array_nl); + rb_gc_mark_maybe(state->seen); + rb_gc_mark_maybe(state->memo); + rb_gc_mark_maybe(state->depth); +} + +static JSON_Generator_State *State_allocate() +{ + JSON_Generator_State *state = ALLOC(JSON_Generator_State); + return state; +} + +static VALUE cState_s_allocate(VALUE klass) +{ + JSON_Generator_State *state = State_allocate(); + return Data_Wrap_Struct(klass, State_mark, -1, state); +} + +/* + * call-seq: new(opts = {}) + * + * Instantiates a new State object, configured by _opts_. + * + * _opts_ can have the following keys: + * + * * *indent*: a string used to indent levels (default: ''), + * * *space*: a string that is put after, a : or , delimiter (default: ''), + * * *space_before*: a string that is put before a : pair delimiter (default: ''), + * * *object_nl*: a string that is put at the end of a JSON object (default: ''), + * * *array_nl*: a string that is put at the end of a JSON array (default: ''), + * * *check_circular*: true if checking for circular data structures + * should be done, false (the default) otherwise. + */ +static VALUE cState_initialize(int argc, VALUE *argv, VALUE self) +{ + VALUE opts; + GET_STATE(self); + + rb_scan_args(argc, argv, "01", &opts); + if (NIL_P(opts)) { + state->indent = rb_str_new2(""); + state->space = rb_str_new2(""); + state->space_before = rb_str_new2(""); + state->array_nl = rb_str_new2(""); + state->object_nl = rb_str_new2(""); + state->check_circular = 0; + } else { + VALUE tmp; + opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); + tmp = rb_hash_aref(opts, ID2SYM(i_indent)); + if (RTEST(tmp)) { + Check_Type(tmp, T_STRING); + state->indent = tmp; + } else { + state->indent = rb_str_new2(""); + } + tmp = rb_hash_aref(opts, ID2SYM(i_space)); + if (RTEST(tmp)) { + Check_Type(tmp, T_STRING); + state->space = tmp; + } else { + state->space = rb_str_new2(""); + } + tmp = rb_hash_aref(opts, ID2SYM(i_space_before)); + if (RTEST(tmp)) { + Check_Type(tmp, T_STRING); + state->space_before = tmp; + } else { + state->space_before = rb_str_new2(""); + } + tmp = rb_hash_aref(opts, ID2SYM(i_array_nl)); + if (RTEST(tmp)) { + Check_Type(tmp, T_STRING); + state->array_nl = tmp; + } else { + state->array_nl = rb_str_new2(""); + } + tmp = rb_hash_aref(opts, ID2SYM(i_object_nl)); + if (RTEST(tmp)) { + Check_Type(tmp, T_STRING); + state->object_nl = tmp; + } else { + state->object_nl = rb_str_new2(""); + } + tmp = rb_hash_aref(opts, ID2SYM(i_check_circular)); + state->check_circular = RTEST(tmp); + } + state->seen = rb_hash_new(); + state->memo = Qnil; + state->depth = INT2FIX(0); + return self; +} + +/* + * call-seq: from_state(opts) + * + * Creates a State object from _opts_, which ought to be Hash to create a + * new State instance configured by _opts_, something else to create an + * unconfigured instance. If _opts_ is a State object, it is just returned. + */ +static VALUE cState_from_state_s(VALUE self, VALUE opts) +{ + if (rb_obj_is_kind_of(opts, self)) { + return opts; + } else if (rb_obj_is_kind_of(opts, rb_cHash)) { + return rb_funcall(self, i_new, 1, opts); + } else { + return rb_funcall(self, i_new, 0); + } +} + +/* + * call-seq: indent() + * + * This string is used to indent levels in the JSON text. + */ +static VALUE cState_indent(VALUE self) +{ + GET_STATE(self); + return state->indent; +} + +/* + * call-seq: indent=(indent) + * + * This string is used to indent levels in the JSON text. + */ +static VALUE cState_indent_set(VALUE self, VALUE indent) +{ + GET_STATE(self); + Check_Type(indent, T_STRING); + return state->indent = indent; +} + +/* + * call-seq: space() + * + * This string is used to insert a space between the tokens in a JSON + * string. + */ +static VALUE cState_space(VALUE self) +{ + GET_STATE(self); + return state->space; +} + +/* + * call-seq: space=(space) + * + * This string is used to insert a space between the tokens in a JSON + * string. + */ +static VALUE cState_space_set(VALUE self, VALUE space) +{ + GET_STATE(self); + Check_Type(space, T_STRING); + return state->space = space; +} + +/* + * call-seq: space_before() + * + * This string is used to insert a space before the ':' in JSON objects. + */ +static VALUE cState_space_before(VALUE self) +{ + GET_STATE(self); + return state->space_before; +} + +/* + * call-seq: space_before=(space_before) + * + * This string is used to insert a space before the ':' in JSON objects. + */ +static VALUE cState_space_before_set(VALUE self, VALUE space_before) +{ + GET_STATE(self); + Check_Type(space_before, T_STRING); + return state->space_before = space_before; +} + +/* + * call-seq: object_nl() + * + * This string is put at the end of a line that holds a JSON object (or + * Hash). + */ +static VALUE cState_object_nl(VALUE self) +{ + GET_STATE(self); + return state->object_nl; +} + +/* + * call-seq: object_nl=(object_nl) + * + * This string is put at the end of a line that holds a JSON object (or + * Hash). + */ +static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) +{ + GET_STATE(self); + Check_Type(object_nl, T_STRING); + return state->object_nl = object_nl; +} + +/* + * call-seq: array_nl() + * + * This string is put at the end of a line that holds a JSON array. + */ +static VALUE cState_array_nl(VALUE self) +{ + GET_STATE(self); + return state->array_nl; +} + +/* + * call-seq: array_nl=(array_nl) + * + * This string is put at the end of a line that holds a JSON array. + */ +static VALUE cState_array_nl_set(VALUE self, VALUE array_nl) +{ + GET_STATE(self); + Check_Type(array_nl, T_STRING); + return state->array_nl = array_nl; +} + +/* + * call-seq: check_circular?(object) + * + * Returns true, if circular data structures should be checked, + * otherwise returns false. + */ +static VALUE cState_check_circular_p(VALUE self) +{ + GET_STATE(self); + return state->check_circular ? Qtrue : Qfalse; +} + +/* + * call-seq: seen?(object) + * + * Returns _true_, if _object_ was already seen during this generating run. + */ +static VALUE cState_seen_p(VALUE self, VALUE object) +{ + GET_STATE(self); + return rb_hash_aref(state->seen, rb_obj_id(object)); +} + +/* + * call-seq: remember(object) + * + * Remember _object_, to find out if it was already encountered (if a cyclic + * data structure is rendered). + */ +static VALUE cState_remember(VALUE self, VALUE object) +{ + GET_STATE(self); + return rb_hash_aset(state->seen, rb_obj_id(object), Qtrue); +} + +/* + * call-seq: forget(object) + * + * Forget _object_ for this generating run. + */ +static VALUE cState_forget(VALUE self, VALUE object) +{ + GET_STATE(self); + return rb_hash_delete(state->seen, rb_obj_id(object)); +} + +void Init_generator() +{ + mJSON = rb_define_module("JSON"); + mExt = rb_define_module_under(mJSON, "Ext"); + mGenerator = rb_define_module_under(mExt, "Generator"); + eGeneratorError = rb_path2class("JSON::GeneratorError"); + eCircularDatastructure = rb_path2class("JSON::CircularDatastructure"); + cState = rb_define_class_under(mGenerator, "State", rb_cObject); + rb_define_alloc_func(cState, cState_s_allocate); + rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1); + rb_define_method(cState, "initialize", cState_initialize, -1); + + rb_define_method(cState, "indent", cState_indent, 0); + rb_define_method(cState, "indent=", cState_indent_set, 1); + rb_define_method(cState, "space", cState_space, 0); + rb_define_method(cState, "space=", cState_space_set, 1); + rb_define_method(cState, "space_before", cState_space_before, 0); + rb_define_method(cState, "space_before=", cState_space_before_set, 1); + rb_define_method(cState, "object_nl", cState_object_nl, 0); + rb_define_method(cState, "object_nl=", cState_object_nl_set, 1); + rb_define_method(cState, "array_nl", cState_array_nl, 0); + rb_define_method(cState, "array_nl=", cState_array_nl_set, 1); + rb_define_method(cState, "check_circular?", cState_check_circular_p, 0); + rb_define_method(cState, "seen?", cState_seen_p, 1); + rb_define_method(cState, "remember", cState_remember, 1); + rb_define_method(cState, "forget", cState_forget, 1); + mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); + mObject = rb_define_module_under(mGeneratorMethods, "Object"); + rb_define_method(mObject, "to_json", mObject_to_json, -1); + mHash = rb_define_module_under(mGeneratorMethods, "Hash"); + rb_define_method(mHash, "to_json", mHash_to_json, -1); + mArray = rb_define_module_under(mGeneratorMethods, "Array"); + rb_define_method(mArray, "to_json", mArray_to_json, -1); + mInteger = rb_define_module_under(mGeneratorMethods, "Integer"); + rb_define_method(mInteger, "to_json", mInteger_to_json, -1); + mFloat = rb_define_module_under(mGeneratorMethods, "Float"); + rb_define_method(mFloat, "to_json", mFloat_to_json, -1); + mString = rb_define_module_under(mGeneratorMethods, "String"); + rb_define_singleton_method(mString, "included", mString_included_s, 1); + rb_define_method(mString, "to_json", mString_to_json, -1); + rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1); + rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0); + mString_Extend = rb_define_module_under(mString, "Extend"); + rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1); + mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass"); + rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1); + mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass"); + rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1); + mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass"); + rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1); + + i_to_s = rb_intern("to_s"); + i_to_json = rb_intern("to_json"); + i_new = rb_intern("new"); + i_indent = rb_intern("indent"); + i_space = rb_intern("space"); + i_space_before = rb_intern("space_before"); + i_object_nl = rb_intern("object_nl"); + i_array_nl = rb_intern("array_nl"); + i_check_circular = rb_intern("check_circular"); + i_pack = rb_intern("pack"); + i_unpack = rb_intern("unpack"); + i_create_id = rb_intern("create_id"); + i_extend = rb_intern("extend"); +} diff --git a/ext/json/ext/generator/unicode.c b/ext/json/ext/generator/unicode.c new file mode 100644 index 000000000..44e1f41f9 --- /dev/null +++ b/ext/json/ext/generator/unicode.c @@ -0,0 +1,184 @@ +/* vim: set cin et sw=4 ts=4: */ + +#include "unicode.h" + +#define unicode_escape(buffer, character) \ + snprintf(buf, 7, "\\u%04x", (unsigned int) (character)); \ + rb_str_buf_cat(buffer, buf, 6); + +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. + * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is + * left as-is for anyone who may want to do such conversion, which was + * allowed in earlier algorithms. + */ +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ +static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; + +/* + * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed + * into the first byte, depending on how many bytes follow. There are + * as many entries in this table as there are UTF-8 sequence types. + * (I.e., one byte sequence, two byte... etc.). Remember that sequencs + * for *legal* UTF-8 will be 4 or fewer bytes total. + */ +static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + +/* + * Utility routine to tell whether a sequence of bytes is legal UTF-8. + * This must be called with the length pre-determined by the first byte. + * If not calling this from ConvertUTF8to*, then the length can be set by: + * length = trailingBytesForUTF8[*source]+1; + * and the sequence is illegal right away if there aren't that many bytes + * available. + * If presented with a length > 4, this returns 0. The Unicode + * definition of UTF-8 goes up to 4-byte sequences. + */ + +inline static unsigned char isLegalUTF8(const UTF8 *source, int length) +{ + UTF8 a; + const UTF8 *srcptr = source+length; + switch (length) { + default: return 0; + /* Everything else falls through when "1"... */ + case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + case 2: if ((a = (*--srcptr)) > 0xBF) return 0; + + switch (*source) { + /* no fall-through in this inner switch */ + case 0xE0: if (a < 0xA0) return 0; break; + case 0xED: if (a > 0x9F) return 0; break; + case 0xF0: if (a < 0x90) return 0; break; + case 0xF4: if (a > 0x8F) return 0; break; + default: if (a < 0x80) return 0; + } + + case 1: if (*source >= 0x80 && *source < 0xC2) return 0; + } + if (*source > 0xF4) return 0; + return 1; +} + +void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags) +{ + char buf[7]; + const UTF8* source = (UTF8 *) RSTRING_PTR(string); + const UTF8* sourceEnd = source + RSTRING_LEN(string); + + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (source + extraBytesToRead >= sourceEnd) { + rb_raise(rb_path2class("JSON::GeneratorError"), + "partial character in source, but hit end"); + } + if (!isLegalUTF8(source, extraBytesToRead+1)) { + rb_raise(rb_path2class("JSON::GeneratorError"), + "source sequence is illegal/malformed"); + } + /* + * The cases all fall through. See "Note A" below. + */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + rb_raise(rb_path2class("JSON::GeneratorError"), + "source sequence is illegal/malformed"); + } else { + unicode_escape(buffer, UNI_REPLACEMENT_CHAR); + } + } else { + /* normal case */ + if (ch == '"') { + rb_str_buf_cat2(buffer, "\\\""); + } else if (ch == '\\') { + rb_str_buf_cat2(buffer, "\\\\"); + } else if (ch == '/') { + rb_str_buf_cat2(buffer, "\\/"); + } else if (ch >= 0x20 && ch <= 0x7f) { + rb_str_buf_cat(buffer, (char *) source - 1, 1); + } else if (ch == '\n') { + rb_str_buf_cat2(buffer, "\\n"); + } else if (ch == '\r') { + rb_str_buf_cat2(buffer, "\\r"); + } else if (ch == '\t') { + rb_str_buf_cat2(buffer, "\\t"); + } else if (ch == '\f') { + rb_str_buf_cat2(buffer, "\\f"); + } else if (ch == '\b') { + rb_str_buf_cat2(buffer, "\\b"); + } else if (ch < 0x20) { + unicode_escape(buffer, (UTF16) ch); + } else { + unicode_escape(buffer, (UTF16) ch); + } + } + } else if (ch > UNI_MAX_UTF16) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the start */ + rb_raise(rb_path2class("JSON::GeneratorError"), + "source sequence is illegal/malformed"); + } else { + unicode_escape(buffer, UNI_REPLACEMENT_CHAR); + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + ch -= halfBase; + unicode_escape(buffer, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START)); + unicode_escape(buffer, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START)); + } + } +} diff --git a/ext/json/ext/generator/unicode.h b/ext/json/ext/generator/unicode.h new file mode 100755 index 000000000..841474bce --- /dev/null +++ b/ext/json/ext/generator/unicode.h @@ -0,0 +1,53 @@ +#include "ruby.h" + +#ifndef _GENERATOR_UNICODE_H_ +#define _GENERATOR_UNICODE_H_ + +typedef enum { + conversionOK = 0, /* conversion successful */ + sourceExhausted, /* partial character in source, but hit end */ + targetExhausted, /* insuff. room in target for conversion */ + sourceIllegal /* source sequence is illegal/malformed */ +} ConversionResult; + +typedef enum { + strictConversion = 0, + lenientConversion +} ConversionFlags; + +typedef unsigned long UTF32; /* at least 32 bits */ +typedef unsigned short UTF16; /* at least 16 bits */ +typedef unsigned char UTF8; /* typically 8 bits */ + +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +#define UNI_MAX_BMP (UTF32)0x0000FFFF +#define UNI_MAX_UTF16 (UTF32)0x0010FFFF +#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF +#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF + +static const int halfShift = 10; /* used for shifting by 10 bits */ + +static const UTF32 halfBase = 0x0010000UL; +static const UTF32 halfMask = 0x3FFUL; + +void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags); + +#ifndef RARRAY_PTR +#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr +#endif +#ifndef RARRAY_LEN +#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len +#endif +#ifndef RSTRING_PTR +#define RSTRING_PTR(string) RSTRING(string)->ptr +#endif +#ifndef RSTRING_LEN +#define RSTRING_LEN(string) RSTRING(string)->len +#endif + +#endif diff --git a/ext/json/ext/parser/extconf.h b/ext/json/ext/parser/extconf.h new file mode 100644 index 000000000..cda0cc8ea --- /dev/null +++ b/ext/json/ext/parser/extconf.h @@ -0,0 +1,3 @@ +#ifndef EXTCONF_H +#define EXTCONF_H +#endif diff --git a/ext/json/ext/parser/extconf.rb b/ext/json/ext/parser/extconf.rb new file mode 100644 index 000000000..085c8d060 --- /dev/null +++ b/ext/json/ext/parser/extconf.rb @@ -0,0 +1,9 @@ +require 'mkmf' +require 'rbconfig' + +if CONFIG['CC'] =~ /gcc/ + #CONFIG['CC'] += ' -Wall -ggdb' + CONFIG['CC'] += ' -Wall' +end + +create_makefile 'json/ext/parser' diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c new file mode 100644 index 000000000..7448e5fb7 --- /dev/null +++ b/ext/json/ext/parser/parser.c @@ -0,0 +1,1601 @@ +#line 1 "parser.rl" +/* vim: set cin et sw=4 ts=4: */ + +#include "ruby.h" +#include "re.h" +#include "st.h" +#include "unicode.h" + +#define EVIL 0x666 + +static VALUE mJSON, mExt, cParser, eParserError, eNestingError; + +static ID i_json_creatable_p, i_json_create, i_create_id, i_chr, i_max_nesting; + +typedef struct JSON_ParserStruct { + VALUE Vsource; + char *source; + long len; + char *memo; + VALUE create_id; + int max_nesting; + int current_nesting; +} JSON_Parser; + +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); + +#define GET_STRUCT \ + JSON_Parser *json; \ + Data_Get_Struct(self, JSON_Parser, json); + +#line 58 "parser.rl" + + + +#line 41 "parser.c" +static const int JSON_object_start = 1; +static const int JSON_object_first_final = 27; +static const int JSON_object_error = 0; + +static const int JSON_object_en_main = 1; + +#line 91 "parser.rl" + + +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + VALUE last_name = Qnil; + + if (json->max_nesting && json->current_nesting > json->max_nesting) { + rb_raise(eNestingError, "nesting of %d is to deep", json->current_nesting); + } + + *result = rb_hash_new(); + + +#line 63 "parser.c" + { + cs = JSON_object_start; + } +#line 105 "parser.rl" + +#line 69 "parser.c" + { + if ( p == pe ) + goto _out; + switch ( cs ) + { +case 1: + if ( (*p) == 123 ) + goto st2; + goto st0; +st0: + goto _out0; +st2: + if ( ++p == pe ) + goto _out2; +case 2: + switch( (*p) ) { + case 13: goto st2; + case 32: goto st2; + case 34: goto tr2; + case 47: goto st23; + case 125: goto tr4; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st2; + goto st0; +tr2: +#line 77 "parser.rl" + { + char *np = JSON_parse_string(json, p, pe, &last_name); + if (np == NULL) goto _out3; else {p = (( np))-1;} + } + goto st3; +st3: + if ( ++p == pe ) + goto _out3; +case 3: +#line 106 "parser.c" + switch( (*p) ) { + case 13: goto st3; + case 32: goto st3; + case 47: goto st4; + case 58: goto st8; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st3; + goto st0; +st4: + if ( ++p == pe ) + goto _out4; +case 4: + switch( (*p) ) { + case 42: goto st5; + case 47: goto st7; + } + goto st0; +st5: + if ( ++p == pe ) + goto _out5; +case 5: + if ( (*p) == 42 ) + goto st6; + goto st5; +st6: + if ( ++p == pe ) + goto _out6; +case 6: + switch( (*p) ) { + case 42: goto st6; + case 47: goto st3; + } + goto st5; +st7: + if ( ++p == pe ) + goto _out7; +case 7: + if ( (*p) == 10 ) + goto st3; + goto st7; +st8: + if ( ++p == pe ) + goto _out8; +case 8: + switch( (*p) ) { + case 13: goto st8; + case 32: goto st8; + case 34: goto tr11; + case 45: goto tr11; + case 47: goto st19; + case 91: goto tr11; + case 102: goto tr11; + case 110: goto tr11; + case 116: goto tr11; + case 123: goto tr11; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr11; + } else if ( (*p) >= 9 ) + goto st8; + goto st0; +tr11: +#line 66 "parser.rl" + { + VALUE v = Qnil; + char *np = JSON_parse_value(json, p, pe, &v); + if (np == NULL) { + goto _out9; + } else { + rb_hash_aset(*result, last_name, v); + {p = (( np))-1;} + } + } + goto st9; +st9: + if ( ++p == pe ) + goto _out9; +case 9: +#line 187 "parser.c" + switch( (*p) ) { + case 13: goto st9; + case 32: goto st9; + case 44: goto st10; + case 47: goto st15; + case 125: goto tr4; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st9; + goto st0; +st10: + if ( ++p == pe ) + goto _out10; +case 10: + switch( (*p) ) { + case 13: goto st10; + case 32: goto st10; + case 34: goto tr2; + case 47: goto st11; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st10; + goto st0; +st11: + if ( ++p == pe ) + goto _out11; +case 11: + switch( (*p) ) { + case 42: goto st12; + case 47: goto st14; + } + goto st0; +st12: + if ( ++p == pe ) + goto _out12; +case 12: + if ( (*p) == 42 ) + goto st13; + goto st12; +st13: + if ( ++p == pe ) + goto _out13; +case 13: + switch( (*p) ) { + case 42: goto st13; + case 47: goto st10; + } + goto st12; +st14: + if ( ++p == pe ) + goto _out14; +case 14: + if ( (*p) == 10 ) + goto st10; + goto st14; +st15: + if ( ++p == pe ) + goto _out15; +case 15: + switch( (*p) ) { + case 42: goto st16; + case 47: goto st18; + } + goto st0; +st16: + if ( ++p == pe ) + goto _out16; +case 16: + if ( (*p) == 42 ) + goto st17; + goto st16; +st17: + if ( ++p == pe ) + goto _out17; +case 17: + switch( (*p) ) { + case 42: goto st17; + case 47: goto st9; + } + goto st16; +st18: + if ( ++p == pe ) + goto _out18; +case 18: + if ( (*p) == 10 ) + goto st9; + goto st18; +tr4: +#line 82 "parser.rl" + { goto _out27; } + goto st27; +st27: + if ( ++p == pe ) + goto _out27; +case 27: +#line 283 "parser.c" + goto st0; +st19: + if ( ++p == pe ) + goto _out19; +case 19: + switch( (*p) ) { + case 42: goto st20; + case 47: goto st22; + } + goto st0; +st20: + if ( ++p == pe ) + goto _out20; +case 20: + if ( (*p) == 42 ) + goto st21; + goto st20; +st21: + if ( ++p == pe ) + goto _out21; +case 21: + switch( (*p) ) { + case 42: goto st21; + case 47: goto st8; + } + goto st20; +st22: + if ( ++p == pe ) + goto _out22; +case 22: + if ( (*p) == 10 ) + goto st8; + goto st22; +st23: + if ( ++p == pe ) + goto _out23; +case 23: + switch( (*p) ) { + case 42: goto st24; + case 47: goto st26; + } + goto st0; +st24: + if ( ++p == pe ) + goto _out24; +case 24: + if ( (*p) == 42 ) + goto st25; + goto st24; +st25: + if ( ++p == pe ) + goto _out25; +case 25: + switch( (*p) ) { + case 42: goto st25; + case 47: goto st2; + } + goto st24; +st26: + if ( ++p == pe ) + goto _out26; +case 26: + if ( (*p) == 10 ) + goto st2; + goto st26; + } + _out0: cs = 0; goto _out; + _out2: cs = 2; goto _out; + _out3: cs = 3; goto _out; + _out4: cs = 4; goto _out; + _out5: cs = 5; goto _out; + _out6: cs = 6; goto _out; + _out7: cs = 7; goto _out; + _out8: cs = 8; goto _out; + _out9: cs = 9; goto _out; + _out10: cs = 10; goto _out; + _out11: cs = 11; goto _out; + _out12: cs = 12; goto _out; + _out13: cs = 13; goto _out; + _out14: cs = 14; goto _out; + _out15: cs = 15; goto _out; + _out16: cs = 16; goto _out; + _out17: cs = 17; goto _out; + _out18: cs = 18; goto _out; + _out27: cs = 27; goto _out; + _out19: cs = 19; goto _out; + _out20: cs = 20; goto _out; + _out21: cs = 21; goto _out; + _out22: cs = 22; goto _out; + _out23: cs = 23; goto _out; + _out24: cs = 24; goto _out; + _out25: cs = 25; goto _out; + _out26: cs = 26; goto _out; + + _out: {} + } +#line 106 "parser.rl" + + if (cs >= JSON_object_first_final) { + VALUE klassname = rb_hash_aref(*result, json->create_id); + if (!NIL_P(klassname)) { + VALUE klass = rb_path2class(StringValueCStr(klassname)); + if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) { + *result = rb_funcall(klass, i_json_create, 1, *result); + } + } + return p + 1; + } else { + return NULL; + } +} + + +#line 397 "parser.c" +static const int JSON_value_start = 1; +static const int JSON_value_first_final = 12; +static const int JSON_value_error = 0; + +static const int JSON_value_en_main = 1; + +#line 177 "parser.rl" + + +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + +#line 412 "parser.c" + { + cs = JSON_value_start; + } +#line 184 "parser.rl" + +#line 418 "parser.c" + { + if ( p == pe ) + goto _out; + switch ( cs ) + { +case 1: + switch( (*p) ) { + case 34: goto tr0; + case 45: goto tr2; + case 91: goto tr3; + case 102: goto st2; + case 110: goto st6; + case 116: goto st9; + case 123: goto tr7; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + goto st0; +st0: + goto _out0; +tr0: +#line 136 "parser.rl" + { + char *np = JSON_parse_string(json, p, pe, result); + if (np == NULL) goto _out12; else {p = (( np))-1;} + } + goto st12; +tr2: +#line 141 "parser.rl" + { + char *np; + np = JSON_parse_float(json, p, pe, result); + if (np != NULL) {p = (( np))-1;} + np = JSON_parse_integer(json, p, pe, result); + if (np != NULL) {p = (( np))-1;} + goto _out12; + } + goto st12; +tr3: +#line 150 "parser.rl" + { + char *np; + json->current_nesting += 1; + np = JSON_parse_array(json, p, pe, result); + json->current_nesting -= 1; + if (np == NULL) goto _out12; else {p = (( np))-1;} + } + goto st12; +tr7: +#line 158 "parser.rl" + { + char *np; + json->current_nesting += 1; + np = JSON_parse_object(json, p, pe, result); + json->current_nesting -= 1; + if (np == NULL) goto _out12; else {p = (( np))-1;} + } + goto st12; +tr11: +#line 130 "parser.rl" + { + *result = Qfalse; + } + goto st12; +tr14: +#line 127 "parser.rl" + { + *result = Qnil; + } + goto st12; +tr17: +#line 133 "parser.rl" + { + *result = Qtrue; + } + goto st12; +st12: + if ( ++p == pe ) + goto _out12; +case 12: +#line 166 "parser.rl" + { goto _out12; } +#line 501 "parser.c" + goto st0; +st2: + if ( ++p == pe ) + goto _out2; +case 2: + if ( (*p) == 97 ) + goto st3; + goto st0; +st3: + if ( ++p == pe ) + goto _out3; +case 3: + if ( (*p) == 108 ) + goto st4; + goto st0; +st4: + if ( ++p == pe ) + goto _out4; +case 4: + if ( (*p) == 115 ) + goto st5; + goto st0; +st5: + if ( ++p == pe ) + goto _out5; +case 5: + if ( (*p) == 101 ) + goto tr11; + goto st0; +st6: + if ( ++p == pe ) + goto _out6; +case 6: + if ( (*p) == 117 ) + goto st7; + goto st0; +st7: + if ( ++p == pe ) + goto _out7; +case 7: + if ( (*p) == 108 ) + goto st8; + goto st0; +st8: + if ( ++p == pe ) + goto _out8; +case 8: + if ( (*p) == 108 ) + goto tr14; + goto st0; +st9: + if ( ++p == pe ) + goto _out9; +case 9: + if ( (*p) == 114 ) + goto st10; + goto st0; +st10: + if ( ++p == pe ) + goto _out10; +case 10: + if ( (*p) == 117 ) + goto st11; + goto st0; +st11: + if ( ++p == pe ) + goto _out11; +case 11: + if ( (*p) == 101 ) + goto tr17; + goto st0; + } + _out0: cs = 0; goto _out; + _out12: cs = 12; goto _out; + _out2: cs = 2; goto _out; + _out3: cs = 3; goto _out; + _out4: cs = 4; goto _out; + _out5: cs = 5; goto _out; + _out6: cs = 6; goto _out; + _out7: cs = 7; goto _out; + _out8: cs = 8; goto _out; + _out9: cs = 9; goto _out; + _out10: cs = 10; goto _out; + _out11: cs = 11; goto _out; + + _out: {} + } +#line 185 "parser.rl" + + if (cs >= JSON_value_first_final) { + return p; + } else { + return NULL; + } +} + + +#line 599 "parser.c" +static const int JSON_integer_start = 1; +static const int JSON_integer_first_final = 5; +static const int JSON_integer_error = 0; + +static const int JSON_integer_en_main = 1; + +#line 201 "parser.rl" + + +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + +#line 614 "parser.c" + { + cs = JSON_integer_start; + } +#line 208 "parser.rl" + json->memo = p; + +#line 621 "parser.c" + { + if ( p == pe ) + goto _out; + switch ( cs ) + { +case 1: + switch( (*p) ) { + case 45: goto st2; + case 48: goto st3; + } + if ( 49 <= (*p) && (*p) <= 57 ) + goto st4; + goto st0; +st0: + goto _out0; +st2: + if ( ++p == pe ) + goto _out2; +case 2: + if ( (*p) == 48 ) + goto st3; + if ( 49 <= (*p) && (*p) <= 57 ) + goto st4; + goto st0; +st3: + if ( ++p == pe ) + goto _out3; +case 3: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st0; + goto tr4; +tr4: +#line 198 "parser.rl" + { goto _out5; } + goto st5; +st5: + if ( ++p == pe ) + goto _out5; +case 5: +#line 661 "parser.c" + goto st0; +st4: + if ( ++p == pe ) + goto _out4; +case 4: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st4; + goto tr4; + } + _out0: cs = 0; goto _out; + _out2: cs = 2; goto _out; + _out3: cs = 3; goto _out; + _out5: cs = 5; goto _out; + _out4: cs = 4; goto _out; + + _out: {} + } +#line 210 "parser.rl" + + if (cs >= JSON_integer_first_final) { + long len = p - json->memo; + *result = rb_Integer(rb_str_new(json->memo, len)); + return p + 1; + } else { + return NULL; + } +} + + +#line 691 "parser.c" +static const int JSON_float_start = 1; +static const int JSON_float_first_final = 10; +static const int JSON_float_error = 0; + +static const int JSON_float_en_main = 1; + +#line 232 "parser.rl" + + +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + +#line 706 "parser.c" + { + cs = JSON_float_start; + } +#line 239 "parser.rl" + json->memo = p; + +#line 713 "parser.c" + { + if ( p == pe ) + goto _out; + switch ( cs ) + { +case 1: + switch( (*p) ) { + case 45: goto st2; + case 48: goto st3; + } + if ( 49 <= (*p) && (*p) <= 57 ) + goto st9; + goto st0; +st0: + goto _out0; +st2: + if ( ++p == pe ) + goto _out2; +case 2: + if ( (*p) == 48 ) + goto st3; + if ( 49 <= (*p) && (*p) <= 57 ) + goto st9; + goto st0; +st3: + if ( ++p == pe ) + goto _out3; +case 3: + switch( (*p) ) { + case 46: goto st4; + case 69: goto st6; + case 101: goto st6; + } + goto st0; +st4: + if ( ++p == pe ) + goto _out4; +case 4: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st5; + goto st0; +st5: + if ( ++p == pe ) + goto _out5; +case 5: + switch( (*p) ) { + case 69: goto st6; + case 101: goto st6; + } + if ( (*p) > 46 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st5; + } else if ( (*p) >= 45 ) + goto st0; + goto tr7; +tr7: +#line 226 "parser.rl" + { goto _out10; } + goto st10; +st10: + if ( ++p == pe ) + goto _out10; +case 10: +#line 777 "parser.c" + goto st0; +st6: + if ( ++p == pe ) + goto _out6; +case 6: + switch( (*p) ) { + case 43: goto st7; + case 45: goto st7; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st8; + goto st0; +st7: + if ( ++p == pe ) + goto _out7; +case 7: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st8; + goto st0; +st8: + if ( ++p == pe ) + goto _out8; +case 8: + switch( (*p) ) { + case 69: goto st0; + case 101: goto st0; + } + if ( (*p) > 46 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st8; + } else if ( (*p) >= 45 ) + goto st0; + goto tr7; +st9: + if ( ++p == pe ) + goto _out9; +case 9: + switch( (*p) ) { + case 46: goto st4; + case 69: goto st6; + case 101: goto st6; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st9; + goto st0; + } + _out0: cs = 0; goto _out; + _out2: cs = 2; goto _out; + _out3: cs = 3; goto _out; + _out4: cs = 4; goto _out; + _out5: cs = 5; goto _out; + _out10: cs = 10; goto _out; + _out6: cs = 6; goto _out; + _out7: cs = 7; goto _out; + _out8: cs = 8; goto _out; + _out9: cs = 9; goto _out; + + _out: {} + } +#line 241 "parser.rl" + + if (cs >= JSON_float_first_final) { + long len = p - json->memo; + *result = rb_Float(rb_str_new(json->memo, len)); + return p + 1; + } else { + return NULL; + } +} + + + +#line 850 "parser.c" +static const int JSON_array_start = 1; +static const int JSON_array_first_final = 17; +static const int JSON_array_error = 0; + +static const int JSON_array_en_main = 1; + +#line 277 "parser.rl" + + +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + if (json->max_nesting && json->current_nesting > json->max_nesting) { + rb_raise(eNestingError, "nesting of %d is to deep", json->current_nesting); + } + *result = rb_ary_new(); + + +#line 870 "parser.c" + { + cs = JSON_array_start; + } +#line 289 "parser.rl" + +#line 876 "parser.c" + { + if ( p == pe ) + goto _out; + switch ( cs ) + { +case 1: + if ( (*p) == 91 ) + goto st2; + goto st0; +st0: + goto _out0; +st2: + if ( ++p == pe ) + goto _out2; +case 2: + switch( (*p) ) { + case 13: goto st2; + case 32: goto st2; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st13; + case 91: goto tr2; + case 93: goto tr4; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st2; + goto st0; +tr2: +#line 258 "parser.rl" + { + VALUE v = Qnil; + char *np = JSON_parse_value(json, p, pe, &v); + if (np == NULL) { + goto _out3; + } else { + rb_ary_push(*result, v); + {p = (( np))-1;} + } + } + goto st3; +st3: + if ( ++p == pe ) + goto _out3; +case 3: +#line 928 "parser.c" + switch( (*p) ) { + case 13: goto st3; + case 32: goto st3; + case 44: goto st4; + case 47: goto st9; + case 93: goto tr4; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st3; + goto st0; +st4: + if ( ++p == pe ) + goto _out4; +case 4: + switch( (*p) ) { + case 13: goto st4; + case 32: goto st4; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st5; + case 91: goto tr2; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st4; + goto st0; +st5: + if ( ++p == pe ) + goto _out5; +case 5: + switch( (*p) ) { + case 42: goto st6; + case 47: goto st8; + } + goto st0; +st6: + if ( ++p == pe ) + goto _out6; +case 6: + if ( (*p) == 42 ) + goto st7; + goto st6; +st7: + if ( ++p == pe ) + goto _out7; +case 7: + switch( (*p) ) { + case 42: goto st7; + case 47: goto st4; + } + goto st6; +st8: + if ( ++p == pe ) + goto _out8; +case 8: + if ( (*p) == 10 ) + goto st4; + goto st8; +st9: + if ( ++p == pe ) + goto _out9; +case 9: + switch( (*p) ) { + case 42: goto st10; + case 47: goto st12; + } + goto st0; +st10: + if ( ++p == pe ) + goto _out10; +case 10: + if ( (*p) == 42 ) + goto st11; + goto st10; +st11: + if ( ++p == pe ) + goto _out11; +case 11: + switch( (*p) ) { + case 42: goto st11; + case 47: goto st3; + } + goto st10; +st12: + if ( ++p == pe ) + goto _out12; +case 12: + if ( (*p) == 10 ) + goto st3; + goto st12; +tr4: +#line 269 "parser.rl" + { goto _out17; } + goto st17; +st17: + if ( ++p == pe ) + goto _out17; +case 17: +#line 1033 "parser.c" + goto st0; +st13: + if ( ++p == pe ) + goto _out13; +case 13: + switch( (*p) ) { + case 42: goto st14; + case 47: goto st16; + } + goto st0; +st14: + if ( ++p == pe ) + goto _out14; +case 14: + if ( (*p) == 42 ) + goto st15; + goto st14; +st15: + if ( ++p == pe ) + goto _out15; +case 15: + switch( (*p) ) { + case 42: goto st15; + case 47: goto st2; + } + goto st14; +st16: + if ( ++p == pe ) + goto _out16; +case 16: + if ( (*p) == 10 ) + goto st2; + goto st16; + } + _out0: cs = 0; goto _out; + _out2: cs = 2; goto _out; + _out3: cs = 3; goto _out; + _out4: cs = 4; goto _out; + _out5: cs = 5; goto _out; + _out6: cs = 6; goto _out; + _out7: cs = 7; goto _out; + _out8: cs = 8; goto _out; + _out9: cs = 9; goto _out; + _out10: cs = 10; goto _out; + _out11: cs = 11; goto _out; + _out12: cs = 12; goto _out; + _out17: cs = 17; goto _out; + _out13: cs = 13; goto _out; + _out14: cs = 14; goto _out; + _out15: cs = 15; goto _out; + _out16: cs = 16; goto _out; + + _out: {} + } +#line 290 "parser.rl" + + if(cs >= JSON_array_first_final) { + return p + 1; + } else { + rb_raise(eParserError, "unexpected token at '%s'", p); + } +} + +static VALUE json_string_unescape(char *p, char *pe) +{ + VALUE result = rb_str_buf_new(pe - p + 1); + + while (p < pe) { + if (*p == '\\') { + p++; + if (p >= pe) return Qnil; /* raise an exception later, \ at end */ + switch (*p) { + case '"': + case '\\': + rb_str_buf_cat(result, p, 1); + p++; + break; + case 'b': + rb_str_buf_cat2(result, "\b"); + p++; + break; + case 'f': + rb_str_buf_cat2(result, "\f"); + p++; + break; + case 'n': + rb_str_buf_cat2(result, "\n"); + p++; + break; + case 'r': + rb_str_buf_cat2(result, "\r"); + p++; + break; + case 't': + rb_str_buf_cat2(result, "\t"); + p++; + break; + case 'u': + if (p > pe - 4) { + return Qnil; + } else { + p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion); + } + break; + default: + rb_str_buf_cat(result, p, 1); + p++; + break; + } + } else { + char *q = p; + while (*q != '\\' && q < pe) q++; + rb_str_buf_cat(result, p, q - p); + p = q; + } + } + return result; +} + + +#line 1154 "parser.c" +static const int JSON_string_start = 1; +static const int JSON_string_first_final = 8; +static const int JSON_string_error = 0; + +static const int JSON_string_en_main = 1; + +#line 368 "parser.rl" + + +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + *result = rb_str_new("", 0); + +#line 1170 "parser.c" + { + cs = JSON_string_start; + } +#line 376 "parser.rl" + json->memo = p; + +#line 1177 "parser.c" + { + if ( p == pe ) + goto _out; + switch ( cs ) + { +case 1: + if ( (*p) == 34 ) + goto st2; + goto st0; +st0: + goto _out0; +st2: + if ( ++p == pe ) + goto _out2; +case 2: + switch( (*p) ) { + case 34: goto tr2; + case 92: goto st3; + } + if ( 0 <= (*p) && (*p) <= 31 ) + goto st0; + goto st2; +tr2: +#line 360 "parser.rl" + { + *result = json_string_unescape(json->memo + 1, p); + if (NIL_P(*result)) goto _out8; else {p = (( p + 1))-1;} + } +#line 365 "parser.rl" + { goto _out8; } + goto st8; +st8: + if ( ++p == pe ) + goto _out8; +case 8: +#line 1213 "parser.c" + goto st0; +st3: + if ( ++p == pe ) + goto _out3; +case 3: + if ( (*p) == 117 ) + goto st4; + if ( 0 <= (*p) && (*p) <= 31 ) + goto st0; + goto st2; +st4: + if ( ++p == pe ) + goto _out4; +case 4: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st5; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st5; + } else + goto st5; + goto st0; +st5: + if ( ++p == pe ) + goto _out5; +case 5: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st6; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st6; + } else + goto st6; + goto st0; +st6: + if ( ++p == pe ) + goto _out6; +case 6: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st7; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st7; + } else + goto st7; + goto st0; +st7: + if ( ++p == pe ) + goto _out7; +case 7: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st2; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st2; + } else + goto st2; + goto st0; + } + _out0: cs = 0; goto _out; + _out2: cs = 2; goto _out; + _out8: cs = 8; goto _out; + _out3: cs = 3; goto _out; + _out4: cs = 4; goto _out; + _out5: cs = 5; goto _out; + _out6: cs = 6; goto _out; + _out7: cs = 7; goto _out; + + _out: {} + } +#line 378 "parser.rl" + + if (cs >= JSON_string_first_final) { + return p + 1; + } else { + return NULL; + } +} + + + +#line 1299 "parser.c" +static const int JSON_start = 1; +static const int JSON_first_final = 10; +static const int JSON_error = 0; + +static const int JSON_en_main = 1; + +#line 412 "parser.rl" + + +/* + * Document-class: JSON::Ext::Parser + * + * This is the JSON parser implemented as a C extension. It can be configured + * to be used by setting + * + * JSON.parser = JSON::Ext::Parser + * + * with the method parser= in JSON. + * + */ + +/* + * call-seq: new(source, opts => {}) + * + * Creates a new JSON::Ext::Parser instance for the string _source_. + * + * Creates a new JSON::Ext::Parser instance for the string _source_. + * + * It will be configured by the _opts_ hash. _opts_ can have the following + * keys: + * + * _opts_ can have the following keys: + * * *max_nesting*: The maximum depth of nesting allowed in the parsed data + * structures. Disable depth checking with :max_nesting => false. + */ +static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) +{ + char *ptr; + long len; + VALUE source, opts; + GET_STRUCT; + rb_scan_args(argc, argv, "11", &source, &opts); + source = StringValue(source); + ptr = RSTRING_PTR(source); + len = RSTRING_LEN(source); + if (len < 2) { + rb_raise(eParserError, "A JSON text must at least contain two octets!"); + } + json->max_nesting = 19; + if (!NIL_P(opts)) { + opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); + if (NIL_P(opts)) { + rb_raise(rb_eArgError, "opts needs to be like a hash"); + } else { + VALUE s_max_nesting = ID2SYM(i_max_nesting); + if (st_lookup(RHASH(opts)->tbl, s_max_nesting, 0)) { + VALUE max_nesting = rb_hash_aref(opts, s_max_nesting); + if (RTEST(max_nesting)) { + Check_Type(max_nesting, T_FIXNUM); + json->max_nesting = FIX2INT(max_nesting); + } else { + json->max_nesting = 0; + } + } + } + } + json->current_nesting = 0; + /* + Convert these? + if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } + */ + json->len = len; + json->source = ptr; + json->Vsource = source; + json->create_id = rb_funcall(mJSON, i_create_id, 0); + return self; +} + +/* + * call-seq: parse() + * + * Parses the current JSON text _source_ and returns the complete data + * structure as a result. + */ +static VALUE cParser_parse(VALUE self) +{ + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; + GET_STRUCT; + + +#line 1400 "parser.c" + { + cs = JSON_start; + } +#line 505 "parser.rl" + p = json->source; + pe = p + json->len; + +#line 1408 "parser.c" + { + if ( p == pe ) + goto _out; + switch ( cs ) + { +st1: + if ( ++p == pe ) + goto _out1; +case 1: + switch( (*p) ) { + case 13: goto st1; + case 32: goto st1; + case 47: goto st2; + case 91: goto tr3; + case 123: goto tr4; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st1; + goto st0; +st0: + goto _out0; +st2: + if ( ++p == pe ) + goto _out2; +case 2: + switch( (*p) ) { + case 42: goto st3; + case 47: goto st5; + } + goto st0; +st3: + if ( ++p == pe ) + goto _out3; +case 3: + if ( (*p) == 42 ) + goto st4; + goto st3; +st4: + if ( ++p == pe ) + goto _out4; +case 4: + switch( (*p) ) { + case 42: goto st4; + case 47: goto st1; + } + goto st3; +st5: + if ( ++p == pe ) + goto _out5; +case 5: + if ( (*p) == 10 ) + goto st1; + goto st5; +tr3: +#line 401 "parser.rl" + { + char *np; + json->current_nesting = 1; + np = JSON_parse_array(json, p, pe, &result); + if (np == NULL) goto _out10; else {p = (( np))-1;} + } + goto st10; +tr4: +#line 394 "parser.rl" + { + char *np; + json->current_nesting = 1; + np = JSON_parse_object(json, p, pe, &result); + if (np == NULL) goto _out10; else {p = (( np))-1;} + } + goto st10; +st10: + if ( ++p == pe ) + goto _out10; +case 10: +#line 1484 "parser.c" + switch( (*p) ) { + case 13: goto st10; + case 32: goto st10; + case 47: goto st6; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st10; + goto st0; +st6: + if ( ++p == pe ) + goto _out6; +case 6: + switch( (*p) ) { + case 42: goto st7; + case 47: goto st9; + } + goto st0; +st7: + if ( ++p == pe ) + goto _out7; +case 7: + if ( (*p) == 42 ) + goto st8; + goto st7; +st8: + if ( ++p == pe ) + goto _out8; +case 8: + switch( (*p) ) { + case 42: goto st8; + case 47: goto st10; + } + goto st7; +st9: + if ( ++p == pe ) + goto _out9; +case 9: + if ( (*p) == 10 ) + goto st10; + goto st9; + } + _out1: cs = 1; goto _out; + _out0: cs = 0; goto _out; + _out2: cs = 2; goto _out; + _out3: cs = 3; goto _out; + _out4: cs = 4; goto _out; + _out5: cs = 5; goto _out; + _out10: cs = 10; goto _out; + _out6: cs = 6; goto _out; + _out7: cs = 7; goto _out; + _out8: cs = 8; goto _out; + _out9: cs = 9; goto _out; + + _out: {} + } +#line 508 "parser.rl" + + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + rb_raise(eParserError, "unexpected token at '%s'", p); + } +} + +static JSON_Parser *JSON_allocate() +{ + JSON_Parser *json = ALLOC(JSON_Parser); + MEMZERO(json, JSON_Parser, 1); + return json; +} + +static void JSON_mark(JSON_Parser *json) +{ + rb_gc_mark_maybe(json->Vsource); + rb_gc_mark_maybe(json->create_id); +} + +static void JSON_free(JSON_Parser *json) +{ + free(json); +} + +static VALUE cJSON_parser_s_allocate(VALUE klass) +{ + JSON_Parser *json = JSON_allocate(); + return Data_Wrap_Struct(klass, JSON_mark, JSON_free, json); +} + +/* + * call-seq: source() + * + * Returns a copy of the current _source_ string, that was used to construct + * this Parser. + */ +static VALUE cParser_source(VALUE self) +{ + GET_STRUCT; + return rb_str_dup(json->Vsource); +} + +void Init_parser() +{ + mJSON = rb_define_module("JSON"); + mExt = rb_define_module_under(mJSON, "Ext"); + cParser = rb_define_class_under(mExt, "Parser", rb_cObject); + eParserError = rb_path2class("JSON::ParserError"); + eNestingError = rb_path2class("JSON::NestingError"); + rb_define_alloc_func(cParser, cJSON_parser_s_allocate); + rb_define_method(cParser, "initialize", cParser_initialize, -1); + rb_define_method(cParser, "parse", cParser_parse, 0); + rb_define_method(cParser, "source", cParser_source, 0); + + i_json_creatable_p = rb_intern("json_creatable?"); + i_json_create = rb_intern("json_create"); + i_create_id = rb_intern("create_id"); + i_chr = rb_intern("chr"); + i_max_nesting = rb_intern("max_nesting"); +} diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl new file mode 100644 index 000000000..9ce8c6fc2 --- /dev/null +++ b/ext/json/ext/parser/parser.rl @@ -0,0 +1,569 @@ +/* vim: set cin et sw=4 ts=4: */ + +#include "ruby.h" +#include "re.h" +#include "st.h" +#include "unicode.h" + +#define EVIL 0x666 + +static VALUE mJSON, mExt, cParser, eParserError, eNestingError; + +static ID i_json_creatable_p, i_json_create, i_create_id, i_chr, i_max_nesting; + +typedef struct JSON_ParserStruct { + VALUE Vsource; + char *source; + long len; + char *memo; + VALUE create_id; + int max_nesting; + int current_nesting; +} JSON_Parser; + +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); + +#define GET_STRUCT \ + JSON_Parser *json; \ + Data_Get_Struct(self, JSON_Parser, json); + +%%{ + machine JSON_common; + + cr = '\n'; + cr_neg = [^\n]; + ws = [ \t\r\n]; + c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/'; + cpp_comment = '//' cr_neg* cr; + comment = c_comment | cpp_comment; + ignore = ws | comment; + name_separator = ':'; + value_separator = ','; + Vnull = 'null'; + Vfalse = 'false'; + Vtrue = 'true'; + begin_value = [nft"\-[{] | digit; + begin_object = '{'; + end_object = '}'; + begin_array = '['; + end_array = ']'; + begin_string = '"'; + begin_name = begin_string; + begin_number = digit | '-'; +}%% + +%%{ + machine JSON_object; + include JSON_common; + + write data; + + action parse_value { + VALUE v = Qnil; + char *np = JSON_parse_value(json, fpc, pe, &v); + if (np == NULL) { + fbreak; + } else { + rb_hash_aset(*result, last_name, v); + fexec np; + } + } + + action parse_name { + char *np = JSON_parse_string(json, fpc, pe, &last_name); + if (np == NULL) fbreak; else fexec np; + } + + action exit { fbreak; } + + a_pair = ignore* begin_name >parse_name + ignore* name_separator ignore* + begin_value >parse_value; + + main := begin_object + (a_pair (ignore* value_separator a_pair)*)? + ignore* end_object @exit; +}%% + +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + VALUE last_name = Qnil; + + if (json->max_nesting && json->current_nesting > json->max_nesting) { + rb_raise(eNestingError, "nesting of %d is to deep", json->current_nesting); + } + + *result = rb_hash_new(); + + %% write init; + %% write exec; + + if (cs >= JSON_object_first_final) { + VALUE klassname = rb_hash_aref(*result, json->create_id); + if (!NIL_P(klassname)) { + VALUE klass = rb_path2class(StringValueCStr(klassname)); + if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) { + *result = rb_funcall(klass, i_json_create, 1, *result); + } + } + return p + 1; + } else { + return NULL; + } +} + +%%{ + machine JSON_value; + include JSON_common; + + write data; + + action parse_null { + *result = Qnil; + } + action parse_false { + *result = Qfalse; + } + action parse_true { + *result = Qtrue; + } + action parse_string { + char *np = JSON_parse_string(json, fpc, pe, result); + if (np == NULL) fbreak; else fexec np; + } + + action parse_number { + char *np; + np = JSON_parse_float(json, fpc, pe, result); + if (np != NULL) fexec np; + np = JSON_parse_integer(json, fpc, pe, result); + if (np != NULL) fexec np; + fbreak; + } + + action parse_array { + char *np; + json->current_nesting += 1; + np = JSON_parse_array(json, fpc, pe, result); + json->current_nesting -= 1; + if (np == NULL) fbreak; else fexec np; + } + + action parse_object { + char *np; + json->current_nesting += 1; + np = JSON_parse_object(json, fpc, pe, result); + json->current_nesting -= 1; + if (np == NULL) fbreak; else fexec np; + } + + action exit { fbreak; } + +main := ( + Vnull @parse_null | + Vfalse @parse_false | + Vtrue @parse_true | + begin_number >parse_number | + begin_string >parse_string | + begin_array >parse_array | + begin_object >parse_object + ) %*exit; +}%% + +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + %% write init; + %% write exec; + + if (cs >= JSON_value_first_final) { + return p; + } else { + return NULL; + } +} + +%%{ + machine JSON_integer; + + write data; + + action exit { fbreak; } + + main := '-'? ('0' | [1-9][0-9]*) (^[0-9] @exit); +}%% + +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + %% write init; + json->memo = p; + %% write exec; + + if (cs >= JSON_integer_first_final) { + long len = p - json->memo; + *result = rb_Integer(rb_str_new(json->memo, len)); + return p + 1; + } else { + return NULL; + } +} + +%%{ + machine JSON_float; + include JSON_common; + + write data; + + action exit { fbreak; } + + main := '-'? ( + (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) + | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+)) + ) (^[0-9Ee.\-] @exit ); +}%% + +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + %% write init; + json->memo = p; + %% write exec; + + if (cs >= JSON_float_first_final) { + long len = p - json->memo; + *result = rb_Float(rb_str_new(json->memo, len)); + return p + 1; + } else { + return NULL; + } +} + + +%%{ + machine JSON_array; + include JSON_common; + + write data; + + action parse_value { + VALUE v = Qnil; + char *np = JSON_parse_value(json, fpc, pe, &v); + if (np == NULL) { + fbreak; + } else { + rb_ary_push(*result, v); + fexec np; + } + } + + action exit { fbreak; } + + next_element = value_separator ignore* begin_value >parse_value; + + main := begin_array ignore* + ((begin_value >parse_value ignore*) + (ignore* next_element ignore*)*)? + end_array @exit; +}%% + +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + if (json->max_nesting && json->current_nesting > json->max_nesting) { + rb_raise(eNestingError, "nesting of %d is to deep", json->current_nesting); + } + *result = rb_ary_new(); + + %% write init; + %% write exec; + + if(cs >= JSON_array_first_final) { + return p + 1; + } else { + rb_raise(eParserError, "unexpected token at '%s'", p); + } +} + +static VALUE json_string_unescape(char *p, char *pe) +{ + VALUE result = rb_str_buf_new(pe - p + 1); + + while (p < pe) { + if (*p == '\\') { + p++; + if (p >= pe) return Qnil; /* raise an exception later, \ at end */ + switch (*p) { + case '"': + case '\\': + rb_str_buf_cat(result, p, 1); + p++; + break; + case 'b': + rb_str_buf_cat2(result, "\b"); + p++; + break; + case 'f': + rb_str_buf_cat2(result, "\f"); + p++; + break; + case 'n': + rb_str_buf_cat2(result, "\n"); + p++; + break; + case 'r': + rb_str_buf_cat2(result, "\r"); + p++; + break; + case 't': + rb_str_buf_cat2(result, "\t"); + p++; + break; + case 'u': + if (p > pe - 4) { + return Qnil; + } else { + p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion); + } + break; + default: + rb_str_buf_cat(result, p, 1); + p++; + break; + } + } else { + char *q = p; + while (*q != '\\' && q < pe) q++; + rb_str_buf_cat(result, p, q - p); + p = q; + } + } + return result; +} + +%%{ + machine JSON_string; + include JSON_common; + + write data; + + action parse_string { + *result = json_string_unescape(json->memo + 1, p); + if (NIL_P(*result)) fbreak; else fexec p + 1; + } + + action exit { fbreak; } + + main := '"' ((^(["\\] | 0..0x1f) | '\\'["\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^(["\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit; +}%% + +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + *result = rb_str_new("", 0); + %% write init; + json->memo = p; + %% write exec; + + if (cs >= JSON_string_first_final) { + return p + 1; + } else { + return NULL; + } +} + + +%%{ + machine JSON; + + write data; + + include JSON_common; + + action parse_object { + char *np; + json->current_nesting = 1; + np = JSON_parse_object(json, fpc, pe, &result); + if (np == NULL) fbreak; else fexec np; + } + + action parse_array { + char *np; + json->current_nesting = 1; + np = JSON_parse_array(json, fpc, pe, &result); + if (np == NULL) fbreak; else fexec np; + } + + main := ignore* ( + begin_object >parse_object | + begin_array >parse_array + ) ignore*; +}%% + +/* + * Document-class: JSON::Ext::Parser + * + * This is the JSON parser implemented as a C extension. It can be configured + * to be used by setting + * + * JSON.parser = JSON::Ext::Parser + * + * with the method parser= in JSON. + * + */ + +/* + * call-seq: new(source, opts => {}) + * + * Creates a new JSON::Ext::Parser instance for the string _source_. + * + * Creates a new JSON::Ext::Parser instance for the string _source_. + * + * It will be configured by the _opts_ hash. _opts_ can have the following + * keys: + * + * _opts_ can have the following keys: + * * *max_nesting*: The maximum depth of nesting allowed in the parsed data + * structures. Disable depth checking with :max_nesting => false. + */ +static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) +{ + char *ptr; + long len; + VALUE source, opts; + GET_STRUCT; + rb_scan_args(argc, argv, "11", &source, &opts); + source = StringValue(source); + ptr = RSTRING_PTR(source); + len = RSTRING_LEN(source); + if (len < 2) { + rb_raise(eParserError, "A JSON text must at least contain two octets!"); + } + json->max_nesting = 19; + if (!NIL_P(opts)) { + opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); + if (NIL_P(opts)) { + rb_raise(rb_eArgError, "opts needs to be like a hash"); + } else { + VALUE s_max_nesting = ID2SYM(i_max_nesting); + if (st_lookup(RHASH(opts)->tbl, s_max_nesting, 0)) { + VALUE max_nesting = rb_hash_aref(opts, s_max_nesting); + if (RTEST(max_nesting)) { + Check_Type(max_nesting, T_FIXNUM); + json->max_nesting = FIX2INT(max_nesting); + } else { + json->max_nesting = 0; + } + } + } + } + json->current_nesting = 0; + /* + Convert these? + if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } + */ + json->len = len; + json->source = ptr; + json->Vsource = source; + json->create_id = rb_funcall(mJSON, i_create_id, 0); + return self; +} + +/* + * call-seq: parse() + * + * Parses the current JSON text _source_ and returns the complete data + * structure as a result. + */ +static VALUE cParser_parse(VALUE self) +{ + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; + GET_STRUCT; + + %% write init; + p = json->source; + pe = p + json->len; + %% write exec; + + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + rb_raise(eParserError, "unexpected token at '%s'", p); + } +} + +static JSON_Parser *JSON_allocate() +{ + JSON_Parser *json = ALLOC(JSON_Parser); + MEMZERO(json, JSON_Parser, 1); + return json; +} + +static void JSON_mark(JSON_Parser *json) +{ + rb_gc_mark_maybe(json->Vsource); + rb_gc_mark_maybe(json->create_id); +} + +static void JSON_free(JSON_Parser *json) +{ + free(json); +} + +static VALUE cJSON_parser_s_allocate(VALUE klass) +{ + JSON_Parser *json = JSON_allocate(); + return Data_Wrap_Struct(klass, JSON_mark, JSON_free, json); +} + +/* + * call-seq: source() + * + * Returns a copy of the current _source_ string, that was used to construct + * this Parser. + */ +static VALUE cParser_source(VALUE self) +{ + GET_STRUCT; + return rb_str_dup(json->Vsource); +} + +void Init_parser() +{ + mJSON = rb_define_module("JSON"); + mExt = rb_define_module_under(mJSON, "Ext"); + cParser = rb_define_class_under(mExt, "Parser", rb_cObject); + eParserError = rb_path2class("JSON::ParserError"); + eNestingError = rb_path2class("JSON::NestingError"); + rb_define_alloc_func(cParser, cJSON_parser_s_allocate); + rb_define_method(cParser, "initialize", cParser_initialize, -1); + rb_define_method(cParser, "parse", cParser_parse, 0); + rb_define_method(cParser, "source", cParser_source, 0); + + i_json_creatable_p = rb_intern("json_creatable?"); + i_json_create = rb_intern("json_create"); + i_create_id = rb_intern("create_id"); + i_chr = rb_intern("chr"); + i_max_nesting = rb_intern("max_nesting"); +} diff --git a/ext/json/ext/parser/unicode.c b/ext/json/ext/parser/unicode.c new file mode 100644 index 000000000..609a0e83e --- /dev/null +++ b/ext/json/ext/parser/unicode.c @@ -0,0 +1,156 @@ +/* vim: set cin et sw=4 ts=4: */ + +#include "unicode.h" + +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. + * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is + * left as-is for anyone who may want to do such conversion, which was + * allowed in earlier algorithms. + */ +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ +static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; + +/* + * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed + * into the first byte, depending on how many bytes follow. There are + * as many entries in this table as there are UTF-8 sequence types. + * (I.e., one byte sequence, two byte... etc.). Remember that sequencs + * for *legal* UTF-8 will be 4 or fewer bytes total. + */ +static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + +char *JSON_convert_UTF16_to_UTF8 ( + VALUE buffer, + char *source, + char *sourceEnd, + ConversionFlags flags) +{ + UTF16 *tmp, *tmpPtr, *tmpEnd; + char buf[5]; + long n = 0, i; + char *p = source - 1; + + while (p < sourceEnd && p[0] == '\\' && p[1] == 'u') { + p += 6; + n++; + } + p = source + 1; + buf[4] = 0; + tmpPtr = tmp = ALLOC_N(UTF16, n); + tmpEnd = tmp + n; + for (i = 0; i < n; i++) { + buf[0] = *p++; + buf[1] = *p++; + buf[2] = *p++; + buf[3] = *p++; + tmpPtr[i] = strtol(buf, NULL, 16); + p += 2; + } + + while (tmpPtr < tmpEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + ch = *tmpPtr++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source + * buffer... */ + if (tmpPtr < tmpEnd) { + UTF32 ch2 = *tmpPtr; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++tmpPtr; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + free(tmp); + rb_raise(rb_path2class("JSON::ParserError"), + "source sequence is illegal/malformed near %s", source); + } + } else { /* We don't have the 16 bits following the high surrogate. */ + free(tmp); + rb_raise(rb_path2class("JSON::ParserError"), + "partial character in source, but hit end near %s", source); + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + free(tmp); + rb_raise(rb_path2class("JSON::ParserError"), + "source sequence is illegal/malformed near %s", source); + } + } + /* Figure out how many bytes the result will require */ + if (ch < (UTF32) 0x80) { + bytesToWrite = 1; + } else if (ch < (UTF32) 0x800) { + bytesToWrite = 2; + } else if (ch < (UTF32) 0x10000) { + bytesToWrite = 3; + } else if (ch < (UTF32) 0x110000) { + bytesToWrite = 4; + } else { + bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + } + + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + p = buf + bytesToWrite; + switch (bytesToWrite) { /* note: everything falls through. */ + case 4: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--p = (UTF8) (ch | firstByteMark[bytesToWrite]); + } + rb_str_buf_cat(buffer, p, bytesToWrite); + } + free(tmp); + source += 5 + (n - 1) * 6; + return source; +} diff --git a/ext/json/ext/parser/unicode.h b/ext/json/ext/parser/unicode.h new file mode 100755 index 000000000..155da0cee --- /dev/null +++ b/ext/json/ext/parser/unicode.h @@ -0,0 +1,58 @@ + +#ifndef _PARSER_UNICODE_H_ +#define _PARSER_UNICODE_H_ + +#include "ruby.h" + +typedef unsigned long UTF32; /* at least 32 bits */ +typedef unsigned short UTF16; /* at least 16 bits */ +typedef unsigned char UTF8; /* typically 8 bits */ + +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +#define UNI_MAX_BMP (UTF32)0x0000FFFF +#define UNI_MAX_UTF16 (UTF32)0x0010FFFF +#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF +#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF + +static const int halfShift = 10; /* used for shifting by 10 bits */ + +static const UTF32 halfBase = 0x0010000UL; +static const UTF32 halfMask = 0x3FFUL; + +typedef enum { + conversionOK = 0, /* conversion successful */ + sourceExhausted, /* partial character in source, but hit end */ + targetExhausted, /* insuff. room in target for conversion */ + sourceIllegal /* source sequence is illegal/malformed */ +} ConversionResult; + +typedef enum { + strictConversion = 0, + lenientConversion +} ConversionFlags; + +char *JSON_convert_UTF16_to_UTF8 ( + VALUE buffer, + char *source, + char *sourceEnd, + ConversionFlags flags); + +#ifndef RARRAY_PTR +#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr +#endif +#ifndef RARRAY_LEN +#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len +#endif +#ifndef RSTRING_PTR +#define RSTRING_PTR(string) RSTRING(string)->ptr +#endif +#ifndef RSTRING_LEN +#define RSTRING_LEN(string) RSTRING(string)->len +#endif + +#endif diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c index 3cd1b160d..30bb6c47b 100644 --- a/ext/nkf/nkf-utf8/nkf.c +++ b/ext/nkf/nkf-utf8/nkf.c @@ -41,7 +41,7 @@ ***********************************************************************/ /* $Id$ */ #define NKF_VERSION "2.0.8" -#define NKF_RELEASE_DATE "2007-01-28" +#define NKF_RELEASE_DATE "2007-05-28" #include "config.h" #include "utf8tbl.h" @@ -351,10 +351,12 @@ static nkf_char e_iconv(nkf_char c2,nkf_char c1,nkf_char c0); * 0: Shift_JIS, eucJP-ascii * 1: eucJP-ms * 2: CP932, CP51932 + * 3: CP10001 */ -#define UCS_MAP_ASCII 0 -#define UCS_MAP_MS 1 -#define UCS_MAP_CP932 2 +#define UCS_MAP_ASCII 0 +#define UCS_MAP_MS 1 +#define UCS_MAP_CP932 2 +#define UCS_MAP_CP10001 3 static int ms_ucs_map_f = UCS_MAP_ASCII; #endif #ifdef UTF8_INPUT_ENABLE @@ -1233,6 +1235,14 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif + }else if(strcmp(codeset, "CP10001") == 0){ + input_f = SJIS_INPUT; +#ifdef SHIFTJIS_CP932 + cp51932_f = TRUE; +#endif +#ifdef UTF8_OUTPUT_ENABLE + ms_ucs_map_f = UCS_MAP_CP10001; +#endif }else if(strcmp(codeset, "EUCJP") == 0 || strcmp(codeset, "EUC-JP") == 0){ input_f = EUC_INPUT; @@ -1371,6 +1381,11 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif + }else if(strcmp(codeset, "CP10001") == 0){ + output_conv = s_oconv; +#ifdef UTF8_OUTPUT_ENABLE + ms_ucs_map_f = UCS_MAP_CP10001; +#endif }else if(strcmp(codeset, "EUCJP") == 0 || strcmp(codeset, "EUC-JP") == 0){ output_conv = e_oconv; @@ -2676,6 +2691,12 @@ nkf_char kanji_convert(FILE *f) } else { /* bogus code, skip SSO and one byte */ NEXT; } + } else if (ms_ucs_map_f == UCS_MAP_CP10001 && + (c1 == 0xFD || c1 == 0xFE)) { + /* CP10001 */ + c2 = X0201; + c1 &= 0x7f; + SEND; } else { /* already established */ c2 = c1; @@ -2885,35 +2906,41 @@ nkf_char kanji_convert(FILE *f) (*oconv)(0, ESC); SEND; } - } else if ((c1 == NL || c1 == CR) && broken_f&4) { - input_mode = ASCII; set_iconv(FALSE, 0); - SEND; - } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) { - if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) { - i_ungetc(SPACE,f); - continue; - } else { - i_ungetc(c1,f); - } - c1 = NL; - SEND; - } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) { - if ((c1=(*i_getc)(f))!=EOF) { - if (c1==SPACE) { - i_ungetc(SPACE,f); - continue; - } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) { - i_ungetc(SPACE,f); - continue; - } else { - i_ungetc(c1,f); + } else if (c1 == NL || c1 == CR) { + if (broken_f&4) { + input_mode = ASCII; set_iconv(FALSE, 0); + SEND; + } else if (mime_decode_f && !mime_decode_mode){ + if (c1 == NL) { + if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) { + i_ungetc(SPACE,f); + continue; + } else { + i_ungetc(c1,f); + } + c1 = NL; + SEND; + } else { /* if (c1 == CR)*/ + if ((c1=(*i_getc)(f))!=EOF) { + if (c1==SPACE) { + i_ungetc(SPACE,f); + continue; + } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) { + i_ungetc(SPACE,f); + continue; + } else { + i_ungetc(c1,f); + } + i_ungetc(NL,f); + } else { + i_ungetc(c1,f); + } + c1 = CR; + SEND; } - i_ungetc(NL,f); - } else { - i_ungetc(c1,f); } - c1 = CR; - SEND; + if (crmode_f == CR && c1 == NL) crmode_f = CRLF; + else crmode_f = c1; } else if (c1 == DEL && input_mode == X0208 ) { /* CP5022x */ c2 = c1; @@ -3125,9 +3152,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} }; #ifdef SHIFTJIS_CP932 if (!cp932inv_f && is_ibmext_in_sjis(c2)){ -#if 0 - extern const unsigned short shiftjis_cp932[3][189]; -#endif val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40]; if (val){ c2 = val >> 8; @@ -3136,9 +3160,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) } if (cp932inv_f && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){ -#if 0 - extern const unsigned short cp932inv[2][189]; -#endif nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40]; if (c){ c2 = c >> 8; @@ -3148,9 +3169,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) #endif /* SHIFTJIS_CP932 */ #ifdef X0212_ENABLE if (!x0213_f && is_ibmext_in_sjis(c2)){ -#if 0 - extern const unsigned short shiftjis_x0212[3][189]; -#endif val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40]; if (val){ if (val > 0x7FFF){ @@ -3481,14 +3499,6 @@ nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0) nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1) { -#if 0 - extern const unsigned short *const utf8_to_euc_2bytes[]; - extern const unsigned short *const utf8_to_euc_2bytes_ms[]; - extern const unsigned short *const utf8_to_euc_2bytes_932[]; - extern const unsigned short *const *const utf8_to_euc_3bytes[]; - extern const unsigned short *const *const utf8_to_euc_3bytes_ms[]; - extern const unsigned short *const *const utf8_to_euc_3bytes_932[]; -#endif const unsigned short *const *pp; const unsigned short *const *const *ppp; static const int no_best_fit_chars_table_C2[] = @@ -3538,11 +3548,27 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char * } }else if(ms_ucs_map_f == UCS_MAP_MS){ if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1; + }else if(ms_ucs_map_f == UCS_MAP_CP10001){ + switch(c2){ + case 0xC2: + switch(c1){ + case 0xA2: + case 0xA3: + case 0xA5: + case 0xA6: + case 0xAC: + case 0xAF: + case 0xB8: + return 1; + } + break; + } } } pp = ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 : ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms : + ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac : utf8_to_euc_2bytes; ret = w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1); }else if(c0 < 0xF0){ @@ -3565,6 +3591,19 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char * if(c1 == 0x80 || c0 == 0x9C) return 1; break; } + }else if(ms_ucs_map_f == UCS_MAP_CP10001){ + switch(c2){ + case 0xE3: + switch(c1){ + case 0x82: + if(c0 == 0x94) return 1; + break; + case 0x83: + if(c0 == 0xBB) return 1; + break; + } + break; + } }else{ switch(c2){ case 0xE2: @@ -3596,8 +3635,10 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char * ppp = ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 : ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms : + ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac : utf8_to_euc_3bytes; ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1); +// fprintf(stderr, "wret: %X %X %X -> %X %X\n",c2,c1,c0,*p2,*p1,ret); }else return -1; #ifdef SHIFTJIS_CP932 if (!ret && !cp932inv_f && is_eucg3(*p2)) { @@ -3739,15 +3780,17 @@ void encode_fallback_subchar(nkf_char c) #ifdef UTF8_OUTPUT_ENABLE nkf_char e2w_conv(nkf_char c2, nkf_char c1) { -#if 0 - extern const unsigned short euc_to_utf8_1byte[]; - extern const unsigned short *const euc_to_utf8_2bytes[]; - extern const unsigned short *const euc_to_utf8_2bytes_ms[]; - extern const unsigned short *const x0212_to_utf8_2bytes[]; -#endif const unsigned short *p; if (c2 == X0201) { + if (ms_ucs_map_f == UCS_MAP_CP10001) { + switch (c1) { + case 0x20: + return 0xA0; + case 0x7D: + return 0xA9; + } + } p = euc_to_utf8_1byte; #ifdef X0212_ENABLE } else if (is_eucg3(c2)){ @@ -3764,7 +3807,10 @@ nkf_char e2w_conv(nkf_char c2, nkf_char c1) c2 &= 0x7f; c2 = (c2&0x7f) - 0x21; if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes) - p = ms_ucs_map_f != UCS_MAP_ASCII ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2]; + p = + ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] : + ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] : + euc_to_utf8_2bytes_ms[c2]; else return 0; } @@ -4069,9 +4115,6 @@ nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) else if(nkf_isgraph(ndx)){ nkf_char val = 0; const unsigned short *ptr; -#if 0 - extern const unsigned short *const x0212_shiftjis[]; -#endif ptr = x0212_shiftjis[ndx - 0x21]; if (ptr){ val = ptr[(c1 & 0x7f) - 0x21]; @@ -4147,9 +4190,6 @@ void s_oconv(nkf_char c2, nkf_char c1) #ifdef SHIFTJIS_CP932 if (cp932inv_f && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){ -#if 0 - extern const unsigned short cp932inv[2][189]; -#endif nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40]; if (c){ c2 = c >> 8; @@ -4539,6 +4579,10 @@ void z_conv(nkf_char c2, nkf_char c1) /* if (c2) c1 &= 0x7f; assertion */ + if (c2 == X0201 && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) { + (*o_zconv)(c2,c1); + return; + } if (x0201_f && z_prev2==X0201) { /* X0201 */ if (c1==(0xde&0x7f)) { /* 濁点 */ z_prev2=0; @@ -4942,15 +4986,20 @@ void set_input_codename(char *codename) void print_guessed_code(char *filename) { char *codename = "BINARY"; + char *str_crmode = NULL; if (!is_inputcode_mixed) { if (strcmp(input_codename, "") == 0) { codename = "ASCII"; } else { codename = input_codename; } + if (crmode_f == CR) str_crmode = "CR"; + else if (crmode_f == NL) str_crmode = "LF"; + else if (crmode_f == CRLF) str_crmode = "CRLF"; } if (filename != NULL) printf("%s:", filename); - printf("%s\n", codename); + if (str_crmode != NULL) printf("%s (%s)\n", codename, str_crmode); + else printf("%s\n", codename); } #endif /*WIN32DLL*/ @@ -5068,9 +5117,6 @@ nkf_char nfc_getc(FILE *f) int i=0, j, k=1, lower, upper; nkf_char buf[9]; const nkf_nfchar *array; -#if 0 - extern const struct normalization_pair normalization_table[]; -#endif buf[i] = (*g)(f); while (k > 0 && ((buf[i] & 0xc0) != 0x80)){ @@ -5437,7 +5483,7 @@ void open_mime(nkf_char mode) int i; int j; p = mime_pattern[0]; - for(i=0;mime_encode[i];i++) { + for(i=0;mime_pattern[i];i++) { if (mode == mime_encode[i]) { p = mime_pattern[i]; break; @@ -5643,10 +5689,21 @@ void mime_putc(nkf_char c) if (mimeout_mode=='Q') { if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) { - if (c <= SPACE) { + if (c == CR || c == NL) { + close_mime(); + (*o_mputc)(c); + base64_count = 0; + return; + } else if (c <= SPACE) { close_mime(); - (*o_mputc)(SPACE); - base64_count++; + if (base64_count > 70) { + (*o_mputc)(NL); + base64_count = 0; + } + if (!nkf_isblank(c)) { + (*o_mputc)(SPACE); + base64_count++; + } } (*o_mputc)(c); base64_count++; @@ -5678,7 +5735,8 @@ void mime_putc(nkf_char c) mimeout_buf_count = 1; }else{ if (base64_count > 1 - && base64_count + mimeout_buf_count > 76){ + && base64_count + mimeout_buf_count > 76 + && mimeout_buf[0] != CR && mimeout_buf[0] != NL){ (*o_mputc)(NL); base64_count = 0; if (!nkf_isspace(mimeout_buf[0])){ diff --git a/ext/nkf/nkf-utf8/utf8tbl.c b/ext/nkf/nkf-utf8/utf8tbl.c index e43ad553d..fb6c3b736 100644 --- a/ext/nkf/nkf-utf8/utf8tbl.c +++ b/ext/nkf/nkf-utf8/utf8tbl.c @@ -201,6 +201,20 @@ const unsigned short euc_to_utf8_AC[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short euc_to_utf8_AC_mac[] = { + 0x2664, 0x2667, 0x2661, 0x2662, 0x2660, 0x2663, 0x2665, + 0x2666, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0x3020, 0x260E, 0x3004, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0x261E, 0x261C, 0x261D, 0x261F, 0x21C6, 0x21C4, 0x21C5, + 0, 0x21E8, 0x21E6, 0x21E7, 0x21E9, 0x2192, 0x2190, 0x2191, + 0x2193, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, +}; const unsigned short euc_to_utf8_AD[] = { 0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466, 0x2467, 0x2468, 0x2469, 0x246A, 0x246B, 0x246C, 0x246D, 0x246E, @@ -215,6 +229,20 @@ const unsigned short euc_to_utf8_AD[] = { 0x2252, 0x2261, 0x222B, 0x222E, 0x2211, 0x221A, 0x22A5, 0x2220, 0x221F, 0x22BF, 0x2235, 0x2229, 0x222A, 0, 0x3299, }; +const unsigned short euc_to_utf8_AD_mac[] = { + 0x65E5, 0x6708, 0x706B, 0x6C34, 0x6728, 0x91D1, 0x571F, + 0x796D, 0x795D, 0x81EA, 0x81F3, 0x3239, 0x547C, 0x3231, 0x8CC7, + 0x540D, 0x3232, 0x5B66, 0x8CA1, 0x793E, 0x7279, 0x76E3, 0x4F01, + 0x5354, 0x52B4, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0, + 0x3349, 0x3314, 0x3322, 0x334D, 0x3318, 0x3327, 0x3303, 0x3336, + 0x3351, 0x3357, 0x330D, 0x3326, 0x3323, 0x332B, 0x334A, 0x333B, + 0x339C, 0x339D, 0x339E, 0x338E, 0x338F, 0x33C4, 0x33A1, 0, + 0, 0, 0, 0, 0, 0, 0, 0x337B, + 0x301D, 0x301F, 0x2116, 0x33CD, 0x2121, 0x32A4, 0x32A5, 0x32A6, + 0x32A7, 0x32A8, 0x3231, 0x3232, 0x3239, 0x337E, 0x337D, 0x337C, + 0x2252, 0x5927, 0x5C0F, 0x32A4, 0x32A5, 0x32A6, 0x32A7, 0x32A8, + 0x533B, 0x8CA1, 0x512A, 0x52B4, 0x5370, 0x63A7, 0x79D8, +}; const unsigned short euc_to_utf8_AE[] = { 0x3349, 0x3322, 0x334D, 0x3314, 0x3316, 0x3305, 0x3333, 0x334E, 0x3303, 0x3336, 0x3318, 0x3315, 0x3327, 0x3351, 0x334A, @@ -2346,6 +2374,33 @@ const unsigned short *const euc_to_utf8_2bytes_ms[] = { 0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB, euc_to_utf8_FC_ms, 0, 0, }; +/* CP10001 */ +const unsigned short *const euc_to_utf8_2bytes_mac[] = { + euc_to_utf8_A1_ms, euc_to_utf8_A2_ms, euc_to_utf8_A3, + euc_to_utf8_A4, euc_to_utf8_A5, euc_to_utf8_A6, euc_to_utf8_A7, + euc_to_utf8_A8, euc_to_utf8_A9, euc_to_utf8_AA, euc_to_utf8_AB, + euc_to_utf8_AC_mac, euc_to_utf8_AD_mac, euc_to_utf8_AE, euc_to_utf8_AF, + euc_to_utf8_B0, euc_to_utf8_B1, euc_to_utf8_B2, euc_to_utf8_B3, + euc_to_utf8_B4, euc_to_utf8_B5, euc_to_utf8_B6, euc_to_utf8_B7, + euc_to_utf8_B8, euc_to_utf8_B9, euc_to_utf8_BA, euc_to_utf8_BB, + euc_to_utf8_BC, euc_to_utf8_BD, euc_to_utf8_BE, euc_to_utf8_BF, + euc_to_utf8_C0, euc_to_utf8_C1, euc_to_utf8_C2, euc_to_utf8_C3, + euc_to_utf8_C4, euc_to_utf8_C5, euc_to_utf8_C6, euc_to_utf8_C7, + euc_to_utf8_C8, euc_to_utf8_C9, euc_to_utf8_CA, euc_to_utf8_CB, + euc_to_utf8_CC, euc_to_utf8_CD, euc_to_utf8_CE, euc_to_utf8_CF, + euc_to_utf8_D0, euc_to_utf8_D1, euc_to_utf8_D2, euc_to_utf8_D3, + euc_to_utf8_D4, euc_to_utf8_D5, euc_to_utf8_D6, euc_to_utf8_D7, + euc_to_utf8_D8, euc_to_utf8_D9, euc_to_utf8_DA, euc_to_utf8_DB, + euc_to_utf8_DC, euc_to_utf8_DD, euc_to_utf8_DE, euc_to_utf8_DF, + euc_to_utf8_E0, euc_to_utf8_E1, euc_to_utf8_E2, euc_to_utf8_E3, + euc_to_utf8_E4, euc_to_utf8_E5, euc_to_utf8_E6, euc_to_utf8_E7, + euc_to_utf8_E8, euc_to_utf8_E9, euc_to_utf8_EA, euc_to_utf8_EB, + euc_to_utf8_EC, euc_to_utf8_ED, euc_to_utf8_EE, euc_to_utf8_EF, + euc_to_utf8_F0, euc_to_utf8_F1, euc_to_utf8_F2, euc_to_utf8_F3, + euc_to_utf8_F4, euc_to_utf8_F5, 0, 0, + 0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB, + euc_to_utf8_FC_ms, 0, 0, +}; #ifdef X0212_ENABLE const unsigned short *const x0212_to_utf8_2bytes[] = { @@ -2397,6 +2452,16 @@ const unsigned short utf8_to_euc_C2_ms[] = { 0x216B, 0x215E, 0, 0, 0x212D, 0, 0x2279, 0, 0xA231, 0, 0xA26B, 0, 0, 0, 0, 0xA244, }; +const unsigned short utf8_to_euc_C2_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0x0220, 0xA242, 0x2171, 0x2172, 0xA270, 0x5C, 0xA243, 0x2178, + 0x212F, 0x027D, 0xA26C, 0, 0x224C, 0, 0xA26E, 0xA234, + 0x216B, 0x215E, 0, 0, 0x212D, 0, 0x2279, 0, + 0xA231, 0, 0xA26B, 0, 0, 0, 0, 0xA244, +}; const unsigned short utf8_to_euc_C2_932[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -2547,6 +2612,16 @@ const unsigned short utf8_to_euc_E284[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E284_mac[] = { + 0, 0, 0, 0x216E, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0x2B7B, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0x2B7D, 0x027E, 0, 0, 0, 0, 0, + 0, 0, 0, 0x2272, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E285[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -2557,6 +2632,16 @@ const unsigned short utf8_to_euc_E285[] = { 0xF373, 0xF374, 0xF375, 0xF376, 0xF377, 0xF378, 0xF379, 0xF37A, 0xF37B, 0xF37C, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E285_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0x2A21, 0x2A22, 0x2A23, 0x2A24, 0x2A25, 0x2A26, 0x2A27, 0x2A28, + 0x2A29, 0x2A2A, 0, 0, 0, 0, 0, 0, + 0x2A35, 0x2A36, 0x2A37, 0x2A38, 0x2A39, 0x2A3A, 0x2A3B, 0x2A3C, + 0x2A3D, 0x2A3E, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E286[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -2597,6 +2682,16 @@ const unsigned short utf8_to_euc_E288_932[] = { 0, 0, 0, 0, 0x2168, 0x2268, 0, 0, 0, 0, 0, 0, 0, 0x2266, 0, 0, }; +const unsigned short utf8_to_euc_E288_mac[] = { + 0x224F, 0, 0x225F, 0x2250, 0, 0, 0, 0x2260, + 0x223A, 0, 0, 0x223B, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x2265, 0, 0, 0x2267, 0x2167, 0x2F22, + 0x225C, 0, 0, 0, 0, 0x2142, 0, 0x224A, + 0x224B, 0x2241, 0x2240, 0x2269, 0x226A, 0, 0x2F21, 0, + 0, 0, 0, 0, 0x2168, 0x2268, 0, 0, + 0, 0, 0, 0, 0, 0x2266, 0, 0, +}; const unsigned short utf8_to_euc_E289[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -2617,6 +2712,16 @@ const unsigned short utf8_to_euc_E28A[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2D79, }; +const unsigned short utf8_to_euc_E28A_mac[] = { + 0, 0, 0x223E, 0x223F, 0, 0, 0x223C, 0x223D, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0x225D, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0x2F23, +}; const unsigned short utf8_to_euc_E28C[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -2637,6 +2742,16 @@ const unsigned short utf8_to_euc_E291[] = { 0x2D31, 0x2D32, 0x2D33, 0x2D34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E291_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0x2921, 0x2922, 0x2923, 0x2924, 0x2925, 0x2926, 0x2927, 0x2928, + 0x2929, 0x292A, 0x292B, 0x292C, 0x292D, 0x292E, 0x292F, 0x2930, + 0x2931, 0x2932, 0x2933, 0x2934, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E294[] = { 0x2821, 0x282C, 0x2822, 0x282D, 0, 0, 0, 0, 0, 0, 0, 0, 0x2823, 0, 0, 0x282E, @@ -2767,6 +2882,16 @@ const unsigned short utf8_to_euc_E388[] = { 0, 0x2D6A, 0x2D6B, 0, 0, 0, 0, 0, 0, 0x2D6C, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E388_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0x2D2E, 0x2D31, 0, 0, 0, 0, 0, + 0, 0x2D2C, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E38A[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -2777,6 +2902,16 @@ const unsigned short utf8_to_euc_E38A[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E38A_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0x2D73, 0x2D74, 0x2D75, 0x2D76, + 0x2D77, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E38C[] = { 0, 0, 0, 0x2D46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2D4A, 0, 0, @@ -2787,6 +2922,16 @@ const unsigned short utf8_to_euc_E38C[] = { 0, 0, 0, 0, 0, 0, 0x2D47, 0, 0, 0, 0, 0x2D4F, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E38C_mac[] = { + 0, 0, 0, 0x2E29, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0x2E32, 0, 0, + 0, 0, 0, 0, 0x2E24, 0, 0, 0, + 0x2E2B, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x2E22, 0x2E34, 0, 0, 0x2E35, 0x2E2D, + 0, 0, 0, 0x2E37, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0x2E2A, 0, + 0, 0, 0, 0x2E36, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E38D[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2D40, 0x2D4E, 0, 0, 0x2D43, 0, 0, @@ -2797,6 +2942,16 @@ const unsigned short utf8_to_euc_E38D[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2D5F, 0x2D6F, 0x2D6E, 0x2D6D, 0, }; +const unsigned short utf8_to_euc_E38D_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0x2E21, 0x2E2F, 0, 0, 0x2E23, 0, 0, + 0, 0x2E2E, 0, 0, 0, 0, 0, 0x2E31, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0x2E6A, 0x2E69, 0x2E68, 0x2E67, 0, +}; const unsigned short utf8_to_euc_E38E[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2D53, 0x2D54, @@ -2807,6 +2962,16 @@ const unsigned short utf8_to_euc_E38E[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E38E_mac[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0x2B2B, 0x2B2D, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0x2B21, 0x2B23, 0x2B29, 0, + 0, 0x2B27, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E38F[] = { 0, 0, 0, 0, 0x2D55, 0, 0, 0, 0, 0, 0, 0, 0, 0x2D63, 0, 0, @@ -2817,6 +2982,16 @@ const unsigned short utf8_to_euc_E38F[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short utf8_to_euc_E38F_mac[] = { + 0, 0, 0, 0, 0x2B2E, 0, 0, 0, + 0, 0, 0, 0, 0, 0x2B7C, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; const unsigned short utf8_to_euc_E4B8[] = { 0x306C, 0x437A, 0xB021, 0x3C37, 0xB022, 0xB023, 0, 0x4B7C, 0x3E66, 0x3B30, 0x3E65, 0x323C, 0xB024, 0x4954, 0x4D3F, 0, @@ -6171,6 +6346,24 @@ const unsigned short *const utf8_to_euc_E2_932[] = { 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short *const utf8_to_euc_E2_mac[] = { + utf8_to_euc_E280_932, 0, 0, 0, + utf8_to_euc_E284_mac, utf8_to_euc_E285_mac, utf8_to_euc_E286, utf8_to_euc_E287, + utf8_to_euc_E288_mac, utf8_to_euc_E289, utf8_to_euc_E28A_mac, 0, + utf8_to_euc_E28C, 0, 0, 0, + 0, utf8_to_euc_E291_mac, 0, 0, + utf8_to_euc_E294, utf8_to_euc_E295, utf8_to_euc_E296, utf8_to_euc_E297, + utf8_to_euc_E298, utf8_to_euc_E299, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, +}; const unsigned short *const utf8_to_euc_E3[] = { utf8_to_euc_E380, utf8_to_euc_E381, utf8_to_euc_E382, utf8_to_euc_E383, 0, 0, 0, 0, @@ -6207,6 +6400,24 @@ const unsigned short *const utf8_to_euc_E3_932[] = { 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short *const utf8_to_euc_E3_mac[] = { + utf8_to_euc_E380_932, utf8_to_euc_E381, utf8_to_euc_E382_932, utf8_to_euc_E383, + 0, 0, 0, 0, + utf8_to_euc_E388_mac, 0, utf8_to_euc_E38A_mac, 0, + utf8_to_euc_E38C_mac, utf8_to_euc_E38D_mac, utf8_to_euc_E38E_mac, utf8_to_euc_E38F_mac, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, +}; const unsigned short *const utf8_to_euc_E4[] = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -6441,6 +6652,36 @@ const unsigned short *const utf8_to_euc_2bytes_932[] = { 0, 0, 0, 0, 0, 0, 0, 0, }; +const unsigned short *const utf8_to_euc_2bytes_mac[] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, utf8_to_euc_C2_mac, utf8_to_euc_C3, + utf8_to_euc_C4, utf8_to_euc_C5, 0, utf8_to_euc_C7, + 0, 0, 0, utf8_to_euc_CB, + 0, 0, utf8_to_euc_CE, utf8_to_euc_CF, + utf8_to_euc_D0, utf8_to_euc_D1, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, +}; const unsigned short *const *const utf8_to_euc_3bytes[] = { 0, 0, utf8_to_euc_E2, utf8_to_euc_E3, utf8_to_euc_E4, utf8_to_euc_E5, utf8_to_euc_E6, utf8_to_euc_E7, @@ -6459,6 +6700,12 @@ const unsigned short *const *const utf8_to_euc_3bytes_932[] = { utf8_to_euc_E8, utf8_to_euc_E9, 0, 0, 0, 0, 0, utf8_to_euc_EF_ms, }; +const unsigned short *const *const utf8_to_euc_3bytes_mac[] = { + 0, 0, utf8_to_euc_E2_mac, utf8_to_euc_E3_mac, + utf8_to_euc_E4, utf8_to_euc_E5, utf8_to_euc_E6, utf8_to_euc_E7, + utf8_to_euc_E8, utf8_to_euc_E9, 0, 0, + 0, 0, 0, utf8_to_euc_EF_ms, +}; #ifdef UNICODE_NORMALIZATION diff --git a/ext/nkf/nkf-utf8/utf8tbl.h b/ext/nkf/nkf-utf8/utf8tbl.h index 1f40f0b36..29413d4fa 100644 --- a/ext/nkf/nkf-utf8/utf8tbl.h +++ b/ext/nkf/nkf-utf8/utf8tbl.h @@ -5,6 +5,7 @@ extern const unsigned short euc_to_utf8_1byte[]; extern const unsigned short *const euc_to_utf8_2bytes[]; extern const unsigned short *const euc_to_utf8_2bytes_ms[]; +extern const unsigned short *const euc_to_utf8_2bytes_mac[]; extern const unsigned short *const x0212_to_utf8_2bytes[]; #endif /* UTF8_OUTPUT_ENABLE */ @@ -12,9 +13,11 @@ extern const unsigned short *const x0212_to_utf8_2bytes[]; extern const unsigned short *const utf8_to_euc_2bytes[]; extern const unsigned short *const utf8_to_euc_2bytes_ms[]; extern const unsigned short *const utf8_to_euc_2bytes_932[]; +extern const unsigned short *const utf8_to_euc_2bytes_mac[]; extern const unsigned short *const *const utf8_to_euc_3bytes[]; extern const unsigned short *const *const utf8_to_euc_3bytes_ms[]; extern const unsigned short *const *const utf8_to_euc_3bytes_932[]; +extern const unsigned short *const *const utf8_to_euc_3bytes_mac[]; #endif /* UTF8_INPUT_ENABLE */ #ifdef UNICODE_NORMALIZATION |