diff options
-rw-r--r-- | ChangeLog | 19 | ||||
-rw-r--r-- | enc/trans/iso2022.trans | 28 | ||||
-rw-r--r-- | enc/trans/japanese.trans | 44 | ||||
-rw-r--r-- | enc/trans/korean.trans | 10 | ||||
-rw-r--r-- | enc/trans/newline.trans | 32 | ||||
-rw-r--r-- | enc/trans/single_byte.trans | 41 | ||||
-rw-r--r-- | enc/trans/utf_16_32.trans | 80 | ||||
-rw-r--r-- | tool/transcode-tblgen.rb | 23 |
8 files changed, 152 insertions, 125 deletions
@@ -1,3 +1,22 @@ +Mon Sep 1 02:31:16 2008 Tanaka Akira <akr@fsij.org> + + * tool/transcode-tblgen.rb (transcode_generated_code): defined for + generating table at once. + (transcode_tblgen): returns an empty string. + (transcode_generate_node): ditto. + + * enc/trans/newline.trans: use transcode_generated_code. + + * enc/trans/iso2022.trans: ditto. + + * enc/trans/single_byte.trans: ditto. + + * enc/trans/utf_16_32.trans: ditto. + + * enc/trans/japanese.trans: ditto. + + * enc/trans/korean.trans: ditto. + Mon Sep 1 02:10:03 2008 Tanaka Akira <akr@fsij.org> * tool/transcode-tblgen.rb (citrus_decode_mapsrc): print logging diff --git a/enc/trans/iso2022.trans b/enc/trans/iso2022.trans index 76a68d95d..4340fa9b2 100644 --- a/enc/trans/iso2022.trans +++ b/enc/trans/iso2022.trans @@ -10,10 +10,22 @@ map_jisx0208_rest = {} map_jisx0208_rest["{21-7e}"] = :func_so + + transcode_generate_node(ActionMap.parse(map), "iso2022jp_to_eucjp") + transcode_generate_node(ActionMap.parse(map_jisx0208_rest), "iso2022jp_to_eucjp_jisx0208_rest") + + map_eucjp = { + "{0e,0f,1b}" => :undef, + "{00-0d,10-1a,1c-7f}" => :func_so, + "{a1-fe}{a1-fe}" => :func_so, + "8e{a1-fe}" => :undef, + "8f{a1-fe}{a1-fe}" => :undef, + } + + transcode_generate_node(ActionMap.parse(map_eucjp), "eucjp_to_iso2022jp") %> -<%= transcode_generate_node(ActionMap.parse(map), "iso2022jp_to_eucjp") %> -<%= transcode_generate_node(ActionMap.parse(map_jisx0208_rest), "iso2022jp_to_eucjp_jisx0208_rest") %> +<%= transcode_generated_code %> static VALUE fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l) @@ -65,18 +77,6 @@ rb_ISO_2022_JP_to_EUC_JP = { NULL, fun_si_iso2022jp_to_eucjp, NULL, fun_so_iso2022jp_to_eucjp }; -<% - map_eucjp = { - "{0e,0f,1b}" => :undef, - "{00-0d,10-1a,1c-7f}" => :func_so, - "{a1-fe}{a1-fe}" => :func_so, - "8e{a1-fe}" => :undef, - "8f{a1-fe}{a1-fe}" => :undef, - } -%> - -<%= transcode_generate_node(ActionMap.parse(map_eucjp), "eucjp_to_iso2022jp") %> - static int fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char *o) { diff --git a/enc/trans/japanese.trans b/enc/trans/japanese.trans index f9d761fa2..abb976537 100644 --- a/enc/trans/japanese.trans +++ b/enc/trans/japanese.trans @@ -1,64 +1,68 @@ #include "transcode_data.h" -<%= transcode_tblgen "Shift_JIS", "UTF-8", [ +<% + transcode_tblgen "Shift_JIS", "UTF-8", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("mskanji", 1, "JISX0201-KANA/UCS"), *citrus_decode_mapsrc("mskanji", 2, "JISX0208:1990/UCS"), - ] %> -<%= transcode_tblgen "Windows-31J", "UTF-8", [ + ] + transcode_tblgen "Windows-31J", "UTF-8", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("mskanji", 1, "JISX0201-KANA/UCS"), *citrus_decode_mapsrc("mskanji", 2, "JISX0208VDC:NEC/UCS,CP932VDC:NEC_IBM/UCS,CP932VDC:IBM/UCS,CP932UDA/UCS,JISX0208:MS/UCS"), - ] %> + ] -<%= transcode_tblgen "UTF-8", "Shift_JIS", [ + transcode_tblgen "UTF-8", "Shift_JIS", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("mskanji", 1, "UCS/JISX0201-KANA"), *citrus_decode_mapsrc("mskanji", 2, "UCS/JISX0208:1990"), - ] %> -<%= transcode_tblgen "UTF-8", "Windows-31J", [ + ] + transcode_tblgen "UTF-8", "Windows-31J", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("mskanji", 1, "UCS/JISX0201-KANA"), *citrus_decode_mapsrc("mskanji", 2, "UCS/JISX0208VDC:NEC,UCS/CP932VDC:NEC_IBM,UCS/CP932VDC:IBM,UCS/CP932UDA,UCS/JISX0208:MS"), - ] %> + ] -<%= transcode_tblgen "EUC-JP", "UTF-8", [ + transcode_tblgen "EUC-JP", "UTF-8", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("euc", 0x8080, "JISX0208:1990/UCS"), *citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS"), *citrus_decode_mapsrc("euc", 0x8000, "JISX0212/UCS"), - ] %> -<%= transcode_tblgen "EUC-JP-MS", "UTF-8", [ + ] + transcode_tblgen "EUC-JP-MS", "UTF-8", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("euc", 0x8080, "JISX0208VDC:NEC/UCS,JISX0208UDC/UCS,JISX0208:MS/UCS"), *citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS"), *citrus_decode_mapsrc("euc", 0x8000, "JISX0212VDC:IBM/UCS,JISX0212UDC/UCS,JISX0212:MS/UCS"), - ] %> -<%= transcode_tblgen "CP51932", "UTF-8", [ + ] + transcode_tblgen "CP51932", "UTF-8", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("euc", 0x8080, "JISX0208VDC:NEC/UCS,CP932VDC:NEC_IBM/UCS,JISX0208:MS/UCS"), *citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS"), - ] %> + ] -<%= transcode_tblgen "UTF-8", "EUC-JP", [ + transcode_tblgen "UTF-8", "EUC-JP", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208:1990"), *citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA"), *citrus_decode_mapsrc("euc", 0x8000, "UCS/JISX0212"), - ] %> -<%= transcode_tblgen "UTF-8", "EUC-JP-MS", [ + ] + transcode_tblgen "UTF-8", "EUC-JP-MS", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208VDC:NEC,UCS/JISX0208UDC,UCS/JISX0208:MS"), *citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA"), *citrus_decode_mapsrc("euc", 0x8000, "UCS/JISX0212VDC:IBM,UCS/JISX0212UDC,UCS/JISX0212:MS"), - ] %> -<%= transcode_tblgen "UTF-8", "CP51932", [ + ] + transcode_tblgen "UTF-8", "CP51932", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208VDC:NEC,UCS/CP932VDC:NEC_IBM,UCS/JISX0208:MS"), *citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA"), - ] %> + ] +%> + +<%= transcode_generated_code %> void Init_japanese(void) diff --git a/enc/trans/korean.trans b/enc/trans/korean.trans index f04fa1561..ef1cdfcb8 100644 --- a/enc/trans/korean.trans +++ b/enc/trans/korean.trans @@ -3,12 +3,14 @@ <% require "euckr-tbl" require "cp949-tbl" + + transcode_tblgen "UTF-8", "EUC-KR", [["{00-7f}", :nomap], *UCS_TO_EUCKR_TBL] + transcode_tblgen "EUC-KR", "UTF-8", [["{00-7f}", :nomap], *EUCKR_TO_UCS_TBL] + transcode_tblgen "UTF-8", "CP949", [["{00-7f}", :nomap], *UCS_TO_CP949_TBL] + transcode_tblgen "CP949", "UTF-8", [["{00-7f}", :nomap], *CP949_TO_UCS_TBL] %> -<%= transcode_tblgen "UTF-8", "EUC-KR", [["{00-7f}", :nomap], *UCS_TO_EUCKR_TBL] %> -<%= transcode_tblgen "EUC-KR", "UTF-8", [["{00-7f}", :nomap], *EUCKR_TO_UCS_TBL] %> -<%= transcode_tblgen "UTF-8", "CP949", [["{00-7f}", :nomap], *UCS_TO_CP949_TBL] %> -<%= transcode_tblgen "CP949", "UTF-8", [["{00-7f}", :nomap], *CP949_TO_UCS_TBL] %> +<%= transcode_generated_code %> void Init_korean(void) diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans index 409da1dc3..5d6c9bf05 100644 --- a/enc/trans/newline.trans +++ b/enc/trans/newline.trans @@ -3,9 +3,23 @@ <% map_normalize = {} map_normalize["{00-ff}"] = :func_so + + transcode_generate_node(ActionMap.parse(map_normalize), "universal_newline") + + map_crlf = {} + map_crlf["{00-09,0b-ff}"] = :nomap + map_crlf["0a"] = "0d0a" + + transcode_generate_node(ActionMap.parse(map_crlf), "crlf_newline") + + map_cr = {} + map_cr["{00-09,0b-ff}"] = :nomap + map_cr["0a"] = "0d" + + transcode_generate_node(ActionMap.parse(map_cr), "cr_newline") %> -<%= transcode_generate_node(ActionMap.parse(map_normalize), "universal_newline") %> +<%= transcode_generated_code %> static int fun_so_universal_newline(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) @@ -48,14 +62,6 @@ rb_universal_newline = { NULL, NULL, NULL, fun_so_universal_newline }; -<% - map_crlf = {} - map_crlf["{00-09,0b-ff}"] = :nomap - map_crlf["0a"] = "0d0a" -%> - -<%= transcode_generate_node(ActionMap.parse(map_crlf), "crlf_newline") %> - static const rb_transcoder rb_crlf_newline = { "", "crlf_newline", &crlf_newline, @@ -66,14 +72,6 @@ rb_crlf_newline = { NULL, NULL, NULL, NULL }; -<% - map_cr = {} - map_cr["{00-09,0b-ff}"] = :nomap - map_cr["0a"] = "0d" -%> - -<%= transcode_generate_node(ActionMap.parse(map_cr), "cr_newline") %> - static const rb_transcoder rb_cr_newline = { "", "cr_newline", &cr_newline, diff --git a/enc/trans/single_byte.trans b/enc/trans/single_byte.trans index 8c9565c60..57254bf3d 100644 --- a/enc/trans/single_byte.trans +++ b/enc/trans/single_byte.trans @@ -20,14 +20,11 @@ require 'iso-8859-14-tbl' require 'iso-8859-15-tbl' -%> - -<%= transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map %> -<%= transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map %> -<%= transcode_tblgen "ASCII-8BIT", "UTF-8", us_ascii_map %> -<%= transcode_tblgen "UTF-8", "ASCII-8BIT", us_ascii_map %> + transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map + transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map + transcode_tblgen "ASCII-8BIT", "UTF-8", us_ascii_map + transcode_tblgen "UTF-8", "ASCII-8BIT", us_ascii_map -<% def transcode_tblgen_iso8859(name, tbl_to_ucs) tbl_to_ucs = CONTROL1_TO_UCS_TBL + tbl_to_ucs name_ident = name.tr('-','_') @@ -37,22 +34,24 @@ code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }]) code end + + transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-4", ISO_8859_4_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-5", ISO_8859_5_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-6", ISO_8859_6_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-7", ISO_8859_7_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-8", ISO_8859_8_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-9", ISO_8859_9_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-10", ISO_8859_10_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-11", ISO_8859_11_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-4", ISO_8859_4_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-5", ISO_8859_5_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-6", ISO_8859_6_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-7", ISO_8859_7_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-8", ISO_8859_8_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-9", ISO_8859_9_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-10", ISO_8859_10_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-11", ISO_8859_11_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL) %> +<%= transcode_generated_code %> void Init_single_byte(void) diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans index 3b1165430..b71c289ba 100644 --- a/enc/trans/utf_16_32.trans +++ b/enc/trans/utf_16_32.trans @@ -1,5 +1,42 @@ #include "transcode_data.h" +<% + map = {} + map["{00-ff}{00-d7,e0-ff}0000"] = :func_so + map["{00-ff}{00-ff}{01-10}00"] = :func_so + transcode_generate_node(ActionMap.parse(map), "from_UTF_32LE") + + map = {} + map["{00-d7,e0-ff}{00-ff}"] = :func_so + map["{d8-db}{00-ff}{dc-df}{00-ff}"] = :func_so + transcode_generate_node(ActionMap.parse(map), "from_UTF_16BE") + + map = {} + map["{00-7f}"] = :func_so + map["{c2-df}{80-bf}"] = :func_so + map["e0{a0-bf}{80-bf}"] = :func_so + map["{e1-ec}{80-bf}{80-bf}"] = :func_so + map["ed{80-9f}{80-bf}"] = :func_so + map["{ee-ef}{80-bf}{80-bf}"] = :func_so + map["f0{90-bf}{80-bf}{80-bf}"] = :func_so + map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so + map["f4{80-8f}{80-bf}{80-bf}"] = :func_so + am = ActionMap.parse(map) + transcode_generate_node(am, "to_UTF_16BE") + + map = {} + map["{00-ff}{00-d7,e0-ff}"] = :func_so + map["{00-ff}{d8-db}{00-ff}{dc-df}"] = :func_so + transcode_generate_node(ActionMap.parse(map), "from_UTF_16LE") + + map = {} + map["0000{00-d7,e0-ff}{00-ff}"] = :func_so + map["00{01-10}{00-ff}{00-ff}"] = :func_so + transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE") +%> + +<%= transcode_generated_code %> + static int fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) { @@ -222,13 +259,6 @@ fun_so_to_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned return 4; } -<%= - map = {} - map["{00-d7,e0-ff}{00-ff}"] = :func_so - map["{d8-db}{00-ff}{dc-df}{00-ff}"] = :func_so - transcode_generate_node(ActionMap.parse(map), "from_UTF_16BE") -%> - static const rb_transcoder rb_from_UTF_16BE = { "UTF-16BE", "UTF-8", &from_UTF_16BE, @@ -239,21 +269,6 @@ rb_from_UTF_16BE = { NULL, NULL, NULL, &fun_so_from_utf_16be }; -<%= - map = {} - map["{00-7f}"] = :func_so - map["{c2-df}{80-bf}"] = :func_so - map["e0{a0-bf}{80-bf}"] = :func_so - map["{e1-ec}{80-bf}{80-bf}"] = :func_so - map["ed{80-9f}{80-bf}"] = :func_so - map["{ee-ef}{80-bf}{80-bf}"] = :func_so - map["f0{90-bf}{80-bf}{80-bf}"] = :func_so - map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so - map["f4{80-8f}{80-bf}{80-bf}"] = :func_so - am = ActionMap.parse(map) - transcode_generate_node(am, "to_UTF_16BE") -%> - static const rb_transcoder rb_to_UTF_16BE = { "UTF-8", "UTF-16BE", &to_UTF_16BE, @@ -264,13 +279,6 @@ rb_to_UTF_16BE = { NULL, NULL, NULL, &fun_so_to_utf_16be }; -<%= - map = {} - map["{00-ff}{00-d7,e0-ff}"] = :func_so - map["{00-ff}{d8-db}{00-ff}{dc-df}"] = :func_so - transcode_generate_node(ActionMap.parse(map), "from_UTF_16LE") -%> - static const rb_transcoder rb_from_UTF_16LE = { "UTF-16LE", "UTF-8", &from_UTF_16LE, @@ -291,13 +299,6 @@ rb_to_UTF_16LE = { NULL, NULL, NULL, &fun_so_to_utf_16le }; -<%= - map = {} - map["0000{00-d7,e0-ff}{00-ff}"] = :func_so - map["00{01-10}{00-ff}{00-ff}"] = :func_so - transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE") -%> - static const rb_transcoder rb_from_UTF_32BE = { "UTF-32BE", "UTF-8", &from_UTF_32BE, @@ -318,13 +319,6 @@ rb_to_UTF_32BE = { NULL, NULL, NULL, &fun_so_to_utf_32be }; -<%= - map = {} - map["{00-ff}{00-d7,e0-ff}0000"] = :func_so - map["{00-ff}{00-ff}{01-10}00"] = :func_so - transcode_generate_node(ActionMap.parse(map), "from_UTF_32LE") -%> - static const rb_transcoder rb_from_UTF_32LE = { "UTF-32LE", "UTF-8", &from_UTF_32LE, diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb index c3f7ee39e..44be6a9d9 100644 --- a/tool/transcode-tblgen.rb +++ b/tool/transcode-tblgen.rb @@ -389,6 +389,12 @@ End code << generate_lookup_node(name_hint, table) name_hint end + + def gennode(name_hint=nil, valid_encoding=nil) + code = '' + name = generate_node(code, name_hint, valid_encoding) + return name, code + end end def citrus_mskanji_cstomb(csid, index) @@ -529,12 +535,12 @@ def transcode_compile_tree(name, from, map) valid_encoding = nil end - code = '' - defined_name = am.generate_node(code, name, valid_encoding) + defined_name, code = am.gennode(name, valid_encoding) return defined_name, code, max_input end TRANSCODERS = [] +TRANSCODE_GENERATED_CODE = '' def transcode_tblgen(from, to, map) STDERR.puts "converter from #{from} to #{to}" if VERBOSE_MODE @@ -565,14 +571,19 @@ static const rb_transcoder NULL, NULL, NULL }; End - tree_code + "\n" + transcoder_code + TRANSCODE_GENERATED_CODE << tree_code + "\n" + transcoder_code + '' end def transcode_generate_node(am, name_hint=nil) STDERR.puts "converter for #{name_hint}" if VERBOSE_MODE - code = '' - am.generate_node(code, name_hint) - code + name, code = am.gennode(name_hint) + TRANSCODE_GENERATED_CODE << code + '' +end + +def transcode_generated_code + TRANSCODE_GENERATED_CODE end def transcode_register_code |