summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  ChangeLog                    19
-rw-r--r--  enc/trans/iso2022.trans      28
-rw-r--r--  enc/trans/japanese.trans     44
-rw-r--r--  enc/trans/korean.trans       10
-rw-r--r--  enc/trans/newline.trans      32
-rw-r--r--  enc/trans/single_byte.trans  41
-rw-r--r--  enc/trans/utf_16_32.trans    80
-rw-r--r--  tool/transcode-tblgen.rb     23
8 files changed, 152 insertions, 125 deletions
diff --git a/ChangeLog b/ChangeLog
index e071d74e6..3b7ca7039 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+Mon Sep 1 02:31:16 2008 Tanaka Akira <akr@fsij.org>
+
+ * tool/transcode-tblgen.rb (transcode_generated_code): defined for
+ generating table at once.
+ (transcode_tblgen): returns an empty string.
+ (transcode_generate_node): ditto.
+
+ * enc/trans/newline.trans: use transcode_generated_code.
+
+ * enc/trans/iso2022.trans: ditto.
+
+ * enc/trans/single_byte.trans: ditto.
+
+ * enc/trans/utf_16_32.trans: ditto.
+
+ * enc/trans/japanese.trans: ditto.
+
+ * enc/trans/korean.trans: ditto.
+
Mon Sep 1 02:10:03 2008 Tanaka Akira <akr@fsij.org>
* tool/transcode-tblgen.rb (citrus_decode_mapsrc): print logging
diff --git a/enc/trans/iso2022.trans b/enc/trans/iso2022.trans
index 76a68d95d..4340fa9b2 100644
--- a/enc/trans/iso2022.trans
+++ b/enc/trans/iso2022.trans
@@ -10,10 +10,22 @@
map_jisx0208_rest = {}
map_jisx0208_rest["{21-7e}"] = :func_so
+
+ transcode_generate_node(ActionMap.parse(map), "iso2022jp_to_eucjp")
+ transcode_generate_node(ActionMap.parse(map_jisx0208_rest), "iso2022jp_to_eucjp_jisx0208_rest")
+
+ map_eucjp = {
+ "{0e,0f,1b}" => :undef,
+ "{00-0d,10-1a,1c-7f}" => :func_so,
+ "{a1-fe}{a1-fe}" => :func_so,
+ "8e{a1-fe}" => :undef,
+ "8f{a1-fe}{a1-fe}" => :undef,
+ }
+
+ transcode_generate_node(ActionMap.parse(map_eucjp), "eucjp_to_iso2022jp")
%>
-<%= transcode_generate_node(ActionMap.parse(map), "iso2022jp_to_eucjp") %>
-<%= transcode_generate_node(ActionMap.parse(map_jisx0208_rest), "iso2022jp_to_eucjp_jisx0208_rest") %>
+<%= transcode_generated_code %>
static VALUE
fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l)
@@ -65,18 +77,6 @@ rb_ISO_2022_JP_to_EUC_JP = {
NULL, fun_si_iso2022jp_to_eucjp, NULL, fun_so_iso2022jp_to_eucjp
};
-<%
- map_eucjp = {
- "{0e,0f,1b}" => :undef,
- "{00-0d,10-1a,1c-7f}" => :func_so,
- "{a1-fe}{a1-fe}" => :func_so,
- "8e{a1-fe}" => :undef,
- "8f{a1-fe}{a1-fe}" => :undef,
- }
-%>
-
-<%= transcode_generate_node(ActionMap.parse(map_eucjp), "eucjp_to_iso2022jp") %>
-
static int
fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char *o)
{
diff --git a/enc/trans/japanese.trans b/enc/trans/japanese.trans
index f9d761fa2..abb976537 100644
--- a/enc/trans/japanese.trans
+++ b/enc/trans/japanese.trans
@@ -1,64 +1,68 @@
#include "transcode_data.h"
-<%= transcode_tblgen "Shift_JIS", "UTF-8", [
+<%
+ transcode_tblgen "Shift_JIS", "UTF-8", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("mskanji", 1, "JISX0201-KANA/UCS"),
*citrus_decode_mapsrc("mskanji", 2, "JISX0208:1990/UCS"),
- ] %>
-<%= transcode_tblgen "Windows-31J", "UTF-8", [
+ ]
+ transcode_tblgen "Windows-31J", "UTF-8", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("mskanji", 1, "JISX0201-KANA/UCS"),
*citrus_decode_mapsrc("mskanji", 2,
"JISX0208VDC:NEC/UCS,CP932VDC:NEC_IBM/UCS,CP932VDC:IBM/UCS,CP932UDA/UCS,JISX0208:MS/UCS"),
- ] %>
+ ]
-<%= transcode_tblgen "UTF-8", "Shift_JIS", [
+ transcode_tblgen "UTF-8", "Shift_JIS", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("mskanji", 1, "UCS/JISX0201-KANA"),
*citrus_decode_mapsrc("mskanji", 2, "UCS/JISX0208:1990"),
- ] %>
-<%= transcode_tblgen "UTF-8", "Windows-31J", [
+ ]
+ transcode_tblgen "UTF-8", "Windows-31J", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("mskanji", 1, "UCS/JISX0201-KANA"),
*citrus_decode_mapsrc("mskanji", 2,
"UCS/JISX0208VDC:NEC,UCS/CP932VDC:NEC_IBM,UCS/CP932VDC:IBM,UCS/CP932UDA,UCS/JISX0208:MS"),
- ] %>
+ ]
-<%= transcode_tblgen "EUC-JP", "UTF-8", [
+ transcode_tblgen "EUC-JP", "UTF-8", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("euc", 0x8080, "JISX0208:1990/UCS"),
*citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS"),
*citrus_decode_mapsrc("euc", 0x8000, "JISX0212/UCS"),
- ] %>
-<%= transcode_tblgen "EUC-JP-MS", "UTF-8", [
+ ]
+ transcode_tblgen "EUC-JP-MS", "UTF-8", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("euc", 0x8080, "JISX0208VDC:NEC/UCS,JISX0208UDC/UCS,JISX0208:MS/UCS"),
*citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS"),
*citrus_decode_mapsrc("euc", 0x8000, "JISX0212VDC:IBM/UCS,JISX0212UDC/UCS,JISX0212:MS/UCS"),
- ] %>
-<%= transcode_tblgen "CP51932", "UTF-8", [
+ ]
+ transcode_tblgen "CP51932", "UTF-8", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("euc", 0x8080, "JISX0208VDC:NEC/UCS,CP932VDC:NEC_IBM/UCS,JISX0208:MS/UCS"),
*citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS"),
- ] %>
+ ]
-<%= transcode_tblgen "UTF-8", "EUC-JP", [
+ transcode_tblgen "UTF-8", "EUC-JP", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208:1990"),
*citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA"),
*citrus_decode_mapsrc("euc", 0x8000, "UCS/JISX0212"),
- ] %>
-<%= transcode_tblgen "UTF-8", "EUC-JP-MS", [
+ ]
+ transcode_tblgen "UTF-8", "EUC-JP-MS", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208VDC:NEC,UCS/JISX0208UDC,UCS/JISX0208:MS"),
*citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA"),
*citrus_decode_mapsrc("euc", 0x8000, "UCS/JISX0212VDC:IBM,UCS/JISX0212UDC,UCS/JISX0212:MS"),
- ] %>
-<%= transcode_tblgen "UTF-8", "CP51932", [
+ ]
+ transcode_tblgen "UTF-8", "CP51932", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208VDC:NEC,UCS/CP932VDC:NEC_IBM,UCS/JISX0208:MS"),
*citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA"),
- ] %>
+ ]
+%>
+
+<%= transcode_generated_code %>
void
Init_japanese(void)
diff --git a/enc/trans/korean.trans b/enc/trans/korean.trans
index f04fa1561..ef1cdfcb8 100644
--- a/enc/trans/korean.trans
+++ b/enc/trans/korean.trans
@@ -3,12 +3,14 @@
<%
require "euckr-tbl"
require "cp949-tbl"
+
+ transcode_tblgen "UTF-8", "EUC-KR", [["{00-7f}", :nomap], *UCS_TO_EUCKR_TBL]
+ transcode_tblgen "EUC-KR", "UTF-8", [["{00-7f}", :nomap], *EUCKR_TO_UCS_TBL]
+ transcode_tblgen "UTF-8", "CP949", [["{00-7f}", :nomap], *UCS_TO_CP949_TBL]
+ transcode_tblgen "CP949", "UTF-8", [["{00-7f}", :nomap], *CP949_TO_UCS_TBL]
%>
-<%= transcode_tblgen "UTF-8", "EUC-KR", [["{00-7f}", :nomap], *UCS_TO_EUCKR_TBL] %>
-<%= transcode_tblgen "EUC-KR", "UTF-8", [["{00-7f}", :nomap], *EUCKR_TO_UCS_TBL] %>
-<%= transcode_tblgen "UTF-8", "CP949", [["{00-7f}", :nomap], *UCS_TO_CP949_TBL] %>
-<%= transcode_tblgen "CP949", "UTF-8", [["{00-7f}", :nomap], *CP949_TO_UCS_TBL] %>
+<%= transcode_generated_code %>
void
Init_korean(void)
diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans
index 409da1dc3..5d6c9bf05 100644
--- a/enc/trans/newline.trans
+++ b/enc/trans/newline.trans
@@ -3,9 +3,23 @@
<%
map_normalize = {}
map_normalize["{00-ff}"] = :func_so
+
+ transcode_generate_node(ActionMap.parse(map_normalize), "universal_newline")
+
+ map_crlf = {}
+ map_crlf["{00-09,0b-ff}"] = :nomap
+ map_crlf["0a"] = "0d0a"
+
+ transcode_generate_node(ActionMap.parse(map_crlf), "crlf_newline")
+
+ map_cr = {}
+ map_cr["{00-09,0b-ff}"] = :nomap
+ map_cr["0a"] = "0d"
+
+ transcode_generate_node(ActionMap.parse(map_cr), "cr_newline")
%>
-<%= transcode_generate_node(ActionMap.parse(map_normalize), "universal_newline") %>
+<%= transcode_generated_code %>
static int
fun_so_universal_newline(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
@@ -48,14 +62,6 @@ rb_universal_newline = {
NULL, NULL, NULL, fun_so_universal_newline
};
-<%
- map_crlf = {}
- map_crlf["{00-09,0b-ff}"] = :nomap
- map_crlf["0a"] = "0d0a"
-%>
-
-<%= transcode_generate_node(ActionMap.parse(map_crlf), "crlf_newline") %>
-
static const rb_transcoder
rb_crlf_newline = {
"", "crlf_newline", &crlf_newline,
@@ -66,14 +72,6 @@ rb_crlf_newline = {
NULL, NULL, NULL, NULL
};
-<%
- map_cr = {}
- map_cr["{00-09,0b-ff}"] = :nomap
- map_cr["0a"] = "0d"
-%>
-
-<%= transcode_generate_node(ActionMap.parse(map_cr), "cr_newline") %>
-
static const rb_transcoder
rb_cr_newline = {
"", "cr_newline", &cr_newline,
diff --git a/enc/trans/single_byte.trans b/enc/trans/single_byte.trans
index 8c9565c60..57254bf3d 100644
--- a/enc/trans/single_byte.trans
+++ b/enc/trans/single_byte.trans
@@ -20,14 +20,11 @@
require 'iso-8859-14-tbl'
require 'iso-8859-15-tbl'
-%>
-
-<%= transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map %>
-<%= transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map %>
-<%= transcode_tblgen "ASCII-8BIT", "UTF-8", us_ascii_map %>
-<%= transcode_tblgen "UTF-8", "ASCII-8BIT", us_ascii_map %>
+ transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map
+ transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map
+ transcode_tblgen "ASCII-8BIT", "UTF-8", us_ascii_map
+ transcode_tblgen "UTF-8", "ASCII-8BIT", us_ascii_map
-<%
def transcode_tblgen_iso8859(name, tbl_to_ucs)
tbl_to_ucs = CONTROL1_TO_UCS_TBL + tbl_to_ucs
name_ident = name.tr('-','_')
@@ -37,22 +34,24 @@
code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }])
code
end
+
+ transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL)
+ transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL)
+ transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL)
+ transcode_tblgen_iso8859("ISO-8859-4", ISO_8859_4_TO_UCS_TBL)
+ transcode_tblgen_iso8859("ISO-8859-5", ISO_8859_5_TO_UCS_TBL)
+ transcode_tblgen_iso8859("ISO-8859-6", ISO_8859_6_TO_UCS_TBL)
+ transcode_tblgen_iso8859("ISO-8859-7", ISO_8859_7_TO_UCS_TBL)
+ transcode_tblgen_iso8859("ISO-8859-8", ISO_8859_8_TO_UCS_TBL)
+ transcode_tblgen_iso8859("ISO-8859-9", ISO_8859_9_TO_UCS_TBL)
+ transcode_tblgen_iso8859("ISO-8859-10", ISO_8859_10_TO_UCS_TBL)
+ transcode_tblgen_iso8859("ISO-8859-11", ISO_8859_11_TO_UCS_TBL)
+ transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL)
+ transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL)
+ transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL)
%>
-<%= transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL) %>
-<%= transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL) %>
-<%= transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL) %>
-<%= transcode_tblgen_iso8859("ISO-8859-4", ISO_8859_4_TO_UCS_TBL) %>
-<%= transcode_tblgen_iso8859("ISO-8859-5", ISO_8859_5_TO_UCS_TBL) %>
-<%= transcode_tblgen_iso8859("ISO-8859-6", ISO_8859_6_TO_UCS_TBL) %>
-<%= transcode_tblgen_iso8859("ISO-8859-7", ISO_8859_7_TO_UCS_TBL) %>
-<%= transcode_tblgen_iso8859("ISO-8859-8", ISO_8859_8_TO_UCS_TBL) %>
-<%= transcode_tblgen_iso8859("ISO-8859-9", ISO_8859_9_TO_UCS_TBL) %>
-<%= transcode_tblgen_iso8859("ISO-8859-10", ISO_8859_10_TO_UCS_TBL) %>
-<%= transcode_tblgen_iso8859("ISO-8859-11", ISO_8859_11_TO_UCS_TBL) %>
-<%= transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL) %>
-<%= transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL) %>
-<%= transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL) %>
+<%= transcode_generated_code %>
void
Init_single_byte(void)
diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans
index 3b1165430..b71c289ba 100644
--- a/enc/trans/utf_16_32.trans
+++ b/enc/trans/utf_16_32.trans
@@ -1,5 +1,42 @@
#include "transcode_data.h"
+<%
+ map = {}
+ map["{00-ff}{00-d7,e0-ff}0000"] = :func_so
+ map["{00-ff}{00-ff}{01-10}00"] = :func_so
+ transcode_generate_node(ActionMap.parse(map), "from_UTF_32LE")
+
+ map = {}
+ map["{00-d7,e0-ff}{00-ff}"] = :func_so
+ map["{d8-db}{00-ff}{dc-df}{00-ff}"] = :func_so
+ transcode_generate_node(ActionMap.parse(map), "from_UTF_16BE")
+
+ map = {}
+ map["{00-7f}"] = :func_so
+ map["{c2-df}{80-bf}"] = :func_so
+ map["e0{a0-bf}{80-bf}"] = :func_so
+ map["{e1-ec}{80-bf}{80-bf}"] = :func_so
+ map["ed{80-9f}{80-bf}"] = :func_so
+ map["{ee-ef}{80-bf}{80-bf}"] = :func_so
+ map["f0{90-bf}{80-bf}{80-bf}"] = :func_so
+ map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so
+ map["f4{80-8f}{80-bf}{80-bf}"] = :func_so
+ am = ActionMap.parse(map)
+ transcode_generate_node(am, "to_UTF_16BE")
+
+ map = {}
+ map["{00-ff}{00-d7,e0-ff}"] = :func_so
+ map["{00-ff}{d8-db}{00-ff}{dc-df}"] = :func_so
+ transcode_generate_node(ActionMap.parse(map), "from_UTF_16LE")
+
+ map = {}
+ map["0000{00-d7,e0-ff}{00-ff}"] = :func_so
+ map["00{01-10}{00-ff}{00-ff}"] = :func_so
+ transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE")
+%>
+
+<%= transcode_generated_code %>
+
static int
fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
{
@@ -222,13 +259,6 @@ fun_so_to_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned
return 4;
}
-<%=
- map = {}
- map["{00-d7,e0-ff}{00-ff}"] = :func_so
- map["{d8-db}{00-ff}{dc-df}{00-ff}"] = :func_so
- transcode_generate_node(ActionMap.parse(map), "from_UTF_16BE")
-%>
-
static const rb_transcoder
rb_from_UTF_16BE = {
"UTF-16BE", "UTF-8", &from_UTF_16BE,
@@ -239,21 +269,6 @@ rb_from_UTF_16BE = {
NULL, NULL, NULL, &fun_so_from_utf_16be
};
-<%=
- map = {}
- map["{00-7f}"] = :func_so
- map["{c2-df}{80-bf}"] = :func_so
- map["e0{a0-bf}{80-bf}"] = :func_so
- map["{e1-ec}{80-bf}{80-bf}"] = :func_so
- map["ed{80-9f}{80-bf}"] = :func_so
- map["{ee-ef}{80-bf}{80-bf}"] = :func_so
- map["f0{90-bf}{80-bf}{80-bf}"] = :func_so
- map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so
- map["f4{80-8f}{80-bf}{80-bf}"] = :func_so
- am = ActionMap.parse(map)
- transcode_generate_node(am, "to_UTF_16BE")
-%>
-
static const rb_transcoder
rb_to_UTF_16BE = {
"UTF-8", "UTF-16BE", &to_UTF_16BE,
@@ -264,13 +279,6 @@ rb_to_UTF_16BE = {
NULL, NULL, NULL, &fun_so_to_utf_16be
};
-<%=
- map = {}
- map["{00-ff}{00-d7,e0-ff}"] = :func_so
- map["{00-ff}{d8-db}{00-ff}{dc-df}"] = :func_so
- transcode_generate_node(ActionMap.parse(map), "from_UTF_16LE")
-%>
-
static const rb_transcoder
rb_from_UTF_16LE = {
"UTF-16LE", "UTF-8", &from_UTF_16LE,
@@ -291,13 +299,6 @@ rb_to_UTF_16LE = {
NULL, NULL, NULL, &fun_so_to_utf_16le
};
-<%=
- map = {}
- map["0000{00-d7,e0-ff}{00-ff}"] = :func_so
- map["00{01-10}{00-ff}{00-ff}"] = :func_so
- transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE")
-%>
-
static const rb_transcoder
rb_from_UTF_32BE = {
"UTF-32BE", "UTF-8", &from_UTF_32BE,
@@ -318,13 +319,6 @@ rb_to_UTF_32BE = {
NULL, NULL, NULL, &fun_so_to_utf_32be
};
-<%=
- map = {}
- map["{00-ff}{00-d7,e0-ff}0000"] = :func_so
- map["{00-ff}{00-ff}{01-10}00"] = :func_so
- transcode_generate_node(ActionMap.parse(map), "from_UTF_32LE")
-%>
-
static const rb_transcoder
rb_from_UTF_32LE = {
"UTF-32LE", "UTF-8", &from_UTF_32LE,
diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb
index c3f7ee39e..44be6a9d9 100644
--- a/tool/transcode-tblgen.rb
+++ b/tool/transcode-tblgen.rb
@@ -389,6 +389,12 @@ End
code << generate_lookup_node(name_hint, table)
name_hint
end
+
+ def gennode(name_hint=nil, valid_encoding=nil)
+ code = ''
+ name = generate_node(code, name_hint, valid_encoding)
+ return name, code
+ end
end
def citrus_mskanji_cstomb(csid, index)
@@ -529,12 +535,12 @@ def transcode_compile_tree(name, from, map)
valid_encoding = nil
end
- code = ''
- defined_name = am.generate_node(code, name, valid_encoding)
+ defined_name, code = am.gennode(name, valid_encoding)
return defined_name, code, max_input
end
TRANSCODERS = []
+TRANSCODE_GENERATED_CODE = ''
def transcode_tblgen(from, to, map)
STDERR.puts "converter from #{from} to #{to}" if VERBOSE_MODE
@@ -565,14 +571,19 @@ static const rb_transcoder
NULL, NULL, NULL
};
End
- tree_code + "\n" + transcoder_code
+ TRANSCODE_GENERATED_CODE << tree_code + "\n" + transcoder_code
+ ''
end
def transcode_generate_node(am, name_hint=nil)
STDERR.puts "converter for #{name_hint}" if VERBOSE_MODE
- code = ''
- am.generate_node(code, name_hint)
- code
+ name, code = am.gennode(name_hint)
+ TRANSCODE_GENERATED_CODE << code
+ ''
+end
+
+def transcode_generated_code
+ TRANSCODE_GENERATED_CODE
end
def transcode_register_code