From 6c425b26a85e5faab867fc0382be0dbe92a71a81 Mon Sep 17 00:00:00 2001 From: naruse Date: Sun, 31 Aug 2008 16:23:04 +0000 Subject: * tool/transcode-tblgen.rb: add table generator from Citrus maps. * enc/trans/japanese.trans: use Citrus maps. * enc/trans/CP: add maps from Citrus. * enc/trans/JIS: ditto. * test/ruby/test_transcode.rb: Shift_JIS and EUC-JP doesn't support IBM extended characters. git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@19003 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- tool/transcode-tblgen.rb | 121 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 116 insertions(+), 5 deletions(-) (limited to 'tool') diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb index f79fc551e..4925d2168 100644 --- a/tool/transcode-tblgen.rb +++ b/tool/transcode-tblgen.rb @@ -207,7 +207,7 @@ class ActionMap ss.each_firstbyte {|byte, rest| h[byte] ||= {} if h[byte][rest] - raise "ambiguous" + raise "ambiguous %s or %s (%02X/%s)" % [h[byte][rest], action, byte, rest] end h[byte][rest] = action } @@ -391,6 +391,117 @@ End end end +def citrus_mskanji_cstomb(csid, index) + case csid + when 0 + index + when 1 + index + 0x80 + when 2, 3 + row = index >> 8 + raise "illegal byte sequence" if row < 0x21 + if csid == 3 + if row <= 0x2F + offset = (row == 0x22 || row >= 0x26) ? 0xED : 0xF0 + elsif row >= 0x4D && row <= 0x7E + offset = 0xCE + else + raise "illegal byte sequence" + end + else + raise "illegal byte sequence" if row > 0x97 + offset = (row < 0x5F) ? 0x81 : 0xC1 + end + col = index & 0xFF + raise "illegal byte sequence" if (col < 0x21 || col > 0x7E) + + row -= 0x21 + col -= 0x21 + if (row & 1) == 0 + col += 0x40 + col += 1 if (col >= 0x7F) + else + col += 0x9F; + end + row = row / 2 + offset + (row << 8) | col + end.to_s(16) +end + +def citrus_euc_cstomb(csid, index) + case csid + when 0x0000 + index + when 0x8080 + index | 0x8080 + when 0x0080 + index | 0x8E80 + when 0x8000 + index | 0x8F8080 + end.to_s(16) +end + +def citrus_cstomb(ces, csid, index) + case ces + when 'mskanji' + citrus_mskanji_cstomb(csid, index) + when 'euc' + citrus_euc_cstomb(csid, index) + end +end + +SUBDIR = %w/APPLE AST BIG5 CNS CP EBCDIC GB GEORGIAN ISO646 ISO-8859 JIS KAZAKH KOI KS MISC TCVN/ + + +def citrus_decode_mapsrc(ces, csid, mapsrcs) + table = [] + mapsrcs.split(',').each do |mapsrc| + path = [$srcdir] + mode = nil + if mapsrc.start_with?('UCS') + mode = :from_ucs + from = mapsrc[4..-1] + path << SUBDIR.find{|x| from.start_with?(x) } + else + mode = :to_ucs + path << SUBDIR.find{|x| mapsrc.start_with?(x) } + end + path << mapsrc.gsub(':', '@') + path = File.join(*path) + path << ".src" + path[path.rindex('/')] = '%' + puts 'load mapsrc %s' % path + open(path) do |f| + f.each_line do |l| + break if /^BEGIN_MAP/ =~ l + end + f.each_line do |l| + next if /^\s*(?:#|$)/ =~ l + break if /^END_MAP/ =~ l + case mode + when :from_ucs + case l + when /0x(\w+)\s*-\s*0x(\w+)\s*=\s*INVALID/ + # table.push << ["{#$1-#$2}", :invalid] + when /(0x\w+)\s*=\s*(0x\w+)/ + table.push << [$1.hex, citrus_cstomb(ces, csid, $2.hex)] + else + raise "unknown notation '%s'"% l + end + when :to_ucs + case l + when /(0x\w+)\s*=\s*(0x\w+)/ + table.push << [citrus_cstomb(ces, csid, $1.hex), $2.hex] + else + raise "unknown notation '%s'"% l + end + end + end + end + end + return table +end + def encode_utf8(map) r = [] map.each {|k, v| @@ -567,8 +678,8 @@ op.parse! VERBOSE_MODE = verbose_mode arg = ARGV.shift -dir = File.dirname(arg) -$:.unshift dir unless $:.include? dir +$srcdir = File.dirname(arg) +$:.unshift $srcdir unless $:.include? $srcdir src = File.read(arg) src.force_encoding("ascii-8bit") if src.respond_to? :force_encoding this_script = File.read(__FILE__) @@ -585,7 +696,7 @@ if !force_mode && output_filename && File.readable?(output_filename) if %r{/\* src="([0-9a-z_.-]+)",} =~ line name = $1 next if name == File.basename(arg) || name == File.basename(__FILE__) - path = File.join(dir, name) + path = File.join($srcdir, name) if File.readable? path chk_signature << "/* #{make_signature(name, File.read(path))} */\n" end @@ -613,7 +724,7 @@ libs = libs2 - libs1 lib_sigs = '' libs.each {|lib| lib = File.basename(lib) - path = File.join(dir, lib) + path = File.join($srcdir, lib) if File.readable? path lib_sigs << "/* #{make_signature(lib, File.read(path))} */\n" end -- cgit