summaryrefslogtreecommitdiffstats
path: root/tool
diff options
context:
space:
mode:
authornaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-08-31 16:23:04 +0000
committernaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-08-31 16:23:04 +0000
commit6c425b26a85e5faab867fc0382be0dbe92a71a81 (patch)
treeb5c007d2925d004d7b5bbfa261cba5cb39a6411c /tool
parent07093652ba18cdbbfa46444b2db407a41a857be0 (diff)
downloadruby-6c425b26a85e5faab867fc0382be0dbe92a71a81.tar.gz
ruby-6c425b26a85e5faab867fc0382be0dbe92a71a81.tar.xz
ruby-6c425b26a85e5faab867fc0382be0dbe92a71a81.zip
* tool/transcode-tblgen.rb: add table generator from Citrus maps.
* enc/trans/japanese.trans: use Citrus maps. * enc/trans/CP: add maps from Citrus. * enc/trans/JIS: ditto. * test/ruby/test_transcode.rb: Shift_JIS and EUC-JP don't support IBM extended characters. git-svn-id: http://svn.ruby-lang.org/repos/ruby/trunk@19003 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'tool')
-rw-r--r--tool/transcode-tblgen.rb121
1 files changed, 116 insertions, 5 deletions
diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb
index f79fc551e..4925d2168 100644
--- a/tool/transcode-tblgen.rb
+++ b/tool/transcode-tblgen.rb
@@ -207,7 +207,7 @@ class ActionMap
ss.each_firstbyte {|byte, rest|
h[byte] ||= {}
if h[byte][rest]
- raise "ambiguous"
+ raise "ambiguous %s or %s (%02X/%s)" % [h[byte][rest], action, byte, rest]
end
h[byte][rest] = action
}
@@ -391,6 +391,117 @@ End
end
end
+def citrus_mskanji_cstomb(csid, index) # Converts (csid, index) to a hex string for the 'mskanji' ces; raises "illegal byte sequence" for out-of-range rows/columns.
+ case csid
+ when 0
+ index # csid 0: index is used unchanged
+ when 1
+ index + 0x80 # csid 1: index shifted up by 0x80
+ when 2, 3
+ row = index >> 8 # everything above the low 8 bits of index
+ raise "illegal byte sequence" if row < 0x21 # rows below 0x21 are rejected for both csid 2 and 3
+ if csid == 3
+ if row <= 0x2F
+ offset = (row == 0x22 || row >= 0x26) ? 0xED : 0xF0 # csid 3, rows 0x21..0x2F: row 0x22 and rows >= 0x26 use 0xED, the others 0xF0
+ elsif row >= 0x4D && row <= 0x7E
+ offset = 0xCE # csid 3, rows 0x4D..0x7E
+ else
+ raise "illegal byte sequence" # csid 3 rows 0x30..0x4C and above 0x7E are rejected
+ end
+ else
+ raise "illegal byte sequence" if row > 0x97 # csid 2 accepts rows 0x21..0x97 only
+ offset = (row < 0x5F) ? 0x81 : 0xC1 # csid 2: base added to the halved row below
+ end
+ col = index & 0xFF # low 8 bits of index
+ raise "illegal byte sequence" if (col < 0x21 || col > 0x7E) # column must lie in 0x21..0x7E
+
+ row -= 0x21 # rebase row to start at zero
+ col -= 0x21 # rebase column to start at zero
+ if (row & 1) == 0
+ col += 0x40 # even rebased row: columns start at 0x40
+ col += 1 if (col >= 0x7F) # ...and the value 0x7F is skipped
+ else
+ col += 0x9F; # odd rebased row: columns start at 0x9F
+ end
+ row = row / 2 + offset # two consecutive rows share one leading value
+ (row << 8) | col # combine as (row << 8) | col
+ end.to_s(16) # hex digits without a prefix; NOTE(review): for a csid outside 0..3 the case yields nil and nil.to_s(16) raises ArgumentError
+end
+
+def citrus_euc_cstomb(csid, index) # Converts (csid, index) to a hex string for the 'euc' ces by OR-ing a csid-specific bit pattern into index.
+ case csid
+ when 0x0000
+ index # csid 0x0000: index is used unchanged
+ when 0x8080
+ index | 0x8080 # csid 0x8080: sets 0x8080 in index
+ when 0x0080
+ index | 0x8E80 # csid 0x0080: sets 0x8E80 in index (presumably a 0x8E prefix byte plus the high bit - confirm)
+ when 0x8000
+ index | 0x8F8080 # csid 0x8000: sets 0x8F8080 in index (presumably a 0x8F prefix byte plus both high bits - confirm)
+ end.to_s(16) # hex digits without a prefix; NOTE(review): any other csid makes the case yield nil and nil.to_s(16) raises ArgumentError
+end
+
+def citrus_cstomb(ces, csid, index) # Dispatches on the ces name to the matching converter and returns its hex string.
+ case ces
+ when 'mskanji'
+ citrus_mskanji_cstomb(csid, index)
+ when 'euc'
+ citrus_euc_cstomb(csid, index)
+ end # there is no else branch, so any other ces value returns nil
+end
+
+SUBDIR = %w/APPLE AST BIG5 CNS CP EBCDIC GB GEORGIAN ISO646 ISO-8859 JIS KAZAKH KOI KS MISC TCVN/ # name prefixes; citrus_decode_mapsrc uses the first one a map name starts with as its subdirectory
+
+
+def citrus_decode_mapsrc(ces, csid, mapsrcs) # Loads every comma-separated map source named in mapsrcs and returns the collected two-element mapping pairs.
+ table = [] # accumulated pairs from all map sources
+ mapsrcs.split(',').each do |mapsrc|
+ path = [$srcdir] # path components, starting at the global $srcdir
+ mode = nil
+ if mapsrc.start_with?('UCS') # names beginning with 'UCS' are read in :from_ucs mode
+ mode = :from_ucs
+ from = mapsrc[4..-1] # text after the first four characters (presumably "UCS" plus a separator - confirm)
+ path << SUBDIR.find{|x| from.start_with?(x) } # subdirectory chosen by prefix; NOTE(review): find yields nil when no prefix matches
+ else
+ mode = :to_ucs # every other name is read in :to_ucs mode
+ path << SUBDIR.find{|x| mapsrc.start_with?(x) } # subdirectory chosen by prefix of the whole name
+ end
+ path << mapsrc.gsub(':', '@') # the file name uses '@' wherever the map name has ':'
+ path = File.join(*path)
+ path << ".src"
+ path[path.rindex('/')] = '%' # the last '/' is replaced by '%', joining subdirectory and file name
+ puts 'load mapsrc %s' % path # prints the path being loaded to standard output
+ open(path) do |f|
+ f.each_line do |l| # first pass: consume lines up to and including the BEGIN_MAP line
+ break if /^BEGIN_MAP/ =~ l
+ end
+ f.each_line do |l| # second pass continues on the same handle: entries until END_MAP, skipping blank and '#' lines
+ next if /^\s*(?:#|$)/ =~ l
+ break if /^END_MAP/ =~ l
+ case mode
+ when :from_ucs
+ case l
+ when /0x(\w+)\s*-\s*0x(\w+)\s*=\s*INVALID/ # INVALID ranges are recognized but add nothing (the push below is commented out)
+ # table.push << ["{#$1-#$2}", :invalid]
+ when /(0x\w+)\s*=\s*(0x\w+)/
+ table.push << [$1.hex, citrus_cstomb(ces, csid, $2.hex)] # pair is [left value as Integer, converted right value]; push without arguments returns table, so << appends
+ else
+ raise "unknown notation '%s'"% l # any other line aborts with the offending text
+ end
+ when :to_ucs
+ case l
+ when /(0x\w+)\s*=\s*(0x\w+)/
+ table.push << [citrus_cstomb(ces, csid, $1.hex), $2.hex] # pair is [converted left value, right value as Integer]
+ else
+ raise "unknown notation '%s'"% l # any other line aborts with the offending text
+ end
+ end
+ end
+ end
+ end
+ return table
+end
+
def encode_utf8(map)
r = []
map.each {|k, v|
@@ -567,8 +678,8 @@ op.parse!
VERBOSE_MODE = verbose_mode
arg = ARGV.shift
-dir = File.dirname(arg)
-$:.unshift dir unless $:.include? dir
+$srcdir = File.dirname(arg)
+$:.unshift $srcdir unless $:.include? $srcdir
src = File.read(arg)
src.force_encoding("ascii-8bit") if src.respond_to? :force_encoding
this_script = File.read(__FILE__)
@@ -585,7 +696,7 @@ if !force_mode && output_filename && File.readable?(output_filename)
if %r{/\* src="([0-9a-z_.-]+)",} =~ line
name = $1
next if name == File.basename(arg) || name == File.basename(__FILE__)
- path = File.join(dir, name)
+ path = File.join($srcdir, name)
if File.readable? path
chk_signature << "/* #{make_signature(name, File.read(path))} */\n"
end
@@ -613,7 +724,7 @@ libs = libs2 - libs1
lib_sigs = ''
libs.each {|lib|
lib = File.basename(lib)
- path = File.join(dir, lib)
+ path = File.join($srcdir, lib)
if File.readable? path
lib_sigs << "/* #{make_signature(lib, File.read(path))} */\n"
end