diff options
author | Matt Wilson <msw@redhat.com> | 2000-04-17 20:00:33 +0000 |
---|---|---|
committer | Matt Wilson <msw@redhat.com> | 2000-04-17 20:00:33 +0000 |
commit | 6f122a2794396f1777e3fe41bc98fda27e67ae44 (patch) | |
tree | 3074b305703c1f0170673657ae0e5834a1085f5a /utils | |
parent | 96040f2d2acfcc3052eefd2102227aabbaf70f6f (diff) | |
download | anaconda-6f122a2794396f1777e3fe41bc98fda27e67ae44.tar.gz anaconda-6f122a2794396f1777e3fe41bc98fda27e67ae44.tar.xz anaconda-6f122a2794396f1777e3fe41bc98fda27e67ae44.zip |
merge from anaconda-6-2j-branch
Diffstat (limited to 'utils')
-rwxr-xr-x | utils/uniqkanji.pl | 44 |
1 files changed, 44 insertions, 0 deletions
#!/usr/bin/perl
#
# utils/uniqkanji.pl
#
# Scan the files named on the command line (or STDIN) and collect every
# distinct double-byte character found in them.  A double-byte character
# is a lead byte in the range 0xa0-0xff followed by one more byte.
#
# Progress goes to STDERR: each newly seen code is printed as four hex
# digits, followed by the total count.  The characters themselves are
# written to STDOUT as raw two-byte sequences, one per line, in ascending
# code order.
#
# The input is treated as raw bytes; no encoding layer is applied.

use strict;

# Package globals (not lexicals) so the names the original script exposed
# stay reachable as %main::usedkanji / $main::count.
use vars qw(%usedkanji $count);

%usedkanji = ();    # 16-bit code (lead * 256 + trail) => 1
$count     = 0;     # number of distinct codes seen; starts at 0 so the
                    # summary prints "0" rather than an empty string

# Run only when executed as a script, so the file can also be loaded with
# require/do (e.g. from a test) without reading STDIN.
main() unless caller;

# Drive the scan: read all input, report the count, emit the characters.
sub main {
    print STDERR "Scanning message and help files and surveys which multibyte chars are used...\n";

    while (defined(my $line = <>)) {
        entry_kanji($line);
    }

    print STDERR "\n$count unique chars\n";

    print STDERR "Spit out the list of characters being used in the input.\n";

    # Keys are integers, so compare them numerically.
    foreach my $kchar (sort { $a <=> $b } keys %usedkanji) {
        print chr($kchar >> 8) . chr($kchar & 0xff) . "\n";
    }
}

# entry_kanji($line)
#
# Record every double-byte character in $line into %usedkanji, bump
# $count for each code not seen before, and print each new code to
# STDERR as "%04x ".
#
# A lead byte that is the last byte of the string, or that is followed
# directly by a newline, is treated as truncated and ignored rather than
# being paired with a stale or missing trail byte.
sub entry_kanji {
    my ($line) = @_;
    return unless defined $line;

    my $len = length $line;
    my $pos = 0;

    while ($pos < $len) {
        my $lead = ord substr($line, $pos++, 1);

        # Anything outside 0xa0-0xff is a single-byte character.
        next if $lead < 0xa0 || $lead > 0xff;

        # Lead byte with nothing after it: truncated character.
        last if $pos >= $len;

        my $trail = substr($line, $pos, 1);

        # A newline is never a trail byte; leave it for the next pass.
        next if $trail eq "\n";
        $pos++;

        my $kchar = $lead * 256 + ord $trail;
        next if $usedkanji{$kchar};

        $usedkanji{$kchar} = 1;
        printf STDERR "%04x ", $kchar;
        $count++;
    }

    return;
}

1;