blob: fe8391290a1c54f6862ee0a7adb83d58e4451153 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
#!/usr/bin/perl
#
# Reads every line from the files named on the command line (or from
# standard input when none are given), hands each line to entry_kanji()
# to record the two-byte characters it contains, and finally writes each
# distinct character to standard output, one per line.  Progress and
# summary messages go to standard error.

print STDERR "Scanning message and help files and surveys which multibyte chars are used...\n";

# entry_kanji() increments this package global for every new character.
# Start it at zero so the summary reads "0 unique chars" instead of an
# empty count when the input contains no multibyte characters at all.
$count = 0;

while (defined(my $line = <>)) {
    entry_kanji($line);
}
print STDERR "\n$count unique chars\n";

# NOTE(review): none of these four settings is read anywhere in the
# visible script; they are retained unchanged in case something outside
# this file depends on them -- confirm before removing.
$fillzero = 0;
$charset = 0;
$jismode = $bigfivemode = 0;

print STDERR "Spit out the list of characters being used in the input.\n";

# Keys of %usedkanji are 16-bit codes (lead byte * 256 + trail byte), so
# sort them numerically and split each one back into its two bytes.
foreach my $code (sort { $a <=> $b } keys %usedkanji) {
    print chr($code >> 8) . chr($code & 0xff) . "\n";
}
# entry_kanji($line)
#
# Scans $line for two-byte characters and records each distinct one.
# A character whose code is in 0xa0-0xff is treated as a lead byte; it
# and the character that follows it form the 16-bit code
# (lead * 256 + trail).
#
# Side effects (package globals shared with the main script):
#   %usedkanji - gains a key for every code not seen before
#   $count     - incremented once per newly seen code
# Each newly seen code is also echoed to STDERR as four hex digits.
#
# A lead byte that is the last character of $line, or that is followed
# directly by a newline, has no trail byte and is skipped rather than
# being paired with unrelated data.
sub entry_kanji
{
    our (%usedkanji, $count);
    my ($line) = @_;
    return unless defined $line;

    my $len = length($line);
    my $pos = 0;

    # Walk the string by index.  The earlier approach of repeatedly
    # stripping the first character with s/^(.)// could not consume a
    # newline and then re-read a stale $1, which registered characters
    # that were never in the input.
    while ($pos < $len) {
        my $lead = ord(substr($line, $pos++, 1));
        next unless $lead >= 0xa0 && $lead <= 0xff;

        # Dangling lead byte at the very end of the input: nothing to pair.
        last if $pos >= $len;

        my $trail_char = substr($line, $pos, 1);
        # A line terminator is never a trail byte; leave it for the next
        # iteration, where it is skipped as an ordinary single byte.
        next if $trail_char eq "\n";
        $pos++;

        my $kchar = $lead * 256 + ord($trail_char);
        if (!$usedkanji{$kchar}) {
            $usedkanji{$kchar} = 1;
            printf(STDERR "%04x ", $kchar);
            $count++;
        }
    }
    return;
}
|