#!/usr/bin/perl
#
# Build a language table from the locales installed on this system.
#
# For every locale reported by `locale -a` that looks like xx_*, the script
# asks locale(1) for that locale's language name, territory and charmap.
# It then prints one line per distinct display name:
#
#     <locale> <%charMap entry> <%charFont entry> <display name>
#
# Locales whose charmap has no entry in both %charMap and %charFont are
# silently omitted from the output.

use strict;
use warnings;

# This table is used to eliminate things like "French (France)": when a
# language's territory equals the entry here, only the language name is
# shown instead of "Language (Territory)".
#
# NOTE(review): the "Slovak" and "Slovenian" territory values look like they
# may have been meant as "Slovakia" / "Slovenia" -- confirm against what
# `locale territory` actually prints before changing them.
my %defaultTerritory = (
    "Albanian"   => "Albania",
    "Belarusian" => "Belarus",
    "Bulgarian"  => "Bulgaria",
    "Croatian"   => "Croatia",
    "Czech"      => "Czech Republic",
    "Danish"     => "Denmark",
    "Estonian"   => "Estonia",
    "Finnish"    => "Finland",
    "Greek"      => "Greece",
    "Hungarian"  => "Hungary",
    "Icelandic"  => "Iceland",
    "Indonesian" => "Indonesia",
    "Irish"      => "Ireland",
    "Japanese"   => "Japan",
    "Lithuanian" => "Lithuania",
    "Macedonian" => "Macedonia",
    "Norwegian"  => "Norway",
    "Polish"     => "Poland",
    "Romanian"   => "Romania",
    "Russian"    => "Russia",
    "Slovak"     => "Slovak",
    "Slovenian"  => "Slovenian",
    "Ukrainian"  => "Ukraine",
    "Vietnamese" => "Vietnam",
    "Turkish"    => "Turkey",
    "Thai"       => "Thailand",
);

# Charmap name (as printed by `locale charmap`) -> second output column.
# (Presumably a console map name; verify against the consumer of this table.)
my %charMap = (
    "GB2312"      => "iso01",
    "BIG5"        => "iso01",
    "EUC-JP"      => "iso01",
    "EUC-TW"      => "iso01",
    "EUC-KR"      => "iso01",
    "KOI8-U"      => "koi8-u",
    "KOI8-R"      => "koi8-u",
    "ISO-8859-1"  => "iso01",
    "ISO-8859-2"  => "iso02",
    "ISO-8859-3"  => "iso03",
    "ISO-8859-5"  => "iso05",
    "ISO-8859-6"  => "iso06",
    "ISO-8859-7"  => "iso07",
    "ISO-8859-8"  => "iso08",
    "ISO-8859-9"  => "iso09",
    "ISO-8859-15" => "iso15",
);

# Charmap name -> third output column.
# (Presumably a console font name; verify against the consumer of this table.)
my %charFont = (
    "EUC-JP"      => "lat0-sun16",
    "EUC-TW"      => "lat0-sun16",
    "EUC-KR"      => "lat0-sun16",
    "GB2312"      => "lat0-sun16",
    "BIG5"        => "lat0-sun16",
    "KOI8-U"      => "cyr-sun16",
    "KOI8-R"      => "cyr-sun16",
    "ISO-8859-1"  => "lat0-sun16",
    "ISO-8859-2"  => "lat2-sun16",
    #"ISO-8859-3" => "iso03",           mk_MK -- no font available
    "ISO-8859-5"  => "cyr-sun16",
    "ISO-8859-6"  => "LatArCyrHeb-16",
    "ISO-8859-7"  => "iso07.16",
    "ISO-8859-8"  => "LatArCyrHeb-16",
    "ISO-8859-9"  => "lat5-sun16",
    "ISO-8859-15" => "lat0-sun16",
);

my %langList;       # locale name -> display name
my %charmapList;    # locale name -> charmap name

# List-form pipe open: no shell is involved, so nothing needs quoting.
open(my $locales, '-|', 'locale', '-a')
    or die "cannot run 'locale -a': $!\n";

LOCALE:
while (my $lang = <$locales>) {
    #for (['he_IL', 'en_US']) {
    chomp $lang;

    # limit to items of the form xx_.*
    next LOCALE unless $lang =~ /^[a-zA-Z][a-zA-Z]_/;

    # Rewrite a few names before they are handed to locale(1) via LANG.
    $lang =~ s/eucjp/eucJP/;
    $lang =~ s/euckr/eucKR/;
    $lang =~ s/^zh_CN$/zh_CN.GB2312/;
    $lang =~ s/^zh_TW$/zh_TW.Big5/;

    # someone put nb_NO in locale.alias.  yuck.  We don't
    # want to offer that.
    next LOCALE if substr($lang, 0, 3) eq "nb_";
    next LOCALE if $lang eq "zh_CN.gb18030";
    next LOCALE if $lang eq "zh_TW.euctw";

    # Ask locale(1) for language, territory and charmap (one per line) with
    # LANG set to the candidate locale.  Only LANG is overridden, exactly as
    # in the original "LANG=$lang locale ..." shell command.
    my @info;
    {
        local $ENV{LANG} = $lang;
        open(my $info_fh, '-|', 'locale', 'language', 'territory', 'charmap')
            or die "cannot run 'locale' for $lang: $!\n";
        @info = <$info_fh>;
        close($info_fh);
    }

    # Fewer than three lines means the query failed; such a locale could
    # never be printed anyway because it has no usable charmap.
    if (@info < 3) {
        warn "skipping $lang: incomplete reply from locale(1)\n";
        next LOCALE;
    }

    my ($name, $territory, $charmap) = @info[0 .. 2];
    chomp($name, $territory, $charmap);

    # We don't want UTF-8.
    next LOCALE if $charmap eq "UTF-8";

    # Some languages' names are the same as their ISO ids!
    next LOCALE if $name eq substr($lang, 0, 2);

    # A language missing from %defaultTerritory is compared against the
    # empty string, which is what the original undef comparison did.
    my $default = $defaultTerritory{$name};
    $default = "" unless defined $default;

    my $fullName = ($default eq $territory) ? $name : "$name ($territory)";

    $langList{$lang}    = $fullName;
    $charmapList{$lang} = $charmap;
}

close($locales)
    or warn "'locale -a' did not exit cleanly (status $?)\n";

# Several locales can map to the same display name.  Keep one locale per
# name: the longest locale string wins.  Iterating in sorted order makes the
# result for equal-length candidates reproducible instead of depending on
# hash ordering.
my %nameList;       # display name -> chosen locale name
for my $key (sort keys %langList) {
    my $langName = $langList{$key};

    if (!exists $nameList{$langName}
        || length($key) > length($nameList{$langName})) {
        $nameList{$langName} = $key;
    }
}

# Emit the table sorted by display name, skipping charmaps for which either
# lookup table has no entry.
for my $name (sort keys %nameList) {
    my $short = $nameList{$name};
    my $map   = $charmapList{$short};

    next unless $charMap{$map} and $charFont{$map};

    print "$short $charMap{$map} $charFont{$map} $name\n";
}