summaryrefslogtreecommitdiffstats
path: root/contrib/idn/idnkit-1.0-src/util/UCD.pm
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/idn/idnkit-1.0-src/util/UCD.pm')
-rw-r--r--contrib/idn/idnkit-1.0-src/util/UCD.pm194
1 files changed, 194 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/util/UCD.pm b/contrib/idn/idnkit-1.0-src/util/UCD.pm
new file mode 100644
index 0000000..19629e6
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/util/UCD.pm
@@ -0,0 +1,194 @@
+# $Id: UCD.pm,v 1.1.1.1 2003/06/04 00:27:53 marka Exp $
+#
+# Copyright (c) 2000,2001 Japan Network Information Center.
+# All rights reserved.
+#
+# By using this file, you agree to the terms and conditions set forth bellow.
+#
+# LICENSE TERMS AND CONDITIONS
+#
+# The following License Terms and Conditions apply, unless a different
+# license is obtained from Japan Network Information Center ("JPNIC"),
+# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
+# Chiyoda-ku, Tokyo 101-0047, Japan.
+#
+# 1. Use, Modification and Redistribution (including distribution of any
+# modified or derived work) in source and/or binary forms is permitted
+# under this License Terms and Conditions.
+#
+# 2. Redistribution of source code must retain the copyright notices as they
+# appear in each source code file, this License Terms and Conditions.
+#
+# 3. Redistribution in binary form must reproduce the Copyright Notice,
+# this License Terms and Conditions, in the documentation and/or other
+# materials provided with the distribution. For the purposes of binary
+# distribution the "Copyright Notice" refers to the following language:
+# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
+#
+# 4. The name of JPNIC may not be used to endorse or promote products
+# derived from this Software without specific prior written approval of
+# JPNIC.
+#
+# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+
+package UCD;
+
+#
+# UCD.pm -- parser for Unicode Character Database files.
+#
+# This file is an aggregation of the following modules, each of which
+# provides a parser for a specific data file of UCD.
+# UCD::UnicodeData -- for UnicodeData.txt
+# UCD::CaseFolding -- for CaseFolding.txt
+# UCD::SpecialCasing -- for SpecialCasing.txt
+# UCD::CompositionExclusions -- for CompositionExclusions-1.txt
+#
+# Each module provides two subroutines:
+#
+# $line = getline(\*HANDLE);
+# reads next non-comment line from HANDLE, and returns it.
+# undef will be returned upon EOF.
+#
+# %fields = parse($line);
+# parses a line and extract fields, and returns a list of
+# field name and its value, suitable for assignment to a hash.
+#
+
+package UCD::UnicodeData;
+
+use strict;
+use Carp;
+
+sub getline {
+ my $fh = shift;
+ my $s = <$fh>;
+ $s =~ s/\r?\n$// if $s;
+ $s;
+}
+
+sub parseline {
+ my $s = shift;
+
+ my @f = split /;/, $s, -1;
+ return (CODE => hex($f[0]),
+ NAME => $f[1],
+ CATEGORY => $f[2],
+ CLASS => $f[3]+0,
+ BIDI => $f[4],
+ DECOMP => dcmap($f[5]),
+ DECIMAL => dvalue($f[6]),
+ DIGIT => dvalue($f[7]),
+ NUMERIC => dvalue($f[8]),
+ MIRRORED => $f[9] eq 'Y',
+ NAME10 => $f[10],
+ COMMENT => $f[11],
+ UPPER => ucode($f[12]),
+ LOWER => ucode($f[13]),
+ TITLE => ucode($f[14]));
+}
+
+sub dcmap {
+ my $v = shift;
+ return undef if $v eq '';
+ $v =~ /^(?:(<[^>]+>)\s*)?(\S.*)/
+ or croak "invalid decomposition mapping \"$v\"";
+ my $tag = $1 || '';
+ [$tag, map {hex($_)} split(' ', $2)];
+}
+
+sub ucode {
+ my $v = shift;
+ return undef if $v eq '';
+ hex($v);
+}
+
+sub dvalue {
+ my $v = shift;
+ return undef if $v eq '';
+ $v;
+}
+
+package UCD::CaseFolding;
+
+use strict;
+
+sub getline {
+ my $fh = shift;
+ while (defined(my $s = <$fh>)) {
+ next if $s =~ /^\#/;
+ next if $s =~ /^\s*$/;
+ $s =~ s/\r?\n$//;
+ return $s;
+ }
+ undef;
+}
+
+sub parseline {
+ my $s = shift;
+ my @f = split /;\s*/, $s, -1;
+ return (CODE => hex($f[0]),
+ TYPE => $f[1],
+ MAP => [map(hex, split ' ', $f[2])],
+ );
+}
+
+package UCD::SpecialCasing;
+
+use strict;
+
+sub getline {
+ my $fh = shift;
+ while (defined(my $s = <$fh>)) {
+ next if $s =~ /^\#/;
+ next if $s =~ /^\s*$/;
+ $s =~ s/\r?\n$//;
+ return $s;
+ }
+ undef;
+}
+
+sub parseline {
+ my $s = shift;
+
+ my @f = split /;\s*/, $s, -1;
+ my $cond = (@f > 5) ? $f[4] : undef;
+ return (CODE => hex($f[0]),
+ LOWER => [map(hex, split ' ', $f[1])],
+ TITLE => [map(hex, split ' ', $f[2])],
+ UPPER => [map(hex, split ' ', $f[3])],
+ CONDITION => $cond);
+}
+
+package UCD::CompositionExclusions;
+
+use strict;
+
+sub getline {
+ my $fh = shift;
+ while (defined(my $s = <$fh>)) {
+ next if $s =~ /^\#/;
+ next if $s =~ /^\s*$/;
+ $s =~ s/\r?\n$//;
+ return $s;
+ }
+ undef;
+}
+
+sub parseline {
+ my $s = shift;
+ m/^[0-9A-Fa-f]+/;
+ return (CODE => hex($&));
+}
+
+1;