summaryrefslogtreecommitdiffstats
path: root/contrib/idn/idnkit-1.0-src/util
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/idn/idnkit-1.0-src/util')
-rw-r--r--contrib/idn/idnkit-1.0-src/util/Makefile43
-rw-r--r--contrib/idn/idnkit-1.0-src/util/SparseMap.pm575
-rw-r--r--contrib/idn/idnkit-1.0-src/util/UCD.pm194
-rwxr-xr-xcontrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl405
-rwxr-xr-xcontrib/idn/idnkit-1.0-src/util/generate_normalize_data.pl586
5 files changed, 1803 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/util/Makefile b/contrib/idn/idnkit-1.0-src/util/Makefile
new file mode 100644
index 0000000..2f49ddd
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/util/Makefile
@@ -0,0 +1,43 @@
+# $Id: Makefile,v 1.1.1.1 2003/06/04 00:27:52 marka Exp $
+
+PERL = /usr/local/bin/perl -w
+
+V301_NORM_DIR = data/unicode-3.0.0
+V310_NORM_DIR = data/unicode-3.1.0
+V320_NORM_DIR = data/unicode-3.2.0
+NORM_GENERATOR = ./generate_normalize_data.pl
+
+NAMEPREP_DIR = data/nameprep
+NAMEPREP_FILES = \
+ $(NAMEPREP_DIR)/nameprep.rfc3491.map \
+ $(NAMEPREP_DIR)/nameprep.rfc3491.prohibited \
+ $(NAMEPREP_DIR)/nameprep.rfc3491.unassigned \
+ $(NAMEPREP_DIR)/nameprep.rfc3491.bidi
+NAMEPREP_GENERATOR = ./generate_nameprep_data.pl
+NAMEPREP_VERSIONS = rfc3491
+
+all: normalize nameprep
+
+normalize: ../lib/unicodedata_320.c
+
+nameprep: ../lib/nameprepdata.c
+
+../lib/unicodedata_301.c: $(NORM_GENERATOR)
+ -rm -f $@.tmp
+ $(PERL) $(NORM_GENERATOR) -prefix v301_ -dir $(V301_NORM_DIR) \
+ > $@.tmp && mv $@.tmp $@
+
+../lib/unicodedata_310.c: $(NORM_GENERATOR)
+ -rm -f $@.tmp
+ $(PERL) $(NORM_GENERATOR) -prefix v310_ -dir $(V310_NORM_DIR) \
+ > $@.tmp && mv $@.tmp $@
+
+../lib/unicodedata_320.c: $(NORM_GENERATOR)
+ -rm -f $@.tmp
+ $(PERL) $(NORM_GENERATOR) -prefix v320_ -dir $(V320_NORM_DIR) \
+ > $@.tmp && mv $@.tmp $@
+
+../lib/nameprepdata.c: $(NAMEPREP_GENERATOR) $(NAMEPREP_FILES)
+ -rm -f $@.tmp
+ $(PERL) $(NAMEPREP_GENERATOR) -dir $(NAMEPREP_DIR) \
+ $(NAMEPREP_VERSIONS) > $@.tmp && mv $@.tmp $@
diff --git a/contrib/idn/idnkit-1.0-src/util/SparseMap.pm b/contrib/idn/idnkit-1.0-src/util/SparseMap.pm
new file mode 100644
index 0000000..834c795
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/util/SparseMap.pm
@@ -0,0 +1,575 @@
+# $Id: SparseMap.pm,v 1.1.1.1 2003/06/04 00:27:53 marka Exp $
+#
+# Copyright (c) 2001 Japan Network Information Center. All rights reserved.
+#
+# By using this file, you agree to the terms and conditions set forth bellow.
+#
+# LICENSE TERMS AND CONDITIONS
+#
+# The following License Terms and Conditions apply, unless a different
+# license is obtained from Japan Network Information Center ("JPNIC"),
+# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
+# Chiyoda-ku, Tokyo 101-0047, Japan.
+#
+# 1. Use, Modification and Redistribution (including distribution of any
+# modified or derived work) in source and/or binary forms is permitted
+# under this License Terms and Conditions.
+#
+# 2. Redistribution of source code must retain the copyright notices as they
+# appear in each source code file, this License Terms and Conditions.
+#
+# 3. Redistribution in binary form must reproduce the Copyright Notice,
+# this License Terms and Conditions, in the documentation and/or other
+# materials provided with the distribution. For the purposes of binary
+# distribution the "Copyright Notice" refers to the following language:
+# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
+#
+# 4. The name of JPNIC may not be used to endorse or promote products
+# derived from this Software without specific prior written approval of
+# JPNIC.
+#
+# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+
+package SparseMap;
+
+use strict;
+use Carp;
+
+my $debug = 0;
+
+sub new {
+ # common options are:
+ # BITS => [8, 7, 6], # 3-level map, 2nd level bits=7, 3rd = 6.
+ # MAX => 0x110000 # actually, max + 1.
+ my $class = shift;
+ my $self = {@_};
+
+ croak "BITS unspecified" unless exists $self->{BITS};
+ croak "BITS is not an array reference"
+ unless ref($self->{BITS}) eq 'ARRAY';
+ croak "MAX unspecified" unless exists $self->{MAX};
+
+ $self->{MAXLV} = @{$self->{BITS}} - 1;
+ $self->{FIXED} = 0;
+
+ my $lv0size = (indices($self, $self->{MAX} - 1))[0] + 1;
+
+ my @map = (undef) x $lv0size;
+ $self->{MAP} = \@map;
+
+ bless $self, $class;
+}
+
+sub add1 {
+ my ($self, $n, $val) = @_;
+
+ croak "Already fixed" if $self->{FIXED};
+ carp("data ($n) out of range"), return if $n >= $self->{MAX};
+
+ my @index = $self->indices($n);
+ my $r = $self->{MAP};
+ my $maxlv = $self->{MAXLV};
+ my $idx;
+ my $lv;
+
+ for ($lv = 0; $lv < $maxlv - 1; $lv++) {
+ $idx = $index[$lv];
+ $r->[$idx] = $self->create_imap($lv + 1, undef)
+ unless defined $r->[$idx];
+ $r = $r->[$idx];
+ }
+ $idx = $index[$lv];
+ $r->[$idx] = $self->create_dmap() unless defined $r->[$idx];
+ $self->add_to_dmap($r->[$idx], $index[$maxlv], $val);
+}
+
+sub fix {
+ my $self = shift;
+ my $map = $self->{MAP};
+ my $maxlv = $self->{MAXLV};
+ my @tmp;
+ my @zero;
+
+ carp "Already fixed" if $self->{FIXED};
+ $self->collapse_tree();
+ $self->fill_default();
+ $self->{FIXED} = 1;
+}
+
+sub indices {
+ my $self = shift;
+ my $v = shift;
+ my @bits = @{$self->{BITS}};
+ my @idx;
+
+ print "indices($v,", join(',', @bits), ") = " if $debug;
+ for (my $i = @bits - 1; $i >= 0; $i--) {
+ my $bit = $bits[$i];
+ unshift @idx, $v & ((1 << $bit) - 1);
+ $v = $v >> $bit;
+ }
+ print "(", join(',', @idx), ")\n" if $debug;
+ @idx;
+}
+
+sub get {
+ my $self = shift;
+ my $v = shift;
+ my $map = $self->{MAP};
+ my @index = $self->indices($v);
+
+ croak "Not yet fixed" unless $self->{FIXED};
+
+ my $lastidx = pop @index;
+ foreach my $idx (@index) {
+ return $map->{DEFAULT} unless defined $map->[$idx];
+ $map = $map->[$idx];
+ }
+ $map->[$lastidx];
+}
+
+sub indirectmap {
+ my $self = shift;
+
+ croak "Not yet fixed" unless $self->{FIXED};
+
+ my @maps = $self->collect_maps();
+ my $maxlv = $self->{MAXLV};
+ my @bits = @{$self->{BITS}};
+
+ my @indirect = ();
+ for (my $lv = 0; $lv < $maxlv; $lv++) {
+ my $offset;
+ my $chunksz;
+ my $mapsz = @{$maps[$lv]->[0]};
+ if ($lv < $maxlv - 1) {
+ # indirect map
+ $offset = @indirect + @{$maps[$lv]} * @{$maps[$lv]->[0]};
+ $chunksz = (1 << $bits[$lv + 1]);
+ } else {
+ # direct map
+ $offset = 0;
+ $chunksz = 1;
+ }
+ my $nextmaps = $maps[$lv + 1];
+ foreach my $mapref (@{$maps[$lv]}) {
+ croak "mapsize inconsistent ", scalar(@$mapref),
+ " should be ", $mapsz, " (lv $lv)\n" if @$mapref != $mapsz;
+ foreach my $m (@$mapref) {
+ my $idx;
+ for ($idx = 0; $idx < @$nextmaps; $idx++) {
+ last if $nextmaps->[$idx] == $m;
+ }
+ croak "internal error: map corrupted" if $idx >= @$nextmaps;
+ push @indirect, $offset + $chunksz * $idx;
+ }
+ }
+ }
+ @indirect;
+}
+
+sub cprog_imap {
+ my $self = shift;
+ my %opt = @_;
+ my $name = $opt{NAME} || 'map';
+ my @indirect = $self->indirectmap();
+ my $prog;
+ my $i;
+ my ($idtype, $idcol, $idwid);
+
+ my $max = 0;
+ $max < $_ and $max = $_ foreach @indirect;
+
+ if ($max < 256) {
+ $idtype = 'char';
+ $idcol = 8;
+ $idwid = 3;
+ } elsif ($max < 65536) {
+ $idtype = 'short';
+ $idcol = 8;
+ $idwid = 5;
+ } else {
+ $idtype = 'long';
+ $idcol = 4;
+ $idwid = 10;
+ }
+ $prog = "static const unsigned $idtype ${name}_imap[] = {\n";
+ $i = 0;
+ foreach my $v (@indirect) {
+ if ($i % $idcol == 0) {
+ $prog .= "\n" if $i != 0;
+ $prog .= "\t";
+ }
+ $prog .= sprintf "%${idwid}d, ", $v;
+ $i++;
+ }
+ $prog .= "\n};\n";
+ $prog;
+}
+
+sub cprog {
+ my $self = shift;
+ $self->cprog_imap(@_) . "\n" . $self->cprog_dmap(@_);
+}
+
+sub stat {
+ my $self = shift;
+ my @maps = $self->collect_maps();
+ my $elsize = $self->{ELSIZE};
+ my $i;
+ my $total = 0;
+ my @lines;
+
+ for ($i = 0; $i < $self->{MAXLV}; $i++) {
+ my $nmaps = @{$maps[$i]};
+ my $mapsz = @{$maps[$i]->[0]};
+ push @lines, "level $i: $nmaps maps (size $mapsz) ";
+ push @lines, "[", $nmaps * $mapsz * $elsize, "]" if $elsize;
+ push @lines, "\n";
+ }
+ my $ndmaps = @{$maps[$i]};
+ push @lines, "level $i: $ndmaps dmaps";
+ my $r = $maps[$i]->[0];
+ if (ref($r) eq 'ARRAY') {
+ push @lines, " (size ", scalar(@$r), ")";
+ }
+ push @lines, "\n";
+ join '', @lines;
+}
+
+sub collapse_tree {
+ my $self = shift;
+ my @tmp;
+
+ $self->_collapse_tree_rec($self->{MAP}, 0, \@tmp);
+}
+
+sub _collapse_tree_rec {
+ my ($self, $r, $lv, $refs) = @_;
+ my $ref = $refs->[$lv];
+ my $maxlv = $self->{MAXLV};
+ my $found;
+
+ return $r unless defined $r;
+
+ $ref = $refs->[$lv] = [] unless defined $ref;
+
+ if ($lv == $maxlv) {
+ $found = $self->find_dmap($ref, $r);
+ } else {
+ for (my $i = 0; $i < @$r; $i++) {
+ $r->[$i] = $self->_collapse_tree_rec($r->[$i], $lv + 1, $refs);
+ }
+ $found = $self->find_imap($ref, $r);
+ }
+ unless ($found) {
+ $found = $r;
+ push @$ref, $found;
+ }
+ return $found;
+}
+
+sub fill_default {
+ my $self = shift;
+ my $maxlv = $self->{MAXLV};
+ my $bits = $self->{BITS};
+ my @zeros;
+
+ $zeros[$maxlv] = $self->create_dmap();
+ for (my $lv = $maxlv - 1; $lv >= 0; $lv--) {
+ my $r = $zeros[$lv + 1];
+ $zeros[$lv] = $self->create_imap($lv, $r);
+ }
+ _fill_default_rec($self->{MAP}, 0, $maxlv, \@zeros);
+}
+
+sub _fill_default_rec {
+ my ($r, $lv, $maxlv, $zeros) = @_;
+
+ return if $lv == $maxlv;
+ for (my $i = 0; $i < @$r; $i++) {
+ if (defined($r->[$i])) {
+ _fill_default_rec($r->[$i], $lv + 1, $maxlv, $zeros);
+ } else {
+ $r->[$i] = $zeros->[$lv + 1];
+ }
+ }
+}
+
+sub create_imap {
+ my ($self, $lv, $v) = @_;
+ my @map;
+ @map = ($v) x (1 << $self->{BITS}->[$lv]);
+ \@map;
+}
+
+sub find_imap {
+ my ($self, $maps, $map) = @_;
+ my $i;
+
+ foreach my $el (@$maps) {
+ next unless @$el == @$map;
+ for ($i = 0; $i < @$el; $i++) {
+ last unless ($el->[$i] || 0) == ($map->[$i] || 0);
+ }
+ return $el if $i >= @$el;
+ }
+ undef;
+}
+
+sub collect_maps {
+ my $self = shift;
+ my @maps;
+ _collect_maps_rec($self->{MAP}, 0, $self->{MAXLV}, \@maps);
+ @maps;
+}
+
+sub _collect_maps_rec {
+ my ($r, $lv, $maxlv, $maps) = @_;
+ my $mapref = $maps->[$lv];
+
+ return unless defined $r;
+ foreach my $ref (@{$mapref}) {
+ return if $ref == $r;
+ }
+ push @{$maps->[$lv]}, $r;
+ if ($lv < $maxlv) {
+ _collect_maps_rec($_, $lv + 1, $maxlv, $maps) foreach @{$r};
+ }
+}
+
+sub add {confess "Subclass responsibility";}
+sub create_dmap {confess "Subclass responsibility";}
+sub add_to_dmap {confess "Subclass responsibility";}
+sub find_dmap {confess "Subclass responsibility";}
+sub cprog_dmap {confess "Subclass responsibility";}
+
+1;
+
+package SparseMap::Bit;
+
+use strict;
+use vars qw(@ISA);
+use Carp;
+#use SparseMap;
+
+@ISA = qw(SparseMap);
+
+sub new {
+ my $class = shift;
+ my $self = $class->SUPER::new(@_);
+ $self->{DEFAULT} = 0;
+ bless $self, $class;
+}
+
+sub add {
+ my $self = shift;
+
+ $self->add1($_, undef) foreach @_;
+}
+
+sub create_dmap {
+ my $self = shift;
+ my $bmbits = $self->{BITS}->[-1];
+
+ my $s = "\0" x (1 << ($bmbits - 3));
+ \$s;
+}
+
+sub add_to_dmap {
+ my ($self, $map, $idx, $val) = @_;
+ vec($$map, $idx, 1) = 1;
+}
+
+sub find_dmap {
+ my ($self, $ref, $r) = @_;
+ foreach my $map (@$ref) {
+ return $map if $$map eq $$r;
+ }
+ return undef;
+}
+
+sub cprog_dmap {
+ my $self = shift;
+ my %opt = @_;
+ my $name = $opt{NAME} || 'map';
+ my @maps = $self->collect_maps();
+ my @bitmap = @{$maps[-1]};
+ my $prog;
+ my $bmsize = 1 << ($self->{BITS}->[-1] - 3);
+
+ $prog = <<"END";
+static const struct {
+ unsigned char bm[$bmsize];
+} ${name}_bitmap[] = {
+END
+
+ foreach my $bm (@bitmap) {
+ my $i = 0;
+ $prog .= "\t{{\n";
+ foreach my $v (unpack 'C*', $$bm) {
+ if ($i % 16 == 0) {
+ $prog .= "\n" if $i != 0;
+ $prog .= "\t";
+ }
+ $prog .= sprintf "%3d,", $v;
+ $i++;
+ }
+ $prog .= "\n\t}},\n";
+ }
+ $prog .= "};\n";
+ $prog;
+}
+
+1;
+
+package SparseMap::Int;
+
+use strict;
+use vars qw(@ISA);
+use Carp;
+#use SparseMap;
+
+@ISA = qw(SparseMap);
+
+sub new {
+ my $class = shift;
+ my $self = $class->SUPER::new(@_);
+ $self->{DEFAULT} = 0 unless exists $self->{DEFAULT};
+ bless $self, $class;
+}
+
+sub add {
+ my $self = shift;
+ while (@_ > 0) {
+ my $n = shift;
+ my $val = shift;
+ $self->add1($n, $val);
+ }
+}
+
+sub create_dmap {
+ my $self = shift;
+ my $tblbits = $self->{BITS}->[-1];
+ my $default = $self->{DEFAULT};
+
+ my @tbl = ($default) x (1 << $tblbits);
+ \@tbl;
+}
+
+sub add_to_dmap {
+ my ($self, $map, $idx, $val) = @_;
+ $map->[$idx] = $val;
+}
+
+sub find_dmap {
+ my ($self, $ref, $r) = @_;
+ foreach my $map (@$ref) {
+ if (@$map == @$r) {
+ my $i;
+ for ($i = 0; $i < @$map; $i++) {
+ last if $map->[$i] != $r->[$i];
+ }
+ return $map if $i == @$map;
+ }
+ }
+ return undef;
+}
+
+sub cprog_dmap {
+ my $self = shift;
+ my %opt = @_;
+ my $name = $opt{NAME} || 'map';
+ my @maps = $self->collect_maps();
+ my @table = @{$maps[-1]};
+ my $prog;
+ my $i;
+ my ($idtype, $idcol, $idwid);
+ my $tblsize = 1 << $self->{BITS}->[-1];
+
+ my ($min, $max);
+ foreach my $a (@table) {
+ foreach my $v (@$a) {
+ $min = $v if !defined($min) or $min > $v;
+ $max = $v if !defined($max) or $max < $v;
+ }
+ }
+ if (exists $opt{MAPTYPE}) {
+ $idtype = $opt{MAPTYPE};
+ } else {
+ my $u = $min < 0 ? '' : 'unsigned ';
+ my $absmax = abs($max);
+ $absmax = abs($min) if abs($min) > $absmax;
+
+ if ($absmax < 256) {
+ $idtype = "${u}char";
+ } elsif ($absmax < 65536) {
+ $idtype = "${u}short";
+ } else {
+ $idtype = "${u}long";
+ }
+ }
+
+ $idwid = decimalwidth($max);
+ $idwid = decimalwidth($min) if decimalwidth($min) > $idwid;
+
+ $prog = <<"END";
+static const struct {
+ $idtype tbl[$tblsize];
+} ${name}_table[] = {
+END
+
+ foreach my $a (@table) {
+ my $i = 0;
+ my $col = 0;
+ $prog .= "\t{{\n\t";
+ foreach my $v (@$a) {
+ my $s = sprintf "%${idwid}d, ", $v;
+ $col += length($s);
+ if ($col > 70) {
+ $prog .= "\n\t";
+ $col = length($s);
+ }
+ $prog .= $s;
+ }
+ $prog .= "\n\t}},\n";
+ }
+ $prog .= "};\n";
+ $prog;
+}
+
+sub decimalwidth {
+ my $n = shift;
+ my $neg = 0;
+ my $w;
+
+ if ($n < 0) {
+ $neg = 1;
+ $n = -$n;
+ }
+ if ($n < 100) {
+ $w = 2;
+ } elsif ($n < 10000) {
+ $w = 4;
+ } elsif ($n < 1000000) {
+ $w = 6;
+ } elsif ($n < 100000000) {
+ $w = 8;
+ } else {
+ $w = 10;
+ }
+ $w + $neg;
+}
+
+1;
diff --git a/contrib/idn/idnkit-1.0-src/util/UCD.pm b/contrib/idn/idnkit-1.0-src/util/UCD.pm
new file mode 100644
index 0000000..19629e6
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/util/UCD.pm
@@ -0,0 +1,194 @@
+# $Id: UCD.pm,v 1.1.1.1 2003/06/04 00:27:53 marka Exp $
+#
+# Copyright (c) 2000,2001 Japan Network Information Center.
+# All rights reserved.
+#
+# By using this file, you agree to the terms and conditions set forth bellow.
+#
+# LICENSE TERMS AND CONDITIONS
+#
+# The following License Terms and Conditions apply, unless a different
+# license is obtained from Japan Network Information Center ("JPNIC"),
+# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
+# Chiyoda-ku, Tokyo 101-0047, Japan.
+#
+# 1. Use, Modification and Redistribution (including distribution of any
+# modified or derived work) in source and/or binary forms is permitted
+# under this License Terms and Conditions.
+#
+# 2. Redistribution of source code must retain the copyright notices as they
+# appear in each source code file, this License Terms and Conditions.
+#
+# 3. Redistribution in binary form must reproduce the Copyright Notice,
+# this License Terms and Conditions, in the documentation and/or other
+# materials provided with the distribution. For the purposes of binary
+# distribution the "Copyright Notice" refers to the following language:
+# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
+#
+# 4. The name of JPNIC may not be used to endorse or promote products
+# derived from this Software without specific prior written approval of
+# JPNIC.
+#
+# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+
+package UCD;
+
+#
+# UCD.pm -- parser for Unicode Character Database files.
+#
+# This file is an aggregation of the following modules, each of which
+# provides a parser for a specific data file of UCD.
+# UCD::UnicodeData -- for UnicodeData.txt
+# UCD::CaseFolding -- for CaseFolding.txt
+# UCD::SpecialCasing -- for SpecialCasing.txt
+# UCD::CompositionExclusions -- for CompositionExclusions-1.txt
+#
+# Each module provides two subroutines:
+#
+# $line = getline(\*HANDLE);
+# reads next non-comment line from HANDLE, and returns it.
+# undef will be returned upon EOF.
+#
+# %fields = parse($line);
+# parses a line and extract fields, and returns a list of
+# field name and its value, suitable for assignment to a hash.
+#
+
+package UCD::UnicodeData;
+
+use strict;
+use Carp;
+
+sub getline {
+ my $fh = shift;
+ my $s = <$fh>;
+ $s =~ s/\r?\n$// if $s;
+ $s;
+}
+
+sub parseline {
+ my $s = shift;
+
+ my @f = split /;/, $s, -1;
+ return (CODE => hex($f[0]),
+ NAME => $f[1],
+ CATEGORY => $f[2],
+ CLASS => $f[3]+0,
+ BIDI => $f[4],
+ DECOMP => dcmap($f[5]),
+ DECIMAL => dvalue($f[6]),
+ DIGIT => dvalue($f[7]),
+ NUMERIC => dvalue($f[8]),
+ MIRRORED => $f[9] eq 'Y',
+ NAME10 => $f[10],
+ COMMENT => $f[11],
+ UPPER => ucode($f[12]),
+ LOWER => ucode($f[13]),
+ TITLE => ucode($f[14]));
+}
+
+sub dcmap {
+ my $v = shift;
+ return undef if $v eq '';
+ $v =~ /^(?:(<[^>]+>)\s*)?(\S.*)/
+ or croak "invalid decomposition mapping \"$v\"";
+ my $tag = $1 || '';
+ [$tag, map {hex($_)} split(' ', $2)];
+}
+
+sub ucode {
+ my $v = shift;
+ return undef if $v eq '';
+ hex($v);
+}
+
+sub dvalue {
+ my $v = shift;
+ return undef if $v eq '';
+ $v;
+}
+
+package UCD::CaseFolding;
+
+use strict;
+
+sub getline {
+ my $fh = shift;
+ while (defined(my $s = <$fh>)) {
+ next if $s =~ /^\#/;
+ next if $s =~ /^\s*$/;
+ $s =~ s/\r?\n$//;
+ return $s;
+ }
+ undef;
+}
+
+sub parseline {
+ my $s = shift;
+ my @f = split /;\s*/, $s, -1;
+ return (CODE => hex($f[0]),
+ TYPE => $f[1],
+ MAP => [map(hex, split ' ', $f[2])],
+ );
+}
+
+package UCD::SpecialCasing;
+
+use strict;
+
+sub getline {
+ my $fh = shift;
+ while (defined(my $s = <$fh>)) {
+ next if $s =~ /^\#/;
+ next if $s =~ /^\s*$/;
+ $s =~ s/\r?\n$//;
+ return $s;
+ }
+ undef;
+}
+
+sub parseline {
+ my $s = shift;
+
+ my @f = split /;\s*/, $s, -1;
+ my $cond = (@f > 5) ? $f[4] : undef;
+ return (CODE => hex($f[0]),
+ LOWER => [map(hex, split ' ', $f[1])],
+ TITLE => [map(hex, split ' ', $f[2])],
+ UPPER => [map(hex, split ' ', $f[3])],
+ CONDITION => $cond);
+}
+
+package UCD::CompositionExclusions;
+
+use strict;
+
+sub getline {
+ my $fh = shift;
+ while (defined(my $s = <$fh>)) {
+ next if $s =~ /^\#/;
+ next if $s =~ /^\s*$/;
+ $s =~ s/\r?\n$//;
+ return $s;
+ }
+ undef;
+}
+
+sub parseline {
+ my $s = shift;
+ m/^[0-9A-Fa-f]+/;
+ return (CODE => hex($&));
+}
+
+1;
diff --git a/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl b/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl
new file mode 100755
index 0000000..31dd18b
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl
@@ -0,0 +1,405 @@
+#! /usr/local/bin/perl -w
+# $Id: generate_nameprep_data.pl,v 1.1.1.1 2003/06/04 00:27:54 marka Exp $
+#
+# Copyright (c) 2001 Japan Network Information Center. All rights reserved.
+#
+# By using this file, you agree to the terms and conditions set forth bellow.
+#
+# LICENSE TERMS AND CONDITIONS
+#
+# The following License Terms and Conditions apply, unless a different
+# license is obtained from Japan Network Information Center ("JPNIC"),
+# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
+# Chiyoda-ku, Tokyo 101-0047, Japan.
+#
+# 1. Use, Modification and Redistribution (including distribution of any
+# modified or derived work) in source and/or binary forms is permitted
+# under this License Terms and Conditions.
+#
+# 2. Redistribution of source code must retain the copyright notices as they
+# appear in each source code file, this License Terms and Conditions.
+#
+# 3. Redistribution in binary form must reproduce the Copyright Notice,
+# this License Terms and Conditions, in the documentation and/or other
+# materials provided with the distribution. For the purposes of binary
+# distribution the "Copyright Notice" refers to the following language:
+# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
+#
+# 4. The name of JPNIC may not be used to endorse or promote products
+# derived from this Software without specific prior written approval of
+# JPNIC.
+#
+# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+
+use v5.6.0; # for pack('U')
+use bytes;
+
+use lib qw(.);
+
+use SparseMap;
+use Getopt::Long;
+
+(my $myid = '$Id: generate_nameprep_data.pl,v 1.1.1.1 2003/06/04 00:27:54 marka Exp $') =~ s/\$([^\$]+)\$/\$-$1-\$/;
+
+my @map_bits = (9, 7, 5);
+my @proh_bits = (7, 7, 7);
+my @unas_bits = (7, 7, 7);
+my @bidi_bits = (9, 7, 5);
+
+my @bidi_types = ('OTHERS', 'R_AL', 'L');
+
+my $dir = '.';
+my @versions = ();
+
+GetOptions('dir=s', \$dir) or die usage();
+@versions = @ARGV;
+
+print_header();
+
+bits_definition("MAP", @map_bits);
+bits_definition("PROH", @proh_bits);
+bits_definition("UNAS", @unas_bits);
+bits_definition("BIDI", @bidi_bits);
+
+generate_data($_) foreach @ARGV;
+
+sub usage {
+ die "Usage: $0 [-dir dir] version..\n";
+}
+
+sub generate_data {
+ my $version = shift;
+ generate_mapdata($version, "$dir/nameprep.$version.map");
+ generate_prohibiteddata($version, "$dir/nameprep.$version.prohibited");
+ generate_unassigneddata($version, "$dir/nameprep.$version.unassigned");
+ generate_bididata($version, "$dir/nameprep.$version.bidi");
+}
+
+#
+# Generate mapping data.
+#
+sub generate_mapdata {
+ my $version = shift;
+ my $file = shift;
+
+ my $map = SparseMap::Int->new(BITS => [@map_bits],
+ MAX => 0x110000,
+ MAPALL => 1,
+ DEFAULT => 0);
+ open FILE, $file or die "cannot open $file: $!\n";
+
+ my $mapbuf = "\0"; # dummy
+ my %maphash = ();
+ while (<FILE>) {
+ if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
+ my $same_as = $1;
+ if (grep {$_ eq $same_as} @versions > 0) {
+ generate_map_ref($version, $same_as);
+ close FILE;
+ return;
+ }
+ next;
+ }
+ next if /^\#/;
+ next if /^\s*$/;
+ register_map($map, \$mapbuf, \%maphash, $_);
+ }
+ close FILE;
+ generate_map($version, $map, \$mapbuf);
+}
+
+#
+# Generate prohibited character data.
+#
+sub generate_prohibiteddata {
+ my $version = shift;
+ my $file = shift;
+
+ my $proh = SparseMap::Bit->new(BITS => [@proh_bits],
+ MAX => 0x110000);
+ open FILE, $file or die "cannot open $file: $!\n";
+ while (<FILE>) {
+ if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
+ my $same_as = $1;
+ if (grep {$_ eq $same_as} @versions > 0) {
+ generate_prohibited_ref($version, $same_as);
+ close FILE;
+ return;
+ }
+ next;
+ }
+ next if /^\#/;
+ next if /^\s*$/;
+ register_prohibited($proh, $_);
+ }
+ close FILE;
+ generate_prohibited($version, $proh);
+}
+
+#
+# Generate unassigned codepoint data.
+#
+sub generate_unassigneddata {
+ my $version = shift;
+ my $file = shift;
+
+ my $unas = SparseMap::Bit->new(BITS => [@unas_bits],
+ MAX => 0x110000);
+ open FILE, $file or die "cannot open $file: $!\n";
+ while (<FILE>) {
+ if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
+ my $same_as = $1;
+ if (grep {$_ eq $same_as} @versions > 0) {
+ generate_unassigned_ref($version, $same_as);
+ close FILE;
+ return;
+ }
+ next;
+ }
+ next if /^\#/;
+ next if /^\s*$/;
+ register_unassigned($unas, $_);
+ }
+ close FILE;
+ generate_unassigned($version, $unas);
+}
+
+#
+# Generate data of bidi "R" or "AL" characters.
+#
+sub generate_bididata {
+ my $version = shift;
+ my $file = shift;
+
+ my $bidi = SparseMap::Int->new(BITS => [@bidi_bits],
+ MAX => 0x110000);
+ open FILE, $file or die "cannot open $file: $!\n";
+
+ my $type = 0;
+ while (<FILE>) {
+ if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
+ my $same_as = $1;
+ if (grep {$_ eq $same_as} @versions > 0) {
+ generate_unassigned_ref($version, $same_as);
+ close FILE;
+ return;
+ }
+ next;
+ }
+ if (/^%\s*BIDI_TYPE\s+(\S+)$/) {
+ my $i = 0;
+ for ($i = 0; $i < @bidi_types; $i++) {
+ if ($1 eq $bidi_types[$i]) {
+ $type = $i;
+ last;
+ }
+ }
+ die "unrecognized line: $_" if ($i >= @bidi_types);
+ next;
+ }
+ next if /^\#/;
+ next if /^\s*$/;
+ register_bidi($bidi, $type, $_);
+ }
+ close FILE;
+
+ generate_bidi($version, $bidi);
+}
+
+sub print_header {
+ print <<"END";
+/* \$Id\$ */
+/* $myid */
+/*
+ * Do not edit this file!
+ * This file is generated from NAMEPREP specification.
+ */
+
+END
+}
+
+sub bits_definition {
+ my $name = shift;
+ my @bits = @_;
+ my $i = 0;
+
+ foreach my $n (@bits) {
+ print "#define ${name}_BITS_$i\t$n\n";
+ $i++;
+ }
+ print "\n";
+}
+
+sub register_map {
+ my ($map, $bufref, $hashref, $line) = @_;
+
+ my ($from, $to) = split /;/, $line;
+ my @fcode = map {hex($_)} split ' ', $from;
+ my @tcode = map {hex($_)} split ' ', $to;
+
+ my $ucs4 = pack('V*', @tcode);
+ $ucs4 =~ s/\000+$//;
+
+ my $offset;
+ if (exists $hashref->{$ucs4}) {
+ $offset = $hashref->{$ucs4};
+ } else {
+ $offset = length $$bufref;
+ $$bufref .= pack('C', length($ucs4)) . $ucs4;
+ $hashref->{$ucs4} = $offset;
+ }
+
+ die "unrecognized line: $line" if @fcode != 1;
+ $map->add($fcode[0], $offset);
+}
+
+sub generate_map {
+ my ($version, $map, $bufref) = @_;
+
+ $map->fix();
+
+ print $map->cprog(NAME => "nameprep_${version}_map");
+ print "\nstatic const unsigned char nameprep_${version}_map_data[] = \{\n";
+ print_uchararray($$bufref);
+ print "};\n\n";
+}
+
+sub generate_map_ref {
+ my ($version, $refversion) = @_;
+ print <<"END";
+#define nameprep_${version}_map_imap nameprep_${refversion}_map_imap
+#define nameprep_${version}_map_table nameprep_${refversion}_map_table
+#define nameprep_${version}_map_data nameprep_${refversion}_map_data
+
+END
+}
+
+sub print_uchararray {
+ my @chars = unpack 'C*', $_[0];
+ my $i = 0;
+ foreach my $v (@chars) {
+ if ($i % 12 == 0) {
+ print "\n" if $i != 0;
+ print "\t";
+ }
+ printf "%3d, ", $v;
+ $i++;
+ }
+ print "\n";
+}
+
+sub register_prohibited {
+ my $proh = shift;
+ register_bitmap($proh, @_);
+}
+
+sub register_unassigned {
+ my $unas = shift;
+ register_bitmap($unas, @_);
+}
+
+sub register_bidi {
+ my $bidi = shift;
+ my $type = shift;
+ register_intmap($bidi, $type, @_);
+}
+
+sub generate_prohibited {
+ my ($version, $proh) = @_;
+ generate_bitmap($proh, "nameprep_${version}_prohibited");
+ print "\n";
+}
+
+sub generate_prohibited_ref {
+ my ($version, $refversion) = @_;
+ print <<"END";
+#define nameprep_${version}_prohibited_imap nameprep_${refversion}_prohibited_imap
+#define nameprep_${version}_prohibited_bitmap nameprep_${refversion}_prohibited_bitmap
+
+END
+}
+
+sub generate_unassigned {
+ my ($version, $unas) = @_;
+ generate_bitmap($unas, "nameprep_${version}_unassigned");
+ print "\n";
+}
+
+sub generate_unassigned_ref {
+ my ($version, $refversion) = @_;
+ print <<"END";
+#define nameprep_${version}_unassigned_imap nameprep_${refversion}_unassigned_imap
+#define nameprep_${version}_unassigned_bitmap nameprep_${refversion}_unassigned_bitmap
+
+END
+}
+
+sub generate_bidi {
+ my ($version, $bidi) = @_;
+
+ $bidi->fix();
+
+ print $bidi->cprog(NAME => "nameprep_${version}_bidi");
+ print "\n";
+ print "static const unsigned char nameprep_${version}_bidi_data[] = \{\n";
+
+ foreach my $type (@bidi_types) {
+ printf "\tidn_biditype_%s, \n", lc($type);
+ }
+ print "};\n\n";
+}
+
+sub generate_bidi_ref {
+ my ($version, $refversion) = @_;
+ print <<"END";
+#define nameprep_${version}_bidi_imap nameprep_${refversion}_bidi_imap
+#define nameprep_${version}_bidi_table nameprep_${refversion}_bidi_table
+
+END
+}
+
+sub register_bitmap {
+ my $map = shift;
+ my $line = shift;
+
+ /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/ or die "unrecognized line: $line";
+ my $start = hex($1);
+ my $end = defined($2) ? hex($2) : undef;
+ if (defined $end) {
+ $map->add($start .. $end);
+ } else {
+ $map->add($start);
+ }
+}
+
+sub register_intmap {
+ my $map = shift;
+ my $value = shift;
+ my $line = shift;
+
+ /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/ or die "unrecognized line: $line";
+ my $start = hex($1);
+ my $end = defined($2) ? hex($2) : $start;
+ for (my $i = $start; $i <= $end; $i++) {
+ $map->add($i, $value);
+ }
+}
+
+sub generate_bitmap {
+ my $map = shift;
+ my $name = shift;
+ $map->fix();
+ #$map->stat();
+ print $map->cprog(NAME => $name);
+}
diff --git a/contrib/idn/idnkit-1.0-src/util/generate_normalize_data.pl b/contrib/idn/idnkit-1.0-src/util/generate_normalize_data.pl
new file mode 100755
index 0000000..fe81648
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/util/generate_normalize_data.pl
@@ -0,0 +1,586 @@
+#! /usr/local/bin/perl -w
+# $Id: generate_normalize_data.pl,v 1.1.1.1 2003/06/04 00:27:55 marka Exp $
+#
+# Copyright (c) 2000,2001 Japan Network Information Center.
+# All rights reserved.
+#
+# By using this file, you agree to the terms and conditions set forth bellow.
+#
+# LICENSE TERMS AND CONDITIONS
+#
+# The following License Terms and Conditions apply, unless a different
+# license is obtained from Japan Network Information Center ("JPNIC"),
+# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
+# Chiyoda-ku, Tokyo 101-0047, Japan.
+#
+# 1. Use, Modification and Redistribution (including distribution of any
+# modified or derived work) in source and/or binary forms is permitted
+# under this License Terms and Conditions.
+#
+# 2. Redistribution of source code must retain the copyright notices as they
+# appear in each source code file, this License Terms and Conditions.
+#
+# 3. Redistribution in binary form must reproduce the Copyright Notice,
+# this License Terms and Conditions, in the documentation and/or other
+# materials provided with the distribution. For the purposes of binary
+# distribution the "Copyright Notice" refers to the following language:
+# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
+#
+# 4. The name of JPNIC may not be used to endorse or promote products
+# derived from this Software without specific prior written approval of
+# JPNIC.
+#
+# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+
+#
+# Generate lib/unicodedata.c from UnicodeData.txt,
+# CompositionExclusions-1.txt, SpecialCasing.txt and CaseFolding.txt,
+# all of them available from ftp://ftp.unicode.org/Public/UNIDATA/.
+#
+
+use strict;
+use lib qw(.);
+
+use Getopt::Long;
+use UCD;
+use SparseMap;
+
+use constant UCS_MAX => 0x110000;
+use constant END_BIT => 0x80000000;
+
+my $DECOMP_COMPAT_BIT = 0x8000;
+
+my $CASEMAP_FINAL_BIT = 0x1;
+my $CASEMAP_NONFINAL_BIT = 0x2;
+my $CASEMAP_LAST_BIT = 0x10;
+
+my $LETTER_BIT = 1;
+my $NSPMARK_BIT = 2;
+
+(my $myid = '$Id: generate_normalize_data.pl,v 1.1.1.1 2003/06/04 00:27:55 marka Exp $') =~ s/\$([^\$]+)\$/\$-$1-\$/;
+
+my @default_bits = (9, 7, 5);
+#my @default_bits = (7, 7, 7);
+my @canon_class_bits = @default_bits;
+my @decomp_bits = @default_bits;
+my @comp_bits = @default_bits;
+my @folding_bits = @default_bits;
+my @casemap_bits = @default_bits;
+my @casemap_ctx_bits = @default_bits;
+
+my $prefix = '';
+my $dir = '.';
+my $unicodedatafile = 'UnicodeData.txt';
+my $exclusionfile = 'CompositionExclusions.txt';
+my $specialcasefile = 'SpecialCasing.txt';
+my $casefoldingfile = 'CaseFolding.txt';
+my $verbose;
+
+GetOptions('dir|d=s' => \$dir,
+ 'unicodedata|u=s' => \$unicodedatafile,
+ 'exclude|e=s' => \$exclusionfile,
+ 'specialcase|s=s' => \$specialcasefile,
+ 'casefold|c=s' => \$casefoldingfile,
+ 'prefix|p=s' => \$prefix,
+ 'verbose|v' => \$verbose,
+) or usage();
+
+foreach my $r (\$unicodedatafile, \$exclusionfile,
+ \$specialcasefile, \$casefoldingfile) {
+ $$r = "$dir/$$r" unless $$r =~ m|^/|;
+}
+
+my %exclusions;
+my %lower_special;
+my %upper_special;
+
+my @decomp_data;
+my @comp_data;
+my @toupper_data;
+my @tolower_data;
+my @folding_data;
+
+#
+# Create Mapping/Bitmap objects.
+#
+
+# canonical class
+my $canon_class = SparseMap::Int->new(BITS => [@canon_class_bits],
+ MAX => UCS_MAX,
+ MAPALL => 1,
+ DEFAULT => 0);
+
+# canonical/compatibility decomposition
+my $decomp = SparseMap::Int->new(BITS => [@decomp_bits],
+ MAX => UCS_MAX,
+ MAPALL => 1,
+ DEFAULT => 0);
+
+# canonical composition
+my $comp = SparseMap::Int->new(BITS => [@comp_bits],
+ MAX => UCS_MAX,
+ MAPALL => 1,
+ DEFAULT => 0);
+
+# uppercase/lowercase
+my $upper = SparseMap::Int->new(BITS => [@casemap_bits],
+ MAX => UCS_MAX,
+ MAPALL => 1,
+ DEFAULT => 0);
+my $lower = SparseMap::Int->new(BITS => [@casemap_bits],
+ MAX => UCS_MAX,
+ MAPALL => 1,
+ DEFAULT => 0);
+
+# final/nonfinal context
+my $casemap_ctx = SparseMap::Int->new(BITS => [@casemap_ctx_bits],
+ MAX => UCS_MAX,
+ MAPALL => 1,
+ DEFAULT => 0);
+
+# casefolding
+my $folding = SparseMap::Int->new(BITS => [@folding_bits],
+ MAX => UCS_MAX,
+ MAPALL => 1,
+ DEFAULT => 0);
+
+#
+# Read datafiles.
+#
+
+read_exclusion_file();
+read_specialcasing_file();
+read_unicodedata_file();
+read_casefolding_file();
+
+print_header();
+print_canon_class();
+print_composition();
+print_decomposition();
+print_casemap();
+print_casemap_context();
+print_casefolding();
+
+exit;
+
+sub usage {
+ print STDERR <<"END";
+Usage: $0 [options..]
+ options:
+ -d DIR directory where Unicode Character Data files resides [./]
+ -u FILE name of the UnicodeData file [UnicodeData.txt]
+ -e FILE name of the CompositionExclusion file [CompositionExclusions-1.txt]
+ -s FILE name of the SpecialCasing file [SpecialCasing.txt]
+ -c FILE name of the CaseFolding file [CaseFolding.txt]
+END
+ exit 1;
+}
+
+#
+# read_exclusion_file -- read CompositionExclusions-1.txt.
+#
+sub read_exclusion_file {
+ open EXCLUDE, $exclusionfile or die "cannot open $exclusionfile: $!\n";
+ while ($_ = UCD::CompositionExclusions::getline(\*EXCLUDE)) {
+ my %data = UCD::CompositionExclusions::parseline($_);
+ $exclusions{$data{CODE}} = 1;
+ }
+ close EXCLUDE;
+}
+
+#
+# read_specialcasing_file -- read SpecialCasing.txt
+#
+sub read_specialcasing_file {
+ open SPCASE, $specialcasefile or die "cannot open $specialcasefile: $!\n";
+ while ($_ = UCD::SpecialCasing::getline(\*SPCASE)) {
+ my %data = UCD::SpecialCasing::parseline($_);
+ my $code = $data{CODE};
+ my $lower = $data{LOWER};
+ my $upper = $data{UPPER};
+ my $cond = $data{CONDITION} || '';
+
+ next unless $cond eq '' or $cond =~ /^(NON_)?FINAL/;
+
+ if (defined $cond && (@$lower > 1 || $lower->[0] != $code)
+ or @$lower > 1 or $lower->[0] != $code) {
+ $lower_special{$code} = [$lower, $cond];
+ }
+ if (defined $cond && (@$upper > 1 || $upper->[0] != $code)
+ or @$upper > 1 or $upper->[0] != $code) {
+ $upper_special{$code} = [$upper, $cond];
+ }
+ }
+ close SPCASE;
+}
+
+#
+# read_unicodedata_file -- read UnicodeData.txt
+#
+sub read_unicodedata_file {
+ open UCD, $unicodedatafile or die "cannot open $unicodedatafile: $!\n";
+
+ @decomp_data = (0);
+ @toupper_data = (0);
+ @tolower_data = (0);
+
+ my @comp_cand; # canonical composition candidates
+ my %nonstarter;
+
+ while ($_ = UCD::UnicodeData::getline(\*UCD)) {
+ my %data = UCD::UnicodeData::parseline($_);
+ my $code = $data{CODE};
+
+ # combining class
+ if ($data{CLASS} > 0) {
+ $nonstarter{$code} = 1;
+ $canon_class->add($code, $data{CLASS});
+ }
+
+ # uppercasing
+ if (exists $upper_special{$code} or defined $data{UPPER}) {
+ my $offset = @toupper_data;
+ my @casedata;
+
+ $upper->add($code, $offset);
+ if (exists $upper_special{$code}) {
+ push @casedata, $upper_special{$code};
+ }
+ if (defined $data{UPPER}) {
+ push @casedata, $data{UPPER};
+ }
+ push @toupper_data, casemap_data(@casedata);
+ }
+
+ # lowercasing
+ if (exists $lower_special{$code} or defined $data{LOWER}) {
+ my $offset = @tolower_data;
+ my @casedata;
+
+ $lower->add($code, $offset);
+ if (exists $lower_special{$code}) {
+ push @casedata, $lower_special{$code};
+ }
+ if (defined $data{LOWER}) {
+ push @casedata, $data{LOWER};
+ }
+ push @tolower_data, casemap_data(@casedata);
+ }
+
+ # composition/decomposition
+ if ($data{DECOMP}) {
+ my ($tag, @decomp) = @{$data{DECOMP}};
+ my $offset = @decomp_data;
+
+ # composition
+ if ($tag eq '' and @decomp > 1 and not exists $exclusions{$code}) {
+ # canonical composition candidate
+ push @comp_cand, [$code, @decomp];
+ }
+
+ # decomposition
+ if ($tag ne '') {
+ # compatibility decomposition
+ $offset |= $DECOMP_COMPAT_BIT;
+ }
+ $decomp->add($code, $offset);
+ push @decomp_data, @decomp;
+ $decomp_data[-1] |= END_BIT;
+
+ }
+
+ # final/nonfinal context
+ if ($data{CATEGORY} =~ /L[ult]/) {
+ $casemap_ctx->add($code, $LETTER_BIT);
+ } elsif ($data{CATEGORY} eq 'Mn') {
+ $casemap_ctx->add($code, $NSPMARK_BIT);
+ }
+ }
+ close UCD;
+
+ # Eliminate composition candidates whose decomposition starts with
+ # a non-starter.
+ @comp_cand = grep {not exists $nonstarter{$_->[1]}} @comp_cand;
+
+ @comp_data = ([0, 0, 0]);
+ my $last_code = -1;
+ my $last_offset = @comp_data;
+ for my $r (sort {$a->[1] <=> $b->[1] || $a->[2] <=> $b->[2]} @comp_cand) {
+ if ($r->[1] != $last_code) {
+ $comp->add($last_code,
+ ($last_offset | ((@comp_data - $last_offset)<<16)))
+ unless $last_code == -1;
+ $last_code = $r->[1];
+ $last_offset = @comp_data;
+ }
+ push @comp_data, $r;
+ }
+ $comp->add($last_code,
+ ($last_offset | ((@comp_data - $last_offset)<<16)));
+}
+
+sub casemap_data {
+ my @data = @_;
+ my @result = ();
+ while (@data > 0) {
+ my $r = shift @data;
+ my $flag = 0;
+ if (ref $r) {
+ if ($r->[1] eq 'FINAL') {
+ $flag |= $CASEMAP_FINAL_BIT;
+ } elsif ($r->[1] eq 'NON_FINAL') {
+ $flag |= $CASEMAP_NONFINAL_BIT;
+ } elsif ($r->[1] ne '') {
+ die "unknown condition \"", $r->[1], "\"\n";
+ }
+ }
+ $flag |= $CASEMAP_LAST_BIT if @data == 0;
+ push @result, $flag;
+ push @result, (ref $r) ? @{$r->[0]} : $r;
+ $result[-1] |= END_BIT;
+ }
+ @result;
+}
+
+#
+# read_casefolding_file -- read CaseFolding.txt
+#
+sub read_casefolding_file {
+ open FOLD, $casefoldingfile or die "cannto open $casefoldingfile: $!\n";
+
+ # dummy.
+ @folding_data = (0);
+
+ while ($_ = UCD::CaseFolding::getline(\*FOLD)) {
+ my %data = UCD::CaseFolding::parseline($_);
+
+ $folding->add($data{CODE}, scalar(@folding_data));
+ push @folding_data, @{$data{MAP}};
+ $folding_data[-1] |= END_BIT;
+ }
+ close FOLD;
+}
+
+sub print_header {
+ print <<"END";
+/* \$Id\$ */
+/* $myid */
+/*
+ * Do not edit this file!
+ * This file is generated from UnicodeData.txt, CompositionExclusions-1.txt,
+ * SpecialCasing.txt and CaseFolding.txt.
+ */
+
+END
+}
+
+#
+# print_canon_class -- generate data for canonical class
+#
+sub print_canon_class {
+ $canon_class->fix();
+ print STDERR "** cannon_class\n", $canon_class->stat() if $verbose;
+
+ print <<"END";
+
+/*
+ * Canonical Class
+ */
+
+END
+ print_bits("CANON_CLASS", @canon_class_bits);
+ print "\n";
+ print $canon_class->cprog(NAME => "${prefix}canon_class");
+}
+
+#
+# print_composition -- generate data for canonical composition
+#
+sub print_composition {
+ $comp->fix();
+ print STDERR "** composition\n", $comp->stat() if $verbose;
+
+ print <<"END";
+
+/*
+ * Canonical Composition
+ */
+
+END
+ print_bits("CANON_COMPOSE", @comp_bits);
+ print "\n";
+ print $comp->cprog(NAME => "${prefix}compose");
+ print <<"END";
+
+static const struct composition ${prefix}compose_seq[] = {
+END
+ my $i = 0;
+ foreach my $r (@comp_data) {
+ if ($i % 2 == 0) {
+ print "\n" if $i != 0;
+ print "\t";
+ }
+ printf "{ 0x%08x, 0x%08x }, ", $r->[2], $r->[0];
+ $i++;
+ }
+ print "\n};\n\n";
+}
+
+#
+# print_decomposition -- generate data for canonical/compatibility
+# decomposition
+#
+sub print_decomposition {
+ $decomp->fix();
+ print STDERR "** decomposition\n", $decomp->stat() if $verbose;
+
+ print <<"END";
+
+/*
+ * Canonical/Compatibility Decomposition
+ */
+
+END
+ print_bits("DECOMP", @decomp_bits);
+ print "#define DECOMP_COMPAT\t$DECOMP_COMPAT_BIT\n\n";
+
+ print $decomp->cprog(NAME => "${prefix}decompose");
+
+ print "static const unsigned long ${prefix}decompose_seq[] = {\n";
+ print_ulseq(@decomp_data);
+ print "};\n\n";
+}
+
+#
+# print_casemap -- generate data for case mapping
+#
+sub print_casemap {
+ $upper->fix();
+ $lower->fix();
+ print STDERR "** upper mapping\n", $upper->stat() if $verbose;
+ print STDERR "** lower mapping\n", $lower->stat() if $verbose;
+
+ print <<"END";
+
+/*
+ * Lowercase <-> Uppercase mapping
+ */
+
+/*
+ * Flags for special case mapping.
+ */
+#define CMF_FINAL $CASEMAP_FINAL_BIT
+#define CMF_NONFINAL $CASEMAP_NONFINAL_BIT
+#define CMF_LAST $CASEMAP_LAST_BIT
+#define CMF_CTXDEP (CMF_FINAL|CMF_NONFINAL)
+
+END
+ print_bits("CASEMAP", @casemap_bits);
+ print "\n";
+ print $upper->cprog(NAME => "${prefix}toupper");
+ print $lower->cprog(NAME => "${prefix}tolower");
+
+ print "static const unsigned long ${prefix}toupper_seq[] = {\n";
+ print_ulseq(@toupper_data);
+ print "};\n\n";
+
+ print "static const unsigned long ${prefix}tolower_seq[] = {\n";
+ print_ulseq(@tolower_data);
+ print "};\n\n";
+}
+
+#
+# print_casefolding -- generate data for case folding
+#
+sub print_casefolding {
+ $folding->fix();
+ print STDERR "** case folding\n", $folding->stat() if $verbose;
+
+ print <<"END";
+
+/*
+ * Case Folding
+ */
+
+END
+ print_bits("CASE_FOLDING", @folding_bits);
+ print "\n";
+ print $folding->cprog(NAME => "${prefix}case_folding");
+
+ print "static const unsigned long ${prefix}case_folding_seq[] = {\n";
+ print_ulseq(@folding_data);
+ print "};\n\n";
+}
+
+#
+# print_casemap_context -- gerarate data for determining context
+# (final/non-final)
+#
+sub print_casemap_context {
+ $casemap_ctx->fix();
+ print STDERR "** casemap context\n", $casemap_ctx->stat() if $verbose;
+
+ print <<"END";
+
+/*
+ * Cased characters and non-spacing marks (for casemap context)
+ */
+
+END
+
+ print_bits("CASEMAP_CTX", @casemap_ctx_bits);
+ print <<"END";
+
+#define CTX_CASED $LETTER_BIT
+#define CTX_NSM $NSPMARK_BIT
+
+END
+ print $casemap_ctx->cprog(NAME => "${prefix}casemap_ctx");
+}
+
+sub sprint_composition_hash {
+ my $i = 0;
+ my $s = '';
+ foreach my $r (@_) {
+ if ($i % 2 == 0) {
+ $s .= "\n" if $i != 0;
+ $s .= "\t";
+ }
+ $s .= sprintf "{0x%04x, 0x%04x, 0x%04x}, ", @{$r};
+ $i++;
+ }
+ $s;
+}
+
+sub print_bits {
+ my $prefix = shift;
+ my $i = 0;
+ foreach my $bit (@_) {
+ print "#define ${prefix}_BITS_$i\t$bit\n";
+ $i++;
+ }
+}
+
+sub print_ulseq {
+ my $i = 0;
+ foreach my $v (@_) {
+ if ($i % 4 == 0) {
+ print "\n" if $i != 0;
+ print "\t";
+ }
+ printf "0x%08x, ", $v;
+ $i++;
+ }
+ print "\n";
+}