5 files changed, 1803 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/util/Makefile b/contrib/idn/idnkit-1.0-src/util/Makefile
new file mode 100644
index 0000000..2f49ddd
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/util/Makefile
@@ -0,0 +1,43 @@
+# $Id: Makefile,v 1.1.1.1 2003/06/04 00:27:52 marka Exp $
+
+PERL = /usr/local/bin/perl -w
+
+V301_NORM_DIR = data/unicode-3.0.0
+V310_NORM_DIR = data/unicode-3.1.0
+V320_NORM_DIR = data/unicode-3.2.0
+NORM_GENERATOR = ./generate_normalize_data.pl
+
+NAMEPREP_DIR  = data/nameprep
+NAMEPREP_FILES = \
+	$(NAMEPREP_DIR)/nameprep.rfc3491.map \
+	$(NAMEPREP_DIR)/nameprep.rfc3491.prohibited \
+	$(NAMEPREP_DIR)/nameprep.rfc3491.unassigned \
+	$(NAMEPREP_DIR)/nameprep.rfc3491.bidi
+NAMEPREP_GENERATOR = ./generate_nameprep_data.pl
+NAMEPREP_VERSIONS = rfc3491
+
+all: normalize nameprep
+
+normalize: ../lib/unicodedata_320.c
+
+nameprep: ../lib/nameprepdata.c
+
+../lib/unicodedata_301.c: $(NORM_GENERATOR)
+	-rm -f $@.tmp
+	$(PERL) $(NORM_GENERATOR) -prefix v301_ -dir $(V301_NORM_DIR) \
+		> $@.tmp && mv $@.tmp $@
+
+../lib/unicodedata_310.c: $(NORM_GENERATOR)
+	-rm -f $@.tmp
+	$(PERL) $(NORM_GENERATOR) -prefix v310_ -dir $(V310_NORM_DIR) \
+		> $@.tmp && mv $@.tmp $@
+
+../lib/unicodedata_320.c: $(NORM_GENERATOR)
+	-rm -f $@.tmp
+	$(PERL) $(NORM_GENERATOR) -prefix v320_ -dir $(V320_NORM_DIR) \
+		> $@.tmp && mv $@.tmp $@
+
+../lib/nameprepdata.c: $(NAMEPREP_GENERATOR) $(NAMEPREP_FILES)
+	-rm -f $@.tmp
+	$(PERL) $(NAMEPREP_GENERATOR) -dir $(NAMEPREP_DIR) \
+		$(NAMEPREP_VERSIONS) > $@.tmp && mv $@.tmp $@
diff --git a/contrib/idn/idnkit-1.0-src/util/SparseMap.pm b/contrib/idn/idnkit-1.0-src/util/SparseMap.pm
new file mode 100644
index 0000000..834c795
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/util/SparseMap.pm
@@ -0,0 +1,575 @@
+# $Id: SparseMap.pm,v 1.1.1.1 2003/06/04 00:27:53 marka Exp $
+#
+# Copyright (c) 2001 Japan Network Information Center.  All rights reserved.
+#
+# By using this file, you agree to the terms and conditions set forth bellow.
+# 
+# 			LICENSE TERMS AND CONDITIONS 
+# 
+# The following License Terms and Conditions apply, unless a different
+# license is obtained from Japan Network Information Center ("JPNIC"),
+# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
+# Chiyoda-ku, Tokyo 101-0047, Japan.
+# 
+# 1. Use, Modification and Redistribution (including distribution of any
+#    modified or derived work) in source and/or binary forms is permitted
+#    under this License Terms and Conditions.
+# 
+# 2. Redistribution of source code must retain the copyright notices as they
+#    appear in each source code file, this License Terms and Conditions.
+# 
+# 3. Redistribution in binary form must reproduce the Copyright Notice,
+#    this License Terms and Conditions, in the documentation and/or other
+#    materials provided with the distribution.  For the purposes of binary
+#    distribution the "Copyright Notice" refers to the following language:
+#    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
+# 
+# 4. The name of JPNIC may not be used to endorse or promote products
+#    derived from this Software without specific prior written approval of
+#    JPNIC.
+# 
+# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+#    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+#    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
+#    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+#    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+#    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+#    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+#    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+
+package SparseMap;
+
+use strict;
+use Carp;
+
+my $debug = 0;
+
+sub new {
+    # common options are:
+    #   BITS => [8, 7, 6],	# 3-level map, 2nd level bits=7, 3rd = 6.
+    #   MAX  => 0x110000	# actually, max + 1.
+    my $class = shift;
+    my $self = {@_};
+
+    croak "BITS unspecified" unless exists $self->{BITS};
+    croak "BITS is not an array reference"
+	unless ref($self->{BITS}) eq 'ARRAY';
+    croak "MAX unspecified" unless exists $self->{MAX};
+
+    $self->{MAXLV} = @{$self->{BITS}} - 1;
+    $self->{FIXED} = 0;
+
+    my $lv0size = (indices($self, $self->{MAX} - 1))[0] + 1;
+
+    my @map = (undef) x $lv0size;
+    $self->{MAP} = \@map;
+
+    bless $self, $class;
+}
+
+sub add1 {
+    my ($self, $n, $val) = @_;
+
+    croak "Already fixed" if $self->{FIXED};
+    carp("data ($n) out of range"), return if $n >= $self->{MAX};
+
+    my @index = $self->indices($n);
+    my $r = $self->{MAP};
+    my $maxlv = $self->{MAXLV};
+    my $idx;
+    my $lv;
+
+    for ($lv = 0; $lv < $maxlv - 1; $lv++) {
+	$idx = $index[$lv];
+	$r->[$idx] = $self->create_imap($lv + 1, undef)
+	    unless defined $r->[$idx];
+	$r = $r->[$idx];
+    }
+    $idx = $index[$lv];
+    $r->[$idx] = $self->create_dmap() unless defined $r->[$idx];
+    $self->add_to_dmap($r->[$idx], $index[$maxlv], $val);
+}
+
+sub fix {
+    my $self = shift;
+    my $map = $self->{MAP};
+    my $maxlv = $self->{MAXLV};
+    my @tmp;
+    my @zero;
+
+    carp "Already fixed" if $self->{FIXED};
+    $self->collapse_tree();
+    $self->fill_default();
+    $self->{FIXED} = 1;
+}
+
+sub indices {
+    my $self = shift;
+    my $v = shift;
+    my @bits = @{$self->{BITS}};
+    my @idx;
+
+    print "indices($v,", join(',', @bits), ") = " if $debug;
+    for (my $i = @bits - 1; $i >= 0; $i--) {
+	my $bit = $bits[$i];
+	unshift @idx, $v & ((1 << $bit) - 1);
+	$v = $v >> $bit;
+    }
+    print "(", join(',', @idx), ")\n" if $debug;
+    @idx;
+}
+
+sub get {
+    my $self = shift;
+    my $v = shift;
+    my $map = $self->{MAP};
+    my @index = $self->indices($v);
+
+    croak "Not yet fixed" unless $self->{FIXED};
+
+    my $lastidx = pop @index;
+    foreach my $idx (@index) {
+	return $map->{DEFAULT} unless defined $map->[$idx];
+	$map = $map->[$idx];
+    }
+    $map->[$lastidx];
+}
+
+sub indirectmap {
+    my $self = shift;
+
+    croak "Not yet fixed" unless $self->{FIXED};
+
+    my @maps = $self->collect_maps();
+    my $maxlv = $self->{MAXLV};
+    my @bits = @{$self->{BITS}};
+
+    my @indirect = ();
+    for (my $lv = 0; $lv < $maxlv; $lv++) {
+	my $offset;
+	my $chunksz;
+	my $mapsz = @{$maps[$lv]->[0]};
+	if ($lv < $maxlv - 1) {
+	    # indirect map
+	    $offset = @indirect + @{$maps[$lv]} * @{$maps[$lv]->[0]};
+	    $chunksz = (1 << $bits[$lv + 1]);
+	} else {
+	    # direct map
+	    $offset = 0;
+	    $chunksz = 1;
+	}
+	my $nextmaps = $maps[$lv + 1];
+	foreach my $mapref (@{$maps[$lv]}) {
+	    croak "mapsize inconsistent ", scalar(@$mapref),
+	        " should be ", $mapsz, " (lv $lv)\n" if @$mapref != $mapsz;
+	    foreach my $m (@$mapref) {
+		my $idx;
+		for ($idx = 0; $idx < @$nextmaps; $idx++) {
+		    last if $nextmaps->[$idx] == $m;
+		}
+		croak "internal error: map corrupted" if $idx >= @$nextmaps;
+		push @indirect, $offset + $chunksz * $idx;
+	    }
+	}
+    }
+    @indirect;
+}
+
+sub cprog_imap {
+    my $self = shift;
+    my %opt = @_;
+    my $name = $opt{NAME} || 'map';
+    my @indirect = $self->indirectmap();
+    my $prog;
+    my $i;
+    my ($idtype, $idcol, $idwid);
+
+    my $max = 0;
+    $max < $_ and $max = $_ foreach @indirect;
+
+    if ($max < 256) {
+	$idtype = 'char';
+	$idcol = 8;
+	$idwid = 3;
+    } elsif ($max < 65536) {
+	$idtype = 'short';
+	$idcol = 8;
+	$idwid = 5;
+    } else {
+	$idtype = 'long';
+	$idcol = 4;
+	$idwid = 10;
+    }
+    $prog = "static const unsigned $idtype ${name}_imap[] = {\n";
+    $i = 0;
+    foreach my $v (@indirect) {
+	if ($i % $idcol == 0) {
+	    $prog .= "\n" if $i != 0;
+	    $prog .= "\t";
+	}
+	$prog .= sprintf "%${idwid}d, ", $v;
+	$i++;
+    }
+    $prog .= "\n};\n";
+    $prog;
+}
+
+sub cprog {
+    my $self = shift;
+    $self->cprog_imap(@_) . "\n" . $self->cprog_dmap(@_);
+}
+
+sub stat {
+    my $self = shift;
+    my @maps = $self->collect_maps();
+    my $elsize = $self->{ELSIZE};
+    my $i;
+    my $total = 0;
+    my @lines;
+
+    for ($i = 0; $i < $self->{MAXLV}; $i++) {
+	my $nmaps = @{$maps[$i]};
+	my $mapsz = @{$maps[$i]->[0]};
+	push @lines, "level $i: $nmaps maps (size $mapsz) ";
+	push @lines, "[", $nmaps * $mapsz * $elsize, "]" if $elsize;
+	push @lines, "\n";
+    }
+    my $ndmaps = @{$maps[$i]};
+    push @lines, "level $i: $ndmaps dmaps";
+    my $r = $maps[$i]->[0];
+    if (ref($r) eq 'ARRAY') {
+	push @lines, " (size ", scalar(@$r), ")";
+    }
+    push @lines, "\n";
+    join '', @lines;
+}
+
+sub collapse_tree {
+    my $self = shift;
+    my @tmp;
+
+    $self->_collapse_tree_rec($self->{MAP}, 0, \@tmp);
+}
+
+sub _collapse_tree_rec {
+    my ($self, $r, $lv, $refs) = @_;
+    my $ref = $refs->[$lv];
+    my $maxlv = $self->{MAXLV};
+    my $found;
+
+    return $r unless defined $r;
+
+    $ref = $refs->[$lv] = [] unless defined $ref;
+
+    if ($lv == $maxlv) {
+	$found = $self->find_dmap($ref, $r);
+    } else {
+	for (my $i = 0; $i < @$r; $i++) {
+	    $r->[$i] = $self->_collapse_tree_rec($r->[$i], $lv + 1, $refs);
+	}
+	$found = $self->find_imap($ref, $r);
+    }
+    unless ($found) {
+	$found = $r;
+	push @$ref, $found;
+    }
+    return $found;
+}
+
+sub fill_default {
+    my $self = shift;
+    my $maxlv = $self->{MAXLV};
+    my $bits = $self->{BITS};
+    my @zeros;
+
+    $zeros[$maxlv] = $self->create_dmap();
+    for (my $lv = $maxlv - 1; $lv >= 0; $lv--) {
+	my $r = $zeros[$lv + 1];
+	$zeros[$lv] = $self->create_imap($lv, $r);
+    }
+    _fill_default_rec($self->{MAP}, 0, $maxlv, \@zeros);
+}
+
+sub _fill_default_rec {
+    my ($r, $lv, $maxlv, $zeros) = @_;
+
+    return if $lv == $maxlv;
+    for (my $i = 0; $i < @$r; $i++) {
+	if (defined($r->[$i])) {
+	    _fill_default_rec($r->[$i], $lv + 1, $maxlv, $zeros);
+	} else {
+	    $r->[$i] = $zeros->[$lv + 1];
+	}
+    }
+}
+
+sub create_imap {
+    my ($self, $lv, $v) = @_;
+    my @map;
+    @map = ($v) x (1 << $self->{BITS}->[$lv]);
+    \@map;
+}
+
+sub find_imap {
+    my ($self, $maps, $map) = @_;
+    my $i;
+
+    foreach my $el (@$maps) {
+	next unless @$el == @$map;
+	for ($i = 0; $i < @$el; $i++) {
+	    last unless ($el->[$i] || 0) == ($map->[$i] || 0);
+	}
+	return $el if $i >= @$el;
+    }
+    undef;
+}
+
+sub collect_maps {
+    my $self = shift;
+    my @maps;
+    _collect_maps_rec($self->{MAP}, 0, $self->{MAXLV}, \@maps);
+    @maps;
+}
+
+sub _collect_maps_rec {
+    my ($r, $lv, $maxlv, $maps) = @_;
+    my $mapref = $maps->[$lv];
+
+    return unless defined $r;
+    foreach my $ref (@{$mapref}) {
+	return if $ref == $r;
+    }
+    push @{$maps->[$lv]}, $r;
+    if ($lv < $maxlv) {
+	_collect_maps_rec($_, $lv + 1, $maxlv, $maps) foreach @{$r};
+    }
+}
+    
+sub add {confess "Subclass responsibility";}
+sub create_dmap {confess "Subclass responsibility";}
+sub add_to_dmap {confess "Subclass responsibility";}
+sub find_dmap {confess "Subclass responsibility";}
+sub cprog_dmap {confess "Subclass responsibility";}
+
+1;
+
+package SparseMap::Bit;
+
+use strict;
+use vars qw(@ISA);
+use Carp;
+#use SparseMap;
+
+@ISA = qw(SparseMap);
+
+sub new {
+    my $class = shift;
+    my $self = $class->SUPER::new(@_);
+    $self->{DEFAULT} = 0;
+    bless $self, $class;
+}
+
+sub add {
+    my $self = shift;
+
+    $self->add1($_, undef) foreach @_;
+}
+
+sub create_dmap {
+    my $self = shift;
+    my $bmbits = $self->{BITS}->[-1];
+
+    my $s = "\0" x (1 << ($bmbits - 3));
+    \$s;
+}
+
+sub add_to_dmap {
+    my ($self, $map, $idx, $val) = @_;
+    vec($$map, $idx, 1) = 1;
+}
+
+sub find_dmap {
+    my ($self, $ref, $r) = @_;
+    foreach my $map (@$ref) {
+	return $map if $$map eq $$r;
+    }
+    return undef;
+}
+
+sub cprog_dmap {
+    my $self = shift;
+    my %opt = @_;
+    my $name = $opt{NAME} || 'map';
+    my @maps = $self->collect_maps();
+    my @bitmap = @{$maps[-1]};
+    my $prog;
+    my $bmsize = 1 << ($self->{BITS}->[-1] - 3);
+
+    $prog = <<"END";
+static const struct {
+	unsigned char bm[$bmsize];
+} ${name}_bitmap[] = {
+END
+
+    foreach my $bm (@bitmap) {
+	my $i = 0;
+	$prog .= "\t{{\n";
+	foreach my $v (unpack 'C*', $$bm) {
+	    if ($i % 16 == 0) {
+		$prog .= "\n" if $i != 0;
+		$prog .= "\t";
+	    }
+	    $prog .= sprintf "%3d,", $v;
+	    $i++;
+	}
+	$prog .= "\n\t}},\n";
+    }
+    $prog .= "};\n";
+    $prog;
+}
+
+1;
+
+package SparseMap::Int;
+
+use strict;
+use vars qw(@ISA);
+use Carp;
+#use SparseMap;
+
+@ISA = qw(SparseMap);
+
+sub new {
+    my $class = shift;
+    my $self = $class->SUPER::new(@_);
+    $self->{DEFAULT} = 0 unless exists $self->{DEFAULT};
+    bless $self, $class;
+}
+
+sub add {
+    my $self = shift;
+    while (@_ > 0) {
+	my $n = shift;
+	my $val = shift;
+	$self->add1($n, $val);
+    }
+}
+
+sub create_dmap {
+    my $self = shift;
+    my $tblbits = $self->{BITS}->[-1];
+    my $default = $self->{DEFAULT};
+
+    my @tbl = ($default) x (1 << $tblbits);
+    \@tbl;
+}
+
+sub add_to_dmap {
+    my ($self, $map, $idx, $val) = @_;
+    $map->[$idx] = $val;
+}
+
+sub find_dmap {
+    my ($self, $ref, $r) = @_;
+    foreach my $map (@$ref) {
+	if (@$map == @$r) {
+	    my $i;
+	    for ($i = 0; $i < @$map; $i++) {
+		last if $map->[$i] != $r->[$i];
+	    }
+	    return $map if $i == @$map;
+	}
+    }
+    return undef;
+}
+
+sub cprog_dmap {
+    my $self = shift;
+    my %opt = @_;
+    my $name = $opt{NAME} || 'map';
+    my @maps = $self->collect_maps();
+    my @table = @{$maps[-1]};
+    my $prog;
+    my $i;
+    my ($idtype, $idcol, $idwid);
+    my $tblsize = 1 << $self->{BITS}->[-1];
+
+    my ($min, $max);
+    foreach my $a (@table) {
+	foreach my $v (@$a) {
+	    $min = $v if !defined($min) or $min > $v;
+	    $max = $v if !defined($max) or $max < $v;
+	}
+    }
+    if (exists $opt{MAPTYPE}) {
+	$idtype = $opt{MAPTYPE};
+    } else {
+	my $u = $min < 0 ? '' : 'unsigned ';
+	my $absmax = abs($max);
+	$absmax = abs($min) if abs($min) > $absmax;
+
+	if ($absmax < 256) {
+	    $idtype = "${u}char";
+	} elsif ($absmax < 65536) {
+	    $idtype = "${u}short";
+	} else {
+	    $idtype = "${u}long";
+	}
+    }
+
+    $idwid = decimalwidth($max);
+    $idwid = decimalwidth($min) if decimalwidth($min) > $idwid;
+
+    $prog = <<"END";
+static const struct {
+	$idtype tbl[$tblsize];
+} ${name}_table[] = {
+END
+
+    foreach my $a (@table) {
+	my $i = 0;
+	my $col = 0;
+	$prog .= "\t{{\n\t";
+	foreach my $v (@$a) {
+	    my $s = sprintf "%${idwid}d, ", $v;
+	    $col += length($s);
+	    if ($col > 70) {
+		$prog .= "\n\t";
+		$col = length($s);
+	    }
+	    $prog .= $s;
+	}
+	$prog .= "\n\t}},\n";
+    }
+    $prog .= "};\n";
+    $prog;
+}
+
+sub decimalwidth {
+    my $n = shift;
+    my $neg = 0;
+    my $w;
+
+    if ($n < 0) {
+	$neg = 1;
+	$n = -$n;
+    }
+    if ($n < 100) {
+	$w = 2;
+    } elsif ($n < 10000) {
+	$w = 4;
+    } elsif ($n < 1000000) {
+	$w = 6;
+    } elsif ($n < 100000000) {
+	$w = 8;
+    } else {
+	$w = 10;
+    }
+    $w + $neg;
+}
+
+1;
diff --git a/contrib/idn/idnkit-1.0-src/util/UCD.pm b/contrib/idn/idnkit-1.0-src/util/UCD.pm
new file mode 100644
index 0000000..19629e6
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/util/UCD.pm
@@ -0,0 +1,194 @@
+# $Id: UCD.pm,v 1.1.1.1 2003/06/04 00:27:53 marka Exp $
+#
+# Copyright (c) 2000,2001 Japan Network Information Center.
+# All rights reserved.
+#  
+# By using this file, you agree to the terms and conditions set forth bellow.
+# 
+# 			LICENSE TERMS AND CONDITIONS 
+# 
+# The following License Terms and Conditions apply, unless a different
+# license is obtained from Japan Network Information Center ("JPNIC"),
+# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
+# Chiyoda-ku, Tokyo 101-0047, Japan.
+# 
+# 1. Use, Modification and Redistribution (including distribution of any
+#    modified or derived work) in source and/or binary forms is permitted
+#    under this License Terms and Conditions.
+# 
+# 2. Redistribution of source code must retain the copyright notices as they
+#    appear in each source code file, this License Terms and Conditions.
+# 
+# 3. Redistribution in binary form must reproduce the Copyright Notice,
+#    this License Terms and Conditions, in the documentation and/or other
+#    materials provided with the distribution.  For the purposes of binary
+#    distribution the "Copyright Notice" refers to the following language:
+#    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
+# 
+# 4. The name of JPNIC may not be used to endorse or promote products
+#    derived from this Software without specific prior written approval of
+#    JPNIC.
+# 
+# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+#    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+#    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
+#    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+#    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+#    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+#    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+#    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+
+package UCD;
+
+#
+# UCD.pm -- parser for Unicode Character Database files.
+#
+# This file is an aggregation of the following modules, each of which
+# provides a parser for a specific data file of UCD.
+#	UCD::UnicodeData		-- for UnicodeData.txt
+#	UCD::CaseFolding		-- for CaseFolding.txt
+#	UCD::SpecialCasing		-- for SpecialCasing.txt
+#	UCD::CompositionExclusions	-- for CompositionExclusions-1.txt
+#
+# Each module provides two subroutines:
+#
+#   $line = getline(\*HANDLE);
+#	reads next non-comment line from HANDLE, and returns it.
+#	undef will be returned upon EOF.
+#
+#   %fields = parse($line);
+#	parses a line and extract fields, and returns a list of
+#	field name and its value, suitable for assignment to a hash.
+#
+
+package UCD::UnicodeData;
+
+use strict;
+use Carp;
+
+sub getline {
+    my $fh = shift;
+    my $s = <$fh>;
+    $s =~ s/\r?\n$// if $s;
+    $s;
+}
+
+sub parseline {
+    my $s = shift;
+
+    my @f = split /;/, $s, -1;
+    return (CODE     => hex($f[0]),
+	    NAME     => $f[1],
+	    CATEGORY => $f[2],
+	    CLASS    => $f[3]+0,
+	    BIDI     => $f[4],
+	    DECOMP   => dcmap($f[5]),
+	    DECIMAL  => dvalue($f[6]),
+	    DIGIT    => dvalue($f[7]),
+	    NUMERIC  => dvalue($f[8]),
+	    MIRRORED => $f[9] eq 'Y',
+	    NAME10   => $f[10],
+	    COMMENT  => $f[11],
+	    UPPER    => ucode($f[12]),
+	    LOWER    => ucode($f[13]),
+	    TITLE    => ucode($f[14]));
+}
+
+sub dcmap {
+    my $v = shift;
+    return undef if $v eq '';
+    $v =~ /^(?:(<[^>]+>)\s*)?(\S.*)/
+	or croak "invalid decomposition mapping \"$v\"";
+    my $tag = $1 || '';
+    [$tag, map {hex($_)} split(' ', $2)];
+}
+
+sub ucode {
+    my $v = shift;
+    return undef if $v eq '';
+    hex($v);
+}
+
+sub dvalue {
+    my $v = shift;
+    return undef if $v eq '';
+    $v;
+}
+
+package UCD::CaseFolding;
+
+use strict;
+
+sub getline {
+    my $fh = shift;
+    while (defined(my $s = <$fh>)) {
+	next if $s =~ /^\#/;
+	next if $s =~ /^\s*$/;
+	$s =~ s/\r?\n$//;
+	return $s;
+    }
+    undef;
+}
+
+sub parseline {
+    my $s = shift;
+    my @f = split /;\s*/, $s, -1;
+    return (CODE => hex($f[0]),
+	    TYPE => $f[1],
+	    MAP  => [map(hex, split ' ', $f[2])],
+	   );
+}
+
+package UCD::SpecialCasing;
+
+use strict;
+
+sub getline {
+    my $fh = shift;
+    while (defined(my $s = <$fh>)) {
+	next if $s =~ /^\#/;
+	next if $s =~ /^\s*$/;
+	$s =~ s/\r?\n$//;
+	return $s;
+    }
+    undef;
+}
+
+sub parseline {
+    my $s = shift;
+
+    my @f = split /;\s*/, $s, -1;
+    my $cond = (@f > 5) ? $f[4] : undef;
+    return (CODE => hex($f[0]),
+	    LOWER => [map(hex, split ' ', $f[1])],
+	    TITLE => [map(hex, split ' ', $f[2])],
+	    UPPER => [map(hex, split ' ', $f[3])],
+	    CONDITION => $cond);
+}
+
+package UCD::CompositionExclusions;
+
+use strict;
+
+sub getline {
+    my $fh = shift;
+    while (defined(my $s = <$fh>)) {
+	next if $s =~ /^\#/;
+	next if $s =~ /^\s*$/;
+	$s =~ s/\r?\n$//;
+	return $s;
+    }
+    undef;
+}
+
+sub parseline {
+    my $s = shift;
+    m/^[0-9A-Fa-f]+/;
+    return (CODE => hex($&));
+}
+
+1;
diff --git a/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl b/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl
new file mode 100755
index 0000000..31dd18b
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl
@@ -0,0 +1,405 @@
+#! /usr/local/bin/perl -w
+# $Id: generate_nameprep_data.pl,v 1.1.1.1 2003/06/04 00:27:54 marka Exp $
+#
+# Copyright (c) 2001 Japan Network Information Center.  All rights reserved.
+#  
+# By using this file, you agree to the terms and conditions set forth bellow.
+# 
+# 			LICENSE TERMS AND CONDITIONS 
+# 
+# The following License Terms and Conditions apply, unless a different
+# license is obtained from Japan Network Information Center ("JPNIC"),
+# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
+# Chiyoda-ku, Tokyo 101-0047, Japan.
+# 
+# 1. Use, Modification and Redistribution (including distribution of any
+#    modified or derived work) in source and/or binary forms is permitted
+#    under this License Terms and Conditions.
+# 
+# 2. Redistribution of source code must retain the copyright notices as they
+#    appear in each source code file, this License Terms and Conditions.
+# 
+# 3. Redistribution in binary form must reproduce the Copyright Notice,
+#    this License Terms and Conditions, in the documentation and/or other
+#    materials provided with the distribution.  For the purposes of binary
+#    distribution the "Copyright Notice" refers to the following language:
+#    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
+# 
+# 4. The name of JPNIC may not be used to endorse or promote products
+#    derived from this Software without specific prior written approval of
+#    JPNIC.
+# 
+# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+#    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+#    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
+#    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+#    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+#    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+#    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+#    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+
+use v5.6.0;		# for pack('U')
+use bytes;
+
+use lib qw(.);
+
+use SparseMap;
+use Getopt::Long;
+
+(my $myid = '$Id: generate_nameprep_data.pl,v 1.1.1.1 2003/06/04 00:27:54 marka Exp $') =~ s/\$([^\$]+)\$/\$-$1-\$/;
+
+my @map_bits = (9, 7, 5);
+my @proh_bits = (7, 7, 7);
+my @unas_bits = (7, 7, 7);
+my @bidi_bits = (9, 7, 5);
+
+my @bidi_types = ('OTHERS', 'R_AL', 'L');
+
+my $dir = '.';
+my @versions = ();
+
+GetOptions('dir=s', \$dir) or die usage();
+@versions = @ARGV;
+
+print_header();
+
+bits_definition("MAP", @map_bits);
+bits_definition("PROH", @proh_bits);
+bits_definition("UNAS", @unas_bits);
+bits_definition("BIDI", @bidi_bits);
+
+generate_data($_) foreach @ARGV;
+
+sub usage {
+    die "Usage: $0 [-dir dir] version..\n";
+}
+
+sub generate_data {
+    my $version = shift;
+    generate_mapdata($version, "$dir/nameprep.$version.map");
+    generate_prohibiteddata($version, "$dir/nameprep.$version.prohibited");
+    generate_unassigneddata($version, "$dir/nameprep.$version.unassigned");
+    generate_bididata($version, "$dir/nameprep.$version.bidi");
+}
+
+#
+# Generate mapping data.
+#
+sub generate_mapdata {
+    my $version = shift;
+    my $file = shift;
+
+    my $map = SparseMap::Int->new(BITS => [@map_bits],
+				  MAX => 0x110000,
+				  MAPALL => 1,
+				  DEFAULT => 0);
+    open FILE, $file or die "cannot open $file: $!\n";
+
+    my $mapbuf = "\0";	# dummy
+    my %maphash = ();
+    while (<FILE>) {
+	if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
+	    my $same_as = $1;
+	    if (grep {$_ eq $same_as} @versions > 0) {
+		generate_map_ref($version, $same_as);
+		close FILE;
+		return;
+	    }
+	    next;
+	}
+	next if /^\#/;
+	next if /^\s*$/;
+	register_map($map, \$mapbuf, \%maphash, $_);
+    }
+    close FILE;
+    generate_map($version, $map, \$mapbuf);
+}
+
+#
+# Generate prohibited character data.
+#
+sub generate_prohibiteddata {
+    my $version = shift;
+    my $file = shift;
+
+    my $proh = SparseMap::Bit->new(BITS => [@proh_bits],
+				   MAX => 0x110000);
+    open FILE, $file or die "cannot open $file: $!\n";
+    while (<FILE>) {
+	if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
+	    my $same_as = $1;
+	    if (grep {$_ eq $same_as} @versions > 0) {
+		generate_prohibited_ref($version, $same_as);
+		close FILE;
+		return;
+	    }
+	    next;
+	}
+	next if /^\#/;
+	next if /^\s*$/;
+	register_prohibited($proh, $_);
+    }
+    close FILE;
+    generate_prohibited($version, $proh);
+}
+
+#
+# Generate unassigned codepoint data.
+#
+sub generate_unassigneddata {
+    my $version = shift;
+    my $file = shift;
+
+    my $unas = SparseMap::Bit->new(BITS => [@unas_bits],
+				   MAX => 0x110000);
+    open FILE, $file or die "cannot open $file: $!\n";
+    while (<FILE>) {
+	if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
+	    my $same_as = $1;
+	    if (grep {$_ eq $same_as} @versions > 0) {
+		generate_unassigned_ref($version, $same_as);
+		close FILE;
+		return;
+	    }
+	    next;
+	}
+	next if /^\#/;
+	next if /^\s*$/;
+	register_unassigned($unas, $_);
+    }
+    close FILE;
+    generate_unassigned($version, $unas);
+}
+
+#
+# Generate data of bidi "R" or "AL" characters.
+#
+sub generate_bididata {
+    my $version = shift;
+    my $file = shift;
+
+    my $bidi = SparseMap::Int->new(BITS => [@bidi_bits],
+				   MAX => 0x110000);
+    open FILE, $file or die "cannot open $file: $!\n";
+
+    my $type = 0;
+    while (<FILE>) {
+	if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
+	    my $same_as = $1;
+	    if (grep {$_ eq $same_as} @versions > 0) {
+		generate_unassigned_ref($version, $same_as);
+		close FILE;
+		return;
+	    }
+	    next;
+	}
+	if (/^%\s*BIDI_TYPE\s+(\S+)$/) {
+	    my $i = 0;
+	    for ($i = 0; $i < @bidi_types; $i++) {
+		if ($1 eq $bidi_types[$i]) {
+		    $type = $i;
+		    last;
+		}
+	    }
+	    die "unrecognized line: $_" if ($i >= @bidi_types);
+	    next;
+	}
+	next if /^\#/;
+	next if /^\s*$/;
+	register_bidi($bidi, $type, $_);
+    }
+    close FILE;
+
+    generate_bidi($version, $bidi);
+}
+
+sub print_header {
+    print <<"END";
+/* \$Id\$ */
+/* $myid */
+/*
+ * Do not edit this file!
+ * This file is generated from NAMEPREP specification.
+ */
+
+END
+}
+
+sub bits_definition {
+    my $name = shift;
+    my @bits = @_;
+    my $i = 0;
+
+    foreach my $n (@bits) {
+	print "#define ${name}_BITS_$i\t$n\n";
+	$i++;
+    }
+    print "\n";
+}
+
+sub register_map {
+    my ($map, $bufref, $hashref, $line) = @_;
+
+    my ($from, $to) = split /;/, $line;
+    my @fcode = map {hex($_)} split ' ', $from;
+    my @tcode = map {hex($_)} split ' ', $to;
+
+    my $ucs4 = pack('V*', @tcode);
+    $ucs4 =~ s/\000+$//;
+
+    my $offset;
+    if (exists $hashref->{$ucs4}) {
+	$offset = $hashref->{$ucs4};
+    } else {
+	$offset = length $$bufref;
+	$$bufref .= pack('C', length($ucs4)) . $ucs4;
+	$hashref->{$ucs4} = $offset;
+    }
+
+    die "unrecognized line: $line" if @fcode != 1;
+    $map->add($fcode[0], $offset);
+}
+
+sub generate_map {
+    my ($version, $map, $bufref) = @_;
+
+    $map->fix();
+
+    print $map->cprog(NAME => "nameprep_${version}_map");
+    print "\nstatic const unsigned char nameprep_${version}_map_data[] = \{\n";
+    print_uchararray($$bufref);
+    print "};\n\n";
+}
+
+sub generate_map_ref {
+    my ($version, $refversion) = @_;
+    print <<"END";
+#define nameprep_${version}_map_imap	nameprep_${refversion}_map_imap
+#define nameprep_${version}_map_table	nameprep_${refversion}_map_table
+#define nameprep_${version}_map_data	nameprep_${refversion}_map_data
+
+END
+}
+
+sub print_uchararray {
+    my @chars = unpack 'C*', $_[0];
+    my $i = 0;
+    foreach my $v (@chars) {
+	if ($i % 12 == 0) {
+	    print "\n" if $i != 0;
+	    print "\t";
+	}
+	printf "%3d, ", $v;
+	$i++;
+    }
+    print "\n";
+}
+
+sub register_prohibited {
+    my $proh = shift;
+    register_bitmap($proh, @_);
+}
+
+sub register_unassigned {
+    my $unas = shift;
+    register_bitmap($unas, @_);
+}
+
+sub register_bidi {
+    my $bidi = shift;
+    my $type = shift;
+    register_intmap($bidi, $type, @_);
+}
+
+sub generate_prohibited {
+    my ($version, $proh) = @_;
+    generate_bitmap($proh, "nameprep_${version}_prohibited");
+    print "\n";
+}
+
+sub generate_prohibited_ref {
+    my ($version, $refversion) = @_;
+    print <<"END";
+#define nameprep_${version}_prohibited_imap	nameprep_${refversion}_prohibited_imap
+#define nameprep_${version}_prohibited_bitmap	nameprep_${refversion}_prohibited_bitmap
+
+END
+}
+
+sub generate_unassigned {
+    my ($version, $unas) = @_;
+    generate_bitmap($unas, "nameprep_${version}_unassigned");
+    print "\n";
+}
+
+sub generate_unassigned_ref {
+    my ($version, $refversion) = @_;
+    print <<"END";
+#define nameprep_${version}_unassigned_imap	nameprep_${refversion}_unassigned_imap
+#define nameprep_${version}_unassigned_bitmap	nameprep_${refversion}_unassigned_bitmap
+
+END
+}
+
+sub generate_bidi {
+    my ($version, $bidi) = @_;
+
+    $bidi->fix();
+
+    print $bidi->cprog(NAME => "nameprep_${version}_bidi");
+    print "\n";
+    print "static const unsigned char nameprep_${version}_bidi_data[] = \{\n";
+
+    foreach my $type (@bidi_types) {
+	printf "\tidn_biditype_%s, \n", lc($type);
+    }
+    print "};\n\n";
+}
+
+sub generate_bidi_ref {
+    my ($version, $refversion) = @_;
+    print <<"END";
+#define nameprep_${version}_bidi_imap	nameprep_${refversion}_bidi_imap
+#define nameprep_${version}_bidi_table	nameprep_${refversion}_bidi_table
+
+END
+}
+
+sub register_bitmap {
+    my $map = shift;
+    my $line = shift;
+
+    /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/ or die "unrecognized line: $line";
+    my $start = hex($1);
+    my $end = defined($2) ? hex($2) : undef;
+    if (defined $end) {
+	$map->add($start .. $end);
+    } else {
+	$map->add($start);
+    }
+}
+
+sub register_intmap {
+    my $map = shift;
+    my $value = shift;
+    my $line = shift;
+
+    /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/ or die "unrecognized line: $line";
+    my $start = hex($1);
+    my $end = defined($2) ? hex($2) : $start;
+    for (my $i = $start; $i <= $end; $i++) {
+	$map->add($i, $value);
+    }
+}
+
+sub generate_bitmap {
+    my $map = shift;
+    my $name = shift;
+    $map->fix();
+    #$map->stat();
+    print $map->cprog(NAME => $name);
+}
diff --git a/contrib/idn/idnkit-1.0-src/util/generate_normalize_data.pl b/contrib/idn/idnkit-1.0-src/util/generate_normalize_data.pl
new file mode 100755
index 0000000..fe81648
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/util/generate_normalize_data.pl
@@ -0,0 +1,586 @@
+#! /usr/local/bin/perl -w
+# $Id: generate_normalize_data.pl,v 1.1.1.1 2003/06/04 00:27:55 marka Exp $
+#
+# Copyright (c) 2000,2001 Japan Network Information Center.
+# All rights reserved.
+#  
+# By using this file, you agree to the terms and conditions set forth bellow.
+# 
+# 			LICENSE TERMS AND CONDITIONS 
+# 
+# The following License Terms and Conditions apply, unless a different
+# license is obtained from Japan Network Information Center ("JPNIC"),
+# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
+# Chiyoda-ku, Tokyo 101-0047, Japan.
+# 
+# 1. Use, Modification and Redistribution (including distribution of any
+#    modified or derived work) in source and/or binary forms is permitted
+#    under this License Terms and Conditions.
+# 
+# 2. Redistribution of source code must retain the copyright notices as they
+#    appear in each source code file, this License Terms and Conditions.
+# 
+# 3. Redistribution in binary form must reproduce the Copyright Notice,
+#    this License Terms and Conditions, in the documentation and/or other
+#    materials provided with the distribution.  For the purposes of binary
+#    distribution the "Copyright Notice" refers to the following language:
+#    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
+# 
+# 4. The name of JPNIC may not be used to endorse or promote products
+#    derived from this Software without specific prior written approval of
+#    JPNIC.
+# 
+# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+#    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+#    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
+#    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+#    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+#    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+#    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+#    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+
+# 
+# Generate lib/unicodedata.c from UnicodeData.txt,
+# CompositionExclusions-1.txt, SpecialCasing.txt and CaseFolding.txt,
+# all of them available from ftp://ftp.unicode.org/Public/UNIDATA/.
+#
+
+use strict;
+use lib qw(.);
+
+use Getopt::Long;
+use UCD;
+use SparseMap;
+
+use constant UCS_MAX => 0x110000;
+use constant END_BIT => 0x80000000;
+
+my $DECOMP_COMPAT_BIT = 0x8000;
+
+my $CASEMAP_FINAL_BIT = 0x1;
+my $CASEMAP_NONFINAL_BIT = 0x2;
+my $CASEMAP_LAST_BIT = 0x10;
+
+my $LETTER_BIT = 1;
+my $NSPMARK_BIT = 2;
+
+(my $myid = '$Id: generate_normalize_data.pl,v 1.1.1.1 2003/06/04 00:27:55 marka Exp $') =~ s/\$([^\$]+)\$/\$-$1-\$/;
+
+my @default_bits = (9, 7, 5);
+#my @default_bits = (7, 7, 7);
+my @canon_class_bits = @default_bits;
+my @decomp_bits = @default_bits;
+my @comp_bits = @default_bits;
+my @folding_bits = @default_bits;
+my @casemap_bits = @default_bits;
+my @casemap_ctx_bits = @default_bits;
+
+my $prefix = '';
+my $dir = '.';
+my $unicodedatafile = 'UnicodeData.txt';
+my $exclusionfile = 'CompositionExclusions.txt';
+my $specialcasefile = 'SpecialCasing.txt';
+my $casefoldingfile = 'CaseFolding.txt';
+my $verbose;
+
+GetOptions('dir|d=s' => \$dir,
+	   'unicodedata|u=s' => \$unicodedatafile,
+	   'exclude|e=s' => \$exclusionfile,	
+	   'specialcase|s=s' => \$specialcasefile,
+	   'casefold|c=s' => \$casefoldingfile,
+	   'prefix|p=s' => \$prefix,
+	   'verbose|v' => \$verbose,
+) or usage();
+
+foreach my $r (\$unicodedatafile, \$exclusionfile,
+	       \$specialcasefile, \$casefoldingfile) {
+    $$r = "$dir/$$r" unless $$r =~ m|^/|;
+}
+
+my %exclusions;
+my %lower_special;
+my %upper_special;
+
+my @decomp_data;
+my @comp_data;
+my @toupper_data;
+my @tolower_data;
+my @folding_data;
+
+#
+# Create Mapping/Bitmap objects.
+#
+
+# canonical class
+my $canon_class = SparseMap::Int->new(BITS => [@canon_class_bits],
+				     MAX => UCS_MAX,
+				     MAPALL => 1,
+				     DEFAULT => 0);
+
+# canonical/compatibility decomposition
+my $decomp = SparseMap::Int->new(BITS => [@decomp_bits],
+				 MAX => UCS_MAX,
+				 MAPALL => 1,
+				 DEFAULT => 0);
+
+# canonical composition
+my $comp = SparseMap::Int->new(BITS => [@comp_bits],
+			       MAX => UCS_MAX,
+			       MAPALL => 1,
+			       DEFAULT => 0);
+
+# uppercase/lowercase
+my $upper = SparseMap::Int->new(BITS => [@casemap_bits],
+			        MAX => UCS_MAX,
+			        MAPALL => 1,
+			        DEFAULT => 0);
+my $lower = SparseMap::Int->new(BITS => [@casemap_bits],
+			        MAX => UCS_MAX,
+			        MAPALL => 1,
+			        DEFAULT => 0);
+
+# final/nonfinal context
+my $casemap_ctx = SparseMap::Int->new(BITS => [@casemap_ctx_bits],
+				      MAX => UCS_MAX,
+				      MAPALL => 1,
+				      DEFAULT => 0);
+
+# casefolding
+my $folding = SparseMap::Int->new(BITS => [@folding_bits],
+				  MAX => UCS_MAX,
+				  MAPALL => 1,
+				  DEFAULT => 0);
+
+#
+# Read datafiles.
+#
+
+read_exclusion_file();
+read_specialcasing_file();
+read_unicodedata_file();
+read_casefolding_file();
+
+print_header();
+print_canon_class();
+print_composition();
+print_decomposition();
+print_casemap();
+print_casemap_context();
+print_casefolding();
+
+exit;
+
+sub usage {
+    print STDERR <<"END";
+Usage: $0 [options..]
+  options:
+    -d DIR  directory where Unicode Character Data files resides [./]
+    -u FILE name of the UnicodeData file [UnicodeData.txt]
+    -e FILE name of the CompositionExclusion file [CompositionExclusions-1.txt]
+    -s FILE name of the SpecialCasing file [SpecialCasing.txt]
+    -c FILE name of the CaseFolding file [CaseFolding.txt]
+END
+    exit 1;
+}
+
+#
+# read_exclusion_file -- read CompositionExclusions-1.txt.
+#
+sub read_exclusion_file {
+    open EXCLUDE, $exclusionfile   or die "cannot open $exclusionfile: $!\n";
+    while ($_ = UCD::CompositionExclusions::getline(\*EXCLUDE)) {
+	my %data = UCD::CompositionExclusions::parseline($_);
+	$exclusions{$data{CODE}} = 1;
+    }
+    close EXCLUDE;
+}
+
+#
+# read_specialcasing_file -- read SpecialCasing.txt
+#
+sub read_specialcasing_file {
+    open SPCASE, $specialcasefile or die "cannot open $specialcasefile: $!\n";
+    while ($_ = UCD::SpecialCasing::getline(\*SPCASE)) {
+	my %data = UCD::SpecialCasing::parseline($_);
+	my $code = $data{CODE};
+	my $lower = $data{LOWER};
+	my $upper = $data{UPPER};
+	my $cond = $data{CONDITION} || '';
+
+	next unless $cond eq '' or $cond =~ /^(NON_)?FINAL/;
+
+	if (defined $cond && (@$lower > 1 || $lower->[0] != $code)
+	    or @$lower > 1 or $lower->[0] != $code) {
+	    $lower_special{$code} = [$lower, $cond];
+	}
+	if (defined $cond && (@$upper > 1 || $upper->[0] != $code)
+	    or @$upper > 1 or $upper->[0] != $code) {
+	    $upper_special{$code} = [$upper, $cond];
+	}
+    }
+    close SPCASE;
+}
+
+#
+# read_unicodedata_file -- read UnicodeData.txt
+#
+sub read_unicodedata_file {
+    open UCD, $unicodedatafile or die "cannot open $unicodedatafile: $!\n";
+
+    @decomp_data = (0);
+    @toupper_data = (0);
+    @tolower_data = (0);
+
+    my @comp_cand;	# canonical composition candidates
+    my %nonstarter;
+
+    while ($_ = UCD::UnicodeData::getline(\*UCD)) {
+	my %data = UCD::UnicodeData::parseline($_);
+	my $code = $data{CODE};
+
+	# combining class
+	if ($data{CLASS} > 0) {
+	    $nonstarter{$code} = 1;
+	    $canon_class->add($code, $data{CLASS});
+	}
+
+	# uppercasing
+	if (exists $upper_special{$code} or defined $data{UPPER}) {
+	    my $offset = @toupper_data;
+	    my @casedata;
+
+	    $upper->add($code, $offset);
+	    if (exists $upper_special{$code}) {
+		push @casedata, $upper_special{$code};
+	    }
+	    if (defined $data{UPPER}) {
+		push @casedata, $data{UPPER};
+	    }
+	    push @toupper_data, casemap_data(@casedata);
+	}
+
+	# lowercasing
+	if (exists $lower_special{$code} or defined $data{LOWER}) {
+	    my $offset = @tolower_data;
+	    my @casedata;
+
+	    $lower->add($code, $offset);
+	    if (exists $lower_special{$code}) {
+		push @casedata, $lower_special{$code};
+	    }
+	    if (defined $data{LOWER}) {
+		push @casedata, $data{LOWER};
+	    }
+	    push @tolower_data, casemap_data(@casedata);
+	}
+
+	# composition/decomposition
+	if ($data{DECOMP}) {
+	    my ($tag, @decomp) = @{$data{DECOMP}};
+	    my $offset = @decomp_data;
+
+	    # composition
+	    if ($tag eq '' and @decomp > 1 and not exists $exclusions{$code}) {
+		# canonical composition candidate
+		push @comp_cand, [$code, @decomp];
+	    }
+
+	    # decomposition
+	    if ($tag ne '') {
+		# compatibility decomposition
+		$offset |= $DECOMP_COMPAT_BIT;
+	    }
+	    $decomp->add($code, $offset);
+	    push @decomp_data, @decomp;
+	    $decomp_data[-1] |= END_BIT;
+
+	}
+
+	# final/nonfinal context
+	if ($data{CATEGORY} =~ /L[ult]/) {
+	    $casemap_ctx->add($code, $LETTER_BIT);
+	} elsif ($data{CATEGORY} eq 'Mn') {
+	    $casemap_ctx->add($code, $NSPMARK_BIT);
+	}
+    }
+    close UCD;
+
+    # Eliminate composition candidates whose decomposition starts with
+    # a non-starter.
+    @comp_cand = grep {not exists $nonstarter{$_->[1]}} @comp_cand;
+
+    @comp_data = ([0, 0, 0]);
+    my $last_code = -1;
+    my $last_offset = @comp_data;
+    for my $r (sort {$a->[1] <=> $b->[1] || $a->[2] <=> $b->[2]} @comp_cand) {
+	if ($r->[1] != $last_code) {
+	    $comp->add($last_code,
+		       ($last_offset | ((@comp_data - $last_offset)<<16)))
+		unless $last_code == -1;
+	    $last_code = $r->[1];
+	    $last_offset = @comp_data;
+	}
+	push @comp_data, $r;
+    }
+    $comp->add($last_code,
+	       ($last_offset | ((@comp_data - $last_offset)<<16)));
+}
+
+sub casemap_data {
+    my @data = @_;
+    my @result = ();
+    while (@data > 0) {
+	my $r = shift @data;
+	my $flag = 0;
+	if (ref $r) {
+	    if ($r->[1] eq 'FINAL') {
+		$flag |= $CASEMAP_FINAL_BIT;
+	    } elsif ($r->[1] eq 'NON_FINAL') {
+		$flag |= $CASEMAP_NONFINAL_BIT;
+	    } elsif ($r->[1] ne '') {
+		die "unknown condition \"", $r->[1], "\"\n";
+	    }
+	}
+	$flag |= $CASEMAP_LAST_BIT if @data == 0;
+	push @result, $flag;
+	push @result, (ref $r) ? @{$r->[0]} : $r;
+	$result[-1] |= END_BIT;
+    }
+    @result;
+}
+
+#
+# read_casefolding_file -- read CaseFolding.txt
+#
+sub read_casefolding_file {
+    open FOLD, $casefoldingfile or die "cannto open $casefoldingfile: $!\n";
+
+    # dummy.
+    @folding_data = (0);
+
+    while ($_ = UCD::CaseFolding::getline(\*FOLD)) {
+	my %data = UCD::CaseFolding::parseline($_);
+
+	$folding->add($data{CODE}, scalar(@folding_data));
+	push @folding_data, @{$data{MAP}};
+	$folding_data[-1] |= END_BIT;
+    }
+    close FOLD;
+}
+
+sub print_header {
+    print <<"END";
+/* \$Id\$ */
+/* $myid */
+/*
+ * Do not edit this file!
+ * This file is generated from UnicodeData.txt, CompositionExclusions-1.txt,
+ * SpecialCasing.txt and CaseFolding.txt.
+ */
+
+END
+}
+
+#
+# print_canon_class -- generate data for canonical class
+#
+sub print_canon_class {
+    $canon_class->fix();
+    print STDERR "** cannon_class\n", $canon_class->stat() if $verbose;
+
+    print <<"END";
+
+/*
+ * Canonical Class
+ */
+
+END
+    print_bits("CANON_CLASS", @canon_class_bits);
+    print "\n";
+    print $canon_class->cprog(NAME => "${prefix}canon_class");
+}
+
+#
+# print_composition -- generate data for canonical composition
+#
+sub print_composition {
+    $comp->fix();
+    print STDERR "** composition\n", $comp->stat() if $verbose;
+
+    print <<"END";
+
+/*
+ * Canonical Composition
+ */
+
+END
+    print_bits("CANON_COMPOSE", @comp_bits);
+    print "\n";
+    print $comp->cprog(NAME => "${prefix}compose");
+    print <<"END";
+
+static const struct composition ${prefix}compose_seq[] = {
+END
+    my $i = 0;
+    foreach my $r (@comp_data) {
+	if ($i % 2 == 0) {
+	    print "\n" if $i != 0;
+	    print "\t";
+	}
+	printf "{ 0x%08x, 0x%08x }, ", $r->[2], $r->[0];
+	$i++;
+    }
+    print "\n};\n\n";
+}
+
+#
+# print_decomposition -- generate data for canonical/compatibility
+# decomposition
+#
+sub print_decomposition {
+    $decomp->fix();
+    print STDERR "** decomposition\n", $decomp->stat() if $verbose;
+
+    print <<"END";
+
+/*
+ * Canonical/Compatibility Decomposition
+ */
+
+END
+    print_bits("DECOMP", @decomp_bits);
+    print "#define DECOMP_COMPAT\t$DECOMP_COMPAT_BIT\n\n";
+
+    print $decomp->cprog(NAME => "${prefix}decompose");
+
+    print "static const unsigned long ${prefix}decompose_seq[] = {\n";
+    print_ulseq(@decomp_data);
+    print "};\n\n";
+}
+
+#
+# print_casemap -- generate data for case mapping
+#
+sub print_casemap {
+    $upper->fix();
+    $lower->fix();
+    print STDERR "** upper mapping\n", $upper->stat() if $verbose;
+    print STDERR "** lower mapping\n", $lower->stat() if $verbose;
+
+    print <<"END";
+
+/*
+ * Lowercase <-> Uppercase mapping
+ */
+
+/*
+ * Flags for special case mapping.
+ */
+#define CMF_FINAL	$CASEMAP_FINAL_BIT
+#define CMF_NONFINAL	$CASEMAP_NONFINAL_BIT
+#define CMF_LAST	$CASEMAP_LAST_BIT
+#define CMF_CTXDEP	(CMF_FINAL|CMF_NONFINAL)
+
+END
+    print_bits("CASEMAP", @casemap_bits);
+    print "\n";
+    print $upper->cprog(NAME => "${prefix}toupper");
+    print $lower->cprog(NAME => "${prefix}tolower");
+
+    print "static const unsigned long ${prefix}toupper_seq[] = {\n";
+    print_ulseq(@toupper_data);
+    print "};\n\n";
+
+    print "static const unsigned long ${prefix}tolower_seq[] = {\n";
+    print_ulseq(@tolower_data);
+    print "};\n\n";
+}
+
+#
+# print_casefolding -- generate data for case folding
+#
+sub print_casefolding {
+    $folding->fix();
+    print STDERR "** case folding\n", $folding->stat() if $verbose;
+
+    print <<"END";
+
+/*
+ * Case Folding
+ */
+
+END
+    print_bits("CASE_FOLDING", @folding_bits);
+    print "\n";
+    print $folding->cprog(NAME => "${prefix}case_folding");
+
+    print "static const unsigned long ${prefix}case_folding_seq[] = {\n";
+    print_ulseq(@folding_data);
+    print "};\n\n";
+}
+
+#
+# print_casemap_context -- gerarate data for determining context
+# (final/non-final)
+#
+sub print_casemap_context {
+    $casemap_ctx->fix();
+    print STDERR "** casemap context\n", $casemap_ctx->stat() if $verbose;
+
+    print <<"END";
+
+/*
+ * Cased characters and non-spacing marks (for casemap context)
+ */
+
+END
+
+    print_bits("CASEMAP_CTX", @casemap_ctx_bits);
+    print <<"END";
+
+#define CTX_CASED	$LETTER_BIT
+#define CTX_NSM		$NSPMARK_BIT
+
+END
+    print $casemap_ctx->cprog(NAME => "${prefix}casemap_ctx");
+}
+
+sub sprint_composition_hash {
+    my $i = 0;
+    my $s = '';
+    foreach my $r (@_) {
+	if ($i % 2 == 0) {
+	    $s .= "\n" if $i != 0;
+	    $s .= "\t";
+	}
+	$s .= sprintf "{0x%04x, 0x%04x, 0x%04x}, ", @{$r};
+	$i++;
+    }
+    $s;
+}
+
+sub print_bits {
+    my $prefix = shift;
+    my $i = 0;
+    foreach my $bit (@_) {
+	print "#define ${prefix}_BITS_$i\t$bit\n";
+	$i++;
+    }
+}
+
+sub print_ulseq {
+    my $i = 0;
+    foreach my $v (@_) {
+	if ($i % 4 == 0) {
+	    print "\n" if $i != 0;
+	    print "\t";
+	}
+	printf "0x%08x, ", $v;
+	$i++;
+    }
+    print "\n";
+}