diff options
Diffstat (limited to 'contrib/idn/idnkit-1.0-src/util')
-rw-r--r-- | contrib/idn/idnkit-1.0-src/util/Makefile | 43 | ||||
-rw-r--r-- | contrib/idn/idnkit-1.0-src/util/SparseMap.pm | 575 | ||||
-rw-r--r-- | contrib/idn/idnkit-1.0-src/util/UCD.pm | 194 | ||||
-rwxr-xr-x | contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl | 405 | ||||
-rwxr-xr-x | contrib/idn/idnkit-1.0-src/util/generate_normalize_data.pl | 586 |
5 files changed, 1803 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/util/Makefile b/contrib/idn/idnkit-1.0-src/util/Makefile new file mode 100644 index 0000000..2f49ddd --- /dev/null +++ b/contrib/idn/idnkit-1.0-src/util/Makefile @@ -0,0 +1,43 @@ +# $Id: Makefile,v 1.1.1.1 2003/06/04 00:27:52 marka Exp $ + +PERL = /usr/local/bin/perl -w + +V301_NORM_DIR = data/unicode-3.0.0 +V310_NORM_DIR = data/unicode-3.1.0 +V320_NORM_DIR = data/unicode-3.2.0 +NORM_GENERATOR = ./generate_normalize_data.pl + +NAMEPREP_DIR = data/nameprep +NAMEPREP_FILES = \ + $(NAMEPREP_DIR)/nameprep.rfc3491.map \ + $(NAMEPREP_DIR)/nameprep.rfc3491.prohibited \ + $(NAMEPREP_DIR)/nameprep.rfc3491.unassigned \ + $(NAMEPREP_DIR)/nameprep.rfc3491.bidi +NAMEPREP_GENERATOR = ./generate_nameprep_data.pl +NAMEPREP_VERSIONS = rfc3491 + +all: normalize nameprep + +normalize: ../lib/unicodedata_320.c + +nameprep: ../lib/nameprepdata.c + +../lib/unicodedata_301.c: $(NORM_GENERATOR) + -rm -f $@.tmp + $(PERL) $(NORM_GENERATOR) -prefix v301_ -dir $(V301_NORM_DIR) \ + > $@.tmp && mv $@.tmp $@ + +../lib/unicodedata_310.c: $(NORM_GENERATOR) + -rm -f $@.tmp + $(PERL) $(NORM_GENERATOR) -prefix v310_ -dir $(V310_NORM_DIR) \ + > $@.tmp && mv $@.tmp $@ + +../lib/unicodedata_320.c: $(NORM_GENERATOR) + -rm -f $@.tmp + $(PERL) $(NORM_GENERATOR) -prefix v320_ -dir $(V320_NORM_DIR) \ + > $@.tmp && mv $@.tmp $@ + +../lib/nameprepdata.c: $(NAMEPREP_GENERATOR) $(NAMEPREP_FILES) + -rm -f $@.tmp + $(PERL) $(NAMEPREP_GENERATOR) -dir $(NAMEPREP_DIR) \ + $(NAMEPREP_VERSIONS) > $@.tmp && mv $@.tmp $@ diff --git a/contrib/idn/idnkit-1.0-src/util/SparseMap.pm b/contrib/idn/idnkit-1.0-src/util/SparseMap.pm new file mode 100644 index 0000000..834c795 --- /dev/null +++ b/contrib/idn/idnkit-1.0-src/util/SparseMap.pm @@ -0,0 +1,575 @@ +# $Id: SparseMap.pm,v 1.1.1.1 2003/06/04 00:27:53 marka Exp $ +# +# Copyright (c) 2001 Japan Network Information Center. All rights reserved. +# +# By using this file, you agree to the terms and conditions set forth bellow. +# +# LICENSE TERMS AND CONDITIONS +# +# The following License Terms and Conditions apply, unless a different +# license is obtained from Japan Network Information Center ("JPNIC"), +# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, +# Chiyoda-ku, Tokyo 101-0047, Japan. +# +# 1. Use, Modification and Redistribution (including distribution of any +# modified or derived work) in source and/or binary forms is permitted +# under this License Terms and Conditions. +# +# 2. Redistribution of source code must retain the copyright notices as they +# appear in each source code file, this License Terms and Conditions. +# +# 3. Redistribution in binary form must reproduce the Copyright Notice, +# this License Terms and Conditions, in the documentation and/or other +# materials provided with the distribution. For the purposes of binary +# distribution the "Copyright Notice" refers to the following language: +# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." +# +# 4. The name of JPNIC may not be used to endorse or promote products +# derived from this Software without specific prior written approval of +# JPNIC. +# +# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +# + +package SparseMap; + +use strict; +use Carp; + +my $debug = 0; + +sub new { + # common options are: + # BITS => [8, 7, 6], # 3-level map, 2nd level bits=7, 3rd = 6. + # MAX => 0x110000 # actually, max + 1. + my $class = shift; + my $self = {@_}; + + croak "BITS unspecified" unless exists $self->{BITS}; + croak "BITS is not an array reference" + unless ref($self->{BITS}) eq 'ARRAY'; + croak "MAX unspecified" unless exists $self->{MAX}; + + $self->{MAXLV} = @{$self->{BITS}} - 1; + $self->{FIXED} = 0; + + my $lv0size = (indices($self, $self->{MAX} - 1))[0] + 1; + + my @map = (undef) x $lv0size; + $self->{MAP} = \@map; + + bless $self, $class; +} + +sub add1 { + my ($self, $n, $val) = @_; + + croak "Already fixed" if $self->{FIXED}; + carp("data ($n) out of range"), return if $n >= $self->{MAX}; + + my @index = $self->indices($n); + my $r = $self->{MAP}; + my $maxlv = $self->{MAXLV}; + my $idx; + my $lv; + + for ($lv = 0; $lv < $maxlv - 1; $lv++) { + $idx = $index[$lv]; + $r->[$idx] = $self->create_imap($lv + 1, undef) + unless defined $r->[$idx]; + $r = $r->[$idx]; + } + $idx = $index[$lv]; + $r->[$idx] = $self->create_dmap() unless defined $r->[$idx]; + $self->add_to_dmap($r->[$idx], $index[$maxlv], $val); +} + +sub fix { + my $self = shift; + my $map = $self->{MAP}; + my $maxlv = $self->{MAXLV}; + my @tmp; + my @zero; + + carp "Already fixed" if $self->{FIXED}; + $self->collapse_tree(); + $self->fill_default(); + $self->{FIXED} = 1; +} + +sub indices { + my $self = shift; + my $v = shift; + my @bits = @{$self->{BITS}}; + my @idx; + + print "indices($v,", join(',', @bits), ") = " if $debug; + for (my $i = @bits - 1; $i >= 0; $i--) { + my $bit = $bits[$i]; + unshift @idx, $v & ((1 << $bit) - 1); + $v = $v >> $bit; + } + print "(", join(',', @idx), ")\n" if $debug; + @idx; +} + +sub get { + my $self = shift; + my $v = shift; + my $map = $self->{MAP}; + my @index = $self->indices($v); + + croak "Not yet fixed" unless $self->{FIXED}; + + my $lastidx = pop @index; + foreach my $idx (@index) { + return $map->{DEFAULT} unless defined $map->[$idx]; + $map = $map->[$idx]; + } + $map->[$lastidx]; +} + +sub indirectmap { + my $self = shift; + + croak "Not yet fixed" unless $self->{FIXED}; + + my @maps = $self->collect_maps(); + my $maxlv = $self->{MAXLV}; + my @bits = @{$self->{BITS}}; + + my @indirect = (); + for (my $lv = 0; $lv < $maxlv; $lv++) { + my $offset; + my $chunksz; + my $mapsz = @{$maps[$lv]->[0]}; + if ($lv < $maxlv - 1) { + # indirect map + $offset = @indirect + @{$maps[$lv]} * @{$maps[$lv]->[0]}; + $chunksz = (1 << $bits[$lv + 1]); + } else { + # direct map + $offset = 0; + $chunksz = 1; + } + my $nextmaps = $maps[$lv + 1]; + foreach my $mapref (@{$maps[$lv]}) { + croak "mapsize inconsistent ", scalar(@$mapref), + " should be ", $mapsz, " (lv $lv)\n" if @$mapref != $mapsz; + foreach my $m (@$mapref) { + my $idx; + for ($idx = 0; $idx < @$nextmaps; $idx++) { + last if $nextmaps->[$idx] == $m; + } + croak "internal error: map corrupted" if $idx >= @$nextmaps; + push @indirect, $offset + $chunksz * $idx; + } + } + } + @indirect; +} + +sub cprog_imap { + my $self = shift; + my %opt = @_; + my $name = $opt{NAME} || 'map'; + my @indirect = $self->indirectmap(); + my $prog; + my $i; + my ($idtype, $idcol, $idwid); + + my $max = 0; + $max < $_ and $max = $_ foreach @indirect; + + if ($max < 256) { + $idtype = 'char'; + $idcol = 8; + $idwid = 3; + } elsif ($max < 65536) { + $idtype = 'short'; + $idcol = 8; + $idwid = 5; + } else { + $idtype = 'long'; + $idcol = 4; + $idwid = 10; + } + $prog = "static const unsigned $idtype ${name}_imap[] = {\n"; + $i = 0; + foreach my $v (@indirect) { + if ($i % $idcol == 0) { + $prog .= "\n" if $i != 0; + $prog .= "\t"; + } + $prog .= sprintf "%${idwid}d, ", $v; + $i++; + } + $prog .= "\n};\n"; + $prog; +} + +sub cprog { + my $self = shift; + $self->cprog_imap(@_) . "\n" . $self->cprog_dmap(@_); +} + +sub stat { + my $self = shift; + my @maps = $self->collect_maps(); + my $elsize = $self->{ELSIZE}; + my $i; + my $total = 0; + my @lines; + + for ($i = 0; $i < $self->{MAXLV}; $i++) { + my $nmaps = @{$maps[$i]}; + my $mapsz = @{$maps[$i]->[0]}; + push @lines, "level $i: $nmaps maps (size $mapsz) "; + push @lines, "[", $nmaps * $mapsz * $elsize, "]" if $elsize; + push @lines, "\n"; + } + my $ndmaps = @{$maps[$i]}; + push @lines, "level $i: $ndmaps dmaps"; + my $r = $maps[$i]->[0]; + if (ref($r) eq 'ARRAY') { + push @lines, " (size ", scalar(@$r), ")"; + } + push @lines, "\n"; + join '', @lines; +} + +sub collapse_tree { + my $self = shift; + my @tmp; + + $self->_collapse_tree_rec($self->{MAP}, 0, \@tmp); +} + +sub _collapse_tree_rec { + my ($self, $r, $lv, $refs) = @_; + my $ref = $refs->[$lv]; + my $maxlv = $self->{MAXLV}; + my $found; + + return $r unless defined $r; + + $ref = $refs->[$lv] = [] unless defined $ref; + + if ($lv == $maxlv) { + $found = $self->find_dmap($ref, $r); + } else { + for (my $i = 0; $i < @$r; $i++) { + $r->[$i] = $self->_collapse_tree_rec($r->[$i], $lv + 1, $refs); + } + $found = $self->find_imap($ref, $r); + } + unless ($found) { + $found = $r; + push @$ref, $found; + } + return $found; +} + +sub fill_default { + my $self = shift; + my $maxlv = $self->{MAXLV}; + my $bits = $self->{BITS}; + my @zeros; + + $zeros[$maxlv] = $self->create_dmap(); + for (my $lv = $maxlv - 1; $lv >= 0; $lv--) { + my $r = $zeros[$lv + 1]; + $zeros[$lv] = $self->create_imap($lv, $r); + } + _fill_default_rec($self->{MAP}, 0, $maxlv, \@zeros); +} + +sub _fill_default_rec { + my ($r, $lv, $maxlv, $zeros) = @_; + + return if $lv == $maxlv; + for (my $i = 0; $i < @$r; $i++) { + if (defined($r->[$i])) { + _fill_default_rec($r->[$i], $lv + 1, $maxlv, $zeros); + } else { + $r->[$i] = $zeros->[$lv + 1]; + } + } +} + +sub create_imap { + my ($self, $lv, $v) = @_; + my @map; + @map = ($v) x (1 << $self->{BITS}->[$lv]); + \@map; +} + +sub find_imap { + my ($self, $maps, $map) = @_; + my $i; + + foreach my $el (@$maps) { + next unless @$el == @$map; + for ($i = 0; $i < @$el; $i++) { + last unless ($el->[$i] || 0) == ($map->[$i] || 0); + } + return $el if $i >= @$el; + } + undef; +} + +sub collect_maps { + my $self = shift; + my @maps; + _collect_maps_rec($self->{MAP}, 0, $self->{MAXLV}, \@maps); + @maps; +} + +sub _collect_maps_rec { + my ($r, $lv, $maxlv, $maps) = @_; + my $mapref = $maps->[$lv]; + + return unless defined $r; + foreach my $ref (@{$mapref}) { + return if $ref == $r; + } + push @{$maps->[$lv]}, $r; + if ($lv < $maxlv) { + _collect_maps_rec($_, $lv + 1, $maxlv, $maps) foreach @{$r}; + } +} + +sub add {confess "Subclass responsibility";} +sub create_dmap {confess "Subclass responsibility";} +sub add_to_dmap {confess "Subclass responsibility";} +sub find_dmap {confess "Subclass responsibility";} +sub cprog_dmap {confess "Subclass responsibility";} + +1; + +package SparseMap::Bit; + +use strict; +use vars qw(@ISA); +use Carp; +#use SparseMap; + +@ISA = qw(SparseMap); + +sub new { + my $class = shift; + my $self = $class->SUPER::new(@_); + $self->{DEFAULT} = 0; + bless $self, $class; +} + +sub add { + my $self = shift; + + $self->add1($_, undef) foreach @_; +} + +sub create_dmap { + my $self = shift; + my $bmbits = $self->{BITS}->[-1]; + + my $s = "\0" x (1 << ($bmbits - 3)); + \$s; +} + +sub add_to_dmap { + my ($self, $map, $idx, $val) = @_; + vec($$map, $idx, 1) = 1; +} + +sub find_dmap { + my ($self, $ref, $r) = @_; + foreach my $map (@$ref) { + return $map if $$map eq $$r; + } + return undef; +} + +sub cprog_dmap { + my $self = shift; + my %opt = @_; + my $name = $opt{NAME} || 'map'; + my @maps = $self->collect_maps(); + my @bitmap = @{$maps[-1]}; + my $prog; + my $bmsize = 1 << ($self->{BITS}->[-1] - 3); + + $prog = <<"END"; +static const struct { + unsigned char bm[$bmsize]; +} ${name}_bitmap[] = { +END + + foreach my $bm (@bitmap) { + my $i = 0; + $prog .= "\t{{\n"; + foreach my $v (unpack 'C*', $$bm) { + if ($i % 16 == 0) { + $prog .= "\n" if $i != 0; + $prog .= "\t"; + } + $prog .= sprintf "%3d,", $v; + $i++; + } + $prog .= "\n\t}},\n"; + } + $prog .= "};\n"; + $prog; +} + +1; + +package SparseMap::Int; + +use strict; +use vars qw(@ISA); +use Carp; +#use SparseMap; + +@ISA = qw(SparseMap); + +sub new { + my $class = shift; + my $self = $class->SUPER::new(@_); + $self->{DEFAULT} = 0 unless exists $self->{DEFAULT}; + bless $self, $class; +} + +sub add { + my $self = shift; + while (@_ > 0) { + my $n = shift; + my $val = shift; + $self->add1($n, $val); + } +} + +sub create_dmap { + my $self = shift; + my $tblbits = $self->{BITS}->[-1]; + my $default = $self->{DEFAULT}; + + my @tbl = ($default) x (1 << $tblbits); + \@tbl; +} + +sub add_to_dmap { + my ($self, $map, $idx, $val) = @_; + $map->[$idx] = $val; +} + +sub find_dmap { + my ($self, $ref, $r) = @_; + foreach my $map (@$ref) { + if (@$map == @$r) { + my $i; + for ($i = 0; $i < @$map; $i++) { + last if $map->[$i] != $r->[$i]; + } + return $map if $i == @$map; + } + } + return undef; +} + +sub cprog_dmap { + my $self = shift; + my %opt = @_; + my $name = $opt{NAME} || 'map'; + my @maps = $self->collect_maps(); + my @table = @{$maps[-1]}; + my $prog; + my $i; + my ($idtype, $idcol, $idwid); + my $tblsize = 1 << $self->{BITS}->[-1]; + + my ($min, $max); + foreach my $a (@table) { + foreach my $v (@$a) { + $min = $v if !defined($min) or $min > $v; + $max = $v if !defined($max) or $max < $v; + } + } + if (exists $opt{MAPTYPE}) { + $idtype = $opt{MAPTYPE}; + } else { + my $u = $min < 0 ? '' : 'unsigned '; + my $absmax = abs($max); + $absmax = abs($min) if abs($min) > $absmax; + + if ($absmax < 256) { + $idtype = "${u}char"; + } elsif ($absmax < 65536) { + $idtype = "${u}short"; + } else { + $idtype = "${u}long"; + } + } + + $idwid = decimalwidth($max); + $idwid = decimalwidth($min) if decimalwidth($min) > $idwid; + + $prog = <<"END"; +static const struct { + $idtype tbl[$tblsize]; +} ${name}_table[] = { +END + + foreach my $a (@table) { + my $i = 0; + my $col = 0; + $prog .= "\t{{\n\t"; + foreach my $v (@$a) { + my $s = sprintf "%${idwid}d, ", $v; + $col += length($s); + if ($col > 70) { + $prog .= "\n\t"; + $col = length($s); + } + $prog .= $s; + } + $prog .= "\n\t}},\n"; + } + $prog .= "};\n"; + $prog; +} + +sub decimalwidth { + my $n = shift; + my $neg = 0; + my $w; + + if ($n < 0) { + $neg = 1; + $n = -$n; + } + if ($n < 100) { + $w = 2; + } elsif ($n < 10000) { + $w = 4; + } elsif ($n < 1000000) { + $w = 6; + } elsif ($n < 100000000) { + $w = 8; + } else { + $w = 10; + } + $w + $neg; +} + +1; diff --git a/contrib/idn/idnkit-1.0-src/util/UCD.pm b/contrib/idn/idnkit-1.0-src/util/UCD.pm new file mode 100644 index 0000000..19629e6 --- /dev/null +++ b/contrib/idn/idnkit-1.0-src/util/UCD.pm @@ -0,0 +1,194 @@ +# $Id: UCD.pm,v 1.1.1.1 2003/06/04 00:27:53 marka Exp $ +# +# Copyright (c) 2000,2001 Japan Network Information Center. +# All rights reserved. +# +# By using this file, you agree to the terms and conditions set forth bellow. +# +# LICENSE TERMS AND CONDITIONS +# +# The following License Terms and Conditions apply, unless a different +# license is obtained from Japan Network Information Center ("JPNIC"), +# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, +# Chiyoda-ku, Tokyo 101-0047, Japan. +# +# 1. Use, Modification and Redistribution (including distribution of any +# modified or derived work) in source and/or binary forms is permitted +# under this License Terms and Conditions. +# +# 2. Redistribution of source code must retain the copyright notices as they +# appear in each source code file, this License Terms and Conditions. +# +# 3. Redistribution in binary form must reproduce the Copyright Notice, +# this License Terms and Conditions, in the documentation and/or other +# materials provided with the distribution. For the purposes of binary +# distribution the "Copyright Notice" refers to the following language: +# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." +# +# 4. The name of JPNIC may not be used to endorse or promote products +# derived from this Software without specific prior written approval of +# JPNIC. +# +# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +# + +package UCD; + +# +# UCD.pm -- parser for Unicode Character Database files. +# +# This file is an aggregation of the following modules, each of which +# provides a parser for a specific data file of UCD. +# UCD::UnicodeData -- for UnicodeData.txt +# UCD::CaseFolding -- for CaseFolding.txt +# UCD::SpecialCasing -- for SpecialCasing.txt +# UCD::CompositionExclusions -- for CompositionExclusions-1.txt +# +# Each module provides two subroutines: +# +# $line = getline(\*HANDLE); +# reads next non-comment line from HANDLE, and returns it. +# undef will be returned upon EOF. +# +# %fields = parse($line); +# parses a line and extract fields, and returns a list of +# field name and its value, suitable for assignment to a hash. +# + +package UCD::UnicodeData; + +use strict; +use Carp; + +sub getline { + my $fh = shift; + my $s = <$fh>; + $s =~ s/\r?\n$// if $s; + $s; +} + +sub parseline { + my $s = shift; + + my @f = split /;/, $s, -1; + return (CODE => hex($f[0]), + NAME => $f[1], + CATEGORY => $f[2], + CLASS => $f[3]+0, + BIDI => $f[4], + DECOMP => dcmap($f[5]), + DECIMAL => dvalue($f[6]), + DIGIT => dvalue($f[7]), + NUMERIC => dvalue($f[8]), + MIRRORED => $f[9] eq 'Y', + NAME10 => $f[10], + COMMENT => $f[11], + UPPER => ucode($f[12]), + LOWER => ucode($f[13]), + TITLE => ucode($f[14])); +} + +sub dcmap { + my $v = shift; + return undef if $v eq ''; + $v =~ /^(?:(<[^>]+>)\s*)?(\S.*)/ + or croak "invalid decomposition mapping \"$v\""; + my $tag = $1 || ''; + [$tag, map {hex($_)} split(' ', $2)]; +} + +sub ucode { + my $v = shift; + return undef if $v eq ''; + hex($v); +} + +sub dvalue { + my $v = shift; + return undef if $v eq ''; + $v; +} + +package UCD::CaseFolding; + +use strict; + +sub getline { + my $fh = shift; + while (defined(my $s = <$fh>)) { + next if $s =~ /^\#/; + next if $s =~ /^\s*$/; + $s =~ s/\r?\n$//; + return $s; + } + undef; +} + +sub parseline { + my $s = shift; + my @f = split /;\s*/, $s, -1; + return (CODE => hex($f[0]), + TYPE => $f[1], + MAP => [map(hex, split ' ', $f[2])], + ); +} + +package UCD::SpecialCasing; + +use strict; + +sub getline { + my $fh = shift; + while (defined(my $s = <$fh>)) { + next if $s =~ /^\#/; + next if $s =~ /^\s*$/; + $s =~ s/\r?\n$//; + return $s; + } + undef; +} + +sub parseline { + my $s = shift; + + my @f = split /;\s*/, $s, -1; + my $cond = (@f > 5) ? $f[4] : undef; + return (CODE => hex($f[0]), + LOWER => [map(hex, split ' ', $f[1])], + TITLE => [map(hex, split ' ', $f[2])], + UPPER => [map(hex, split ' ', $f[3])], + CONDITION => $cond); +} + +package UCD::CompositionExclusions; + +use strict; + +sub getline { + my $fh = shift; + while (defined(my $s = <$fh>)) { + next if $s =~ /^\#/; + next if $s =~ /^\s*$/; + $s =~ s/\r?\n$//; + return $s; + } + undef; +} + +sub parseline { + my $s = shift; + m/^[0-9A-Fa-f]+/; + return (CODE => hex($&)); +} + +1; diff --git a/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl b/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl new file mode 100755 index 0000000..31dd18b --- /dev/null +++ b/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl @@ -0,0 +1,405 @@ +#! /usr/local/bin/perl -w +# $Id: generate_nameprep_data.pl,v 1.1.1.1 2003/06/04 00:27:54 marka Exp $ +# +# Copyright (c) 2001 Japan Network Information Center. All rights reserved. +# +# By using this file, you agree to the terms and conditions set forth bellow. +# +# LICENSE TERMS AND CONDITIONS +# +# The following License Terms and Conditions apply, unless a different +# license is obtained from Japan Network Information Center ("JPNIC"), +# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, +# Chiyoda-ku, Tokyo 101-0047, Japan. +# +# 1. Use, Modification and Redistribution (including distribution of any +# modified or derived work) in source and/or binary forms is permitted +# under this License Terms and Conditions. +# +# 2. Redistribution of source code must retain the copyright notices as they +# appear in each source code file, this License Terms and Conditions. +# +# 3. Redistribution in binary form must reproduce the Copyright Notice, +# this License Terms and Conditions, in the documentation and/or other +# materials provided with the distribution. For the purposes of binary +# distribution the "Copyright Notice" refers to the following language: +# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." +# +# 4. The name of JPNIC may not be used to endorse or promote products +# derived from this Software without specific prior written approval of +# JPNIC. +# +# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +# + +use v5.6.0; # for pack('U') +use bytes; + +use lib qw(.); + +use SparseMap; +use Getopt::Long; + +(my $myid = '$Id: generate_nameprep_data.pl,v 1.1.1.1 2003/06/04 00:27:54 marka Exp $') =~ s/\$([^\$]+)\$/\$-$1-\$/; + +my @map_bits = (9, 7, 5); +my @proh_bits = (7, 7, 7); +my @unas_bits = (7, 7, 7); +my @bidi_bits = (9, 7, 5); + +my @bidi_types = ('OTHERS', 'R_AL', 'L'); + +my $dir = '.'; +my @versions = (); + +GetOptions('dir=s', \$dir) or die usage(); +@versions = @ARGV; + +print_header(); + +bits_definition("MAP", @map_bits); +bits_definition("PROH", @proh_bits); +bits_definition("UNAS", @unas_bits); +bits_definition("BIDI", @bidi_bits); + +generate_data($_) foreach @ARGV; + +sub usage { + die "Usage: $0 [-dir dir] version..\n"; +} + +sub generate_data { + my $version = shift; + generate_mapdata($version, "$dir/nameprep.$version.map"); + generate_prohibiteddata($version, "$dir/nameprep.$version.prohibited"); + generate_unassigneddata($version, "$dir/nameprep.$version.unassigned"); + generate_bididata($version, "$dir/nameprep.$version.bidi"); +} + +# +# Generate mapping data. +# +sub generate_mapdata { + my $version = shift; + my $file = shift; + + my $map = SparseMap::Int->new(BITS => [@map_bits], + MAX => 0x110000, + MAPALL => 1, + DEFAULT => 0); + open FILE, $file or die "cannot open $file: $!\n"; + + my $mapbuf = "\0"; # dummy + my %maphash = (); + while (<FILE>) { + if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) { + my $same_as = $1; + if (grep {$_ eq $same_as} @versions > 0) { + generate_map_ref($version, $same_as); + close FILE; + return; + } + next; + } + next if /^\#/; + next if /^\s*$/; + register_map($map, \$mapbuf, \%maphash, $_); + } + close FILE; + generate_map($version, $map, \$mapbuf); +} + +# +# Generate prohibited character data. +# +sub generate_prohibiteddata { + my $version = shift; + my $file = shift; + + my $proh = SparseMap::Bit->new(BITS => [@proh_bits], + MAX => 0x110000); + open FILE, $file or die "cannot open $file: $!\n"; + while (<FILE>) { + if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) { + my $same_as = $1; + if (grep {$_ eq $same_as} @versions > 0) { + generate_prohibited_ref($version, $same_as); + close FILE; + return; + } + next; + } + next if /^\#/; + next if /^\s*$/; + register_prohibited($proh, $_); + } + close FILE; + generate_prohibited($version, $proh); +} + +# +# Generate unassigned codepoint data. +# +sub generate_unassigneddata { + my $version = shift; + my $file = shift; + + my $unas = SparseMap::Bit->new(BITS => [@unas_bits], + MAX => 0x110000); + open FILE, $file or die "cannot open $file: $!\n"; + while (<FILE>) { + if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) { + my $same_as = $1; + if (grep {$_ eq $same_as} @versions > 0) { + generate_unassigned_ref($version, $same_as); + close FILE; + return; + } + next; + } + next if /^\#/; + next if /^\s*$/; + register_unassigned($unas, $_); + } + close FILE; + generate_unassigned($version, $unas); +} + +# +# Generate data of bidi "R" or "AL" characters. +# +sub generate_bididata { + my $version = shift; + my $file = shift; + + my $bidi = SparseMap::Int->new(BITS => [@bidi_bits], + MAX => 0x110000); + open FILE, $file or die "cannot open $file: $!\n"; + + my $type = 0; + while (<FILE>) { + if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) { + my $same_as = $1; + if (grep {$_ eq $same_as} @versions > 0) { + generate_unassigned_ref($version, $same_as); + close FILE; + return; + } + next; + } + if (/^%\s*BIDI_TYPE\s+(\S+)$/) { + my $i = 0; + for ($i = 0; $i < @bidi_types; $i++) { + if ($1 eq $bidi_types[$i]) { + $type = $i; + last; + } + } + die "unrecognized line: $_" if ($i >= @bidi_types); + next; + } + next if /^\#/; + next if /^\s*$/; + register_bidi($bidi, $type, $_); + } + close FILE; + + generate_bidi($version, $bidi); +} + +sub print_header { + print <<"END"; +/* \$Id\$ */ +/* $myid */ +/* + * Do not edit this file! + * This file is generated from NAMEPREP specification. + */ + +END +} + +sub bits_definition { + my $name = shift; + my @bits = @_; + my $i = 0; + + foreach my $n (@bits) { + print "#define ${name}_BITS_$i\t$n\n"; + $i++; + } + print "\n"; +} + +sub register_map { + my ($map, $bufref, $hashref, $line) = @_; + + my ($from, $to) = split /;/, $line; + my @fcode = map {hex($_)} split ' ', $from; + my @tcode = map {hex($_)} split ' ', $to; + + my $ucs4 = pack('V*', @tcode); + $ucs4 =~ s/\000+$//; + + my $offset; + if (exists $hashref->{$ucs4}) { + $offset = $hashref->{$ucs4}; + } else { + $offset = length $$bufref; + $$bufref .= pack('C', length($ucs4)) . $ucs4; + $hashref->{$ucs4} = $offset; + } + + die "unrecognized line: $line" if @fcode != 1; + $map->add($fcode[0], $offset); +} + +sub generate_map { + my ($version, $map, $bufref) = @_; + + $map->fix(); + + print $map->cprog(NAME => "nameprep_${version}_map"); + print "\nstatic const unsigned char nameprep_${version}_map_data[] = \{\n"; + print_uchararray($$bufref); + print "};\n\n"; +} + +sub generate_map_ref { + my ($version, $refversion) = @_; + print <<"END"; +#define nameprep_${version}_map_imap nameprep_${refversion}_map_imap +#define nameprep_${version}_map_table nameprep_${refversion}_map_table +#define nameprep_${version}_map_data nameprep_${refversion}_map_data + +END +} + +sub print_uchararray { + my @chars = unpack 'C*', $_[0]; + my $i = 0; + foreach my $v (@chars) { + if ($i % 12 == 0) { + print "\n" if $i != 0; + print "\t"; + } + printf "%3d, ", $v; + $i++; + } + print "\n"; +} + +sub register_prohibited { + my $proh = shift; + register_bitmap($proh, @_); +} + +sub register_unassigned { + my $unas = shift; + register_bitmap($unas, @_); +} + +sub register_bidi { + my $bidi = shift; + my $type = shift; + register_intmap($bidi, $type, @_); +} + +sub generate_prohibited { + my ($version, $proh) = @_; + generate_bitmap($proh, "nameprep_${version}_prohibited"); + print "\n"; +} + +sub generate_prohibited_ref { + my ($version, $refversion) = @_; + print <<"END"; +#define nameprep_${version}_prohibited_imap nameprep_${refversion}_prohibited_imap +#define nameprep_${version}_prohibited_bitmap nameprep_${refversion}_prohibited_bitmap + +END +} + +sub generate_unassigned { + my ($version, $unas) = @_; + generate_bitmap($unas, "nameprep_${version}_unassigned"); + print "\n"; +} + +sub generate_unassigned_ref { + my ($version, $refversion) = @_; + print <<"END"; +#define nameprep_${version}_unassigned_imap nameprep_${refversion}_unassigned_imap +#define nameprep_${version}_unassigned_bitmap nameprep_${refversion}_unassigned_bitmap + +END +} + +sub generate_bidi { + my ($version, $bidi) = @_; + + $bidi->fix(); + + print $bidi->cprog(NAME => "nameprep_${version}_bidi"); + print "\n"; + print "static const unsigned char nameprep_${version}_bidi_data[] = \{\n"; + + foreach my $type (@bidi_types) { + printf "\tidn_biditype_%s, \n", lc($type); + } + print "};\n\n"; +} + +sub generate_bidi_ref { + my ($version, $refversion) = @_; + print <<"END"; +#define nameprep_${version}_bidi_imap nameprep_${refversion}_bidi_imap +#define nameprep_${version}_bidi_table nameprep_${refversion}_bidi_table + +END +} + +sub register_bitmap { + my $map = shift; + my $line = shift; + + /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/ or die "unrecognized line: $line"; + my $start = hex($1); + my $end = defined($2) ? hex($2) : undef; + if (defined $end) { + $map->add($start .. $end); + } else { + $map->add($start); + } +} + +sub register_intmap { + my $map = shift; + my $value = shift; + my $line = shift; + + /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/ or die "unrecognized line: $line"; + my $start = hex($1); + my $end = defined($2) ? hex($2) : $start; + for (my $i = $start; $i <= $end; $i++) { + $map->add($i, $value); + } +} + +sub generate_bitmap { + my $map = shift; + my $name = shift; + $map->fix(); + #$map->stat(); + print $map->cprog(NAME => $name); +} diff --git a/contrib/idn/idnkit-1.0-src/util/generate_normalize_data.pl b/contrib/idn/idnkit-1.0-src/util/generate_normalize_data.pl new file mode 100755 index 0000000..fe81648 --- /dev/null +++ b/contrib/idn/idnkit-1.0-src/util/generate_normalize_data.pl @@ -0,0 +1,586 @@ +#! /usr/local/bin/perl -w +# $Id: generate_normalize_data.pl,v 1.1.1.1 2003/06/04 00:27:55 marka Exp $ +# +# Copyright (c) 2000,2001 Japan Network Information Center. +# All rights reserved. +# +# By using this file, you agree to the terms and conditions set forth bellow. +# +# LICENSE TERMS AND CONDITIONS +# +# The following License Terms and Conditions apply, unless a different +# license is obtained from Japan Network Information Center ("JPNIC"), +# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, +# Chiyoda-ku, Tokyo 101-0047, Japan. +# +# 1. Use, Modification and Redistribution (including distribution of any +# modified or derived work) in source and/or binary forms is permitted +# under this License Terms and Conditions. +# +# 2. Redistribution of source code must retain the copyright notices as they +# appear in each source code file, this License Terms and Conditions. +# +# 3. Redistribution in binary form must reproduce the Copyright Notice, +# this License Terms and Conditions, in the documentation and/or other +# materials provided with the distribution. For the purposes of binary +# distribution the "Copyright Notice" refers to the following language: +# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." +# +# 4. The name of JPNIC may not be used to endorse or promote products +# derived from this Software without specific prior written approval of +# JPNIC. +# +# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +# + +# +# Generate lib/unicodedata.c from UnicodeData.txt, +# CompositionExclusions-1.txt, SpecialCasing.txt and CaseFolding.txt, +# all of them available from ftp://ftp.unicode.org/Public/UNIDATA/. +# + +use strict; +use lib qw(.); + +use Getopt::Long; +use UCD; +use SparseMap; + +use constant UCS_MAX => 0x110000; +use constant END_BIT => 0x80000000; + +my $DECOMP_COMPAT_BIT = 0x8000; + +my $CASEMAP_FINAL_BIT = 0x1; +my $CASEMAP_NONFINAL_BIT = 0x2; +my $CASEMAP_LAST_BIT = 0x10; + +my $LETTER_BIT = 1; +my $NSPMARK_BIT = 2; + +(my $myid = '$Id: generate_normalize_data.pl,v 1.1.1.1 2003/06/04 00:27:55 marka Exp $') =~ s/\$([^\$]+)\$/\$-$1-\$/; + +my @default_bits = (9, 7, 5); +#my @default_bits = (7, 7, 7); +my @canon_class_bits = @default_bits; +my @decomp_bits = @default_bits; +my @comp_bits = @default_bits; +my @folding_bits = @default_bits; +my @casemap_bits = @default_bits; +my @casemap_ctx_bits = @default_bits; + +my $prefix = ''; +my $dir = '.'; +my $unicodedatafile = 'UnicodeData.txt'; +my $exclusionfile = 'CompositionExclusions.txt'; +my $specialcasefile = 'SpecialCasing.txt'; +my $casefoldingfile = 'CaseFolding.txt'; +my $verbose; + +GetOptions('dir|d=s' => \$dir, + 'unicodedata|u=s' => \$unicodedatafile, + 'exclude|e=s' => \$exclusionfile, + 'specialcase|s=s' => \$specialcasefile, + 'casefold|c=s' => \$casefoldingfile, + 'prefix|p=s' => \$prefix, + 'verbose|v' => \$verbose, +) or usage(); + +foreach my $r (\$unicodedatafile, \$exclusionfile, + \$specialcasefile, \$casefoldingfile) { + $$r = "$dir/$$r" unless $$r =~ m|^/|; +} + +my %exclusions; +my %lower_special; +my %upper_special; + +my @decomp_data; +my @comp_data; +my @toupper_data; +my @tolower_data; +my @folding_data; + +# +# Create Mapping/Bitmap objects. +# + +# canonical class +my $canon_class = SparseMap::Int->new(BITS => [@canon_class_bits], + MAX => UCS_MAX, + MAPALL => 1, + DEFAULT => 0); + +# canonical/compatibility decomposition +my $decomp = SparseMap::Int->new(BITS => [@decomp_bits], + MAX => UCS_MAX, + MAPALL => 1, + DEFAULT => 0); + +# canonical composition +my $comp = SparseMap::Int->new(BITS => [@comp_bits], + MAX => UCS_MAX, + MAPALL => 1, + DEFAULT => 0); + +# uppercase/lowercase +my $upper = SparseMap::Int->new(BITS => [@casemap_bits], + MAX => UCS_MAX, + MAPALL => 1, + DEFAULT => 0); +my $lower = SparseMap::Int->new(BITS => [@casemap_bits], + MAX => UCS_MAX, + MAPALL => 1, + DEFAULT => 0); + +# final/nonfinal context +my $casemap_ctx = SparseMap::Int->new(BITS => [@casemap_ctx_bits], + MAX => UCS_MAX, + MAPALL => 1, + DEFAULT => 0); + +# casefolding +my $folding = SparseMap::Int->new(BITS => [@folding_bits], + MAX => UCS_MAX, + MAPALL => 1, + DEFAULT => 0); + +# +# Read datafiles. +# + +read_exclusion_file(); +read_specialcasing_file(); +read_unicodedata_file(); +read_casefolding_file(); + +print_header(); +print_canon_class(); +print_composition(); +print_decomposition(); +print_casemap(); +print_casemap_context(); +print_casefolding(); + +exit; + +sub usage { + print STDERR <<"END"; +Usage: $0 [options..] + options: + -d DIR directory where Unicode Character Data files resides [./] + -u FILE name of the UnicodeData file [UnicodeData.txt] + -e FILE name of the CompositionExclusion file [CompositionExclusions-1.txt] + -s FILE name of the SpecialCasing file [SpecialCasing.txt] + -c FILE name of the CaseFolding file [CaseFolding.txt] +END + exit 1; +} + +# +# read_exclusion_file -- read CompositionExclusions-1.txt. +# +sub read_exclusion_file { + open EXCLUDE, $exclusionfile or die "cannot open $exclusionfile: $!\n"; + while ($_ = UCD::CompositionExclusions::getline(\*EXCLUDE)) { + my %data = UCD::CompositionExclusions::parseline($_); + $exclusions{$data{CODE}} = 1; + } + close EXCLUDE; +} + +# +# read_specialcasing_file -- read SpecialCasing.txt +# +sub read_specialcasing_file { + open SPCASE, $specialcasefile or die "cannot open $specialcasefile: $!\n"; + while ($_ = UCD::SpecialCasing::getline(\*SPCASE)) { + my %data = UCD::SpecialCasing::parseline($_); + my $code = $data{CODE}; + my $lower = $data{LOWER}; + my $upper = $data{UPPER}; + my $cond = $data{CONDITION} || ''; + + next unless $cond eq '' or $cond =~ /^(NON_)?FINAL/; + + if (defined $cond && (@$lower > 1 || $lower->[0] != $code) + or @$lower > 1 or $lower->[0] != $code) { + $lower_special{$code} = [$lower, $cond]; + } + if (defined $cond && (@$upper > 1 || $upper->[0] != $code) + or @$upper > 1 or $upper->[0] != $code) { + $upper_special{$code} = [$upper, $cond]; + } + } + close SPCASE; +} + +# +# read_unicodedata_file -- read UnicodeData.txt +# +sub read_unicodedata_file { + open UCD, $unicodedatafile or die "cannot open $unicodedatafile: $!\n"; + + @decomp_data = (0); + @toupper_data = (0); + @tolower_data = (0); + + my @comp_cand; # canonical composition candidates + my %nonstarter; + + while ($_ = UCD::UnicodeData::getline(\*UCD)) { + my %data = UCD::UnicodeData::parseline($_); + my $code = $data{CODE}; + + # combining class + if ($data{CLASS} > 0) { + $nonstarter{$code} = 1; + $canon_class->add($code, $data{CLASS}); + } + + # uppercasing + if (exists $upper_special{$code} or defined $data{UPPER}) { + my $offset = @toupper_data; + my @casedata; + + $upper->add($code, $offset); + if (exists $upper_special{$code}) { + push @casedata, $upper_special{$code}; + } + if (defined $data{UPPER}) { + push @casedata, $data{UPPER}; + } + push @toupper_data, casemap_data(@casedata); + } + + # lowercasing + if (exists $lower_special{$code} or defined $data{LOWER}) { + my $offset = @tolower_data; + my @casedata; + + $lower->add($code, $offset); + if (exists $lower_special{$code}) { + push @casedata, $lower_special{$code}; + } + if (defined $data{LOWER}) { + push @casedata, $data{LOWER}; + } + push @tolower_data, casemap_data(@casedata); + } + + # composition/decomposition + if ($data{DECOMP}) { + my ($tag, @decomp) = @{$data{DECOMP}}; + my $offset = @decomp_data; + + # composition + if ($tag eq '' and @decomp > 1 and not exists $exclusions{$code}) { + # canonical composition candidate + push @comp_cand, [$code, @decomp]; + } + + # decomposition + if ($tag ne '') { + # compatibility decomposition + $offset |= $DECOMP_COMPAT_BIT; + } + $decomp->add($code, $offset); + push @decomp_data, @decomp; + $decomp_data[-1] |= END_BIT; + + } + + # final/nonfinal context + if ($data{CATEGORY} =~ /L[ult]/) { + $casemap_ctx->add($code, $LETTER_BIT); + } elsif ($data{CATEGORY} eq 'Mn') { + $casemap_ctx->add($code, $NSPMARK_BIT); + } + } + close UCD; + + # Eliminate composition candidates whose decomposition starts with + # a non-starter. + @comp_cand = grep {not exists $nonstarter{$_->[1]}} @comp_cand; + + @comp_data = ([0, 0, 0]); + my $last_code = -1; + my $last_offset = @comp_data; + for my $r (sort {$a->[1] <=> $b->[1] || $a->[2] <=> $b->[2]} @comp_cand) { + if ($r->[1] != $last_code) { + $comp->add($last_code, + ($last_offset | ((@comp_data - $last_offset)<<16))) + unless $last_code == -1; + $last_code = $r->[1]; + $last_offset = @comp_data; + } + push @comp_data, $r; + } + $comp->add($last_code, + ($last_offset | ((@comp_data - $last_offset)<<16))); +} + +sub casemap_data { + my @data = @_; + my @result = (); + while (@data > 0) { + my $r = shift @data; + my $flag = 0; + if (ref $r) { + if ($r->[1] eq 'FINAL') { + $flag |= $CASEMAP_FINAL_BIT; + } elsif ($r->[1] eq 'NON_FINAL') { + $flag |= $CASEMAP_NONFINAL_BIT; + } elsif ($r->[1] ne '') { + die "unknown condition \"", $r->[1], "\"\n"; + } + } + $flag |= $CASEMAP_LAST_BIT if @data == 0; + push @result, $flag; + push @result, (ref $r) ? @{$r->[0]} : $r; + $result[-1] |= END_BIT; + } + @result; +} + +# +# read_casefolding_file -- read CaseFolding.txt +# +sub read_casefolding_file { + open FOLD, $casefoldingfile or die "cannto open $casefoldingfile: $!\n"; + + # dummy. + @folding_data = (0); + + while ($_ = UCD::CaseFolding::getline(\*FOLD)) { + my %data = UCD::CaseFolding::parseline($_); + + $folding->add($data{CODE}, scalar(@folding_data)); + push @folding_data, @{$data{MAP}}; + $folding_data[-1] |= END_BIT; + } + close FOLD; +} + +sub print_header { + print <<"END"; +/* \$Id\$ */ +/* $myid */ +/* + * Do not edit this file! + * This file is generated from UnicodeData.txt, CompositionExclusions-1.txt, + * SpecialCasing.txt and CaseFolding.txt. + */ + +END +} + +# +# print_canon_class -- generate data for canonical class +# +sub print_canon_class { + $canon_class->fix(); + print STDERR "** cannon_class\n", $canon_class->stat() if $verbose; + + print <<"END"; + +/* + * Canonical Class + */ + +END + print_bits("CANON_CLASS", @canon_class_bits); + print "\n"; + print $canon_class->cprog(NAME => "${prefix}canon_class"); +} + +# +# print_composition -- generate data for canonical composition +# +sub print_composition { + $comp->fix(); + print STDERR "** composition\n", $comp->stat() if $verbose; + + print <<"END"; + +/* + * Canonical Composition + */ + +END + print_bits("CANON_COMPOSE", @comp_bits); + print "\n"; + print $comp->cprog(NAME => "${prefix}compose"); + print <<"END"; + +static const struct composition ${prefix}compose_seq[] = { +END + my $i = 0; + foreach my $r (@comp_data) { + if ($i % 2 == 0) { + print "\n" if $i != 0; + print "\t"; + } + printf "{ 0x%08x, 0x%08x }, ", $r->[2], $r->[0]; + $i++; + } + print "\n};\n\n"; +} + +# +# print_decomposition -- generate data for canonical/compatibility +# decomposition +# +sub print_decomposition { + $decomp->fix(); + print STDERR "** decomposition\n", $decomp->stat() if $verbose; + + print <<"END"; + +/* + * Canonical/Compatibility Decomposition + */ + +END + print_bits("DECOMP", @decomp_bits); + print "#define DECOMP_COMPAT\t$DECOMP_COMPAT_BIT\n\n"; + + print $decomp->cprog(NAME => "${prefix}decompose"); + + print "static const unsigned long ${prefix}decompose_seq[] = {\n"; + print_ulseq(@decomp_data); + print "};\n\n"; +} + +# +# print_casemap -- generate data for case mapping +# +sub print_casemap { + $upper->fix(); + $lower->fix(); + print STDERR "** upper mapping\n", $upper->stat() if $verbose; + print STDERR "** lower mapping\n", $lower->stat() if $verbose; + + print <<"END"; + +/* + * Lowercase <-> Uppercase mapping + */ + +/* + * Flags for special case mapping. + */ +#define CMF_FINAL $CASEMAP_FINAL_BIT +#define CMF_NONFINAL $CASEMAP_NONFINAL_BIT +#define CMF_LAST $CASEMAP_LAST_BIT +#define CMF_CTXDEP (CMF_FINAL|CMF_NONFINAL) + +END + print_bits("CASEMAP", @casemap_bits); + print "\n"; + print $upper->cprog(NAME => "${prefix}toupper"); + print $lower->cprog(NAME => "${prefix}tolower"); + + print "static const unsigned long ${prefix}toupper_seq[] = {\n"; + print_ulseq(@toupper_data); + print "};\n\n"; + + print "static const unsigned long ${prefix}tolower_seq[] = {\n"; + print_ulseq(@tolower_data); + print "};\n\n"; +} + +# +# print_casefolding -- generate data for case folding +# +sub print_casefolding { + $folding->fix(); + print STDERR "** case folding\n", $folding->stat() if $verbose; + + print <<"END"; + +/* + * Case Folding + */ + +END + print_bits("CASE_FOLDING", @folding_bits); + print "\n"; + print $folding->cprog(NAME => "${prefix}case_folding"); + + print "static const unsigned long ${prefix}case_folding_seq[] = {\n"; + print_ulseq(@folding_data); + print "};\n\n"; +} + +# +# print_casemap_context -- gerarate data for determining context +# (final/non-final) +# +sub print_casemap_context { + $casemap_ctx->fix(); + print STDERR "** casemap context\n", $casemap_ctx->stat() if $verbose; + + print <<"END"; + +/* + * Cased characters and non-spacing marks (for casemap context) + */ + +END + + print_bits("CASEMAP_CTX", @casemap_ctx_bits); + print <<"END"; + +#define CTX_CASED $LETTER_BIT +#define CTX_NSM $NSPMARK_BIT + +END + print $casemap_ctx->cprog(NAME => "${prefix}casemap_ctx"); +} + +sub sprint_composition_hash { + my $i = 0; + my $s = ''; + foreach my $r (@_) { + if ($i % 2 == 0) { + $s .= "\n" if $i != 0; + $s .= "\t"; + } + $s .= sprintf "{0x%04x, 0x%04x, 0x%04x}, ", @{$r}; + $i++; + } + $s; +} + +sub print_bits { + my $prefix = shift; + my $i = 0; + foreach my $bit (@_) { + print "#define ${prefix}_BITS_$i\t$bit\n"; + $i++; + } +} + +sub print_ulseq { + my $i = 0; + foreach my $v (@_) { + if ($i % 4 == 0) { + print "\n" if $i != 0; + print "\t"; + } + printf "0x%08x, ", $v; + $i++; + } + print "\n"; +} |