summaryrefslogtreecommitdiffstats
path: root/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl')
-rwxr-xr-xcontrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl405
1 files changed, 405 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl b/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl
new file mode 100755
index 0000000..31dd18b
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl
@@ -0,0 +1,405 @@
+#! /usr/local/bin/perl -w
+# $Id: generate_nameprep_data.pl,v 1.1.1.1 2003/06/04 00:27:54 marka Exp $
+#
+# Copyright (c) 2001 Japan Network Information Center. All rights reserved.
+#
+# By using this file, you agree to the terms and conditions set forth bellow.
+#
+# LICENSE TERMS AND CONDITIONS
+#
+# The following License Terms and Conditions apply, unless a different
+# license is obtained from Japan Network Information Center ("JPNIC"),
+# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
+# Chiyoda-ku, Tokyo 101-0047, Japan.
+#
+# 1. Use, Modification and Redistribution (including distribution of any
+# modified or derived work) in source and/or binary forms is permitted
+# under this License Terms and Conditions.
+#
+# 2. Redistribution of source code must retain the copyright notices as they
+# appear in each source code file, this License Terms and Conditions.
+#
+# 3. Redistribution in binary form must reproduce the Copyright Notice,
+# this License Terms and Conditions, in the documentation and/or other
+# materials provided with the distribution. For the purposes of binary
+# distribution the "Copyright Notice" refers to the following language:
+# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
+#
+# 4. The name of JPNIC may not be used to endorse or promote products
+# derived from this Software without specific prior written approval of
+# JPNIC.
+#
+# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+
+use v5.6.0; # for pack('U')
+use bytes;
+
+use lib qw(.);
+
+use SparseMap;
+use Getopt::Long;
+
+# Identification string printed in the generated header.  The substitution
+# rewrites the RCS keyword delimiters from "$...$" to "$-...-$" (presumably
+# so the keyword is not expanded again once the generated file is checked in).
+(my $myid = '$Id: generate_nameprep_data.pl,v 1.1.1.1 2003/06/04 00:27:54 marka Exp $') =~ s/\$([^\$]+)\$/\$-$1-\$/;
+
+# Bit widths passed to SparseMap as BITS for each table; they are also
+# echoed into the output as <NAME>_BITS_<n> macros by bits_definition().
+my @map_bits = (9, 7, 5);
+my @proh_bits = (7, 7, 7);
+my @unas_bits = (7, 7, 7);
+my @bidi_bits = (9, 7, 5);
+
+# Names accepted after "% BIDI_TYPE" in a .bidi file.  The index of a name
+# in this list is the value stored in the bidi map for its code points.
+my @bidi_types = ('OTHERS', 'R_AL', 'L');
+
+# Directory holding the nameprep.<version>.* input files (option -dir).
+my $dir = '.';
+# Versions named on the command line; consulted when resolving SAME-AS.
+my @versions = ();
+
+# usage() dies by itself, so no extra die is needed around it.
+GetOptions('dir=s', \$dir) or usage();
+@versions = @ARGV;
+
+print_header();
+
+bits_definition("MAP", @map_bits);
+bits_definition("PROH", @proh_bits);
+bits_definition("UNAS", @unas_bits);
+bits_definition("BIDI", @bidi_bits);
+
+# Iterate with a named variable: a statement-modifier foreach would alias the
+# global $_ to each @ARGV element, and the file readers called from
+# generate_data() assign to $_ while reading their input.
+foreach my $version (@versions) {
+    generate_data($version);
+}
+
+# Abort the program with a one-line synopsis of the command line.
+# Never returns.
+sub usage {
+    my $synopsis = "Usage: $0 [-dir dir] version..\n";
+    die $synopsis;
+}
+
+# Emit the four tables (map, prohibited, unassigned, bidi) for one version.
+# Each table is read from "$dir/nameprep.<version>.<kind>".
+sub generate_data {
+    my ($version) = @_;
+    my $prefix = "$dir/nameprep.$version";
+
+    generate_mapdata($version, "$prefix.map");
+    generate_prohibiteddata($version, "$prefix.prohibited");
+    generate_unassigneddata($version, "$prefix.unassigned");
+    generate_bididata($version, "$prefix.bidi");
+}
+
+#
+# Generate mapping data.
+#
+# Reads the mapping file $file for $version and prints the resulting C
+# definitions.  If the first line is "% SAME-AS <other>" and <other> is one
+# of the versions named on the command line, only macro aliases to <other>'s
+# tables are printed.  Lines starting with '#' and blank lines are skipped.
+# Dies if $file cannot be opened.
+#
+sub generate_mapdata {
+    my $version = shift;
+    my $file = shift;
+
+    my $map = SparseMap::Int->new(BITS => [@map_bits],
+                                  MAX => 0x110000,
+                                  MAPALL => 1,
+                                  DEFAULT => 0);
+    # Three-argument open with a lexical handle: the file name is never
+    # interpreted as a mode or a pipe, and the handle is private to this sub.
+    open my $fh, '<', $file or die "cannot open $file: $!\n";
+
+    # The buffer starts with one dummy byte, so the first real entry gets
+    # offset 1 (presumably keeping offset 0 free for the map's DEFAULT).
+    my $mapbuf = "\0";
+    my %maphash = ();
+    while (<$fh>) {
+        if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
+            my $same_as = $1;
+            # grep must be given @versions itself.  With a trailing "> 0"
+            # the comparison becomes grep's list, and the version names
+            # are never examined.
+            if (grep {$_ eq $same_as} @versions) {
+                generate_map_ref($version, $same_as);
+                close $fh;
+                return;
+            }
+            next;
+        }
+        next if /^\#/;
+        next if /^\s*$/;
+        register_map($map, \$mapbuf, \%maphash, $_);
+    }
+    close $fh;
+    generate_map($version, $map, \$mapbuf);
+}
+
+#
+# Generate prohibited character data.
+#
+# Reads the prohibited-character file $file for $version and prints the
+# resulting C definitions.  If the first line is "% SAME-AS <other>" and
+# <other> is one of the versions named on the command line, only macro
+# aliases to <other>'s tables are printed.  Lines starting with '#' and
+# blank lines are skipped.  Dies if $file cannot be opened.
+#
+sub generate_prohibiteddata {
+    my $version = shift;
+    my $file = shift;
+
+    my $proh = SparseMap::Bit->new(BITS => [@proh_bits],
+                                   MAX => 0x110000);
+    # Three-argument open with a lexical handle: the file name is never
+    # interpreted as a mode or a pipe, and the handle is private to this sub.
+    open my $fh, '<', $file or die "cannot open $file: $!\n";
+    while (<$fh>) {
+        if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
+            my $same_as = $1;
+            # grep must be given @versions itself.  With a trailing "> 0"
+            # the comparison becomes grep's list, and the version names
+            # are never examined.
+            if (grep {$_ eq $same_as} @versions) {
+                generate_prohibited_ref($version, $same_as);
+                close $fh;
+                return;
+            }
+            next;
+        }
+        next if /^\#/;
+        next if /^\s*$/;
+        register_prohibited($proh, $_);
+    }
+    close $fh;
+    generate_prohibited($version, $proh);
+}
+
+#
+# Generate unassigned codepoint data.
+#
+# Reads the unassigned-codepoint file $file for $version and prints the
+# resulting C definitions.  If the first line is "% SAME-AS <other>" and
+# <other> is one of the versions named on the command line, only macro
+# aliases to <other>'s tables are printed.  Lines starting with '#' and
+# blank lines are skipped.  Dies if $file cannot be opened.
+#
+sub generate_unassigneddata {
+    my $version = shift;
+    my $file = shift;
+
+    my $unas = SparseMap::Bit->new(BITS => [@unas_bits],
+                                   MAX => 0x110000);
+    # Three-argument open with a lexical handle: the file name is never
+    # interpreted as a mode or a pipe, and the handle is private to this sub.
+    open my $fh, '<', $file or die "cannot open $file: $!\n";
+    while (<$fh>) {
+        if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
+            my $same_as = $1;
+            # grep must be given @versions itself.  With a trailing "> 0"
+            # the comparison becomes grep's list, and the version names
+            # are never examined.
+            if (grep {$_ eq $same_as} @versions) {
+                generate_unassigned_ref($version, $same_as);
+                close $fh;
+                return;
+            }
+            next;
+        }
+        next if /^\#/;
+        next if /^\s*$/;
+        register_unassigned($unas, $_);
+    }
+    close $fh;
+    generate_unassigned($version, $unas);
+}
+
+#
+# Generate bidi type data.
+#
+# Reads the bidi file $file for $version and prints the resulting C
+# definitions.  A "% BIDI_TYPE <name>" line selects the type (one of
+# @bidi_types) assigned to the code points on the lines that follow it;
+# the type is index 0 of @bidi_types until the first such line.  If the
+# first line is "% SAME-AS <other>" and <other> is one of the versions
+# named on the command line, only macro aliases to <other>'s bidi tables
+# are printed.  Lines starting with '#' and blank lines are skipped.
+# Dies if $file cannot be opened or a BIDI_TYPE name is unknown.
+#
+sub generate_bididata {
+    my $version = shift;
+    my $file = shift;
+
+    my $bidi = SparseMap::Int->new(BITS => [@bidi_bits],
+                                   MAX => 0x110000);
+    # Three-argument open with a lexical handle: the file name is never
+    # interpreted as a mode or a pipe, and the handle is private to this sub.
+    open my $fh, '<', $file or die "cannot open $file: $!\n";
+
+    my $type = 0;
+    while (<$fh>) {
+        if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
+            my $same_as = $1;
+            # grep must be given @versions itself.  With a trailing "> 0"
+            # the comparison becomes grep's list, and the version names
+            # are never examined.
+            if (grep {$_ eq $same_as} @versions) {
+                # Alias the bidi tables, not the unassigned ones.
+                generate_bidi_ref($version, $same_as);
+                close $fh;
+                return;
+            }
+            next;
+        }
+        if (/^%\s*BIDI_TYPE\s+(\S+)$/) {
+            my $name = $1;
+            # Find the index of $name in @bidi_types; $i runs off the end
+            # of the list when the name is unknown.
+            my $i;
+            for ($i = 0; $i < @bidi_types; $i++) {
+                last if $name eq $bidi_types[$i];
+            }
+            die "unrecognized line: $_" if $i >= @bidi_types;
+            $type = $i;
+            next;
+        }
+        next if /^\#/;
+        next if /^\s*$/;
+        register_bidi($bidi, $type, $_);
+    }
+    close $fh;
+
+    generate_bidi($version, $bidi);
+}
+
+# Print the comment banner that opens the generated output: an RCS keyword
+# line, the generator's own id string ($myid), and a "do not edit" notice,
+# followed by a blank line.
+sub print_header {
+    my @banner = (
+        "/* \$Id\$ */",
+        "/* $myid */",
+        "/*",
+        " * Do not edit this file!",
+        " * This file is generated from NAMEPREP specification.",
+        " */",
+        "",
+    );
+    print join('', map { "$_\n" } @banner);
+}
+
+# Print one "#define <name>_BITS_<index><TAB><width>" line for each bit
+# width given after $name, in order, followed by a blank line.
+sub bits_definition {
+    my ($name, @bits) = @_;
+
+    for my $idx (0 .. $#bits) {
+        print "#define ${name}_BITS_$idx\t$bits[$idx]\n";
+    }
+    print "\n";
+}
+
+# Record one "from; to..." mapping line.
+#
+#   $map     - object whose add(code, offset) method receives the mapping
+#   $bufref  - reference to the byte buffer holding all target sequences
+#   $hashref - reference to a hash from packed target sequence to its
+#              offset in the buffer, so equal targets are stored only once
+#   $line    - input line: hexadecimal fields separated by ';'
+#
+# The target code points are packed as little-endian 32-bit values with the
+# trailing NUL bytes removed, and stored in the buffer prefixed by a
+# one-byte length.  Dies unless the first field holds exactly one code point.
+sub register_map {
+    my ($map, $bufref, $hashref, $line) = @_;
+
+    my ($src_field, $dst_field) = split /;/, $line;
+    my @src = map {hex($_)} split ' ', $src_field;
+    my @dst = map {hex($_)} split ' ', $dst_field;
+
+    my $packed = pack('V*', @dst);
+    $packed =~ s/\000+$//;
+
+    # Append the sequence only the first time it is seen.
+    unless (exists $hashref->{$packed}) {
+        $hashref->{$packed} = length $$bufref;
+        $$bufref .= pack('C', length($packed)) . $packed;
+    }
+    my $offset = $hashref->{$packed};
+
+    die "unrecognized line: $line" if @src != 1;
+    $map->add($src[0], $offset);
+}
+
+# Print the mapping tables for $version: the output of $map->cprog() after
+# $map->fix(), then the target-sequence buffer ($$bufref) as the array
+# nameprep_<version>_map_data.
+sub generate_map {
+    my ($version, $map, $bufref) = @_;
+    my $symbol = "nameprep_${version}_map";
+
+    $map->fix();
+    print $map->cprog(NAME => $symbol);
+
+    print "\nstatic const unsigned char ${symbol}_data[] = \{\n";
+    print_uchararray($$bufref);
+    print "};\n\n";
+}
+
+# Print macros that make the mapping symbols of $version (imap, table and
+# data) aliases of the same symbols of $refversion, then a blank line.
+sub generate_map_ref {
+    my ($version, $refversion) = @_;
+
+    foreach my $suffix (qw(imap table data)) {
+        print "#define nameprep_${version}_map_$suffix nameprep_${refversion}_map_$suffix\n";
+    }
+    print "\n";
+}
+
+# Print the bytes of the string argument as decimal array initializers:
+# twelve "%3d, " items per line, each line starting with a tab.  A final
+# newline is always printed, even for an empty string.
+sub print_uchararray {
+    my @bytes = unpack 'C*', $_[0];
+
+    my @rows = ();
+    while (@bytes) {
+        my @chunk = splice(@bytes, 0, 12);
+        push @rows, "\t" . join('', map { sprintf("%3d, ", $_) } @chunk);
+    }
+    print join("\n", @rows) . "\n";
+}
+
+# Register one line of the prohibited-character file in $proh; the
+# remaining arguments are handed to register_bitmap() unchanged.
+sub register_prohibited {
+    my ($proh, @rest) = @_;
+    register_bitmap($proh, @rest);
+}
+
+# Register one line of the unassigned-codepoint file in $unas; the
+# remaining arguments are handed to register_bitmap() unchanged.
+sub register_unassigned {
+    my ($unas, @rest) = @_;
+    register_bitmap($unas, @rest);
+}
+
+# Register one line of the bidi file in $bidi with the bidi type index
+# $type; the remaining arguments are handed to register_intmap() unchanged.
+sub register_bidi {
+    my ($bidi, $type, @rest) = @_;
+    register_intmap($bidi, $type, @rest);
+}
+
+# Print the prohibited-character tables for $version under the name
+# nameprep_<version>_prohibited, followed by a blank line.
+sub generate_prohibited {
+    my ($version, $proh) = @_;
+    my $symbol = "nameprep_${version}_prohibited";
+
+    generate_bitmap($proh, $symbol);
+    print "\n";
+}
+
+# Print macros that make the prohibited-character symbols of $version
+# (imap and bitmap) aliases of those of $refversion, then a blank line.
+sub generate_prohibited_ref {
+    my ($version, $refversion) = @_;
+
+    foreach my $suffix (qw(imap bitmap)) {
+        print "#define nameprep_${version}_prohibited_$suffix nameprep_${refversion}_prohibited_$suffix\n";
+    }
+    print "\n";
+}
+
+# Print the unassigned-codepoint tables for $version under the name
+# nameprep_<version>_unassigned, followed by a blank line.
+sub generate_unassigned {
+    my ($version, $unas) = @_;
+    my $symbol = "nameprep_${version}_unassigned";
+
+    generate_bitmap($unas, $symbol);
+    print "\n";
+}
+
+# Print macros that make the unassigned-codepoint symbols of $version
+# (imap and bitmap) aliases of those of $refversion, then a blank line.
+sub generate_unassigned_ref {
+    my ($version, $refversion) = @_;
+
+    foreach my $suffix (qw(imap bitmap)) {
+        print "#define nameprep_${version}_unassigned_$suffix nameprep_${refversion}_unassigned_$suffix\n";
+    }
+    print "\n";
+}
+
+# Print the bidi tables for $version: the output of $bidi->cprog() after
+# $bidi->fix(), then the array nameprep_<version>_bidi_data, which lists
+# one idn_biditype_<lowercased name> entry per element of @bidi_types.
+sub generate_bidi {
+    my ($version, $bidi) = @_;
+    my $symbol = "nameprep_${version}_bidi";
+
+    $bidi->fix();
+
+    print $bidi->cprog(NAME => $symbol);
+    print "\n";
+    print "static const unsigned char ${symbol}_data[] = \{\n";
+    print join('', map { sprintf("\tidn_biditype_%s, \n", lc($_)) } @bidi_types);
+    print "};\n\n";
+}
+
+# Print macros that make the bidi symbols of $version aliases of those of
+# $refversion, then a blank line.  All three symbols are aliased: imap and
+# table, plus the data array emitted by generate_bidi(), so that a version
+# declared SAME-AS another one has every name a fully generated version has.
+sub generate_bidi_ref {
+    my ($version, $refversion) = @_;
+
+    foreach my $suffix (qw(imap table data)) {
+        print "#define nameprep_${version}_bidi_$suffix nameprep_${refversion}_bidi_$suffix\n";
+    }
+    print "\n";
+}
+
+# Add the code point or code point range at the start of $line to $map.
+#
+#   $map  - object whose add(LIST) method receives the code points
+#   $line - text starting with "XXXX" or "XXXX-YYYY" (hexadecimal)
+#
+# Dies if $line does not start with a hexadecimal number.
+sub register_bitmap {
+    my $map = shift;
+    my $line = shift;
+
+    # Match the argument itself rather than whatever $_ currently holds.
+    $line =~ /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/
+        or die "unrecognized line: $line";
+    my ($first, $last) = ($1, $2);
+
+    my $start = hex($first);
+    # A single code point is the range from itself to itself.
+    my $end = defined($last) ? hex($last) : $start;
+    $map->add($start .. $end);
+}
+
+# Assign $value to the code point or code point range at the start of $line.
+#
+#   $map   - object whose add(code, value) method receives each assignment
+#   $value - value stored for every code point in the range
+#   $line  - text starting with "XXXX" or "XXXX-YYYY" (hexadecimal)
+#
+# Dies if $line does not start with a hexadecimal number.
+sub register_intmap {
+    my $map = shift;
+    my $value = shift;
+    my $line = shift;
+
+    # Match the argument itself rather than whatever $_ currently holds.
+    $line =~ /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/
+        or die "unrecognized line: $line";
+    my ($first, $last) = ($1, $2);
+
+    my $start = hex($first);
+    my $end = defined($last) ? hex($last) : $start;
+    foreach my $code ($start .. $end) {
+        $map->add($code, $value);
+    }
+}
+
+# Finalize $map with fix() and print the text returned by its cprog()
+# method, generated under the symbol name $name.
+sub generate_bitmap {
+    my ($map, $name) = @_;
+
+    $map->fix();
+    #$map->stat();
+    print $map->cprog(NAME => $name);
+}