summaryrefslogtreecommitdiffstats
path: root/contrib/idn/idnkit-1.0-src/lib/tests/utffilter
blob: cebd4dce2f065eb6c95e5662440e7a94830c70a6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#! /usr/bin/perl -wp
#
# Copyright (c) 2002 Japan Network Information Center.
# All rights reserved.
#  
# By using this file, you agree to the terms and conditions set forth bellow.
# 
# 			LICENSE TERMS AND CONDITIONS 
# 
# The following License Terms and Conditions apply, unless a different
# license is obtained from Japan Network Information Center ("JPNIC"),
# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
# Chiyoda-ku, Tokyo 101-0047, Japan.
# 
# 1. Use, Modification and Redistribution (including distribution of any
#    modified or derived work) in source and/or binary forms is permitted
#    under this License Terms and Conditions.
# 
# 2. Redistribution of source code must retain the copyright notices as they
#    appear in each source code file, this License Terms and Conditions.
# 
# 3. Redistribution in binary form must reproduce the Copyright Notice,
#    this License Terms and Conditions, in the documentation and/or other
#    materials provided with the distribution.  For the purposes of binary
#    distribution the "Copyright Notice" refers to the following language:
#    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
# 
# 4. The name of JPNIC may not be used to endorse or promote products
#    derived from this Software without specific prior written approval of
#    JPNIC.
# 
# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
#    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
#    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
#    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
#    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
#    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
#    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
#    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
#    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
#

# to_utf8(HEXSTRING)
#
# Takes a code point written as a string of hexadecimal digits and returns
# the text of its UTF-8 encoding, spelled as a run of "\xNN" escapes (a
# literal backslash, "x", and two lowercase hex digits per byte) rather
# than as raw bytes.
#
# The original UTF-8 layout of up to six bytes is used, so every value
# below 0x80000000 is accepted.  Anything at or above that limit yields
# the empty string.
#
# The routine never touches $_, so it can be called from the replacement
# part of a substitution that is operating on $_.
sub to_utf8 ($) {
    my $code_point = hex(shift);

    # Each row: exclusive upper bound, lead-byte marker bits, byte count.
    my @forms = (
        [0x80,       0x00, 1],
        [0x800,      0xc0, 2],
        [0x10000,    0xe0, 3],
        [0x200000,   0xf0, 4],
        [0x4000000,  0xf8, 5],
        [0x80000000, 0xfc, 6],
    );

    # Pick the first (shortest) form whose range contains the code point.
    my ($lead_bits, $byte_count);
    for my $form (@forms) {
        my ($limit, $marker, $count) = @$form;
        next if $code_point >= $limit;
        ($lead_bits, $byte_count) = ($marker, $count);
        last;
    }

    # Out of range for every form: nothing to emit.
    return '' unless defined $byte_count;

    # Lead byte: the topmost payload bits combined with the marker bits.
    my $shift   = 6 * ($byte_count - 1);
    my @escapes = (sprintf('\\x%02x', ($code_point >> $shift) | $lead_bits));

    # Continuation bytes: six payload bits each, tagged with 10xxxxxx.
    while ($shift > 0) {
        $shift -= 6;
        push @escapes,
            sprintf('\\x%02x', (($code_point >> $shift) & 0x3f) | 0x80);
    }

    return join('', @escapes);
}

# Runs once per input line (the -p switch on the shebang line supplies the
# read/print loop): every "<U+HEX>" marker in the line is replaced by
# whatever to_utf8 returns for its hex digits.
s{<U\+([0-9A-Fa-f]+)>}{to_utf8($1)}eg;