diff options
Diffstat (limited to 'contrib/idn/idnkit-1.0-src/lib/punycode.c')
-rw-r--r-- | contrib/idn/idnkit-1.0-src/lib/punycode.c | 434 |
1 files changed, 434 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/lib/punycode.c b/contrib/idn/idnkit-1.0-src/lib/punycode.c new file mode 100644 index 0000000..d803af5 --- /dev/null +++ b/contrib/idn/idnkit-1.0-src/lib/punycode.c @@ -0,0 +1,434 @@ +#ifndef lint +static char *rcsid = "$Id: punycode.c,v 1.1.1.1 2003/06/04 00:26:06 marka Exp $"; +#endif + +/* + * Copyright (c) 2001,2002 Japan Network Information Center. + * All rights reserved. + * + * By using this file, you agree to the terms and conditions set forth bellow. + * + * LICENSE TERMS AND CONDITIONS + * + * The following License Terms and Conditions apply, unless a different + * license is obtained from Japan Network Information Center ("JPNIC"), + * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, + * Chiyoda-ku, Tokyo 101-0047, Japan. + * + * 1. Use, Modification and Redistribution (including distribution of any + * modified or derived work) in source and/or binary forms is permitted + * under this License Terms and Conditions. + * + * 2. Redistribution of source code must retain the copyright notices as they + * appear in each source code file, this License Terms and Conditions. + * + * 3. Redistribution in binary form must reproduce the Copyright Notice, + * this License Terms and Conditions, in the documentation and/or other + * materials provided with the distribution. For the purposes of binary + * distribution the "Copyright Notice" refers to the following language: + * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." + * + * 4. The name of JPNIC may not be used to endorse or promote products + * derived from this Software without specific prior written approval of + * JPNIC. + * + * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + */ + +#include <config.h> + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include <idn/result.h> +#include <idn/assert.h> +#include <idn/logmacro.h> +#include <idn/converter.h> +#include <idn/ucs4.h> +#include <idn/debug.h> +#include <idn/punycode.h> +#include <idn/util.h> + +/* + * Although draft-ietf-idn-punycode-00.txt doesn't specify the ACE + * signature, we have to choose one. In order to prevent the converted + * name from beginning with a hyphen, we should choose a prefix rather + * than a suffix. + */ +#ifndef IDN_PUNYCODE_PREFIX +#define IDN_PUNYCODE_PREFIX "xn--" +#endif + +#define INVALID_UCS 0x80000000 +#define MAX_UCS 0x10FFFF + +/* + * As the draft states, it is possible that `delta' may overflow during + * the encoding. The upper bound of 'delta' is: + * <# of chars. of input string> + <max. difference in code point> * + * <# of chars. of input string + 1> + * For this value not to be greater than 0xffffffff (since the calculation + * is done using unsigned long, which is at least 32bit long), the maxmum + * input string size is about 3850 characters, which is long enough for + * a domain label... + */ +#define PUNYCODE_MAXINPUT 3800 + +/* + * Parameters. + */ +#define PUNYCODE_BASE 36 +#define PUNYCODE_TMIN 1 +#define PUNYCODE_TMAX 26 +#define PUNYCODE_SKEW 38 +#define PUNYCODE_DAMP 700 +#define PUNYCODE_INITIAL_BIAS 72 +#define PUNYCODE_INITIAL_N 0x80 + +static int punycode_getwc(const char *s, size_t len, + int bias, unsigned long *vp); +static int punycode_putwc(char *s, size_t len, + unsigned long delta, int bias); +static int punycode_update_bias(unsigned long delta, + size_t npoints, int first); + +idn_result_t +idn__punycode_decode(idn_converter_t ctx, void *privdata, + const char *from, unsigned long *to, size_t tolen) { + unsigned long *to_org = to; + unsigned long c, idx; + size_t prefixlen = strlen(IDN_PUNYCODE_PREFIX); + size_t fromlen; + size_t uidx, fidx, ucslen; + int first, bias; + idn_result_t r; + + assert(ctx != NULL); + + TRACE(("idn__punycode_decode(from=\"%s\", tolen=%d)\n", + idn__debug_xstring(from, 50), (int)tolen)); + + if (!idn__util_asciihaveaceprefix(from, IDN_PUNYCODE_PREFIX)) { + if (*from == '\0') { + r = idn_ucs4_utf8toucs4(from, to, tolen); + goto ret; + } + r = idn_invalid_encoding; + goto ret; + } + from += prefixlen; + fromlen = strlen(from); + + /* + * Find the last delimiter, and copy the characters + * before it verbatim. + */ + ucslen = 0; + for (fidx = fromlen; fidx > 0; fidx--) { + if (from[fidx - 1] == '-') { + if (tolen < fidx) { + r = idn_buffer_overflow; + goto ret; + } + for (uidx = 0; uidx < fidx - 1; uidx++) { + to[uidx] = from[uidx]; + } + ucslen = uidx; + break; + } + } + + first = 1; + bias = PUNYCODE_INITIAL_BIAS; + c = PUNYCODE_INITIAL_N; + idx = 0; + while (fidx < fromlen) { + int len; + unsigned long delta; + int i; + + len = punycode_getwc(from + fidx, fromlen - fidx, bias, &delta); + if (len == 0) { + r = idn_invalid_encoding; + goto ret; + } + fidx += len; + + bias = punycode_update_bias(delta, ucslen + 1, first); + first = 0; + idx += delta; + c += idx / (ucslen + 1); + uidx = idx % (ucslen + 1); + + /* Insert 'c' at uidx. */ + if (tolen-- <= 0) { + r = idn_buffer_overflow; + goto ret; + } + for (i = ucslen; i > uidx; i--) + to[i] = to[i - 1]; + to[uidx] = c; + + ucslen++; + idx = uidx + 1; + } + + /* Terminate with NUL. */ + if (tolen <= 0) { + r = idn_buffer_overflow; + goto ret; + } + to[ucslen] = '\0'; + r = idn_success; + +ret: + if (r == idn_success) { + TRACE(("idn__punycode_decode(): succcess (to=\"%s\")\n", + idn__debug_ucs4xstring(to_org, 50))); + } else { + TRACE(("idn__punycode_decode(): %s\n", idn_result_tostring(r))); + } + return (r); +} + +idn_result_t +idn__punycode_encode(idn_converter_t ctx, void *privdata, + const unsigned long *from, char *to, size_t tolen) { + char *to_org = to; + unsigned long cur_code, next_code, delta; + size_t prefixlen = strlen(IDN_PUNYCODE_PREFIX); + size_t fromlen; + size_t ucsdone; + size_t toidx; + int uidx, bias, first; + idn_result_t r; + + assert(ctx != NULL); + + TRACE(("idn__punycode_encode(from=\"%s\", tolen=%d)\n", + idn__debug_ucs4xstring(from, 50), (int)tolen)); + + if (*from == '\0') { + r = idn_ucs4_ucs4toutf8(from, to, tolen); + goto ret; + } else if (idn__util_ucs4haveaceprefix(from, IDN_PUNYCODE_PREFIX)) { + r = idn_prohibited; + goto ret; + } + + if (tolen < prefixlen) { + r = idn_buffer_overflow; + goto ret; + } + memcpy(to, IDN_PUNYCODE_PREFIX, prefixlen); + to += prefixlen; + tolen -= prefixlen; + + fromlen = idn_ucs4_strlen(from); + + /* + * If the input string is too long (actually too long to be sane), + * return failure in order to prevent possible overflow. + */ + if (fromlen > PUNYCODE_MAXINPUT) { + ERROR(("idn__punycode_encode(): " + "the input string is too long to convert Punycode\n", + idn__debug_ucs4xstring(from, 50))); + r = idn_failure; + goto ret; + } + + ucsdone = 0; /* number of characters processed */ + toidx = 0; + + /* + * First, pick up basic code points and copy them to 'to'. + */ + for (uidx = 0; uidx < fromlen; uidx++) { + if (from[uidx] < 0x80) { + if (toidx >= tolen) { + r = idn_buffer_overflow; + goto ret; + } + to[toidx++] = from[uidx]; + ucsdone++; + } + } + + /* + * If there are any basic code points, output a delimiter + * (hyphen-minus). + */ + if (toidx > 0) { + if (toidx >= tolen) { + r = idn_buffer_overflow; + goto ret; + } + to[toidx++] = '-'; + to += toidx; + tolen -= toidx; + } + + /* + * Then encode non-basic characters. + */ + first = 1; + cur_code = PUNYCODE_INITIAL_N; + bias = PUNYCODE_INITIAL_BIAS; + delta = 0; + while (ucsdone < fromlen) { + int limit = -1, rest; + + /* + * Find the smallest code point equal to or greater + * than 'cur_code'. Also remember the index of the + * last occurence of the code point. + */ + for (next_code = MAX_UCS, uidx = fromlen - 1; + uidx >= 0; uidx--) { + if (from[uidx] >= cur_code && from[uidx] < next_code) { + next_code = from[uidx]; + limit = uidx; + } + } + /* There must be such code point. */ + assert(limit >= 0); + + delta += (next_code - cur_code) * (ucsdone + 1); + cur_code = next_code; + + /* + * Scan the input string again, and encode characters + * whose code point is 'cur_code'. Use 'limit' to avoid + * unnecessary scan. + */ + for (uidx = 0, rest = ucsdone; uidx <= limit; uidx++) { + if (from[uidx] < cur_code) { + delta++; + rest--; + } else if (from[uidx] == cur_code) { + int sz = punycode_putwc(to, tolen, delta, bias); + if (sz == 0) { + r = idn_buffer_overflow; + goto ret; + } + to += sz; + tolen -= sz; + ucsdone++; + bias = punycode_update_bias(delta, ucsdone, + first); + delta = 0; + first = 0; + } + } + delta += rest + 1; + cur_code++; + } + + /* + * Terminate with NUL. + */ + if (tolen <= 0) { + r = idn_buffer_overflow; + goto ret; + } + *to = '\0'; + r = idn_success; + +ret: + if (r == idn_success) { + TRACE(("idn__punycode_encode(): succcess (to=\"%s\")\n", + idn__debug_xstring(to_org, 50))); + } else { + TRACE(("idn__punycode_encode(): %s\n", idn_result_tostring(r))); + } + return (r); +} + +static int +punycode_getwc(const char *s, size_t len, int bias, unsigned long *vp) { + size_t orglen = len; + unsigned long v = 0, w = 1; + int k; + + for (k = PUNYCODE_BASE - bias; len > 0; k += PUNYCODE_BASE) { + int c = *s++; + int t = (k < PUNYCODE_TMIN) ? PUNYCODE_TMIN : + (k > PUNYCODE_TMAX) ? PUNYCODE_TMAX : k; + + len--; + if ('a' <= c && c <= 'z') + c = c - 'a'; + else if ('A' <= c && c <= 'Z') + c = c - 'A'; + else if ('0' <= c && c <= '9') + c = c - '0' + 26; + else + c = -1; + + if (c < 0) + return (0); /* invalid character */ + + v += c * w; + + if (c < t) { + *vp = v; + return (orglen - len); + } + + w *= (PUNYCODE_BASE - t); + } + + return (0); /* final character missing */ +} + +static int +punycode_putwc(char *s, size_t len, unsigned long delta, int bias) { + const char *punycode_base36 = "abcdefghijklmnopqrstuvwxyz0123456789"; + int k; + char *sorg = s; + + for (k = PUNYCODE_BASE - bias; 1; k += PUNYCODE_BASE) { + int t = (k < PUNYCODE_TMIN) ? PUNYCODE_TMIN : + (k > PUNYCODE_TMAX) ? PUNYCODE_TMAX : k; + + if (delta < t) + break; + if (len < 1) + return (0); + *s++ = punycode_base36[t + ((delta - t) % (PUNYCODE_BASE - t))]; + len--; + delta = (delta - t) / (PUNYCODE_BASE - t); + } + if (len < 1) + return (0); + *s++ = punycode_base36[delta]; + return (s - sorg); +} + +static int +punycode_update_bias(unsigned long delta, size_t npoints, int first) { + int k = 0; + + delta /= first ? PUNYCODE_DAMP : 2; + delta += delta / npoints; + + while (delta > ((PUNYCODE_BASE - PUNYCODE_TMIN) * PUNYCODE_TMAX) / 2) { + delta /= PUNYCODE_BASE - PUNYCODE_TMIN; + k++; + } + return (PUNYCODE_BASE * k + + (((PUNYCODE_BASE - PUNYCODE_TMIN + 1) * delta) / + (delta + PUNYCODE_SKEW))); +} |