diff options
Diffstat (limited to 'contrib/idn/idnkit-1.0-src/lib/unicode.c')
-rw-r--r-- | contrib/idn/idnkit-1.0-src/lib/unicode.c | 309 |
1 files changed, 309 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/lib/unicode.c b/contrib/idn/idnkit-1.0-src/lib/unicode.c new file mode 100644 index 0000000..b584b8f --- /dev/null +++ b/contrib/idn/idnkit-1.0-src/lib/unicode.c @@ -0,0 +1,309 @@ +#ifndef lint +static char *rcsid = "$Id: unicode.c,v 1.1.1.1 2003/06/04 00:26:16 marka Exp $"; +#endif + +/* + * Copyright (c) 2000,2001,2002 Japan Network Information Center. + * All rights reserved. + * + * By using this file, you agree to the terms and conditions set forth bellow. + * + * LICENSE TERMS AND CONDITIONS + * + * The following License Terms and Conditions apply, unless a different + * license is obtained from Japan Network Information Center ("JPNIC"), + * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, + * Chiyoda-ku, Tokyo 101-0047, Japan. + * + * 1. Use, Modification and Redistribution (including distribution of any + * modified or derived work) in source and/or binary forms is permitted + * under this License Terms and Conditions. + * + * 2. Redistribution of source code must retain the copyright notices as they + * appear in each source code file, this License Terms and Conditions. + * + * 3. Redistribution in binary form must reproduce the Copyright Notice, + * this License Terms and Conditions, in the documentation and/or other + * materials provided with the distribution. For the purposes of binary + * distribution the "Copyright Notice" refers to the following language: + * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." + * + * 4. The name of JPNIC may not be used to endorse or promote products + * derived from this Software without specific prior written approval of + * JPNIC. + * + * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + */ + +#include <config.h> + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include <idn/result.h> +#include <idn/logmacro.h> +#include <idn/assert.h> +#include <idn/unicode.h> + +#define UNICODE_CURRENT "3.2.0" + +#define UCS_MAX 0x10ffff +#define END_BIT 0x80000000 + +/* + * Some constants for Hangul decomposition/composition. + */ +#define SBase 0xac00 +#define LBase 0x1100 +#define VBase 0x1161 +#define TBase 0x11a7 +#define LCount 19 +#define VCount 21 +#define TCount 28 +#define SLast (SBase + LCount * VCount * TCount) + +/* + * Symbol composition macro. + */ +#define compose_sym(a, b) compose_symX(a, b) +#define compose_symX(a, b) a ## b + +struct composition { + unsigned long c2; /* 2nd character */ + unsigned long comp; /* composed character */ +}; + +#include "unicodedata_320.c" +#define VERSION v320 +#include "unicode_template.c" +#undef VERSION + +typedef int (*unicode_canonclassproc)(unsigned long v); +typedef int (*unicode_decomposeproc)(unsigned long c, + const unsigned long **seqp); +typedef int (*unicode_composeproc)(unsigned long c, + const struct composition **compp); + +static struct idn__unicode_ops { + char *version; + unicode_canonclassproc canonclass_proc; + unicode_decomposeproc decompose_proc; + unicode_composeproc compose_proc; +} unicode_versions[] = { +#define MAKE_UNICODE_HANDLE(version, suffix) \ + { version, \ + compose_sym(canonclass_, suffix), \ + compose_sym(decompose_, suffix), \ + compose_sym(compose_, suffix) } + MAKE_UNICODE_HANDLE("3.2.0", v320), + { NULL }, +#undef MAKE_UNICODE_HANDLE +}; + +idn_result_t +idn__unicode_create(const char *version, + idn__unicode_version_t *versionp) { + idn__unicode_version_t v; + + assert(versionp != NULL); + TRACE(("idn__unicode_create(version=%-.50s)\n", + version == NULL ? "<NULL>" : version)); + + if (version == NULL) + version = UNICODE_CURRENT; + + for (v = unicode_versions; v->version != NULL; v++) { + if (strcmp(v->version, version) == 0) { + *versionp = v; + return (idn_success); + } + } + return (idn_notfound); +} + +void +idn__unicode_destroy(idn__unicode_version_t version) { + assert(version != NULL); + TRACE(("idn__unicode_destroy()\n")); + /* Nothing to do */ +} + +int +idn__unicode_canonicalclass(idn__unicode_version_t version, unsigned long c) { + if (c > UCS_MAX) + return (0); + + return (*version->canonclass_proc)(c); +} + +idn_result_t +idn__unicode_decompose(idn__unicode_version_t version, + int compat, unsigned long *v, size_t vlen, + unsigned long c, int *decomp_lenp) { + unsigned long *vorg = v; + int seqidx; + const unsigned long *seq; + + assert(v != NULL && vlen >= 0 && decomp_lenp != NULL); + + if (c > UCS_MAX) + return (idn_notfound); + + /* + * First, check for Hangul. + */ + if (SBase <= c && c < SLast) { + int idx, t_offset, v_offset, l_offset; + + idx = c - SBase; + t_offset = idx % TCount; + idx /= TCount; + v_offset = idx % VCount; + l_offset = idx / VCount; + if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3)) + return (idn_buffer_overflow); + *v++ = LBase + l_offset; + *v++ = VBase + v_offset; + if (t_offset > 0) + *v++ = TBase + t_offset; + *decomp_lenp = v - vorg; + return (idn_success); + } + + /* + * Look up decomposition table. If no decomposition is defined + * or if it is a compatibility decomosition when canonical + * decomposition requested, return 'idn_notfound'. + */ + seqidx = (*version->decompose_proc)(c, &seq); + if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0)) + return (idn_notfound); + + /* + * Copy the decomposed sequence. The end of the sequence are + * marked with END_BIT. + */ + do { + unsigned long c; + int dlen; + idn_result_t r; + + c = *seq & ~END_BIT; + + /* Decompose recursively. */ + r = idn__unicode_decompose(version, compat, v, vlen, c, &dlen); + if (r == idn_success) { + v += dlen; + vlen -= dlen; + } else if (r == idn_notfound) { + if (vlen < 1) + return (idn_buffer_overflow); + *v++ = c; + vlen--; + } else { + return (r); + } + + } while ((*seq++ & END_BIT) == 0); + + *decomp_lenp = v - vorg; + + return (idn_success); +} + +int +idn__unicode_iscompositecandidate(idn__unicode_version_t version, + unsigned long c) { + const struct composition *dummy; + + if (c > UCS_MAX) + return (0); + + /* Check for Hangul */ + if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast)) + return (1); + + /* + * Look up composition table. If there are no composition + * that begins with the given character, it is not a + * composition candidate. + */ + if ((*version->compose_proc)(c, &dummy) == 0) + return (0); + else + return (1); +} + +idn_result_t +idn__unicode_compose(idn__unicode_version_t version, unsigned long c1, + unsigned long c2, unsigned long *compp) { + int n; + int lo, hi; + const struct composition *cseq; + + assert(compp != NULL); + + if (c1 > UCS_MAX || c2 > UCS_MAX) + return (idn_notfound); + + /* + * Check for Hangul. + */ + if (LBase <= c1 && c1 < LBase + LCount && + VBase <= c2 && c2 < VBase + VCount) { + /* + * Hangul L and V. + */ + *compp = SBase + + ((c1 - LBase) * VCount + (c2 - VBase)) * TCount; + return (idn_success); + } else if (SBase <= c1 && c1 < SLast && + TBase <= c2 && c2 < TBase + TCount && + (c1 - SBase) % TCount == 0) { + /* + * Hangul LV and T. + */ + *compp = c1 + (c2 - TBase); + return (idn_success); + } + + /* + * Look up composition table. If the result is 0, no composition + * is defined. Otherwise, upper 16bits of the result contains + * the number of composition that begins with 'c1', and the lower + * 16bits is the offset in 'compose_seq'. + */ + if ((n = (*version->compose_proc)(c1, &cseq)) == 0) + return (idn_notfound); + + /* + * The composite sequences are sorted by the 2nd character 'c2'. + * So we can use binary search. + */ + lo = 0; + hi = n - 1; + while (lo <= hi) { + int mid = (lo + hi) / 2; + + if (cseq[mid].c2 < c2) { + lo = mid + 1; + } else if (cseq[mid].c2 > c2) { + hi = mid - 1; + } else { + *compp = cseq[mid].comp; + return (idn_success); + } + } + return (idn_notfound); +} |