summaryrefslogtreecommitdiffstats
path: root/contrib/idn/idnkit-1.0-src/lib/normalizer.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/idn/idnkit-1.0-src/lib/normalizer.c')
-rw-r--r--contrib/idn/idnkit-1.0-src/lib/normalizer.c439
1 files changed, 439 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/lib/normalizer.c b/contrib/idn/idnkit-1.0-src/lib/normalizer.c
new file mode 100644
index 0000000..604a1e5
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/lib/normalizer.c
@@ -0,0 +1,439 @@
+#ifndef lint
+static char *rcsid = "$Id: normalizer.c,v 1.1.1.1 2003/06/04 00:26:05 marka Exp $";
+#endif
+
+/*
+ * Copyright (c) 2000,2002 Japan Network Information Center.
+ * All rights reserved.
+ *
+ * By using this file, you agree to the terms and conditions set forth bellow.
+ *
+ * LICENSE TERMS AND CONDITIONS
+ *
+ * The following License Terms and Conditions apply, unless a different
+ * license is obtained from Japan Network Information Center ("JPNIC"),
+ * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
+ * Chiyoda-ku, Tokyo 101-0047, Japan.
+ *
+ * 1. Use, Modification and Redistribution (including distribution of any
+ * modified or derived work) in source and/or binary forms is permitted
+ * under this License Terms and Conditions.
+ *
+ * 2. Redistribution of source code must retain the copyright notices as they
+ * appear in each source code file, this License Terms and Conditions.
+ *
+ * 3. Redistribution in binary form must reproduce the Copyright Notice,
+ * this License Terms and Conditions, in the documentation and/or other
+ * materials provided with the distribution. For the purposes of binary
+ * distribution the "Copyright Notice" refers to the following language:
+ * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
+ *
+ * 4. The name of JPNIC may not be used to endorse or promote products
+ * derived from this Software without specific prior written approval of
+ * JPNIC.
+ *
+ * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <config.h>
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include <idn/assert.h>
+#include <idn/logmacro.h>
+#include <idn/result.h>
+#include <idn/normalizer.h>
+#include <idn/strhash.h>
+#include <idn/unormalize.h>
+#include <idn/unicode.h>
+#include <idn/ucs4.h>
+#include <idn/debug.h>
+#include <idn/util.h>
+
+#define MAX_LOCAL_SCHEME 3
+
+#define INITIALIZED (scheme_hash != NULL)
+
+typedef struct {
+ char *name;
+ idn_normalizer_proc_t proc;
+} normalize_scheme_t;
+
+struct idn_normalizer {
+ int nschemes;
+ int scheme_size;
+ normalize_scheme_t **schemes;
+ normalize_scheme_t *local_buf[MAX_LOCAL_SCHEME];
+ int reference_count;
+};
+
+static idn__strhash_t scheme_hash;
+
+static idn__unicode_version_t vcur = NULL;
+static idn__unicode_version_t v320 = NULL;
+#define INIT_VERSION(version, var) \
+ if (var == NULL) { \
+ idn_result_t r = idn__unicode_create(version, &var); \
+ if (r != idn_success) \
+ return (r); \
+ }
+
+static idn_result_t expand_schemes(idn_normalizer_t ctx);
+static idn_result_t register_standard_normalizers(void);
+static idn_result_t normalizer_formkc(const unsigned long *from,
+ unsigned long *to, size_t tolen);
+static idn_result_t normalizer_formkc_v320(const unsigned long *from,
+ unsigned long *to,
+ size_t tolen);
+
+static struct standard_normalizer {
+ char *name;
+ idn_normalizer_proc_t proc;
+} standard_normalizer[] = {
+ { "unicode-form-kc", normalizer_formkc },
+ { "unicode-form-kc/3.2.0", normalizer_formkc_v320 },
+ { "RFC3491", normalizer_formkc_v320 },
+ { NULL, NULL },
+};
+
+idn_result_t
+idn_normalizer_initialize(void) {
+ idn__strhash_t hash;
+ idn_result_t r;
+
+ TRACE(("idn_normalizer_initialize()\n"));
+
+ if (scheme_hash != NULL) {
+ r = idn_success; /* already initialized */
+ goto ret;
+ }
+
+ if ((r = idn__strhash_create(&hash)) != idn_success)
+ goto ret;
+ scheme_hash = hash;
+
+ /* Register standard normalizers */
+ r = register_standard_normalizers();
+ret:
+ TRACE(("idn_normalizer_initialize(): %s\n", idn_result_tostring(r)));
+ return (r);
+}
+
+idn_result_t
+idn_normalizer_create(idn_normalizer_t *ctxp) {
+ idn_normalizer_t ctx;
+ idn_result_t r;
+
+ assert(ctxp != NULL);
+ TRACE(("idn_normalizer_create()\n"));
+
+ if ((ctx = malloc(sizeof(struct idn_normalizer))) == NULL) {
+ r = idn_nomemory;
+ goto ret;
+ }
+
+ ctx->nschemes = 0;
+ ctx->scheme_size = MAX_LOCAL_SCHEME;
+ ctx->schemes = ctx->local_buf;
+ ctx->reference_count = 1;
+ *ctxp = ctx;
+
+ r = idn_success;
+ret:
+ TRACE(("idn_normalizer_create(): %s\n", idn_result_tostring(r)));
+ return (r);
+}
+
+void
+idn_normalizer_destroy(idn_normalizer_t ctx) {
+ assert(ctx != NULL);
+
+ TRACE(("idn_normalizer_destroy()\n"));
+
+ ctx->reference_count--;
+ if (ctx->reference_count <= 0) {
+ TRACE(("idn_normalizer_destroy(): the object is destroyed\n"));
+ if (ctx->schemes != ctx->local_buf)
+ free(ctx->schemes);
+ free(ctx);
+ } else {
+ TRACE(("idn_normalizer_destroy(): "
+ "update reference count (%d->%d)\n",
+ ctx->reference_count + 1, ctx->reference_count));
+ }
+}
+
+void
+idn_normalizer_incrref(idn_normalizer_t ctx) {
+ assert(ctx != NULL);
+
+ TRACE(("idn_normalizer_incrref()\n"));
+ TRACE(("idn_normalizer_incrref: update reference count (%d->%d)\n",
+ ctx->reference_count, ctx->reference_count + 1));
+
+ ctx->reference_count++;
+}
+
+idn_result_t
+idn_normalizer_add(idn_normalizer_t ctx, const char *scheme_name) {
+ idn_result_t r;
+ void *v;
+ normalize_scheme_t *scheme;
+
+ assert(ctx != NULL && scheme_name != NULL);
+
+ TRACE(("idn_normalizer_add(scheme_name=%s)\n", scheme_name));
+
+ assert(INITIALIZED);
+
+ if (idn__strhash_get(scheme_hash, scheme_name, &v) != idn_success) {
+ ERROR(("idn_normalizer_add(): invalid scheme \"%-.30s\"\n",
+ scheme_name));
+ r = idn_invalid_name;
+ goto ret;
+ }
+
+ scheme = v;
+
+ assert(ctx->nschemes <= ctx->scheme_size);
+
+ if (ctx->nschemes == ctx->scheme_size &&
+ (r = expand_schemes(ctx)) != idn_success) {
+ goto ret;
+ }
+
+ ctx->schemes[ctx->nschemes++] = scheme;
+ r = idn_success;
+ret:
+ TRACE(("idn_normalizer_add(): %s\n", idn_result_tostring(r)));
+ return (r);
+}
+
+idn_result_t
+idn_normalizer_addall(idn_normalizer_t ctx, const char **scheme_names,
+ int nschemes) {
+ idn_result_t r;
+ int i;
+
+ assert(ctx != NULL && scheme_names != NULL);
+
+ TRACE(("idn_normalizer_addall(nschemes=%d)\n", nschemes));
+
+ for (i = 0; i < nschemes; i++) {
+ r = idn_normalizer_add(ctx, (const char *)*scheme_names);
+ if (r != idn_success)
+ goto ret;
+ scheme_names++;
+ }
+
+ r = idn_success;
+ret:
+ TRACE(("idn_normalizer_addall(): %s\n", idn_result_tostring(r)));
+ return (r);
+}
+
+idn_result_t
+idn_normalizer_normalize(idn_normalizer_t ctx, const unsigned long *from,
+ unsigned long *to, size_t tolen) {
+ idn_result_t r;
+ unsigned long *src, *dst;
+ unsigned long *buffers[2] = {NULL, NULL};
+ size_t buflen[2] = {0, 0};
+ size_t dstlen;
+ int idx;
+ int i;
+
+ assert(scheme_hash != NULL);
+ assert(ctx != NULL && from != NULL && to != NULL);
+
+ TRACE(("idn_normalizer_normalize(from=\"%s\", tolen=%d)\n",
+ idn__debug_ucs4xstring(from, 50), (int)tolen));
+
+ if (ctx->nschemes <= 0) {
+ if (tolen < idn_ucs4_strlen(from) + 1) {
+ r = idn_buffer_overflow;
+ goto ret;
+ }
+ idn_ucs4_strcpy(to, from);
+ r = idn_success;
+ goto ret;
+ }
+
+ /*
+ * Normalize.
+ */
+ src = (void *)from;
+ dstlen = idn_ucs4_strlen(from) + 1;
+
+ i = 0;
+ while (i < ctx->nschemes) {
+ TRACE(("idn_normalizer_normalize(): normalize %s\n",
+ ctx->schemes[i]->name));
+
+ /*
+ * Choose destination area to restore the result of a mapping.
+ */
+ if (i + 1 == ctx->nschemes) {
+ dst = to;
+ dstlen = tolen;
+ } else {
+ if (src == buffers[0])
+ idx = 1;
+ else
+ idx = 0;
+
+ if (buflen[idx] < dstlen) {
+ void *newbuf;
+
+ newbuf = realloc(buffers[idx],
+ sizeof(long) * dstlen);
+ if (newbuf == NULL) {
+ r = idn_nomemory;
+ goto ret;
+ }
+ buffers[idx] = (unsigned long *)newbuf;
+ buflen[idx] = dstlen;
+ }
+
+ dst = buffers[idx];
+ dstlen = buflen[idx];
+ }
+
+ /*
+ * Perform i-th normalization scheme.
+ * If buffer size is not enough, we double it and try again.
+ */
+ r = (ctx->schemes[i]->proc)(src, dst, dstlen);
+ if (r == idn_buffer_overflow && dst != to) {
+ dstlen *= 2;
+ continue;
+ }
+ if (r != idn_success)
+ goto ret;
+
+ src = dst;
+ i++;
+ }
+
+ r = idn_success;
+ret:
+ free(buffers[0]);
+ free(buffers[1]);
+ if (r == idn_success) {
+ TRACE(("idn_normalizer_normalize(): success (to=\"%s\")\n",
+ idn__debug_ucs4xstring(to, 50)));
+ } else {
+ TRACE(("idn_normalizer_normalize(): %s\n",
+ idn_result_tostring(r)));
+ }
+ return (r);
+}
+
+idn_result_t
+idn_normalizer_register(const char *scheme_name, idn_normalizer_proc_t proc) {
+ idn_result_t r;
+ normalize_scheme_t *scheme;
+
+ assert(scheme_name != NULL && proc != NULL);
+
+ TRACE(("idn_normalizer_register(scheme_name=%s)\n", scheme_name));
+
+ assert(INITIALIZED);
+
+ scheme = malloc(sizeof(*scheme) + strlen(scheme_name) + 1);
+ if (scheme == NULL) {
+ r = idn_nomemory;
+ goto ret;
+ }
+ scheme->name = (char *)(scheme + 1);
+ (void)strcpy(scheme->name, scheme_name);
+ scheme->proc = proc;
+
+ r = idn__strhash_put(scheme_hash, scheme_name, scheme);
+ if (r != idn_success)
+ goto ret;
+
+ r = idn_success;
+ret:
+ TRACE(("idn_normalizer_register(): %s\n", idn_result_tostring(r)));
+ return (r);
+}
+
+static idn_result_t
+expand_schemes(idn_normalizer_t ctx) {
+ normalize_scheme_t **new_schemes;
+ int new_size = ctx->scheme_size * 2;
+
+ if (ctx->schemes == ctx->local_buf) {
+ new_schemes = malloc(sizeof(normalize_scheme_t) * new_size);
+ } else {
+ new_schemes = realloc(ctx->schemes,
+ sizeof(normalize_scheme_t) * new_size);
+ }
+ if (new_schemes == NULL)
+ return (idn_nomemory);
+
+ if (ctx->schemes == ctx->local_buf)
+ memcpy(new_schemes, ctx->local_buf, sizeof(ctx->local_buf));
+
+ ctx->schemes = new_schemes;
+ ctx->scheme_size = new_size;
+
+ return (idn_success);
+}
+
+static idn_result_t
+register_standard_normalizers(void) {
+ int i;
+ int failed = 0;
+
+ for (i = 0; standard_normalizer[i].name != NULL; i++) {
+ idn_result_t r;
+ r = idn_normalizer_register(standard_normalizer[i].name,
+ standard_normalizer[i].proc);
+ if (r != idn_success) {
+ WARNING(("idn_normalizer_initialize(): "
+ "failed to register \"%-.100s\"\n",
+ standard_normalizer[i].name));
+ failed++;
+ }
+ }
+ if (failed > 0)
+ return (idn_failure);
+ else
+ return (idn_success);
+}
+
+/*
+ * Unicode Normalization Forms -- latest version
+ */
+
+static idn_result_t
+normalizer_formkc(const unsigned long *from, unsigned long *to, size_t tolen) {
+ INIT_VERSION(NULL, vcur);
+ return (idn__unormalize_formkc(vcur, from, to, tolen));
+}
+
+/*
+ * Unicode Normalization Forms -- version 3.2.0
+ */
+
+static idn_result_t
+normalizer_formkc_v320(const unsigned long *from, unsigned long *to,
+ size_t tolen) {
+ INIT_VERSION("3.2.0", v320);
+ return (idn__unormalize_formkc(v320, from, to, tolen));
+}