summaryrefslogtreecommitdiffstats
path: root/contrib/idn/idnkit-1.0-src/lib/ucsset.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/idn/idnkit-1.0-src/lib/ucsset.c')
-rw-r--r--contrib/idn/idnkit-1.0-src/lib/ucsset.c368
1 files changed, 368 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/lib/ucsset.c b/contrib/idn/idnkit-1.0-src/lib/ucsset.c
new file mode 100644
index 0000000..76e2970
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/lib/ucsset.c
@@ -0,0 +1,368 @@
+#ifndef lint
+static char *rcsid = "$Id: ucsset.c,v 1.1.1.1 2003/06/04 00:26:15 marka Exp $";
+#endif
+
+/*
+ * Copyright (c) 2001 Japan Network Information Center. All rights reserved.
+ *
+ * By using this file, you agree to the terms and conditions set forth bellow.
+ *
+ * LICENSE TERMS AND CONDITIONS
+ *
+ * The following License Terms and Conditions apply, unless a different
+ * license is obtained from Japan Network Information Center ("JPNIC"),
+ * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
+ * Chiyoda-ku, Tokyo 101-0047, Japan.
+ *
+ * 1. Use, Modification and Redistribution (including distribution of any
+ * modified or derived work) in source and/or binary forms is permitted
+ * under this License Terms and Conditions.
+ *
+ * 2. Redistribution of source code must retain the copyright notices as they
+ * appear in each source code file, this License Terms and Conditions.
+ *
+ * 3. Redistribution in binary form must reproduce the Copyright Notice,
+ * this License Terms and Conditions, in the documentation and/or other
+ * materials provided with the distribution. For the purposes of binary
+ * distribution the "Copyright Notice" refers to the following language:
+ * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
+ *
+ * 4. The name of JPNIC may not be used to endorse or promote products
+ * derived from this Software without specific prior written approval of
+ * JPNIC.
+ *
+ * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <config.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <idn/result.h>
+#include <idn/assert.h>
+#include <idn/logmacro.h>
+#include <idn/ucsset.h>
+
+#define UCS_MAX 0x80000000UL
+
+#define INIT_SIZE 50
+
+/*
+ * Code point range.
+ *
+ * The set of code points is represented by an array of code point ranges.
+ * In the building phase, specified ranges by 'idn_ucsset_add' or
+ * 'idn_ucsset_addrange' are simply appended to the array.
+ * And 'idn_ucsset_fix' sorts the array by the code point value, and also
+ * merges any intersecting ranges. Since the array is sorted, a binary
+ * search can be used for looking up.
+ */
+typedef struct {
+ unsigned long from;
+ unsigned long to;
+} range_t;
+
+/*
+ * Code point segment.
+ *
+ * To speed up searching further, the entire region of UCS-4 code points
+ * (U+0000 - U+7FFFFFFF) are divided into segments. For each segment,
+ * the first and last element of the range array corresponding to the
+ * segment are computed by 'idn_ucsset_fix'. This narrows down the
+ * (initial) search range.
+ */
+typedef struct {
+ int range_start; /* index of ucsset.ranges */
+ int range_end; /* ditto */
+} segment_t;
+
+/*
+ * Code point to segment index conversion.
+ *
+ * Below is the function that maps a code point to the corresponding segment.
+ * The mapping is non-uniform, so that BMP, the following 16 planes that
+ * comprise Unicode code points together with BMP, and other planes
+ * have different granularity.
+ */
+#define SEG_THLD1 0x10000 /* BMP */
+#define SEG_THLD2 0x110000 /* Unicode (BMP+16planes) */
+#define SEG_SFT1 10 /* BMP: 1K code points/segment */
+#define SEG_SFT2 14 /* following 16 planes: 16K cp/seg */
+#define SEG_SFT3 24 /* rest: 16M cp/seg */
+#define SEG_OFF1 (SEG_THLD1 >> SEG_SFT1)
+#define SEG_OFF2 (((SEG_THLD2 - SEG_THLD1) >> SEG_SFT2) + SEG_OFF1)
+#define SEG_INDEX(v) \
+ (((v) < SEG_THLD1) ? ((v) >> SEG_SFT1) : \
+ ((v) < SEG_THLD2) ? ((((v) - SEG_THLD1) >> SEG_SFT2) + SEG_OFF1) : \
+ ((((v) - SEG_THLD2) >> SEG_SFT3) + SEG_OFF2))
+#define SEG_LEN (SEG_INDEX(UCS_MAX - 1) + 1)
+
+/*
+ * Representation of set of UCS code points.
+ */
+typedef struct idn_ucsset {
+ segment_t segments[SEG_LEN];
+ int fixed;
+ int size; /* allocated size of 'ranges' */
+ int nranges; /* num of ranges */
+ range_t *ranges;
+ int refcnt; /* reference count */
+} ucsset;
+
+static idn_result_t addrange(idn_ucsset_t ctx, unsigned long from,
+ unsigned long to, char *func_name);
+static int comp_range(const void *v1, const void *v2);
+
+idn_result_t
+idn_ucsset_create(idn_ucsset_t *ctx) {
+ idn_ucsset_t bm;
+
+ assert(ctx != NULL);
+
+ TRACE(("idn_ucsset_create()\n"));
+
+ if ((bm = malloc(sizeof(ucsset))) == NULL) {
+ WARNING(("idn_ucsset_create: malloc failed\n"));
+ return idn_nomemory;
+ }
+ bm->size = bm->nranges = 0;
+ bm->ranges = NULL;
+ bm->fixed = 0;
+ bm->refcnt = 1;
+ *ctx = bm;
+ return (idn_success);
+}
+
+void
+idn_ucsset_destroy(idn_ucsset_t ctx) {
+ assert(ctx != NULL && ctx->refcnt > 0);
+
+ TRACE(("idn_ucsset_destroy()\n"));
+
+ if (--ctx->refcnt == 0) {
+ if (ctx->ranges != NULL)
+ free(ctx->ranges);
+ free(ctx);
+ }
+}
+
+void
+idn_ucsset_incrref(idn_ucsset_t ctx) {
+ assert(ctx != NULL && ctx->refcnt > 0);
+
+ TRACE(("idn_ucsset_incrref()\n"));
+
+ ctx->refcnt++;
+}
+
+idn_result_t
+idn_ucsset_add(idn_ucsset_t ctx, unsigned long v) {
+ assert(ctx != NULL && ctx->refcnt > 0);
+
+ TRACE(("idn_ucsset_add(v=U+%lX)\n", v));
+
+ return (addrange(ctx, v, v, "idn_ucsset_add"));
+}
+
+idn_result_t
+idn_ucsset_addrange(idn_ucsset_t ctx, unsigned long from,
+ unsigned long to)
+{
+ assert(ctx != NULL && ctx->refcnt > 0);
+
+ TRACE(("idn_ucsset_addrange(from=U+%lX, to=U+%lX)\n",
+ from, to));
+
+ return (addrange(ctx, from, to, "idn_ucsset_addrange"));
+}
+
+void
+idn_ucsset_fix(idn_ucsset_t ctx) {
+ int nranges;
+ range_t *ranges;
+ segment_t *segments;
+ int i, j;
+
+ assert(ctx != NULL && ctx->refcnt > 0);
+
+ TRACE(("idn_ucsset_fix()\n"));
+
+ nranges = ctx->nranges;
+ ranges = ctx->ranges;
+ segments = ctx->segments;
+
+ if (ctx->fixed)
+ return;
+
+ ctx->fixed = 1;
+
+ /* Initialize segment array */
+ for (i = 0; i < SEG_LEN; i++) {
+ segments[i].range_start = -1;
+ segments[i].range_end = -1;
+ }
+
+ /* If the set is empty, there's nothing to be done. */
+ if (nranges == 0)
+ return;
+
+ /* Sort ranges. */
+ qsort(ranges, nranges, sizeof(range_t), comp_range);
+
+ /* Merge overlapped/continuous ranges. */
+ for (i = 0, j = 1; j < nranges; j++) {
+ if (ranges[i].to + 1 >= ranges[j].from) {
+ /* can be merged */
+ if (ranges[i].to < ranges[j].to) {
+ ranges[i].to = ranges[j].to;
+ }
+ } else {
+ i++;
+ if (i < j)
+ ranges[i] = ranges[j];
+ }
+ }
+ /* 'i' points the last range in the array. */
+ ctx->nranges = nranges = ++i;
+
+ /* Create segment array. */
+ for (i = 0; i < nranges; i++) {
+ int fidx = SEG_INDEX(ranges[i].from);
+ int tidx = SEG_INDEX(ranges[i].to);
+
+ for (j = fidx; j <= tidx; j++) {
+ if (segments[j].range_start < 0)
+ segments[j].range_start = i;
+ segments[j].range_end = i;
+ }
+ }
+
+#if 0
+ /*
+ * Does the standard guarantee realloc() always succeeds
+ * when shrinking?
+ */
+ /* Shrink malloc'ed space if possible. */
+ ctx->ranges = realloc(ctx->ranges, ctx->nranges * sizeof(range_t));
+#endif
+}
+
+idn_result_t
+idn_ucsset_lookup(idn_ucsset_t ctx, unsigned long v, int *found) {
+ int idx;
+ segment_t *segments;
+
+ assert(ctx != NULL && ctx->refcnt > 0 && found != NULL);
+
+ TRACE(("idn_ucsset_lookup(v=U+%lX)\n", v));
+
+ /* Make sure it is fixed. */
+ if (!ctx->fixed) {
+ WARNING(("idn_ucsset_lookup: not fixed yet\n"));
+ return (idn_failure);
+ }
+
+ /* Check the given code point. */
+ if (v >= UCS_MAX)
+ return (idn_invalid_codepoint);
+
+ /* Get the segment 'v' belongs to. */
+ segments = ctx->segments;
+ idx = SEG_INDEX(v);
+
+ /* Do binary search. */
+ *found = 0;
+ if (segments[idx].range_start >= 0) {
+ int lo = segments[idx].range_start;
+ int hi = segments[idx].range_end;
+ range_t *ranges = ctx->ranges;
+
+ while (lo <= hi) {
+ int mid = (lo + hi) / 2;
+ if (v < ranges[mid].from) {
+ hi = mid - 1;
+ } else if (v > ranges[mid].to) {
+ lo = mid + 1;
+ } else {
+ *found = 1;
+ break;
+ }
+ }
+ }
+ return (idn_success);
+}
+
+static idn_result_t
+addrange(idn_ucsset_t ctx, unsigned long from, unsigned long to,
+ char *func_name)
+{
+ range_t *newbuf;
+
+ /* Check the given code points. */
+ if (from > UCS_MAX) {
+ WARNING(("%s: code point out of range (U+%lX)\n",
+ func_name, from));
+ return (idn_invalid_codepoint);
+ } else if (to > UCS_MAX) {
+ WARNING(("%s: code point out of range (U+%lX)\n",
+ func_name, to));
+ return (idn_invalid_codepoint);
+ } else if (from > to) {
+ WARNING(("%s: invalid range spec (U+%lX-U+%lX)\n",
+ func_name, from, to));
+ return (idn_invalid_codepoint);
+ }
+
+ /* Make sure it is not fixed yet. */
+ if (ctx->fixed) {
+ WARNING(("%s: attempt to add to already fixed object\n",
+ func_name));
+ return (idn_failure);
+ }
+
+ /* Append the specified range to the 'ranges' array. */
+ if (ctx->nranges >= ctx->size) {
+ /* Make it bigger. */
+ if (ctx->size == 0)
+ ctx->size = INIT_SIZE;
+ else
+ ctx->size *= 2;
+ newbuf = realloc(ctx->ranges, ctx->size * sizeof(range_t));
+ if (newbuf == NULL)
+ return (idn_nomemory);
+ ctx->ranges = newbuf;
+ }
+ ctx->ranges[ctx->nranges].from = from;
+ ctx->ranges[ctx->nranges].to = to;
+ ctx->nranges++;
+
+ return (idn_success);
+}
+
+static int
+comp_range(const void *v1, const void *v2) {
+ /*
+ * Range comparation function suitable for qsort().
+ */
+ const range_t *r1 = v1;
+ const range_t *r2 = v2;
+
+ if (r1->from < r2->from)
+ return (-1);
+ else if (r1->from > r2->from)
+ return (1);
+ else
+ return (0);
+}