diff options
Diffstat (limited to 'contrib/idn/idnkit-1.0-src/lib/ucsmap.c')
-rw-r--r-- | contrib/idn/idnkit-1.0-src/lib/ucsmap.c | 380 |
1 files changed, 380 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/lib/ucsmap.c b/contrib/idn/idnkit-1.0-src/lib/ucsmap.c new file mode 100644 index 0000000..633456d --- /dev/null +++ b/contrib/idn/idnkit-1.0-src/lib/ucsmap.c @@ -0,0 +1,380 @@ +#ifndef lint +static char *rcsid = "$Id: ucsmap.c,v 1.1.1.1 2003/06/04 00:26:14 marka Exp $"; +#endif + +/* + * Copyright (c) 2001 Japan Network Information Center. All rights reserved. + * + * By using this file, you agree to the terms and conditions set forth bellow. + * + * LICENSE TERMS AND CONDITIONS + * + * The following License Terms and Conditions apply, unless a different + * license is obtained from Japan Network Information Center ("JPNIC"), + * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, + * Chiyoda-ku, Tokyo 101-0047, Japan. + * + * 1. Use, Modification and Redistribution (including distribution of any + * modified or derived work) in source and/or binary forms is permitted + * under this License Terms and Conditions. + * + * 2. Redistribution of source code must retain the copyright notices as they + * appear in each source code file, this License Terms and Conditions. + * + * 3. Redistribution in binary form must reproduce the Copyright Notice, + * this License Terms and Conditions, in the documentation and/or other + * materials provided with the distribution. For the purposes of binary + * distribution the "Copyright Notice" refers to the following language: + * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." + * + * 4. The name of JPNIC may not be used to endorse or promote products + * derived from this Software without specific prior written approval of + * JPNIC. + * + * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + */ + +#include <config.h> + +#include <stdlib.h> +#include <string.h> + +#include <idn/result.h> +#include <idn/assert.h> +#include <idn/log.h> +#include <idn/logmacro.h> +#include <idn/ucsmap.h> + +#define INIT_SIZE 50 +#define DEFAULT_BUF_SIZE 500 +#define UCSMAP_HASH_SIZE 103 +#define MAX_MAPLEN 0xffff + +/* + * This module implements UCS 1-to-N mapping. + * To speed up mapping table lookup, a combination of hash and + * binary search is used. + */ + +/* + * Mapping entry. + * Entries are sorted by its hash index and code point. + */ +typedef struct { + short hidx; /* hash index */ + unsigned short len; /* length of mapped sequence */ + unsigned long ucs; /* code point to be mapped */ + unsigned long *map; /* mapped sequence of code points */ +} ucsmap_entry_t; + +/* + * Hash table entry. + * Since the entries pointed by ucsmap_hash_t.entry are sorted, + * binary search can be used. + */ +typedef struct { + ucsmap_entry_t *entry; /* sorted by code point */ + int n; /* length of 'entry' */ +} ucsmap_hash_t; + +/* + * UCS character buffer for storing target character sequence. + */ +typedef struct ucsmap_buf { + struct ucsmap_buf *next; + unsigned long buf[1]; /* actually a variable length array */ +} ucsmap_buf_t; + +/* + * Mapping object. + */ +typedef struct idn_ucsmap { + ucsmap_hash_t hash[UCSMAP_HASH_SIZE]; + ucsmap_entry_t *entries; /* array of entries */ + size_t entry_size; /* allocated size */ + size_t nentries; /* # of entries in use */ + ucsmap_buf_t *mapdata; /* list of character buffers */ + size_t mapdata_size; /* allocated size of current buffer */ + size_t mapdata_used; /* # of chars in use */ + int fixed; /* already fixed? */ + int refcnt; /* reference count */ +} ucsmap_t; + +static int ucsmap_hash(unsigned long v); +static unsigned long *save_mapped_sequence(idn_ucsmap_t ctx, + unsigned long *map, + size_t maplen); +static void free_mapbuf(ucsmap_buf_t *buf); +static int comp_entry(const void *v1, const void *v2); + +idn_result_t +idn_ucsmap_create(idn_ucsmap_t *ctxp) { + idn_ucsmap_t ctx; + + assert(ctxp != NULL); + + TRACE(("idn_ucsmap_create()\n")); + + if ((ctx = malloc(sizeof(*ctx))) == NULL) { + WARNING(("idn_ucsmap_create: malloc failed\n")); + return (idn_nomemory); + } + + ctx->entry_size = 0; + ctx->nentries = 0; + ctx->entries = NULL; + ctx->mapdata = NULL; + ctx->mapdata_size = 0; + ctx->mapdata_used = 0; + ctx->fixed = 0; + ctx->refcnt = 1; + *ctxp = ctx; + return (idn_success); +} + +void +idn_ucsmap_destroy(idn_ucsmap_t ctx) { + assert(ctx != NULL && ctx->refcnt > 0); + + TRACE(("idn_ucsmap_destroy()\n")); + + if (--ctx->refcnt == 0) { + if (ctx->entries != NULL) + free(ctx->entries); + if (ctx->mapdata != NULL) + free_mapbuf(ctx->mapdata); + free(ctx); + } +} + +void +idn_ucsmap_incrref(idn_ucsmap_t ctx) { + assert(ctx != NULL && ctx->refcnt > 0); + + ctx->refcnt++; +} + +idn_result_t +idn_ucsmap_add(idn_ucsmap_t ctx, unsigned long ucs, + unsigned long *map, size_t maplen) +{ + ucsmap_entry_t *e; + ucsmap_entry_t *newbuf; + + assert(ctx != NULL && ctx->refcnt > 0); + + TRACE(("idn_ucsmap_add(ucs=U+%lX, maplen=%u)\n", ucs, maplen)); + + /* Make sure it is not fixed yet. */ + if (ctx->fixed) { + WARNING(("idn_ucsmap_add: attempt to add to fixed map\n")); + return (idn_failure); + } + + if (maplen > MAX_MAPLEN) { + WARNING(("idn_ucsmap_add: maplen too large (> %d)\n", + MAX_MAPLEN)); + return (idn_failure); + } + + /* Append an entry. */ + if (ctx->nentries >= ctx->entry_size) { + if (ctx->entry_size == 0) + ctx->entry_size = INIT_SIZE; + else + ctx->entry_size *= 2; + newbuf = realloc(ctx->entries, sizeof(*e) * ctx->entry_size); + if (newbuf == NULL) + return (idn_nomemory); + ctx->entries = newbuf; + } + e = &ctx->entries[ctx->nentries]; + e->hidx = ucsmap_hash(ucs); + e->len = maplen; + e->ucs = ucs; + if (maplen > 0) { + /* Save mapped sequence in the buffer. */ + e->map = save_mapped_sequence(ctx, map, maplen); + if (e->map == NULL) + return (idn_nomemory); + } else { + /* + * Zero 'maplen' is perfectly valid meaning one-to-zero + * mapping. + */ + e->map = NULL; + } + ctx->nentries++; + + return (idn_success); +} + +void +idn_ucsmap_fix(idn_ucsmap_t ctx) { + ucsmap_entry_t *e; + int last_hidx; + int i; + + assert(ctx != NULL && ctx->refcnt > 0); + + TRACE(("idn_ucsmap_fix()\n")); + + if (ctx->fixed) + return; + + ctx->fixed = 1; + + /* Initialize hash. */ + for (i = 0; i < UCSMAP_HASH_SIZE; i++) { + ctx->hash[i].entry = NULL; + ctx->hash[i].n = 0; + } + + if (ctx->nentries == 0) + return; + + /* Sort entries by the hash value and code point. */ + qsort(ctx->entries, ctx->nentries, sizeof(ucsmap_entry_t), comp_entry); + + /* + * Now the entries are sorted by their hash value, and + * sorted by its code point among the ones with the same hash value. + */ + + /* Build hash table. */ + last_hidx = -1; + for (i = 0, e = ctx->entries; i < ctx->nentries; i++, e++) { + if (e->hidx != last_hidx) { + ctx->hash[e->hidx].entry = e; + last_hidx = e->hidx; + } + ctx->hash[last_hidx].n++; + } +} + +idn_result_t +idn_ucsmap_map(idn_ucsmap_t ctx, unsigned long v, unsigned long *to, + size_t tolen, size_t *maplenp) { + int hash; + ucsmap_entry_t *e; + int n; + int hi, lo, mid; + + assert(ctx != NULL && ctx->refcnt > 0 && to != NULL && + maplenp != NULL); + + TRACE(("idn_ucsmap_map(v=U+%lX)\n", v)); + + if (!ctx->fixed) { + WARNING(("idn_ucsmap_map: not fixed yet\n")); + return (idn_failure); + } + + /* First, look up hash table. */ + hash = ucsmap_hash(v); + if ((n = ctx->hash[hash].n) == 0) + goto nomap; + + /* Then do binary search. */ + e = ctx->hash[hash].entry; + lo = 0; + hi = n - 1; + while (lo <= hi) { + mid = (lo + hi) / 2; + if (v < e[mid].ucs) + hi = mid - 1; + else if (v > e[mid].ucs) + lo = mid + 1; + else { + /* Found. */ + if (tolen < e[mid].len) + return (idn_buffer_overflow); + memcpy(to, e[mid].map, sizeof(*to) * e[mid].len); + *maplenp = e[mid].len; + return (idn_success); + } + } + + /* + * Not found. Put the original character to 'to' + * just for convenience. + */ + nomap: + if (tolen < 1) + return (idn_buffer_overflow); + *to = v; + *maplenp = 1; + return (idn_nomapping); +} + +static int +ucsmap_hash(unsigned long v) { + return (v % UCSMAP_HASH_SIZE); +} + +static unsigned long * +save_mapped_sequence(idn_ucsmap_t ctx, unsigned long *map, size_t maplen) { + ucsmap_buf_t *buf; + unsigned long *p; + size_t allocsize; + + /* + * If the current buffer (the first one in the ctx->mapdata list) + * has enough space, use it. Otherwise, allocate a new buffer and + * insert it at the beginning of the list. + */ + if (ctx->mapdata_used + maplen > ctx->mapdata_size) { + if (maplen > DEFAULT_BUF_SIZE) + allocsize = maplen * 2; + else + allocsize = DEFAULT_BUF_SIZE; + buf = malloc(sizeof(ucsmap_hash_t) + + sizeof(unsigned long) * allocsize); + if (buf == NULL) + return (NULL); + buf->next = ctx->mapdata; + ctx->mapdata = buf; + ctx->mapdata_size = allocsize; + ctx->mapdata_used = 0; + } + p = ctx->mapdata->buf + ctx->mapdata_used; + memcpy(p, map, sizeof(unsigned long) * maplen); + ctx->mapdata_used += maplen; + return (p); +} + +static void +free_mapbuf(ucsmap_buf_t *buf) { + while (buf != NULL) { + ucsmap_buf_t *next = buf->next; + free(buf); + buf = next; + } +} + +static int +comp_entry(const void *v1, const void *v2) { + const ucsmap_entry_t *e1 = v1; + const ucsmap_entry_t *e2 = v2; + + if (e1->hidx < e2->hidx) + return (-1); + else if (e1->hidx > e2->hidx) + return (1); + else if (e1->ucs < e2->ucs) + return (-1); + else if (e1->ucs > e2->ucs) + return (1); + else + return (0); +} |