diff options
Diffstat (limited to 'contrib/idn/idnkit-1.0-src/lib/utf8.c')
-rw-r--r-- | contrib/idn/idnkit-1.0-src/lib/utf8.c | 276 |
1 files changed, 276 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/lib/utf8.c b/contrib/idn/idnkit-1.0-src/lib/utf8.c new file mode 100644 index 0000000..7a9acd6 --- /dev/null +++ b/contrib/idn/idnkit-1.0-src/lib/utf8.c @@ -0,0 +1,276 @@ +#ifndef lint +static char *rcsid = "$Id: utf8.c,v 1.1.1.1 2003/06/04 00:26:44 marka Exp $"; +#endif + +/* + * Copyright (c) 2000 Japan Network Information Center. All rights reserved. + * + * By using this file, you agree to the terms and conditions set forth bellow. + * + * LICENSE TERMS AND CONDITIONS + * + * The following License Terms and Conditions apply, unless a different + * license is obtained from Japan Network Information Center ("JPNIC"), + * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, + * Chiyoda-ku, Tokyo 101-0047, Japan. + * + * 1. Use, Modification and Redistribution (including distribution of any + * modified or derived work) in source and/or binary forms is permitted + * under this License Terms and Conditions. + * + * 2. Redistribution of source code must retain the copyright notices as they + * appear in each source code file, this License Terms and Conditions. + * + * 3. Redistribution in binary form must reproduce the Copyright Notice, + * this License Terms and Conditions, in the documentation and/or other + * materials provided with the distribution. For the purposes of binary + * distribution the "Copyright Notice" refers to the following language: + * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." + * + * 4. The name of JPNIC may not be used to endorse or promote products + * derived from this Software without specific prior written approval of + * JPNIC. + * + * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + */ + +#include <config.h> + +#include <stddef.h> + +#include <idn/assert.h> +#include <idn/logmacro.h> +#include <idn/utf8.h> +#include <idn/debug.h> + +#define UTF8_WIDTH(c) \ + (((c) < 0x80) ? 1 : \ + ((c) < 0xc0) ? 0 : \ + ((c) < 0xe0) ? 2 : \ + ((c) < 0xf0) ? 3 : \ + ((c) < 0xf8) ? 4 : \ + ((c) < 0xfc) ? 5 : \ + ((c) < 0xfe) ? 6 : 0) + +#define VALID_CONT_BYTE(c) (0x80 <= (c) && (c) < 0xc0) + +int +idn_utf8_mblen(const char *s) { + int c = *(unsigned char *)s; + + assert(s != NULL); + +#if 0 + TRACE(("idn_utf8_mblen(s=<%s>)\n", idn__debug_hexstring(s, 6))); +#endif + + return UTF8_WIDTH(c); +} + +int +idn_utf8_getmb(const char *s, size_t len, char *buf) { + /* buf must be at least 7-bytes long */ + const unsigned char *p = (const unsigned char *)s; + unsigned char *q = (unsigned char *)buf; + int width = UTF8_WIDTH(*p); + int w; + + assert(s != NULL); + +#if 0 + TRACE(("idn_utf8_getmb(s=<%s>,len=%d)\n", + idn__debug_hexstring(s, 6), len)); +#endif + + if (width == 0 || len < width) + return (0); + + /* Copy the first byte. */ + *q++ = *p++; + + /* .. and the rest. */ + w = width; + while (--w > 0) { + if (!VALID_CONT_BYTE(*p)) + return (0); + *q++ = *p++; + } + return (width); +} + +int +idn_utf8_getwc(const char *s, size_t len, unsigned long *vp) { + unsigned long v; + unsigned long min; + const unsigned char *p = (const unsigned char *)s; + int c; + int width; + int rest; + + assert(s != NULL); + +#if 0 + TRACE(("idn_utf8_getwc(s=<%s>,len=%d)\n", + idn__debug_hexstring(s, 10), len)); +#endif + + c = *p++; + width = UTF8_WIDTH(c); + + switch (width) { + case 0: + return (0); + case 1: + v = c; + min = 0; + break; + case 2: + v = c & 0x1f; + min = 0x80; + break; + case 3: + v = c & 0xf; + min = 0x800; + break; + case 4: + v = c & 0x7; + min = 0x10000; + break; + case 5: + v = c & 3; + min = 0x200000; + break; + case 6: + v = c & 1; + min = 0x4000000; + break; + default: + FATAL(("idn_utf8_getint: internal error\n")); + return (0); + } + + if (len < width) + return (0); + + rest = width - 1; + while (rest-- > 0) { + if (!VALID_CONT_BYTE(*p)) + return (0); + v = (v << 6) | (*p & 0x3f); + p++; + } + + if (v < min) + return (0); + + *vp = v; + return (width); +} + +int +idn_utf8_putwc(char *s, size_t len, unsigned long v) { + unsigned char *p = (unsigned char *)s; + int mask; + int off; + int l; + + assert(s != NULL); + +#if 0 + TRACE(("idn_utf8_putwc(v=%lx)\n", v)); +#endif + + if (v < 0x80) { + mask = 0; + l = 1; + } else if (v < 0x800) { + mask = 0xc0; + l = 2; + } else if (v < 0x10000) { + mask = 0xe0; + l = 3; + } else if (v < 0x200000) { + mask = 0xf0; + l = 4; + } else if (v < 0x4000000) { + mask = 0xf8; + l = 5; + } else if (v < 0x80000000) { + mask = 0xfc; + l = 6; + } else { + return (0); + } + + if (len < l) + return (0); + + off = 6 * (l - 1); + *p++ = (v >> off) | mask; + mask = 0x80; + while (off > 0) { + off -= 6; + *p++ = ((v >> off) & 0x3f) | mask; + } + return l; +} + +int +idn_utf8_isvalidchar(const char *s) { + unsigned long dummy; + + TRACE(("idn_utf8_isvalidchar(s=<%s>)\n", + idn__debug_hexstring(s, 6))); + + return (idn_utf8_getwc(s, 6, &dummy) > 0); +} + +int +idn_utf8_isvalidstring(const char *s) { + unsigned long dummy; + int width; + + assert(s != NULL); + + TRACE(("idn_utf8_isvalidstring(s=<%s>)\n", + idn__debug_hexstring(s, 20))); + + while (*s != '\0') { + width = idn_utf8_getwc(s, 6, &dummy); + if (width == 0) + return (0); + s += width; + } + return (1); +} + +char * +idn_utf8_findfirstbyte(const char *s, const char *known_top) { + const unsigned char *p = (const unsigned char *)s; + const unsigned char *t = (const unsigned char *)known_top; + + assert(s != NULL && known_top != NULL && known_top <= s); + + TRACE(("idn_utf8_findfirstbyte(s=<%s>)\n", + idn__debug_hexstring(s, 8))); + + while (p >= t) { + if (!VALID_CONT_BYTE(*p)) + break; + p--; + } + if (p < t || UTF8_WIDTH(*p) == 0) + return (NULL); + + return ((char *)p); +} |