summaryrefslogtreecommitdiffstats
path: root/contrib/idn/idnkit-1.0-src/lib/ucs4.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/idn/idnkit-1.0-src/lib/ucs4.c')
-rw-r--r--contrib/idn/idnkit-1.0-src/lib/ucs4.c461
1 files changed, 461 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/lib/ucs4.c b/contrib/idn/idnkit-1.0-src/lib/ucs4.c
new file mode 100644
index 0000000..a5f4e8c
--- /dev/null
+++ b/contrib/idn/idnkit-1.0-src/lib/ucs4.c
@@ -0,0 +1,461 @@
+/* RCS identification string for this file; left out when `lint' is defined. */
+#ifndef lint
+static char *rcsid = "$Id: ucs4.c,v 1.1.1.1 2003/06/04 00:26:14 marka Exp $";
+#endif
+
+/*
+ * Copyright (c) 2001 Japan Network Information Center. All rights reserved.
+ *
+ * By using this file, you agree to the terms and conditions set forth bellow.
+ *
+ * LICENSE TERMS AND CONDITIONS
+ *
+ * The following License Terms and Conditions apply, unless a different
+ * license is obtained from Japan Network Information Center ("JPNIC"),
+ * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
+ * Chiyoda-ku, Tokyo 101-0047, Japan.
+ *
+ * 1. Use, Modification and Redistribution (including distribution of any
+ * modified or derived work) in source and/or binary forms is permitted
+ * under this License Terms and Conditions.
+ *
+ * 2. Redistribution of source code must retain the copyright notices as they
+ * appear in each source code file, this License Terms and Conditions.
+ *
+ * 3. Redistribution in binary form must reproduce the Copyright Notice,
+ * this License Terms and Conditions, in the documentation and/or other
+ * materials provided with the distribution. For the purposes of binary
+ * distribution the "Copyright Notice" refers to the following language:
+ * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
+ *
+ * 4. The name of JPNIC may not be used to endorse or promote products
+ * derived from this Software without specific prior written approval of
+ * JPNIC.
+ *
+ * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <config.h>
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <idn/assert.h>
+#include <idn/result.h>
+#include <idn/logmacro.h>
+#include <idn/util.h>
+#include <idn/ucs4.h>
+#include <idn/debug.h>
+
+/*
+ * UTF-16 surrogate pair helpers.
+ *
+ * A code point at or above SURROGATE_BASE is carried in UTF-16 as a high
+ * surrogate (0xd800..0xdbff) followed by a low surrogate (0xdc00..0xdfff).
+ * Each half holds 10 bits of the code point minus SURROGATE_BASE.
+ *
+ * IS_SURROGATE_HIGH() and IS_SURROGATE_LOW() evaluate their argument twice.
+ */
+#define SURROGATE_BASE		0x10000
+#define SURROGATE_H_OFF		0xd800
+#define SURROGATE_L_OFF		0xdc00
+
+#define IS_SURROGATE_HIGH(v)	((v) >= 0xd800 && (v) <= 0xdbff)
+#define IS_SURROGATE_LOW(v)	((v) >= 0xdc00 && (v) <= 0xdfff)
+
+/* Split a code point above 0xffff into its two UTF-16 code units. */
+#define SURROGATE_HIGH(v) \
+	(SURROGATE_H_OFF + (((v) - SURROGATE_BASE) >> 10))
+#define SURROGATE_LOW(v) \
+	(SURROGATE_L_OFF + ((v) & 0x3ff))
+
+/* Rebuild the code point from a high/low surrogate pair. */
+#define COMBINE_SURROGATE(h, l) \
+	(SURROGATE_BASE + (((h) - SURROGATE_H_OFF) << 10) + \
+	 ((l) - SURROGATE_L_OFF))
+
+/*
+ * ASCII-only case conversion.
+ * Values outside 'a'..'z' (resp. 'A'..'Z') are returned unchanged.
+ * The argument is evaluated more than once, so it must be free of
+ * side effects.
+ */
+#define ASCII_TOUPPER(c) \
+	(((c) >= 'a' && (c) <= 'z') ? ((c) - ('a' - 'A')) : (c))
+#define ASCII_TOLOWER(c) \
+	(((c) >= 'A' && (c) <= 'Z') ? ((c) + ('a' - 'A')) : (c))
+
+/*
+ * Convert the NUL-terminated UCS-4 string 'ucs4' into NUL-terminated
+ * UTF-16, stored in 'utf16'.
+ *
+ * 'tolen' is the capacity of 'utf16' counted in 16-bit units; the
+ * terminating NUL needs one unit of its own.  Code points above 0xffff
+ * are written as a surrogate pair (two units).
+ *
+ * Returns:
+ *   idn_success          -- the whole string was converted.
+ *   idn_invalid_encoding -- the input holds a surrogate code point or a
+ *                           value of 0x110000 or more.
+ *   idn_buffer_overflow  -- 'utf16' is too small.
+ * On failure the units written so far stay in 'utf16' without a
+ * terminating NUL.
+ */
+idn_result_t
+idn_ucs4_ucs4toutf16(const unsigned long *ucs4, unsigned short *utf16,
+		     size_t tolen) {
+	const unsigned long *src = ucs4;
+	unsigned short *dst = utf16;
+	unsigned long cp;
+	size_t need;
+	idn_result_t r;
+
+	TRACE(("idn_ucs4_ucs4toutf16(ucs4=\"%s\", tolen=%d)\n",
+	       idn__debug_ucs4xstring(ucs4, 50), (int)tolen));
+
+	for (;;) {
+		cp = *src++;
+		if (cp == '\0')
+			break;
+
+		if (IS_SURROGATE_HIGH(cp) || IS_SURROGATE_LOW(cp)) {
+			WARNING(("idn_ucs4_ucs4toutf16: UCS4 string contains "
+				 "surrogate pair\n"));
+			r = idn_invalid_encoding;
+			goto ret;
+		}
+		if (cp >= 0x110000) {
+			/* Not representable as a surrogate pair. */
+			r = idn_invalid_encoding;
+			goto ret;
+		}
+
+		/* Two units for a surrogate pair, one otherwise. */
+		need = (cp > 0xffff) ? 2 : 1;
+		if (tolen < need) {
+			r = idn_buffer_overflow;
+			goto ret;
+		}
+		if (need == 2) {
+			*dst++ = SURROGATE_HIGH(cp);
+			*dst++ = SURROGATE_LOW(cp);
+		} else {
+			*dst++ = cp;
+		}
+		tolen -= need;
+	}
+
+	/* Room for the terminator? */
+	if (tolen < 1) {
+		r = idn_buffer_overflow;
+		goto ret;
+	}
+	*dst = '\0';
+	r = idn_success;
+
+ret:
+	if (r != idn_success) {
+		TRACE(("idn_ucs4_ucs4toutf16(): %s\n",
+		       idn_result_tostring(r)));
+	} else {
+		TRACE(("idn_ucs4_ucs4toutf16(): success (utf16=\"%s\")\n",
+		       idn__debug_utf16xstring(utf16, 50)));
+	}
+	return (r);
+}
+
+/*
+ * Convert the NUL-terminated UTF-16 string 'utf16' into NUL-terminated
+ * UCS-4, stored in 'ucs4'.
+ *
+ * 'tolen' is the capacity of 'ucs4' counted in elements; the terminating
+ * NUL needs one element of its own.  A high surrogate followed by a low
+ * surrogate is combined into a single code point.
+ *
+ * Returns:
+ *   idn_success          -- the whole string was converted.
+ *   idn_invalid_encoding -- a high surrogate is not followed by a low
+ *                           surrogate, or a low surrogate appears without
+ *                           a preceding high surrogate.
+ *   idn_buffer_overflow  -- 'ucs4' is too small.
+ * On failure the elements written so far stay in 'ucs4' without a
+ * terminating NUL.
+ */
+idn_result_t
+idn_ucs4_utf16toucs4(const unsigned short *utf16, unsigned long *ucs4,
+		     size_t tolen) {
+	unsigned long *ucs4p = ucs4;
+	unsigned short v0, v1;
+	idn_result_t r;
+
+	TRACE(("idn_ucs4_utf16toucs4(utf16=\"%s\", tolen=%d)\n",
+	       idn__debug_utf16xstring(utf16, 50), (int)tolen));
+
+	while (*utf16 != '\0') {
+		v0 = *utf16;
+
+		if (tolen < 1) {
+			r = idn_buffer_overflow;
+			goto ret;
+		}
+
+		if (IS_SURROGATE_HIGH(v0)) {
+			/*
+			 * If the string ends right after v0, v1 is the
+			 * terminating NUL and fails the test below.
+			 */
+			v1 = *(utf16 + 1);
+			if (!IS_SURROGATE_LOW(v1)) {
+				WARNING(("idn_ucs4_utf16toucs4: "
+					 "corrupted surrogate pair\n"));
+				r = idn_invalid_encoding;
+				goto ret;
+			}
+			*ucs4p++ = COMBINE_SURROGATE(v0, v1);
+			tolen--;
+			utf16 += 2;
+		} else if (IS_SURROGATE_LOW(v0)) {
+			/*
+			 * A low surrogate with no preceding high surrogate
+			 * is just as malformed as the reverse case.  Copying
+			 * it through would produce a UCS-4 string that the
+			 * UCS-4 encoders in this file reject.
+			 */
+			WARNING(("idn_ucs4_utf16toucs4: "
+				 "corrupted surrogate pair\n"));
+			r = idn_invalid_encoding;
+			goto ret;
+		} else {
+			*ucs4p++ = v0;
+			tolen--;
+			utf16++;
+		}
+	}
+
+	/* Room for the terminator? */
+	if (tolen < 1) {
+		r = idn_buffer_overflow;
+		goto ret;
+	}
+	*ucs4p = '\0';
+
+	r = idn_success;
+ret:
+	if (r == idn_success) {
+		TRACE(("idn_ucs4_utf16toucs4(): success (ucs4=\"%s\")\n",
+		       idn__debug_ucs4xstring(ucs4, 50)));
+	} else {
+		TRACE(("idn_ucs4_utf16toucs4(): %s\n",
+		       idn_result_tostring(r)));
+	}
+	return (r);
+}
+
+/*
+ * Convert the NUL-terminated UTF-8 string 'utf8' into NUL-terminated
+ * UCS-4, stored in 'ucs4'.
+ *
+ * 'tolen' is the capacity of 'ucs4' counted in elements; the terminating
+ * NUL needs one element of its own.  Sequences of one to six bytes are
+ * accepted.
+ *
+ * Returns:
+ *   idn_success          -- the whole string was converted.
+ *   idn_invalid_encoding -- bad lead byte, bad continuation byte, a value
+ *                           encoded with more bytes than necessary, or a
+ *                           surrogate code point.
+ *   idn_buffer_overflow  -- 'ucs4' is too small.
+ * On failure the elements written so far stay in 'ucs4' without a
+ * terminating NUL.
+ */
+idn_result_t
+idn_ucs4_utf8toucs4(const char *utf8, unsigned long *ucs4, size_t tolen) {
+	const unsigned char *src = (const unsigned char *)utf8;
+	unsigned long *dst = ucs4;
+	unsigned long cp, lower_bound;
+	unsigned char byte;
+	int trailing;
+	idn_result_t r;
+
+	TRACE(("idn_ucs4_utf8toucs4(utf8=\"%s\", tolen=%d)\n",
+	       idn__debug_xstring(utf8, 50), (int)tolen));
+
+	for (;;) {
+		byte = *src++;
+		if (byte == '\0')
+			break;
+
+		/*
+		 * The lead byte gives the payload bits, the number of
+		 * continuation bytes, and the smallest value that
+		 * legitimately needs a sequence of that length.
+		 */
+		if (byte < 0x80) {
+			cp = byte;
+			lower_bound = 0;
+			trailing = 0;
+		} else if ((byte & 0xe0) == 0xc0) {
+			cp = byte & 0x1f;
+			lower_bound = 0x80;
+			trailing = 1;
+		} else if ((byte & 0xf0) == 0xe0) {
+			cp = byte & 0x0f;
+			lower_bound = 0x800;
+			trailing = 2;
+		} else if ((byte & 0xf8) == 0xf0) {
+			cp = byte & 0x07;
+			lower_bound = 0x10000;
+			trailing = 3;
+		} else if ((byte & 0xfc) == 0xf8) {
+			cp = byte & 0x03;
+			lower_bound = 0x200000;
+			trailing = 4;
+		} else if ((byte & 0xfe) == 0xfc) {
+			cp = byte & 0x01;
+			lower_bound = 0x4000000;
+			trailing = 5;
+		} else {
+			/* Stray continuation byte (0x80..0xbf), 0xfe or 0xff. */
+			WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));
+			r = idn_invalid_encoding;
+			goto ret;
+		}
+
+		/*
+		 * Every continuation byte must look like 10xxxxxx.  A NUL
+		 * fails this test, so a truncated sequence is never read
+		 * past the terminator.
+		 */
+		while (trailing-- > 0) {
+			byte = *src++;
+			if ((byte & 0xc0) != 0x80) {
+				WARNING(("idn_ucs4_utf8toucs4: "
+					 "invalid character\n"));
+				r = idn_invalid_encoding;
+				goto ret;
+			}
+			cp = (cp << 6) | (byte & 0x3f);
+		}
+
+		/* Reject values encoded with more bytes than necessary. */
+		if (cp < lower_bound) {
+			WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));
+			r = idn_invalid_encoding;
+			goto ret;
+		}
+		if (IS_SURROGATE_HIGH(cp) || IS_SURROGATE_LOW(cp)) {
+			WARNING(("idn_ucs4_utf8toucs4: UTF-8 string contains "
+				 "surrogate pair\n"));
+			r = idn_invalid_encoding;
+			goto ret;
+		}
+		if (tolen < 1) {
+			r = idn_buffer_overflow;
+			goto ret;
+		}
+		*dst++ = cp;
+		tolen--;
+	}
+
+	/* Room for the terminator? */
+	if (tolen < 1) {
+		r = idn_buffer_overflow;
+		goto ret;
+	}
+	*dst = '\0';
+	r = idn_success;
+
+ret:
+	if (r != idn_success) {
+		TRACE(("idn_ucs4_utf8toucs4(): %s\n",
+		       idn_result_tostring(r)));
+	} else {
+		TRACE(("idn_ucs4_utf8toucs4(): success (ucs4=\"%s\")\n",
+		       idn__debug_ucs4xstring(ucs4, 50)));
+	}
+	return (r);
+}
+
+/*
+ * Convert the NUL-terminated UCS-4 string 'ucs4' into NUL-terminated
+ * UTF-8, stored in 'utf8'.
+ *
+ * 'tolen' is the capacity of 'utf8' in bytes; the terminating NUL needs
+ * one byte of its own.  Values below 0x80000000 are encoded using one to
+ * six bytes.
+ *
+ * Returns:
+ *   idn_success          -- the whole string was converted.
+ *   idn_invalid_encoding -- the input holds a surrogate code point or a
+ *                           value of 0x80000000 or more.
+ *   idn_buffer_overflow  -- 'utf8' is too small.
+ * On failure the bytes written so far stay in 'utf8' without a
+ * terminating NUL.
+ */
+idn_result_t
+idn_ucs4_ucs4toutf8(const unsigned long *ucs4, char *utf8, size_t tolen) {
+	const unsigned long *src;
+	unsigned char *dst = (unsigned char *)utf8;
+	unsigned long cp;
+	int nbytes;
+	int lead;
+	int i;
+	idn_result_t r;
+
+	TRACE(("idn_ucs4_ucs4toutf8(ucs4=\"%s\", tolen=%d)\n",
+	       idn__debug_ucs4xstring(ucs4, 50), (int)tolen));
+
+	for (src = ucs4; *src != '\0'; src++) {
+		cp = *src;
+
+		if (IS_SURROGATE_HIGH(cp) || IS_SURROGATE_LOW(cp)) {
+			WARNING(("idn_ucs4_ucs4toutf8: UCS4 string contains "
+				 "surrogate pair\n"));
+			r = idn_invalid_encoding;
+			goto ret;
+		}
+		if (cp >= 0x80000000) {
+			/* Does not fit in a six-byte sequence. */
+			WARNING(("idn_ucs4_ucs4toutf8: invalid character\n"));
+			r = idn_invalid_encoding;
+			goto ret;
+		}
+
+		/* Sequence length and the marker bits of its lead byte. */
+		if (cp < 0x80) {
+			nbytes = 1;
+			lead = 0;
+		} else if (cp < 0x800) {
+			nbytes = 2;
+			lead = 0xc0;
+		} else if (cp < 0x10000) {
+			nbytes = 3;
+			lead = 0xe0;
+		} else if (cp < 0x200000) {
+			nbytes = 4;
+			lead = 0xf0;
+		} else if (cp < 0x4000000) {
+			nbytes = 5;
+			lead = 0xf8;
+		} else {
+			nbytes = 6;
+			lead = 0xfc;
+		}
+
+		if (tolen < nbytes) {
+			r = idn_buffer_overflow;
+			goto ret;
+		}
+
+		/*
+		 * Fill the sequence from its last byte towards the first,
+		 * peeling six bits off 'cp' each time.  What remains is the
+		 * payload of the lead byte.
+		 */
+		for (i = nbytes - 1; i > 0; i--) {
+			dst[i] = 0x80 | (cp & 0x3f);
+			cp >>= 6;
+		}
+		dst[0] = cp | lead;
+
+		dst += nbytes;
+		tolen -= nbytes;
+	}
+
+	/* Room for the terminator? */
+	if (tolen < 1) {
+		r = idn_buffer_overflow;
+		goto ret;
+	}
+	*dst = '\0';
+	r = idn_success;
+
+ret:
+	if (r != idn_success) {
+		TRACE(("idn_ucs4_ucs4toutf8(): %s\n",
+		       idn_result_tostring(r)));
+	} else {
+		TRACE(("idn_ucs4_ucs4toutf8(): success (utf8=\"%s\")\n",
+		       idn__debug_xstring(utf8, 50)));
+	}
+	return (r);
+}
+
+/*
+ * Return the number of elements in the UCS-4 string 'ucs4' that precede
+ * its terminating NUL.
+ */
+size_t
+idn_ucs4_strlen(const unsigned long *ucs4) {
+	const unsigned long *end = ucs4;
+
+	while (*end != '\0')
+		end++;
+
+	return ((size_t)(end - ucs4));
+}
+
+/*
+ * Copy the UCS-4 string 'from', terminating NUL included, into 'to'.
+ * No bounds checking is done: 'to' must be large enough.
+ * Returns 'to'.
+ */
+unsigned long *
+idn_ucs4_strcpy(unsigned long *to, const unsigned long *from) {
+	unsigned long *dst = to;
+	unsigned long ch;
+
+	do {
+		ch = *from++;
+		*dst++ = ch;
+	} while (ch != '\0');
+
+	return (to);
+}
+
+/*
+ * Append the UCS-4 string 'from', terminating NUL included, to the end
+ * of the UCS-4 string already stored in 'to'.
+ * No bounds checking is done: 'to' must be large enough.
+ * Returns 'to'.
+ */
+unsigned long *
+idn_ucs4_strcat(unsigned long *to, const unsigned long *from) {
+	unsigned long *dst;
+
+	/* Find the terminator of the existing string. */
+	for (dst = to; *dst != '\0'; dst++)
+		/* nothing to do */ ;
+
+	for (; *from != '\0'; from++, dst++)
+		*dst = *from;
+	*dst = '\0';
+
+	return (to);
+}
+
+/*
+ * Compare two NUL-terminated UCS-4 strings element by element.
+ * Returns 1 if 'str1' sorts after 'str2', -1 if it sorts before, and
+ * 0 if the strings are equal.
+ */
+int
+idn_ucs4_strcmp(const unsigned long *str1, const unsigned long *str2) {
+	for (;; str1++, str2++) {
+		if (*str1 != *str2)
+			return ((*str1 > *str2) ? 1 : -1);
+		if (*str1 == '\0')
+			return (0);	/* both strings ended together */
+	}
+}
+
+/*
+ * Compare two NUL-terminated UCS-4 strings, treating the ASCII letters
+ * 'A'..'Z' as equal to 'a'..'z'.  All other values are compared as is.
+ * Returns 1 if 'str1' sorts after 'str2', -1 if it sorts before, and
+ * 0 if the strings are equal.
+ */
+int
+idn_ucs4_strcasecmp(const unsigned long *str1, const unsigned long *str2) {
+	unsigned long lc1, lc2;
+
+	for (;; str1++, str2++) {
+		lc1 = ASCII_TOLOWER(*str1);
+		lc2 = ASCII_TOLOWER(*str2);
+		if (lc1 != lc2)
+			return ((lc1 > lc2) ? 1 : -1);
+		if (*str1 == '\0')
+			return (0);	/* both strings ended together */
+	}
+}
+
+
+/*
+ * Return a malloc()ed copy of the NUL-terminated UCS-4 string 'str',
+ * or NULL if the allocation fails.  The caller owns the returned buffer
+ * and releases it with free().
+ */
+unsigned long *
+idn_ucs4_strdup(const unsigned long *str) {
+	/* Size in bytes of the string, terminating NUL included. */
+	size_t nbytes = sizeof(*str) * (idn_ucs4_strlen(str) + 1);
+	unsigned long *copy = malloc(nbytes);
+
+	if (copy != NULL)
+		memcpy(copy, str, nbytes);
+
+	return copy;
+}