summaryrefslogtreecommitdiffstats
path: root/ldap/clients/dsgw/collate.c
diff options
context:
space:
mode:
Diffstat (limited to 'ldap/clients/dsgw/collate.c')
-rw-r--r--ldap/clients/dsgw/collate.c400
1 files changed, 400 insertions, 0 deletions
diff --git a/ldap/clients/dsgw/collate.c b/ldap/clients/dsgw/collate.c
new file mode 100644
index 00000000..23ef759e
--- /dev/null
+++ b/ldap/clients/dsgw/collate.c
@@ -0,0 +1,400 @@
+/**
+ * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to
+ * license terms. Copyright © 2001 Sun Microsystems, Inc.
+ * Some preexisting portions Copyright © 2001 Netscape Communications Corp.
+ * All rights reserved.
+ */
+/*
+ * collate.c -- routines to collate character strings
+ *
+ * Copyright (c) 1997 Netscape Communications Corp.
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include "dsgw.h"
+#include <ldap.h> /* ldap_utf8* */
+
+#include <unicode/ucol.h> /* Collation */
+#include <unicode/ucnv.h> /* Conversion */
+#include <unicode/ustring.h> /* UTF8 conversion */
+
+#ifdef _WINDOWS
+#undef strcasecmp
+#define strcasecmp _strcmpi
+#endif
+
+/*
+ Convert the given string s, encoded in UTF8, into a Unicode (UTF16 or 32, depending on sizeof(UChar))
+ string for use with collation and key generation
+ The given string U will be filled in if it's capacity (given by Ulen) is big enough,
+ otherwise, it will be malloced (or realloced if already allocated)
+*/
+static UErrorCode
+SetUnicodeStringFromUTF_8 (UChar** U, int32_t* Ulen, int *isAlloced, const char *s)
+ /* Copy the UTF-8 string bv into the UnicodeString U,
+ but remove leading and trailing whitespace, and
+ convert consecutive whitespaces into a single space.
+ Ulen is set to the number of UChars in the array (not necessarily the number of bytes!)
+ */
+{
+ size_t n;
+ int32_t len = 0; /* length of non-space string */
+ int32_t needLen = 0; /* number of bytes needed for string */
+ UErrorCode err = U_ZERO_ERROR;
+ const char* begin; /* will point to beginning of non-space in s */
+
+ /* first, set s to the first non-space char in bv->bv_val */
+ while (s && *s && ldap_utf8isspace((char *)s)) { /* cast away const */
+ const char *next = LDAP_UTF8NEXT((char *)s); /* cast away const */
+ s = next;
+ }
+ begin = s;
+
+ if (!s || !*s) {
+ return U_INVALID_FORMAT_ERROR; /* don't know what else to use here */
+ }
+
+ /* next, find the length of the non-space string */
+ while (s && *s && !ldap_utf8isspace((char *)s)) { /* cast away const */
+ const char *next = LDAP_UTF8NEXT((char *)s); /* cast away const */
+ len += (next - s); /* count bytes, not chars */
+ needLen++; /* needLen counts chars */
+ s = next;
+ }
+
+ if (needLen == 0) { /* bogus */
+ return U_INVALID_FORMAT_ERROR; /* don't know what else to use here */
+ }
+
+ needLen++; /* +1 for trailing UChar space */
+ if (needLen > *Ulen) { /* need more space */
+ if (*isAlloced) { /* realloc space */
+ *U = (UChar *)dsgw_ch_realloc((char *)*U, sizeof(UChar) * needLen);
+ } else { /* must use malloc */
+ *U = (UChar *)dsgw_ch_malloc(sizeof(UChar) * needLen);
+ *isAlloced = 1; /* no longer using fixed buffer */
+ }
+ *Ulen = needLen;
+ }
+ u_strFromUTF8(*U, sizeof(UChar) * (*Ulen), NULL, begin, len, &err);
+
+ return err;
+}
+
+static UCollator*
+get_collator (int flavor)
+{
+ static UCollator* collator[2] = {NULL, NULL};
+/* dsgw_emitf("get_collator (%i)<br>\n", flavor); */
+ if (collator[flavor] == NULL &&
+ gc->gc_ClientLanguage && gc->gc_ClientLanguage[0]) {
+ /* Try to create a Collation for the client's preferred language */
+ ACCEPT_LANGUAGE_LIST langlist;
+ size_t langs;
+/* dsgw_emitf ("ClientLanguage = \"%s\"<br>\n", gc->gc_ClientLanguage); */
+ langs = AcceptLangList (gc->gc_ClientLanguage, langlist);
+ if (langs <= 0) {
+dsgw_emitf ("AcceptLangList (%s) = %lu<br>\n",
+ gc->gc_ClientLanguage, (unsigned long)langs);
+ } else {
+ UCollator* fallback_collator = NULL;
+ UCollator* default_collator = NULL;
+ UErrorCode err = U_ZERO_ERROR;
+ size_t i;
+
+ for (i = 0; i < langs; ++i) {
+ /* Try to create a Collation for langs[i] */
+ char* lang = langlist[i];
+ collator[flavor] = ucol_open(lang, &err);
+ if (err == U_ZERO_ERROR && collator[flavor]) {
+dsgw_emitf("<!-- New Collator (%s) == SUCCESS -->\n", lang);
+ break;
+ } else {
+ if (err == U_USING_FALLBACK_WARNING) {
+ if (fallback_collator == NULL) {
+ fallback_collator = collator[flavor];
+dsgw_emitf("<!-- New Collator (%s) == USING_FALLBACK_LOCALE -->\n", lang);
+ } else {
+ ucol_close (collator[flavor]);
+ }
+ } else if (err == U_USING_DEFAULT_WARNING) {
+ if (default_collator == NULL) {
+ default_collator = collator[flavor];
+dsgw_emitf("<!-- New Collator (%s) == USING_DEFAULT_LOCALE -->\n", lang);
+ } else {
+ ucol_close (collator[flavor]);
+ }
+ } else {
+dsgw_emitf("New Collator error (%s) == %i<br>\n", lang, err);
+ }
+ collator[flavor] = NULL;
+ }
+ }
+ if (collator[flavor] == NULL) {
+ if (fallback_collator != NULL) {
+ collator[flavor] = fallback_collator;
+ fallback_collator = NULL;
+ } else if (default_collator != NULL) {
+ collator[flavor] = default_collator;
+ default_collator = NULL;
+ }
+ }
+ if (collator[flavor] != NULL) {
+ switch (flavor) {
+ case CASE_EXACT:
+dsgw_emits("<!-- CollationSetStrength (TERTIARY) -->\n");
+ ucol_setAttribute (collator[flavor], UCOL_STRENGTH, UCOL_TERTIARY, &err);
+ break;
+ default: /* CASE_IGNORE */
+ if (dsgw_scriptorder()->so_caseIgnoreAccents) {
+dsgw_emits("<!-- CollationSetStrength (PRIMARY) -->\n");
+ ucol_setAttribute (collator[flavor], UCOL_STRENGTH, UCOL_PRIMARY, &err);
+ } else {
+dsgw_emits("<!-- CollationSetStrength (SECONDARY) -->\n");
+ ucol_setAttribute (collator[flavor], UCOL_STRENGTH, UCOL_SECONDARY, &err);
+ }
+ break;
+ }
+ }
+ if (default_collator != NULL) {
+ ucol_close (default_collator);
+ default_collator = NULL;
+ }
+ if (fallback_collator != NULL) {
+ ucol_close (fallback_collator);
+ fallback_collator = NULL;
+ }
+ }
+ }
+ return collator[flavor];
+}
+
+static int
+valcmp (const char** L, const char** R)
+{
+ return strcmp (*L, *R);
+}
+
+static int
+valcasecmp (const char** L, const char** R)
+{
+ return strcasecmp (*L, *R);
+}
+
+static int
+strXcollate (int flavor, const char* L, const char* R)
+{
+ UCollator* collator = get_collator (flavor);
+ if (collator != NULL) {
+ UChar LuBuffer[128];
+ UChar* Lu = LuBuffer;
+ int32_t LuLen = u_strlen(LuBuffer);
+ int LuisAlloced = 0;
+ if (SetUnicodeStringFromUTF_8 (&Lu, &LuLen, &LuisAlloced, L) == U_ZERO_ERROR) {
+ UChar RuBuffer[128];
+ UChar* Ru = RuBuffer;
+ int32_t RuLen = u_strlen(RuBuffer);
+ int RuisAlloced = 0;
+ if (SetUnicodeStringFromUTF_8 (&Ru, &RuLen, &RuisAlloced, R) == U_ZERO_ERROR) {
+ UCollationResult colres = ucol_strcoll(collator, Lu, LuLen, Ru, RuLen);
+ int result = 0;
+ switch (colres) {
+ case UCOL_LESS:
+ result = -1;
+ break;
+ case UCOL_GREATER:
+ result = 1;
+ break;
+ default:
+ break;
+ }
+#ifdef DSGW_DEBUG
+ {
+ auto char* Le = dsgw_strdup_escaped (L);
+ auto char* Re = dsgw_strdup_escaped (R);
+ dsgw_log ("strXcollate:%s %s %s\n",
+ Le, result < 0 ? "<" : (result == 0 ? "=" : ">"), Re);
+ free (Le);
+ free (Re);
+ }
+#endif
+ if (RuisAlloced) {
+ free(Ru);
+ Ru = NULL;
+ }
+ if (LuisAlloced) {
+ free(Lu);
+ Lu = NULL;
+ }
+
+ return result;
+ }
+ if (LuisAlloced) {
+ free(Lu);
+ Lu = NULL;
+ }
+ }
+ }
+ return flavor ? strcasecmp (L, R) : strcmp (L, R);
+}
+
+static int
+strcollate (const char* L, const char* R)
+{
+ return strXcollate (CASE_EXACT, L, R);
+}
+
+static int
+strcasecollate (const char* L, const char* R)
+{
+ return strXcollate (CASE_INSENSITIVE, L, R);
+}
+
+static int
+valcollate (const char** L, const char** R)
+{
+ return strXcollate (CASE_EXACT, *L, *R);
+}
+
+static int
+valcasecollate (const char** L, const char** R)
+{
+ return strXcollate (CASE_INSENSITIVE, *L, *R);
+}
+
+strcmp_t
+dsgw_strcmp (int flavor)
+{
+ if (get_collator (flavor) != NULL) {
+ return flavor ? strcasecollate : strcollate;
+ }
+ return flavor ? strcasecmp : strcmp;
+}
+
+valcmp_t
+dsgw_valcmp (int flavor)
+{
+ if (get_collator (flavor) != NULL) {
+ return flavor ? valcasecollate : valcollate;
+ }
+ return flavor ? valcasecmp : valcmp;
+}
+
+static size_t
+dsgw_scriptof (const char* s, scriptrange_t** ranges)
+{
+ auto size_t result = 0;
+ if (s && ranges) {
+ auto unsigned long u;
+ while ((u = LDAP_UTF8GETCC (s)) != 0) {
+ auto size_t ss;
+ auto scriptrange_t* sr;
+ for (ss = 0; (sr = ranges[ss]) != NULL; ++ss) {
+ do {
+ if (sr->sr_min <= u && u <= sr->sr_max) {
+ break;
+ }
+ } while ((sr = sr->sr_next) != NULL);
+ if (sr) {
+ if (result < ss) result = ss;
+ break;
+ }
+ }
+ if (!sr) {
+ result = ss;
+ break;
+ }
+ }
+ }
+#ifdef DSGW_DEBUG
+ dsgw_log ("script %lu\n", (unsigned long)result);
+#endif
+ return result;
+}
+
+static struct berval key_first = {0, 0};
+static struct berval key_last = {0, 0};
+
+struct berval* dsgw_key_first = &key_first;
+struct berval* dsgw_key_last = &key_last;
+
+void LDAP_C LDAP_CALLBACK
+dsgw_keyfree( void *arg, const struct berval* key )
+{
+ if (key->bv_val) free (key->bv_val);
+ else if (key == dsgw_key_first || key == dsgw_key_last) return;
+ free ((void*)key);
+}
+
+int LDAP_C LDAP_CALLBACK
+dsgw_keycmp( void *arg, const struct berval *L, const struct berval *R )
+{
+ int result = 0;
+ if (L == R) {
+ } else if (L->bv_val == NULL) { /* L is either first or last */
+ result = (L == dsgw_key_last) ? 1 : -1;
+ } else if (R->bv_val == NULL) { /* R is either first or last */
+ result = (R == dsgw_key_last) ? -1 : 1;
+ } else
+ /* copied from slapi_berval_cmp(), in ../../servers/slapd/plugin.c: */
+ if (L->bv_len < R->bv_len) {
+ result = memcmp (L->bv_val, R->bv_val, L->bv_len);
+ if (result == 0)
+ result = -1;
+ } else {
+ result = memcmp (L->bv_val, R->bv_val, R->bv_len);
+ if (result == 0 && (L->bv_len > R->bv_len))
+ result = 1;
+ }
+ return result;
+}
+
+struct berval*
+dsgw_strkeygen (int flavor, const char* s)
+{
+ auto struct berval* v = (struct berval*)dsgw_ch_malloc (sizeof (struct berval));
+ auto UCollator* collator = get_collator (flavor);
+ v->bv_val = NULL;
+ if (collator != NULL) {
+ UChar uBuffer[128];
+ UChar* u = uBuffer;
+ int32_t uLen = u_strlen(uBuffer);
+ int uisAlloced = 0;
+ if (SetUnicodeStringFromUTF_8 (&u, &uLen, &uisAlloced, s) == U_ZERO_ERROR) {
+ char keyBuffer[128]; /* try to use static space buffer to avoid malloc */
+ int32_t keyLen = u_strlen(keyBuffer);
+ char* key = keyBuffer; /* but key can grow if necessary */
+ int32_t realLen = ucol_getSortKey(collator, u, uLen, (uint8_t *)key, keyLen);
+ if (realLen > keyLen) { /* need more space */
+ key = (char*)dsgw_ch_malloc(sizeof(char) * realLen);
+ keyLen = ucol_getSortKey(collator, u, uLen, (uint8_t *)key, realLen);
+ }
+ v->bv_len = realLen + 2;
+ v->bv_val = dsgw_ch_malloc (v->bv_len);
+ memcpy(v->bv_val+1, key, realLen);
+ if (uisAlloced) {
+ free(u);
+ u = NULL;
+ }
+ if (key != keyBuffer) {
+ free(key);
+ key = NULL;
+ }
+ }
+ }
+ if (v->bv_val == NULL) {
+ v->bv_len = (s ? strlen (s) : 0) + 2;
+ v->bv_val = dsgw_ch_malloc (v->bv_len);
+ if (v->bv_len > 2) memcpy (v->bv_val+1, s, v->bv_len-2);
+ if (flavor) {
+ register char* t;
+ for (t = v->bv_val+1; *t; ++t) {
+ if (isascii (*t)) *t = tolower (*t);
+ }
+ }
+ }
+ v->bv_val[0] = (char) dsgw_scriptof (s, dsgw_scriptorder()->so_sort);
+ v->bv_val[v->bv_len-1] = '\0';
+ return v;
+}