diff options
author | cvsadm <cvsadm> | 2005-01-21 00:44:34 +0000 |
---|---|---|
committer | cvsadm <cvsadm> | 2005-01-21 00:44:34 +0000 |
commit | b2093e3016027d6b5cf06b3f91f30769bfc099e2 (patch) | |
tree | cf58939393a9032182c4fbc4441164a9456e82f8 /ldap/servers/plugins/collation/collate.c | |
download | ds-ldapserver7x.tar.gz ds-ldapserver7x.tar.xz ds-ldapserver7x.zip |
Moving NSCP Directory Server from DirectoryBranch to TRUNK, initial drop. (foxworth) ldapserver7x
Diffstat (limited to 'ldap/servers/plugins/collation/collate.c')
-rw-r--r-- | ldap/servers/plugins/collation/collate.c | 454 |
1 files changed, 454 insertions, 0 deletions
diff --git a/ldap/servers/plugins/collation/collate.c b/ldap/servers/plugins/collation/collate.c new file mode 100644 index 00000000..603caf53 --- /dev/null +++ b/ldap/servers/plugins/collation/collate.c @@ -0,0 +1,454 @@ +/** BEGIN COPYRIGHT BLOCK + * Copyright 2001 Sun Microsystems, Inc. + * Portions copyright 1999, 2001-2003 Netscape Communications Corporation. + * All rights reserved. + * END COPYRIGHT BLOCK **/ +/* collate.c - implementation of indexing, using a Collation */ + +#include "collate.h" +#include <string.h> /* memcpy */ + +#include <unicode/ucol.h> /* Collation */ +#include <unicode/ucnv.h> /* Conversion */ +#include <unicode/ustring.h> /* UTF8 conversion */ + +#include <ldap.h> /* LDAP_UTF8LEN */ +#include <slap.h> /* for strcasecmp on non-UNIX platforms and correct debug macro */ + +void +collation_init( char *configpath ) + /* Called once per process, to initialize globals. */ +{ + /* ICU needs no initialization? */ +} + +typedef struct coll_profile_t { /* Collator characteristics */ + const char* language; + const char* country; + const char* variant; + UColAttributeValue strength; /* one of UCOL_PRIMARY = 0, UCOL_SECONDARY = 1, UCOL_TERTIARY = 2, UCOL_QUATERNARY = 3, UCOL_IDENTICAL = 4 */ + UColAttributeValue decomposition; /* one of UCOL_OFF = 0, UCOL_DEFAULT = 1, UCOL_ON = 2 */ +} coll_profile_t; + +typedef struct coll_id_t { /* associates an OID with a coll_profile_t */ + char* oid; + coll_profile_t* profile; +} coll_id_t; + +/* A list of all OIDs that identify collator profiles: */ +static const coll_id_t** collation_id = NULL; +static size_t collation_ids = 0; + +int +collation_config (size_t cargc, char** cargv, + const char* fname, size_t lineno) + /* Process one line from a configuration file. + Return 0 if it's OK, -1 if it's not recognized. + Any other return value is a process exit code. + */ +{ + if (cargc <= 0) { /* Bizarre. Oh, well... 
*/ + } else if (!strcasecmp (cargv[0], "NLS")) { + /* ignore - not needed anymore with ICU - was used to get path for NLS_Initialize */ + } else if (!strcasecmp (cargv[0], "collation")) { + if ( cargc < 7 ) { + LDAPDebug (LDAP_DEBUG_ANY, + "%s: line %lu ignored: only %lu arguments (expected " + "collation language country variant strength decomposition oid ...)\n", + fname, (unsigned long)lineno, (unsigned long)cargc ); + } else { + auto size_t arg; + auto coll_profile_t* profile = (coll_profile_t*) slapi_ch_calloc (1, sizeof (coll_profile_t)); + if (*cargv[1]) profile->language = slapi_ch_strdup (cargv[1]); + if (*cargv[2]) profile->country = slapi_ch_strdup (cargv[2]); + if (*cargv[3]) profile->variant = slapi_ch_strdup (cargv[3]); + switch (atoi(cargv[4])) { + case 1: profile->strength = UCOL_PRIMARY; break; + case 2: profile->strength = UCOL_SECONDARY; /* no break here? fall through? wtf? */ + case 3: profile->strength = UCOL_TERTIARY; break; + case 4: profile->strength = UCOL_IDENTICAL; break; + default: profile->strength = UCOL_SECONDARY; + LDAPDebug (LDAP_DEBUG_ANY, + "%s: line %lu: strength \"%s\" not supported (will use 2)\n", + fname, (unsigned long)lineno, cargv[4]); + break; + } + switch (atoi(cargv[5])) { + case 1: profile->decomposition = UCOL_OFF; break; + case 2: profile->decomposition = UCOL_DEFAULT; /* no break here? fall through? wtf? 
*/ + case 3: profile->decomposition = UCOL_ON; break; + default: profile->decomposition = UCOL_DEFAULT; + LDAPDebug (LDAP_DEBUG_ANY, + "%s: line %lu: decomposition \"%s\" not supported (will use 2)\n", + fname, (unsigned long)lineno, cargv[5]); + break; + } + + { + char descStr[256]; + char nameOrder[256]; + char nameSubstring[256]; + char oidString[256]; + char *tmpStr=NULL; + Slapi_MatchingRuleEntry *mrentry=slapi_matchingrule_new(); + + if(UCOL_PRIMARY == profile->strength) { + strcpy(nameOrder,"caseIgnoreOrderingMatch"); + strcpy(nameSubstring,"caseIgnoreSubstringMatch"); + } + else { + strcpy(nameOrder,"caseExactOrderingMatch"); + strcpy(nameSubstring,"caseExactSubstringMatch"); + } + + if(cargc > 7) { + strcat(nameOrder,"-"); + strcat(nameOrder,cargv[7]); + strcat(nameSubstring,"-"); + strcat(nameSubstring,cargv[7]); + slapi_matchingrule_set(mrentry,SLAPI_MATCHINGRULE_NAME, + (void *)slapi_ch_strdup(nameOrder)); + } + else { + if(0 != cargv[1][0]) { + strcat(nameOrder,"-"); + strcat(nameSubstring,"-"); + } + strcat(nameOrder,cargv[1]); + strcat(nameSubstring,cargv[1]); + slapi_matchingrule_set(mrentry,SLAPI_MATCHINGRULE_NAME, + (void *)slapi_ch_strdup(nameOrder)); + } + strcpy(oidString,cargv[6]); + slapi_matchingrule_set(mrentry,SLAPI_MATCHINGRULE_OID, + (void *)slapi_ch_strdup(oidString)); + if(0 != cargv[2][0]) { + sprintf(descStr,"%s-%s",cargv[1],cargv[2]); + } + else { + strcpy(descStr,cargv[1]); + } + slapi_matchingrule_set(mrentry,SLAPI_MATCHINGRULE_DESC, + (void *)slapi_ch_strdup(descStr)); + slapi_matchingrule_set(mrentry,SLAPI_MATCHINGRULE_SYNTAX, + (void *)slapi_ch_strdup(DIRSTRING_SYNTAX_OID)); + slapi_matchingrule_register(mrentry); + slapi_matchingrule_get(mrentry,SLAPI_MATCHINGRULE_NAME, + (void *)&tmpStr); + slapi_ch_free((void **)&tmpStr); + slapi_matchingrule_get(mrentry,SLAPI_MATCHINGRULE_OID, + (void *)&tmpStr); + slapi_ch_free((void **)&tmpStr); + slapi_matchingrule_set(mrentry,SLAPI_MATCHINGRULE_NAME, + (void 
*)slapi_ch_strdup(nameSubstring)); + strcat(oidString,".6"); + slapi_matchingrule_set(mrentry,SLAPI_MATCHINGRULE_OID, + (void *)slapi_ch_strdup(oidString)); + slapi_matchingrule_register(mrentry); + slapi_matchingrule_free(&mrentry,1); + } + + + for (arg = 6; arg < cargc; ++arg) { + auto coll_id_t* id = (coll_id_t*) slapi_ch_malloc (sizeof (coll_id_t)); + id->oid = slapi_ch_strdup (cargv[arg]); + id->profile = profile; + if (collation_ids <= 0) { + collation_id = (const coll_id_t**) slapi_ch_malloc (2 * sizeof (coll_id_t*)); + } else { + collation_id = (const coll_id_t**) slapi_ch_realloc + ((void*)collation_id, (collation_ids + 2) * sizeof (coll_id_t*)); + } + collation_id [collation_ids++] = id; + collation_id [collation_ids] = NULL; + } + } + } else { + return -1; /* unrecognized */ + } + return 0; /* success */ +} + +typedef struct collation_indexer_t + /* A kind of indexer, implemented using an ICU Collator */ +{ + UCollator* collator; + UConverter* converter; + struct berval** ix_keys; + int is_default_collator; +} collation_indexer_t; + +/* + Caller must ensure that U == NULL and Ulen == 0 the first time called +*/ +static UErrorCode +SetUnicodeStringFromUTF_8 (UChar** U, int32_t* Ulen, int *isAlloced, const struct berval* bv) + /* Copy the UTF-8 string bv into the UnicodeString U, + but remove leading and trailing whitespace, and + convert consecutive whitespaces into a single space. + Ulen is set to the number of UChars in the array (not necessarily the number of bytes!) + */ +{ + size_t n; + int32_t len = 0; /* length of non-space string */ + int32_t needLen = 0; /* number of bytes needed for string */ + UErrorCode err = U_ZERO_ERROR; + const char* s = bv->bv_val; + const char* begin = NULL; /* will point to beginning of non-space in val */ + const char* end = NULL; /* will point to the first space after the last non-space char in val */ + int32_t nUchars = 0; + + if (!bv->bv_len) { /* no value? 
*/ + return U_INVALID_FORMAT_ERROR; /* don't know what else to use here */ + } + + /* first, set s to the first non-space char in bv->bv_val */ + for (n = 0; (n < bv->bv_len) && ldap_utf8isspace((char *)s); ) { /* cast away const */ + const char *next = LDAP_UTF8NEXT((char *)s); /* cast away const */ + n += (next - s); /* count bytes, not chars */ + s = next; + } + begin = s; /* begin points to first non-space char in val */ + + if (n >= bv->bv_len) { /* value is all spaces? */ + return U_INVALID_FORMAT_ERROR; /* don't know what else to use here */ + } + + s = bv->bv_val + (bv->bv_len-1); /* move s to last char of bv_val */ + end = s; /* end points at last char of bv_val - may change below */ + /* find the last non-null and non-space char of val */ + for (n = bv->bv_len; (n > 0) && (!*s || ldap_utf8isspace((char *)s));) { + const char *prev = LDAP_UTF8PREV((char *)s); + end = prev; + n -= (s - prev); /* count bytes, not chars */ + s = prev; + } + + /* end now points at last non-null/non-space of val */ + if (n < 0) { /* bogus */ + return U_INVALID_FORMAT_ERROR; /* don't know what else to use here */ + } + + len = LDAP_UTF8NEXT((char *)end) - begin; + + u_strFromUTF8(*U, *Ulen, &nUchars, begin, len, &err); + if (nUchars > *Ulen) { /* need more space */ + if (*isAlloced) { /* realloc space */ + *U = (UChar *)slapi_ch_realloc((char *)*U, sizeof(UChar) * nUchars); + } else { /* must use malloc */ + *U = (UChar *)slapi_ch_malloc(sizeof(UChar) * nUchars); + *isAlloced = 1; /* no longer using fixed buffer */ + } + *Ulen = nUchars; + err = U_ZERO_ERROR; /* reset */ + u_strFromUTF8(*U, *Ulen, NULL, begin, len, &err); + } else { + *Ulen = nUchars; + } + + return err; +} + +static struct berval** +collation_index (indexer_t* ix, struct berval** bvec, struct berval** prefixes) +{ + collation_indexer_t* etc = (collation_indexer_t*) ix->ix_etc; + struct berval** keys = NULL; + if (bvec) { + char keyBuffer[128]; /* try to use static space buffer to avoid malloc */ + int32_t 
keyLen = sizeof(keyBuffer); + char* key = keyBuffer; /* but key can grow if necessary */ + size_t keyn = 0; + struct berval** bv; + UChar charBuffer[128]; /* try to use static space buffer */ + int32_t nChars = sizeof(charBuffer)/sizeof(UChar); /* but grow if necessary */ + UChar *chars = charBuffer; /* try to reuse this */ + int isAlloced = 0; /* using fixed buffer */ + + for (bv = bvec; *bv; ++bv) { + /* if chars is allocated, nChars will be the capacity and the number of chars in chars */ + /* otherwise, nChars will be the number of chars, which may be less than the capacity */ + if (!isAlloced) { + nChars = sizeof(charBuffer)/sizeof(UChar); /* reset */ + } + if (U_ZERO_ERROR == SetUnicodeStringFromUTF_8 (&chars, &nChars, &isAlloced, *bv)) { + /* nChars is now the number of UChar in chars, which may be less than the + capacity of charBuffer if not allocated */ + struct berval* prefix = prefixes ? prefixes[bv-bvec] : NULL; + const size_t prefixLen = prefix ? prefix->bv_len : 0; + struct berval* bk = NULL; + int32_t realLen; /* real length of key, not keyLen which is buffer size */ + + /* try to get the sort key using key and keyLen; only grow key + if we need to */ + /* can use -1 for char len since the conversion from UTF8 + null terminates the string */ + realLen = ucol_getSortKey(etc->collator, chars, nChars, (uint8_t *)key, keyLen); + if (realLen > keyLen) { /* need more space */ + if (key == keyBuffer) { + key = (char*)slapi_ch_malloc(sizeof(char) * realLen); + } else { + key = (char*)slapi_ch_realloc(key, sizeof(char) * realLen); + } + keyLen = ucol_getSortKey(etc->collator, chars, nChars, (uint8_t *)key, realLen); + } + if (realLen > 0) { + bk = (struct berval*) slapi_ch_malloc (sizeof(struct berval)); + + bk->bv_len = prefixLen + realLen; + bk->bv_val = slapi_ch_malloc (bk->bv_len + 1); + if (prefixLen) { + memcpy(bk->bv_val, prefix->bv_val, prefixLen); + } + memcpy(bk->bv_val + prefixLen, key, realLen); + bk->bv_val[bk->bv_len] = '\0'; + LDAPDebug 
(LDAP_DEBUG_FILTER, "collation_index(%.*s) %lu bytes\n", + bk->bv_len, bk->bv_val, (unsigned long)bk->bv_len); + keys = (struct berval**) + slapi_ch_realloc ((void*)keys, sizeof(struct berval*) * (keyn + 2)); + keys[keyn++] = bk; + keys[keyn] = NULL; + } + } + } + if (chars != charBuffer) { /* realloc'ed, need to free */ + slapi_ch_free((void **)&chars); + } + if (key != keyBuffer) { /* realloc'ed, need to free */ + slapi_ch_free_string(&key); + } + } + if (etc->ix_keys != NULL) ber_bvecfree (etc->ix_keys); + etc->ix_keys = keys; + return keys; +} + +static void +collation_indexer_destroy (indexer_t* ix) + /* The destructor function for a collation-based indexer. */ +{ + collation_indexer_t* etc = (collation_indexer_t*) ix->ix_etc; + if (etc->converter) { + ucnv_close(etc->converter); + etc->converter = NULL; + } + if (!etc->is_default_collator) { + /* Don't delete the default collation - it seems to cause problems */ + ucol_close(etc->collator); + etc->collator = NULL; + } + if (etc->ix_keys != NULL) { + ber_bvecfree (etc->ix_keys); + etc->ix_keys = NULL; + } + slapi_ch_free((void**)&ix->ix_etc); + ix->ix_etc = NULL; /* just for hygiene */ +} + +static UErrorCode +s_newNamedLocaleFromComponents(char **locale, const char *lang, const char *country, const char *variant) +{ + UErrorCode err = U_ZERO_ERROR; + int hasLang = (lang && *lang); + int hasC = (country && *country); + int hasVar = (variant && *variant); + + *locale = NULL; + if (hasLang) { + *locale = PR_smprintf("%s%s%s%s%s", lang, (hasC ? "_" : ""), (hasC ? country : ""), + (hasVar ? "_" : ""), (hasVar ? variant : "")); + } else { + err = U_INVALID_FORMAT_ERROR; /* don't know what else to use here */ + } + + return err; +} + +indexer_t* +collation_indexer_create (const char* oid) + /* Return a new indexer, based on the collation identified by oid. + Return NULL if this can't be done. 
+ */ +{ + indexer_t* ix = NULL; + const coll_id_t** id = collation_id; + char* locale = NULL; /* NULL == default locale */ + if (id) for (; *id; ++id) { + if (!strcasecmp (oid, (*id)->oid)) { + const coll_profile_t* profile = (*id)->profile; + const int is_default = (profile->language == NULL && + profile->country == NULL && + profile->variant == NULL); + UErrorCode err = U_ZERO_ERROR; + if ( ! is_default) { + if (locale) { + PR_smprintf_free(locale); + locale = NULL; + } + err = s_newNamedLocaleFromComponents(&locale, + profile->language, + profile->country, + profile->variant); + } + if (err == U_ZERO_ERROR) { + UCollator* coll = ucol_open(locale, &err); + /* + * If we found exactly the right collator for this locale, + * or if we found a fallback one, or if we are happy with + * the default, use it. + */ + if (err == U_ZERO_ERROR || err == U_USING_FALLBACK_WARNING || + (err == U_USING_DEFAULT_WARNING && is_default)) { + collation_indexer_t* etc = (collation_indexer_t*) + slapi_ch_calloc (1, sizeof (collation_indexer_t)); + ix = (indexer_t*) slapi_ch_calloc (1, sizeof (indexer_t)); + ucol_setAttribute (coll, UCOL_STRENGTH, profile->strength, &err); + if (err != U_ZERO_ERROR) { + LDAPDebug (LDAP_DEBUG_ANY, "collation_indexer_create: could not " + "set the collator strength for oid %s to %d: err %d\n", + oid, profile->strength, err); + } + ucol_setAttribute (coll, UCOL_DECOMPOSITION_MODE, profile->decomposition, &err); + if (err != U_ZERO_ERROR) { + LDAPDebug (LDAP_DEBUG_ANY, "collation_indexer_create: could not " + "set the collator decomposition mode for oid %s to %d: err %d\n", + oid, profile->decomposition, err); + } + etc->collator = coll; + etc->is_default_collator = is_default; + for (id = collation_id; *id; ++id) { + if ((*id)->profile == profile) { + break; /* found the 'official' id */ + } + } + ix->ix_etc = etc; + ix->ix_oid = (*id)->oid; + ix->ix_index = collation_index; + ix->ix_destroy = collation_indexer_destroy; + break; /* return */ + /* free 
(etc); */ + /* free (ix); */ + } else if (err == U_USING_DEFAULT_WARNING) { + LDAPDebug (LDAP_DEBUG_FILTER, "collation_indexer_create: could not " + "create an indexer for OID %s for locale %s and could not " + "use default locale\n", + oid, (locale ? locale : "(default)"), NULL); + } else { /* error */ + LDAPDebug (LDAP_DEBUG_FILTER, "collation_indexer_create: could not " + "create an indexer for OID %s for locale %s: err = %d\n", + oid, (locale ? locale : "(default)"), err); + } + if (coll) { + ucol_close (coll); + coll = NULL; + } + } + break; /* failed to create the specified collator */ + } + } + if (locale) { + PR_smprintf_free(locale); + locale = NULL; + } + return ix; +} |