From e8db3a652ce9e1ca433e3df0cf6527cc25684369 Mon Sep 17 00:00:00 2001 From: William Brown Date: Tue, 25 Jul 2017 16:09:59 +1000 Subject: [PATCH 1/2] Ticket 49330 - Improve ndn cache performance.

Bug Description: Normalised DNs are costly to compute and maintain, so a normalised DN (NDN) cache was created. Yet it was never able to perform well, and on some datasets with large sets of dn attr types the NDN cache actively hurt performance. This stemmed from 3 major issues in the design of the NDN cache.

First, it was a global cache, which means it lived behind a rwlock. This caused delays as threads waited behind the lock to access or update the cache (especially on a miss).

Second, the cache was limited to 2053 buckets (NDN_CACHE_BUCKETS). Despite the prime bucket count, which is meant to smooth the distribution, this was an NSPR hash - which does not grow dynamically, instead devolving each overfull bucket to a linked list. As a result, once you passed ~3000 entries your lookup performance would degrade rapidly towards O(n).

Finally, the cache's LRU policy did not evict just the least-used entry - it evicted the 10,000 least-used entries. So if you tuned your cache to match the NSPR map, every insertion that triggered a delete of old values would effectively empty your cache. On bigger set sizes, this has to walk the LRU list (O(n)) to clean 10,000 elements. Premature optimisation strikes again ....

Fix Description: Throw it out. Rewrite. We now use a hash algorithm (siphash13) with proper distribution across the set, and the table sizes its slots to a power of two. Finally, each thread has a private cache rather than a shared one, which completely eliminates lock contention and even NUMA performance issues.

This fix should also improve DB import, memberof and refint performance, and more. Testing has shown a 10% throughput improvement on simple search workloads, and a 47x improvement on complex searches.

https://pagure.io/389-ds-base/issue/49330

Author: wibrown

Review by: ???
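[Editor's note: the design described above - one private NDN cache per worker thread, with the slot count clamped to a power of two - can be sketched in isolation. The following is an illustration only, not code from the patch: the toy_* names are hypothetical, a trivial FNV-1a hash stands in for sds_siphash13, and each slot holds a single entry where the real cache chains collisions and maintains an LRU list.]

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define TOY_SLOTS 8192 /* power of two, so hash % TOY_SLOTS reduces to a cheap mask */

struct toy_entry {
    char *dn;  /* the key we hashed */
    char *ndn; /* the cached normalised form */
};

struct toy_cache {
    struct toy_entry table[TOY_SLOTS];
};

static pthread_key_t toy_key;
static pthread_once_t toy_once = PTHREAD_ONCE_INIT;

static void
toy_cache_free(void *v)
{
    struct toy_cache *c = v;
    for (size_t i = 0; i < TOY_SLOTS; i++) {
        free(c->table[i].dn);
        free(c->table[i].ndn);
    }
    free(c);
}

static void
toy_key_init(void)
{
    /* The destructor runs per thread at exit, so no global teardown lock is needed. */
    pthread_key_create(&toy_key, toy_cache_free);
}

/* Stand-in hash (FNV-1a). The patch uses sds_siphash13 with a 16 byte key. */
static uint64_t
toy_hash(const char *s, size_t len)
{
    uint64_t h = 0xcbf29ce484222325ULL;
    for (size_t i = 0; i < len; i++) {
        h = (h ^ (unsigned char)s[i]) * 0x100000001b3ULL;
    }
    return h;
}

static struct toy_cache *
toy_cache_get(void)
{
    pthread_once(&toy_once, toy_key_init);
    struct toy_cache *c = pthread_getspecific(toy_key);
    if (c == NULL) {
        /* First use on this thread: allocate a private cache. Because it is
         * thread local, lookups and inserts never take a lock. */
        c = calloc(1, sizeof(struct toy_cache));
        pthread_setspecific(toy_key, c);
    }
    return c;
}

static void
toy_insert(const char *dn, const char *ndn)
{
    struct toy_cache *c = toy_cache_get();
    size_t slot = toy_hash(dn, strlen(dn)) % TOY_SLOTS;
    /* Collision policy in this toy: evict whatever held the slot. */
    free(c->table[slot].dn);
    free(c->table[slot].ndn);
    c->table[slot].dn = strdup(dn);
    c->table[slot].ndn = strdup(ndn);
}

static const char *
toy_lookup(const char *dn)
{
    struct toy_cache *c = toy_cache_get();
    size_t slot = toy_hash(dn, strlen(dn)) % TOY_SLOTS;
    /* The full 64 bit hash spreads evenly over a power-of-two slot count,
     * but we still compare keys to rule out collisions. */
    if (c->table[slot].dn != NULL && strcmp(c->table[slot].dn, dn) == 0) {
        return c->table[slot].ndn;
    }
    return NULL;
}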
--- Makefile.am | 4 +- dirsrvtests/tests/suites/basic/basic_test.py | 1 + ldap/servers/slapd/back-ldbm/monitor.c | 11 +- ldap/servers/slapd/dn.c | 673 +++++++++++++++------------ ldap/servers/slapd/libglobs.c | 31 +- ldap/servers/slapd/main.c | 12 +- ldap/servers/slapd/proto-slap.h | 4 +- ldap/servers/slapd/slap.h | 4 +- ldap/servers/slapd/slapi-private.h | 4 +- test/libslapd/operation/v3_compat.c | 4 + test/libslapd/pblock/v3_compat.c | 9 + 11 files changed, 433 insertions(+), 324 deletions(-) diff --git a/Makefile.am b/Makefile.am index 134206d..26f1a27 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1228,8 +1228,8 @@ libslapd_la_SOURCES = ldap/servers/slapd/add.c \ ldap/servers/slapd/slapi_pal.c \ $(libavl_a_SOURCES) -libslapd_la_CPPFLAGS = $(AM_CPPFLAGS) $(DSPLUGIN_CPPFLAGS) $(SASL_INCLUDES) @db_inc@ $(SVRCORE_INCLUDES) @kerberos_inc@ @pcre_inc@ -libslapd_la_LIBADD = $(LDAPSDK_LINK) $(SASL_LINK) $(SVRCORE_LINK) $(NSS_LINK) $(NSPR_LINK) $(KERBEROS_LINK) $(PCRE_LINK) $(THREADLIB) $(SYSTEMD_LINK) +libslapd_la_CPPFLAGS = $(AM_CPPFLAGS) $(DSPLUGIN_CPPFLAGS) $(SASL_INCLUDES) @db_inc@ $(SVRCORE_INCLUDES) @kerberos_inc@ @pcre_inc@ $(SDS_CPPFLAGS) +libslapd_la_LIBADD = $(LDAPSDK_LINK) $(SASL_LINK) $(SVRCORE_LINK) $(NSS_LINK) $(NSPR_LINK) $(KERBEROS_LINK) $(PCRE_LINK) $(THREADLIB) $(SYSTEMD_LINK) libsds.la libslapd_la_LDFLAGS = $(AM_LDFLAGS) $(SLAPD_LDFLAGS) diff --git a/dirsrvtests/tests/suites/basic/basic_test.py b/dirsrvtests/tests/suites/basic/basic_test.py index 7b77686..1a35efe 100644 --- a/dirsrvtests/tests/suites/basic/basic_test.py +++ b/dirsrvtests/tests/suites/basic/basic_test.py @@ -325,6 +325,7 @@ def test_basic_acl(topology_st, import_example_ldif): """Run some basic access control(ACL) tests""" log.info('Running test_basic_acl...') + topology_st.standalone.start() DENY_ACI = ('(targetattr = "*") (version 3.0;acl "deny user";deny (all)' + '(userdn = "ldap:///' + USER1_DN + '");)') diff --git a/ldap/servers/slapd/back-ldbm/monitor.c b/ldap/servers/slapd/back-ldbm/monitor.c index 403d6a2..5f57b9f 100644 --- a/ldap/servers/slapd/back-ldbm/monitor.c +++ b/ldap/servers/slapd/back-ldbm/monitor.c @@ -50,6 +50,9 @@ ldbm_back_monitor_instance_search(Slapi_PBlock *pb __attribute__((unused)), PRUint64 hits, tries; long nentries, maxentries, count; size_t size, maxsize; + size_t thread_size; + size_t evicts; + size_t slots; /* NPCTE fix for bugid 544365, esc 0. <04-Jul-2001> */ struct stat astat; /* end of NPCTE fix for bugid 544365 */ @@ -124,7 +127,7 @@ ldbm_back_monitor_instance_search(Slapi_PBlock *pb __attribute__((unused)), } /* normalized dn cache stats */ if (ndn_cache_started()) { - ndn_cache_get_stats(&hits, &tries, &size, &maxsize, &count); + ndn_cache_get_stats(&hits, &tries, &size, &maxsize, &thread_size, &evicts, &slots, &count); sprintf(buf, "%" PRIu64, tries); MSET("normalizedDnCacheTries"); sprintf(buf, "%" PRIu64, hits); @@ -133,6 +136,8 @@ ldbm_back_monitor_instance_search(Slapi_PBlock *pb __attribute__((unused)), MSET("normalizedDnCacheMisses"); sprintf(buf, "%lu", (unsigned long)(100.0 * (double)hits / (double)(tries > 0 ? 
tries : 1))); MSET("normalizedDnCacheHitRatio"); + sprintf(buf, "%lu", (long unsigned int)evicts); + MSET("normalizedDnCacheEvictions"); sprintf(buf, "%lu", (long unsigned int)size); MSET("currentNormalizedDnCacheSize"); if (maxsize == 0) { @@ -141,6 +146,10 @@ ldbm_back_monitor_instance_search(Slapi_PBlock *pb __attribute__((unused)), sprintf(buf, "%lu", (long unsigned int)maxsize); } MSET("maxNormalizedDnCacheSize"); + sprintf(buf, "%lu", (long unsigned int)thread_size); + MSET("normalizedDnCacheThreadSize"); + sprintf(buf, "%lu", (long unsigned int)slots); + MSET("normalizedDnCacheThreadSlots"); sprintf(buf, "%ld", count); MSET("currentNormalizedDnCacheCount"); } diff --git a/ldap/servers/slapd/dn.c b/ldap/servers/slapd/dn.c index 252d4e1..9ec1518 100644 --- a/ldap/servers/slapd/dn.c +++ b/ldap/servers/slapd/dn.c @@ -22,6 +22,9 @@ #include "slap.h" #include <plhash.h> +/* For the ndn cache - this gives us siphash13 */ +#include <sds.h> + #undef SDN_DEBUG static void add_rdn_av(char *avstart, char *avend, int *rdn_av_countp, struct berval **rdn_avsp, struct berval *avstack); @@ -32,52 +35,89 @@ static void rdn_av_swap(struct berval *av1, struct berval *av2, int escape); static int does_cn_uses_dn_syntax_in_dns(char *type, char *dn); /* normalized dn cache related definitions*/ -struct - ndn_cache_lru -{ - struct ndn_cache_lru *prev; - struct ndn_cache_lru *next; - char *key; -}; - -struct - ndn_cache_ctx -{ - struct ndn_cache_lru *head; - struct ndn_cache_lru *tail; +struct ndn_cache_stats { Slapi_Counter *cache_hits; Slapi_Counter *cache_tries; - Slapi_Counter *cache_misses; - size_t cache_size; - size_t cache_max_size; - long cache_count; + Slapi_Counter *cache_count; + Slapi_Counter *cache_size; + Slapi_Counter *cache_evicts; + size_t max_size; + size_t thread_max_size; + size_t slots; }; -struct - ndn_hash_val -{ +struct ndn_cache_value { + size_t size; + size_t slot; + char *dn; char *ndn; - size_t len; - int size; - struct ndn_cache_lru *lru_node; /* used to speed up lru shuffling */ + struct ndn_cache_value *next; + struct ndn_cache_value *prev; + struct ndn_cache_value *child; }; -#define NDN_FLUSH_COUNT 10000 /* number of DN's to remove when cache fills up */ -#define NDN_MIN_COUNT 1000 /* the minimum number of DN's to keep in the cache */ -#define NDN_CACHE_BUCKETS 2053 /* prime number */ +/* + * This uses a similar alloc trick to IDList to keep + * the amount of derefs small. + */ +struct ndn_cache { + /* + * We keep per thread stats and flush them occasionally + */ + size_t max_size; + /* Need to track this because we need to provide diffs to counter */ + size_t last_count; + size_t count; + /* Number of ops */ + size_t tries; + /* hit vs miss. in theory miss == tries - hits.*/ + size_t hits; + /* How many values we kicked out */ + size_t evicts; + /* Need to track this because we need to provide diffs to counter */ + size_t last_size; + size_t size; + + size_t slots; + /* + * This is used by siphash to prevent hash bucket attacks + */ + char key[16]; + + struct ndn_cache_value *head; + struct ndn_cache_value *tail; + struct ndn_cache_value *table[1]; +}; + +/* + * This means we need 1 MB minimum per thread + * + */ +#define NDN_CACHE_MINIMUM_CAPACITY 1048576 +/* + * This helps us define the number of hashtable slots + * to create. We assume an average DN is 64 chars long. + * This way we end up with a ht entry of: + * 8 bytes: from the table pointing to us. + * 8 bytes: next ptr + * 8 bytes: prev ptr + * 8 bytes + 64: dn + * 8 bytes + 64: ndn itself. + * This gives us 168 bytes.
In theory this means + * 6241 entries, but we have to clamp this to a power of + * two, so we have 8192 slots. In reality, dns may be + * shorter *and* the dn may be the same as the ndn + * so we *may* store more ndns than this. Again, a good reason + * to round the ht size up! (See the worked example appended + * after this patch.) + */ +#define NDN_ENTRY_AVG_SIZE 168 +/* + * After how many operations do we sync our per-thread stats. + */ +#define NDN_STAT_COMMIT_FREQUENCY 256 -static PLHashNumber ndn_hash_string(const void *key); static int ndn_cache_lookup(char *dn, size_t dn_len, char **result, char **udn, int *rc); -static void ndn_cache_update_lru(struct ndn_cache_lru **node); static void ndn_cache_add(char *dn, size_t dn_len, char *ndn, size_t ndn_len); -static void ndn_cache_delete(char *dn); -static void ndn_cache_flush(void); -static void ndn_cache_free(void); -static int ndn_started = 0; -static PRLock *lru_lock = NULL; -static Slapi_RWLock *ndn_cache_lock = NULL; -static struct ndn_cache_ctx *ndn_cache = NULL; -static PLHashTable *ndn_cache_hashtable = NULL; #define ISBLANK(c) ((c) == ' ') #define ISBLANKSTR(s) (((*(s)) == '2') && (*((s) + 1) == '0')) @@ -2698,166 +2738,285 @@ slapi_sdn_get_size(const Slapi_DN *sdn) * */ +static pthread_key_t ndn_cache_key; +static pthread_once_t ndn_cache_key_once = PTHREAD_ONCE_INIT; +static struct ndn_cache_stats t_cache_stats = {0}; /* - * Hashing function using Bernstein's method + * WARNING: For some reason we try to use the NDN cache *before* + * we have a chance to configure it. As a result, we rely on a trick + * in the way we start: we begin in a single thread, so we can + * manipulate these ints as though they were atomic. The value is set + * in that one thread before the worker threads fork, and the fork + * acts as the barrier, so we can go from there. However we *CANNOT* + * change this at runtime without expensive atomics per op, so let's + * not bother until we improve libglobs to be COW. */ -static PLHashNumber -ndn_hash_string(const void *key) -{ - PLHashNumber hash = 5381; - unsigned char *x = (unsigned char *)key; - int c; +static int32_t ndn_enabled = 0; + +static struct ndn_cache * +ndn_thread_cache_create(size_t thread_max_size, size_t slots) { + size_t t_cache_size = sizeof(struct ndn_cache) + (slots * sizeof(struct ndn_cache_value *)); + struct ndn_cache *t_cache = slapi_ch_calloc(1, t_cache_size); + + t_cache->max_size = thread_max_size; + t_cache->slots = slots; + + return t_cache; +} + +static void +ndn_thread_cache_commit_status(struct ndn_cache *t_cache) { + /* + * Every so often we commit these atomically. We do this infrequently + * to avoid the costly atomics (see the stats-flush sketch appended + * after this patch). + */ + if (t_cache->tries % NDN_STAT_COMMIT_FREQUENCY == 0) { + /* We can just add tries and hits. */ + slapi_counter_add(t_cache_stats.cache_evicts, t_cache->evicts); + slapi_counter_add(t_cache_stats.cache_tries, t_cache->tries); + slapi_counter_add(t_cache_stats.cache_hits, t_cache->hits); + t_cache->hits = 0; + t_cache->tries = 0; + t_cache->evicts = 0; + /* Count and size need diff */ + int64_t diff = (t_cache->size - t_cache->last_size); + if (diff > 0) { + // We have more .... + slapi_counter_add(t_cache_stats.cache_size, (uint64_t)diff); + } else if (diff < 0) { + slapi_counter_subtract(t_cache_stats.cache_size, (uint64_t)llabs(diff)); + } + t_cache->last_size = t_cache->size; + + diff = (t_cache->count - t_cache->last_count); + if (diff > 0) { + // We have more ....
+ slapi_counter_add(t_cache_stats.cache_count, (uint64_t)diff); + } else if (diff < 0) { + slapi_counter_subtract(t_cache_stats.cache_count, (uint64_t)llabs(diff)); + } + t_cache->last_count = t_cache->count; - while ((c = *x++)) { - hash = ((hash << 5) + hash) ^ c; } - return hash; } -void +static void +ndn_thread_cache_value_destroy(struct ndn_cache *t_cache, struct ndn_cache_value *v) { + /* Update stats */ + t_cache->size = t_cache->size - v->size; + t_cache->count--; + t_cache->evicts++; + + if (v == t_cache->head) { + t_cache->head = v->prev; + } + if (v == t_cache->tail) { + t_cache->tail = v->next; + } + + /* Cut the node out. */ + if (v->next != NULL) { + v->next->prev = v->prev; + } + if (v->prev != NULL) { + v->prev->next = v->next; + } + /* Now unlink ourselves from the hash chain in the table. */ + struct ndn_cache_value *slot_node = t_cache->table[v->slot]; + if (slot_node == v) { + t_cache->table[v->slot] = v->child; + } else { + struct ndn_cache_value *former_slot_node = NULL; + do { + former_slot_node = slot_node; + slot_node = slot_node->child; + } while(slot_node != v); + /* Okay, now slot_node is us, and former is our parent */ + former_slot_node->child = v->child; + } + + slapi_ch_free((void **)&(v->dn)); + slapi_ch_free((void **)&(v->ndn)); + slapi_ch_free((void **)&v); +} + +static void +ndn_thread_cache_destroy(void *v_cache) { + struct ndn_cache *t_cache = (struct ndn_cache *)v_cache; + /* + * FREE ALL THE NODES!!! + */ + struct ndn_cache_value *node = t_cache->tail; + struct ndn_cache_value *next_node = NULL; + while (node) { + next_node = node->next; + ndn_thread_cache_value_destroy(t_cache, node); + node = next_node; + } + slapi_ch_free((void **)&t_cache); +} + +static void +ndn_cache_key_init(void) { + if (pthread_key_create(&ndn_cache_key, ndn_thread_cache_destroy) != 0) { + /* Log a scary warning? */ + slapi_log_err(SLAPI_LOG_ERR, "ndn_cache_init", "Failed to create pthread key, aborting.\n"); + } +} + +int32_t ndn_cache_init() { - if (!config_get_ndn_cache_enabled() || ndn_started) { - return; + ndn_enabled = config_get_ndn_cache_enabled(); + if (ndn_enabled == 0) { + /* + * Don't configure the keys or anything, need a restart + * to enable. We'll just never use ndn cache in this + * run. + */ + return 0; } - ndn_cache_hashtable = PL_NewHashTable(NDN_CACHE_BUCKETS, ndn_hash_string, PL_CompareStrings, PL_CompareValues, 0, 0); - ndn_cache = (struct ndn_cache_ctx *)slapi_ch_malloc(sizeof(struct ndn_cache_ctx)); - ndn_cache->cache_max_size = config_get_ndn_cache_size(); - ndn_cache->cache_hits = slapi_counter_new(); - ndn_cache->cache_tries = slapi_counter_new(); - ndn_cache->cache_misses = slapi_counter_new(); - ndn_cache->cache_count = 0; - ndn_cache->cache_size = sizeof(struct ndn_cache_ctx) + sizeof(PLHashTable) + sizeof(PLHashTable); - ndn_cache->head = NULL; - ndn_cache->tail = NULL; - ndn_started = 1; - if (NULL == (lru_lock = PR_NewLock()) || NULL == (ndn_cache_lock = slapi_new_rwlock())) { - ndn_cache_destroy(); - slapi_log_err(SLAPI_LOG_ERR, "ndn_cache_init", "Failed to create locks. Disabling cache.\n"); + + /* Create the pthread key */ + (void)pthread_once(&ndn_cache_key_once, ndn_cache_key_init); + + /* Create the global stats.
*/ + t_cache_stats.max_size = config_get_ndn_cache_size(); + t_cache_stats.cache_evicts = slapi_counter_new(); + t_cache_stats.cache_tries = slapi_counter_new(); + t_cache_stats.cache_hits = slapi_counter_new(); + t_cache_stats.cache_count = slapi_counter_new(); + t_cache_stats.cache_size = slapi_counter_new(); + /* Get thread numbers and calc the per thread size */ + int32_t maxthreads = (int32_t)config_get_threadnumber(); + size_t tentative_size = t_cache_stats.max_size / maxthreads; + if (tentative_size < NDN_CACHE_MINIMUM_CAPACITY) { + tentative_size = NDN_CACHE_MINIMUM_CAPACITY; + t_cache_stats.max_size = NDN_CACHE_MINIMUM_CAPACITY * maxthreads; + } + t_cache_stats.thread_max_size = tentative_size; + + /* + * Slots *must* be a power of two, even if the number of entries + * we store will be *less* than this. + */ + size_t possible_elements = tentative_size / NDN_ENTRY_AVG_SIZE; + /* + * So this is like 1048576 / 168, so we get 6241. Now we need to + * shift this to get the number of bits. + */ + size_t shifts = 0; + while (possible_elements > 0) { + shifts++; + possible_elements = possible_elements >> 1; } + /* + * So now we can use this to make the slot count. + */ + t_cache_stats.slots = 1 << shifts; + /* Done? */ + return 0; } void ndn_cache_destroy() { - if (!ndn_started) { + if (ndn_enabled == 0) { return; } - if (lru_lock) { - PR_DestroyLock(lru_lock); - lru_lock = NULL; - } - if (ndn_cache_lock) { - slapi_destroy_rwlock(ndn_cache_lock); - ndn_cache_lock = NULL; - } - if (ndn_cache_hashtable) { - ndn_cache_free(); - PL_HashTableDestroy(ndn_cache_hashtable); - ndn_cache_hashtable = NULL; - } - config_set_ndn_cache_enabled(CONFIG_NDN_CACHE, "off", NULL, 1); - slapi_counter_destroy(&ndn_cache->cache_hits); - slapi_counter_destroy(&ndn_cache->cache_tries); - slapi_counter_destroy(&ndn_cache->cache_misses); - slapi_ch_free((void **)&ndn_cache); - - ndn_started = 0; + slapi_counter_destroy(&(t_cache_stats.cache_tries)); + slapi_counter_destroy(&(t_cache_stats.cache_hits)); + slapi_counter_destroy(&(t_cache_stats.cache_count)); + slapi_counter_destroy(&(t_cache_stats.cache_size)); + slapi_counter_destroy(&(t_cache_stats.cache_evicts)); } int ndn_cache_started() { - return ndn_started; + return ndn_enabled; } /* * Look up this dn in the ndn cache */ static int -ndn_cache_lookup(char *dn, size_t dn_len, char **result, char **udn, int *rc) +ndn_cache_lookup(char *dn, size_t dn_len, char **ndn, char **udn, int *rc) { - struct ndn_hash_val *ndn_ht_val = NULL; - char *ndn, *key; - int rv = 0; - - if (NULL == udn) { - return rv; + if (ndn_enabled == 0 || NULL == udn) { + return 0; } *udn = NULL; - if (ndn_started == 0) { - return rv; - } + if (dn_len == 0) { - *result = dn; + *ndn = dn; *rc = 0; return 1; } - slapi_counter_increment(ndn_cache->cache_tries); - slapi_rwlock_rdlock(ndn_cache_lock); - ndn_ht_val = (struct ndn_hash_val *)PL_HashTableLookupConst(ndn_cache_hashtable, dn); - if (ndn_ht_val) { - ndn_cache_update_lru(&ndn_ht_val->lru_node); - slapi_counter_increment(ndn_cache->cache_hits); - if ((ndn_ht_val->len != dn_len) || - /* even if the lengths match, dn may not be normalized yet. - * (e.g., 'cn="o=ABC",o=XYZ' vs. 
'cn=o\3DABC,o=XYZ') */ - (memcmp(dn, ndn_ht_val->ndn, dn_len))) { - *rc = 1; /* free result */ - ndn = slapi_ch_malloc(ndn_ht_val->len + 1); - memcpy(ndn, ndn_ht_val->ndn, ndn_ht_val->len); - ndn[ndn_ht_val->len] = '\0'; - *result = ndn; - } else { - /* the dn was already normalized, just return the dn as the result */ - *result = dn; - *rc = 0; - } - rv = 1; - } else { - /* copy/preserve the udn, so we can use it as the key when we add dn's to the hashtable */ - key = slapi_ch_malloc(dn_len + 1); - memcpy(key, dn, dn_len); - key[dn_len] = '\0'; - *udn = key; + + struct ndn_cache *t_cache = pthread_getspecific(ndn_cache_key); + if (t_cache == NULL) { + t_cache = ndn_thread_cache_create(t_cache_stats.thread_max_size, t_cache_stats.slots); + pthread_setspecific(ndn_cache_key, t_cache); + /* If we have no cache, we can't look up ... */ + return 0; } - slapi_rwlock_unlock(ndn_cache_lock); - return rv; -} + t_cache->tries++; -/* - * Move this lru node to the top of the list - */ -static void -ndn_cache_update_lru(struct ndn_cache_lru **node) -{ - struct ndn_cache_lru *prev, *next, *curr_node = *node; + /* + * Hash our DN ... + */ + uint64_t dn_hash = sds_siphash13(dn, dn_len, t_cache->key); + /* Where should it be? */ + size_t expect_slot = dn_hash % t_cache->slots; - if (curr_node == NULL) { - return; - } - PR_Lock(lru_lock); - if (curr_node->prev == NULL) { - /* already the top node */ - PR_Unlock(lru_lock); - return; - } - prev = curr_node->prev; - next = curr_node->next; - if (next) { - next->prev = prev; - prev->next = next; - } else { - /* this was the tail, so reset the tail */ - ndn_cache->tail = prev; - prev->next = NULL; + /* + * Is it there? + */ + if (t_cache->table[expect_slot] != NULL) { + /* + * Check it really matches - it could be a collision, and a longer + * stored dn with this dn as a prefix must not false-match, so we + * check the terminator too. + */ + struct ndn_cache_value *node = t_cache->table[expect_slot]; + while (node != NULL) { + if (strncmp(dn, node->dn, dn_len) == 0 && node->dn[dn_len] == '\0') { + /* We found it! */ + /* Update LRU */ + /* Are we already the tail? */ + if (t_cache->tail != node) { + /* If we are the head, move the head back before we unlink. */ + if (t_cache->head == node) { + t_cache->head = node->prev; + } + if (node->next != NULL) { + node->next->prev = node->prev; + } + if (node->prev != NULL) { + node->prev->next = node->next; + } + node->prev = NULL; + if (t_cache->tail != NULL) { + node->next = t_cache->tail; + t_cache->tail->prev = node; + } + t_cache->tail = node; + } + + /* Update that we have a hit.*/ + t_cache->hits++; + /* Copy the NDN to the caller. */ + *ndn = slapi_ch_strdup(node->ndn); + /* Indicate to the caller to free this. */ + *rc = 1; + ndn_thread_cache_commit_status(t_cache); + return 1; + } + node = node->child; + } } - curr_node->prev = NULL; - curr_node->next = ndn_cache->head; - ndn_cache->head->prev = curr_node; - ndn_cache->head = curr_node; - PR_Unlock(lru_lock); + /* If we miss, we need to duplicate dn to udn here. */ + *udn = slapi_ch_strdup(dn); + *rc = 0; + ndn_thread_cache_commit_status(t_cache); + return 0; } /* @@ -2866,12 +3025,10 @@ ndn_cache_update_lru(struct ndn_cache_lru **node) static void ndn_cache_add(char *dn, size_t dn_len, char *ndn, size_t ndn_len) { - struct ndn_hash_val *ht_entry; - struct ndn_cache_lru *new_node = NULL; - PLHashEntry *he; - int size; - - if (ndn_started == 0 || dn_len == 0) { + if (ndn_enabled == 0) { + return; + } + if (dn_len == 0) { return; } if (strlen(ndn) > ndn_len) { @@ -2881,161 +3038,91 @@ ndn_cache_add(char *dn, size_t dn_len, char *ndn, size_t ndn_len) /* * Calculate the approximate memory footprint of the hash entry, key, and lru entry.
*/ - size = (dn_len * 2) + ndn_len + sizeof(PLHashEntry) + sizeof(struct ndn_hash_val) + sizeof(struct ndn_cache_lru); + struct ndn_cache_value *new_value = slapi_ch_calloc(1, sizeof(struct ndn_cache_value)); + new_value->size = sizeof(struct ndn_cache_value) + dn_len + ndn_len; + /* The dn has already been allocated for us. */ + new_value->dn = dn; + /* But we need to copy ndn */ + new_value->ndn = slapi_ch_strdup(ndn); + /* - * Create our LRU node + * Get our local cache out. */ - new_node = (struct ndn_cache_lru *)slapi_ch_malloc(sizeof(struct ndn_cache_lru)); - if (new_node == NULL) { - slapi_log_err(SLAPI_LOG_ERR, "ndn_cache_add", "Failed to allocate new lru node.\n"); - return; + struct ndn_cache *t_cache = pthread_getspecific(ndn_cache_key); + if (t_cache == NULL) { + t_cache = ndn_thread_cache_create(t_cache_stats.thread_max_size, t_cache_stats.slots); + pthread_setspecific(ndn_cache_key, t_cache); } - new_node->prev = NULL; - new_node->key = dn; /* dn has already been allocated */ /* - * Its possible this dn was added to the hash by another thread. + * Hash the DN */ - slapi_rwlock_wrlock(ndn_cache_lock); - ht_entry = (struct ndn_hash_val *)PL_HashTableLookupConst(ndn_cache_hashtable, dn); - if (ht_entry) { - /* already exists, free the node and return */ - slapi_rwlock_unlock(ndn_cache_lock); - slapi_ch_free_string(&new_node->key); - slapi_ch_free((void **)&new_node); - return; - } + uint64_t dn_hash = sds_siphash13(new_value->dn, dn_len, t_cache->key); /* - * Create the hash entry + * Get the insert slot: This works because the number space of dn_hash is + * a 64bit int, and slots is a power of two. As a result, we end up with + * even distribution of the values. */ - ht_entry = (struct ndn_hash_val *)slapi_ch_malloc(sizeof(struct ndn_hash_val)); - if (ht_entry == NULL) { - slapi_rwlock_unlock(ndn_cache_lock); - slapi_log_err(SLAPI_LOG_ERR, "ndn_cache_add", "Failed to allocate new hash entry.\n"); - slapi_ch_free_string(&new_node->key); - slapi_ch_free((void **)&new_node); - return; - } - ht_entry->ndn = slapi_ch_malloc(ndn_len + 1); - memcpy(ht_entry->ndn, ndn, ndn_len); - ht_entry->ndn[ndn_len] = '\0'; - ht_entry->len = ndn_len; - ht_entry->size = size; - ht_entry->lru_node = new_node; + size_t insert_slot = dn_hash % t_cache->slots; + /* Track this for free */ + new_value->slot = insert_slot; + /* - * Check if our cache is full + * Okay, check if we have space, else we need to trim nodes from + * the LRU */ - PR_Lock(lru_lock); /* grab the lru lock now, as ndn_cache_flush needs it */ - if (ndn_cache->cache_max_size != 0 && ((ndn_cache->cache_size + size) > ndn_cache->cache_max_size)) { - ndn_cache_flush(); + while (t_cache->head && (t_cache->size + new_value->size) > t_cache->max_size) { + struct ndn_cache_value *trim_node = t_cache->head; + ndn_thread_cache_value_destroy(t_cache, trim_node); } + /* - * Set the ndn cache lru nodes + * Add it! */ - if (ndn_cache->head == NULL && ndn_cache->tail == NULL) { - /* this is the first node */ - ndn_cache->head = new_node; - ndn_cache->tail = new_node; - new_node->next = NULL; + if (t_cache->table[insert_slot] == NULL) { + t_cache->table[insert_slot] = new_value; } else { - new_node->next = ndn_cache->head; - if (ndn_cache->head) - ndn_cache->head->prev = new_node; + /* + * Hash collision! We need to chain onto this bucket then ....
+ */ + struct ndn_cache_value *parent_node = t_cache->table[insert_slot]; + while (parent_node->child != NULL) { + parent_node = parent_node->child; + } + parent_node->child = new_value; } - ndn_cache->head = new_node; - PR_Unlock(lru_lock); + /* - * Add the new object to the hashtable, and update our stats + * Finally, stick this onto the tail because it's the newest. */ - he = PL_HashTableAdd(ndn_cache_hashtable, new_node->key, (void *)ht_entry); - if (he == NULL) { - slapi_log_err(SLAPI_LOG_ERR, "ndn_cache_add", "Failed to add new entry to hash(%s)\n", dn); - } else { - ndn_cache->cache_count++; - ndn_cache->cache_size += size; - } - slapi_rwlock_unlock(ndn_cache_lock); -} - -/* - * cache is full, remove the least used dn's. lru_lock/ndn_cache write lock are already taken - */ -static void -ndn_cache_flush(void) -{ - struct ndn_cache_lru *node, *next, *flush_node; - int i; - - node = ndn_cache->tail; - for (i = 0; node && i < NDN_FLUSH_COUNT && ndn_cache->cache_count > NDN_MIN_COUNT; i++) { - flush_node = node; - /* update the lru */ - next = node->prev; - next->next = NULL; - ndn_cache->tail = next; - node = next; - /* now update the hash */ - ndn_cache->cache_count--; - ndn_cache_delete(flush_node->key); - slapi_ch_free_string(&flush_node->key); - slapi_ch_free((void **)&flush_node); - } - - slapi_log_err(SLAPI_LOG_CACHE, "ndn_cache_flush", "Flushed cache.\n"); -} - -static void -ndn_cache_free(void) -{ - struct ndn_cache_lru *node, *next, *flush_node; - - if (!ndn_cache) { - return; + if (t_cache->head == NULL) { + t_cache->head = new_value; } - - node = ndn_cache->tail; - while (node && ndn_cache->cache_count) { - flush_node = node; - /* update the lru */ - next = node->prev; - if (next) { - next->next = NULL; - } - ndn_cache->tail = next; - node = next; - /* now update the hash */ - ndn_cache->cache_count--; - ndn_cache_delete(flush_node->key); - slapi_ch_free_string(&flush_node->key); - slapi_ch_free((void **)&flush_node); + if (t_cache->tail != NULL) { + new_value->next = t_cache->tail; + t_cache->tail->prev = new_value; } -} + t_cache->tail = new_value; -/* this is already "write" locked from ndn_cache_add */ -static void -ndn_cache_delete(char *dn) -{ - struct ndn_hash_val *ht_entry; + /* + * And update the stats. 
+ */ + t_cache->size = t_cache->size + new_value->size; + t_cache->count++; - ht_entry = (struct ndn_hash_val *)PL_HashTableLookupConst(ndn_cache_hashtable, dn); - if (ht_entry) { - ndn_cache->cache_size -= ht_entry->size; - slapi_ch_free_string(&ht_entry->ndn); - slapi_ch_free((void **)&ht_entry); - PL_HashTableRemove(ndn_cache_hashtable, dn); - } } /* stats for monitor */ void -ndn_cache_get_stats(PRUint64 *hits, PRUint64 *tries, size_t *size, size_t *max_size, long *count) -{ - slapi_rwlock_rdlock(ndn_cache_lock); - *hits = slapi_counter_get_value(ndn_cache->cache_hits); - *tries = slapi_counter_get_value(ndn_cache->cache_tries); - *size = ndn_cache->cache_size; - *max_size = ndn_cache->cache_max_size; - *count = ndn_cache->cache_count; - slapi_rwlock_unlock(ndn_cache_lock); +ndn_cache_get_stats(PRUint64 *hits, PRUint64 *tries, size_t *size, size_t *max_size, size_t *thread_size, size_t *evicts, size_t *slots, long *count) +{ + *max_size = t_cache_stats.max_size; + *thread_size = t_cache_stats.thread_max_size; + *slots = t_cache_stats.slots; + *evicts = slapi_counter_get_value(t_cache_stats.cache_evicts); + *hits = slapi_counter_get_value(t_cache_stats.cache_hits); + *tries = slapi_counter_get_value(t_cache_stats.cache_tries); + *size = slapi_counter_get_value(t_cache_stats.cache_size); + *count = slapi_counter_get_value(t_cache_stats.cache_count); } /* Common ancestor sdn is allocated. diff --git a/ldap/servers/slapd/libglobs.c b/ldap/servers/slapd/libglobs.c index 06aa0dc..6a8ab15 100644 --- a/ldap/servers/slapd/libglobs.c +++ b/ldap/servers/slapd/libglobs.c @@ -1936,9 +1936,7 @@ config_set_ndn_cache_max_size(const char *attrname, char *value, char *errorbuf, size = NDN_DEFAULT_SIZE; } if (apply) { - CFG_LOCK_WRITE(slapdFrontendConfig); - slapdFrontendConfig->ndn_cache_max_size = size; - CFG_UNLOCK_WRITE(slapdFrontendConfig); + __atomic_store_8(&(slapdFrontendConfig->ndn_cache_max_size), size, __ATOMIC_RELEASE); } return retVal; @@ -3894,7 +3892,7 @@ int config_set_threadnumber(const char *attrname, char *value, char *errorbuf, int apply) { int retVal = LDAP_SUCCESS; - long threadnum = 0; + int32_t threadnum = 0; char *endp = NULL; slapdFrontendConfig_t *slapdFrontendConfig = getFrontendConfig(); @@ -3917,10 +3915,7 @@ config_set_threadnumber(const char *attrname, char *value, char *errorbuf, int a retVal = LDAP_OPERATIONS_ERROR; } if (apply) { - CFG_LOCK_WRITE(slapdFrontendConfig); - /* max_threads = threadnum; */ - slapdFrontendConfig->threadnumber = threadnum; - CFG_UNLOCK_WRITE(slapdFrontendConfig); + __atomic_store_4(&(slapdFrontendConfig->threadnumber), threadnum, __ATOMIC_RELAXED); } return retVal; } @@ -5401,22 +5396,20 @@ config_get_encryptionalias(void) return retVal; } -long +int32_t config_get_threadnumber(void) { slapdFrontendConfig_t *slapdFrontendConfig = getFrontendConfig(); - long retVal; + int32_t retVal; - CFG_LOCK_READ(slapdFrontendConfig); - retVal = slapdFrontendConfig->threadnumber; - CFG_UNLOCK_READ(slapdFrontendConfig); + retVal = __atomic_load_4(&(slapdFrontendConfig->threadnumber), __ATOMIC_RELAXED); - if (retVal == -1) { + if (retVal <= 0) { retVal = util_get_hardware_threads(); } /* We *still* can't detect hardware threads. 
Okay, return 30 :( */ - if (retVal == -1) { + if (retVal <= 0) { retVal = 30; } @@ -6072,16 +6065,12 @@ config_get_max_filter_nest_level() return __atomic_load_4(&(slapdFrontendConfig->max_filter_nest_level), __ATOMIC_ACQUIRE); } -size_t +uint64_t config_get_ndn_cache_size() { slapdFrontendConfig_t *slapdFrontendConfig = getFrontendConfig(); - size_t retVal; - CFG_LOCK_READ(slapdFrontendConfig); - retVal = slapdFrontendConfig->ndn_cache_max_size; - CFG_UNLOCK_READ(slapdFrontendConfig); - return retVal; + return __atomic_load_8(&(slapdFrontendConfig->ndn_cache_max_size), __ATOMIC_ACQUIRE); } int32_t diff --git a/ldap/servers/slapd/main.c b/ldap/servers/slapd/main.c index 68f7751..520e1ab 100644 --- a/ldap/servers/slapd/main.c +++ b/ldap/servers/slapd/main.c @@ -1011,7 +1011,11 @@ main(int argc, char **argv) } /* initialize the normalized DN cache */ - ndn_cache_init(); + if (ndn_cache_init() != 0) { + slapi_log_err(SLAPI_LOG_EMERG, "main", "Unable to create ndn cache\n"); + return_value = 1; + goto cleanup; + } global_backend_lock_init(); @@ -2159,6 +2163,8 @@ slapd_exemode_ldif2db(struct main_config *mcfg) } slapi_pblock_destroy(pb); slapi_ch_free((void **)&(mcfg->myname)); + charray_free(mcfg->cmd_line_instance_names); + charray_free(mcfg->db2ldif_include); charray_free(mcfg->db2index_attrs); charray_free(mcfg->ldif_file); return (return_value); @@ -2340,6 +2346,8 @@ slapd_exemode_db2ldif(int argc, char **argv, struct main_config *mcfg) } } slapi_ch_free((void **)&(mcfg->myname)); + charray_free(mcfg->cmd_line_instance_names); + charray_free(mcfg->db2ldif_include); if (mcfg->db2ldif_dump_replica) { eq_stop(); /* event queue should be shutdown before closing all plugins (especially, replication plugin) */ @@ -2511,6 +2519,7 @@ slapd_exemode_db2archive(struct main_config *mcfg) int32_t task_flags = SLAPI_TASK_RUNNING_FROM_COMMANDLINE; slapi_pblock_set(pb, SLAPI_TASK_FLAGS, &task_flags); return_value = (backend_plugin->plg_db2archive)(pb); + slapi_ch_free((void **)&(mcfg->myname)); slapi_pblock_destroy(pb); return return_value; } @@ -2558,6 +2567,7 @@ slapd_exemode_archive2db(struct main_config *mcfg) slapi_pblock_set(pb, SLAPI_TASK_FLAGS, &task_flags); slapi_pblock_set(pb, SLAPI_BACKEND_INSTANCE_NAME, mcfg->cmd_line_instance_name); return_value = (backend_plugin->plg_archive2db)(pb); + slapi_ch_free((void **)&(mcfg->myname)); slapi_pblock_destroy(pb); return return_value; } diff --git a/ldap/servers/slapd/proto-slap.h b/ldap/servers/slapd/proto-slap.h index daadfe5..83e4dce 100644 --- a/ldap/servers/slapd/proto-slap.h +++ b/ldap/servers/slapd/proto-slap.h @@ -462,7 +462,7 @@ char *config_get_rootpwstoragescheme(void); char *config_get_localuser(void); char *config_get_workingdir(void); char *config_get_encryptionalias(void); -long config_get_threadnumber(void); +int32_t config_get_threadnumber(void); int config_get_maxthreadsperconn(void); int config_get_maxdescriptors(void); int config_get_reservedescriptors(void); @@ -533,7 +533,7 @@ PRInt64 config_get_disk_threshold(void); int config_get_disk_grace_period(void); int config_get_disk_logging_critical(void); int config_get_ndn_cache_count(void); -size_t config_get_ndn_cache_size(void); +uint64_t config_get_ndn_cache_size(void); int config_get_ndn_cache_enabled(void); int config_get_return_orig_type_switch(void); char *config_get_allowed_sasl_mechs(void); diff --git a/ldap/servers/slapd/slap.h b/ldap/servers/slapd/slap.h index 841dc92..4e3cbd3 100644 --- a/ldap/servers/slapd/slap.h +++ b/ldap/servers/slapd/slap.h @@ -2261,7 +2261,7 @@
typedef struct _slapdFrontendConfig char *SNMPorganization; char *SNMPlocation; char *SNMPcontact; - long threadnumber; + int32_t threadnumber; int timelimit; char *accesslog; struct berval **defaultreferral; @@ -2433,7 +2433,7 @@ typedef struct _slapdFrontendConfig /* normalized dn cache */ slapi_onoff_t ndn_cache_enabled; - size_t ndn_cache_max_size; + uint64_t ndn_cache_max_size; slapi_onoff_t return_orig_type; /* if on, search returns original type set in attr list */ slapi_onoff_t sasl_mapping_fallback; diff --git a/ldap/servers/slapd/slapi-private.h b/ldap/servers/slapd/slapi-private.h index 87d9367..5a08259 100644 --- a/ldap/servers/slapd/slapi-private.h +++ b/ldap/servers/slapd/slapi-private.h @@ -373,10 +373,10 @@ Slapi_DN *slapi_sdn_init_normdn_ndn_passin(Slapi_DN *sdn, const char *dn); Slapi_DN *slapi_sdn_init_normdn_passin(Slapi_DN *sdn, const char *dn); char *slapi_dn_normalize_original(char *dn); char *slapi_dn_normalize_case_original(char *dn); -void ndn_cache_init(void); +int32_t ndn_cache_init(void); void ndn_cache_destroy(void); int ndn_cache_started(void); -void ndn_cache_get_stats(PRUint64 *hits, PRUint64 *tries, size_t *size, size_t *max_size, long *count); +void ndn_cache_get_stats(PRUint64 *hits, PRUint64 *tries, size_t *size, size_t *max_size, size_t *thread_size, size_t *evicts, size_t *slots, long *count); #define NDN_DEFAULT_SIZE 20971520 /* 20mb - size of normalized dn cache */ /* filter.c */ diff --git a/test/libslapd/operation/v3_compat.c b/test/libslapd/operation/v3_compat.c index be70c25..0b47062 100644 --- a/test/libslapd/operation/v3_compat.c +++ b/test/libslapd/operation/v3_compat.c @@ -24,6 +24,8 @@ void test_libslapd_operation_v3c_target_spec(void **state __attribute__((unused))) { + /* Need to start the ndn cache ... */ + ndn_cache_init(); /* Will we need to test PB / op interactions? */ /* Test the operation of the target spec is maintained. */ Slapi_Operation *op = slapi_operation_new(SLAPI_OP_FLAG_INTERNAL); @@ -53,4 +55,6 @@ test_libslapd_operation_v3c_target_spec(void **state __attribute__((unused))) /* target_spec is now the b_sdn, so operation free will free it */ // slapi_sdn_free(&b_sdn); operation_free(&op, NULL); + /* Close ndn cache */ + ndn_cache_destroy(); } diff --git a/test/libslapd/pblock/v3_compat.c b/test/libslapd/pblock/v3_compat.c index 22a99f3..25bf728 100644 --- a/test/libslapd/pblock/v3_compat.c +++ b/test/libslapd/pblock/v3_compat.c @@ -9,6 +9,9 @@ #include "../../test_slapd.h" #include +/* We need this for ndn init */ +#include + /* * Assert that the compatibility requirements of the plugin V3 pblock API * are upheld. */ @@ -21,6 +24,7 @@ void test_libslapd_pblock_v3c_target_dn(void **state __attribute__((unused))) { + ndn_cache_init(); /* Create a pblock */ Slapi_PBlock *pb = slapi_pblock_new(); Slapi_Operation *op = slapi_operation_new(SLAPI_OP_FLAG_INTERNAL); @@ -68,12 +72,14 @@ test_libslapd_pblock_v3c_target_dn(void **state __attribute__((unused))) /* It works! */ slapi_pblock_destroy(pb); + ndn_cache_destroy(); } void test_libslapd_pblock_v3c_target_sdn(void **state __attribute__((unused))) { + ndn_cache_init(); /* SLAPI_TARGET_SDN */ Slapi_PBlock *pb = slapi_pblock_new(); Slapi_Operation *op = slapi_operation_new(SLAPI_OP_FLAG_INTERNAL); @@ -137,6 +143,7 @@ test_libslapd_pblock_v3c_target_sdn(void **state __attribute__((unused))) /* It works! */ slapi_pblock_destroy(pb); + ndn_cache_destroy(); } /* nf here means "no implicit free".
For now implies no dup */ @@ -177,6 +184,7 @@ _test_libslapi_pblock_v3c_generic_nf_char(Slapi_PBlock *pb, int type, int *confl void test_libslapd_pblock_v3c_original_target_dn(void **state __attribute__((unused))) { + ndn_cache_init(); /* SLAPI_ORIGINAL_TARGET_DN */ Slapi_PBlock *pb = slapi_pblock_new(); Slapi_Operation *op = slapi_operation_new(SLAPI_OP_FLAG_INTERNAL); @@ -190,6 +198,7 @@ test_libslapd_pblock_v3c_original_target_dn(void **state __attribute__((unused)) /* It works! */ slapi_pblock_destroy(pb); + ndn_cache_destroy(); } void -- 1.8.3.1
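[Editor's appendix 1: the slot-sizing comment in dn.c above works through 1048576 / 168 = 6241 entries and rounds up to the next power of two, 8192. A standalone rendering of that arithmetic, mirroring the shift loop in ndn_cache_init - the function and main here are illustrative, not part of the patch:]

#include <stdio.h>
#include <stddef.h>

#define NDN_CACHE_MINIMUM_CAPACITY 1048576
#define NDN_ENTRY_AVG_SIZE 168

/* Find the smallest power of two >= the entry estimate by counting
 * how many bits the estimate occupies - the same loop as ndn_cache_init. */
static size_t
calc_slots(size_t thread_capacity)
{
    size_t possible_elements = thread_capacity / NDN_ENTRY_AVG_SIZE;
    size_t shifts = 0;
    while (possible_elements > 0) {
        shifts++;
        possible_elements = possible_elements >> 1;
    }
    return (size_t)1 << shifts;
}

int
main(void)
{
    /* 1048576 / 168 = 6241, which occupies 13 bits, so this prints 8192. */
    printf("%zu\n", calc_slots(NDN_CACHE_MINIMUM_CAPACITY));
    return 0;
}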
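[Editor's appendix 2: ndn_thread_cache_commit_status batches the per-thread hit/try/evict counts and folds them into the shared Slapi_Counter set only every NDN_STAT_COMMIT_FREQUENCY (256) operations, keeping the hot path free of atomic traffic. The same pattern, sketched with C11 atomics standing in for Slapi_Counter - all names here are hypothetical:]

#include <stdatomic.h>
#include <stdint.h>

#define COMMIT_FREQUENCY 256

/* Shared counters, touched only on every 256th operation per thread. */
static atomic_uint_fast64_t g_tries;
static atomic_uint_fast64_t g_hits;

struct thread_stats {
    uint64_t tries; /* plain, unsynchronised - cheap to bump per op */
    uint64_t hits;
};

static void
record_op(struct thread_stats *s, int hit)
{
    s->tries++;
    if (hit) {
        s->hits++;
    }
    /* Flush infrequently; between flushes the shared counters lag a little,
     * which is acceptable for monitoring data. */
    if (s->tries % COMMIT_FREQUENCY == 0) {
        atomic_fetch_add(&g_tries, s->tries);
        atomic_fetch_add(&g_hits, s->hits);
        s->tries = 0;
        s->hits = 0;
    }
}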
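[Editor's appendix 3: once the patch is applied, the statistics exported by the monitor.c hunk can be read from the ldbm instance monitor entry. An illustrative query only - the backend name (userRoot), host, and credentials are assumptions, and output is omitted:]

ldapsearch -x -H ldap://localhost -D "cn=Directory Manager" -W \
    -b "cn=monitor,cn=userRoot,cn=ldbm database,cn=plugins,cn=config" -s base \
    normalizedDnCacheTries normalizedDnCacheHits normalizedDnCacheHitRatio \
    normalizedDnCacheEvictions normalizedDnCacheThreadSize \
    normalizedDnCacheThreadSlots currentNormalizedDnCacheSize \
    currentNormalizedDnCacheCount maxNormalizedDnCacheSize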