From b2a771f96f6344841a78015a591418e5ab6ef608 Mon Sep 17 00:00:00 2001 From: John Dennis Date: Thu, 16 Apr 2009 17:48:13 -0400 Subject: add dynamic hash table data structure implementation Apply suggested fixes by Simo after code review * return statements no longer use () unless it's an expression * remove all use of assert() in library * use bool,true,false instead of int,TRUE,FALSE * add check for NULL hash table in public entry points * example code in header file now a seperate file * assure consistent use of unsigned long data type * add more debugging support * break out generation of integer key into convert_key() function * table parameters now tunable rather than hardcoded * table can now accept custom alloc()/free() functions * add function create_table_ex() to pass extra table parameters * remove MUL(), DIV(), MOD() macros * hash statistics now separate struct which can be queried * test program now accepts tuning parameters, iteration count; has better error checking and reporting fix min/max load factor comman line args in test program --- common/Makefile.am | 2 +- common/configure.ac | 6 +- common/dhash/Makefile.am | 15 + common/dhash/dhash.c | 923 +++++++++++++++++++++++++++++++++++++++++++ common/dhash/dhash.h | 342 ++++++++++++++++ common/dhash/dhash.pc.in | 11 + common/dhash/dhash_example.c | 117 ++++++ common/dhash/dhash_test.c | 489 +++++++++++++++++++++++ 8 files changed, 1903 insertions(+), 2 deletions(-) create mode 100644 common/dhash/Makefile.am create mode 100644 common/dhash/dhash.c create mode 100644 common/dhash/dhash.h create mode 100644 common/dhash/dhash.pc.in create mode 100644 common/dhash/dhash_example.c create mode 100644 common/dhash/dhash_test.c (limited to 'common') diff --git a/common/Makefile.am b/common/Makefile.am index ea23cb953..94ccd27b4 100644 --- a/common/Makefile.am +++ b/common/Makefile.am @@ -1 +1 @@ -SUBDIRS = collection ini trace +SUBDIRS = collection ini trace dhash diff --git a/common/configure.ac b/common/configure.ac index 8804b4d74..120d1034f 100644 --- a/common/configure.ac +++ b/common/configure.ac @@ -17,7 +17,11 @@ AC_ARG_ENABLE([trace], AS_IF([test ["$trace_level" -gt "0"] -a ["$trace_level" -lt "8"] ],[AC_SUBST([TRACE_VAR],["-DTRACE_LEVEL=$trace_level"])]) -AC_CONFIG_FILES([Makefile collection/Makefile collection/collection.pc ini/Makefile ini/ini_config.pc trace/Makefile]) +AC_CONFIG_FILES([Makefile + collection/Makefile collection/collection.pc + dhash/Makefile dhash/dhash.pc + ini/Makefile ini/ini_config.pc + trace/Makefile]) AC_OUTPUT diff --git a/common/dhash/Makefile.am b/common/dhash/Makefile.am new file mode 100644 index 000000000..d2b92bce2 --- /dev/null +++ b/common/dhash/Makefile.am @@ -0,0 +1,15 @@ +AM_CPPFLAGS = -Wall + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = dhash.pc + +lib_LTLIBRARIES = libdhash.la +libdhash_la_SOURCES = dhash.c +pkginclude_HEADERS = dhash.h + +check_PROGRAMS = dhash_test dhash_example +dhash_test_LDADD = dhash.o +dhash_example_LDADD = dhash.o + +examplesdir = $(docdir)/examples +dist_examples_DATA = dhash_test.c dhash_example.c diff --git a/common/dhash/dhash.c b/common/dhash/dhash.c new file mode 100644 index 000000000..92283a23f --- /dev/null +++ b/common/dhash/dhash.c @@ -0,0 +1,923 @@ +/*****************************************************************************/ +/******************************** Documentation ******************************/ +/*****************************************************************************/ + +/* + * See documentation in corresponding header file dhash.h. + * + * Compilation controls: + * DEBUG controls some informative traces, mainly for debugging. + * HASH_STATISTICS causes hash_accesses and hash_collisions to be maintained; + * when combined with DEBUG, these are displayed by hash_destroy(). + * + */ + +/*****************************************************************************/ +/******************************* Include Files *******************************/ +/*****************************************************************************/ + +#include +#include +#include +#include +#include "dhash.h" + +/*****************************************************************************/ +/****************************** Internal Defines *****************************/ +/*****************************************************************************/ + +#define PRIME_1 37 +#define PRIME_2 1048583 + + /* + * Fast arithmetic, relying on powers of 2, and on pre-processor + * concatenation property + */ + +/*****************************************************************************/ +/************************** Internal Type Definitions ************************/ +/*****************************************************************************/ + +typedef struct element_t { + hash_entry_t entry; + struct element_t *next; +} element_t, *segment_t; + + +struct hash_table_str { + unsigned long p; /* Next bucket to be split */ + unsigned long maxp; /* upper bound on p during expansion */ + unsigned long entry_count; /* current # entries */ + unsigned long bucket_count; /* current # buckets */ + unsigned long segment_count; /* current # segments */ + unsigned long min_load_factor; + unsigned long max_load_factor; + unsigned long directory_size; + unsigned int directory_size_shift; + unsigned long segment_size; + unsigned int segment_size_shift; + hash_delete_callback delete_callback; + hash_alloc_func alloc; + hash_free_func free; + segment_t **directory; +#ifdef HASH_STATISTICS + hash_statistics_t statistics; +#endif + +}; + +typedef unsigned long address_t; + +typedef struct hash_keys_callback_data_t { + unsigned long index; + hash_key_t *keys; +} hash_keys_callback_data_t; + +typedef struct hash_values_callback_data_t { + unsigned long index; + hash_value_t *values; +} hash_values_callback_data_t; + +struct _hash_iter_context_t { + struct hash_iter_context_t iter; + hash_table_t *table; + unsigned long i, j; + segment_t *s; + element_t *p; +}; + +/*****************************************************************************/ +/********************** External Function Declarations *********************/ +/*****************************************************************************/ + +/*****************************************************************************/ +/********************** Internal Function Declarations *********************/ +/*****************************************************************************/ + +static address_t convert_key(hash_key_t *key); +static address_t hash(hash_table_t *table, hash_key_t *key); +static bool key_equal(hash_key_t *a, hash_key_t *b); +static int contract_table(hash_table_t *table); +static int expand_table(hash_table_t *table); +static hash_entry_t *hash_iter_next(struct hash_iter_context_t *iter); + +/*****************************************************************************/ +/************************* External Global Variables ***********************/ +/*****************************************************************************/ + +/*****************************************************************************/ +/************************* Internal Global Variables ***********************/ +/*****************************************************************************/ + +#if DEBUG +int debug_level = 1; +#endif + +/*****************************************************************************/ +/*************************** Internal Functions ****************************/ +/*****************************************************************************/ + +static address_t convert_key(hash_key_t *key) +{ + address_t h; + unsigned char *k; + + switch(key->type) { + case HASH_KEY_ULONG: + h = key->ul; + break; + case HASH_KEY_STRING: + /* Convert string to integer */ + for (h = 0, k = (unsigned char *) key->str; *k; k++) + h = h * PRIME_1 ^ (*k - ' '); + break; + default: + h = key->ul; + break; + } + return h; +} + +static address_t hash(hash_table_t *table, hash_key_t *key) +{ + address_t h, address; + + h = convert_key(key); + h %= PRIME_2; + address = h & (table->maxp-1); /* h % maxp */ + if (address < table->p) + address = h & ((table->maxp << 1)-1); /* h % (2*table->maxp) */ + + return address; +} + +static bool is_valid_key_type(hash_key_enum key_type) +{ + switch(key_type) { + case HASH_KEY_ULONG: + case HASH_KEY_STRING: + return true; + default: + return false; + } +} + +static bool is_valid_value_type(hash_value_enum value_type) +{ + switch(value_type) { + case HASH_VALUE_UNDEF: + case HASH_VALUE_PTR: + case HASH_VALUE_INT: + case HASH_VALUE_UINT: + case HASH_VALUE_LONG: + case HASH_VALUE_ULONG: + case HASH_VALUE_FLOAT: + case HASH_VALUE_DOUBLE: + return true; + default: + return false; + } +} + +static bool key_equal(hash_key_t *a, hash_key_t *b) +{ + if (a->type != b->type) return false; + + switch(a->type) { + case HASH_KEY_ULONG: + return (a->ul == b->ul); + case HASH_KEY_STRING: + return (strcmp(a->str, b->str) == 0); + } + return false; +} + + +static int expand_table(hash_table_t *table) +{ + address_t new_address; + unsigned long old_segment_index, new_segment_index; + unsigned long old_segment_dir, new_segment_dir; + segment_t *old_segment, *new_segment; + element_t *current, **previous, **last_of_new; + + if (table->bucket_count < (table->directory_size << table->segment_size_shift)) { +#ifdef DEBUG + if (debug_level >= 1) + fprintf(stderr, "expand_table on entry: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n", + table->bucket_count, table->segment_count, table->p, table->maxp); +#endif +#ifdef HASH_STATISTICS + table->statistics.table_expansions++; +#endif + + /* + * Locate the bucket to be split + */ + old_segment_dir = table->p >> table->segment_size_shift; + old_segment = table->directory[old_segment_dir]; + old_segment_index = table->p & (table->segment_size-1); /* p % segment_size */ + /* + * Expand address space; if necessary create a new segment + */ + new_address = table->maxp + table->p; + new_segment_dir = new_address >> table->segment_size_shift; + new_segment_index = new_address & (table->segment_size-1); /* new_address % segment_size */ + if (new_segment_index == 0) { + if ((table->directory[new_segment_dir] = (segment_t *) table->alloc(table->segment_size * sizeof(segment_t))) == NULL) { + return HASH_ERROR_NO_MEMORY; + } + memset(table->directory[new_segment_dir], 0, table->segment_size * sizeof(segment_t)); + table->segment_count++; + } + new_segment = table->directory[new_segment_dir]; + /* + * Adjust state variables + */ + table->p++; + if (table->p == table->maxp) { + table->maxp <<= 1; /* table->maxp *= 2 */ + table->p = 0; + } + table->bucket_count++; + /* + * Relocate records to the new bucket + */ + previous = &old_segment[old_segment_index]; + current = *previous; + last_of_new = &new_segment[new_segment_index]; + *last_of_new = NULL; + while (current != NULL) { + if (hash(table, ¤t->entry.key) == new_address) { + /* + * Attach it to the end of the new chain + */ + *last_of_new = current; + /* + * Remove it from old chain + */ + *previous = current->next; + last_of_new = ¤t->next; + current = current->next; + *last_of_new = NULL; + } else { + /* + * leave it on the old chain + */ + previous = ¤t->next; + current = current->next; + } + } +#ifdef DEBUG + if (debug_level >= 1) + fprintf(stderr, "expand_table on exit: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n", + table->bucket_count, table->segment_count, table->p, table->maxp); +#endif + } + return HASH_SUCCESS; +} + +static int contract_table(hash_table_t *table) +{ + address_t new_address, old_address; + unsigned long old_segment_index, new_segment_index; + unsigned long old_segment_dir, new_segment_dir; + segment_t *old_segment, *new_segment; + element_t *current; + + if (table->bucket_count > table->segment_size) { +#ifdef DEBUG + if (debug_level >= 1) + fprintf(stderr, "contract_table on entry: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n", + table->bucket_count, table->segment_count, table->p, table->maxp); +#endif + +#ifdef HASH_STATISTICS + table->statistics.table_contractions++; +#endif + /* + * Locate the bucket to be merged with the last bucket + */ + old_address = table->bucket_count - 1; + old_segment_dir = old_address >> table->segment_size_shift; + old_segment = table->directory[old_segment_dir]; + old_segment_index = old_address & (table->segment_size-1); /* old_address % segment_size */ + + /* + * Adjust state variables + */ + if (table->p > 0) { + table->p--; + } else { + table->maxp >>= 1; + table->p = table->maxp - 1; + } + table->bucket_count--; + + /* + * Find the last bucket to merge back + */ + if((current = old_segment[old_segment_index]) != NULL) { + new_address = hash(table, &old_segment[old_segment_index]->entry.key); + new_segment_dir = new_address >> table->segment_size_shift; + new_segment_index = new_address & (table->segment_size-1); /* new_address % segment_size */ + new_segment = table->directory[new_segment_dir]; + + /* + * Relocate records to the new bucket by splicing the two chains + * together by inserting the old chain at the head of the new chain. + * First find the end of the old chain, then set its next pointer to + * point to the head of the new chain, set the head of the new chain to + * point to the old chain, then finally set the head of the old chain to + * NULL. + */ + + while (current->next != NULL) { + current = current->next; + } + + current->next = new_segment[new_segment_index]; + new_segment[new_segment_index] = old_segment[old_segment_index]; + old_segment[old_segment_index] = NULL; + } + /* + * If we have removed the last of the chains in this segment then free the + * segment since its no longer in use. + */ + if (old_segment_index == 0) { + table->segment_count--; + table->free(table->directory[old_segment_dir]); + } + +#ifdef DEBUG + if (debug_level >= 1) + fprintf(stderr, "contract_table on exit: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n", + table->bucket_count, table->segment_count, table->p, table->maxp); +#endif + + } + return HASH_SUCCESS; +} + +static int lookup(hash_table_t *table, hash_key_t *key, element_t **element_arg, segment_t **chain_arg) +{ + address_t h; + segment_t *current_segment; + unsigned long segment_index, segment_dir; + segment_t *chain, element; + + *element_arg = NULL; + *chain_arg = NULL; + + if (!table) return HASH_ERROR_BAD_TABLE; + +#ifdef HASH_STATISTICS + table->statistics.hash_accesses++; +#endif + h = hash(table, key); + segment_dir = h >> table->segment_size_shift; + segment_index = h & (table->segment_size-1); /* h % segment_size */ + /* + * valid segment ensured by hash() + */ + current_segment = table->directory[segment_dir]; + +#ifdef DEBUG + if (debug_level >= 2) + fprintf(stderr, "lookup: h=%lu, segment_dir=%lu, segment_index=%lu current_segment=%p\n", + h, segment_dir, segment_index, current_segment); +#endif + + if (current_segment == NULL) return EFAULT; + chain = ¤t_segment[segment_index]; + element = *chain; + /* + * Follow collision chain + */ + while (element != NULL && !key_equal(&element->entry.key, key)) { + chain = &element->next; + element = *chain; +#ifdef HASH_STATISTICS + table->statistics.hash_collisions++; +#endif + } + *element_arg = element; + *chain_arg = chain; + + return HASH_SUCCESS; +} + +static bool hash_keys_callback(hash_entry_t *item, void *user_data) +{ + hash_keys_callback_data_t *data = (hash_keys_callback_data_t *)user_data; + + data->keys[data->index++] = item->key; + return true; +} + +static bool hash_values_callback(hash_entry_t *item, void *user_data) +{ + hash_values_callback_data_t *data = (hash_values_callback_data_t *)user_data; + + data->values[data->index++] = item->value; + return true; +} + +/*****************************************************************************/ +/**************************** Exported Functions ***************************/ +/*****************************************************************************/ + +const char* hash_error_string(int error) +{ + switch(error) { + case HASH_SUCCESS: return "Success"; + case HASH_ERROR_BAD_KEY_TYPE: return "Bad key type"; + case HASH_ERROR_BAD_VALUE_TYPE: return "Bad value type"; + case HASH_ERROR_NO_MEMORY: return "No memory"; + case HASH_ERROR_KEY_NOT_FOUND: return "Key not found"; + case HASH_ERROR_BAD_TABLE: return "Bad table"; + } + return NULL; +} + + +int hash_create(unsigned long count, hash_table_t **tbl, hash_delete_callback delete_callback) +{ + return hash_create_ex(count, tbl, 0, 0, 0, 0, NULL, NULL, delete_callback); +} + +int hash_create_ex(unsigned long count, hash_table_t **tbl, + unsigned int directory_bits, unsigned int segment_bits, + unsigned long min_load_factor, unsigned long max_load_factor, + hash_alloc_func alloc_func, + hash_free_func free_func, + hash_delete_callback delete_callback) +{ + unsigned long i; + unsigned int n_addr_bits; + address_t addr; + hash_table_t *table = NULL; + + if (alloc_func == NULL) alloc_func = malloc; + if (free_func == NULL) free_func = free; + + /* Compute directory and segment parameters */ + if (directory_bits == 0) directory_bits = HASH_DEFAULT_DIRECTORY_BITS; + if (segment_bits == 0) segment_bits = HASH_DEFAULT_SEGMENT_BITS; + + for (addr = ~0, n_addr_bits = 0; addr; addr >>= 1, n_addr_bits++); + + if (directory_bits + segment_bits > n_addr_bits) return EINVAL; + + if ((table = (hash_table_t *) alloc_func(sizeof(hash_table_t))) == NULL) { + return HASH_ERROR_NO_MEMORY; + } + memset(table, 0, sizeof(hash_table_t)); + table->alloc = alloc_func; + table->free = free_func; + + table->directory_size_shift = directory_bits; + for (i = 0, table->directory_size = 1; i < table->directory_size_shift; i++, table->directory_size <<= 1); + + table->segment_size_shift = segment_bits; + for (i = 0, table->segment_size = 1; i < table->segment_size_shift; i++, table->segment_size <<= 1); + + + /* Allocate directory */ + if ((table->directory = (segment_t **) table->alloc(table->directory_size * sizeof(segment_t *))) == NULL) { + return HASH_ERROR_NO_MEMORY; + } + memset(table->directory, 0, table->directory_size * sizeof(segment_t *)); + + /* + * Adjust count to be nearest higher power of 2, minimum SEGMENT_SIZE, then + * convert into segments. + */ + i = table->segment_size; + while (i < count) + i <<= 1; + count = i >> table->segment_size_shift; + + table->segment_count = 0; + table->p = 0; + table->entry_count = 0; + table->delete_callback = delete_callback; + + /* + * Allocate initial 'i' segments of buckets + */ + for (i = 0; i < count; i++) { + if ((table->directory[i] = (segment_t *) table->alloc(table->segment_size * sizeof(segment_t))) == NULL) { + hash_destroy(table); + return HASH_ERROR_NO_MEMORY; + } + memset(table->directory[i], 0, table->segment_size * sizeof(segment_t)); + table->segment_count++; + } + table->bucket_count = table->segment_count << table->segment_size_shift; + table->maxp = table->bucket_count; + table->min_load_factor = min_load_factor == 0 ? HASH_DEFAULT_MIN_LOAD_FACTOR : min_load_factor; + table->max_load_factor = max_load_factor == 0 ? HASH_DEFAULT_MAX_LOAD_FACTOR : max_load_factor; + +#if DEBUG + if (debug_level >= 1) + fprintf(stderr, "hash_create_ex: table=%p count=%lu maxp=%lu segment_count=%lu\n", + table, count, table->maxp, table->segment_count); +#endif +#ifdef HASH_STATISTICS + memset(&table->statistics, 0, sizeof(table->statistics)); +#endif + + *tbl = table; + return HASH_SUCCESS; +} + +#ifdef HASH_STATISTICS +int hash_get_statistics(hash_table_t *table, hash_statistics_t *statistics) +{ + if (!table) return HASH_ERROR_BAD_TABLE; + if (!statistics) return EINVAL; + + *statistics = table->statistics; + + return HASH_SUCCESS; +} +#endif + +int hash_destroy(hash_table_t *table) +{ + unsigned long i, j; + segment_t *s; + element_t *p, *q; + + if (!table) return HASH_ERROR_BAD_TABLE; + + if (table != NULL) { + for (i = 0; i < table->segment_count; i++) { + /* test probably unnecessary */ + if ((s = table->directory[i]) != NULL) { + for (j = 0; j < table->segment_size; j++) { + p = s[j]; + while (p != NULL) { + q = p->next; + if (table->delete_callback) table->delete_callback(&p->entry); + if (p->entry.key.type == HASH_KEY_STRING) table->free ((char *)p->entry.key.str); + table->free((char *) p); + p = q; + } + } + table->free(s); + } + } + table->free(table->directory); + table->free(table); + table = NULL; + } + return HASH_SUCCESS; +} + +int hash_iterate(hash_table_t *table, hash_iterate_callback callback, void *user_data) +{ + unsigned long i, j; + segment_t *s; + element_t *p; + + if (!table) return HASH_ERROR_BAD_TABLE; + + if (table != NULL) { + for (i = 0; i < table->segment_count; i++) { + /* test probably unnecessary */ + if ((s = table->directory[i]) != NULL) { + for (j = 0; j < table->segment_size; j++) { + p = s[j]; + while (p != NULL) { + if(!(*callback)(&p->entry, user_data)) return HASH_SUCCESS; + p = p->next; + } + } + } + } + } + return HASH_SUCCESS; +} + +static hash_entry_t *hash_iter_next(struct hash_iter_context_t *iter_arg) +{ + struct _hash_iter_context_t *iter = (struct _hash_iter_context_t *) iter_arg; + hash_entry_t *entry; + + if (iter->table == NULL) return NULL; + goto state_3a; + + state_1: + iter->i++; + if(iter->i >= iter->table->segment_count) return NULL; + /* test probably unnecessary */ + iter->s = iter->table->directory[iter->i]; + if (iter->s == NULL) goto state_1; + iter->j = 0; + state_2: + if (iter->j >= iter->table->segment_size) goto state_1; + iter->p = iter->s[iter->j]; + state_3a: + if (iter->p == NULL) goto state_3b; + entry = &iter->p->entry; + iter->p = iter->p->next; + return entry; + state_3b: + iter->j++; + goto state_2; + + /* Should never reach here */ + fprintf(stderr, "ERROR hash_iter_next reached invalid state\n"); + return NULL; +} + +struct hash_iter_context_t *new_hash_iter_context(hash_table_t *table) +{ + struct _hash_iter_context_t *iter; + + if (!table) return NULL;; + + if ((iter = table->alloc(sizeof(struct _hash_iter_context_t))) == NULL) { + return NULL; + } + + + iter->iter.next = (hash_iter_next_t) hash_iter_next; + + iter->table = table; + iter->i = 0; + iter->j = 0; + iter->s = table->directory[iter->i]; + iter->p = iter->s[iter->j]; + + return (struct hash_iter_context_t *)iter; +} + +unsigned long hash_count(hash_table_t *table) +{ + return table->entry_count; +} + + +int hash_keys(hash_table_t *table, unsigned long *count_arg, hash_key_t **keys_arg) +{ + unsigned long count = table->entry_count; + hash_key_t *keys; + hash_keys_callback_data_t data; + + if (!table) return HASH_ERROR_BAD_TABLE; + + if (count == 0) { + *count_arg = 0; + *keys_arg = NULL; + return HASH_SUCCESS; + } + + if ((keys = table->alloc(sizeof(hash_key_t) * count)) == NULL) { + *count_arg = -1; + *keys_arg = NULL; + return HASH_ERROR_NO_MEMORY; + } + + data.index = 0; + data.keys = keys; + + hash_iterate(table, hash_keys_callback, &data); + + *count_arg = count; + *keys_arg = keys; + return HASH_SUCCESS; +} + +int hash_values(hash_table_t *table, unsigned long *count_arg, hash_value_t **values_arg) +{ + unsigned long count = table->entry_count; + hash_value_t *values; + hash_values_callback_data_t data; + + if (!table) return HASH_ERROR_BAD_TABLE; + + if (count == 0) { + *count_arg = 0; + *values_arg = NULL; + return HASH_SUCCESS; + } + + if ((values = table->alloc(sizeof(hash_value_t) * count)) == NULL) { + *count_arg = -1; + *values_arg = NULL; + return HASH_ERROR_NO_MEMORY; + } + + data.index = 0; + data.values = values; + + hash_iterate(table, hash_values_callback, &data); + + *count_arg = count; + *values_arg = values; + return HASH_SUCCESS; +} + +typedef struct hash_entries_callback_data_t { + unsigned long index; + hash_entry_t *entries; +} hash_entries_callback_data_t; + +static bool hash_entries_callback(hash_entry_t *item, void *user_data) +{ + hash_entries_callback_data_t *data = (hash_entries_callback_data_t *)user_data; + + data->entries[data->index++] = *item; + return true; +} + +int hash_entries(hash_table_t *table, unsigned long *count_arg, hash_entry_t **entries_arg) +{ + unsigned long count = table->entry_count; + hash_entry_t *entries; + hash_entries_callback_data_t data; + + if (!table) return HASH_ERROR_BAD_TABLE; + + if (count == 0) { + *count_arg = 0; + *entries_arg = NULL; + return HASH_SUCCESS; + } + + if ((entries = table->alloc(sizeof(hash_entry_t) * count)) == NULL) { + *count_arg = -1; + *entries_arg = NULL; + return HASH_ERROR_NO_MEMORY; + } + + data.index = 0; + data.entries = entries; + + hash_iterate(table, hash_entries_callback, &data); + + *count_arg = count; + *entries_arg = entries; + return HASH_SUCCESS; +} + +bool hash_has_key(hash_table_t *table, hash_key_t *key) +{ + hash_value_t value; + + if (hash_lookup(table, key, &value) == HASH_SUCCESS) + return true; + else + return false; +} + + +int hash_get_default(hash_table_t *table, hash_key_t *key, hash_value_t *value, hash_value_t *default_value) +{ + int error; + + if (!table) return HASH_ERROR_BAD_TABLE; + + if ((error = hash_lookup(table, key, value)) != HASH_SUCCESS) { + if ((error = hash_enter(table, key, default_value)) != HASH_SUCCESS) { + return error; + } + *value = *default_value; + return HASH_SUCCESS; + } + + return HASH_SUCCESS; +} + +int hash_enter(hash_table_t *table, hash_key_t *key, hash_value_t *value) +{ + int error; + segment_t element, *chain; + size_t len; + + if (!table) return HASH_ERROR_BAD_TABLE; + + if (!is_valid_key_type(key->type)) + return HASH_ERROR_BAD_KEY_TYPE; + + if (!is_valid_value_type(value->type)) + return HASH_ERROR_BAD_VALUE_TYPE; + + lookup(table, key, &element, &chain); + + if (element == NULL) { /* not found */ + if ((element = (element_t *) table->alloc(sizeof(element_t))) == NULL) { + /* Allocation failed, return NULL */ + return HASH_ERROR_NO_MEMORY; + } + memset(element, 0, sizeof(element_t)); + /* + * Initialize new element + */ + switch(element->entry.key.type = key->type) { + case HASH_KEY_ULONG: + element->entry.key.ul = key->ul; + break; + case HASH_KEY_STRING: + len = strlen(key->str)+1; + if ((element->entry.key.str = table->alloc(len)) == NULL) { + table->free(element); + return HASH_ERROR_NO_MEMORY; + } + memcpy((void *)element->entry.key.str, key->str, len); + break; + } + switch(element->entry.value.type = value->type) { + case HASH_VALUE_UNDEF: + element->entry.value.ul = 0; + break; + case HASH_VALUE_PTR: + element->entry.value.ptr = value->ptr; + break; + case HASH_VALUE_INT: + element->entry.value.i = value->i; + break; + case HASH_VALUE_UINT: + element->entry.value.ui = value->ui; + break; + case HASH_VALUE_LONG: + element->entry.value.l = value->l; + break; + case HASH_VALUE_ULONG: + element->entry.value.ul = value->ul; + break; + case HASH_VALUE_FLOAT: + element->entry.value.f = value->f; + break; + case HASH_VALUE_DOUBLE: + element->entry.value.d = value->d; + break; + } + *chain = element; /* link into chain */ + element->next = NULL; + /* + * Table over-full? + */ + if (++table->entry_count / table->bucket_count > table->max_load_factor) { + if ((error = expand_table(table)) != HASH_SUCCESS) { /* doesn't affect element */ + return error; + } + } + } /* end not found */ + return HASH_SUCCESS; +} + +int hash_lookup(hash_table_t *table, hash_key_t *key, hash_value_t *value) +{ + segment_t element, *chain; + + if (!table) return HASH_ERROR_BAD_TABLE; + + if (!is_valid_key_type(key->type)) + return HASH_ERROR_BAD_KEY_TYPE; + + lookup(table, key, &element, &chain); + + if (element) { + *value = element->entry.value; + return HASH_SUCCESS; + } else { + return HASH_ERROR_KEY_NOT_FOUND; + } +} + +int hash_delete(hash_table_t *table, hash_key_t *key) +{ + int error; + segment_t element, *chain; + + if (!table) return HASH_ERROR_BAD_TABLE; + + if (!is_valid_key_type(key->type)) + return HASH_ERROR_BAD_KEY_TYPE; + + lookup(table, key, &element, &chain); + + if (element) { + if (table->delete_callback) table->delete_callback(&element->entry); + *chain = element->next; /* remove from chain */ + /* + * Table too sparse? + */ + if (--table->entry_count / table->bucket_count < table->min_load_factor) { + if ((error = contract_table(table)) != HASH_SUCCESS) { /* doesn't affect element */ + return error; + } + } + if (element->entry.key.type == HASH_KEY_STRING) table->free ((char *)element->entry.key.str); + table->free(element); + return HASH_SUCCESS; + } else { + return HASH_ERROR_KEY_NOT_FOUND; + } +} + + diff --git a/common/dhash/dhash.h b/common/dhash/dhash.h new file mode 100644 index 000000000..4f97f69eb --- /dev/null +++ b/common/dhash/dhash.h @@ -0,0 +1,342 @@ +#ifndef DHASH_H +#define DHASH_H + +/*****************************************************************************/ +/******************************** Documentation ******************************/ +/*****************************************************************************/ + +#if 0 + +Dynamic hash table implementation based on article in CACM April 1988 pp. +446-457, by Per-Ake Larson. + +This implementation was based on a 3/7/1989 submission to comp.sources.misc by +Esmond Pitt (ejp@ausmelb.oz.AU) that emulated the hsearch(3) interface. The interface +was reworked to be much more flexible and significant new functionality was +added by John Dennis (jdennis@sharpeye.com). + +A hash table maintains a set of pairs. Lookups are performed by +locating the key in the table and returning its value. + +A dynamic hash table keeps the number of hash collisions constant +independent of the number of entries in the hash table. + +Both keys and values may be of different types. Two different key types are +supported, strings and unsigned longs. If the key type is a string the hash +library will automatically allocate memory to hold the hash key string and +will automatically free the memory for the key string when the hash entry +is destroyed. Items in the hash table only match when their key types match +AND the keys themselves match. For example if there were two hash entries, +one whose key type was an unsigned long equal to 1 and one whose key type was +a string equal to "1" they would not match, these are considered two +distinct entries. + +The value of the key may be a undefined, pointer, an int, an unsigned int, a +long, an unsigned long, a float, or a double. The hash library does nothing +with user pointers (value.type == HASH_VALUE_PTR). Its the user responsibility +to free items pointed to by these pointers when a hash entry is deleted or the +hash table is destroyed (see hash_delete_callback and/or hash_destroy). + +See dhash_example.c for an illustration of how one might use the API. It does not +represent complete API coverage nor the optimal way to do things in all cases, +it is just a general example. + +#endif + +/*****************************************************************************/ +/******************************* Include Files *******************************/ +/*****************************************************************************/ + +#include + +/*****************************************************************************/ +/*********************************** Defines *********************************/ +/*****************************************************************************/ + +#if 1 +#define HASH_STATISTICS +#endif + +#define HASH_DEFAULT_DIRECTORY_BITS 5 +#define HASH_DEFAULT_SEGMENT_BITS 5 +#define HASH_DEFAULT_MIN_LOAD_FACTOR 1 +#define HASH_DEFAULT_MAX_LOAD_FACTOR 5 + +#define HASH_ERROR_BASE -2000 +#define HASH_ERROR_LIMIT (HASH_ERROR_BASE+20) +#define IS_HASH_ERROR(error) (((error) >= HASH_ERROR_BASE) && ((error) < HASH_ERROR_LIMIT)) + +#define HASH_SUCCESS 0 +#define HASH_ERROR_BAD_KEY_TYPE (HASH_ERROR_BASE + 1) +#define HASH_ERROR_BAD_VALUE_TYPE (HASH_ERROR_BASE + 2) +#define HASH_ERROR_NO_MEMORY (HASH_ERROR_BASE + 3) +#define HASH_ERROR_KEY_NOT_FOUND (HASH_ERROR_BASE + 4) +#define HASH_ERROR_BAD_TABLE (HASH_ERROR_BASE + 5) + +/*****************************************************************************/ +/******************************* Type Definitions ****************************/ +/*****************************************************************************/ + +struct hash_table_str; +typedef struct hash_table_str hash_table_t; + +typedef enum { + HASH_KEY_STRING, + HASH_KEY_ULONG +} hash_key_enum; + +typedef enum +{ + HASH_VALUE_UNDEF, + HASH_VALUE_PTR, + HASH_VALUE_INT, + HASH_VALUE_UINT, + HASH_VALUE_LONG, + HASH_VALUE_ULONG, + HASH_VALUE_FLOAT, + HASH_VALUE_DOUBLE +} hash_value_enum; + +typedef struct hash_key_t { + hash_key_enum type; + union { + const char *str; + unsigned long ul; + }; +} hash_key_t; + +typedef struct hash_value_t { + hash_value_enum type; + union { + void *ptr; + int i; + unsigned int ui; + long l; + unsigned long ul; + float f; + double d; + }; +} hash_value_t; + +typedef struct hash_entry_t { + hash_key_t key; + hash_value_t value; +} hash_entry_t; + +#ifdef HASH_STATISTICS +typedef struct hash_statistics_t { + unsigned long hash_accesses; + unsigned long hash_collisions; + unsigned long table_expansions; + unsigned long table_contractions; +} hash_statistics_t; +#endif + + +/* typedef's for callback based iteration */ +typedef bool(*hash_iterate_callback)(hash_entry_t *item, void *user_data); +typedef void (*hash_delete_callback)(hash_entry_t *item); + +/* typedef's for iteration object based iteration */ +struct hash_iter_context_t; +typedef hash_entry_t *(*hash_iter_next_t)(struct hash_iter_context_t *iter); +struct hash_iter_context_t { + hash_iter_next_t next; +}; + +/* typedef for hash_create_ex() */ +typedef void *(*hash_alloc_func)(size_t size); +typedef void (*hash_free_func)(void *ptr); + +/*****************************************************************************/ +/************************* External Global Variables ***********************/ +/*****************************************************************************/ + +/*****************************************************************************/ +/**************************** Exported Functions ***************************/ +/*****************************************************************************/ + +/* + * Given an error code returned by a hash function, map it to a error string. + * Returns NULL if the error code is unrecognized. + */ +const char* hash_error_string(int error); + +/* + * Create a new hash table with room for n initial entries. hash_create returns + * an opaque pointer to the new hash table in the table parameter. Functions + * operating on the hash table pass in this hash table pointer. This means you + * may have as many concurrent hash tables as you want. The delete_callback + * parameter is a pointer to a function which will be called just prior to a + * hash entry being deleted. This is useful when the hash value has items which + * may need to be disposed of. The delete_callback may be NULL. + */ +int hash_create(unsigned long count, hash_table_t **tbl, hash_delete_callback delete_callback); + +/* + * Create a new hash table and fine tune it's configurable parameters. + * Refer to CACM article for explanation of parameters. + * + * directory_bits: number of address bits allocated to top level directory array. + * segment_bits: number of address bits allocated to segment array. + * min_load_factor: table contracted when the ratio of entry count to bucket count + * is less than the min_load_factor the table is contracted. + * max_load_factor: table expanded when the ratio of entry count to bucket count + * is greater than the max_load_factor the table is expanded. + * alloc_func: function pointer for allocation + * free_func: funciton pointer for freeing memory allocated with alloc_func + * + * Note directory_bits + segment_bits must be <= number of bits in unsigned long + */ +int hash_create_ex(unsigned long count, hash_table_t **tbl, + unsigned int directory_bits, unsigned int segment_bits, + unsigned long min_load_factor, unsigned long max_load_factor, + hash_alloc_func alloc_func, + hash_free_func free_func, + hash_delete_callback delete_callback); + +#ifdef HASH_STATISTICS +/* + * Return statistics for the table. + */ +int hash_get_statistics(hash_table_t *table, hash_statistics_t *statistics); +#endif + +/* + * hash_destroy deletes all entries in the hash table, freeing all memory used + * in implementing the hash table. Some hash entries may have values which are + * pointers to user data structures. User pointers are not free by hash_destroy, + * they must be free by the caller. This may be done by iterating over all the + * entries in the table using hash_iterate() and freeing the values or by + * registering a delete callback when the table is created with + * hash_create(). Note, hash keys which are strings will be automatically freed + * by hash_destroy, the caller does not need to free the key strings. + */ +int hash_destroy(hash_table_t *table); + +/* + * Enter or update an item in the table referenced by key. If the key does not + * exist yet the item will be created and inserted into the table with the + * value, otherwise the value for the existing key is updated. The return value + * may be HASH_ERROR_BAD_KEY_TYPE or HASH_ERROR_BAD_VALUE_TYPE if the key or + * value type respectively is invalid. This function might also return + * HASH_ERROR_NO_MEMORY. + */ +int hash_enter(hash_table_t *table, hash_key_t *key, hash_value_t *value); + +/* + * Using the key lookup the value associated with it in the table. If the key is + * not in the table HASH_ERROR_KEY_NOT_FOUND is returned. If the type of key is + * invalid HASH_ERROR_BAD_KEY_TYPE is returned. Otherwise HASH_SUCCESS is + * returned. If the result is anything other than HASH_SUCCESS the value pointer + * is not updated. + */ +int hash_lookup(hash_table_t *table, hash_key_t *key, hash_value_t *value); + +/* + * Like hash_lookup() except if the key is not in the table then it is entered + * into the table and assigned the default_value. Thus it is not possible for + * hash_get_default() to return HASH_ERROR_KEY_NOT_FOUND. + */ +int hash_get_default(hash_table_t *table, hash_key_t *key, hash_value_t *value, hash_value_t *default_value); + +/* + * Delete the item from the table. The key and its type are specified in the key + * parameter which are passed by reference. If the key was in the table + * HASH_SUCCESS is returned otherwise HASH_ERROR_KEY_NOT_FOUND is + * returned. Memory allocated to hold the key if it was a string is free by the + * hash library, but values which are pointers to user data must be freed by the + * caller (see delete_callback). + */ +int hash_delete(hash_table_t *table, hash_key_t *key); + +/* + * Often it is useful to operate on every key and/or value in the hash + * table. The hash_iterate function will invoke the users callback on every item + * in the table as long as the callback returns a non-zero value. Returning a + * zero from the callback can be used to halt the iteration prematurely if some + * condition is met. The user_data parameter is passed to the callback + * function. It can be used for any purpose the caller wants. The callback + * parameter list is: + * + * bool callback(hash_entry_t *item, hash_table_t *user_data); + * + * WARNING: Do not modify the contents of the table during an iteration it will + * produce undefined results. If you need to visit each item in the table and + * potentially delete or insert some entries the proper way to do this is to + * obtain a list of keys or items using hash_keys() or hash_items() which + * returns a copy of the keys or items. You may then loop on the list returned + * and safely update the table (don't forget to free the list when done). + */ +int hash_iterate(hash_table_t *table, hash_iterate_callback callback, void *user_data); + +/* + * This is another method to iterate on every key/value in the hash table. + * However, unlike hash_iterate which requires a callback this function returns + * an iterator object which contains a next() function pointer. Each time + * next() is invoked it returns a pointer to the next hash entry in the table, + * then NULL when all entries have been visited. In some circumstances it's more + * convenient than having to define a callback. Like hash_iterate() one must + * never modify the table contents during iteration. If you intend to modify the + * table during iteration use the functions which return an indepent list of + * keys, values, or items instead and iterate on the list. The iterator object + * must be free'ed when you're done iterating by calling free() on it. + * + * Example: + * + * struct hash_iter_context_t *iter; + * hash_entry_t *entry; + * + * iter = new_hash_iter_context(table); + * while ((entry = iter->next(iter)) != NULL) { + * do_something(entry); + * } + * free(iter); + */ +struct hash_iter_context_t *new_hash_iter_context(hash_table_t *table); + +/* + * Return a count of how many items are currently in the table. + */ +unsigned long hash_count(hash_table_t *table); + +/* + * Get an array of all the keys in the table returned through the keys + * parameter. The number of elements in the array is returned in the count + * parameter. Each key in the array is a copy of the key in the table. Any + * pointers in the key will be shared with the table entry thus both the item in + * the array and the item in the table point to the same object. The array + * should be freed by calling free(). The function may return + * HASH_ERROR_NO_MEMORY, otherwise HASH_SUCCESS. + */ +int hash_keys(hash_table_t *table, unsigned long *count, hash_key_t **keys); + +/* + * Get an array of all the values in the table returned through the values + * parameter. The number of elements in the array is returned in the count + * parameter. Each value in the array is a copy of the value in the table. Any + * pointers in the value will be shared with the table entry thus both the item in + * the array and the item in the table point to the same object. The array + * should be freed by calling free(). The function may return + * HASH_ERROR_NO_MEMORY, otherwise HASH_SUCCESS. + */ +int hash_values(hash_table_t *table, unsigned long *count, hash_value_t **values); + + +/* + * Get an array of all the entries in the table returned through the entries + * parameter. The number of elements in the array is returned in the count + * parameter. Each entry in the array is a copy of the entry in the table. Any + * pointers in the entry will be shared with the table entry thus both the item in + * the array and the item in the table point to the same object. The array + * should be freed by calling free(). The function may return + * HASH_ERROR_NO_MEMORY, otherwise HASH_SUCCESS. + */ +int hash_entries(hash_table_t *table, unsigned long *count, hash_entry_t **entries); + +/* + * Return boolean if the key is in the table. + */ +bool hash_has_key(hash_table_t *table, hash_key_t *key); + +#endif diff --git a/common/dhash/dhash.pc.in b/common/dhash/dhash.pc.in new file mode 100644 index 000000000..be62b373a --- /dev/null +++ b/common/dhash/dhash.pc.in @@ -0,0 +1,11 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: dhash +Description: A hash table which will dynamically resize to achieve optimal storage & access time properties +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -ldhash +Cflags: -I${includedir} +URL: http://fedorahosted.org/sssd/ diff --git a/common/dhash/dhash_example.c b/common/dhash/dhash_example.c new file mode 100644 index 000000000..af46ca889 --- /dev/null +++ b/common/dhash/dhash_example.c @@ -0,0 +1,117 @@ +#include +#include +#include +#include +#include "dhash.h" + +struct my_data_t { + int foo; + char bar[128]; +}; + +void delete_callback(hash_entry_t *entry) +{ + if (entry->value.type == HASH_VALUE_PTR) free(entry->value.ptr); +} + +bool visit_callback(hash_entry_t *entry, void *user_data) +{ + unsigned long *count = (unsigned long *)user_data; + struct my_data_t *my_data = (struct my_data_t *) entry->value.ptr; + + (*count)++; + + printf("%s = [foo=%d bar=%s]\n", entry->key.str, my_data->foo, my_data->bar); + return true; +} + +struct my_data_t *new_data(int foo, char *bar) +{ + struct my_data_t *my_data = malloc(sizeof(struct my_data_t)); + my_data->foo = foo; + strncpy(my_data->bar, bar, sizeof(my_data->bar)); + return my_data; +} +int main(int argc, char **argv) +{ + static hash_table_t *table = NULL; + hash_key_t key, *keys; + hash_value_t value; + struct hash_iter_context_t *iter; + hash_entry_t *entry; + unsigned long i, n_entries; + int error; + struct my_data_t *my_data = new_data(1024, "Hello World!"); + unsigned long count; + + /* Create a hash table */ + if ((error = hash_create(10, &table, delete_callback)) != HASH_SUCCESS) { + fprintf(stderr, "cannot create hash table (%s)\n", hash_error_string(error)); + return error; + } + + /* Enter a key named "My Data" and specify it's value as a pointer to my_data */ + key.type = HASH_KEY_STRING; + key.str = "My Data"; + value.type = HASH_VALUE_PTR; + value.ptr = my_data; + + if ((error = hash_enter(table, &key, &value)) != HASH_SUCCESS) { + fprintf(stderr, "cannot add to table \"%s\" (%s)\n", key.str, hash_error_string(error)); + return error; + } + + /* Get a list of keys and print them out, free the list when we're done */ + if ((error = hash_keys(table, &count, &keys)) != HASH_SUCCESS) { + fprintf(stderr, "cannot get key list (%s)\n", hash_error_string(error)); + return error; + } + + for (i = 0; i < count; i++) + printf("key: %s\n", keys[i].str); + free(keys); + + /* Lookup the key named "My Data" */ + key.type = HASH_KEY_STRING; + key.str = "My Data"; + if ((error = hash_lookup(table, &key, &value)) != HASH_SUCCESS) { + fprintf(stderr, "cannot find key \"%s\" (%s)\n", key.str, hash_error_string(error)); + } + + /* Visit each entry in the table, callback will increment count on each visit */ + printf("Iterate using callback\n"); + count = 0; + hash_iterate(table, visit_callback, &count); + + /* Assure number of visits equal the table size */ + assert(count == hash_count(table)); + + /* Visit each entry using iterator object */ + printf("Iterate using iterator\n"); + n_entries = 0; + iter = new_hash_iter_context(table); + while ((entry = iter->next(iter)) != NULL) { + struct my_data_t *my_data = (struct my_data_t *) entry->value.ptr; + + printf("%s = [foo=%d bar=%s]\n", entry->key.str, my_data->foo, my_data->bar); + n_entries++; + } + free(iter); + /* Assure number of visits equal the table size */ + assert(n_entries == hash_count(table)); + + /* Remove the entry, deletion callback will be invoked */ + key.type = HASH_KEY_STRING; + key.str = "My Data"; + if ((error = hash_delete(table, &key)) != HASH_SUCCESS) { + fprintf(stderr, "cannot delete from table (%s)\n", hash_error_string(error)); + } + + /* Assure key is no longer in table */ + assert (!hash_has_key(table, &key)); + + /* Free the table */ + hash_destroy(table); + + return 0; +} diff --git a/common/dhash/dhash_test.c b/common/dhash/dhash_test.c new file mode 100644 index 000000000..8509964f6 --- /dev/null +++ b/common/dhash/dhash_test.c @@ -0,0 +1,489 @@ +#include +#include +#include +#include +#include +#include +#include "dhash.h" + +#define DEFAULT_MAX_TEST (500) +hash_entry_t *iter_result_1 = NULL; +hash_entry_t *iter_result_2 = NULL; + +unsigned long max_test = DEFAULT_MAX_TEST; +int verbose = 0; + +const char *error_string(int error) +{ + if (IS_HASH_ERROR(error)) + return hash_error_string(error); + + return strerror(error); +} + +char *key_string(hash_key_t *key) +{ + static char buf[1024]; + + switch(key->type) { + case HASH_KEY_ULONG: + snprintf(buf, sizeof(buf), "key ulong = %lu", key->ul); + break; + case HASH_KEY_STRING: + snprintf(buf, sizeof(buf), "key string = \"%s\"", key->str); + break; + default: + snprintf(buf, sizeof(buf), "unknown key type = %d", key->type); + break; + } + return buf; +} + +char *value_string(hash_value_t *value) +{ + static char buf[1024]; + + switch(value->type) { + case HASH_VALUE_UNDEF: + snprintf(buf, sizeof(buf), "value undefined"); + break; + case HASH_VALUE_PTR: + snprintf(buf, sizeof(buf), "value str = \"%s\"", (char *)value->ptr); + break; + case HASH_VALUE_INT: + snprintf(buf, sizeof(buf), "value int = %d", value->i); + break; + case HASH_VALUE_UINT: + snprintf(buf, sizeof(buf), "value unsigned int = %u", value->ui); + break; + case HASH_VALUE_LONG: + snprintf(buf, sizeof(buf), "value long = %ld", value->l); + break; + case HASH_VALUE_ULONG: + snprintf(buf, sizeof(buf), "value unsigned long = %lu", value->ul); + break; + case HASH_VALUE_FLOAT: + snprintf(buf, sizeof(buf), "value float = %f", value->f); + break; + case HASH_VALUE_DOUBLE: + snprintf(buf, sizeof(buf), "value double = %f", value->f); + break; + default: + snprintf(buf, sizeof(buf), "unknown value type = %d", value->type); + break; + } + + return buf; +} + +char *entry_string(hash_entry_t *entry) +{ + static char buf[1024]; + + snprintf(buf, sizeof(buf), "[%s] = [%s]", key_string(&entry->key), value_string(&entry->value)); + + return buf; + +} + +bool callback(hash_entry_t *item, void *user_data) +{ + unsigned long *callback_count = (unsigned long *)user_data; + + iter_result_1[*callback_count] = *item; + + (*callback_count)++; + + if (verbose) printf("%s\n", entry_string(item)); + return true; +} + +void delete_callback(hash_entry_t *item) +{ + if (item->value.type == HASH_VALUE_PTR) free(item->value.ptr); +} + +typedef struct test_val_t { + long val; + char *str; +} test_val_t; + + +int main(int argc, char **argv) +{ + test_val_t *test = NULL; + long i, k; + int status; + hash_value_t value; + hash_key_t key; + char buf[1024]; + hash_table_t *table = NULL; + unsigned long callback_count = 0; + unsigned int directory_bits = HASH_DEFAULT_DIRECTORY_BITS; + unsigned int segment_bits = HASH_DEFAULT_SEGMENT_BITS; + unsigned long min_load_factor = HASH_DEFAULT_MIN_LOAD_FACTOR; + unsigned long max_load_factor = HASH_DEFAULT_MAX_LOAD_FACTOR; + + while (1) { + int arg; + int option_index = 0; + static struct option long_options[] = { + {"count", 1, 0, 'c'}, + {"verbose", 1, 0, 'v'}, + {"quiet", 1, 0, 'q'}, + {"directory-bits", 1, 0, 'd'}, + {"segment-bits", 1, 0, 's'}, + {"min-load-factor", 1, 0, 'l'}, + {"max-load-factor", 1, 0, 'h'}, + {0, 0, 0, 0} + }; + + arg = getopt_long(argc, argv, "c:vqd:s:l:h:", + long_options, &option_index); + if (arg == -1) break; + + switch (arg) { + case 'c': + max_test = atol(optarg); + break; + case 'v': + verbose = 1; + break; + case 'q': + verbose = 0; + break; + case 'd': + directory_bits = atoi(optarg); + break; + case 's': + segment_bits = atoi(optarg); + break; + case 'l': + min_load_factor = atol(optarg); + break; + case 'h': + max_load_factor = atol(optarg); + break; + } + } + + if ((test = (test_val_t *) calloc(max_test, sizeof(test_val_t))) == NULL) { + fprintf(stderr, "Failed to allocate test array\n"); + exit(1); + } + if ((iter_result_1 = (hash_entry_t *) calloc(max_test, sizeof(hash_entry_t))) == NULL) { + fprintf(stderr, "Failed to allocate iter_result_1 array\n"); + exit(1); + } + if ((iter_result_2 = (hash_entry_t *) calloc(max_test, sizeof(hash_entry_t))) == NULL) { + fprintf(stderr, "Failed to allocate iter_result_2 array\n"); + exit(1); + } + + + /* Initialize the random number generator */ + srandom(time(0)); + + /* Create the hash table as small as possible to exercise growth */ + if ((status = hash_create_ex(1, &table, + directory_bits, segment_bits, + min_load_factor, max_load_factor, + NULL, NULL, delete_callback)) != HASH_SUCCESS) { + fprintf(stderr, "table creation failed at line %d (%s)\n", __LINE__, error_string(status)); + exit(1); + } + + /* Initialize the array of test values */ + for (i = 0; i < max_test; i++) { + test[i].val = random(); + /* If the value is odd we'll use a string as the key, + * otherwise we'll use an unsigned long as the key */ + if (test[i].val & 1) { + key.type = HASH_KEY_STRING; + sprintf(buf, "%ld", test[i].val); + test[i].str = strdup(buf); + } + } + + /* Enter all the test values into the hash table */ + for (i = 0; i < max_test; i++) { + if (test[i].val & 1) { + key.type = HASH_KEY_STRING; + key.str = test[i].str; + value.type = HASH_VALUE_PTR; + value.ptr = (void *) strdup(test[i].str); + } + else { + key.type = HASH_KEY_ULONG; + key.ul = test[i].val; + value.type = HASH_VALUE_LONG; + value.l = test[i].val; + } + + if (hash_has_key(table, &key)) { + fprintf(stderr, "Error: %ld already in table when inserting, i = %lu, at line %d\n", + test[i].val, i, __LINE__); + exit(1); + } + if ((status = hash_enter(table, &key, &value)) != HASH_SUCCESS) { + fprintf(stderr, "Error: %ld failed insertion at line %d (%s) \n", + test[i].val, __LINE__, error_string(status)); + exit(1); + } + } + + /* Now visit each entry in the table using a callback iterator, + * store what we found in iter_result_1 for testing the iterator object later on */ + if (verbose) printf("callback iterate:\n"); + callback_count = 0; + if ((status = hash_iterate(table, callback, &callback_count)) != HASH_SUCCESS) { + fprintf(stderr, "hash_iterate failed at line %d (%s)\n", __LINE__, error_string(status)); + exit(1); + } + if (verbose) printf("hash_count=%ld, callback_count=%ld\n", hash_count(table), callback_count); + + if (hash_count(table) != callback_count) { + fprintf(stderr, "Error: hash_count(%ld) != callback_count(%ld) at line %d\n", + hash_count(table), callback_count, __LINE__); + exit(1); + } + + /* Now vist each entry in the table using an iterator object */ + { + struct hash_iter_context_t *iter; + unsigned long n_items; + hash_entry_t *entry; + + if (verbose) printf("iter iterate:\n"); + + n_items = 0; + iter = new_hash_iter_context(table); + + while ((entry = iter->next(iter)) != NULL) { + if (verbose) printf("%s\n", entry_string(entry)); + iter_result_2[n_items] = *entry; + n_items++; + } + if (verbose) printf("hash_count=%ld, n_items=%ld\n", hash_count(table), n_items); + + if (hash_count(table) != n_items) { + fprintf(stderr, "Error: hash_count(%ld) != n_items(%ld) at line %d\n", + hash_count(table), n_items, __LINE__); + exit(1); + } + free(iter); + } + + /* Both iterators should have visited each item in the same order, verify ... */ + for (i = 0; i < max_test; i++) { + if (memcmp(&iter_result_1[i], &iter_result_2[i], sizeof(iter_result_1[0])) != 0) { + fprintf(stderr, "Error: iter_result_1[%lu] != iter_result_2[%lu] at line %d\n", + i, i, __LINE__); + exit(1); + } + } + + /* Get an array of keys in the table, print them out */ + { + unsigned long i, count; + hash_key_t *keys; + + if (verbose) printf("\nhash_keys:\n"); + if ((status = hash_keys(table, &count, &keys)) != HASH_SUCCESS) { + fprintf(stderr, "hash_keys failed at line %d (%s)\n", + __LINE__, error_string(status)); + exit(1); + } + + if (hash_count(table) != count) { + fprintf(stderr, "Error: hash_count(%ld) != hash_keys() count(%ld) at line %d\n", + hash_count(table), count, __LINE__); + exit(1); + } + + for (i = 0; i < count; i++) { + if (verbose) printf("%s\n", key_string(&keys[i])); + } + free(keys); + } + + /* Get an array of values in the table, print them out */ + { + unsigned long i, count; + hash_value_t *values; + + if (verbose) printf("\nhash_values:\n"); + hash_values(table, &count, &values); + + if (hash_count(table) != count) { + fprintf(stderr, "Error: hash_count(%ld) != hash_values() count(%ld) at line %d\n", + hash_count(table), count, __LINE__); + exit(1); + } + + for (i = 0; i < count; i++) { + if (verbose) printf("%s\n", value_string(&values[i])); + } + free(values); + } + + /* Get an array of items in the table, print them out */ + { + unsigned long i, count; + hash_entry_t *entries; + + if (verbose) printf("\nhash_entries:\n"); + hash_entries(table, &count, &entries); + + if (hash_count(table) != count) { + fprintf(stderr, "Error: hash_count(%ld) != hash_entries() count(%ld) at line %d\n", + hash_count(table), count, __LINE__); + exit(1); + } + + for (i = 0; i < count; i++) { + if (verbose) printf("%s\n", entry_string(&entries[i])); + } + free(entries); + } + + /* See if we can find every key */ + for (i = max_test - 1; i >= 0; i--) { + if (test[i].val & 1) { + key.type = HASH_KEY_STRING; + key.str = test[i].str; + } + else { + key.type = HASH_KEY_ULONG; + key.ul = test[i].val; + } + if ((status = hash_lookup(table, &key, &value)) != HASH_SUCCESS) { + fprintf(stderr, "Error: failed first lookup for value %ld at index %ld at line %d (%s)\n", + test[i].val, i, __LINE__, error_string(status)); + exit(1); + } + else { + switch(value.type) { + case HASH_VALUE_PTR: + if (strcmp((char *)value.ptr, test[i].str) != 0) { + fprintf(stderr, "Error: corrupt ptr data for %lu at line %d\n", i, __LINE__); + exit(1); + } + break; + case HASH_VALUE_LONG: + if (value.l != test[i].val) { + fprintf(stderr, "Error: corrupt long data for %lu at line %d\n", i, __LINE__); + exit(1); + } + break; + default: + fprintf(stderr, "Error: unknown value type for %lu\n", i); + break; + } + } + } + + + /* + * Delete a key, make sure we can't find it, assure we can find all other + * keys. + */ + for (i = 0; i < max_test; i++) { + if (test[i].val & 1) { + key.type = HASH_KEY_STRING; + key.str = test[i].str; + } + else { + key.type = HASH_KEY_ULONG; + key.ul = test[i].val; + } + + if ((status = hash_lookup(table, &key, &value)) != HASH_SUCCESS) { + fprintf(stderr, "Error: failed delete lookup for value %ld at index %ld at line %d (%s)\n", + test[i].val, i, __LINE__, error_string(status)); + exit(1); + } + + if ((status = hash_delete(table, &key)) != HASH_SUCCESS) { + fprintf(stderr, "Error: %ld not in table when deleting, i = %lu at line %d (%s)\n", + test[i].val, i, __LINE__, error_string(status)); + exit(1); + } + + if (hash_lookup(table, &key, &value) != HASH_ERROR_KEY_NOT_FOUND) { + fprintf(stderr, "Error: found in table after deletion, value = %ld at index %ld at line %d\n", + test[i].val, i, __LINE__); + exit(1); + } + /* See if we can find all remaining keys */ + for (k = i + 1; k < max_test; k++) { + if (test[k].val & 1) { + key.type = HASH_KEY_STRING; + key.str = test[k].str; + } else { + key.type = HASH_KEY_ULONG; + key.ul = test[k].val; + } + if ((status = hash_lookup(table, &key, &value)) != HASH_SUCCESS) { + fprintf(stderr, "Error: failed second lookup for value %ld, i = %lu k = %lu at line %d (%s)\n", + test[k].val, i, k, __LINE__, error_string(status)); + exit(1); + } else { + switch(value.type) { + case HASH_VALUE_PTR: + if (strcmp((char *)value.ptr, test[k].str) != 0) { + fprintf(stderr, "Error: corrupt ptr data for %lu at line %d\n", k, __LINE__); + exit(1); + } + break; + case HASH_VALUE_LONG: + if (value.l != test[k].val) { + fprintf(stderr, "Error: corrupt long data for %lu at line %d\n", k, __LINE__); + exit(1); + } + break; + default: + fprintf(stderr, "Error: unknown value type (%d) for %lu\n", value.type, k); + break; + } + } + } + } + + if (verbose) printf("\n"); + +#ifdef HASH_STATISTICS + { + hash_statistics_t stats; + + if ((status = hash_get_statistics(table, &stats)) != HASH_SUCCESS) { + fprintf(stderr, "Error: could not get statistics at line %d (%s)\n", + __LINE__, error_string(status)); + exit(1); + } + + printf("Statistics: Accesses = %ld, Collisions = %ld, Collision Rate = %.2f, Expansions = %ld, Contractions = %ld\n", + stats.hash_accesses, stats.hash_collisions, + ((float)stats.hash_collisions / (float)stats.hash_accesses), + stats.table_expansions, stats.table_contractions); + } +#endif + + if ((status = hash_destroy(table)) != HASH_SUCCESS) { + fprintf(stderr, "table destruction failed at line %d (%s)\n", + __LINE__, error_string(status)); + exit(1); + } + + for (i = 0; i < max_test; i++) { + if (test[i].val & 1) { + free(test[i].str); + } + } + free(test); + free(iter_result_1); + free(iter_result_2); + + printf("Successfully tested %lu values\n", max_test); + return 0; +} -- cgit