diff options
-rw-r--r-- | dhash/dhash.c | 133 | ||||
-rw-r--r-- | dhash/dhash.h | 79 | ||||
-rw-r--r-- | dhash/examples/dhash_test.c | 47 |
3 files changed, 182 insertions, 77 deletions
diff --git a/dhash/dhash.c b/dhash/dhash.c index f92e463..79f9c38 100644 --- a/dhash/dhash.c +++ b/dhash/dhash.c @@ -53,10 +53,13 @@ #define PRIME_1 37 #define PRIME_2 1048583 - /* - * Fast arithmetic, relying on powers of 2, and on pre-processor - * concatenation property - */ +#ifndef MIN + #define MIN(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +#ifndef MAX + #define MAX(a,b) (((a) > (b)) ? (a) : (b)) +#endif #define halloc(table, size) table->halloc(size, table->halloc_pvt) #define hfree(table, ptr) table->hfree(ptr, table->halloc_pvt) @@ -247,7 +250,7 @@ static int expand_table(hash_table_t *table) if (table->bucket_count < (table->directory_size << table->segment_size_shift)) { #ifdef DEBUG - if (debug_level >= 1) + if (debug_level >= 2) fprintf(stderr, "expand_table on entry: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n", table->bucket_count, table->segment_count, table->p, table->maxp); #endif @@ -314,7 +317,7 @@ static int expand_table(hash_table_t *table) } } #ifdef DEBUG - if (debug_level >= 1) + if (debug_level >= 2) fprintf(stderr, "expand_table on exit: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n", table->bucket_count, table->segment_count, table->p, table->maxp); #endif @@ -330,9 +333,9 @@ static int contract_table(hash_table_t *table) segment_t *old_segment, *new_segment; element_t *current; - if (table->bucket_count > table->segment_size) { + if ((table->bucket_count > table->segment_size) && (table->segment_count > 1)) { #ifdef DEBUG - if (debug_level >= 1) + if (debug_level >= 2) fprintf(stderr, "contract_table on entry: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n", table->bucket_count, table->segment_count, table->p, table->maxp); #endif @@ -395,7 +398,7 @@ static int contract_table(hash_table_t *table) } #ifdef DEBUG - if (debug_level >= 1) + if (debug_level >= 2) fprintf(stderr, "contract_table on exit: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n", table->bucket_count, table->segment_count, table->p, table->maxp); #endif @@ -428,7 +431,7 @@ static int lookup(hash_table_t *table, hash_key_t *key, element_t **element_arg, current_segment = table->directory[segment_dir]; #ifdef DEBUG - if (debug_level >= 2) + if (debug_level >= 3) fprintf(stderr, "lookup: h=%lu, segment_dir=%lu, segment_index=%lu current_segment=%p\n", h, segment_dir, segment_index, current_segment); #endif @@ -505,19 +508,47 @@ int hash_create_ex(unsigned long count, hash_table_t **tbl, hash_free_func *free_func, void *alloc_private_data, hash_delete_callback *delete_callback, - void *delete_private_data) -{ + void *delete_private_data) { unsigned long i; - unsigned int n_addr_bits; + unsigned int n_addr_bits, requested_bits; + unsigned int requested_directory_bits; + unsigned int requested_segment_bits; address_t addr; hash_table_t *table = NULL; + /* Initialize to NULL in case of an early return due to an error */ + *tbl = NULL; + if (alloc_func == NULL) alloc_func = sys_malloc_wrapper; if (free_func == NULL) free_func = sys_free_wrapper; /* Compute directory and segment parameters */ - if (directory_bits == 0) directory_bits = HASH_DEFAULT_DIRECTORY_BITS; - if (segment_bits == 0) segment_bits = HASH_DEFAULT_SEGMENT_BITS; + + /* compute power of 2 >= count; it's the number of requested buckets */ + if (count > 1) { + for (requested_bits = 0; (1 << requested_bits) < count; requested_bits++); + } else { + requested_bits = 1; + } + + /* + * If caller didn't specify directory & segment allocation then + * compute it from the requested count. + */ + if (directory_bits == 0 || segment_bits == 0) { + /* Equally divide buckets between the directory and segments */ + requested_directory_bits = MAX(requested_bits >> 1, 1); + requested_segment_bits = MAX(requested_bits - requested_directory_bits, 1); + + /* If the requested count wasn't specified pick a default */ + if (count == 0) { + directory_bits = MAX(requested_directory_bits, HASH_DEFAULT_DIRECTORY_BITS); + segment_bits = MAX(requested_segment_bits, HASH_DEFAULT_SEGMENT_BITS); + } else { + directory_bits = requested_directory_bits; + segment_bits = requested_segment_bits; + } + } for (addr = ~0, n_addr_bits = 0; addr; addr >>= 1, n_addr_bits++); @@ -534,45 +565,46 @@ int hash_create_ex(unsigned long count, hash_table_t **tbl, table->halloc_pvt = alloc_private_data; table->directory_size_shift = directory_bits; - for (i = 0, table->directory_size = 1; i < table->directory_size_shift; i++, table->directory_size <<= 1); + table->directory_size = directory_bits ? 1 << directory_bits : 0; table->segment_size_shift = segment_bits; - for (i = 0, table->segment_size = 1; i < table->segment_size_shift; i++, table->segment_size <<= 1); - + table->segment_size = segment_bits ? 1 << segment_bits : 0; /* Allocate directory */ table->directory = (segment_t **)halloc(table, table->directory_size * sizeof(segment_t *)); if (table->directory == NULL) { + hash_destroy(table); return HASH_ERROR_NO_MEMORY; } memset(table->directory, 0, table->directory_size * sizeof(segment_t *)); /* - * Adjust count to be nearest higher power of 2, minimum SEGMENT_SIZE, then - * convert into segments. + * If one wanted to pre-allocate all the buckets necessary to meet the needs + * of the requested count it would be done like this: + * + * table->segment_count = MIN((count + table->segment_size-1) / table->segment_size, + * table->directory_size); + * + * But with dynamic hash tables there is little point to this, the whole idea is to + * allow the table to grow/shrink dynamically, therefore we start with just one + * segment of buckets, the minimum necessary. */ - i = table->segment_size; - while (i < count) - i <<= 1; - count = i >> table->segment_size_shift; - - table->segment_count = 0; + table->segment_count = 1; table->p = 0; table->entry_count = 0; table->delete_callback = delete_callback; table->delete_pvt = delete_private_data; /* - * Allocate initial 'i' segments of buckets + * Allocate initial segments of buckets */ - for (i = 0; i < count; i++) { + for (i = 0; i < table->segment_count; i++) { table->directory[i] = (segment_t *)halloc(table, table->segment_size * sizeof(segment_t)); if (table->directory[i] == NULL) { hash_destroy(table); return HASH_ERROR_NO_MEMORY; } memset(table->directory[i], 0, table->segment_size * sizeof(segment_t)); - table->segment_count++; } table->bucket_count = table->segment_count << table->segment_size_shift; table->maxp = table->bucket_count; @@ -580,9 +612,16 @@ int hash_create_ex(unsigned long count, hash_table_t **tbl, table->max_load_factor = max_load_factor == 0 ? HASH_DEFAULT_MAX_LOAD_FACTOR : max_load_factor; #if DEBUG - if (debug_level >= 1) - fprintf(stderr, "hash_create_ex: table=%p count=%lu maxp=%lu segment_count=%lu\n", - table, count, table->maxp, table->segment_count); + if (debug_level >= 1) { + fprintf(stderr, "hash_create_ex: count=%lu available buckets=%lu bucket_count=%lu maxp=%lu\n", + count, table->directory_size*table->segment_size, table->bucket_count, table->maxp); + fprintf(stderr, " directory_bits=%u segment_bits=%u requested_bits=%u\n", + directory_bits, segment_bits, requested_bits); + fprintf(stderr, " directory_size=%lu segment_size=%lu segment_count=%lu\n", + table->directory_size, table->segment_size, table->segment_count); + fprintf(stderr, " min_load_factor=%lu max_load_factor=%lu\n", + table->min_load_factor, table->max_load_factor); + } #endif #ifdef HASH_STATISTICS memset(&table->statistics, 0, sizeof(table->statistics)); @@ -613,25 +652,27 @@ int hash_destroy(hash_table_t *table) if (!table) return HASH_ERROR_BAD_TABLE; if (table != NULL) { - for (i = 0; i < table->segment_count; i++) { - /* test probably unnecessary */ - if ((s = table->directory[i]) != NULL) { - for (j = 0; j < table->segment_size; j++) { - p = s[j]; - while (p != NULL) { - q = p->next; - hdelete_callback(table, HASH_TABLE_DESTROY, &p->entry); - if (p->entry.key.type == HASH_KEY_STRING) { - hfree(table, (char *)p->entry.key.str); + if (table->directory) { + for (i = 0; i < table->segment_count; i++) { + /* test probably unnecessary */ + if ((s = table->directory[i]) != NULL) { + for (j = 0; j < table->segment_size; j++) { + p = s[j]; + while (p != NULL) { + q = p->next; + hdelete_callback(table, HASH_TABLE_DESTROY, &p->entry); + if (p->entry.key.type == HASH_KEY_STRING) { + hfree(table, (char *)p->entry.key.str); + } + hfree(table, (char *)p); + p = q; } - hfree(table, (char *)p); - p = q; } + hfree(table, s); } - hfree(table, s); } + hfree(table, table->directory); } - hfree(table, table->directory); hfree(table, table); table = NULL; } diff --git a/dhash/dhash.h b/dhash/dhash.h index a10ed24..baa0d6a 100644 --- a/dhash/dhash.h +++ b/dhash/dhash.h @@ -203,13 +203,40 @@ const char* hash_error_string(int error); * Create a new hash table with room for n initial entries. hash_create returns * an opaque pointer to the new hash table in the table parameter. Functions * operating on the hash table pass in this hash table pointer. This means you - * may have as many concurrent hash tables as you want. The delete_callback - * parameter is a pointer to a function which will be called just prior to a - * hash entry being deleted. This is useful when the hash value has items which - * may need to be disposed of. The delete_callback may be NULL. - * The delete_private_data is data passed to the delete_callback, this way - * custom callbacks can have a private context to reach data they need to use - * before performning their operations. delete_private_data may be NULL. + * may have as many concurrent hash tables as you want. + + * If the table creation is successful tbl is set to the new table and + * HASH_SUCCESS is returned, otherwise tbl will be set to NULL and the + * return value will be a HASH error code. + * + * count + * Expected number of entries in the table. This is a tuning + * parameter because a dynamic hash table dynamically adjusts it's + * internal data strucuture to optimize for the actual number of + * entries. The count parameter allows for some more optimization, + * however it's not critical to get it right, the table will still + * perform well. If you have no idea of how many entries to expect + * then pass 0, a reasonable default will be selected. + * tbl + * Pointer to a hash_table_t variable which is initialized to the + * new hash table (i.e. the returned table). If the table fails to + * be created due to errors the tbl parameter will be set to NULL + * and the return value will be a HASH error code. + * delete_callback + * Pointer to a function which will be called just prior to a hash + * entry being deleted. This is useful when the hash value has + * items which may need to be disposed of. The delete_callback may + * be NULL. + * delete_private_data + * Void pointer passed to the delete_callback, this way delete + * callbacks can have a private context to reach data they need to + * use before performning their operations. delete_private_data + * may be NULL. + * + * hash_create is identical to calling: + * + * hash_create_ex(count, tbl, 0, 0, 0, 0, NULL, NULL, NULL, + * delete_callback, delete_private_data); */ int hash_create(unsigned long count, hash_table_t **tbl, hash_delete_callback *delete_callback, @@ -217,22 +244,32 @@ int hash_create(unsigned long count, hash_table_t **tbl, /* * Create a new hash table and fine tune it's configurable parameters. - * Refer to CACM article for explanation of parameters. + * Refer to CACM article for explanation of parameters. See + * hash_create for other parameters and return values. * - * directory_bits: number of address bits allocated to top level directory array. - * segment_bits: number of address bits allocated to segment array. - * min_load_factor: table contracted when the ratio of entry count to bucket count - * is less than the min_load_factor the table is contracted. - * max_load_factor: table expanded when the ratio of entry count to bucket count - * is greater than the max_load_factor the table is expanded. - * alloc_func: function pointer for allocation - * free_func: funciton pointer for freeing memory allocated with alloc_func - * alloc_private data: data passed to the alloc and free functions so that - * custom functions can refernce other private data they may - * need during their execution without having to use global - * variables. + * directory_bits + * Number of address bits allocated to top level directory array. + * If directory_bits or segment_bits is zero then directory_bits + * and segment_bits will be computed based on the count parameter. + * segment_bits + * number of address bits allocated to segment array. + * min_load_factor + * The table contracted when the ratio of entry count to bucket count + * is less than the min_load_factor the table is contracted. + * max_load_factor + * The table expanded when the ratio of entry count to bucket count + * is greater than the max_load_factor the table is expanded. + * alloc_func + * Function pointer for allocation. + * free_func + * Function pointer for freeing memory allocated with alloc_func. + * alloc_private_data + * Data pointer passed to the alloc and free functions so that + * custom functions can refernce other private data they may need + * during their execution without having to use global variables. * - * Note directory_bits + segment_bits must be <= number of bits in unsigned long + * Note directory_bits + segment_bits must be <= number of bits in + * unsigned long */ int hash_create_ex(unsigned long count, hash_table_t **tbl, unsigned int directory_bits, diff --git a/dhash/examples/dhash_test.c b/dhash/examples/dhash_test.c index 6c02de1..8c11227 100644 --- a/dhash/examples/dhash_test.c +++ b/dhash/examples/dhash_test.c @@ -132,7 +132,9 @@ typedef struct test_val_t { int main(int argc, char **argv) { test_val_t *test = NULL; - long i, k; + long i, j, k; + bool duplicate; + long val; int status; hash_value_t value; hash_value_t old_value; @@ -141,8 +143,10 @@ int main(int argc, char **argv) char buf[1024]; hash_table_t *table = NULL; unsigned long callback_count = 0; - unsigned int directory_bits = HASH_DEFAULT_DIRECTORY_BITS; - unsigned int segment_bits = HASH_DEFAULT_SEGMENT_BITS; + unsigned long table_size = 0; + unsigned int seed = time(0); + unsigned int directory_bits = 0; + unsigned int segment_bits = 0; unsigned long min_load_factor = HASH_DEFAULT_MIN_LOAD_FACTOR; unsigned long max_load_factor = HASH_DEFAULT_MAX_LOAD_FACTOR; @@ -153,20 +157,22 @@ int main(int argc, char **argv) {"count", 1, 0, 'c'}, {"verbose", 1, 0, 'v'}, {"quiet", 1, 0, 'q'}, + {"table-size", 1, 0, 't'}, {"directory-bits", 1, 0, 'd'}, {"segment-bits", 1, 0, 's'}, {"min-load-factor", 1, 0, 'l'}, {"max-load-factor", 1, 0, 'h'}, + {"seed", 1, 0, 'r'}, {0, 0, 0, 0} }; - arg = getopt_long(argc, argv, "c:vqd:s:l:h:", + arg = getopt_long(argc, argv, "c:vqt:d:s:l:h:r:", long_options, &option_index); if (arg == -1) break; switch (arg) { case 'c': - max_test = atol(optarg); + max_test = strtoul(optarg, NULL, 0); break; case 'v': verbose = 1; @@ -174,6 +180,9 @@ int main(int argc, char **argv) case 'q': verbose = 0; break; + case 't': + table_size = strtoul(optarg, NULL, 0); + break; case 'd': directory_bits = atoi(optarg); break; @@ -181,10 +190,13 @@ int main(int argc, char **argv) segment_bits = atoi(optarg); break; case 'l': - min_load_factor = atol(optarg); + min_load_factor = strtoul(optarg, NULL, 0); break; case 'h': - max_load_factor = atol(optarg); + max_load_factor = strtoul(optarg, NULL, 0); + break; + case 'r': + seed = strtoul(optarg, NULL, 0); break; } } @@ -204,10 +216,11 @@ int main(int argc, char **argv) /* Initialize the random number generator */ - srandom(time(0)); + srandom(seed); + printf("random seed: %#x\n", seed); /* Create the hash table as small as possible to exercise growth */ - if ((status = hash_create_ex(1, &table, + if ((status = hash_create_ex(table_size, &table, directory_bits, segment_bits, min_load_factor, max_load_factor, NULL, NULL, NULL, @@ -218,7 +231,19 @@ int main(int argc, char **argv) /* Initialize the array of test values */ for (i = 0; i < max_test; i++) { - test[i].val = random(); + /* Get random value, make sure it's unique */ + duplicate = true; + while (duplicate) { + duplicate = false; + val = random(); + for (j = 0; j < i; j++) { + if (test[j].val == val) { + duplicate = true; + break; + } + } + } + test[i].val = val; /* If the value is odd we'll use a string as the key, * otherwise we'll use an unsigned long as the key */ if (test[i].val & 1) { @@ -228,6 +253,8 @@ int main(int argc, char **argv) } } + printf("Completed building test values, beginning test ...\n"); + /* Enter all the test values into the hash table */ for (i = 0; i < max_test; i++) { if (test[i].val & 1) { |