path: root/dhash/dhash.c
author     John Dennis <jdennis@redhat.com>          2011-09-06 14:36:59 -0400
committer  Stephen Gallagher <sgallagh@redhat.com>   2012-04-25 07:43:22 -0400
commit  add869cb76e6ba1e3a16923faf83b68148e9278b (patch)
tree    ec3901426dcadd3b1add34232b1cd776cde03b13 /dhash/dhash.c
parent  b597c0e70474f1ee40fd78d006fc1da1a04d4bba (diff)
* Resolves: bug #735464. Fix the loop limit used to initialize the
  table directory; it was based on count, it is now limited to
  segment_count.

* Do not pre-allocate all buckets based on the requested table size.
  This defeats the point of a dynamic hash table, which adjusts its
  memory usage depending upon the number of items it holds.
  Pre-allocation also runs afoul of the table contraction logic: if
  the table is pre-allocated to a large size it will subsequently try
  to compact it, negating the pre-allocation. Now the initial
  allocation is restricted to one directory segment
  (i.e. table->segment_count == 1).

* If the caller did not specify the directory_bits and segment_bits
  then compute them from the requested table size (see the sketch
  after this list). The directory_size and segment_size must be
  powers of 2 to accommodate fast arithmetic. An optimal maximum
  number of buckets would be equal to the anticipated table size: if
  there were no collisions, each item would be located in its own
  bucket whose chain length is 1, and the item would be immediately
  found upon indexing into the bucket table.

* There is a requirement that there be at least one directory
  segment; however, contract_table() failed to enforce this
  requirement. Add a check to enforce the requirement.

* If hash_create() or hash_create_ex() failed, it left the tbl
  parameter uninitialized but returned an error code. Now upon
  failure tbl is initialized to NULL as well as an error code being
  returned.

* Clean up how the directory and segment sizes were computed. It had
  been using a loop, shifting 1 bit at a time; now it shifts all the
  bits in one operation and assures at least one directory and
  segment are allocated.

* In the event of an early exit from hash_create_ex() due to an
  error, make sure all previously allocated resources are cleaned up
  by calling hash_destroy(). There was only one place this was
  missing. hash_destroy() blindly assumed the table had a directory;
  normally it would, unless hash_destroy() was called due to an early
  exit in hash_create_ex(). Modify hash_destroy() to check for the
  existence of the directory before cleaning up the directory
  contents.

* Update the function documentation in dhash.h to explain each
  parameter of hash_create() and hash_create_ex().

* Enhance dhash_test.c:
  - Output the random seed used to initialize the random number
    generator, and add a command line option to set the random seed.
    This permits recreating a failed test run.
  - Add a command line parameter to set the initial table size.
  - Use proper string-to-long conversion routines when reading
    command line parameters (e.g. strtoul() instead of atoi()).
  - Add logic to make sure each test value is unique.

* Add additional information to the debug output to show the computed
  directory_bits, segment_bits, sizes, etc.

* Adjust the debug_level conditionals to be less verbose.
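As an aside, here is a minimal, self-contained sketch (not the patch
itself) of how directory_bits and segment_bits can be derived from a
requested count, mirroring the logic this commit adds to
hash_create_ex(); compute_bits() is a hypothetical helper name, and
the MAX macro matches the one the patch introduces:

    #include <stdio.h>

    #define MAX(a,b) (((a) > (b)) ? (a) : (b))

    /* Hypothetical helper: split the bits of the smallest power of 2
     * that is >= count evenly between the directory and the segments. */
    static void compute_bits(unsigned long count,
                             unsigned int *directory_bits,
                             unsigned int *segment_bits)
    {
        unsigned int requested_bits = 1;

        /* smallest power of 2 >= count, i.e. the requested bucket count */
        if (count > 1)
            for (requested_bits = 0;
                 (1UL << requested_bits) < count;
                 requested_bits++);

        *directory_bits = MAX(requested_bits >> 1, 1);
        *segment_bits = MAX(requested_bits - *directory_bits, 1);
    }

    int main(void)
    {
        unsigned int d, s;

        compute_bits(1000, &d, &s);
        /* 2^d directory slots * 2^s buckets per segment = 1024 >= 1000 */
        printf("directory_bits=%u segment_bits=%u max buckets=%lu\n",
               d, s, 1UL << (d + s));
        return 0;
    }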
Diffstat (limited to 'dhash/dhash.c')
-rw-r--r--  dhash/dhash.c  133
1 file changed, 87 insertions(+), 46 deletions(-)
diff --git a/dhash/dhash.c b/dhash/dhash.c
index f92e463..79f9c38 100644
--- a/dhash/dhash.c
+++ b/dhash/dhash.c
@@ -53,10 +53,13 @@
#define PRIME_1 37
#define PRIME_2 1048583
- /*
- * Fast arithmetic, relying on powers of 2, and on pre-processor
- * concatenation property
- */
+#ifndef MIN
+ #define MIN(a,b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#ifndef MAX
+ #define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#endif
#define halloc(table, size) table->halloc(size, table->halloc_pvt)
#define hfree(table, ptr) table->hfree(ptr, table->halloc_pvt)
@@ -247,7 +250,7 @@ static int expand_table(hash_table_t *table)
if (table->bucket_count < (table->directory_size << table->segment_size_shift)) {
#ifdef DEBUG
- if (debug_level >= 1)
+ if (debug_level >= 2)
fprintf(stderr, "expand_table on entry: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n",
table->bucket_count, table->segment_count, table->p, table->maxp);
#endif
@@ -314,7 +317,7 @@ static int expand_table(hash_table_t *table)
}
}
#ifdef DEBUG
- if (debug_level >= 1)
+ if (debug_level >= 2)
fprintf(stderr, "expand_table on exit: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n",
table->bucket_count, table->segment_count, table->p, table->maxp);
#endif
@@ -330,9 +333,9 @@ static int contract_table(hash_table_t *table)
segment_t *old_segment, *new_segment;
element_t *current;
- if (table->bucket_count > table->segment_size) {
+ if ((table->bucket_count > table->segment_size) && (table->segment_count > 1)) {
#ifdef DEBUG
- if (debug_level >= 1)
+ if (debug_level >= 2)
fprintf(stderr, "contract_table on entry: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n",
table->bucket_count, table->segment_count, table->p, table->maxp);
#endif
@@ -395,7 +398,7 @@ static int contract_table(hash_table_t *table)
}
#ifdef DEBUG
- if (debug_level >= 1)
+ if (debug_level >= 2)
fprintf(stderr, "contract_table on exit: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n",
table->bucket_count, table->segment_count, table->p, table->maxp);
#endif
@@ -428,7 +431,7 @@ static int lookup(hash_table_t *table, hash_key_t *key, element_t **element_arg,
current_segment = table->directory[segment_dir];
#ifdef DEBUG
- if (debug_level >= 2)
+ if (debug_level >= 3)
fprintf(stderr, "lookup: h=%lu, segment_dir=%lu, segment_index=%lu current_segment=%p\n",
h, segment_dir, segment_index, current_segment);
#endif
@@ -505,19 +508,47 @@ int hash_create_ex(unsigned long count, hash_table_t **tbl,
hash_free_func *free_func,
void *alloc_private_data,
hash_delete_callback *delete_callback,
- void *delete_private_data)
-{
+ void *delete_private_data) {
unsigned long i;
- unsigned int n_addr_bits;
+ unsigned int n_addr_bits, requested_bits;
+ unsigned int requested_directory_bits;
+ unsigned int requested_segment_bits;
address_t addr;
hash_table_t *table = NULL;
+ /* Initialize to NULL in case of an early return due to an error */
+ *tbl = NULL;
+
if (alloc_func == NULL) alloc_func = sys_malloc_wrapper;
if (free_func == NULL) free_func = sys_free_wrapper;
/* Compute directory and segment parameters */
- if (directory_bits == 0) directory_bits = HASH_DEFAULT_DIRECTORY_BITS;
- if (segment_bits == 0) segment_bits = HASH_DEFAULT_SEGMENT_BITS;
+
+ /* compute power of 2 >= count; it's the number of requested buckets */
+ if (count > 1) {
+ for (requested_bits = 0; (1 << requested_bits) < count; requested_bits++);
+ } else {
+ requested_bits = 1;
+ }
+
+ /*
+ * If caller didn't specify directory & segment allocation then
+ * compute it from the requested count.
+ */
+ if (directory_bits == 0 || segment_bits == 0) {
+ /* Equally divide buckets between the directory and segments */
+ requested_directory_bits = MAX(requested_bits >> 1, 1);
+ requested_segment_bits = MAX(requested_bits - requested_directory_bits, 1);
+
+ /* If the requested count wasn't specified pick a default */
+ if (count == 0) {
+ directory_bits = MAX(requested_directory_bits, HASH_DEFAULT_DIRECTORY_BITS);
+ segment_bits = MAX(requested_segment_bits, HASH_DEFAULT_SEGMENT_BITS);
+ } else {
+ directory_bits = requested_directory_bits;
+ segment_bits = requested_segment_bits;
+ }
+ }
for (addr = ~0, n_addr_bits = 0; addr; addr >>= 1, n_addr_bits++);
@@ -534,45 +565,46 @@ int hash_create_ex(unsigned long count, hash_table_t **tbl,
table->halloc_pvt = alloc_private_data;
table->directory_size_shift = directory_bits;
- for (i = 0, table->directory_size = 1; i < table->directory_size_shift; i++, table->directory_size <<= 1);
+ table->directory_size = directory_bits ? 1 << directory_bits : 0;
table->segment_size_shift = segment_bits;
- for (i = 0, table->segment_size = 1; i < table->segment_size_shift; i++, table->segment_size <<= 1);
-
+ table->segment_size = segment_bits ? 1 << segment_bits : 0;
/* Allocate directory */
table->directory = (segment_t **)halloc(table, table->directory_size * sizeof(segment_t *));
if (table->directory == NULL) {
+ hash_destroy(table);
return HASH_ERROR_NO_MEMORY;
}
memset(table->directory, 0, table->directory_size * sizeof(segment_t *));
/*
- * Adjust count to be nearest higher power of 2, minimum SEGMENT_SIZE, then
- * convert into segments.
+ * If one wanted to pre-allocate all the buckets necessary to meet the needs
+ * of the requested count it would be done like this:
+ *
+ * table->segment_count = MIN((count + table->segment_size-1) / table->segment_size,
+ * table->directory_size);
+ *
+ * But with dynamic hash tables there is little point to this, the whole idea is to
+ * allow the table to grow/shrink dynamically, therefore we start with just one
+ * segment of buckets, the minimum necessary.
*/
- i = table->segment_size;
- while (i < count)
- i <<= 1;
- count = i >> table->segment_size_shift;
-
- table->segment_count = 0;
+ table->segment_count = 1;
table->p = 0;
table->entry_count = 0;
table->delete_callback = delete_callback;
table->delete_pvt = delete_private_data;
/*
- * Allocate initial 'i' segments of buckets
+ * Allocate initial segments of buckets
*/
- for (i = 0; i < count; i++) {
+ for (i = 0; i < table->segment_count; i++) {
table->directory[i] = (segment_t *)halloc(table, table->segment_size * sizeof(segment_t));
if (table->directory[i] == NULL) {
hash_destroy(table);
return HASH_ERROR_NO_MEMORY;
}
memset(table->directory[i], 0, table->segment_size * sizeof(segment_t));
- table->segment_count++;
}
table->bucket_count = table->segment_count << table->segment_size_shift;
table->maxp = table->bucket_count;
@@ -580,9 +612,16 @@ int hash_create_ex(unsigned long count, hash_table_t **tbl,
table->max_load_factor = max_load_factor == 0 ? HASH_DEFAULT_MAX_LOAD_FACTOR : max_load_factor;
#if DEBUG
- if (debug_level >= 1)
- fprintf(stderr, "hash_create_ex: table=%p count=%lu maxp=%lu segment_count=%lu\n",
- table, count, table->maxp, table->segment_count);
+ if (debug_level >= 1) {
+ fprintf(stderr, "hash_create_ex: count=%lu available buckets=%lu bucket_count=%lu maxp=%lu\n",
+ count, table->directory_size*table->segment_size, table->bucket_count, table->maxp);
+ fprintf(stderr, " directory_bits=%u segment_bits=%u requested_bits=%u\n",
+ directory_bits, segment_bits, requested_bits);
+ fprintf(stderr, " directory_size=%lu segment_size=%lu segment_count=%lu\n",
+ table->directory_size, table->segment_size, table->segment_count);
+ fprintf(stderr, " min_load_factor=%lu max_load_factor=%lu\n",
+ table->min_load_factor, table->max_load_factor);
+ }
#endif
#ifdef HASH_STATISTICS
memset(&table->statistics, 0, sizeof(table->statistics));
@@ -613,25 +652,27 @@ int hash_destroy(hash_table_t *table)
if (!table) return HASH_ERROR_BAD_TABLE;
if (table != NULL) {
- for (i = 0; i < table->segment_count; i++) {
- /* test probably unnecessary */
- if ((s = table->directory[i]) != NULL) {
- for (j = 0; j < table->segment_size; j++) {
- p = s[j];
- while (p != NULL) {
- q = p->next;
- hdelete_callback(table, HASH_TABLE_DESTROY, &p->entry);
- if (p->entry.key.type == HASH_KEY_STRING) {
- hfree(table, (char *)p->entry.key.str);
+ if (table->directory) {
+ for (i = 0; i < table->segment_count; i++) {
+ /* test probably unnecessary */
+ if ((s = table->directory[i]) != NULL) {
+ for (j = 0; j < table->segment_size; j++) {
+ p = s[j];
+ while (p != NULL) {
+ q = p->next;
+ hdelete_callback(table, HASH_TABLE_DESTROY, &p->entry);
+ if (p->entry.key.type == HASH_KEY_STRING) {
+ hfree(table, (char *)p->entry.key.str);
+ }
+ hfree(table, (char *)p);
+ p = q;
}
- hfree(table, (char *)p);
- p = q;
}
+ hfree(table, s);
}
- hfree(table, s);
}
+ hfree(table, table->directory);
}
- hfree(table, table->directory);
hfree(table, table);
table = NULL;
}
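For callers, a short usage sketch of the error contract this commit
establishes, assuming the hash_create(), hash_error_string(), and
hash_destroy() declarations in dhash.h (the bucket layout is computed
from count as described in the commit message):

    #include <stdio.h>
    #include "dhash.h"

    int main(void)
    {
        hash_table_t *tbl;
        int ret;

        /* count=1000, no delete callback or private data */
        ret = hash_create(1000, &tbl, NULL, NULL);
        if (ret != HASH_SUCCESS) {
            /* after this patch tbl is guaranteed to be NULL here */
            fprintf(stderr, "hash_create: %s\n", hash_error_string(ret));
            return 1;
        }

        /* ... insert and look up entries ... */

        hash_destroy(tbl);
        return 0;
    }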