summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Dennis <jdennis@redhat.com>2011-09-06 14:36:59 -0400
committerStephen Gallagher <sgallagh@redhat.com>2012-04-25 07:43:22 -0400
commitadd869cb76e6ba1e3a16923faf83b68148e9278b (patch)
treeec3901426dcadd3b1add34232b1cd776cde03b13
parentb597c0e70474f1ee40fd78d006fc1da1a04d4bba (diff)
downloadding-libs-add869cb76e6ba1e3a16923faf83b68148e9278b.tar.gz
ding-libs-add869cb76e6ba1e3a16923faf83b68148e9278b.tar.xz
ding-libs-add869cb76e6ba1e3a16923faf83b68148e9278b.zip
* Resolves: bug #735464 Fix the loop limit used to initialize the table directory; it was previously based on count and is now limited to segment_count.
* Do not pre-allocate all buckets based on requested table size. This defeats the point of a dynamic hash table, which adjusts its memory usage depending upon the number of items it holds. Pre-allocation also runs afoul of the table contraction logic: if the table is pre-allocated to a large size it will subsequently try to compact it, negating the pre-allocation. Now the initial allocation is restricted to one directory segment (i.e. table->segment_count == 1) * If the caller did not specify the directory_bits and segment_bits then compute them from the requested table size. The directory_size and segment_size must be powers of 2 to accommodate fast arithmetic. An optimal maximum number of buckets would be equal to the anticipated table size. If there were no collisions that would mean each item would be located in its own bucket whose chain length is 1; the item would be immediately found upon indexing into the bucket table. * There is a requirement that there be at least one directory segment, however contract_table() failed to enforce this requirement. Add a check to enforce the requirement. * If hash_create() or hash_create_ex() failed it left the tbl parameter uninitialized but returned an error code. Now upon failure tbl is initialized to NULL as well as returning an error code. * Clean up how the directory and segment sizes were computed. It had been using a loop and shifting 1 bit at a time; now it shifts all the bits in one operation and assures at least one directory and segment are allocated. * In the event of an early exit from hash_create_ex() due to an error make sure all previously allocated resources are cleaned up by calling hash_destroy(). There was only one place this was missing. hash_destroy() blindly assumed the table had a directory; normally it would, unless hash_destroy was called due to an early exit in hash_create_ex(). Modify hash_destroy() to check for the existence of the directory before cleaning up the directory contents. 
* Update the function documentation in dhash.h to explain each parameter of hash_create() and hash_create_ex(). * Enhance dhash_test.c - To output the random seed used to initialize the random number generator and add command line option to set the random seed. This permits recreating a failed test run. - Add command line parameter to set the initial table size. - Use proper string to long conversion routines when reading command line parameters (e.g. strtoul() instead of atoi()) - Add logic to make sure each test value is unique. * Add additional information to the debug output to show the computed directory_bits, segment_bits, sizes, etc. * Adjust the debug_level conditionals to be less verbose.
-rw-r--r--dhash/dhash.c133
-rw-r--r--dhash/dhash.h79
-rw-r--r--dhash/examples/dhash_test.c47
3 files changed, 182 insertions, 77 deletions
diff --git a/dhash/dhash.c b/dhash/dhash.c
index f92e463..79f9c38 100644
--- a/dhash/dhash.c
+++ b/dhash/dhash.c
@@ -53,10 +53,13 @@
#define PRIME_1 37
#define PRIME_2 1048583
- /*
- * Fast arithmetic, relying on powers of 2, and on pre-processor
- * concatenation property
- */
+#ifndef MIN
+ #define MIN(a,b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#ifndef MAX
+ #define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#endif
#define halloc(table, size) table->halloc(size, table->halloc_pvt)
#define hfree(table, ptr) table->hfree(ptr, table->halloc_pvt)
@@ -247,7 +250,7 @@ static int expand_table(hash_table_t *table)
if (table->bucket_count < (table->directory_size << table->segment_size_shift)) {
#ifdef DEBUG
- if (debug_level >= 1)
+ if (debug_level >= 2)
fprintf(stderr, "expand_table on entry: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n",
table->bucket_count, table->segment_count, table->p, table->maxp);
#endif
@@ -314,7 +317,7 @@ static int expand_table(hash_table_t *table)
}
}
#ifdef DEBUG
- if (debug_level >= 1)
+ if (debug_level >= 2)
fprintf(stderr, "expand_table on exit: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n",
table->bucket_count, table->segment_count, table->p, table->maxp);
#endif
@@ -330,9 +333,9 @@ static int contract_table(hash_table_t *table)
segment_t *old_segment, *new_segment;
element_t *current;
- if (table->bucket_count > table->segment_size) {
+ if ((table->bucket_count > table->segment_size) && (table->segment_count > 1)) {
#ifdef DEBUG
- if (debug_level >= 1)
+ if (debug_level >= 2)
fprintf(stderr, "contract_table on entry: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n",
table->bucket_count, table->segment_count, table->p, table->maxp);
#endif
@@ -395,7 +398,7 @@ static int contract_table(hash_table_t *table)
}
#ifdef DEBUG
- if (debug_level >= 1)
+ if (debug_level >= 2)
fprintf(stderr, "contract_table on exit: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n",
table->bucket_count, table->segment_count, table->p, table->maxp);
#endif
@@ -428,7 +431,7 @@ static int lookup(hash_table_t *table, hash_key_t *key, element_t **element_arg,
current_segment = table->directory[segment_dir];
#ifdef DEBUG
- if (debug_level >= 2)
+ if (debug_level >= 3)
fprintf(stderr, "lookup: h=%lu, segment_dir=%lu, segment_index=%lu current_segment=%p\n",
h, segment_dir, segment_index, current_segment);
#endif
@@ -505,19 +508,47 @@ int hash_create_ex(unsigned long count, hash_table_t **tbl,
hash_free_func *free_func,
void *alloc_private_data,
hash_delete_callback *delete_callback,
- void *delete_private_data)
-{
+ void *delete_private_data) {
unsigned long i;
- unsigned int n_addr_bits;
+ unsigned int n_addr_bits, requested_bits;
+ unsigned int requested_directory_bits;
+ unsigned int requested_segment_bits;
address_t addr;
hash_table_t *table = NULL;
+ /* Initialize to NULL in case of an early return due to an error */
+ *tbl = NULL;
+
if (alloc_func == NULL) alloc_func = sys_malloc_wrapper;
if (free_func == NULL) free_func = sys_free_wrapper;
/* Compute directory and segment parameters */
- if (directory_bits == 0) directory_bits = HASH_DEFAULT_DIRECTORY_BITS;
- if (segment_bits == 0) segment_bits = HASH_DEFAULT_SEGMENT_BITS;
+
+ /* compute power of 2 >= count; it's the number of requested buckets */
+ if (count > 1) {
+ for (requested_bits = 0; (1 << requested_bits) < count; requested_bits++);
+ } else {
+ requested_bits = 1;
+ }
+
+ /*
+ * If caller didn't specify directory & segment allocation then
+ * compute it from the requested count.
+ */
+ if (directory_bits == 0 || segment_bits == 0) {
+ /* Equally divide buckets between the directory and segments */
+ requested_directory_bits = MAX(requested_bits >> 1, 1);
+ requested_segment_bits = MAX(requested_bits - requested_directory_bits, 1);
+
+ /* If the requested count wasn't specified pick a default */
+ if (count == 0) {
+ directory_bits = MAX(requested_directory_bits, HASH_DEFAULT_DIRECTORY_BITS);
+ segment_bits = MAX(requested_segment_bits, HASH_DEFAULT_SEGMENT_BITS);
+ } else {
+ directory_bits = requested_directory_bits;
+ segment_bits = requested_segment_bits;
+ }
+ }
for (addr = ~0, n_addr_bits = 0; addr; addr >>= 1, n_addr_bits++);
@@ -534,45 +565,46 @@ int hash_create_ex(unsigned long count, hash_table_t **tbl,
table->halloc_pvt = alloc_private_data;
table->directory_size_shift = directory_bits;
- for (i = 0, table->directory_size = 1; i < table->directory_size_shift; i++, table->directory_size <<= 1);
+ table->directory_size = directory_bits ? 1 << directory_bits : 0;
table->segment_size_shift = segment_bits;
- for (i = 0, table->segment_size = 1; i < table->segment_size_shift; i++, table->segment_size <<= 1);
-
+ table->segment_size = segment_bits ? 1 << segment_bits : 0;
/* Allocate directory */
table->directory = (segment_t **)halloc(table, table->directory_size * sizeof(segment_t *));
if (table->directory == NULL) {
+ hash_destroy(table);
return HASH_ERROR_NO_MEMORY;
}
memset(table->directory, 0, table->directory_size * sizeof(segment_t *));
/*
- * Adjust count to be nearest higher power of 2, minimum SEGMENT_SIZE, then
- * convert into segments.
+ * If one wanted to pre-allocate all the buckets necessary to meet the needs
+ * of the requested count it would be done like this:
+ *
+ * table->segment_count = MIN((count + table->segment_size-1) / table->segment_size,
+ * table->directory_size);
+ *
+ * But with dynamic hash tables there is little point to this, the whole idea is to
+ * allow the table to grow/shrink dynamically, therefore we start with just one
+ * segment of buckets, the minimum necessary.
*/
- i = table->segment_size;
- while (i < count)
- i <<= 1;
- count = i >> table->segment_size_shift;
-
- table->segment_count = 0;
+ table->segment_count = 1;
table->p = 0;
table->entry_count = 0;
table->delete_callback = delete_callback;
table->delete_pvt = delete_private_data;
/*
- * Allocate initial 'i' segments of buckets
+ * Allocate initial segments of buckets
*/
- for (i = 0; i < count; i++) {
+ for (i = 0; i < table->segment_count; i++) {
table->directory[i] = (segment_t *)halloc(table, table->segment_size * sizeof(segment_t));
if (table->directory[i] == NULL) {
hash_destroy(table);
return HASH_ERROR_NO_MEMORY;
}
memset(table->directory[i], 0, table->segment_size * sizeof(segment_t));
- table->segment_count++;
}
table->bucket_count = table->segment_count << table->segment_size_shift;
table->maxp = table->bucket_count;
@@ -580,9 +612,16 @@ int hash_create_ex(unsigned long count, hash_table_t **tbl,
table->max_load_factor = max_load_factor == 0 ? HASH_DEFAULT_MAX_LOAD_FACTOR : max_load_factor;
#if DEBUG
- if (debug_level >= 1)
- fprintf(stderr, "hash_create_ex: table=%p count=%lu maxp=%lu segment_count=%lu\n",
- table, count, table->maxp, table->segment_count);
+ if (debug_level >= 1) {
+ fprintf(stderr, "hash_create_ex: count=%lu available buckets=%lu bucket_count=%lu maxp=%lu\n",
+ count, table->directory_size*table->segment_size, table->bucket_count, table->maxp);
+ fprintf(stderr, " directory_bits=%u segment_bits=%u requested_bits=%u\n",
+ directory_bits, segment_bits, requested_bits);
+ fprintf(stderr, " directory_size=%lu segment_size=%lu segment_count=%lu\n",
+ table->directory_size, table->segment_size, table->segment_count);
+ fprintf(stderr, " min_load_factor=%lu max_load_factor=%lu\n",
+ table->min_load_factor, table->max_load_factor);
+ }
#endif
#ifdef HASH_STATISTICS
memset(&table->statistics, 0, sizeof(table->statistics));
@@ -613,25 +652,27 @@ int hash_destroy(hash_table_t *table)
if (!table) return HASH_ERROR_BAD_TABLE;
if (table != NULL) {
- for (i = 0; i < table->segment_count; i++) {
- /* test probably unnecessary */
- if ((s = table->directory[i]) != NULL) {
- for (j = 0; j < table->segment_size; j++) {
- p = s[j];
- while (p != NULL) {
- q = p->next;
- hdelete_callback(table, HASH_TABLE_DESTROY, &p->entry);
- if (p->entry.key.type == HASH_KEY_STRING) {
- hfree(table, (char *)p->entry.key.str);
+ if (table->directory) {
+ for (i = 0; i < table->segment_count; i++) {
+ /* test probably unnecessary */
+ if ((s = table->directory[i]) != NULL) {
+ for (j = 0; j < table->segment_size; j++) {
+ p = s[j];
+ while (p != NULL) {
+ q = p->next;
+ hdelete_callback(table, HASH_TABLE_DESTROY, &p->entry);
+ if (p->entry.key.type == HASH_KEY_STRING) {
+ hfree(table, (char *)p->entry.key.str);
+ }
+ hfree(table, (char *)p);
+ p = q;
}
- hfree(table, (char *)p);
- p = q;
}
+ hfree(table, s);
}
- hfree(table, s);
}
+ hfree(table, table->directory);
}
- hfree(table, table->directory);
hfree(table, table);
table = NULL;
}
diff --git a/dhash/dhash.h b/dhash/dhash.h
index a10ed24..baa0d6a 100644
--- a/dhash/dhash.h
+++ b/dhash/dhash.h
@@ -203,13 +203,40 @@ const char* hash_error_string(int error);
* Create a new hash table with room for n initial entries. hash_create returns
* an opaque pointer to the new hash table in the table parameter. Functions
* operating on the hash table pass in this hash table pointer. This means you
- * may have as many concurrent hash tables as you want. The delete_callback
- * parameter is a pointer to a function which will be called just prior to a
- * hash entry being deleted. This is useful when the hash value has items which
- * may need to be disposed of. The delete_callback may be NULL.
- * The delete_private_data is data passed to the delete_callback, this way
- * custom callbacks can have a private context to reach data they need to use
- * before performning their operations. delete_private_data may be NULL.
+ * may have as many concurrent hash tables as you want.
+
+ * If the table creation is successful tbl is set to the new table and
+ * HASH_SUCCESS is returned, otherwise tbl will be set to NULL and the
+ * return value will be a HASH error code.
+ *
+ * count
+ * Expected number of entries in the table. This is a tuning
+ * parameter because a dynamic hash table dynamically adjusts its
+ * internal data structure to optimize for the actual number of
+ * entries. The count parameter allows for some more optimization,
+ * however it's not critical to get it right, the table will still
+ * perform well. If you have no idea of how many entries to expect
+ * then pass 0, a reasonable default will be selected.
+ * tbl
+ * Pointer to a hash_table_t variable which is initialized to the
+ * new hash table (i.e. the returned table). If the table fails to
+ * be created due to errors the tbl parameter will be set to NULL
+ * and the return value will be a HASH error code.
+ * delete_callback
+ * Pointer to a function which will be called just prior to a hash
+ * entry being deleted. This is useful when the hash value has
+ * items which may need to be disposed of. The delete_callback may
+ * be NULL.
+ * delete_private_data
+ * Void pointer passed to the delete_callback, this way delete
+ * callbacks can have a private context to reach data they need to
+ * use before performing their operations. delete_private_data
+ * may be NULL.
+ *
+ * hash_create is identical to calling:
+ *
+ * hash_create_ex(count, tbl, 0, 0, 0, 0, NULL, NULL, NULL,
+ * delete_callback, delete_private_data);
*/
int hash_create(unsigned long count, hash_table_t **tbl,
hash_delete_callback *delete_callback,
@@ -217,22 +244,32 @@ int hash_create(unsigned long count, hash_table_t **tbl,
/*
* Create a new hash table and fine tune it's configurable parameters.
- * Refer to CACM article for explanation of parameters.
+ * Refer to CACM article for explanation of parameters. See
+ * hash_create for other parameters and return values.
*
- * directory_bits: number of address bits allocated to top level directory array.
- * segment_bits: number of address bits allocated to segment array.
- * min_load_factor: table contracted when the ratio of entry count to bucket count
- * is less than the min_load_factor the table is contracted.
- * max_load_factor: table expanded when the ratio of entry count to bucket count
- * is greater than the max_load_factor the table is expanded.
- * alloc_func: function pointer for allocation
- * free_func: funciton pointer for freeing memory allocated with alloc_func
- * alloc_private data: data passed to the alloc and free functions so that
- * custom functions can refernce other private data they may
- * need during their execution without having to use global
- * variables.
+ * directory_bits
+ * Number of address bits allocated to top level directory array.
+ * If directory_bits or segment_bits is zero then directory_bits
+ * and segment_bits will be computed based on the count parameter.
+ * segment_bits
+ * number of address bits allocated to segment array.
+ * min_load_factor
+ * The table contracted when the ratio of entry count to bucket count
+ * is less than the min_load_factor the table is contracted.
+ * max_load_factor
+ * The table expanded when the ratio of entry count to bucket count
+ * is greater than the max_load_factor the table is expanded.
+ * alloc_func
+ * Function pointer for allocation.
+ * free_func
+ * Function pointer for freeing memory allocated with alloc_func.
+ * alloc_private_data
+ * Data pointer passed to the alloc and free functions so that
+ * custom functions can reference other private data they may need
+ * during their execution without having to use global variables.
*
- * Note directory_bits + segment_bits must be <= number of bits in unsigned long
+ * Note directory_bits + segment_bits must be <= number of bits in
+ * unsigned long
*/
int hash_create_ex(unsigned long count, hash_table_t **tbl,
unsigned int directory_bits,
diff --git a/dhash/examples/dhash_test.c b/dhash/examples/dhash_test.c
index 6c02de1..8c11227 100644
--- a/dhash/examples/dhash_test.c
+++ b/dhash/examples/dhash_test.c
@@ -132,7 +132,9 @@ typedef struct test_val_t {
int main(int argc, char **argv)
{
test_val_t *test = NULL;
- long i, k;
+ long i, j, k;
+ bool duplicate;
+ long val;
int status;
hash_value_t value;
hash_value_t old_value;
@@ -141,8 +143,10 @@ int main(int argc, char **argv)
char buf[1024];
hash_table_t *table = NULL;
unsigned long callback_count = 0;
- unsigned int directory_bits = HASH_DEFAULT_DIRECTORY_BITS;
- unsigned int segment_bits = HASH_DEFAULT_SEGMENT_BITS;
+ unsigned long table_size = 0;
+ unsigned int seed = time(0);
+ unsigned int directory_bits = 0;
+ unsigned int segment_bits = 0;
unsigned long min_load_factor = HASH_DEFAULT_MIN_LOAD_FACTOR;
unsigned long max_load_factor = HASH_DEFAULT_MAX_LOAD_FACTOR;
@@ -153,20 +157,22 @@ int main(int argc, char **argv)
{"count", 1, 0, 'c'},
{"verbose", 1, 0, 'v'},
{"quiet", 1, 0, 'q'},
+ {"table-size", 1, 0, 't'},
{"directory-bits", 1, 0, 'd'},
{"segment-bits", 1, 0, 's'},
{"min-load-factor", 1, 0, 'l'},
{"max-load-factor", 1, 0, 'h'},
+ {"seed", 1, 0, 'r'},
{0, 0, 0, 0}
};
- arg = getopt_long(argc, argv, "c:vqd:s:l:h:",
+ arg = getopt_long(argc, argv, "c:vqt:d:s:l:h:r:",
long_options, &option_index);
if (arg == -1) break;
switch (arg) {
case 'c':
- max_test = atol(optarg);
+ max_test = strtoul(optarg, NULL, 0);
break;
case 'v':
verbose = 1;
@@ -174,6 +180,9 @@ int main(int argc, char **argv)
case 'q':
verbose = 0;
break;
+ case 't':
+ table_size = strtoul(optarg, NULL, 0);
+ break;
case 'd':
directory_bits = atoi(optarg);
break;
@@ -181,10 +190,13 @@ int main(int argc, char **argv)
segment_bits = atoi(optarg);
break;
case 'l':
- min_load_factor = atol(optarg);
+ min_load_factor = strtoul(optarg, NULL, 0);
break;
case 'h':
- max_load_factor = atol(optarg);
+ max_load_factor = strtoul(optarg, NULL, 0);
+ break;
+ case 'r':
+ seed = strtoul(optarg, NULL, 0);
break;
}
}
@@ -204,10 +216,11 @@ int main(int argc, char **argv)
/* Initialize the random number generator */
- srandom(time(0));
+ srandom(seed);
+ printf("random seed: %#x\n", seed);
/* Create the hash table as small as possible to exercise growth */
- if ((status = hash_create_ex(1, &table,
+ if ((status = hash_create_ex(table_size, &table,
directory_bits, segment_bits,
min_load_factor, max_load_factor,
NULL, NULL, NULL,
@@ -218,7 +231,19 @@ int main(int argc, char **argv)
/* Initialize the array of test values */
for (i = 0; i < max_test; i++) {
- test[i].val = random();
+ /* Get random value, make sure it's unique */
+ duplicate = true;
+ while (duplicate) {
+ duplicate = false;
+ val = random();
+ for (j = 0; j < i; j++) {
+ if (test[j].val == val) {
+ duplicate = true;
+ break;
+ }
+ }
+ }
+ test[i].val = val;
/* If the value is odd we'll use a string as the key,
* otherwise we'll use an unsigned long as the key */
if (test[i].val & 1) {
@@ -228,6 +253,8 @@ int main(int argc, char **argv)
}
}
+ printf("Completed building test values, beginning test ...\n");
+
/* Enter all the test values into the hash table */
for (i = 0; i < max_test; i++) {
if (test[i].val & 1) {