-rw-r--r--   dhash/dhash.c                 133
-rw-r--r--   dhash/dhash.h                  79
-rw-r--r--   dhash/examples/dhash_test.c    47
3 files changed, 182 insertions, 77 deletions
diff --git a/dhash/dhash.c b/dhash/dhash.c
index f92e463..79f9c38 100644
--- a/dhash/dhash.c
+++ b/dhash/dhash.c
@@ -53,10 +53,13 @@
#define PRIME_1 37
#define PRIME_2 1048583
- /*
- * Fast arithmetic, relying on powers of 2, and on pre-processor
- * concatenation property
- */
+#ifndef MIN
+ #define MIN(a,b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#ifndef MAX
+ #define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#endif
#define halloc(table, size) table->halloc(size, table->halloc_pvt)
#define hfree(table, ptr) table->hfree(ptr, table->halloc_pvt)
@@ -247,7 +250,7 @@ static int expand_table(hash_table_t *table)
if (table->bucket_count < (table->directory_size << table->segment_size_shift)) {
#ifdef DEBUG
- if (debug_level >= 1)
+ if (debug_level >= 2)
fprintf(stderr, "expand_table on entry: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n",
table->bucket_count, table->segment_count, table->p, table->maxp);
#endif
@@ -314,7 +317,7 @@ static int expand_table(hash_table_t *table)
}
}
#ifdef DEBUG
- if (debug_level >= 1)
+ if (debug_level >= 2)
fprintf(stderr, "expand_table on exit: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n",
table->bucket_count, table->segment_count, table->p, table->maxp);
#endif
@@ -330,9 +333,9 @@ static int contract_table(hash_table_t *table)
segment_t *old_segment, *new_segment;
element_t *current;
- if (table->bucket_count > table->segment_size) {
+ if ((table->bucket_count > table->segment_size) && (table->segment_count > 1)) {
#ifdef DEBUG
- if (debug_level >= 1)
+ if (debug_level >= 2)
fprintf(stderr, "contract_table on entry: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n",
table->bucket_count, table->segment_count, table->p, table->maxp);
#endif
@@ -395,7 +398,7 @@ static int contract_table(hash_table_t *table)
}
#ifdef DEBUG
- if (debug_level >= 1)
+ if (debug_level >= 2)
fprintf(stderr, "contract_table on exit: bucket_count=%lu, segment_count=%lu p=%lu maxp=%lu\n",
table->bucket_count, table->segment_count, table->p, table->maxp);
#endif
@@ -428,7 +431,7 @@ static int lookup(hash_table_t *table, hash_key_t *key, element_t **element_arg,
current_segment = table->directory[segment_dir];
#ifdef DEBUG
- if (debug_level >= 2)
+ if (debug_level >= 3)
fprintf(stderr, "lookup: h=%lu, segment_dir=%lu, segment_index=%lu current_segment=%p\n",
h, segment_dir, segment_index, current_segment);
#endif
@@ -505,19 +508,47 @@ int hash_create_ex(unsigned long count, hash_table_t **tbl,
hash_free_func *free_func,
void *alloc_private_data,
hash_delete_callback *delete_callback,
- void *delete_private_data)
-{
+ void *delete_private_data) {
unsigned long i;
- unsigned int n_addr_bits;
+ unsigned int n_addr_bits, requested_bits;
+ unsigned int requested_directory_bits;
+ unsigned int requested_segment_bits;
address_t addr;
hash_table_t *table = NULL;
+ /* Initialize to NULL in case of an early return due to an error */
+ *tbl = NULL;
+
if (alloc_func == NULL) alloc_func = sys_malloc_wrapper;
if (free_func == NULL) free_func = sys_free_wrapper;
/* Compute directory and segment parameters */
- if (directory_bits == 0) directory_bits = HASH_DEFAULT_DIRECTORY_BITS;
- if (segment_bits == 0) segment_bits = HASH_DEFAULT_SEGMENT_BITS;
+
+ /* compute power of 2 >= count; it's the number of requested buckets */
+ if (count > 1) {
+ for (requested_bits = 0; (1 << requested_bits) < count; requested_bits++);
+ } else {
+ requested_bits = 1;
+ }
+
+ /*
+ * If caller didn't specify directory & segment allocation then
+ * compute it from the requested count.
+ */
+ if (directory_bits == 0 || segment_bits == 0) {
+ /* Equally divide buckets between the directory and segments */
+ requested_directory_bits = MAX(requested_bits >> 1, 1);
+ requested_segment_bits = MAX(requested_bits - requested_directory_bits, 1);
+
+ /* If the requested count wasn't specified pick a default */
+ if (count == 0) {
+ directory_bits = MAX(requested_directory_bits, HASH_DEFAULT_DIRECTORY_BITS);
+ segment_bits = MAX(requested_segment_bits, HASH_DEFAULT_SEGMENT_BITS);
+ } else {
+ directory_bits = requested_directory_bits;
+ segment_bits = requested_segment_bits;
+ }
+ }
for (addr = ~0, n_addr_bits = 0; addr; addr >>= 1, n_addr_bits++);
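
The bucket-sizing logic added above can be exercised in isolation. Below is a minimal standalone sketch that mirrors the patch's arithmetic; split_bits() is a hypothetical name, and the count == 0 fallback to the HASH_DEFAULT_* constants is left out:

    #include <stdio.h>

    #define MAX(a,b) (((a) > (b)) ? (a) : (b))

    /* Derive directory_bits and segment_bits from a requested entry count,
     * the way hash_create_ex() now does when the caller passes 0 for both. */
    static void split_bits(unsigned long count,
                           unsigned int *directory_bits,
                           unsigned int *segment_bits)
    {
        unsigned int requested_bits;

        /* smallest power of 2 >= count, expressed as a bit count */
        if (count > 1) {
            for (requested_bits = 0; (1UL << requested_bits) < count; requested_bits++);
        } else {
            requested_bits = 1;
        }

        /* split the address bits roughly evenly between directory and segments */
        *directory_bits = MAX(requested_bits >> 1, 1);
        *segment_bits   = MAX(requested_bits - *directory_bits, 1);
    }

    int main(void)
    {
        unsigned int d, s;

        split_bits(1000, &d, &s);    /* 1000 entries -> requested_bits = 10 */
        printf("directory_bits=%u segment_bits=%u\n", d, s);    /* 5 and 5 */
        return 0;
    }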
@@ -534,45 +565,46 @@ int hash_create_ex(unsigned long count, hash_table_t **tbl,
table->halloc_pvt = alloc_private_data;
table->directory_size_shift = directory_bits;
- for (i = 0, table->directory_size = 1; i < table->directory_size_shift; i++, table->directory_size <<= 1);
+ table->directory_size = directory_bits ? 1 << directory_bits : 0;
table->segment_size_shift = segment_bits;
- for (i = 0, table->segment_size = 1; i < table->segment_size_shift; i++, table->segment_size <<= 1);
-
+ table->segment_size = segment_bits ? 1 << segment_bits : 0;
/* Allocate directory */
table->directory = (segment_t **)halloc(table, table->directory_size * sizeof(segment_t *));
if (table->directory == NULL) {
+ hash_destroy(table);
return HASH_ERROR_NO_MEMORY;
}
memset(table->directory, 0, table->directory_size * sizeof(segment_t *));
/*
- * Adjust count to be nearest higher power of 2, minimum SEGMENT_SIZE, then
- * convert into segments.
+ * If one wanted to pre-allocate all the buckets necessary to meet the needs
+ * of the requested count it would be done like this:
+ *
+ * table->segment_count = MIN((count + table->segment_size-1) / table->segment_size,
+ * table->directory_size);
+ *
+ * But with dynamic hash tables there is little point to this, the whole idea is to
+ * allow the table to grow/shrink dynamically, therefore we start with just one
+ * segment of buckets, the minimum necessary.
*/
- i = table->segment_size;
- while (i < count)
- i <<= 1;
- count = i >> table->segment_size_shift;
-
- table->segment_count = 0;
+ table->segment_count = 1;
table->p = 0;
table->entry_count = 0;
table->delete_callback = delete_callback;
table->delete_pvt = delete_private_data;
/*
- * Allocate initial 'i' segments of buckets
+ * Allocate initial segments of buckets
*/
- for (i = 0; i < count; i++) {
+ for (i = 0; i < table->segment_count; i++) {
table->directory[i] = (segment_t *)halloc(table, table->segment_size * sizeof(segment_t));
if (table->directory[i] == NULL) {
hash_destroy(table);
return HASH_ERROR_NO_MEMORY;
}
memset(table->directory[i], 0, table->segment_size * sizeof(segment_t));
- table->segment_count++;
}
table->bucket_count = table->segment_count << table->segment_size_shift;
table->maxp = table->bucket_count;
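
To make the comment above concrete, here is a small sketch with hypothetical tuning values (none of them taken from the patch) comparing the pre-allocation formula with the single-segment start the patch adopts:

    #include <stdio.h>

    #ifndef MIN
        #define MIN(a,b) (((a) < (b)) ? (a) : (b))
    #endif

    int main(void)
    {
        /* hypothetical tuning: segment_bits = 4, directory_bits = 6 */
        unsigned long segment_size   = 1UL << 4;    /* 16 buckets per segment */
        unsigned long directory_size = 1UL << 6;    /* room for 64 segments   */
        unsigned long count          = 100;         /* requested entry count  */

        /* segments needed to cover 'count' up front, per the comment above */
        unsigned long prealloc = MIN((count + segment_size - 1) / segment_size,
                                     directory_size);

        printf("pre-allocated segments: %lu (%lu buckets)\n",
               prealloc, prealloc * segment_size);
        /* the patch instead starts with one segment and lets expand_table()
         * grow the table on demand */
        printf("initial segments:       1 (%lu buckets)\n", segment_size);
        return 0;
    }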
@@ -580,9 +612,16 @@ int hash_create_ex(unsigned long count, hash_table_t **tbl,
table->max_load_factor = max_load_factor == 0 ? HASH_DEFAULT_MAX_LOAD_FACTOR : max_load_factor;
#if DEBUG
- if (debug_level >= 1)
- fprintf(stderr, "hash_create_ex: table=%p count=%lu maxp=%lu segment_count=%lu\n",
- table, count, table->maxp, table->segment_count);
+ if (debug_level >= 1) {
+ fprintf(stderr, "hash_create_ex: count=%lu available buckets=%lu bucket_count=%lu maxp=%lu\n",
+ count, table->directory_size*table->segment_size, table->bucket_count, table->maxp);
+ fprintf(stderr, " directory_bits=%u segment_bits=%u requested_bits=%u\n",
+ directory_bits, segment_bits, requested_bits);
+ fprintf(stderr, " directory_size=%lu segment_size=%lu segment_count=%lu\n",
+ table->directory_size, table->segment_size, table->segment_count);
+ fprintf(stderr, " min_load_factor=%lu max_load_factor=%lu\n",
+ table->min_load_factor, table->max_load_factor);
+ }
#endif
#ifdef HASH_STATISTICS
memset(&table->statistics, 0, sizeof(table->statistics));
@@ -613,25 +652,27 @@ int hash_destroy(hash_table_t *table)
if (!table) return HASH_ERROR_BAD_TABLE;
if (table != NULL) {
- for (i = 0; i < table->segment_count; i++) {
- /* test probably unnecessary */
- if ((s = table->directory[i]) != NULL) {
- for (j = 0; j < table->segment_size; j++) {
- p = s[j];
- while (p != NULL) {
- q = p->next;
- hdelete_callback(table, HASH_TABLE_DESTROY, &p->entry);
- if (p->entry.key.type == HASH_KEY_STRING) {
- hfree(table, (char *)p->entry.key.str);
+ if (table->directory) {
+ for (i = 0; i < table->segment_count; i++) {
+ /* test probably unnecessary */
+ if ((s = table->directory[i]) != NULL) {
+ for (j = 0; j < table->segment_size; j++) {
+ p = s[j];
+ while (p != NULL) {
+ q = p->next;
+ hdelete_callback(table, HASH_TABLE_DESTROY, &p->entry);
+ if (p->entry.key.type == HASH_KEY_STRING) {
+ hfree(table, (char *)p->entry.key.str);
+ }
+ hfree(table, (char *)p);
+ p = q;
}
- hfree(table, (char *)p);
- p = q;
}
+ hfree(table, s);
}
- hfree(table, s);
}
+ hfree(table, table->directory);
}
- hfree(table, table->directory);
hfree(table, table);
table = NULL;
}
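
The NULL-directory guard added to hash_destroy() pairs with the new hash_destroy(table) call on the directory-allocation failure path in hash_create_ex(): destroy now has to cope with a partially constructed table. A minimal sketch of the general pattern, using hypothetical names rather than the library's own:

    #include <stdlib.h>

    /* hypothetical object with the same partial-construction problem */
    struct obj {
        int **directory;        /* may still be NULL if setup failed early */
        size_t segment_count;
    };

    /* The destructor tolerates a half-built object so the constructor's
     * error paths can simply call it and return. */
    void obj_destroy(struct obj *o)
    {
        size_t i;

        if (o == NULL)
            return;
        if (o->directory != NULL) {     /* the guard added above */
            for (i = 0; i < o->segment_count; i++)
                free(o->directory[i]);
            free(o->directory);
        }
        free(o);
    }

    int main(void)
    {
        obj_destroy(NULL);      /* also safe on a never-constructed object */
        return 0;
    }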
diff --git a/dhash/dhash.h b/dhash/dhash.h
index a10ed24..baa0d6a 100644
--- a/dhash/dhash.h
+++ b/dhash/dhash.h
@@ -203,13 +203,40 @@ const char* hash_error_string(int error);
* Create a new hash table with room for n initial entries. hash_create returns
* an opaque pointer to the new hash table in the table parameter. Functions
* operating on the hash table pass in this hash table pointer. This means you
- * may have as many concurrent hash tables as you want. The delete_callback
- * parameter is a pointer to a function which will be called just prior to a
- * hash entry being deleted. This is useful when the hash value has items which
- * may need to be disposed of. The delete_callback may be NULL.
- * The delete_private_data is data passed to the delete_callback, this way
- * custom callbacks can have a private context to reach data they need to use
- * before performning their operations. delete_private_data may be NULL.
+ * may have as many concurrent hash tables as you want.
+
+ * If the table creation is successful, tbl is set to the new table and
+ * HASH_SUCCESS is returned; otherwise tbl will be set to NULL and the
+ * return value will be a HASH error code.
+ *
+ * count
+ * Expected number of entries in the table. This is a tuning
+ * parameter because a dynamic hash table dynamically adjusts its
+ * internal data structure to optimize for the actual number of
+ * entries. The count parameter allows for some more optimization;
+ * however, it's not critical to get it right, and the table will still
+ * perform well. If you have no idea of how many entries to expect,
+ * pass 0 and a reasonable default will be selected.
+ * tbl
+ * Pointer to a hash_table_t variable which is initialized to the
+ * new hash table (i.e. the returned table). If the table fails to
+ * be created due to errors the tbl parameter will be set to NULL
+ * and the return value will be a HASH error code.
+ * delete_callback
+ * Pointer to a function which will be called just prior to a hash
+ * entry being deleted. This is useful when the hash value has
+ * items which may need to be disposed of. The delete_callback may
+ * be NULL.
+ * delete_private_data
+ * Void pointer passed to the delete_callback; this way delete
+ * callbacks can have a private context to reach data they need to
+ * use before performing their operations. delete_private_data
+ * may be NULL.
+ *
+ * hash_create is identical to calling:
+ *
+ * hash_create_ex(count, tbl, 0, 0, 0, 0, NULL, NULL, NULL,
+ * delete_callback, delete_private_data);
*/
int hash_create(unsigned long count, hash_table_t **tbl,
hash_delete_callback *delete_callback,
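
For reference, a minimal usage sketch of the interface documented above; error handling is shortened and the calls that actually add and look up entries are elided:

    #include <stdio.h>
    #include "dhash.h"

    int main(void)
    {
        hash_table_t *table = NULL;
        int status;

        /* count = 0 means "no idea how many entries"; the table picks a
         * reasonable default and grows/shrinks dynamically anyway.  The
         * delete callback and its private data may both be NULL. */
        status = hash_create(0, &table, NULL, NULL);
        if (status != HASH_SUCCESS) {
            fprintf(stderr, "hash_create failed: %s\n",
                    hash_error_string(status));
            return 1;
        }

        /* ... add and look up entries here ... */

        hash_destroy(table);
        return 0;
    }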
@@ -217,22 +244,32 @@ int hash_create(unsigned long count, hash_table_t **tbl,
/*
* Create a new hash table and fine tune it's configurable parameters.
- * Refer to CACM article for explanation of parameters.
+ * Refer to CACM article for explanation of parameters. See
+ * hash_create for other parameters and return values.
*
- * directory_bits: number of address bits allocated to top level directory array.
- * segment_bits: number of address bits allocated to segment array.
- * min_load_factor: table contracted when the ratio of entry count to bucket count
- * is less than the min_load_factor the table is contracted.
- * max_load_factor: table expanded when the ratio of entry count to bucket count
- * is greater than the max_load_factor the table is expanded.
- * alloc_func: function pointer for allocation
- * free_func: funciton pointer for freeing memory allocated with alloc_func
- * alloc_private data: data passed to the alloc and free functions so that
- * custom functions can refernce other private data they may
- * need during their execution without having to use global
- * variables.
+ * directory_bits
+ * Number of address bits allocated to top level directory array.
+ * If directory_bits or segment_bits is zero then directory_bits
+ * and segment_bits will be computed based on the count parameter.
+ * segment_bits
+ * Number of address bits allocated to the segment array.
+ * min_load_factor
+ * The table is contracted when the ratio of entry count to bucket
+ * count is less than the min_load_factor.
+ * max_load_factor
+ * The table is expanded when the ratio of entry count to bucket
+ * count is greater than the max_load_factor.
+ * alloc_func
+ * Function pointer for allocation.
+ * free_func
+ * Function pointer for freeing memory allocated with alloc_func.
+ * alloc_private_data
+ * Data pointer passed to the alloc and free functions so that
+ * custom functions can reference other private data they may need
+ * during their execution without having to use global variables.
*
- * Note directory_bits + segment_bits must be <= number of bits in unsigned long
+ * Note directory_bits + segment_bits must be <= number of bits in
+ * unsigned long
*/
int hash_create_ex(unsigned long count, hash_table_t **tbl,
unsigned int directory_bits,
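
And a matching sketch for hash_create_ex(), modeled on the call in examples/dhash_test.c further down; the count of 1000 is illustrative only:

    #include <stdio.h>
    #include "dhash.h"

    int main(void)
    {
        hash_table_t *table = NULL;
        int status;

        /* Expect roughly 1000 entries.  Passing 0 for directory_bits and
         * segment_bits lets the library derive them from the count, and 0
         * for the load factors selects the defaults; NULL alloc/free means
         * the system allocator, and no delete callback is registered. */
        status = hash_create_ex(1000, &table,
                                0, 0,          /* directory_bits, segment_bits */
                                0, 0,          /* min/max load factor */
                                NULL, NULL, NULL,
                                NULL, NULL);   /* delete callback + private data */
        if (status != HASH_SUCCESS) {
            fprintf(stderr, "hash_create_ex failed: %s\n",
                    hash_error_string(status));
            return 1;
        }

        hash_destroy(table);
        return 0;
    }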
diff --git a/dhash/examples/dhash_test.c b/dhash/examples/dhash_test.c
index 6c02de1..8c11227 100644
--- a/dhash/examples/dhash_test.c
+++ b/dhash/examples/dhash_test.c
@@ -132,7 +132,9 @@ typedef struct test_val_t {
int main(int argc, char **argv)
{
test_val_t *test = NULL;
- long i, k;
+ long i, j, k;
+ bool duplicate;
+ long val;
int status;
hash_value_t value;
hash_value_t old_value;
@@ -141,8 +143,10 @@ int main(int argc, char **argv)
char buf[1024];
hash_table_t *table = NULL;
unsigned long callback_count = 0;
- unsigned int directory_bits = HASH_DEFAULT_DIRECTORY_BITS;
- unsigned int segment_bits = HASH_DEFAULT_SEGMENT_BITS;
+ unsigned long table_size = 0;
+ unsigned int seed = time(0);
+ unsigned int directory_bits = 0;
+ unsigned int segment_bits = 0;
unsigned long min_load_factor = HASH_DEFAULT_MIN_LOAD_FACTOR;
unsigned long max_load_factor = HASH_DEFAULT_MAX_LOAD_FACTOR;
@@ -153,20 +157,22 @@ int main(int argc, char **argv)
{"count", 1, 0, 'c'},
{"verbose", 1, 0, 'v'},
{"quiet", 1, 0, 'q'},
+ {"table-size", 1, 0, 't'},
{"directory-bits", 1, 0, 'd'},
{"segment-bits", 1, 0, 's'},
{"min-load-factor", 1, 0, 'l'},
{"max-load-factor", 1, 0, 'h'},
+ {"seed", 1, 0, 'r'},
{0, 0, 0, 0}
};
- arg = getopt_long(argc, argv, "c:vqd:s:l:h:",
+ arg = getopt_long(argc, argv, "c:vqt:d:s:l:h:r:",
long_options, &option_index);
if (arg == -1) break;
switch (arg) {
case 'c':
- max_test = atol(optarg);
+ max_test = strtoul(optarg, NULL, 0);
break;
case 'v':
verbose = 1;
@@ -174,6 +180,9 @@ int main(int argc, char **argv)
case 'q':
verbose = 0;
break;
+ case 't':
+ table_size = strtoul(optarg, NULL, 0);
+ break;
case 'd':
directory_bits = atoi(optarg);
break;
@@ -181,10 +190,13 @@ int main(int argc, char **argv)
segment_bits = atoi(optarg);
break;
case 'l':
- min_load_factor = atol(optarg);
+ min_load_factor = strtoul(optarg, NULL, 0);
break;
case 'h':
- max_load_factor = atol(optarg);
+ max_load_factor = strtoul(optarg, NULL, 0);
+ break;
+ case 'r':
+ seed = strtoul(optarg, NULL, 0);
break;
}
}
@@ -204,10 +216,11 @@ int main(int argc, char **argv)
/* Initialize the random number generator */
- srandom(time(0));
+ srandom(seed);
+ printf("random seed: %#x\n", seed);
/* Create the hash table as small as possible to exercise growth */
- if ((status = hash_create_ex(1, &table,
+ if ((status = hash_create_ex(table_size, &table,
directory_bits, segment_bits,
min_load_factor, max_load_factor,
NULL, NULL, NULL,
@@ -218,7 +231,19 @@ int main(int argc, char **argv)
/* Initialize the array of test values */
for (i = 0; i < max_test; i++) {
- test[i].val = random();
+ /* Get random value, make sure it's unique */
+ duplicate = true;
+ while (duplicate) {
+ duplicate = false;
+ val = random();
+ for (j = 0; j < i; j++) {
+ if (test[j].val == val) {
+ duplicate = true;
+ break;
+ }
+ }
+ }
+ test[i].val = val;
/* If the value is odd we'll use a string as the key,
* otherwise we'll use an unsigned long as the key */
if (test[i].val & 1) {
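
The odd/even key-type split described in the comment above could be factored out roughly as follows. This is a sketch only: HASH_KEY_ULONG and the key's .ul member are assumed from the dhash key API, since only HASH_KEY_STRING and .str appear in this diff:

    #include <stdio.h>
    #include "dhash.h"

    /* Build the key for one test value: a string key for odd values, an
     * unsigned long key for even ones, mirroring the test program. */
    static void make_key(long val, char *buf, size_t buflen, hash_key_t *key)
    {
        if (val & 1) {
            snprintf(buf, buflen, "%ld", val);
            key->type = HASH_KEY_STRING;
            key->str = buf;
        } else {
            key->type = HASH_KEY_ULONG;     /* assumed constant, see note above */
            key->ul = (unsigned long)val;   /* assumed member, see note above */
        }
    }

    int main(void)
    {
        char buf[32];
        hash_key_t key;

        make_key(42, buf, sizeof(buf), &key);
        printf("built a key for 42 (type=%d)\n", key.type);
        return 0;
    }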
@@ -228,6 +253,8 @@ int main(int argc, char **argv)
}
}
+ printf("Completed building test values, beginning test ...\n");
+
/* Enter all the test values into the hash table */
for (i = 0; i < max_test; i++) {
if (test[i].val & 1) {