HashTable的一个详细实现

来源:互联网 编辑:程序博客网 时间:2024/05/12 12:24

注:本文在一个较粗糙的版本( http://blog.csdn.net/aishen944/article/details/1483516 )的基础上修改完善而成。

1,修改了原文代码中的错误

2,主要解决了在扩容时hash效率较差的问题


 #ifndef _HASHTABLE_H
#define _HASHTABLE_H

/*
 * Open-addressing hash table mapping pointer keys to pointer values.
 *
 * The table stores the key and value pointers as given: it never copies
 * or frees what they point to. The all-ones pointer value is reserved
 * internally as the "empty slot" marker and must not be used as a key.
 */
struct hashtable;

/*
 * Create a table with room for at least `size` entries (the slot count is
 * rounded up to a prime). `hash_func` maps a key to a hash value and
 * `test_func` returns non-zero when two keys are equal; passing NULL for
 * either selects the built-in pointer-identity version.
 */
struct hashtable *hashtable_create(unsigned long size,
                                   unsigned long (*hash_func)(const void *key),
                                   int (*test_func)(const void *key1, const void *key2));

/* Create a table whose keys are NUL-terminated strings compared by content. */
struct hashtable *make_string_hashtable(unsigned long size);

/* Insert key -> value. Returns 1 if inserted, 0 if the key is already present. */
int hashtable_put(struct hashtable *ht, const void *key, void *value);

/*
 * Return the value stored under `key`, or NULL when the key is absent.
 * A stored NULL value cannot be told apart from "absent"; use
 * hashtable_contains() for that.
 */
void *hashtable_get(struct hashtable *ht, const void *key);

/* Remove `key`. Returns 1 if an entry was removed, 0 if the key was not found. */
int hashtable_remove(struct hashtable *ht, const void *key);

/* Return non-zero when `key` is present. */
int hashtable_contains(struct hashtable *ht, const void *key);

/* Replace the value of an existing key. Returns 1 on success, 0 if the key is absent. */
int hashtable_set(struct hashtable *ht, const void *key, void *newvalue);

/* Number of entries currently stored. */
unsigned long hashtable_count(struct hashtable *ht);

/*
 * Call mapfunc(key, value, maparg) for each stored entry, in slot order.
 * Iteration stops as soon as mapfunc returns 0.
 */
void hashtable_map(struct hashtable *ht,
                   int (*mapfunc)(void *, void *, void *), void *maparg);

/* Remove every entry; the table itself stays usable. */
void hashtable_clear(struct hashtable *ht);

/* Release the table. Keys and values are not freed. */
void hashtable_close(struct hashtable *ht);

#endif /* _HASHTABLE_H */

#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef __cplusplus
/* Only the C++ test driver at the end of the file needs these. */
#include <iostream>
using namespace std;
#endif
// #include "hashtable.h"

/* Maximum load factor: the table grows once count reaches size * this. */
#define HASH_MAX_FULLNESS 0.75
/* Multiplier applied to the slot count before rounding up to a prime. */
#define HASH_RESIZE_FACTOR 2

/*
 * 0/NULL is allowed as a key, so the all-ones pointer marks an empty slot
 * instead. That value therefore cannot be stored as a key.
 */
#define INVALID_PTR ((void *)~(uintptr_t)0)

/*
 * Macro forms retained for source compatibility; the code below uses the
 * type-checked inline helpers hash_position() / non_empty() instead.
 */
#define INVALID_PTR_BYTE 0xff
#define HASH_POSITION(key, hash_func, size) ((hash_func)(key) % (size))
#define NON_EMPTY(mapping) ((mapping)->key != INVALID_PTR)

typedef unsigned long (*hash_func_t)(const void *key);
typedef int (*test_func_t)(const void *key1, const void *key2);

/* One slot of the table. */
struct hash_mapping {
    void *key;
    void *value;
};

/* Home slot of `key` in a table of `size` slots. */
static inline unsigned long hash_position(const void *key, hash_func_t hash_func,
                                          unsigned long size)
{
    return hash_func(key) % size;
}

/* Non-zero when the slot holds an entry. */
static inline int non_empty(const struct hash_mapping *mapping)
{
    return mapping->key != INVALID_PTR;
}

struct hashtable {
    hash_func_t          hash_func;        /* Hash function pointer. */
    test_func_t          test_func;        /* Key equality function pointer. */
    struct hash_mapping *mappings;         /* Slot array of `size` elements. */
    unsigned long        count;            /* Number of occupied slots. */
    unsigned long        size;             /* Number of slots (always a prime). */
    int                  prime_offset;     /* Index into the prime table to resume from. */
    unsigned long        resize_threshold; /* Grow once count reaches this. */
};

/************************************************************************
 * File-local helpers.
 ***********************************************************************/

/*
 * Return the first prime >= size from the built-in table, searching from
 * index *prime_offset, and advance *prime_offset past the prime returned.
 * Aborts when no table entry is large enough.
 */
static unsigned long prime_size(unsigned long size, int *prime_offset)
{
    static const unsigned long primes[] = {
        13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
        1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
        19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
        204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
        1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
        10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
        50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
        243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
        1174703521, 1527114613, 1837299131, 2147483647
    };
    const size_t nprimes = sizeof primes / sizeof primes[0];
    size_t i;

    for (i = (size_t)*prime_offset; i < nprimes; ++i) {
        if (primes[i] >= size) {
            *prime_offset = (int)(i + 1);
            return primes[i];
        }
    }
    abort(); /* Requested size is beyond the largest supported prime. */
}

/*
 * Default hash function, used when the caller supplies none.
 * This is Robert Jenkins' 32 bit mix function with a simple adaptation
 * for 64-bit values.
 */
static unsigned long hash_pointer(const void *key)
{
    unsigned long hashval = (unsigned long)(uintptr_t)key;

    hashval += (hashval << 12);
    hashval ^= (hashval >> 22);
    hashval += (hashval << 4);
    hashval ^= (hashval >> 9);
    hashval += (hashval << 10);
    hashval ^= (hashval >> 2);
    hashval += (hashval << 7);
    hashval ^= (hashval >> 12);
#if ULONG_MAX > 4294967295UL
    hashval += (hashval << 44);
    hashval ^= (hashval >> 54);
    hashval += (hashval << 36);
    hashval ^= (hashval >> 41);
    hashval += (hashval << 42);
    hashval ^= (hashval >> 34);
    hashval += (hashval << 39);
    hashval ^= (hashval >> 44);
#endif
    return hashval;
}

/* Default key comparison: pointer identity. */
static int cmp_pointer(const void *key1, const void *key2)
{
    return key1 == key2;
}

/*
 * String hash (h = h * 31 + c), taken from GNOME's glib and modified to
 * use standard C types. Used only by string tables.
 */
static unsigned long hash_string(const void *key)
{
    const char *p = (const char *)key;
    unsigned int h = *p;

    if (h) {
        for (p += 1; *p != '\0'; p++)
            h = (h << 5) - h + *p;
    }
    return h;
}

/* String key comparison by content. Used only by string tables. */
static int string_cmp_pointer(const void *key1, const void *key2)
{
    return !strcmp((const char *)key1, (const char *)key2);
}

/* Mark one slot as empty. */
static void mark_empty(struct hash_mapping *slot)
{
    slot->key = INVALID_PTR;
    slot->value = NULL;
}

/* Mark all `n` slots as empty. */
static void reset_mappings(struct hash_mapping *slots, unsigned long n)
{
    unsigned long i;

    for (i = 0; i < n; ++i)
        mark_empty(slots + i);
}

/* Allocate `n` empty slots; returns NULL on overflow or allocation failure. */
static struct hash_mapping *alloc_mappings(unsigned long n)
{
    struct hash_mapping *slots;

    if (n > (size_t)-1 / sizeof *slots)
        return NULL;
    /* The cast is required because this file is also compiled as C++. */
    slots = (struct hash_mapping *)malloc(n * sizeof *slots);
    if (slots)
        reset_mappings(slots, n);
    return slots;
}

/* Entry count at which a table of `size` slots must grow. */
static unsigned long threshold_for(unsigned long size)
{
    return (unsigned long)(size * HASH_MAX_FULLNESS);
}

/*
 * Core lookup (linear probing). Returns the slot holding `key`, or the
 * first empty slot on its probe sequence when the key is absent. Returns
 * NULL only if every slot is occupied by some other key.
 */
static struct hash_mapping *find_mapping(struct hashtable *ht, const void *key)
{
    /* Hash once; every probe is an offset from the home slot. */
    const unsigned long home = hash_position(key, ht->hash_func, ht->size);
    unsigned long step;

    for (step = 0; step < ht->size; ++step) {
        struct hash_mapping *slot = ht->mappings + (home + step) % ht->size;

        if (!non_empty(slot) || ht->test_func(slot->key, key))
            return slot;
    }
    return NULL;
}

/*
 * Grow the table: allocate a larger slot array and rehash every entry
 * into it. This is expensive. Returns 1 on success; returns 0 and leaves
 * the table untouched when `ht` is NULL or the allocation fails.
 */
static int grow_hashtable(struct hashtable *ht)
{
    struct hash_mapping *fresh;
    unsigned long newsize;
    unsigned long i;
    int offset;

    if (!ht)
        return 0;

    /* Work on a copy so a failed allocation does not skip a prime. */
    offset = ht->prime_offset;
    newsize = prime_size(ht->size * HASH_RESIZE_FACTOR, &offset);
    fresh = alloc_mappings(newsize);
    if (!fresh)
        return 0;

    for (i = 0; i < ht->size; ++i) {
        const struct hash_mapping *old = ht->mappings + i;
        unsigned long pos;

        if (!non_empty(old))
            continue;
        /* Keys are already unique, so just probe for the first free slot;
           newsize > count guarantees one exists. */
        pos = hash_position(old->key, ht->hash_func, newsize);
        while (non_empty(fresh + pos))
            pos = (pos + 1) % newsize;
        fresh[pos] = *old;
    }

    free(ht->mappings); /* The old array is no longer referenced. */
    ht->mappings = fresh;
    ht->size = newsize;
    ht->prime_offset = offset;
    ht->resize_threshold = threshold_for(newsize);
    return 1;
}

/*
 * Empty the slot at index `hole` and close the gap (backward-shift
 * deletion). Simply blanking the slot would cut the probe chain of every
 * entry that had collided past it, making those entries unreachable.
 */
static void delete_slot(struct hashtable *ht, unsigned long hole)
{
    struct hash_mapping *slots = ht->mappings;
    const unsigned long size = ht->size;
    unsigned long probe;

    mark_empty(slots + hole);
    for (probe = (hole + 1) % size; non_empty(slots + probe);
         probe = (probe + 1) % size) {
        const unsigned long home = hash_position(slots[probe].key, ht->hash_func, size);
        int movable;

        /* An entry may fill the hole unless its home slot lies cyclically
           in (hole, probe]; moving it then would put it before its home. */
        if (hole < probe)
            movable = (home <= hole || home > probe);
        else
            movable = (home <= hole && home > probe);

        if (movable) {
            slots[hole] = slots[probe];
            mark_empty(slots + probe);
            hole = probe;
        }
    }
}

/***************************************************************************
 * Hash table public functions.
 **************************************************************************/

/*
 * Create a table with room for at least `size` entries. NULL `hash_func`
 * or `test_func` selects the pointer-identity default. Returns NULL when
 * memory cannot be allocated.
 */
struct hashtable *hashtable_create(unsigned long size, hash_func_t hash_func,
                                   test_func_t test_func)
{
    struct hashtable *ht = (struct hashtable *)malloc(sizeof *ht);
    unsigned long hsize;

    if (!ht)
        return NULL;

    ht->prime_offset = 0;
    hsize = prime_size(size + 1, &ht->prime_offset);
    ht->mappings = alloc_mappings(hsize);
    if (!ht->mappings) {
        free(ht);
        return NULL;
    }
    ht->hash_func = hash_func ? hash_func : hash_pointer;
    ht->test_func = test_func ? test_func : cmp_pointer;
    ht->count = 0;
    ht->size = hsize;
    ht->resize_threshold = threshold_for(hsize);
    return ht;
}

/* Create a table keyed by NUL-terminated strings compared by content. */
struct hashtable *make_string_hashtable(unsigned long size)
{
    return hashtable_create(size, hash_string, string_cmp_pointer);
}

/*
 * Insert key -> value, resolving collisions by linear probing.
 * Returns 1 when inserted. Returns 0 when the key is already present,
 * when `key` is the reserved empty marker, or when the table is full and
 * could not be grown.
 */
int hashtable_put(struct hashtable *ht, const void *key, void *value)
{
    struct hash_mapping *slot;

    if (key == INVALID_PTR)
        return 0;

    if (ht->count >= ht->resize_threshold && !grow_hashtable(ht)) {
        /* Growth failed: still insert while a spare empty slot remains,
           because probing relies on at least one empty slot to stop. */
        if (ht->count + 1 >= ht->size)
            return 0;
    }

    slot = find_mapping(ht, key);
    if (!slot || non_empty(slot))
        return 0; /* Key already present (or no free slot). */

    slot->key = (void *)key;
    slot->value = value;
    ht->count += 1;
    return 1;
}

/* Return the value stored under `key`, or NULL when the key is absent. */
void *hashtable_get(struct hashtable *ht, const void *key)
{
    struct hash_mapping *slot = find_mapping(ht, key);

    return (slot && non_empty(slot)) ? slot->value : NULL;
}

/* Remove `key`. Returns 1 when an entry was removed, 0 when not found. */
int hashtable_remove(struct hashtable *ht, const void *key)
{
    struct hash_mapping *slot = find_mapping(ht, key);

    if (!slot || !non_empty(slot))
        return 0; /* Not found. */

    delete_slot(ht, (unsigned long)(slot - ht->mappings));
    ht->count -= 1;
    return 1;
}

/* Return non-zero when `key` is present. */
int hashtable_contains(struct hashtable *ht, const void *key)
{
    struct hash_mapping *slot = find_mapping(ht, key);

    return slot && non_empty(slot);
}

/* Replace the value of an existing key. Returns 0 when the key is absent. */
int hashtable_set(struct hashtable *ht, const void *key, void *newvalue)
{
    struct hash_mapping *slot = find_mapping(ht, key);

    if (!slot || !non_empty(slot))
        return 0; /* Not exist. */

    slot->value = newvalue;
    return 1;
}

/* Number of entries currently stored. */
unsigned long hashtable_count(struct hashtable *ht)
{
    return ht->count;
}

/*
 * Call mapfunc(key, value, maparg) for each occupied slot in slot order;
 * stop as soon as mapfunc returns 0.
 */
void hashtable_map(struct hashtable *ht, int (*mapfunc)(void *, void *, void *),
                   void *maparg)
{
    const unsigned long hsize = ht->size;
    unsigned long i;

    for (i = 0; i < hsize; ++i) {
        struct hash_mapping *mp = ht->mappings + i;

        if (non_empty(mp) && !mapfunc(mp->key, mp->value, maparg))
            return;
    }
}

/* Remove every entry; the slot array keeps its current size. */
void hashtable_clear(struct hashtable *ht)
{
    reset_mappings(ht->mappings, ht->size);
    ht->count = 0;
}

/* Free the table. Keys and values are not freed. NULL is ignored. */
void hashtable_close(struct hashtable *ht)
{
    if (!ht)
        return;
    free(ht->mappings);
    free(ht);
}

测试代码

int main(){struct hashtable* ht = make_string_hashtable(10);//测试扩容的情况//char *strKey[] = {"123","234","345","456","df","sdf","wer","sdf","xd","dsf","sxdf","dfdf","sdf"};//char *strValue[] = {"123","234","345","456","df","sdf","wer","sdf","xd","dsf","sxdf","dfdf","sdf"};//相同键值不同的情况,第二个键值不会被插入//char *strKey[] = {"123","123","345","456","df","sdf","wer","sdf","xd","dsf","sxdf","dfdf","sdf"};//char *strValue[] = {"xxx","yyy","345","456","df","sdf","wer","sdf","xd","dsf","sxdf","dfdf","sdf"};//修改一下hash函数,使得每一次都返回相同的值,测试碰撞问题//char *strKey[] = {"123","234","345"};//char *strValue[] = {"xxx","yyy","zzz"};//修改一下hash函数,使得每一次都返回相同的值,测试碰撞问题和扩容问题char *strKey[] = {"123","123","345","456","df","sdf","wer","sdf","xd","dsf","sxdf","dfdf","sdf"};char *strValue[] = {"xxx","yyy","345","456","df","sdf","wer","sdf","xd","dsf","sxdf","dfdf","sdf"};int i;for (i = 0; i < 13; ++i)hashtable_put(ht,strKey[i],strValue[i]);for(i = 0; i < 13; ++i)if(hashtable_get(ht,strKey[i]))cout<<(char *)hashtable_get(ht,strKey[i])<<endl;if (!hashtable_get(ht,"346"))cout<<"not in"<<endl;}