【STL学习】自己动手C++编程实现hash table(散列表)

来源:互联网 发布:手机广播软件 编辑:程序博客网 时间:2024/06/14 08:49

SGI STL中的散列表采用链接法(separate chaining)解决冲突。结构中维护了一个vector,vector中的每一个元素称为一个桶(bucket),桶中保存的是指向一条链表第一个节点的指针;空桶保存的是空指针。


下面代码展示了自己编程实现的hash table,C++模板类封装。


如有错误,还请包涵和指正(E-Mail:xiajunhust@gmail.com)!


HashTable.h:

#ifndef HASHTABLE_H
#define HASHTABLE_H

// Hand-written hash table following "The Annotated STL Sources"
// (STL源码剖析), section 5.7.
// Author: Jiangnan Yanyu (江南烟雨)
// E-Mail: xiajunhust@gmail.com

#include <stddef.h>   // ::size_t, NULL

#include <algorithm>  // std::lower_bound
#include <cstring>    // std::strcmp
#include <iostream>   // std::cout, std::endl
#include <new>        // placement new, ::operator new / ::operator delete
#include <utility>    // std::pair
#include <vector>

// Singly linked list node stored in each bucket chain.
template <class ValueType>
struct __hashtable_node
{
    __hashtable_node *next;
    ValueType val;
};

// Number of entries in the prime table below.
static const int __stl_num_primes = 28;

// Candidate bucket counts, in ascending order.
static const unsigned long __stl_prime_list[__stl_num_primes] =
{
    53,         97,           193,         389,       769,
    1543,       3079,         6151,        12289,     24593,
    49157,      98317,        196613,      393241,    786433,
    1572869,    3145739,      6291469,     12582917,  25165843,
    50331653,   100663319,    201326611,   402653189, 805306457,
    1610612741, 3221225473ul, 4294967291ul
};

// Returns the smallest table prime that is >= n, or the largest table
// prime when n exceeds every entry.
inline unsigned long __get_next_prime(unsigned long n)
{
    const unsigned long *first = __stl_prime_list;
    const unsigned long *last = __stl_prime_list + __stl_num_primes;
    const unsigned long *pos = std::lower_bound(first, last, n);
    return pos == last ? *(last - 1) : *pos;
}

// Copy-constructs a T1 from `value` in the storage pointed to by p.
template <class T1, class T2>
void construct(T1 *p, const T2 &value)
{
    new (p) T1(value);
}

// Runs the destructor of *pointer without releasing its storage.
template <class T>
void destroy(T *pointer)
{
    pointer->~T();
}

// Hash functors. The primary template is intentionally empty; only the
// specializations below are usable.
template <class KeyType>
struct hash {};

// Maps a NUL-terminated string to a hash value (h = 5 * h + c per char).
inline size_t __stl_hash_string(const char *s)
{
    unsigned long h = 0;
    for (; *s; ++s)
    {
        h = 5 * h + *s;
    }
    return size_t(h);
}

// Explicit specializations of the hash functor.
template <>
struct hash<int>
{
    size_t operator()(int x) const { return x; }
};

template <>
struct hash<char *>
{
    size_t operator()(const char *s) const { return __stl_hash_string(s); }
};

template <>
struct hash<const char *>
{
    size_t operator()(const char *s) const { return __stl_hash_string(s); }
};

template <>
struct hash<char>
{
    size_t operator()(char s) const { return s; }
};

template <>
struct hash<unsigned char>
{
    size_t operator()(unsigned char s) const { return s; }
};

template <>
struct hash<signed char>
{
    size_t operator()(signed char s) const { return s; }
};

// Base providing the typedefs of an adaptable unary function.
template <class Arg, class Result>
struct unary_function
{
    typedef Arg argument_type;
    typedef Result result_type;
};

// Base providing the typedefs of an adaptable binary function.
template <class Arg1, class Arg2, class Result>
struct binary_function
{
    typedef Arg1 first_argument_type;
    typedef Arg2 second_argument_type;
    typedef Result result_type;
};

// Key extractor that returns its argument unchanged (value == key).
template <class T>
struct identity : public unary_function<T, T>
{
    const T &operator()(const T &x) const { return x; }
};

// Key equality via operator==.
template <class T>
struct equal_to : public binary_function<T, T, bool>
{
    bool operator()(const T &x, const T &y) const { return x == y; }
};

// Key equality for C strings, comparing contents rather than addresses.
struct eqstr
{
    bool operator()(const char *s1, const char *s2) const
    {
        return std::strcmp(s1, s2) == 0;
    }
};

// Hash table using separate chaining: a vector of buckets, each holding a
// pointer to the head of a singly linked list (null when empty).
//
// Template parameters:
//   ValueType  - type stored in each node
//   KeyType    - type of the key
//   HashFcn    - functor mapping a key to size_t
//   ExtractKey - functor extracting the key from a stored value
//   EqualKey   - functor deciding whether two keys are equal
template <class ValueType, class KeyType, class HashFcn, class ExtractKey, class EqualKey>
class HashTableClass
{
public:
    typedef struct __hashtable_node<ValueType> node;

    // There is deliberately no default constructor.
    // n is a hint; the real bucket count is the next table prime >= n.
    HashTableClass(size_t n, const HashFcn &hf, const EqualKey &eql, const ExtractKey &ext)
        : num_elements(0), hasher(hf), get_key(ext), equals(eql)
    {
        initialize_buckets(n);
    }

    HashTableClass(size_t n, const HashFcn &hf, const EqualKey &eql)
        : num_elements(0), hasher(hf), get_key(ExtractKey()), equals(eql)
    {
        initialize_buckets(n);
    }

    // Deep copy: every node of ht is duplicated.
    HashTableClass(const HashTableClass &ht)
        : num_elements(0), hasher(ht.hasher), get_key(ht.get_key), equals(ht.equals)
    {
        copy_from(&ht);
    }

    ~HashTableClass() { clear(); }

    // Replaces the functors and contents of this table with copies of ht's.
    HashTableClass &operator=(const HashTableClass &ht)
    {
        if (&ht != this)
        {
            clear();
            hasher = ht.hasher;
            equals = ht.equals;
            get_key = ht.get_key;
            copy_from(&ht);
        }
        return *this;  // was missing: falling off the end is undefined behaviour
    }

    // Number of stored elements.
    size_t size() const { return num_elements; }

    // Largest value representable by size_t.
    size_t max_size() const { return size_t(-1); }

    // Current number of buckets.
    size_t bucket_count() const { return buckets.size(); }

    // Largest bucket count the prime table allows.
    size_t max_bucket_count() const { return __stl_prime_list[__stl_num_primes - 1]; }

    // Inserts obj unless an element with an equal key exists.
    // Returns (node, true) for a new node, (existing node, false) otherwise.
    std::pair<node *, bool> insert_unique(const ValueType &obj)
    {
        resize(num_elements + 1);
        return insert_unique_noresize(obj);
    }

    // Inserts obj even if an equal key exists; returns the new node.
    node *insert_equal(const ValueType &obj)
    {
        resize(num_elements + 1);
        return insert_equal_noresize(obj);
    }

    // Prints every stored value to std::cout, bucket by bucket.
    void printAllNodes() const
    {
        std::cout << std::endl;
        std::cout << "Current node in hash table : " << std::endl;
        for (size_t i = 0; i < buckets.size(); ++i)
        {
            for (const node *cur = buckets[i]; cur; cur = cur->next)
            {
                std::cout << cur->val << " ";
            }
        }
        std::cout << std::endl;
    }

    // Prints, for every bucket, either that it is empty or its element
    // count followed by its values.
    void printAllBuckets() const
    {
        std::cout << std::endl;
        std::cout << "Current buckets in hash table : " << std::endl;
        for (size_t i = 0; i < buckets.size(); ++i)
        {
            const node *head = buckets[i];
            if (NULL == head)
            {
                std::cout << "buckets[" << i << "] is empty!" << std::endl;
                continue;
            }

            size_t chain_length = 0;
            for (const node *cur = head; cur; cur = cur->next)
            {
                ++chain_length;
            }
            std::cout << "buckets[" << i << "] has " << chain_length << " elements : ";
            for (const node *cur = head; cur; cur = cur->next)
            {
                std::cout << cur->val << " ";
            }
            std::cout << std::endl;
        }
    }

    // Looks up the first node whose key equals `key`, reporting the outcome
    // on std::cout. Returns (node, true) on success, (NULL, false) otherwise.
    std::pair<node *, bool> find(const KeyType &key)
    {
        for (node *cur = buckets[bkt_num_key(key)]; cur; cur = cur->next)
        {
            if (equals(key, get_key(cur->val)))
            {
                std::cout << "find the element " << key << " success" << std::endl;
                return std::pair<node *, bool>(cur, true);
            }
        }
        std::cout << "cannot find the element " << key << std::endl;
        return std::pair<node *, bool>(static_cast<node *>(NULL), false);
    }

    // Counts the elements whose key equals `key`, prints the result to
    // std::cout and returns it.
    size_t count(const KeyType &key) const
    {
        size_t num = 0;
        for (const node *cur = buckets[bkt_num_key(key)]; cur; cur = cur->next)
        {
            if (equals(key, get_key(cur->val)))
            {
                ++num;
            }
        }
        std::cout << "The element " << key << " appears " << num << " times" << std::endl;
        return num;
    }

    // Destroys every node. The bucket vector keeps its size; all buckets
    // become empty.
    void clear()
    {
        free_chains(buckets);
        num_elements = 0;
    }

    // Replaces the contents of this table with a node-for-node copy of *ht
    // (same bucket count, same order inside each chain). The functors are
    // NOT copied. A null ht or ht == this is a no-op.
    //
    // The copy is built on the side first, so nodes previously owned by this
    // table are released only once the copy has succeeded, and a throwing
    // ValueType copy leaks nothing.
    void copy_from(const HashTableClass *ht)
    {
        if (ht == NULL || ht == this)
        {
            return;
        }

        std::vector<node *> fresh(ht->buckets.size(), static_cast<node *>(0));
        try
        {
            for (size_t i = 0; i < ht->buckets.size(); ++i)
            {
                node **tail = &fresh[i];  // slot that receives the next copied node
                for (const node *cur = ht->buckets[i]; cur; cur = cur->next)
                {
                    *tail = new_node(cur->val);
                    tail = &(*tail)->next;
                }
            }
        }
        catch (...)
        {
            free_chains(fresh);
            throw;
        }

        clear();  // release the nodes this table owned before
        buckets.swap(fresh);
        num_elements = ht->num_elements;
    }

private:
    std::vector<node *> buckets;  // bucket heads; null for an empty bucket
    size_t num_elements;          // total number of stored elements
    HashFcn hasher;
    ExtractKey get_key;
    EqualKey equals;

    // Allocates raw storage for a node and copy-constructs obj into it.
    // Storage is obtained with ::operator new rather than `new node`, so that
    // val is constructed exactly once (and destroyed exactly once in
    // delete_node).
    node *new_node(const ValueType &obj)
    {
        node *fresh = static_cast<node *>(::operator new(sizeof(node)));
        fresh->next = NULL;
        try
        {
            ::construct(&fresh->val, obj);
        }
        catch (...)
        {
            ::operator delete(fresh);
            throw;
        }
        return fresh;
    }

    // Destroys the value held by n and releases the node's storage.
    void delete_node(node *n)
    {
        ::destroy(&n->val);
        ::operator delete(n);
    }

    // Deletes every node reachable from `chains` and nulls each head.
    void free_chains(std::vector<node *> &chains)
    {
        for (size_t i = 0; i < chains.size(); ++i)
        {
            node *cur = chains[i];
            while (cur)
            {
                node *following = cur->next;
                delete_node(cur);
                cur = following;
            }
            chains[i] = 0;
        }
    }

    // Fills the bucket vector with next_size(n) empty buckets.
    void initialize_buckets(size_t n)
    {
        const size_t n_buckets = next_size(n);
        buckets.reserve(n_buckets);
        buckets.insert(buckets.end(), n_buckets, static_cast<node *>(0));
    }

    // Smallest table prime >= n (or the largest table prime).
    size_t next_size(size_t n) const { return __get_next_prime(n); }

    // Grows the bucket vector and redistributes all nodes when
    // num_elements_hint exceeds the current bucket count.
    void resize(size_t num_elements_hint)
    {
        const size_t old_n_vector = buckets.size();
        if (num_elements_hint <= old_n_vector)
        {
            return;
        }

        const size_t n = next_size(num_elements_hint);
        if (n <= old_n_vector)  // already at the largest table prime
        {
            return;
        }

        std::vector<node *> tempVec(n, static_cast<node *>(0));
        for (size_t bucketIndex = 0; bucketIndex < old_n_vector; ++bucketIndex)
        {
            while (node *first = buckets[bucketIndex])
            {
                // Hash the key extracted from the value, not the value itself.
                const size_t new_bucket_index = bkt_num(first->val, n);
                buckets[bucketIndex] = first->next;      // unlink from the old chain
                first->next = tempVec[new_bucket_index]; // push onto the new chain
                tempVec[new_bucket_index] = first;
            }
        }
        buckets.swap(tempVec);
    }

    // Bucket index of a value, using the current bucket count.
    size_t bkt_num(const ValueType &obj) const { return bkt_num_key(get_key(obj)); }

    // Bucket index of a value for a table of n buckets.
    size_t bkt_num(const ValueType &obj, size_t n) const { return bkt_num_key(get_key(obj), n); }

    // Bucket index of a key, using the current bucket count.
    size_t bkt_num_key(const KeyType &Key) const { return hasher(Key) % (buckets.size()); }

    // Bucket index of a key for a table of n buckets.
    size_t bkt_num_key(const KeyType &Key, size_t n) const { return hasher(Key) % n; }

    // insert_unique without the preceding resize check.
    std::pair<node *, bool> insert_unique_noresize(const ValueType &obj)
    {
        const size_t bucket_index = bkt_num(obj);
        node *first = buckets[bucket_index];

        for (node *curNode = first; curNode; curNode = curNode->next)
        {
            if (equals(get_key(obj), get_key(curNode->val)))
            {
                return std::pair<node *, bool>(curNode, false);
            }
        }

        node *tempNode = new_node(obj);
        tempNode->next = first;  // new elements go to the head of the chain
        buckets[bucket_index] = tempNode;
        ++num_elements;
        return std::pair<node *, bool>(tempNode, true);
    }

    // insert_equal without the preceding resize check. A duplicate is placed
    // right after the first node with an equal key; otherwise the new node
    // becomes the head of the chain.
    node *insert_equal_noresize(const ValueType &obj)
    {
        const size_t bucket_index = bkt_num(obj);
        node *first = buckets[bucket_index];

        for (node *curNode = first; curNode; curNode = curNode->next)
        {
            if (equals(get_key(obj), get_key(curNode->val)))
            {
                node *tempNode = new_node(obj);
                tempNode->next = curNode->next;
                curNode->next = tempNode;
                ++num_elements;
                return tempNode;
            }
        }

        node *tempNode = new_node(obj);
        tempNode->next = first;
        buckets[bucket_index] = tempNode;
        ++num_elements;
        return tempNode;
    }
};

#endif  // HASHTABLE_H



HashTable.cpp:

#include "HashTable.h"using namespace std;int main(){HashTableClass<int,int,hash<int>,identity<int>,equal_to<int> > *hashTableObj = new HashTableClass<int,int,hash<int>,identity<int>,equal_to<int> >(50,hash<int>(),equal_to<int>());cout << "Hash Table size : " << hashTableObj->size() << endl;cout << "Hash Table bucket count : " << hashTableObj->bucket_count() << endl;cout << "Hash Table max bucket count : " << hashTableObj->max_bucket_count() << endl;hashTableObj->insert_unique(59);hashTableObj->insert_unique(63);hashTableObj->insert_unique(108);hashTableObj->insert_unique(2);hashTableObj->insert_unique(53);hashTableObj->insert_unique(55);cout << "Hash Table size : " << hashTableObj->size() << endl;hashTableObj->printAllNodes();hashTableObj->printAllBuckets();//继续插入元素,使总元素个数达到54个for(int i = 0;i <= 47;++i)hashTableObj->insert_equal(i);cout << endl;cout << "Hash Table size : " << hashTableObj->size() << endl;cout << "Hash Table bucket count : " << hashTableObj->bucket_count() << endl;hashTableObj->printAllNodes();hashTableObj->printAllBuckets();hashTableObj->find(2);hashTableObj->count(2);HashTableClass<int,int,hash<int>,identity<int>,equal_to<int> > *hashTableObj2 = new HashTableClass<int,int,hash<int>,identity<int>,equal_to<int> >(20,hash<int>(),equal_to<int>());cout << "Hash Table 2 size : " << hashTableObj2->size() << endl;cout << "Hash Table 2 bucket count : " << hashTableObj2->bucket_count() << endl;cout << "Hash Table 2 max bucket count : " << hashTableObj2->max_bucket_count() << endl;hashTableObj2->copy_from(hashTableObj);cout << "Hash Table 2 size : " << hashTableObj2->size() << endl;cout << "Hash Table 2 bucket count : " << hashTableObj2->bucket_count() << endl;cout << "Hash Table 2 max bucket count : " << hashTableObj2->max_bucket_count() << endl;HashTableClass<int,int,hash<int>,identity<int>,equal_to<int> > hashTableObj3(*hashTableObj2);cout << "Hash Table 3 size : " << hashTableObj3.size() << endl;cout << "Hash Table 3 bucket count : " << 
hashTableObj3.bucket_count() << endl;cout << "Hash Table 3 max bucket count : " << hashTableObj3.max_bucket_count() << endl;HashTableClass<char *,char *,hash<char *>,identity<char*>,eqstr > *hashTableObjString = new HashTableClass<char *,char *,hash<char *>,identity<char*>,eqstr >(20,hash<char *>(),eqstr());hashTableObjString->insert_unique("jun");hashTableObjString->insert_unique("hust");cout << "Hash Table hashTableObjString size : " << hashTableObjString->size() << endl;cout << "Hash Table hashTableObjString bucket count : " << hashTableObjString->bucket_count() << endl;cout << "Hash Table hashTableObjString max bucket count : " << hashTableObjString->max_bucket_count() << endl;hashTableObjString->printAllNodes();//hashTableObjString->printAllBuckets();hashTableObjString->find("juu");delete hashTableObj;delete hashTableObj2;delete hashTableObjString;return 0;}


运行结果(VS2008+Win7):



原创粉丝点击