搜索结构之哈希

来源：互联网发布：手机环境监控软件编辑：程序博客网时间：2024/06/03 16:38

哈希表根据哈希函数计算每个关键码在表中的存储位置；不同关键码映射到同一位置时称为哈希冲突，处理哈希冲突的常用方法有开散列法和闭散列法：

闭散列法（开放定址法）：用顺序表存储，存储效率高；发生冲突时沿探测序列寻找下一个可用位置，线性探测容易产生堆积，可改用二次探测来缓解；删除元素时只能做标记（伪删除），否则会打断探测序列，导致后面的元素查找失败；

开散列法：运用单链表存储方式，不产生堆积现象，但因为附加了指针域增加了空间开销。

二者具体的实现方法如下：

闭散列法：

#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using namespace std;

// Closed hashing (open addressing): all elements live directly in one
// contiguous table and collisions are resolved by probing for another slot.

// State of a single slot in the table.
enum State
{
    EMPTY,  // the slot has never held an element; a probe sequence ends here
    EXIST,  // the slot currently holds a live element
    DELETE, // tombstone: the element was logically removed (lazy deletion)
};

// One slot of the table: a key/value pair plus its state flag.
template<class K, class V>
struct HashNode
{
    State _s;       // state of this slot
    pair<K, V> _kv; // the stored key/value pair (stale when _s != EXIST)

    HashNode()
        : _s(EMPTY)
        , _kv()
    {}
};

// Default hash functor: converts an integral-like key to size_t.
template<class K>
class _HashFun_
{
public:
    size_t operator()(const K& key) const
    {
        return static_cast<size_t>(key);
    }
};

// Hash functor for std::string keys.
template<>
class _HashFun_<string>
{
public:
    // BKDR string hash (hash = hash * 131 + byte) over a NUL-terminated
    // string. A null pointer hashes to 0.
    size_t BKDhash(const char* str) const
    {
        size_t hash = 0;
        if (str == NULL)
        {
            return hash;
        }
        for (; *str != '\0'; ++str)
        {
            hash = hash * 131 + static_cast<unsigned char>(*str);
        }
        return hash;
    }

    size_t operator()(const string& str) const
    {
        return BKDhash(str.c_str());
    }
};

// Open-addressing hash table with unique keys.
//
//   K, V     key and mapped type
//   HashFun  functor mapping a key to size_t
//   IsLine   true  -> linear probing   (home, home+1, home+2, ...)
//            false -> quadratic probing (home, home+1, home+4, home+9, ...)
//
// Removal is lazy: a removed slot becomes a tombstone (DELETE) so that probe
// sequences passing through it are not cut short. Tombstones are reused by
// later insertions and dropped whenever the table is rehashed.
template<class K, class V, class HashFun = _HashFun_<K>, bool IsLine = true>
class Hash
{
public:
    typedef Hash<K, V, HashFun, IsLine> Self;

public:
    // Creates a table with `size` slots. A size of 0 falls back to the first
    // prime of the built-in prime list so that HashFunc never divides by zero.
    Hash(size_t size = GetNextPrime(0))
        : _size(0)
    {
        _table.resize(size != 0 ? size : GetNextPrime(0));
    }

    // Inserts key -> value. Returns false (and leaves the table untouched) if
    // the key is already present or no slot could be obtained.
    bool Insert(const K& key, const V& value)
    {
        return _Insert(key, value);
    }

    // Removes `key`. Returns true if a live element was removed.
    bool Remove(const K& key)
    {
        return _Remove(key);
    }

    // Looks up `key`.
    //
    // Returns {slot, true} when a live element with that key exists.
    // Otherwise `second` is false and `first` is still a valid (non-null)
    // pointer kept only for backward compatibility: it refers to the tombstone
    // of the key if one was seen on the probe path, else to the key's home
    // slot. Callers must check `second` before trusting the pointed-to data.
    pair<HashNode<K, V>*, bool> Find(const K& key)
    {
        const Slots found = Locate(key);
        if (found.live)
        {
            cout << "已找到" << endl;
            return make_pair(found.live, true);
        }
        if (found.tomb)
        {
            cout << "已被删除" << endl;
            return make_pair(found.tomb, false);
        }
        cout << "没找到" << endl;
        return make_pair(&_table[HashFunc(key)], false);
    }

protected:
    vector<HashNode<K, V> > _table;
    size_t _size; // number of live (EXIST) elements

    // Result of walking the probe sequence of one key.
    struct Slots
    {
        HashNode<K, V>* live;   // EXIST slot holding the key, or NULL
        HashNode<K, V>* tomb;   // first DELETE slot holding the key, or NULL
        HashNode<K, V>* vacant; // first slot an insertion may use, or NULL
    };

    // Returns the smallest prime in the built-in list that is greater than
    // `num` (or the largest listed prime if none is). Prime capacities spread
    // keys better under the modulo hash and are what makes quadratic probing
    // reach at least half of the table.
    static size_t GetNextPrime(size_t num)
    {
        const int _PrimeSize = 28;
        static const unsigned long _PrimeList[_PrimeSize] =
        {
            53ul, 97ul, 193ul, 389ul, 769ul,
            1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
            49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
            1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
            50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
            1610612741ul, 3221225473ul, 4294967291ul
        };
        for (int idx = 0; idx < _PrimeSize; ++idx)
        {
            if (_PrimeList[idx] > num)
            {
                return _PrimeList[idx];
            }
        }
        return _PrimeList[_PrimeSize - 1];
    }

    // Linear probing: the slot after `index`, wrapping at the end.
    size_t DetectFirst(size_t index)
    {
        return (index + 1) % _table.size();
    }

    // Quadratic probing: given the slot of probe number i-1 (H0 + (i-1)^2),
    // returns the slot of probe number i (H0 + i^2). The difference between
    // consecutive squares is 2*i - 1; `i` starts at 1.
    size_t DetectSecond(size_t index, size_t i)
    {
        return (index + 2 * i - 1) % _table.size();
    }

    // Walks the probe sequence of `key` for at most _table.size() steps.
    // The walk stops early at the first EMPTY slot (the key cannot be stored
    // beyond it) or as soon as a live match is found. Tombstones are skipped
    // but remembered so that an insertion can reuse the first one.
    Slots Locate(const K& key)
    {
        Slots found = { NULL, NULL, NULL };
        const size_t limit = _table.size();
        size_t index = HashFunc(key);
        for (size_t i = 1; i <= limit; ++i)
        {
            HashNode<K, V>& slot = _table[index];
            if (slot._s == EMPTY)
            {
                if (found.vacant == NULL)
                {
                    found.vacant = &slot;
                }
                break;
            }
            if (slot._s == EXIST)
            {
                if (slot._kv.first == key)
                {
                    found.live = &slot;
                    break;
                }
            }
            else // DELETE
            {
                if (found.vacant == NULL)
                {
                    found.vacant = &slot;
                }
                if (found.tomb == NULL && slot._kv.first == key)
                {
                    found.tomb = &slot;
                }
            }
            index = IsLine ? DetectFirst(index) : DetectSecond(index, i);
        }
        return found;
    }

    // Lazy deletion: marks the slot holding `key` as DELETE.
    bool _Remove(const K& key)
    {
        const Slots found = Locate(key);
        if (found.live)
        {
            found.live->_s = DELETE;
            --_size;
            return true;
        }
        if (found.tomb)
        {
            cout << "该元素已经被删除" << endl;
            return false;
        }
        cout << "没有找到该元素" << endl;
        return false;
    }

    // Moves every live element into a fresh table of `newSize` slots and
    // adopts it. Tombstones are not carried over. Does nothing if `newSize`
    // is not larger than the current table (e.g. the prime list is exhausted).
    void Rehash(size_t newSize)
    {
        if (newSize <= _table.size())
        {
            return;
        }
        Self bigger(newSize);
        for (size_t idx = 0; idx < _table.size(); ++idx)
        {
            if (_table[idx]._s == EXIST)
            {
                bigger._Insert(_table[idx]._kv.first, _table[idx]._kv.second);
            }
        }
        std::swap(_size, bigger._size);
        _table.swap(bigger._table); // O(1): only the vectors' buffers change hands
    }

    // Grows the table before an insertion once the load factor reaches its
    // limit. The comparison is done in integers (10 * size >= limit * capacity)
    // because _size / _table.size() would truncate to 0.
    // Linear probing tolerates 0.7. Quadratic probing only reaches about half
    // of a prime-sized table, so it must stay below 0.5 to always find a slot.
    void _CheckCapacity()
    {
        const size_t maxLoadTenths = IsLine ? 7 : 5;
        if (10 * _size >= maxLoadTenths * _table.size())
        {
            Rehash(GetNextPrime(_table.size()));
        }
    }

    // Inserts key -> value into the first reusable slot (tombstone or EMPTY)
    // of the key's probe sequence, rejecting duplicates.
    bool _Insert(const K& key, const V& value)
    {
        _CheckCapacity();
        for (;;)
        {
            const Slots found = Locate(key);
            if (found.live)
            {
                return false; // key already present
            }
            if (found.vacant)
            {
                found.vacant->_kv.first = key;
                found.vacant->_kv.second = value;
                found.vacant->_s = EXIST;
                ++_size;
                return true;
            }
            // No reusable slot is reachable (possible with a caller-chosen,
            // non-prime capacity under quadratic probing): grow and retry.
            const size_t bigger = GetNextPrime(_table.size());
            if (bigger <= _table.size())
            {
                return false; // cannot grow any further
            }
            Rehash(bigger);
        }
    }

    // Division-remainder hash: maps the functor's result onto a slot index.
    size_t HashFunc(const K& key)
    {
        HashFun f;
        return f(key) % _table.size();
    }
};

// Small demo of the quadratic-probing table with string keys.
void TestHash2()
{
    // Linear-probing variant with int keys, kept for reference:
    //   int array[] = { 11, 68, 25, 37, 14, 36, 49, 57 };
    //   Hash<int, int, _HashFun_<int>, true> hash(11);
    //   for (size_t idx = 0; idx < sizeof(array) / sizeof(array[0]); ++idx)
    //       hash.Insert(array[idx], idx);
    //   hash.Remove(14);
    //   cout << hash.Find(68).second << endl;

    Hash<string, int, _HashFun_<string>, false> hash(11);
    hash.Insert("他", 1);
    hash.Insert("2", 2);
    hash.Insert("10", 10);
    hash.Insert("20", 20);
    hash.Insert("6", 6);
    hash.Remove("1");

    // Look the key up once; `first` is only meaningful when `second` is true.
    const pair<HashNode<string, int>*, bool> result = hash.Find("1");
    cout << result.second << " " << result.first->_kv.first << endl;
}

开散列法，其中还加入了迭代器的实现：

// Open hashing (separate chaining), a.k.a. hash buckets.
// Keys whose hash maps to the same address ("synonyms") belong to one bucket;
// the synonyms of a bucket are linked into a singly linked list whose head
// pointer is stored in the table.
#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using namespace std;

// One node of a bucket's chain: the key/value pair plus the link to the next
// synonym.
template<class K, class V>
struct HashBucketNode
{
    HashBucketNode<K, V>* _pnext;
    pair<K, V> _kv;

    HashBucketNode(const K& key, const V& value)
        : _pnext(NULL)
        , _kv(key, value)
    {}
};

// Default hash functor: converts an integral-like key to size_t.
template<class K>
class _HashFun_
{
public:
    size_t operator()(const K& key) const
    {
        return static_cast<size_t>(key);
    }
};

// Hash functor for std::string keys.
template<>
class _HashFun_<string>
{
public:
    // BKDR string hash (hash = hash * 131 + byte) over a NUL-terminated
    // string. A null pointer hashes to 0.
    size_t BKDHash(const char* str) const
    {
        size_t hash = 0;
        if (str == NULL)
        {
            return hash;
        }
        for (; *str != '\0'; ++str)
        {
            hash = hash * 131 + static_cast<unsigned char>(*str);
        }
        return hash;
    }

    // Takes the key by const reference so it can be called with the
    // `const K&` that HashBucket::HashFunc passes in.
    size_t operator()(const string& str) const
    {
        return BKDHash(str.c_str());
    }
};

template<class K, class V, class HashFun>
class HashBucket; // forward declaration

// Forward iterator over all elements of a HashBucket.
// It keeps the current node and a pointer back to the owning table so that it
// can hop to the next non-empty bucket when a chain ends. The end iterator has
// a null node. Dereferencing the end iterator is undefined; incrementing it
// is a no-op.
template<class K, class V, class Ref, class Ptr, class HashFun = _HashFun_<K> >
class HashIterator
{
public:
    friend class HashBucket<K, V, HashFun>;
    typedef HashBucketNode<K, V> Node;
    typedef HashIterator<K, V, pair<K, V>&, pair<K, V>*, HashFun> Iterator;

    HashIterator()
        : _pNode(NULL)
        , _ht(NULL)
    {}

    HashIterator(Node* pNode, HashBucket<K, V, HashFun>* ht)
        : _pNode(pNode)
        , _ht(ht)
    {}

    HashIterator(const Iterator& it)
        : _pNode(it._pNode)
        , _ht(it._ht)
    {}

    Ref operator*()
    {
        return _pNode->_kv;
    }

    Ptr operator->()
    {
        return &(operator*());
    }

    // Two iterators are equal when they designate the same node.
    bool operator==(const Iterator& it) const
    {
        return _pNode == it._pNode;
    }

    bool operator!=(const Iterator& it) const
    {
        return !(*this == it);
    }

    // Pre-increment.
    Iterator& operator++()
    {
        Next();
        return *this;
    }

    // Post-increment.
    Iterator operator++(int)
    {
        Iterator temp(*this);
        Next();
        return temp;
    }

private:
    Node* _pNode;                   // current node, NULL for End()
    HashBucket<K, V, HashFun>* _ht; // owning table

    // Advances to the next element: the next node of the current chain, or
    // the head of the next non-empty bucket, or NULL when none is left.
    void Next()
    {
        if (_pNode == NULL)
        {
            return; // already at End(); stay there instead of dereferencing NULL
        }
        if (_pNode->_pnext)
        {
            _pNode = _pNode->_pnext;
            return;
        }
        const size_t bucket = _ht->HashFunc(_pNode->_kv.first);
        for (size_t idx = bucket + 1; idx < _ht->_table.size(); ++idx)
        {
            if (_ht->_table[idx])
            {
                _pNode = _ht->_table[idx];
                return;
            }
        }
        _pNode = NULL; // every following bucket is empty
    }
};

// Separate-chaining hash table. Insert() currently allows duplicate keys and
// Remove() erases every element with the given key.
template<class K, class V, class HashFun = _HashFun_<K> >
class HashBucket
{
public:
    // The iterator needs HashFunc and _table; HashFun is passed through so the
    // iterator type matches tables that use a custom hash functor.
    friend class HashIterator<K, V, pair<K, V>&, pair<K, V>*, HashFun>;
    typedef HashBucketNode<K, V> Node;
    typedef HashBucket<K, V, HashFun> Self;
    typedef HashIterator<K, V, pair<K, V>&, pair<K, V>*, HashFun> Iterator;

    // Creates a table whose bucket count is the first listed prime greater
    // than `bucketNum`.
    HashBucket(size_t bucketNum = 10)
        : _size(0)
    {
        _table.resize(GetNextPrime(bucketNum));
    }

    // The table owns its nodes through raw pointers, so a member-wise copy
    // would make two tables delete the same nodes. Copying is disabled.
    HashBucket(const HashBucket&) = delete;
    HashBucket& operator=(const HashBucket&) = delete;

    // Nodes are allocated with new, so they must be released here.
    ~HashBucket()
    {
        Destroy();
    }

    // Inserts key -> value. Duplicate keys are allowed, so `second` is always
    // true; `first` designates the new element.
    pair<Iterator, bool> Insert(const K& key, const V& value)
    {
        //return _InsertUnique(key, value); // keys must be unique
        return _InsertEqual(key, value);    // keys may repeat
    }

    // Removes every element whose key equals `key`; returns how many were
    // removed.
    size_t Remove(const K& key)
    {
        return _RemoveEqual(key);     // removes all elements with this key
        //return _RemoveUnique(key);  // removes at most one element with this key
    }

    // Returns an iterator to an element with the given key, or End().
    Iterator Find(const K& key)
    {
        return _Find(key);
    }

    // True when the table holds no elements.
    bool Empty() const
    {
        return _size == 0;
    }

    // Number of elements stored in the table.
    size_t Size() const
    {
        return _size;
    }

    // Number of elements whose key equals `key`.
    size_t Count(const K& key) const
    {
        size_t count = 0;
        for (const Node* pcur = _table[HashFunc(key)]; pcur; pcur = pcur->_pnext)
        {
            if (pcur->_kv.first == key)
            {
                ++count;
            }
        }
        return count;
    }

    // Number of buckets.
    size_t bucketCount() const
    {
        return _table.size();
    }

    // Number of elements in bucket `bucket`; 0 for an out-of-range index.
    size_t CountInBucket(size_t bucket) const
    {
        if (bucket >= _table.size())
        {
            return 0;
        }
        size_t count = 0;
        for (const Node* pcur = _table[bucket]; pcur; pcur = pcur->_pnext)
        {
            ++count;
        }
        return count;
    }

    // Past-the-end iterator (null node).
    Iterator End()
    {
        return Iterator(NULL, this);
    }

    // Iterator to the head of the first non-empty bucket, or End().
    Iterator Begin()
    {
        for (size_t idx = 0; idx < _table.size(); ++idx)
        {
            if (_table[idx])
            {
                return Iterator(_table[idx], this);
            }
        }
        return End();
    }

private:
    vector<Node*> _table; // bucket heads
    size_t _size;         // number of stored elements

    // Maps a key to its bucket index.
    size_t HashFunc(const K& key) const
    {
        return (HashFun()(key)) % (_table.size());
    }

    // Returns the smallest prime in the built-in list that is greater than
    // `num` (or the largest listed prime if none is). Prime bucket counts
    // reduce collisions under the modulo hash.
    static size_t GetNextPrime(size_t num)
    {
        const int _PrimeSize = 28;
        static const unsigned long _PrimeList[_PrimeSize] =
        {
            53ul, 97ul, 193ul, 389ul, 769ul,
            1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
            49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
            1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
            50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
            1610612741ul, 3221225473ul, 4294967291ul
        };
        for (int idx = 0; idx < _PrimeSize; ++idx)
        {
            if (_PrimeList[idx] > num)
            {
                return _PrimeList[idx];
            }
        }
        return _PrimeList[_PrimeSize - 1];
    }

    // Inserts without checking for an existing key (duplicates allowed).
    // Head insertion avoids walking the chain and handles empty and non-empty
    // buckets uniformly.
    pair<Iterator, bool> _InsertEqual(const K& key, const V& value)
    {
        CheckCapacity();
        const size_t bucket = HashFunc(key);
        Node* pNewNode = new Node(key, value);
        pNewNode->_pnext = _table[bucket];
        _table[bucket] = pNewNode;
        ++_size;
        return make_pair(Iterator(pNewNode, this), true);
    }

    // Inserts only if the key is not present yet. On a duplicate, returns an
    // iterator to the existing element and false.
    pair<Iterator, bool> _InsertUnique(const K& key, const V& value)
    {
        CheckCapacity();
        const size_t bucket = HashFunc(key);
        for (Node* pcur = _table[bucket]; pcur; pcur = pcur->_pnext)
        {
            if (pcur->_kv.first == key)
            {
                cout << "该关键码已经存在，插入失败" << endl;
                return make_pair(Iterator(pcur, this), false);
            }
        }
        Node* pNewNode = new Node(key, value);
        pNewNode->_pnext = _table[bucket];
        _table[bucket] = pNewNode;
        ++_size;
        return make_pair(Iterator(pNewNode, this), true);
    }

    // Removes the first element whose key equals `key`; returns 1 if one was
    // removed, otherwise 0.
    size_t _RemoveUnique(const K& key)
    {
        // `link` is the pointer that points at the current node (bucket head
        // or the previous node's _pnext), so head and interior removal are
        // the same operation.
        Node** link = &_table[HashFunc(key)];
        while (*link)
        {
            Node* pcur = *link;
            if (pcur->_kv.first == key)
            {
                *link = pcur->_pnext;
                delete pcur;
                --_size;
                return 1;
            }
            link = &pcur->_pnext;
        }
        cout << "没有找到该关键码，删除失败" << endl;
        return 0;
    }

    // Removes every element whose key equals `key`; returns how many were
    // removed.
    size_t _RemoveEqual(const K& key)
    {
        // Copy the key first: the caller may have passed a reference to the
        // key stored in a node that is about to be deleted.
        const K target(key);
        size_t removed = 0;
        Node** link = &_table[HashFunc(target)];
        while (*link)
        {
            Node* pcur = *link;
            if (pcur->_kv.first == target)
            {
                *link = pcur->_pnext;
                delete pcur;
                ++removed;
            }
            else
            {
                link = &pcur->_pnext;
            }
        }
        _size -= removed;
        if (removed != 0)
        {
            cout << "删除成功" << endl;
        }
        else
        {
            cout << "删除失败" << endl;
        }
        return removed;
    }

    // Returns an iterator to the first element in the key's chain whose key
    // equals `key`, or End().
    Iterator _Find(const K& key)
    {
        for (Node* pcur = _table[HashFunc(key)]; pcur; pcur = pcur->_pnext)
        {
            if (pcur->_kv.first == key)
            {
                cout << "找到了" << key << endl;
                return Iterator(pcur, this);
            }
        }
        cout << "没找到" << endl;
        return End();
    }

    // Grows the table once the element count reaches the bucket count.
    // Existing nodes are relinked into the new bucket array, so growing
    // allocates nothing but the array itself and keeps iterators' nodes alive.
    void CheckCapacity()
    {
        if (_size < _table.size())
        {
            return;
        }
        const size_t newCount = GetNextPrime(_table.size());
        if (newCount <= _table.size())
        {
            return; // prime list exhausted; keep chaining in the current table
        }
        vector<Node*> grown(newCount); // value-initialized: all heads are null
        for (size_t idx = 0; idx < _table.size(); ++idx)
        {
            Node* pcur = _table[idx];
            while (pcur)
            {
                Node* next = pcur->_pnext;
                const size_t dest = (HashFun()(pcur->_kv.first)) % newCount;
                pcur->_pnext = grown[dest];
                grown[dest] = pcur;
                pcur = next;
            }
            _table[idx] = NULL;
        }
        _table.swap(grown); // O(1): only the vectors' buffers change hands
    }

    // Deletes every node and leaves all buckets empty.
    void Destroy()
    {
        for (size_t idx = 0; idx < _table.size(); ++idx)
        {
            Node* pcur = _table[idx];
            while (pcur)
            {
                Node* next = pcur->_pnext;
                delete pcur;
                pcur = next;
            }
            _table[idx] = NULL;
        }
        _size = 0;
    }
};

// Demo: fills a table and prints every element through the iterator.
void IteratorTest()
{
    int array[] = { 11, 68, 57, 25, 14, 36, 37, 49 };
    size_t size = sizeof(array) / sizeof(array[0]);
    HashBucket<int, int> hb(size);
    for (size_t idx = 0; idx < size; ++idx)
    {
        hb.Insert(array[idx], static_cast<int>(idx + 1));
    }
    // Print first, then advance, so the end iterator is never dereferenced.
    for (HashBucket<int, int>::Iterator it = hb.Begin(); it != hb.End(); ++it)
    {
        cout << (*it).first << " " << it->second << endl;
    }
    cout << endl;
}

阅读全文

0 0