实现一个布隆过滤器

来源:互联网 发布:韩剧软件 编辑:程序博客网 时间:2024/05/18 17:59

布隆过滤器(Bloom Filter)是由布隆(Burton Howard Bloom)提出的。它实际上由一个很长的二进制向量和一系列随机映射函数组成,用于检索一个元素是否在一个集合中。底层是利用哈希表的思想来实现的:它可以通过一个Hash函数将一个元素映射成一个位阵列(Bit Array)中的一个点。这样一来,我们只要看看这个点是不是 1,就可以知道集合中有没有它了。这就是布隆过滤器的基本思想。

优点:空间效率和查询时间相比于其他数据结构有很大的优势

缺点:有一定的误识别率,删除困难

// Standard headers required by the hash functors and the filter below.
#include <cstddef>
#include <string>
#include <vector>

// String hash #1: multiplicative hash with multiplier 131 (BKDR style).
// The template parameter K is unused; only std::string keys are supported.
template <class K>
struct _HashFunc1 {
    // Hashes a NUL-terminated C string.
    std::size_t BKDHash(const char* str) const {
        std::size_t hash = 0;
        while (std::size_t ch = static_cast<std::size_t>(*str++)) {
            hash = hash * 131 + ch;
        }
        return hash;
    }

    // Takes the key by const reference so it can be invoked with `const K&`.
    std::size_t operator()(const std::string& s) const {
        return BKDHash(s.c_str());
    }
};

// String hash #2: multiplicative hash with multiplier 65599 (SDBM style).
template <class K>
struct _HashFunc2 {
    // Hashes a NUL-terminated C string.
    std::size_t SDBMHash(const char* str) const {
        std::size_t hash = 0;
        while (std::size_t ch = static_cast<std::size_t>(*str++)) {
            // Equivalent shift form:
            //   hash = ch + (hash << 6) + (hash << 16) - hash;
            hash = 65599 * hash + ch;
        }
        return hash;
    }

    std::size_t operator()(const std::string& s) const {
        return SDBMHash(s.c_str());
    }
};

// String hash #3: shift/xor mixing seeded with 1315423911.
// NOTE(review): the member is named RSHash, but the recurrence looks like
// the one usually published as "JSHash"; the name is kept for compatibility.
template <class K>
struct _HashFunc3 {
    // Hashes a NUL-terminated C string; the empty string hashes to 0.
    std::size_t RSHash(const char* str) const {
        if (!*str) {
            return 0;
        }
        std::size_t hash = 1315423911;
        while (std::size_t ch = static_cast<std::size_t>(*str++)) {
            hash ^= ((hash << 5) + ch + (hash >> 2));
        }
        return hash;
    }

    std::size_t operator()(const std::string& s) const {
        return RSHash(s.c_str());
    }
};

// String hash #4: multiplicative hash whose multiplier itself is rescaled
// by 378551 after every character (starting from 63689).
template <class K>
struct _HashFunc4 {
    // Hashes a NUL-terminated C string.
    std::size_t RSHash(const char* str) const {
        std::size_t hash = 0;
        std::size_t magic = 63689;
        while (std::size_t ch = static_cast<std::size_t>(*str++)) {
            hash = hash * magic + ch;
            magic *= 378551;
        }
        return hash;
    }

    std::size_t operator()(const std::string& s) const {
        return RSHash(s.c_str());
    }
};

// String hash #5: alternates two different shift/xor mixing steps on even
// and odd character positions.
// NOTE(review): named RSHash in the original, but the recurrence looks like
// the one usually published as "APHash"; the name is kept for compatibility.
template <class K>
struct _HashFunc5 {
    // Hashes a NUL-terminated C string.
    std::size_t RSHash(const char* str) const {
        std::size_t hash = 0;
        std::size_t ch = 0;
        for (long i = 0; (ch = static_cast<std::size_t>(*str++)) != 0; ++i) {
            if ((i & 1) == 0) {
                hash ^= ((hash << 7) ^ ch ^ (hash >> 3));
            } else {
                hash ^= (~((hash << 11) ^ ch ^ (hash >> 5)));
            }
        }
        return hash;
    }

    std::size_t operator()(const std::string& s) const {
        return RSHash(s.c_str());
    }
};

// Bloom filter over keys of type K using five hash functors.
//
// Each inserted key sets five bits (one per hash functor, reduced modulo the
// bit count). IsIn() reports false as soon as one of the key's five bits is
// clear, and true only when all five are set.
template <class K = std::string,
          class HashFunc1 = _HashFunc1<K>,
          class HashFunc2 = _HashFunc2<K>,
          class HashFunc3 = _HashFunc3<K>,
          class HashFunc4 = _HashFunc4<K>,
          class HashFunc5 = _HashFunc5<K> >
class BloomFilter {
public:
    // Creates a filter whose bit count is the first tabulated prime that is
    // strictly greater than `size`.
    BloomFilter(std::size_t size) {
        capacity = _GetNextPrime(size);
        // One byte stores eight bits; round up so every bit has storage.
        bit.assign((capacity + 7) / 8, 0);
    }

    // Inserts `s` by setting the bit selected by each of the five hashes.
    void set(const K& s) {
        setBit(HashFunc1()(s) % capacity);
        setBit(HashFunc2()(s) % capacity);
        setBit(HashFunc3()(s) % capacity);
        setBit(HashFunc4()(s) % capacity);
        setBit(HashFunc5()(s) % capacity);
    }

    // Returns false if any of the five bits for `key` is clear, true
    // otherwise. Later hashes are not evaluated once a clear bit is found.
    bool IsIn(const K& key) const {
        if (!testBit(HashFunc1()(key) % capacity)) {
            return false;
        }
        if (!testBit(HashFunc2()(key) % capacity)) {
            return false;
        }
        if (!testBit(HashFunc3()(key) % capacity)) {
            return false;
        }
        if (!testBit(HashFunc4()(key) % capacity)) {
            return false;
        }
        if (!testBit(HashFunc5()(key) % capacity)) {
            return false;
        }
        return true;
    }

protected:
    // Returns the first prime in the table that is strictly greater than
    // `num`. When no such entry exists the largest tabulated prime is
    // returned instead of reading past the end of the table.
    unsigned long _GetNextPrime(unsigned long num) {
        static const unsigned long primes[] = {
            53ul, 97ul, 193ul, 389ul, 769ul,
            1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
            49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
            1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
            50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
            1610612741ul, 3221225473ul, 4294967291ul
        };
        const std::size_t primeCount = sizeof(primes) / sizeof(primes[0]);
        for (std::size_t pos = 0; pos < primeCount; ++pos) {
            if (primes[pos] > num) {
                return primes[pos];
            }
        }
        return primes[primeCount - 1];
    }

private:
    // Sets bit number `pos` in the packed byte array.
    void setBit(std::size_t pos) {
        bit[pos >> 3] |= static_cast<unsigned char>(1u << (pos & 7));
    }

    // Reports whether bit number `pos` is set.
    bool testBit(std::size_t pos) const {
        return (bit[pos >> 3] & (1u << (pos & 7))) != 0;
    }

    std::vector<unsigned char> bit;  // packed bits, eight per byte
    std::size_t capacity;            // number of addressable bits
};

布隆过滤器扩展,支持删除操作

template<class K=string,class HashFunc1=_HashFunc1<K>,class HashFunc2=_HashFunc2<K>,class HashFunc3=_HashFunc3<K>,class HashFunc4=_HashFunc4<K>,class HashFunc5=_HashFunc5<K> >class BloomFilterPlus{public:BloomFilterPlus(size_t range){table.resize(range*5);}void Set(const K& key){size_t index1=HashFunc1()(key)%table.size();size_t index2=HashFunc2()(key)%table.size();size_t index3=HashFunc3()(key)%table.size();size_t index4=HashFunc4()(key)%table.size();size_t index5=HashFunc5()(key)%table.size();table[index1]++;table[index2]++;table[index3]++;table[index4]++;table[index5]++;}bool Reset(const K& key){size_t index1=HashFunc1()(key)%table.size();size_t index2=HashFunc2()(key)%table.size();size_t index3=HashFunc3()(key)%table.size();size_t index4=HashFunc4()(key)%table.size();size_t index5=HashFunc5()(key)%table.size();if(table[index1]==0||table[index2]==0||table[index3]==0||table[index4]==0||table[index5]==0)return false;table[index1]--;table[index2]--;table[index3]--;table[index4]--;table[index5]--;return true;}bool Check(const K& key){size_t index1=HashFunc1()(key)%table.size();size_t index2=HashFunc2()(key)%table.size();size_t index3=HashFunc3()(key)%table.size();size_t index4=HashFunc4()(key)%table.size();size_t index5=HashFunc5()(key)%table.size();if(table[index1]&&table[index2]&&table[index3]&&table[index4]&&table[index5]&&)return true;return false;}private:vector<size_t> table;};int main(){return 0;}


原创粉丝点击