BloomFilter简易实现
来源:互联网 发布:java输入输出学生成绩 编辑:程序博客网 时间:2024/05/19 12:29
#ifndef BLOOM_FILTER_HPP
#define BLOOM_FILTER_HPP

#include <cstdlib>
#include <cstring>
#include <vector>

/**
 * Default hash: relies on T being implicitly convertible to unsigned int.
 * Provide an explicit specialization for any type where that conversion
 * does not exist (or is not a useful hash).
 */
template <typename T>
unsigned int GetHash(const T & value)
{
    return value;
}

/**
 * Multipliers used to derive the i-th bit position from a single hash value.
 * The number of entries is the upper bound for the number of hash functions.
 */
const int prime[] = {
      3,   5,   7,  11,  13,  17,  19,  23,  29,  31,
     37,  41,  43,  47,  53,  59,  61,  67,  71,  73,
     79,  83,  89,  97, 101, 103, 107, 109, 113, 127,
    131, 137, 139, 149, 151, 157, 163, 167, 173, 179
};

/**
 * Simple Bloom filter that keeps one bit array per hash function.
 *
 * The i-th bit position of a value is (GetHash(value) * prime[i]) % count.
 * test() can report true for a value that was never set (false positive)
 * when all of its bits were set by other values.
 *
 * Instances are not copyable.
 */
template <typename T>
class BloomFilter
{
public:
    /**
     * @param count number of bits in each bit array; the process is aborted
     *              when it is 0 because bit positions are taken modulo count
     * @param k     requested number of hash functions; clamped to the range
     *              [2, number of entries in prime]
     */
    BloomFilter(unsigned int count, unsigned int k = 10);
    ~BloomFilter();

    /** Marks value as present. */
    void set(const T & value);

    /** Returns false if value was definitely never set, true otherwise. */
    bool test(const T & value) const;

private:
    /* Declared but not defined: copying is disabled. */
    BloomFilter(const BloomFilter &);
    BloomFilter & operator = (const BloomFilter &);

    /*
     * Resets the bits of value. Kept private: bits are shared between
     * values, so clearing them can make test() fail for other values.
     */
    void clear(const T & value);

    /* Bit position selected by the index-th hash function. */
    unsigned int bitIndex(unsigned int hash, unsigned int index) const;

private:
    unsigned int m_k;       /* number of hash functions / bit arrays */
    unsigned int m_size;    /* bytes in each bit array */
    unsigned int m_count;   /* bits addressed in each bit array */
    std::vector<std::vector<unsigned char> > m_filter;
};

template <typename T>
BloomFilter<T>::BloomFilter(unsigned int count, unsigned int k)
    : m_k(k), m_size(0), m_count(count), m_filter()
{
    const unsigned int primeCount = sizeof(prime) / sizeof(prime[0]);

    if (m_count == 0)
    {
        /* bit positions are computed modulo m_count */
        std::abort();
    }

    if (primeCount < 2)
    {
        std::abort();
    }

    if (m_k > primeCount)
    {
        m_k = primeCount;
    }
    else if (m_k < 2)
    {
        m_k = 2;
    }

    /* The largest bit position is m_count - 1, i.e. byte (m_count - 1) >> 3. */
    m_size = (m_count >> 3) + 1;

    /* Zero-filled storage; allocation failure is reported by std::bad_alloc. */
    m_filter.assign(m_k, std::vector<unsigned char>(m_size, 0));
}

template <typename T>
BloomFilter<T>::~BloomFilter()
{
    /* storage is released by std::vector */
}

template <typename T>
unsigned int BloomFilter<T>::bitIndex(unsigned int hash, unsigned int index) const
{
    /* unsigned multiplication wraps on overflow, which is intended here */
    return (hash * static_cast<unsigned int>(prime[index])) % m_count;
}

template <typename T>
void BloomFilter<T>::set(const T & value)
{
    const unsigned int hash = GetHash(value);

    for (unsigned int i = 0; i < m_k; ++i)
    {
        const unsigned int key = bitIndex(hash, i);
        m_filter[i][key >> 3] |= static_cast<unsigned char>(1u << (key & 0x07));
    }
}

template <typename T>
void BloomFilter<T>::clear(const T & value)
{
    const unsigned int hash = GetHash(value);

    for (unsigned int i = 0; i < m_k; ++i)
    {
        const unsigned int key = bitIndex(hash, i);
        m_filter[i][key >> 3] &= static_cast<unsigned char>(~(1u << (key & 0x07)));
    }
}

template <typename T>
bool BloomFilter<T>::test(const T & value) const
{
    const unsigned int hash = GetHash(value);

    for (unsigned int i = 0; i < m_k; ++i)
    {
        const unsigned int key = bitIndex(hash, i);
        if ((m_filter[i][key >> 3] & (1u << (key & 0x07))) == 0)
        {
            return false;
        }
    }

    return true;
}

#endif
#include <vector>#include <string>#include <iostream>using namespace std;#include "BloomFilter.hpp"template <>unsigned int GetHash(const string & value){ unsigned int hash = 0; typedef string::const_iterator iterator; for (iterator iter = value.begin(); iter != value.end(); ++iter) { hash += (*iter) * 5; } return(hash);}int main(int argc, char ** argv){ int array[] = { 9, 5, 4, 6, 7, 8, 0, 1, 55, -100 }; const int size = sizeof(array)/sizeof(array[0]); int min = array[0]; int max = array[0]; cout << "array: "; for (int i = 0; i < size; ++i) { if (array[i] > max) { max = array[i]; } else if (array[i] < min) { min = array[i]; } cout << array[i] << ' '; } cout << endl; BloomFilter<int> filter1(size); BloomFilter<int> filter2(5 * size); BloomFilter<int> filter3(10 * size); /* must be right, but BitMap can do it */ BloomFilter<int> filter4(max - min); for (int i = 0; i < size; ++i) { filter1.set(array[i]); filter2.set(array[i]); filter3.set(array[i]); filter4.set(array[i]); } cout << "sorted1: "; for (int value = min; value <= max; ++value) { if (filter1.test(value)) { cout << value << ' '; } } cout << endl; cout << "sorted2: "; for (int value = min; value <= max; ++value) { if (filter2.test(value)) { cout << value << ' '; } } cout << endl; cout << "sorted3: "; for (int value = min; value <= max; ++value) { if (filter3.test(value)) { cout << value << ' '; } } cout << endl; cout << "sorted4: "; for (int value = min; value <= max; ++value) { if (filter4.test(value)) { cout << value << ' '; } } cout << endl; /* ------------------------------------------ */ const char * const url[] = { "www.google.com.hk", "www.bing.com.cn", "www.baidu.com", "www.manmankan.com", "www.csdn.net" }; BloomFilter<string> filter(500); for (int i = 0; i < sizeof(url)/sizeof(url[0]); ++i) { filter.set(url[i]); } const char * const check[] = { "www.google.com.hk", "www.bing.com.cn", "www.baidu.com", "www.manmankan.com", "www.csdn.net", "www.hao123.com", "www.sohu.com", "www.soso.com", 
"www.sina.com", "www.nosuchurl.com" }; for (int i = 0; i < sizeof(check)/sizeof(check[0]); ++i) { if (filter.test(check[i])) { cout << check[i] << " is exist" << endl; } else { cout << check[i] << " is not exist" << endl; } } return(0);}
代码中的位数组大小(m_size,受传入的 m_count 影响)、哈希函数个数(m_k)以及哈希函数的构造(GetHash)都不太恰当。
- BloomFilter简易实现
- BloomFilter应用与D-Lelft BloomFilter实现
- 浅谈BloomFilter【下】用Java实现BloomFilter
- BloomFilter应用与D-Lelft BloomFilter实现
- BloomFilter检测实现代码
- bloomfilter的实现
- bloomfilter 原理及实现
- python redis实现bloomfilter
- BloomFilter
- BloomFilter
- BloomFilter
- BloomFilter
- BloomFilter
- BloomFilter
- BloomFilter
- BloomFilter
- BloomFilter
- bloomFilter
- c语言解复数的乘法,应用结构体
- 对三维数组进行排序,x相等时比较y的大小,y相等时比较z的大小,用到qsort函数。
- hibernate持久化对象状态 .
- poj 1654 这题我想多了
- 学hibernate的地方
- BloomFilter简易实现
- 在S3C2440上配置UART
- 想做硬件开发的人员必看
- ScrollLayout 的使用
- 在PropertyGrid使用JsonStore
- 锂电池充电的原理
- XEN的命令集
- php 解决can not modify header infomation的方法
- Xen的源码包编译安装