linux c/c++ 后台开发常用算法之:一致性哈希算法

来源:互联网 发布:wind数据库 华东师大 编辑:程序博客网 时间:2024/05/18 01:17

一致性哈希主要应用在大规模高可用性的分布式存储上,尤其是KV存储上面,比如memcaced, redis 集群,相比普通hash % N 的优点在于,但优点是增加或者删除节点的时候,数据的迁移会比较小,通常只有部分抖动和迁移。但是其相对于hash %N 的缺点数据均匀性肯定不如 hash %N 尤其是数据的key在一个较小的范围内,容易出现均匀的现象,但是可以通过hash函数调优,以及增加虚拟节点的方法来降低不均匀性。下面试自己写的一个c++版本的一致性哈希算法,其中m5算法是上一篇博客的代码

/**********************************function: consistent hash author: liuyidate: 2015.10.31viersion: 1.0**********************************/#ifndef CONSISTENT_HASH_H#define CONSISTENT_HASH_H#include <stdio.h>#include <string.h>#include <iostream>#include <set>#include <map>#include <string>#include <vector>#include "md5.h"using namespace std;inline unsigned int hash_fun(const char* key){unsigned int hash = 0;  unsigned int seed = 131;  unsigned int len = strlen(key);  for(int i = 0; i < len; i++)  {  hash = (hash * seed) + key[i];}  return hash;  }typedef unsigned int(*hash_fun_ptr)(const char*);class consistent_hash{public:consistent_hash(){m_virtual_node_mulriple = 1;}~consistent_hash(){m_node_map.clear();}bool init(const vector<string>& node_vect,   const int& virtual_node_mulriple,  const hash_fun_ptr& hash_func = hash_fun);size_t get_virtual_node_hash(const string& node, vector<unsigned int>& virtual_node_hash_vec);bool add_node(const string& node);bool del_node(const string& node);string get_node(const string& key);size_t get_all_node(set<string>& nodes);size_t get_node_count();private:int m_virtual_node_mulriple;map<unsigned int, string> m_node_map;hash_fun_ptr m_hash_func;};#endif 


/**********************************function: consistent hash author: liuyidate: 2015.10.31viersion: 1.0**********************************/#include "consistent_hash.h"bool consistent_hash::init(const vector<string>& node_vect,   const int& virtual_node_mulriple,  const hash_fun_ptr& hash_func){if(node_vect.empty())return false;m_virtual_node_mulriple = virtual_node_mulriple;m_hash_func = hash_func;for(size_t i = 0; i < node_vect.size(); i++){vector<unsigned int> virtual_node_hash_vec;get_virtual_node_hash(node_vect[i], virtual_node_hash_vec);for(size_t j = 0; j < virtual_node_hash_vec.size(); j++){m_node_map[virtual_node_hash_vec[j]] = node_vect[i];}}return true;}size_t consistent_hash::get_virtual_node_hash(const string& node, vector<unsigned int>& virtual_node_hash_vec){virtual_node_hash_vec.clear();virtual_node_hash_vec.reserve(m_virtual_node_mulriple*4+1);for(int i = 0; i < m_virtual_node_mulriple; i++){char virtual_node[256] = {0};sprintf(virtual_node, "%s#%u", node.c_str(), i);MD5 md5;md5.update(virtual_node);const unsigned char* digest = md5.digest();for(int h = 0; h < 4; h++){unsigned int hash_value = ((unsigned int)(digest[3+h*4]&0xFF) << 24)   |((unsigned int)(digest[2+h*4]&0xFF) << 16)  |((unsigned int)(digest[1+h*4]&0xFF) << 8)  |(digest[h*4]&0xFF);virtual_node_hash_vec.push_back(hash_value);}}return virtual_node_hash_vec.size();}bool consistent_hash::add_node(const string& node){vector<unsigned int> virtual_node_hash_vec;if(0 == get_virtual_node_hash(node, virtual_node_hash_vec))return false;for(size_t i = 0; i < virtual_node_hash_vec.size(); i++){m_node_map[virtual_node_hash_vec[i]] = node;}return true;}bool consistent_hash::del_node(const string& node){vector<unsigned int> virtual_node_hash_vec;if(0 == get_virtual_node_hash(node, virtual_node_hash_vec))return false;for(size_t i = 0; i < virtual_node_hash_vec.size(); i++){m_node_map.erase(virtual_node_hash_vec[i]);}return true;}string consistent_hash::get_node(const string& key){if(m_node_map.empty())return "";unsigned key_hash = m_hash_func(key.c_str());map<unsigned int, string>::iterator it = m_node_map.lower_bound(key_hash);if(it == m_node_map.end()){return m_node_map.begin()->second;}return it->second;}size_t consistent_hash::get_all_node(set<string>& nodes){nodes.clear();for(map<unsigned int, string>::iterator it = m_node_map.begin(); it != m_node_map.end();++it){nodes.insert(it->second);}return nodes.size();}size_t consistent_hash::get_node_count(){set<string> nodes;return get_all_node(nodes);}


#include <iostream>#include "consistent_hash.h"int main(){consistent_hash c;vector<string> a;a.push_back("192.10.59.1:80");a.push_back("192.10.59.2:80");a.push_back("192.10.59.3:80");a.push_back("192.10.59.4:80");a.push_back("192.10.59.5:80");c.init(a, 1);set<string> nodes;cout<<c.get_all_node(nodes)<<endl;cout<<c.del_node("192.10.59.1:80")<<endl;cout<<c.get_all_node(nodes)<<endl;cout<<c.add_node("192.10.59.1:80")<<endl;cout<<c.get_all_node(nodes)<<endl;for(int i = 0; i < 10000000; i++){char b[1024] = {0};sprintf(b, "%d", i);string t = b;cout<<"node="<<c.get_node(t)<<endl;}}




0 0
原创粉丝点击