hash算法总结

来源:互联网 发布:上海java培训哪家好 编辑:程序博客网 时间:2024/06/06 03:52

一.Hash简介

概念
把任意长度的输入,通过hash算法,变换成固定长度的输出,该输出就是散列值。这种转换是一种压缩映射,也就是,散列值的空间通常远小于输入的空间,不同的输入可能会散列成相同的输出。
哈希表
若结构中存在和关键字K相等的记录,则必定在f(K)的存储位置上。由此,不需比较便可直接取得所查记录。称这个对应关系f为散列函数(Hash function),按这个思想建立的表为哈希表。
哈希冲突
对不同的关键字可能得到同一散列地址,即key1≠key2,而f(key1)=f(key2),这种现象称冲突。
解决Hash冲突常用的是拉链法(哈希链表),即hash表中的每个元素是一个链表,hash值相同的关键字存放在同一个链表中。查找时,从该hash值对应链表的第一个节点开始遍历比较key值,直到找到相同的key,或者遍历完整个链表(查找失败)为止。

时间复杂度
使用hash算法对关键字进行查找,理论上时间复杂度是O(1),但实际取决于散列函数的选取,最坏的情况(所有关键字都冲突到同一个地址)是O(n)。

二.常用字符串hash

关键字是字符串。

常用hash函数:

1.加法hash

// Additive hash: starts from the key length and adds every byte of the key,
// then reduces the sum modulo `prime`.
//
// key   - string to hash (may be empty; the result is then 0).
// prime - modulus, normally the table size; must be non-zero because the
//         result is computed with `%`.
// Returns a value in [0, prime).
unsigned additiveHash(string key, unsigned prime)
{
        unsigned hash = static_cast<unsigned>(key.length());
        for (string::size_type i = 0; i < key.length(); ++i)
        {
                // Read each byte as unsigned char: plain char may be signed, in
                // which case bytes >= 0x80 would be sign-extended and subtract
                // from the sum, giving different results on different platforms.
                hash += static_cast<unsigned char>(key[i]);
        }

        return (hash % prime);
}

2. 乘法hash

// Multiplicative (Bernstein-style) hash: hash = 33 * hash + byte for every
// byte of the key, starting from 0, then reduced modulo `prime`.
// The multiplication is allowed to wrap around in unsigned arithmetic.
//
// key   - string to hash (may be empty; the result is then 0).
// prime - modulus, normally the table size; must be non-zero because the
//         result is computed with `%`.
// Returns a value in [0, prime).
unsigned bernstein(string key, unsigned prime)
{
        unsigned hash = 0;
        for (string::size_type i = 0; i < key.length(); ++i)
        {
                // Read each byte as unsigned char: plain char may be signed, in
                // which case a byte such as 0xFF would be added as 0xFFFFFFFF
                // instead of 255, making the hash platform-dependent.
                hash = 33 * hash + static_cast<unsigned char>(key[i]);
        }

        return (hash % prime);
}
33为推荐的乘数,另外推荐的乘数还有:131, 1313, 13131, 131313等等。

3. 位运算Hash

// Rotating hash: starts from the key length; for every byte the state is
// rotated left by 4 bits and XOR-ed with the byte, then the final state is
// reduced modulo `prime`.
//
// key   - string to hash (may be empty; the result is then 0).
// prime - modulus, normally the table size; must be non-zero because the
//         result is computed with `%`.
// Returns a value in [0, prime).
unsigned rotatingHash(string key, unsigned prime)
{
        unsigned hash = static_cast<unsigned>(key.length());
        for (string::size_type i = 0; i < key.length(); ++i)
        {
                // Read each byte as unsigned char: with a signed plain char a
                // byte >= 0x80 would be sign-extended and flip all upper 24
                // bits of the state instead of only the low 8.
                const unsigned byte = static_cast<unsigned char>(key[i]);

                // (hash << 4) ^ (hash >> 28) is a 4-bit left rotation when
                // unsigned is 32 bits wide.
                hash = (hash << 4) ^ (hash >> 28) ^ byte;
        }
        return (hash % prime);
}

通过各种位运算(常见的是移位和异或)来充分地混合输入元素。

三.示例

下面是常用字符串hash算法的example:

#include <iostream>
#include <string>
#include <vector>
#include <time.h>
#include <stdlib.h>
#include <string.h>
using namespace std;

// Demo: inserts random keys into a chained hash table using three different
// string hash functions and reports, for each, how many buckets hold more
// than one key and how many buckets are empty.

// Additive hash: key length plus the sum of all bytes, modulo `prime`.
// `prime` must be non-zero.
unsigned additiveHash(string key, unsigned prime)
{
        unsigned hash = static_cast<unsigned>(key.length());
        for (string::size_type i = 0; i < key.length(); ++i)
        {
                // unsigned char avoids sign extension of bytes >= 0x80.
                hash += static_cast<unsigned char>(key[i]);
        }
        return (hash % prime);
}

// Multiplicative hash: hash = 33 * hash + byte, modulo `prime`.
// `prime` must be non-zero.
unsigned bernstein(string key, unsigned prime)
{
        unsigned hash = 0;
        for (string::size_type i = 0; i < key.length(); ++i)
        {
                // unsigned char avoids sign extension of bytes >= 0x80.
                hash = 33 * hash + static_cast<unsigned char>(key[i]);
        }
        return (hash % prime);
}

// Rotating hash: rotate the state left by 4 bits (for a 32-bit unsigned) and
// XOR in each byte, modulo `prime`. `prime` must be non-zero.
unsigned rotatingHash(string key, unsigned prime)
{
        unsigned hash = static_cast<unsigned>(key.length());
        for (string::size_type i = 0; i < key.length(); ++i)
        {
                // unsigned char avoids sign extension of bytes >= 0x80.
                const unsigned byte = static_cast<unsigned char>(key[i]);
                hash = (hash << 4) ^ (hash >> 28) ^ byte;
        }
        return (hash % prime);
}

// Hash function currently used by the table operations below.
// It must be assigned before any hash_* function is called.
unsigned (*hash_func)(string key, unsigned prime);

// One entry of a bucket chain.
struct DataNode
{
        string key;
        void *data;
        DataNode *next;
};

#define HASH_TBL_LEN 20001

// Number of keys generated by main() and size of the node pool.
#define KEY_COUNT 10000u

// Index of the key used by test_hash() for the find/delete check.
#define PROBE_INDEX 5000u

// One bucket: head of the chain plus the number of nodes in the chain.
struct HashNode
{
        DataNode *node;
        int count;
} hash_tbl[HASH_TBL_LEN];

// Preallocated nodes; test_hash() hands them out one per key.
DataNode nodes[KEY_COUNT];

// Resets all `len` buckets of `tbl` to the empty state.
void hash_init(struct HashNode *tbl, int len)
{
        for (int i = 0; i < len; i++)
        {
                tbl[i].node = NULL;
                tbl[i].count = 0;
        }
}

// Links `node` into the bucket selected by hash_func(node->key, len).
// A null `node` is ignored. The table does not take ownership of the node.
void hash_insert(struct HashNode *tbl, int len, DataNode *node)
{
        if (!node)
                return;

        unsigned index = hash_func(node->key, len);
        HashNode &bucket = tbl[index];

        if (bucket.node == NULL)
        {
                // First node of this bucket becomes the head.
                node->next = NULL;
                bucket.node = node;
                bucket.count = 1;
        }
        else
        {
                // Splice the new node in directly after the existing head.
                node->next = bucket.node->next;
                bucket.node->next = node;
                bucket.count++;
        }
}

// Returns the first node in the table whose key equals `key`,
// or NULL when there is none.
DataNode *hash_find(struct HashNode *tbl, int len, string key)
{
        unsigned index = hash_func(key, len);
        for (DataNode *cur = tbl[index].node; cur != NULL; cur = cur->next)
        {
                if (cur->key == key)
                        return cur;
        }
        return NULL;
}

// Unlinks the first node whose key equals `key` from its bucket, if any.
// The node itself is not freed.
void hash_delete(struct HashNode *tbl, int len, string key)
{
        unsigned index = hash_func(key, len);
        HashNode &bucket = tbl[index];

        DataNode *prev = NULL;
        for (DataNode *cur = bucket.node; cur != NULL; prev = cur, cur = cur->next)
        {
                if (cur->key != key)
                        continue;

                if (prev == NULL)
                        bucket.node = cur->next;   // removing the chain head
                else
                        prev->next = cur->next;

                // Keep the per-bucket count in sync for head removals too;
                // otherwise the collision/empty statistics are wrong.
                bucket.count--;
                return;
        }
}

// Fills the global table with `keys` using the current hash_func, checks
// find/delete on one key, and prints the number of buckets holding more than
// one key ("collision") and the number of empty buckets ("empty").
void test_hash(const vector<string> &keys)
{
        hash_init(hash_tbl, HASH_TBL_LEN);

        // Never hand out more nodes than the pool contains.
        vector<string>::size_type usable = keys.size();
        if (usable > KEY_COUNT)
                usable = KEY_COUNT;

        for (vector<string>::size_type i = 0; i < usable; ++i)
        {
                DataNode *node = nodes + i;
                node->next = NULL;
                node->key = keys[i];
                hash_insert(hash_tbl, HASH_TBL_LEN, node);
        }

        // Only run the find/delete check when the probe key was inserted.
        if (usable > PROBE_INDEX)
        {
                const string &probe = keys[PROBE_INDEX];

                DataNode *found = hash_find(hash_tbl, HASH_TBL_LEN, probe);
                if (found)
                        cout << "find: " << probe << endl;

                hash_delete(hash_tbl, HASH_TBL_LEN, probe);
                found = hash_find(hash_tbl, HASH_TBL_LEN, probe);
                if (!found)
                        cout << "delete : " << probe << endl;
        }

        int collision_cnt = 0;
        int empty_cnt = 0;
        for (int j = 0; j < HASH_TBL_LEN; j++)
        {
                if (hash_tbl[j].count > 1)
                        collision_cnt++;
                if (hash_tbl[j].count == 0)
                        empty_cnt++;
        }
        cout << "collision: " << collision_cnt << "  empty: " << empty_cnt << endl;
}

// Characters that random keys are built from.
const char *alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

// Generates KEY_COUNT random alphanumeric keys of 6 to 26 characters and runs
// the same experiment with each of the three hash functions.
int main()
{
        srand((unsigned)time(NULL));
        int alpha_len = strlen(alpha);

        vector<string> keys;
        for (unsigned i = 0; i < KEY_COUNT; i++)
        {
                int len = rand() % 21 + 6;
                string key;
                for (int j = 0; j < len; j++)
                {
                        int k = rand() % alpha_len;
                        key += (alpha[k]);
                }
                keys.push_back(key);
        }

        cout << "bernstein hash" << endl;
        hash_func = bernstein;
        test_hash(keys);
        cout << "-------------------------------" << endl;

        cout << "additiveHash hash" << endl;
        hash_func = additiveHash;
        test_hash(keys);
        cout << "-------------------------------" << endl;

        cout << "rotatingHash hash" << endl;
        hash_func = rotatingHash;
        test_hash(keys);

        return 0;
}


原创粉丝点击