散列函数(哈希函数)算法

来源:互联网 发布:mac怎么切换独立显卡 编辑:程序博客网 时间:2024/06/05 01:14

常用字符串哈希函数有BKDRHash,APHash,DJBHash,JSHash,RSHash,SDBMHash,PJWHash,ELFHash等等。

C++代码实现:

// Collection of classic string hash functions (RS, JS, PJW, ELF, BKDR, SDBM,
// DJB, AP) plus a small driver that prints each hash of one sample string.
//
// Every function walks a NUL-terminated string, folds each character into an
// unsigned int accumulator (unsigned arithmetic wraps on overflow), and
// returns the accumulator reduced modulo M2.
//
// NOTE: characters are added as plain `char`, so bytes >= 0x80 contribute
// differently depending on whether `char` is signed on the target platform.
// Pure ASCII input is unaffected.
#include <iostream>

// Moduli described as "large primes" by the original author.
// Only M2 is used by the functions in this file.
#define M  249997
#define M1 1000003
#define M2 10000019

using namespace std;

// RS Hash Function
// The multiplier `a` itself changes on every step (a *= b).
// Returns the hash of `str` in the range [0, M2).
unsigned int RSHash(const char *str)
{
    unsigned int b = 378551;
    unsigned int a = 63689;
    unsigned int hash = 0;

    while (*str)
    {
        hash = hash * a + (*str++);
        a *= b;
    }

    return (hash % M2);
}

// JS Hash Function
// XORs the accumulator with a mix of its own shifted copies and the next
// character. Returns the hash of `str` in the range [0, M2).
unsigned int JSHash(const char *str)
{
    unsigned int hash = 1315423911;

    while (*str)
    {
        hash ^= ((hash << 5) + (*str++) + (hash >> 2));
    }

    return (hash % M2);
}

// P. J. Weinberger Hash Function
// Shifts the accumulator left by one eighth of the word width per character;
// whenever bits reach the top eighth they are folded back into the low part
// and cleared. Returns the hash of `str` in the range [0, M2).
unsigned int PJWHash(const char *str)
{
    unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
    unsigned int ThreeQuarters = (unsigned int)((BitsInUnsignedInt * 3) / 4);
    unsigned int OneEighth = (unsigned int)(BitsInUnsignedInt / 8);
    // Mask selecting the top OneEighth bits of the word.
    unsigned int HighBits = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);
    unsigned int hash = 0;
    unsigned int test = 0;

    while (*str)
    {
        hash = (hash << OneEighth) + (*str++);
        if ((test = hash & HighBits) != 0)
        {
            // Fold the overflowing high bits back in, then clear them.
            hash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits));
        }
    }

    return (hash % M2);
}

// ELF Hash Function
// Same scheme as PJWHash with the constants fixed for a 32-bit word:
// shift by 4, fold the top nibble back with a shift of 24.
// Returns the hash of `str` in the range [0, M2).
unsigned int ELFHash(const char *str)
{
    unsigned int hash = 0;
    unsigned int x = 0;

    while (*str)
    {
        hash = (hash << 4) + (*str++);
        if ((x = hash & 0xF0000000u) != 0)
        {
            hash ^= (x >> 24);
            hash &= ~x;
        }
    }

    return (hash % M2);
}

// BKDR Hash Function
// Polynomial hash: hash = hash * seed + c for each character.
// Returns the hash of `str` in the range [0, M2).
unsigned int BKDRHash(const char *str)
{
    unsigned int seed = 131; // 31 131 1313 13131 131313 etc..
    unsigned int hash = 0;

    while (*str)
    {
        hash = hash * seed + (*str++);
    }

    return (hash % M2);
}

// SDBM Hash Function
// (hash << 6) + (hash << 16) - hash equals hash * 65599, so this is a
// polynomial hash with multiplier 65599.
// Returns the hash of `str` in the range [0, M2).
unsigned int SDBMHash(const char *str)
{
    unsigned int hash = 0;

    while (*str)
    {
        hash = (*str++) + (hash << 6) + (hash << 16) - hash;
    }

    return (hash % M2);
}

// DJB Hash Function
// hash += (hash << 5) + c, i.e. hash = hash * 33 + c, starting from 5381.
// Returns the hash of `str` in the range [0, M2).
unsigned int DJBHash(const char *str)
{
    unsigned int hash = 5381;

    while (*str)
    {
        hash += (hash << 5) + (*str++);
    }

    return (hash % M2);
}

// AP Hash Function
// Alternates between two mixing formulas depending on whether the character
// index is even or odd. Returns the hash of `str` in the range [0, M2).
unsigned int APHash(const char *str)
{
    unsigned int hash = 0;
    int i;

    // `str` is advanced inside each branch; `i` only tracks the parity.
    for (i = 0; *str; i++)
    {
        if ((i & 1) == 0)
        {
            hash ^= ((hash << 7) ^ (*str++) ^ (hash >> 3));
        }
        else
        {
            hash ^= (~((hash << 11) ^ (*str++) ^ (hash >> 5)));
        }
    }

    return (hash % M2);
}

// Prints every hash of the sample string "abcdefg", one per line.
int main()
{
    // String literals are const in C++; binding one to `char *` is ill-formed
    // since C++11.
    const char *str = "abcdefg";

    cout << "RSHash:" << RSHash(str) << endl;
    cout << "JSHash:" << JSHash(str) << endl;
    cout << "PJWHash:" << PJWHash(str) << endl;
    cout << "ELFHash:" << ELFHash(str) << endl;
    cout << "BKDRHash:" << BKDRHash(str) << endl;
    cout << "SDBMHash:" << SDBMHash(str) << endl;
    cout << "DJBHash:" << DJBHash(str) << endl;
    cout << "APHash:" << APHash(str) << endl;
    return 0;
}

测试代码:测试SDBMHash

/*
 * Small experiment: hash the 32 C keywords with SDBMHash into BUCKETS slots
 * and print, for each keyword, its bucket and how many keywords share it.
 */
#include <stdio.h>

#define BUCKETS 101

/*
 * SDBM string hash reduced to a bucket index.
 *
 * str: NUL-terminated string to hash (not modified).
 * Returns a value in the range [0, BUCKETS).
 */
unsigned int SDBMHash(const char *str)
{
    unsigned int hash = 0;

    while (*str)
    {
        /* equivalent to: hash = 65599*hash + (*str++); */
        hash = (*str++) + (hash << 6) + (hash << 16) - hash;
    }

    /* Clear the top bit before reducing to a bucket index. */
    return (hash & 0x7FFFFFFF) % BUCKETS;
}

int main(void)
{
    const char *keywords[] =
    {
        "auto", "break", "case", "char", "const", "continue", "default", "do",
        "double", "else", "enum", "extern", "float", "for", "goto", "if",
        "int", "long", "register", "return", "short", "signed", "sizeof", "static",
        "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"
    };

    /* Number of keywords mapped to each bucket;
     * count[0] is the number of hits on bucket 0. */
    int count[BUCKETS] = {0};
    size_t size = sizeof(keywords) / sizeof(keywords[0]);
    size_t i;

    /* First pass: tally how many keywords land in each bucket. */
    for (i = 0; i < size; i++)
    {
        int pos = (int)SDBMHash(keywords[i]);
        count[pos]++;
    }

    /* Second pass: report keyword, bucket index, and bucket occupancy. */
    for (i = 0; i < size; i++)
    {
        int pos = (int)SDBMHash(keywords[i]);
        printf("%-10s %d %d\n", keywords[i], pos, count[pos]);
    }

    return 0;
}


对于以上几种哈希函数,对其进行了一个小小的评测。

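| Hash函数 | 数据1 | 数据2 | 数据3 | 数据4 | 数据1得分 | 数据2得分 | 数据3得分 | 数据4得分 | 平均分 |
|---|---|---|---|---|---|---|---|---|---|
| BKDRHash | 2 | 0 | 4774 | 481 | 96.55 | 100 | 90.95 | 82.05 | 92.64 |
| APHash | 2 | 3 | 4754 | 493 | 96.55 | 88.46 | 100 | 51.28 | 86.28 |
| DJBHash | 2 | 2 | 4975 | 474 | 96.55 | 92.31 | 0 | 100 | 83.43 |
| JSHash | 1 | 4 | 4761 | 506 | 100 | 84.62 | 96.83 | 17.95 | 81.94 |
| RSHash | 1 | 0 | 4861 | 505 | 100 | 100 | 51.58 | 20.51 | 75.96 |
| SDBMHash | 3 | 2 | 4849 | 504 | 93.1 | 92.31 | 57.01 | 23.08 | 72.41 |
| PJWHash | 30 | 26 | 4878 | 513 | 0 | 0 | 43.89 | 0 | 21.95 |
| ELFHash | 30 | 26 | 4878 | 513 | 0 | 0 | 43.89 | 0 | 21.95 |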

 

其中数据1为100000个字母和数字组成的随机串哈希冲突个数。数据2为100000个有意义的英文句子哈希冲突个数。数据3为数据1的哈希值与1000003(大素数)求模后存储到线性表中冲突的个数。数据4为数据1的哈希值与10000019(更大素数)求模后存储到线性表中冲突的个数。

经过比较,得出以上平均得分。平均数为平方平均数。可以发现,BKDRHash无论是在实际效果还是编码实现中,效果都是最突出的。APHash也是较为优秀的算法。DJBHash,JSHash,RSHash与SDBMHash各有千秋。PJWHash与ELFHash效果最差,但得分相似,其算法本质是相似的。

0 0
原创粉丝点击