常用hash算法及评测

来源:互联网 发布:淘宝运营助理工作职责 编辑:程序博客网 时间:2024/05/19 04:06
常用hash算法及评测 [原创 2010-11-3 11:17:38]
我顶字号:
RS hash 算法
/*
 * RS hash: for each of the first `len` bytes of `str`, multiply the running
 * hash by a multiplier, add the byte, then advance the multiplier itself.
 *
 * str - buffer to hash; exactly `len` bytes are read, no NUL terminator needed.
 * len - number of bytes to hash; 0 returns 0.
 *
 * Returns the hash. All arithmetic is done on unsigned int and wraps around.
 */
unsigned int RSHash(char* str, unsigned int len)
{
    /* Read the input as unsigned char: plain char may be signed, and a
     * negative value for bytes >= 0x80 would make the result depend on the
     * platform's char signedness. */
    const unsigned char *bytes = (const unsigned char *)str;
    const unsigned int step = 378551; /* factor applied to the multiplier every round */
    unsigned int mult = 63689;        /* multiplier used for the first byte */
    unsigned int hash = 0;
    unsigned int i;

    for (i = 0; i < len; i++) {
        hash = hash * mult + bytes[i];
        mult *= step;
    }
    return hash;
}
/* End Of RS Hash Function */

JS hash 算法
/*
 * JS hash: starting from a fixed seed, XOR into the hash the sum of
 * (hash << 5), the current byte and (hash >> 2), once per input byte.
 *
 * str - buffer to hash; exactly `len` bytes are read, no NUL terminator needed.
 * len - number of bytes to hash; 0 returns the seed 1315423911.
 *
 * Returns the hash. All arithmetic is done on unsigned int and wraps around.
 */
unsigned int JSHash(char* str, unsigned int len)
{
    /* Read the input as unsigned char so bytes >= 0x80 are not sign-extended
     * on platforms where plain char is signed. */
    const unsigned char *bytes = (const unsigned char *)str;
    unsigned int hash = 1315423911;
    unsigned int i;

    for (i = 0; i < len; i++) {
        const unsigned int mix = (hash << 5) + bytes[i] + (hash >> 2);
        hash ^= mix;
    }
    return hash;
}
/* End Of JS Hash Function */

PJW hash 算法
/*
 * PJW hash: shift the hash left by one eighth of the width of unsigned int,
 * add the byte, and whenever any of the top one-eighth bits become set, fold
 * them back into the low part and clear them.
 *
 * str - buffer to hash; exactly `len` bytes are read, no NUL terminator needed.
 * len - number of bytes to hash; 0 returns 0.
 *
 * Returns the hash; the top one-eighth bits of the result are always zero.
 */
unsigned int PJWHash(char* str, unsigned int len)
{
    /* Read the input as unsigned char: a sign-extended negative char would
     * set every high bit and trigger the fold below spuriously, and the
     * result would depend on the platform's char signedness. */
    const unsigned char *bytes = (const unsigned char *)str;

    /* Shift amounts are derived from the width of unsigned int (the "* 8"
     * assumes 8-bit bytes). For a 32-bit unsigned int this gives 4, 24 and a
     * mask of 0xF0000000. */
    const unsigned int total_bits = (unsigned int)(sizeof(unsigned int) * 8);
    const unsigned int fold_shift = (unsigned int)((total_bits * 3) / 4);
    const unsigned int step_shift = (unsigned int)(total_bits / 8);
    const unsigned int high_mask = (unsigned int)(0xFFFFFFFF) << (total_bits - step_shift);

    unsigned int hash = 0;
    unsigned int overflow;
    unsigned int i;

    for (i = 0; i < len; i++) {
        hash = (hash << step_shift) + bytes[i];
        overflow = hash & high_mask;
        if (overflow != 0) {
            /* Mix the overflowing bits into the low part, then drop them. */
            hash = (hash ^ (overflow >> fold_shift)) & ~high_mask;
        }
    }
    return hash;
}
/* End Of P. J. Weinberger Hash Function */

ELF hash 算法
/*
 * ELF hash: shift the hash left by 4, add the byte, and whenever any of bits
 * 28..31 become set, XOR them into bits 4..7 and clear them.
 *
 * str - buffer to hash; exactly `len` bytes are read, no NUL terminator needed.
 * len - number of bytes to hash; 0 returns 0.
 *
 * Returns the hash; bits 28..31 of the result are always zero.
 */
unsigned int ELFHash(char* str, unsigned int len)
{
    /* Read the input as unsigned char: a sign-extended negative char would
     * set bits 28..31 and trigger the fold below spuriously, and the result
     * would depend on the platform's char signedness. */
    const unsigned char *bytes = (const unsigned char *)str;
    unsigned int hash = 0;
    unsigned int top;
    unsigned int i;

    for (i = 0; i < len; i++) {
        hash = (hash << 4) + bytes[i];
        top = hash & 0xF0000000u;
        if (top != 0) {
            hash ^= top >> 24;
            /* Clearing is only needed when a top bit was set; with top == 0
             * the mask would be all ones and change nothing. */
            hash &= ~top;
        }
    }
    return hash;
}
/* End Of ELF Hash Function */

BKDR hash 算法
/*
 * BKDR hash: polynomial hash, hash = hash * seed + byte for every input byte.
 *
 * str - buffer to hash; exactly `len` bytes are read, no NUL terminator needed.
 * len - number of bytes to hash; 0 returns 0.
 *
 * Returns the hash. All arithmetic is done on unsigned int and wraps around.
 */
unsigned int BKDRHash(char* str, unsigned int len)
{
    /* Read the input as unsigned char so bytes >= 0x80 are not sign-extended
     * on platforms where plain char is signed. */
    const unsigned char *bytes = (const unsigned char *)str;
    const unsigned int seed = 131; /* 31 131 1313 13131 131313 etc.. */
    unsigned int hash = 0;
    unsigned int i;

    for (i = 0; i < len; i++) {
        hash = hash * seed + bytes[i];
    }
    return hash;
}
/* End Of BKDR Hash Function */

SDBM hash 算法
/*
 * SDBM hash: hash = byte + (hash << 6) + (hash << 16) - hash for every input
 * byte, which equals hash * 65599 + byte in wrapping unsigned arithmetic.
 *
 * str - buffer to hash; exactly `len` bytes are read, no NUL terminator needed.
 * len - number of bytes to hash; 0 returns 0.
 *
 * Returns the hash. All arithmetic is done on unsigned int and wraps around.
 */
unsigned int SDBMHash(char* str, unsigned int len)
{
    /* Read the input as unsigned char so bytes >= 0x80 are not sign-extended
     * on platforms where plain char is signed. */
    const unsigned char *bytes = (const unsigned char *)str;
    unsigned int hash = 0;
    unsigned int i;

    for (i = 0; i < len; i++) {
        hash = bytes[i] + (hash << 6) + (hash << 16) - hash;
    }
    return hash;
}
/* End Of SDBM Hash Function */

DJB hash 算法
/*
 * DJB hash: starting from 5381, hash = hash * 33 + byte for every input byte
 * (the multiplication is written as (hash << 5) + hash).
 *
 * str - buffer to hash; exactly `len` bytes are read, no NUL terminator needed.
 * len - number of bytes to hash; 0 returns the start value 5381.
 *
 * Returns the hash. All arithmetic is done on unsigned int and wraps around.
 */
unsigned int DJBHash(char* str, unsigned int len)
{
    /* Read the input as unsigned char so bytes >= 0x80 are not sign-extended
     * on platforms where plain char is signed. */
    const unsigned char *bytes = (const unsigned char *)str;
    unsigned int hash = 5381;
    unsigned int i;

    for (i = 0; i < len; i++) {
        hash = ((hash << 5) + hash) + bytes[i];
    }
    return hash;
}
/* End Of DJB Hash Function */

DEK hash 算法
/*
 * DEK hash: the hash starts as the input length; for every byte it is
 * replaced by (hash << 5) ^ (hash >> 27) ^ byte. With a 32-bit unsigned int
 * the two shifts together form a rotate-left by 5.
 *
 * str - buffer to hash; exactly `len` bytes are read, no NUL terminator needed.
 * len - number of bytes to hash, also used as the start value; 0 returns 0.
 *
 * Returns the hash.
 */
unsigned int DEKHash(char* str, unsigned int len)
{
    /* Read the input as unsigned char: XOR-ing a sign-extended negative char
     * would flip the upper 24 bits of the hash and make the result depend on
     * the platform's char signedness. */
    const unsigned char *bytes = (const unsigned char *)str;
    unsigned int hash = len;
    unsigned int i;

    for (i = 0; i < len; i++) {
        const unsigned int rotated = (hash << 5) ^ (hash >> 27);
        hash = rotated ^ bytes[i];
    }
    return hash;
}
/* End Of DEK Hash Function */

BP hash 算法
/*
 * BP hash: hash = (hash << 7) ^ byte for every input byte.
 *
 * Because each step shifts by 7 bits, a byte's contribution is shifted out
 * completely after five further bytes when unsigned int is 32 bits wide, so
 * only the last five bytes of the input influence the result.
 *
 * str - buffer to hash; exactly `len` bytes are read, no NUL terminator needed.
 * len - number of bytes to hash; 0 returns 0.
 *
 * Returns the hash.
 */
unsigned int BPHash(char* str, unsigned int len)
{
    /* Read the input as unsigned char: XOR-ing a sign-extended negative char
     * would flip the upper bits of the hash and make the result depend on the
     * platform's char signedness. */
    const unsigned char *bytes = (const unsigned char *)str;
    unsigned int hash = 0;
    unsigned int i;

    for (i = 0; i < len; i++) {
        hash = (hash << 7) ^ bytes[i];
    }
    return hash;
}
/* End Of BP Hash Function */

FNV hash 算法
/*
 * FNV hash (32-bit FNV-1): start from the offset basis, then for every input
 * byte multiply the hash by the FNV prime and XOR the byte in.
 *
 * The two 32-bit FNV parameters are distinct values:
 *   offset basis = 0x811C9DC5 (2166136261) - the start value,
 *   prime        = 0x01000193 (16777619)   - the per-byte multiplier.
 * 0x811C9DC5 must not be used as the multiplier.
 *
 * str - buffer to hash; exactly `len` bytes are read, no NUL terminator needed.
 * len - number of bytes to hash; 0 returns the offset basis 0x811C9DC5.
 *
 * Returns the hash. The multiplication wraps in unsigned int; the result is
 * the standard 32-bit FNV-1 value when unsigned int is 32 bits wide.
 */
unsigned int FNVHash(char* str, unsigned int len)
{
    /* FNV is defined over octets: read the input as unsigned char so bytes
     * >= 0x80 are not sign-extended where plain char is signed. */
    const unsigned char *bytes = (const unsigned char *)str;
    const unsigned int fnv_offset_basis = 0x811C9DC5u;
    const unsigned int fnv_prime = 0x01000193u;
    unsigned int hash = fnv_offset_basis;
    unsigned int i;

    for (i = 0; i < len; i++) {
        hash *= fnv_prime;
        hash ^= bytes[i];
    }
    return hash;
}
/* End Of FNV Hash Function */

AP hash 算法
/*
 * AP hash: starting from 0xAAAAAAAA, alternate between two mixing steps
 * depending on whether the byte index is even or odd.
 *
 *   even index: hash ^= (hash << 7) ^ (byte * (hash >> 3))
 *   odd index:  hash ^= ~(((hash << 11) + byte) ^ (hash >> 5))
 *
 * str - buffer to hash; exactly `len` bytes are read, no NUL terminator needed.
 * len - number of bytes to hash; 0 returns the start value 0xAAAAAAAA.
 *
 * Returns the hash. All arithmetic is done on unsigned int and wraps around.
 */
unsigned int APHash(char* str, unsigned int len)
{
    /* Read the input as unsigned char so bytes >= 0x80 are not sign-extended
     * on platforms where plain char is signed. */
    const unsigned char *bytes = (const unsigned char *)str;
    unsigned int hash = 0xAAAAAAAA;
    unsigned int i;

    for (i = 0; i < len; i++) {
        const unsigned int c = bytes[i];

        /* The parentheses spell out C operator precedence: '*' binds tighter
         * than '^', and '+' binds tighter than '^'. */
        if ((i & 1) == 0) {
            hash ^= (hash << 7) ^ (c * (hash >> 3));
        } else {
            hash ^= ~(((hash << 11) + c) ^ (hash >> 5));
        }
    }
    return hash;
}
/* End Of AP Hash Function */

各种算法评测
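| Hash函数 | 数据1 | 数据2 | 数据3 | 数据4 | 数据1得分 | 数据2得分 | 数据3得分 | 数据4得分 | 平均分 |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| BKDRHash | 2 | 0 | 4774 | 481 | 96.55 | 100 | 90.95 | 82.05 | 92.64 |
| APHash | 2 | 3 | 4754 | 493 | 96.55 | 88.46 | 100 | 51.28 | 86.28 |
| DJBHash | 2 | 2 | 4975 | 474 | 96.55 | 92.31 | 0 | 100 | 83.43 |
| JSHash | 1 | 4 | 4761 | 506 | 100 | 84.62 | 96.83 | 17.95 | 81.94 |
| RSHash | 1 | 0 | 4861 | 505 | 100 | 100 | 51.58 | 20.51 | 75.96 |
| SDBMHash | 3 | 2 | 4849 | 504 | 93.1 | 92.31 | 57.01 | 23.08 | 72.41 |
| PJWHash | 30 | 26 | 4878 | 513 | 0 | 0 | 43.89 | 0 | 21.95 |
| ELFHash | 30 | 26 | 4878 | 513 | 0 | 0 | 43.89 | 0 | 21.95 |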
其中
数据1为100000个字母和数字组成的随机串哈希冲突个数。
数据2为100000个有意义的英文句子哈希冲突个数。
数据3为数据1的哈希值与1000003(大素数)求模后存储到线性表中冲突的个数。
数据4为数据1的哈希值与10000019(更大素数)求模后存储到线性表中冲突的个数。
经过比较,得出以上平均得分。平均数为平方平均数。
可以发现,
BKDRHash无论是在实际效果还是编码实现中,效果都是最突出的。
APHash也是较为优秀的算法。DJBHash,JSHash,RSHash与SDBMHash各有千秋。
PJWHash与ELFHash效果最差,但得分相似,其算法本质是相似的。
原创粉丝点击