8种经典hash算法c#实现----适用于bloom filter 的K个散列函数
来源:互联网 发布:new3ds淘宝哪家店靠谱 编辑:程序博客网 时间:2024/04/27 16:36
using System;using System.Collections.Generic;using System.Linq;using System.Text;namespace WindowsFormsApplication1{ static class HashCode { // BKDR Hash Function public static int Hash1(string str) { int seed = 131; // 31 131 1313 13131 131313 etc.. int hash = 0; int count; char[] bitarray=str .ToCharArray (); count = bitarray.Length; while (count>0) { hash = hash * seed + (bitarray [bitarray .Length -count ]); count--; } return (hash & 0x7FFFFFFF); } //AP hash function public static int Hash2(string str) { int hash = 0; int i; int count; char[] bitarray = str.ToCharArray(); count = bitarray.Length; for (i=0; i<count ; i++) { if ((i & 1) == 0) { hash ^= ((hash << 7) ^ (bitarray [i]) ^ (hash >> 3)); } else { hash ^= (~((hash << 11) ^ (bitarray [i]) ^ (hash >> 5))); } count--; } return (hash & 0x7FFFFFFF); } //SDBM Hash function public static int Hash3(string str){ int hash = 0; int i; int count; char[] bitarray = str.ToCharArray(); count = bitarray.Length; while (count >0) { // equivalent to: hash = 65599*hash + (*str++); hash = (bitarray [bitarray .Length -count ]) + (hash << 6) + (hash << 16) - hash; count--; } return (hash & 0x7FFFFFFF);} // RS Hash Function public static int Hash4(string str){ int b = 378551; int a = 63689; int hash = 0; int i; int count; char[] bitarray = str.ToCharArray(); count = bitarray.Length; while (count >0) { hash = hash * a + (bitarray [bitarray .Length -count ]); a *= b; count--; } return (hash & 0x7FFFFFFF);} // JS Hash Function public static int Hash5(string str){ int hash = 1315423911; int count; char[] bitarray = str.ToCharArray(); count = bitarray.Length; while (count >0) { hash ^= ((hash << 5) + (bitarray [bitarray .Length -count ]) + (hash >> 2)); count--; } return (hash & 0x7FFFFFFF);} // P. J. Weinberger Hash Function public static int Hash6(string str){ int BitsInUnignedInt = ( int)(sizeof( int) * 8); int ThreeQuarters = ( int)((BitsInUnignedInt * 3) / 4); int OneEighth = ( int)(BitsInUnignedInt / 8); int hash = 0; unchecked { int HighBits = ( int)(0xFFFFFFFF) << (BitsInUnignedInt - OneEighth); int test = 0; int count; char[] bitarray = str.ToCharArray(); count = bitarray.Length; while (count >0) { hash = (hash << OneEighth) + (bitarray [bitarray .Length -count ]); if ((test = hash & HighBits) != 0) { hash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits)); } count--; } } return (hash & 0x7FFFFFFF);} // ELF Hash Function public static int Hash7(string str){ int hash = 0; int x = 0; int i; int count; char[] bitarray = str.ToCharArray(); count = bitarray.Length; unchecked { while (count >0) { hash = (hash << 4) + (bitarray [bitarray .Length -count ]); if ((x = hash & (int)0xF0000000) != 0) { hash ^= (x >> 24); hash &= ~x; } count--; } } return (hash & 0x7FFFFFFF);} // DJB Hash Function public static int Hash8(string str){ int hash = 5381; int i; int count; char[] bitarray = str.ToCharArray(); count = bitarray.Length; while (count >0) { hash += (hash << 5) + (bitarray [bitarray .Length -count ]); count--; } return (hash & 0x7FFFFFFF);} }}
在处理大数据统计时,运用了bloom filter方法,里面使用了如上的散列函数,我只能说,内存确实降下来了。精确率确实还不错。但是,散列函数怎么会那么耗时啊。