8 种经典 Hash 算法的 C# 实现——适用于 Bloom Filter 的 K 个散列函数

来源:互联网 发布:new3ds淘宝哪家店靠谱 编辑:程序博客网 时间:2024/04/27 16:36
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace WindowsFormsApplication1
{
    /// <summary>
    /// Eight classic non-cryptographic string hash functions (BKDR, AP, SDBM, RS, JS,
    /// PJW, ELF, DJB), e.g. for use as the K hash functions of a Bloom filter.
    /// </summary>
    /// <remarks>
    /// Every function hashes the UTF-16 code units of the input in order, computes in
    /// wrapping 32-bit unsigned arithmetic (so right shifts are logical, as in the
    /// published C versions), and returns the low 31 bits so the result is never negative.
    /// None of these functions is suitable for security purposes.
    /// </remarks>
    internal static class HashCode
    {
        // Clears the sign bit so the value fits a non-negative int.
        private const uint PositiveMask = 0x7FFFFFFF;

        /// <summary>BKDR hash: hash = hash * 131 + c for every character.</summary>
        /// <param name="str">Text to hash; must not be null.</param>
        /// <returns>A value in the range 0..int.MaxValue; 0 for the empty string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static int Hash1(string str)
        {
            RequireNotNull(str);

            const uint seed = 131; // 31 131 1313 13131 131313 etc..
            uint hash = 0;
            unchecked
            {
                foreach (char c in str)
                {
                    hash = hash * seed + c;
                }
            }

            return ToNonNegative(hash);
        }

        /// <summary>AP hash: alternates two mixing steps on even and odd character positions.</summary>
        /// <param name="str">Text to hash; must not be null.</param>
        /// <returns>A value in the range 0..int.MaxValue; 0 for the empty string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static int Hash2(string str)
        {
            RequireNotNull(str);

            uint hash = 0;
            unchecked
            {
                // Every character is visited. The loop bound must not shrink while
                // iterating, otherwise only the first half of the string is hashed.
                for (int i = 0; i < str.Length; i++)
                {
                    uint c = str[i];
                    if ((i & 1) == 0)
                    {
                        hash ^= (hash << 7) ^ c ^ (hash >> 3);
                    }
                    else
                    {
                        hash ^= ~((hash << 11) ^ c ^ (hash >> 5));
                    }
                }
            }

            return ToNonNegative(hash);
        }

        /// <summary>SDBM hash: hash = 65599 * hash + c, written with shifts.</summary>
        /// <param name="str">Text to hash; must not be null.</param>
        /// <returns>A value in the range 0..int.MaxValue; 0 for the empty string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static int Hash3(string str)
        {
            RequireNotNull(str);

            uint hash = 0;
            unchecked
            {
                foreach (char c in str)
                {
                    // 64 * hash + 65536 * hash - hash == 65599 * hash
                    hash = c + (hash << 6) + (hash << 16) - hash;
                }
            }

            return ToNonNegative(hash);
        }

        /// <summary>RS hash: multiplicative hash whose multiplier itself changes per character.</summary>
        /// <param name="str">Text to hash; must not be null.</param>
        /// <returns>A value in the range 0..int.MaxValue; 0 for the empty string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static int Hash4(string str)
        {
            RequireNotNull(str);

            const uint b = 378551;
            uint a = 63689;
            uint hash = 0;
            unchecked
            {
                foreach (char c in str)
                {
                    hash = hash * a + c;
                    a *= b;
                }
            }

            return ToNonNegative(hash);
        }

        /// <summary>JS hash: xor-folds shifted copies of the running hash plus the character.</summary>
        /// <param name="str">Text to hash; must not be null.</param>
        /// <returns>A value in the range 0..int.MaxValue; 1315423911 for the empty string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static int Hash5(string str)
        {
            RequireNotNull(str);

            uint hash = 1315423911;
            unchecked
            {
                foreach (char c in str)
                {
                    hash ^= (hash << 5) + c + (hash >> 2);
                }
            }

            return ToNonNegative(hash);
        }

        /// <summary>P. J. Weinberger hash: shifts in 4 bits per character and folds the top nibble back.</summary>
        /// <param name="str">Text to hash; must not be null.</param>
        /// <returns>A value in the range 0..int.MaxValue; 0 for the empty string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static int Hash6(string str)
        {
            RequireNotNull(str);

            const int bitsInUnsignedInt = 32;
            const int threeQuarters = (bitsInUnsignedInt * 3) / 4;              // 24
            const int oneEighth = bitsInUnsignedInt / 8;                        // 4
            const uint highBits = 0xFFFFFFFF << (bitsInUnsignedInt - oneEighth); // 0xF0000000

            uint hash = 0;
            unchecked
            {
                foreach (char c in str)
                {
                    hash = (hash << oneEighth) + c;
                    uint test = hash & highBits;
                    if (test != 0)
                    {
                        // Logical shift: only the four overflowed bits are folded back in.
                        hash = (hash ^ (test >> threeQuarters)) & ~highBits;
                    }
                }
            }

            return ToNonNegative(hash);
        }

        /// <summary>ELF hash: the PJW variant used by the UNIX ELF object format.</summary>
        /// <param name="str">Text to hash; must not be null.</param>
        /// <returns>A value in the range 0..int.MaxValue; 0 for the empty string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static int Hash7(string str)
        {
            RequireNotNull(str);

            uint hash = 0;
            unchecked
            {
                foreach (char c in str)
                {
                    hash = (hash << 4) + c;
                    uint x = hash & 0xF0000000;
                    if (x != 0)
                    {
                        // Logical shift keeps the fold confined to bits 4..7.
                        hash ^= x >> 24;
                        hash &= ~x;
                    }
                }
            }

            return ToNonNegative(hash);
        }

        /// <summary>DJB hash: hash = hash * 33 + c, starting from 5381.</summary>
        /// <param name="str">Text to hash; must not be null.</param>
        /// <returns>A value in the range 0..int.MaxValue; 5381 for the empty string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static int Hash8(string str)
        {
            RequireNotNull(str);

            uint hash = 5381;
            unchecked
            {
                foreach (char c in str)
                {
                    hash += (hash << 5) + c;
                }
            }

            return ToNonNegative(hash);
        }

        // Rejects null input with a descriptive exception instead of a NullReferenceException.
        private static void RequireNotNull(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }
        }

        // Drops the sign bit so callers can use the result directly as a non-negative value.
        private static int ToNonNegative(uint hash)
        {
            return (int)(hash & PositiveMask);
        }
    }
}
在处理大数据统计时，我运用了 Bloom Filter 方法，其中使用了如上的散列函数。我只能说，内存占用确实降下来了，精确率也确实还不错。但是，散列函数怎么会那么耗时啊？
原创粉丝点击