字符串hash函数

来源：互联网发布：繁体字转换软件编辑：程序博客网时间：2024/06/11 10:44

本文搜集了一些字符串的常用hash函数。

范例1：判断两个单词是否含有相同的字母，此时我们可以用hash做。例如，“aaabb”与"aabb"含有相同的单词。（参考：http://kmplayer.iteye.com/blog/656782）

#include <iostream>

// Maps a word to a 26-bit mask: bit i is set when the letter 'a' + i occurs
// in the word. Two words built from the same set of letters therefore get the
// same value, regardless of letter order or repetition ("aaabb" and "aabb"
// both map to 3). The whole signature fits in one int; std::bitset<26> would
// work just as well.
//
// a: NUL-terminated word. Characters outside 'a'..'z' are ignored, because
//    shifting by a negative amount or by >= the width of int is undefined.
// Returns the letter-set mask (always non-negative, at most 2^26 - 1).
int hash(const char* a)
{
    unsigned mask = 0;
    for (; *a != '\0'; ++a) {
        if (*a >= 'a' && *a <= 'z') {
            mask |= 1u << (*a - 'a');
        }
    }
    return static_cast<int>(mask);
}

// Demo: "abc" and "acb" use the same letters, so both lines print 7.
int main()
{
    // String literals are const; binding them to a plain char* is ill-formed
    // since C++11.
    const char* a = "abc";
    const char* b = "acb";
    // Names are qualified instead of `using namespace std;` so that the call
    // to hash() cannot be confused with the std::hash class template.
    std::cout << hash(a) << '\n';
    std::cout << hash(b) << '\n';
    return 0;
}

范例2：判断兄弟单词，兄弟单词定义为两个单词含有的所有字母相同，但是位置不同。例如“aaccdd”和“acdacd”。何海涛在《剑指offer》上的解法，定义bitcnt[26]，用一个字符串加计数，另一个字符串减计数。

我们这里为了素数的hash构造方法。特意用素数来hash，例如，a=2，b=3，c=5，然后整个单词的hash就是其乘积。

#include <iostream>
#include <cctype>

const int MAX = 200;
// Table of primes in increasing order; entry i is the prime assigned to the
// letter 'a' + i. Only the first three entries are valid until
// GeneratePrime() has been called.
int prime[MAX] = {2, 3, 5};

// 101 is the 26th prime, so generating all primes up to it gives every
// letter 'a'..'z' its own entry in the table.
const int LAST_LETTER_PRIME = 101;

// Fills prime[] with every prime not exceeding num, in increasing order, by
// trial division against the primes found so far. At most MAX primes are
// stored so the table can never be overrun.
// Returns the number of primes written.
int GeneratePrime(int num)
{
    int count = 0;
    for (int candidate = 2; candidate <= num && count < MAX; ++candidate) {
        bool isPrime = true;
        // Only divisors up to sqrt(candidate) need to be tried.
        for (int j = 0; j < count && prime[j] * prime[j] <= candidate; ++j) {
            if (candidate % prime[j] == 0) {
                isPrime = false;
                break;
            }
        }
        if (isPrime) {
            prime[count++] = candidate;
        }
    }
    return count;
}

// Hashes a word as the product of the primes assigned to its letters, so any
// two anagrams ("sibling words") get the same value.
//
// str: NUL-terminated word. Upper and lower case are treated alike;
//      characters that are not letters a-z are skipped.
// Precondition: GeneratePrime() has been called with num >= 101, otherwise
// the table entries for the later letters are still zero.
// The product is accumulated in unsigned arithmetic, so a long word wraps
// around modulo 2^64 instead of triggering signed-overflow undefined
// behaviour; after a wrap-around distinct letter multisets may collide.
long long Hash(const char str[])
{
    unsigned long long hashValue = 1;
    for (; *str != '\0'; ++str) {
        // tolower() requires a value representable as unsigned char.
        const int lower = std::tolower(static_cast<unsigned char>(*str));
        if (lower < 'a' || lower > 'z') {
            continue;
        }
        hashValue *= static_cast<unsigned long long>(prime[lower - 'a']);
    }
    return static_cast<long long>(hashValue);
}

// Demo: "abdc" -> 2 * 3 * 7 * 5 = 210.
int main()
{
    const char* str = "abdc";
    GeneratePrime(LAST_LETTER_PRIME);
    std::cout << Hash(str) << '\n';
    return 0;
}

范例3：统计单词的个数，C++ Primer中采用map的方法。本文采用介绍采用hash的方法。（参考：http://kmplayer.iteye.com/blog/647471）

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define WORDLENGTH 30   // size of the word buffer, including the NUL
#define WORDWIDTH  29   // WORDLENGTH - 1: the most characters scanf may store
#define NHASH 300       // number of buckets in the hash table

// Turn a macro's value into a string literal (used to build the scanf format).
#define STR_(x) #x
#define STR(x) STR_(x)

typedef struct node* nodeptr;

// One entry of a bucket's singly linked list.
typedef struct node
{
    char* word;     // heap-allocated copy of the word
    int cnt;        // number of times the word has been seen
    nodeptr next;   // next entry in the same bucket, or NULL
} node;

// Polynomial string hash: n = 31 * n + c for every character, reduced to a
// bucket index in [0, NHASH).
// 31 is the conventional multiplier for this scheme (it is the one used by
// Java's String.hashCode): it is odd and prime, and 31 * n can be computed
// as (n << 5) - n.
// Characters are read as unsigned char so the result does not depend on
// whether plain char is signed on the target platform.
int hash(const char* buf)
{
    unsigned n = 0;
    const char* p;
    for (p = buf; *p; p++)
        n = 31 * n + (unsigned char)*p;
    return (int)(n % NHASH);
}

// Bucket heads; collisions are resolved by chaining.
nodeptr hashTable[NHASH];

// Records one occurrence of the word in buf: increments its counter if it is
// already in the table, otherwise inserts a new entry with count 1 at the
// head of its bucket. Terminates the program if memory runs out.
void incword(const char* buf)
{
    int n = hash(buf);
    nodeptr p;
    for (p = hashTable[n]; p; p = p->next)
    {
        if (strcmp(p->word, buf) == 0)
        {
            p->cnt++;
            return;
        }
    }
    p = (nodeptr)malloc(sizeof(node));
    if (p == NULL)
    {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    p->word = (char*)malloc(strlen(buf) + 1);
    if (p->word == NULL)
    {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    strcpy(p->word, buf);
    p->cnt = 1;
    p->next = hashTable[n];
    hashTable[n] = p;
}

// Reads whitespace-separated words from genetic.txt, counts them, and prints
// every distinct word with its count, bucket by bucket.
int main()
{
    char buf[WORDLENGTH];
    int i;

    if (freopen("genetic.txt", "r", stdin) == NULL)
    {
        perror("genetic.txt");
        return EXIT_FAILURE;
    }

    // The field width keeps scanf from writing past the end of buf; a word
    // longer than WORDWIDTH characters is read as several pieces.
    while (scanf("%" STR(WORDWIDTH) "s", buf) == 1)
        incword(buf);

    for (i = 0; i < NHASH; i++)
    {
        nodeptr p = hashTable[i];
        while (p)
        {
            nodeptr next = p->next;
            printf("%s %d\n", p->word, p->cnt);
            // Release each entry once it has been printed.
            free(p->word);
            free(p);
            p = next;
        }
        hashTable[i] = NULL;
    }
    return 0;
}

有好的字符串hash，欢迎告之。