GNU工具-gperf详解(完美哈希函数生成器)

来源:互联网 发布:网络嗅探器5.5 编辑:程序博客网 时间:2024/06/07 14:12

1.gperf是干什么的?

GNU gperf is a perfect hash function generator. For a given list of strings, it produces a hash function and hash table, in form of C or C++ code, for looking up a value depending on the input string. The hash function is perfect, which means that the hash table has no collisions, and the hash table lookup needs a single string comparison only.

GNU gperf 是完美哈希函数生成器。对于给定的字符串集合,它会以 C 或 C++ 代码的形式生成哈希函数和哈希表,用于根据输入字符串查找对应的值。生成的哈希函数是完美的,即哈希表中没有冲突,并且每次查找只需要一次字符串比较。


2.下载安装(windows版)

下载地址:点击此处

下载后直接双击安装

设置环境变量:把安装目录下的 bin 目录添加到 PATH 中



3.使用方法

  gperf可以根据输入文件中的字符串集合生成hash函数和哈希表(生成完美哈希),一般根据字符串计算出无冲突hash值,然后进行字符串比较

 1.配置输入文件

输入文件格式实例c.gperf

if
do
int
for
case
char
auto
goto
else
long
void
enum
float
short
union
break
while
const
double
static
extern
struct
return
sizeof
switch
signed
typedef
default
unsigned
continue
register
volatile

jscript.h:(需要的头文件)

 /*
  * Token codes for the JavaScript reserved words listed in jscript.gperf.
  * The first code is 255; every following enumerator is one greater than
  * the previous, so the 53 codes run from 255 (TK_ABSTRACT) to 307 (TK_WITH).
  * The order is alphabetical by keyword and must not be changed without
  * regenerating anything that stores these numeric values.
  */
 enum {
     TK_ABSTRACT = 255,
     TK_BOOLEAN,
     TK_BREAK,
     TK_BYTE,
     TK_CASE,
     TK_CATCH,
     TK_CHAR,
     TK_CLASS,
     TK_CONST,
     TK_CONTINUE,
     TK_DEFAULT,
     TK_DO,
     TK_DOUBLE,
     TK_ELSE,
     TK_EXTENDS,
     TK_FALSE,
     TK_FINAL,
     TK_FINALLY,
     TK_FLOAT,
     TK_FOR,
     TK_FUNCTION,
     TK_GOTO,
     TK_IF,
     TK_IMPLEMENTS,
     TK_IMPORT,
     TK_IN,
     TK_INSTANCEOF,
     TK_INT,
     TK_INTERFACE,
     TK_LONG,
     TK_NATIVE,
     TK_NEW,
     TK_NULL,
     TK_PACKAGE,
     TK_PRIVATE,
     TK_PROTECTED,
     TK_PUBLIC,
     TK_RETURN,
     TK_SHORT,
     TK_STATIC,
     TK_SUPER,
     TK_SWITCH,
     TK_SYNCHRONIZED,
     TK_THIS,
     TK_THROW,
     TK_THROWS,
     TK_TRANSIENT,
     TK_TRUE,
     TK_TRY,
     TK_VAR,
     TK_VOID,
     TK_WHILE,
     TK_WITH,
 };



jscript.gperf文件实例:

%{
/* Command-line: gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf */
/* -g -o -j1 -t -p */
#include <stdio.h>
#include <string.h>
#include "jscript.h"
%}
struct js_keyword {
  char * name;
  int token;
};
%%
# Javascript reserved words, see "keywords.html" -- one "keyword, TOKEN" entry per line
abstract, TK_ABSTRACT
boolean, TK_BOOLEAN
break, TK_BREAK
byte, TK_BYTE
case, TK_CASE
catch, TK_CATCH
char, TK_CHAR
class, TK_CLASS
const, TK_CONST
continue, TK_CONTINUE
default, TK_DEFAULT
do, TK_DO
double, TK_DOUBLE
else, TK_ELSE
extends, TK_EXTENDS
false, TK_FALSE
final, TK_FINAL
finally, TK_FINALLY
float, TK_FLOAT
for, TK_FOR
function, TK_FUNCTION
goto, TK_GOTO
if, TK_IF
implements, TK_IMPLEMENTS
import, TK_IMPORT
in, TK_IN
instanceof, TK_INSTANCEOF
int, TK_INT
interface, TK_INTERFACE
long, TK_LONG
native, TK_NATIVE
new, TK_NEW
null, TK_NULL
package, TK_PACKAGE
private, TK_PRIVATE
protected, TK_PROTECTED
public, TK_PUBLIC
return, TK_RETURN
short, TK_SHORT
static, TK_STATIC
super, TK_SUPER
switch, TK_SWITCH
synchronized, TK_SYNCHRONIZED
this, TK_THIS
throw, TK_THROW
throws, TK_THROWS
transient, TK_TRANSIENT
true, TK_TRUE
try, TK_TRY
var, TK_VAR
void, TK_VOID
while, TK_WHILE
with, TK_WITH
%%

/*
 * Demo driver.  Everything after the second %% is copied verbatim into the
 * generated source, so the test program can live in this file and end up in
 * the final program.
 *
 * Looks up three reserved words and three near-misses and reports whether
 * each one is a JavaScript keyword; for a hit it also prints the stored
 * name and token code.
 */
int main(void)
{
    const char *js_word[] = {"protected", "throws", "with", NULL};
    const char *no_js_word[] = {"protectef", "throwd", "witp", NULL};
    const char **cp;

    for (cp = js_word; *cp != NULL; cp++)
    {
        /* The generated js_kw_lookup takes an unsigned int length, so the
           size_t returned by strlen is converted explicitly. */
        struct js_keyword *p = js_kw_lookup(*cp, (unsigned int)strlen(*cp));

        printf("%s is %s javascript expression!\n", *cp, p ? "in" : "not in");
        if (p != NULL)
        {
            /* A miss returns a null pointer; only dereference on a hit. */
            printf("%s->%d\n", p->name, p->token);
        }
    }

    for (cp = no_js_word; *cp != NULL; cp++)
    {
        printf("%s is %s javascript expression!\n", *cp,
               js_kw_lookup(*cp, (unsigned int)strlen(*cp)) ? "in" : "not in");
    }

    return 0;
}

2.用配置文件生成程序源文件(注意,gperf哈希函数和哈希表都是硬编码到此生成文件中)

D:\study\gperf\example>gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf  > jscript.c

切换到当前目录,通过设置的jscript.gperf文件生成jscript.c源文件

/* C code produced by gperf version 3.0.1 *//* Command-line: gperf -k'1,2,$' -t -K name -H js_kw_hash -N js_kw_lookup jscript.gperf  */#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))/* The character set is not based on ISO-646.  */error "gperf generated tables don't work with this execution character set. 
Please report a bug to <bug-gnu-gperf@gnu.org>."#endif#line 1 "jscript.gperf"/* Command-line: gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf *//* -g -o -j1 -t -p */#include <stdio.h>#include "jscript.h"#line 7 "jscript.gperf"struct js_keyword {  char * name;  int token;};#define TOTAL_KEYWORDS 53  //统计记录总数#define MIN_WORD_LENGTH 2  //字符串最小长度#define MAX_WORD_LENGTH 12 //字符串最大长度#define MIN_HASH_VALUE 2   //最小哈希值,对应于wordlist的索引值#define MAX_HASH_VALUE 79  //最大哈希值,对应于wordlist的索引值/* maximum key range = 78, duplicates = 0 */#ifdef __GNUC____inline#else#ifdef __cplusplusinline#endif#endifstatic unsigned intjs_kw_hash (str, len)     register const char *str;     register unsigned int len;{  static unsigned char asso_values[] =       /*定义包含255个字符的数组,用MAX_HASH_VALUE+1初始化*/    {      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 25, 35, 25,      20,  0, 10, 50,  0,  0, 80, 15, 15, 45,       0, 10,  5, 80, 15,  5,  5, 40,  5,  0,       0, 30, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,      80, 80, 80, 80, 80, 80    };  return len + 
asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]] + asso_values[(unsigned char)str[len - 1]];  //最重要的hash函数,计算wordlist索引}#ifdef __GNUC____inline#endifstruct js_keyword *js_kw_lookup (str, len)     register const char *str;     register unsigned int len;{  static struct js_keyword wordlist[] =     //定义hash表    {      {""}, {""},#line 39 "jscript.gperf"      {"in", TK_IN},#line 45 "jscript.gperf"      {"new", TK_NEW},#line 66 "jscript.gperf"      {"with", TK_WITH},#line 65 "jscript.gperf"      {"while", TK_WHILE},      {""}, {""},#line 41 "jscript.gperf"      {"int", TK_INT},#line 42 "jscript.gperf"      {"interface", TK_INTERFACE},#line 58 "jscript.gperf"      {"throw", TK_THROW},#line 55 "jscript.gperf"      {"switch", TK_SWITCH},#line 28 "jscript.gperf"      {"extends", TK_EXTENDS},      {""},#line 57 "jscript.gperf"      {"this", TK_THIS},#line 52 "jscript.gperf"      {"short", TK_SHORT},#line 59 "jscript.gperf"      {"throws", TK_THROWS},      {""}, {""},#line 27 "jscript.gperf"      {"else", TK_ELSE},#line 40 "jscript.gperf"      {"instanceof", TK_INSTANCEOF},#line 51 "jscript.gperf"      {"return", TK_RETURN},#line 36 "jscript.gperf"      {"if", TK_IF},      {""},#line 61 "jscript.gperf"      {"true", TK_TRUE},      {""}, {""},#line 48 "jscript.gperf"      {"private", TK_PRIVATE},      {""}, {""},#line 30 "jscript.gperf"      {"final", TK_FINAL},#line 44 "jscript.gperf"      {"native", TK_NATIVE},#line 24 "jscript.gperf"      {"default", TK_DEFAULT},      {""},#line 60 "jscript.gperf"      {"transient", TK_TRANSIENT},#line 32 "jscript.gperf"      {"float", TK_FLOAT},#line 26 "jscript.gperf"      {"double", TK_DOUBLE},#line 47 "jscript.gperf"      {"package", TK_PACKAGE},#line 33 "jscript.gperf"      {"for", TK_FOR},#line 64 "jscript.gperf"      {"void", TK_VOID},#line 29 "jscript.gperf"      {"false", TK_FALSE},#line 53 "jscript.gperf"      {"static", TK_STATIC},#line 25 "jscript.gperf"      {"do", TK_DO},#line 23 "jscript.gperf"      
{"continue", TK_CONTINUE},#line 20 "jscript.gperf"      {"char", TK_CHAR},#line 22 "jscript.gperf"      {"const", TK_CONST},      {""},#line 31 "jscript.gperf"      {"finally", TK_FINALLY},#line 63 "jscript.gperf"      {"var", TK_VAR},#line 49 "jscript.gperf"      {"protected", TK_PROTECTED},#line 21 "jscript.gperf"      {"class", TK_CLASS},      {""},#line 15 "jscript.gperf"      {"boolean", TK_BOOLEAN},#line 62 "jscript.gperf"      {"try", TK_TRY},#line 18 "jscript.gperf"      {"case", TK_CASE},#line 19 "jscript.gperf"      {"catch", TK_CATCH},#line 38 "jscript.gperf"      {"import", TK_IMPORT},      {""},#line 34 "jscript.gperf"      {"function", TK_FUNCTION},#line 46 "jscript.gperf"      {"null", TK_NULL},#line 37 "jscript.gperf"      {"implements", TK_IMPLEMENTS},      {""}, {""}, {""}, {""},#line 54 "jscript.gperf"      {"super", TK_SUPER},      {""},#line 56 "jscript.gperf"      {"synchronized", TK_SYNCHRONIZED},      {""},#line 17 "jscript.gperf"      {"byte", TK_BYTE},#line 16 "jscript.gperf"      {"break", TK_BREAK},      {""}, {""},#line 14 "jscript.gperf"      {"abstract", TK_ABSTRACT},#line 35 "jscript.gperf"      {"goto", TK_GOTO},      {""},#line 50 "jscript.gperf"      {"public", TK_PUBLIC},      {""}, {""},#line 43 "jscript.gperf"      {"long", TK_LONG}    };  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)    {      register int key = js_kw_hash (str, len);      if (key <= MAX_HASH_VALUE && key >= 0)        {          register const char *s = wordlist[key].name;          if (*str == *s && !strcmp (str + 1, s + 1))            return &wordlist[key];        }    }  return 0;}#line 67 "jscript.gperf"int main(void){    char *js_word[] = {"protected", "throws", "with", 0};    char *no_js_word[] = {"protectef", "throwd", "witp", 0};    char ** cp;    cp = &js_word[0];   while (*cp != NULL)    {        struct js_keyword* p = js_kw_lookup(*cp, strlen(*cp));        printf("%s is %s javascript expression!\n", *cp,p? 
"in":"not in");        printf("%s is %s javascript expression!\n", *cp,p ? "in":"not in");        printf("%s->%d\n",p->name,p->token);        cp++;    }    cp = &no_js_word[0];    while (*cp != NULL)    {        printf("%s is %s javascript expression!\n", *cp, js_kw_lookup(*cp, strlen(*cp))? "in":"not in");        cp++;    } return 0;}

我们主要关注两个函数:

static unsigned int js_kw_hash (str, len):根据传入的字符串生成哈希值

struct js_keyword * js_kw_lookup (str, len):根据传入的字符串进行查找操作


编译



运行:




详解参见:https://www.ibm.com/developerworks/cn/linux/l-gperf.html