GNU工具-gperf详解(完美哈希函数生成器)
来源:互联网 发布:网络嗅探器5.5 编辑:程序博客网 时间:2024/06/07 14:12
1.gperf是干什么的?
GNU gperf is a perfect hash function generator. For a given list of strings, it produces a hash function and hash table, in form of C or C++ code, for looking up a value depending on the input string. The hash function is perfect, which means that the hash table has no collisions, and the hash table lookup needs a single string comparison only.
GNU gperf是一个完美哈希函数生成器。对于给定的字符串集合,它以C或C++代码的形式生成哈希函数和哈希表,用于根据输入的字符串查找对应的值。所生成的哈希函数是完美的,即哈希表中不存在冲突,并且一次查找只需要进行一次字符串比较。
2.下载安装(windows版)
下载地址:点击此处
下载后直接双击安装
设置环境变量 把安装的bin文件目录设置到path中
3.使用方法
gperf可以根据输入文件中的字符串集合生成hash函数和哈希表(生成完美哈希),一般根据字符串计算出无冲突hash值,然后进行字符串比较
1.配置输入文件
输入文件格式实例c.gperf:
if
do
int
for
case
char
auto
goto
else
long
void
enum
float
short
union
break
while
const
double
static
extern
struct
return
sizeof
switch
signed
typedef
default
unsigned
continue
register
volatile
jscript.h:(需要的头文件)
/* Token codes for the JavaScript reserved words.
   The first enumerator is 255 and each following one is one larger,
   in the order listed below. */
enum {
    TK_ABSTRACT = 255,
    TK_BOOLEAN,
    TK_BREAK,
    TK_BYTE,
    TK_CASE,
    TK_CATCH,
    TK_CHAR,
    TK_CLASS,
    TK_CONST,
    TK_CONTINUE,
    TK_DEFAULT,
    TK_DO,
    TK_DOUBLE,
    TK_ELSE,
    TK_EXTENDS,
    TK_FALSE,
    TK_FINAL,
    TK_FINALLY,
    TK_FLOAT,
    TK_FOR,
    TK_FUNCTION,
    TK_GOTO,
    TK_IF,
    TK_IMPLEMENTS,
    TK_IMPORT,
    TK_IN,
    TK_INSTANCEOF,
    TK_INT,
    TK_INTERFACE,
    TK_LONG,
    TK_NATIVE,
    TK_NEW,
    TK_NULL,
    TK_PACKAGE,
    TK_PRIVATE,
    TK_PROTECTED,
    TK_PUBLIC,
    TK_RETURN,
    TK_SHORT,
    TK_STATIC,
    TK_SUPER,
    TK_SWITCH,
    TK_SYNCHRONIZED,
    TK_THIS,
    TK_THROW,
    TK_THROWS,
    TK_TRANSIENT,
    TK_TRUE,
    TK_TRY,
    TK_VAR,
    TK_VOID,
    TK_WHILE,
    TK_WITH,
};
jscript.gperf文件实例:
%{
/* Command-line: gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf */
/* -g -o -j1 -t -p */
#include <stdio.h>
#include <string.h>
#include "jscript.h"
%}
struct js_keyword {
    char * name;  /* keyword text; the key field selected by -K "name" */
    int token;    /* TK_* code from jscript.h */
};
%%
# Javascript reserved words, see "keywords.html"
abstract, TK_ABSTRACT
boolean, TK_BOOLEAN
break, TK_BREAK
byte, TK_BYTE
case, TK_CASE
catch, TK_CATCH
char, TK_CHAR
class, TK_CLASS
const, TK_CONST
continue, TK_CONTINUE
default, TK_DEFAULT
do, TK_DO
double, TK_DOUBLE
else, TK_ELSE
extends, TK_EXTENDS
false, TK_FALSE
final, TK_FINAL
finally, TK_FINALLY
float, TK_FLOAT
for, TK_FOR
function, TK_FUNCTION
goto, TK_GOTO
if, TK_IF
implements, TK_IMPLEMENTS
import, TK_IMPORT
in, TK_IN
instanceof, TK_INSTANCEOF
int, TK_INT
interface, TK_INTERFACE
long, TK_LONG
native, TK_NATIVE
new, TK_NEW
null, TK_NULL
package, TK_PACKAGE
private, TK_PRIVATE
protected, TK_PROTECTED
public, TK_PUBLIC
return, TK_RETURN
short, TK_SHORT
static, TK_STATIC
super, TK_SUPER
switch, TK_SWITCH
synchronized, TK_SYNCHRONIZED
this, TK_THIS
throw, TK_THROW
throws, TK_THROWS
transient, TK_TRANSIENT
true, TK_TRUE
try, TK_TRY
var, TK_VAR
void, TK_VOID
while, TK_WHILE
with, TK_WITH
%%
/* Test driver.  gperf copies everything after the second %% of
   jscript.gperf verbatim to the end of the generated C file. */
int main(void)
{
    char *js_word[] = {"protected", "throws", "with", 0};
    char *no_js_word[] = {"protectef", "throwd", "witp", 0};
    char ** cp;

    /* Words that are keywords. */
    cp = &js_word[0];
    while (*cp != NULL) {
        struct js_keyword* p = js_kw_lookup(*cp, strlen(*cp));
        printf("%s is %s javascript expression!\n", *cp, p ? "in" : "not in");
        /* js_kw_lookup returns 0 on a miss; only dereference a hit. */
        if (p != NULL) {
            printf("%s->%d\n", p->name, p->token);
        }
        cp++;
    }

    /* Near misses that differ from a keyword in the last character. */
    cp = &no_js_word[0];
    while (*cp != NULL) {
        printf("%s is %s javascript expression!\n", *cp, js_kw_lookup(*cp, strlen(*cp)) ? "in" : "not in");
        cp++;
    }
    return 0;
}
2.用配置文件生成程序源文件(注意,gperf哈希函数和哈希表都是硬编码到此生成文件中)
D:\study\gperf\example>gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf > jscript.c
切换到当前目录,通过设置的jscript.gperf文件生成jscript.c源文件
/* C code produced by gperf version 3.0.1 *//* Command-line: gperf -k'1,2,$' -t -K name -H js_kw_hash -N js_kw_lookup jscript.gperf */#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))/* The character set is not based on ISO-646. */error "gperf generated tables don't work with this execution character set. 
Please report a bug to <bug-gnu-gperf@gnu.org>."#endif#line 1 "jscript.gperf"/* Command-line: gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf *//* -g -o -j1 -t -p */#include <stdio.h>#include "jscript.h"#line 7 "jscript.gperf"struct js_keyword { char * name; int token;};#define TOTAL_KEYWORDS 53 //统计记录总数#define MIN_WORD_LENGTH 2 //字符串最小长度#define MAX_WORD_LENGTH 12 //字符串最大长度#define MIN_HASH_VALUE 2 //最小哈希值,对应于wordlist的索引值#define MAX_HASH_VALUE 79 //最大哈希值,对应于wordlist的索引值/* maximum key range = 78, duplicates = 0 */#ifdef __GNUC____inline#else#ifdef __cplusplusinline#endif#endifstatic unsigned intjs_kw_hash (str, len) register const char *str; register unsigned int len;{ static unsigned char asso_values[] = /*定义包含255个字符的数组,用MAX_HASH_VALUE+1初始化*/ { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 25, 35, 25, 20, 0, 10, 50, 0, 0, 80, 15, 15, 45, 0, 10, 5, 80, 15, 5, 5, 40, 5, 0, 0, 30, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 }; return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]] + asso_values[(unsigned char)str[len - 1]]; //最重要的hash函数,计算wordlist索引}#ifdef __GNUC____inline#endifstruct 
js_keyword *js_kw_lookup (str, len) register const char *str; register unsigned int len;{ static struct js_keyword wordlist[] = //定义hash表 { {""}, {""},#line 39 "jscript.gperf" {"in", TK_IN},#line 45 "jscript.gperf" {"new", TK_NEW},#line 66 "jscript.gperf" {"with", TK_WITH},#line 65 "jscript.gperf" {"while", TK_WHILE}, {""}, {""},#line 41 "jscript.gperf" {"int", TK_INT},#line 42 "jscript.gperf" {"interface", TK_INTERFACE},#line 58 "jscript.gperf" {"throw", TK_THROW},#line 55 "jscript.gperf" {"switch", TK_SWITCH},#line 28 "jscript.gperf" {"extends", TK_EXTENDS}, {""},#line 57 "jscript.gperf" {"this", TK_THIS},#line 52 "jscript.gperf" {"short", TK_SHORT},#line 59 "jscript.gperf" {"throws", TK_THROWS}, {""}, {""},#line 27 "jscript.gperf" {"else", TK_ELSE},#line 40 "jscript.gperf" {"instanceof", TK_INSTANCEOF},#line 51 "jscript.gperf" {"return", TK_RETURN},#line 36 "jscript.gperf" {"if", TK_IF}, {""},#line 61 "jscript.gperf" {"true", TK_TRUE}, {""}, {""},#line 48 "jscript.gperf" {"private", TK_PRIVATE}, {""}, {""},#line 30 "jscript.gperf" {"final", TK_FINAL},#line 44 "jscript.gperf" {"native", TK_NATIVE},#line 24 "jscript.gperf" {"default", TK_DEFAULT}, {""},#line 60 "jscript.gperf" {"transient", TK_TRANSIENT},#line 32 "jscript.gperf" {"float", TK_FLOAT},#line 26 "jscript.gperf" {"double", TK_DOUBLE},#line 47 "jscript.gperf" {"package", TK_PACKAGE},#line 33 "jscript.gperf" {"for", TK_FOR},#line 64 "jscript.gperf" {"void", TK_VOID},#line 29 "jscript.gperf" {"false", TK_FALSE},#line 53 "jscript.gperf" {"static", TK_STATIC},#line 25 "jscript.gperf" {"do", TK_DO},#line 23 "jscript.gperf" {"continue", TK_CONTINUE},#line 20 "jscript.gperf" {"char", TK_CHAR},#line 22 "jscript.gperf" {"const", TK_CONST}, {""},#line 31 "jscript.gperf" {"finally", TK_FINALLY},#line 63 "jscript.gperf" {"var", TK_VAR},#line 49 "jscript.gperf" {"protected", TK_PROTECTED},#line 21 "jscript.gperf" {"class", TK_CLASS}, {""},#line 15 "jscript.gperf" {"boolean", TK_BOOLEAN},#line 62 "jscript.gperf" 
{"try", TK_TRY},#line 18 "jscript.gperf" {"case", TK_CASE},#line 19 "jscript.gperf" {"catch", TK_CATCH},#line 38 "jscript.gperf" {"import", TK_IMPORT}, {""},#line 34 "jscript.gperf" {"function", TK_FUNCTION},#line 46 "jscript.gperf" {"null", TK_NULL},#line 37 "jscript.gperf" {"implements", TK_IMPLEMENTS}, {""}, {""}, {""}, {""},#line 54 "jscript.gperf" {"super", TK_SUPER}, {""},#line 56 "jscript.gperf" {"synchronized", TK_SYNCHRONIZED}, {""},#line 17 "jscript.gperf" {"byte", TK_BYTE},#line 16 "jscript.gperf" {"break", TK_BREAK}, {""}, {""},#line 14 "jscript.gperf" {"abstract", TK_ABSTRACT},#line 35 "jscript.gperf" {"goto", TK_GOTO}, {""},#line 50 "jscript.gperf" {"public", TK_PUBLIC}, {""}, {""},#line 43 "jscript.gperf" {"long", TK_LONG} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) { register int key = js_kw_hash (str, len); if (key <= MAX_HASH_VALUE && key >= 0) { register const char *s = wordlist[key].name; if (*str == *s && !strcmp (str + 1, s + 1)) return &wordlist[key]; } } return 0;}#line 67 "jscript.gperf"int main(void){ char *js_word[] = {"protected", "throws", "with", 0}; char *no_js_word[] = {"protectef", "throwd", "witp", 0}; char ** cp; cp = &js_word[0]; while (*cp != NULL) { struct js_keyword* p = js_kw_lookup(*cp, strlen(*cp)); printf("%s is %s javascript expression!\n", *cp,p? "in":"not in"); printf("%s is %s javascript expression!\n", *cp,p ? "in":"not in"); printf("%s->%d\n",p->name,p->token); cp++; } cp = &no_js_word[0]; while (*cp != NULL) { printf("%s is %s javascript expression!\n", *cp, js_kw_lookup(*cp, strlen(*cp))? "in":"not in"); cp++; } return 0;}
我们主要关注两个函数:
static unsigned int js_kw_hash (str, len):根据传入的字符串生成哈希值
struct js_keyword * js_kw_lookup (str, len):根据传入的字符串进行查找操作
运行:
详解参见:https://www.ibm.com/developerworks/cn/linux/l-gperf.html
- GNU工具-gperf详解(完美哈希函数生成器)
- gperf--GNU完美哈希函数生成器用户手册
- gperf--GNU完美哈希函数生成器用户手册(翻译)
- gperf工具的使用
- gperf学习笔记(二)
- GNU工具链(GNU toolchain)
- Python函数式编程指南(四):生成器详解
- 最小完美哈希函数
- 生成器函数(generator)
- 我所理解的生成器(关键词:生成器对象/生成器/生成器表达式/生成器函数/生成器类/generator/yield/__iter__)
- 完美哈希函数(Perfect Hash Function)
- Python生成器(Generator)详解
- Python生成器(Generator)详解
- Python生成器(Generator)详解
- 最小完美哈希函数简介
- GNU开发工具简介(一)
- GNU开发工具简介(三)
- GNU开发工具简介(二)
- Linux 系统延迟和定时机制
- Window7 下编译Opencv 3.3+contrib模块+Cmake+VS2015
- 浅谈sstream头文件
- POJ2761[Feed the dogs ] 不带修改主席树
- Linux操作系统原理与应用(陈莉君)——学习笔记
- GNU工具-gperf详解(完美哈希函数生成器)
- JavaScript运算符(操作符)详解(1)----基本运算符
- Android 四大组件之 Service (上)
- Codeforces 837D Round Subset【思维+Dp+滚动数组】
- JS实现继承的几种方式
- GNU C中嵌入汇编添加一个自定义uboot命令
- opencv常见函数
- Java8 Lambda语法 示例
- serializer嵌套序列化