openssl学习之lhash

来源：互联网发布：js防水涂料报价多少钱编辑：程序博客网时间：2024/05/18 02:20

学习openssl中的lhash，源码在crypto\lhash目录下。Lhash在openssl中用到的地方很多，如文本数据库txt_db中，具体的以后再分析。首先看看lhash的实现结构，lhash的实现很多地方和stack的实现相同。参考了赵春平老师的Openssl编程和mm350670610的这篇博文。

//哈希节点的定义，是一个单链表typedef struct lhash_node_st{void *data;//存放数据的地址struct lhash_node_st *next;//指向下一个地址#ifndef OPENSSL_NO_HASH_COMPunsigned long hash;//哈希值#endif} LHASH_NODE;//哈希表的定义typedef struct lhash_st{LHASH_NODE **b;//为一个指针数据，指向一个单链表LHASH_COMP_FN_TYPE comp;//比较函数指针LHASH_HASH_FN_TYPE hash;//哈希函数指针unsigned int num_nodes;//链表个数unsigned int num_alloc_nodes;//b分配空间的大小，已申请的个数，大于等于num_nodesunsigned int p;unsigned int pmax;//p和pmax是为了优化性能能设置的参数unsigned long up_load; /* load times 256 */unsigned long down_load; /* load times 256 */unsigned long num_items;//表中所有数据的个数//下面是一些计数器unsigned long num_expands;unsigned long num_expand_reallocs;unsigned long num_contracts;unsigned long num_contract_reallocs;unsigned long num_hash_calls;unsigned long num_comp_calls;unsigned long num_insert;unsigned long num_replace;unsigned long num_delete;unsigned long num_no_delete;unsigned long num_retrieve;unsigned long num_retrieve_miss;unsigned long num_hash_comps;int error;} LHASH;

由上面的两个结构可以知道，openssl中实现的哈希表在处理冲突时使用的是拉链法。拉链法是解决冲突的一种行之有效的方法，其做法是为每个哈希地址建立一个单链表，链表中存储所有映射到该哈希地址的数据。该结构的基本结构如下：

先讲一下p和pmax的用处。当哈希表已经很满的时候，发生冲突的概率更大，访问数据时要把整个链表遍历一遍，这样性能会下降。openssl中的lhash采用的办法是将hash表的链表个数增长一些，把那些链表中的元素往新增的部分移动一些，这样就能产生很好的效果。如何判断哈希表是否很满呢？通过装填因子来表示：

装填因子（a）=num_items/num_nodes;

在添加数据时会进行判断（在lh_insert函数中）：

//如果装填因子超过上限，则拓展哈希表
if (lh->up_load <= (lh->num_items*LH_LOAD_MULT/lh->num_nodes))
    expand(lh);

在删除数据时也同样进行判断（在lh_delete函数中）：
if((lh->num_nodes > MIN_NODES) &&
              (lh->down_load>= (lh->num_items*LH_LOAD_MULT/lh->num_nodes)))
              contract(lh);//contract和expand函数功能相反。

其中LH_LOAD_MULT=256，up_load=2*256，down_load=256

下面具体分析一下expand函数：

static void expand(LHASH *lh){LHASH_NODE **n,**n1,**n2,*np;unsigned int p,i,j;unsigned long hash,nni;    //将计算都加1lh->num_nodes++;lh->num_expands++;p=(int)lh->p++;n1= &(lh->b[p]);n2= &(lh->b[p+(int)lh->pmax]);*n2=NULL;        /* 27/07/92 - eay - undefined pointer bug */nni=lh->num_alloc_nodes;for (np= *n1; np != NULL; ){#ifndef OPENSSL_NO_HASH_COMPhash=np->hash;#elsehash=lh->hash(np->data);lh->num_hash_calls++;#endifif ((hash%nni) != p)//如果!-p则将其移动pmax的位置{ /* move it */*n1= (*n1)->next;①np->next= *n2;②*n2=np;③}else//将指针后移一个位置n1= &((*n1)->next);np= *n1;}if ((lh->p) >= lh->pmax)//当存储空间不够，扩大存储空间{……………………}}

初始时申请的空间是使用空间的2倍，即num_alloc_nodes=2*num_nodes，这一点可以从lh_new函数中的初始化部分看出。其中宏定义MIN_NODES=16：

ret->num_nodes=MIN_NODES/2;

ret->num_alloc_nodes=MIN_NODES;

ret->p=0;

ret->pmax=MIN_NODES/2;

开始时，链表使用b[0]~b[7]；当需要拓展的时候，使用到b[8]~b[15]这些已申请的空间。当装填因子没达到上限时，数据所在数组下标的位置由nn=hash%lh->pmax;(在getrn函数中)得到；而装填因子达到上限时，由hash%nni（其中nni=lh->num_alloc_nodes;，在expand函数中）决定下标的位置，此时需要将下标为p的链表中的部分元素移动到下标为p+pmax的链表。首先遍历p下标指向的链表，对每个节点计算hash%nni，判断值是否为p：若为p则跳过（即执行n1= &((*n1)->next)，指针后移一个位置），否则将其从原链表中摘下（①），直接插入到p+pmax下标指向链表的表头（②③）。具体移动如下图所示：