Squid--hash代码分析

来源：互联网发布：9块9淘宝包邮编辑：程序博客网时间：2024/05/17 09:11

#ifndef SQUID_HASH_H#define SQUID_HASH_H//几个函数和变量的别名typedef void HASHFREE(void *);typedef int HASHCMP(const void *, const void *);typedef unsigned int HASHHASH(const void *, unsigned int);typedef struct _hash_link hash_link;typedef struct _hash_table hash_table;//每个hash节点的数据结构struct _hash_link {    void *key;    hash_link *next;};//hash表的数据结构struct _hash_table {    hash_link **buckets;    //存储hash节点（hash_link）地址的桶链表    HASHCMP *cmp;           //hash比较函数    HASHHASH *hash;         //获取hash值函数    unsigned int size;      //buckets桶链表的大小    unsigned int current_slot;  //指向当前的桶    hash_link *next;        //指向下一个桶（相对于current_slot的下一个桶）    int count;      //hash_table中已经存储的hash节点（hash_link）的数目};SQUIDCEXTERN hash_table *hash_create(HASHCMP *, int, HASHHASH *);SQUIDCEXTERN void hash_join(hash_table *, hash_link *);SQUIDCEXTERN void hash_remove_link(hash_table *, hash_link *);SQUIDCEXTERN int hashPrime(int n);SQUIDCEXTERN hash_link *hash_lookup(hash_table *, const void *);SQUIDCEXTERN void hash_first(hash_table *);SQUIDCEXTERN hash_link *hash_next(hash_table *);SQUIDCEXTERN void hash_last(hash_table *);SQUIDCEXTERN hash_link *hash_get_bucket(hash_table *, unsigned int);SQUIDCEXTERN void hashFreeMemory(hash_table *);SQUIDCEXTERN void hashFreeItems(hash_table *, HASHFREE *);SQUIDCEXTERN HASHHASH hash_string;SQUIDCEXTERN HASHHASH hash4;SQUIDCEXTERN const char *hashKeyStr(hash_link *);/*  squid建议的hansh素数 *  Here are some good prime number choices.  It's important not to *  choose a prime number that is too close to exact powers of 2. 
* *  HASH_SIZE 103               // prime number < 128 *  HASH_SIZE 229               // prime number < 256 *  HASH_SIZE 467               // prime number < 512 *  HASH_SIZE 977               // prime number < 1024 *  HASH_SIZE 1979              // prime number < 2048 *  HASH_SIZE 4019              // prime number < 4096 *  HASH_SIZE 6037              // prime number < 6144 *  HASH_SIZE 7951              // prime number < 8192 *  HASH_SIZE 12149             // prime number < 12288 *  HASH_SIZE 16231             // prime number < 16384 *  HASH_SIZE 33493             // prime number < 32768 *  HASH_SIZE 65357             // prime number < 65536 *///默认的桶链表大小#define  DEFAULT_HASH_SIZE 7951 /* prime number < 8192 */#endif /* SQUID_HASH_H */

/*
 * DEBUG: section 00    Hash Tables
 * AUTHOR: Harvest Derived
 *
 * SQUID Web Proxy Cache          http://www.squid-cache.org/
 * ----------------------------------------------------------
 *
 *  Squid is the result of efforts by numerous individuals from
 *  the Internet community; see the CONTRIBUTORS file for full
 *  details.   Many organizations have provided support for Squid's
 *  development; see the SPONSORS file for full details.  Squid is
 *  Copyrighted (C) 2001 by the Regents of the University of
 *  California; see the COPYRIGHT file for full details.  Squid
 *  incorporates software developed and/or copyrighted by other
 *  sources; see the CREDITS file for full details.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 */

#include "squid.h"
#include "hash.h"
#include "profiler/Profiler.h"

#if HAVE_STDIO_H
#include <stdio.h>
#endif
#if HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if HAVE_STRING_H
#include <string.h>
#endif
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
/* NOTE(review): "GNUMALLLOC" is spelled with three L's; confirm against the
 * macro the build system actually defines before changing it. */
#if HAVE_GNUMALLLOC_H
#include <gnumalloc.h>
#elif HAVE_MALLOC_H
#include <malloc.h>
#endif
#if HAVE_ASSERT_H
#include <assert.h>
#endif
#if HAVE_MATH_H
#include <math.h>
#endif

static void hash_next_bucket(hash_table * hid);

/*
 * hash_string() and hash4() both match the HASHHASH signature and can be
 * passed to hash_create() as its hash function.  Both read 'data' up to the
 * first NUL byte (hash_string via its loop condition, hash4 via strlen()),
 * so keys must be NUL-terminated.
 */

/**
 *  hash_string - XOR-folds 271 * byte over every byte of the key, mixes in
 *  271 * length, and reduces the result modulo 'size'.
 */
unsigned int
hash_string(const void *data, unsigned int size)
{
    const unsigned char *s = static_cast<const unsigned char *>(data);
    unsigned int n = 0;
    unsigned int j = 0;     /* number of bytes consumed */
    unsigned int i = 0;

    while (*s) {
        ++j;
        n ^= 271 * *s;
        ++s;
    }

    i = n ^ (j * 271);
    return i % size;
}

/* the following function(s) were adapted from
 *    usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */

/**
 *  hash4 - Hash function from Chris Torek.
 *
 *  Applies h = h * 33 + byte (the HASH4b step) to every byte of the key,
 *  using an unrolled loop: the switch consumes the len % 8 leftover bytes,
 *  then the while loop consumes the remaining bytes eight at a time.
 *  Returns h modulo 'size'.
 */
unsigned int
hash4(const void *data, unsigned int size)
{
    const char *key = static_cast<const char *>(data);
    size_t loop;
    unsigned int h;
    size_t len;

#define HASH4a   h = (h << 5) - h + *key++;
#define HASH4b   h = (h << 5) + h + *key++;
#define HASH4 HASH4b

    h = 0;
    len = strlen(key);
    loop = len >> 3;

    switch (len & (8 - 1)) {
    case 0:
        break;
    case 7:
        HASH4;
        /* FALLTHROUGH */
    case 6:
        HASH4;
        /* FALLTHROUGH */
    case 5:
        HASH4;
        /* FALLTHROUGH */
    case 4:
        HASH4;
        /* FALLTHROUGH */
    case 3:
        HASH4;
        /* FALLTHROUGH */
    case 2:
        HASH4;
        /* FALLTHROUGH */
    case 1:
        HASH4;
    }

    while (loop) {
        --loop;
        HASH4;
        HASH4;
        HASH4;
        HASH4;
        HASH4;
        HASH4;
        HASH4;
        HASH4;
    }

    return h % size;
}

/**
 *  hash_create - creates a new, empty hash table that uses 'cmp_func' to
 *  compare keys and 'hash_func' to pick a bucket.
 *
 *  'hash_sz' is the number of buckets; 0 selects DEFAULT_HASH_SIZE.
 *  Returns a pointer to the new table.  The xcalloc() results are used
 *  without a NULL check here.
 */
hash_table *
hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
{
    hash_table *hid = (hash_table *)xcalloc(1, sizeof(hash_table));

    if (!hash_sz)
        hid->size = (unsigned int) DEFAULT_HASH_SIZE;
    else
        hid->size = (unsigned int) hash_sz;

    /* allocate and null the buckets */
    hid->buckets = (hash_link **)xcalloc(hid->size, sizeof(hash_link *));
    hid->cmp = cmp_func;
    hid->hash = hash_func;
    /* no traversal in progress */
    hid->next = NULL;
    hid->current_slot = 0;
    return hid;
}

/**
 *  hash_join - joins a hash_link under its key lnk->key
 *  into the hash table 'hid'.
 *
 *  It does not copy any data into the hash table, only links pointers.
 *  The link becomes the new head of its bucket chain.
 */
void
hash_join(hash_table * hid, hash_link * lnk)
{
    int i;
    i = hid->hash(lnk->key, hid->size);
    lnk->next = hid->buckets[i];
    hid->buckets[i] = lnk;
    ++hid->count;
}

/**
 *  hash_lookup - locates the item under the key 'k' in the hash table
 *  'hid'.  Returns a pointer to the matching hash_link on success;
 *  otherwise returns NULL.  'k' must not be NULL (asserted).
 */
hash_link *
hash_lookup(hash_table * hid, const void *k)
{
    int b;
    PROF_start(hash_lookup);
    assert(k != NULL);
    b = hid->hash(k, hid->size);

    for (hash_link *walker = hid->buckets[b]; walker != NULL; walker = walker->next) {
        if ((hid->cmp) (k, walker->key) == 0) {
            PROF_stop(hash_lookup);
            return (walker);
        }

        /* a node linked to itself would make this loop spin forever */
        assert(walker != walker->next);
    }

    PROF_stop(hash_lookup);
    return NULL;
}

/*
 * Advances the traversal cursor: while hid->next is NULL, steps
 * hid->current_slot forward and loads that bucket's head, stopping at the
 * first non-empty bucket or when the end of the bucket array is reached.
 */
static void
hash_next_bucket(hash_table * hid)
{
    while (hid->next == NULL && ++hid->current_slot < hid->size)
        hid->next = hid->buckets[hid->current_slot];
}

/**
 *  hash_first - initializes the hash table for the hash_next()
 *  function.
 *
 *  Asserts that no traversal is in progress, then points hid->next at the
 *  head of bucket 0, or at the head of the first non-empty bucket if
 *  bucket 0 is empty.
 */
void
hash_first(hash_table * hid)
{
    assert(NULL == hid->next);
    hid->current_slot = 0;
    hid->next = hid->buckets[hid->current_slot];

    if (NULL == hid->next)      /* bucket 0 is empty: search onwards */
        hash_next_bucket(hid);
}

/**
 *  hash_next - returns the next item in the hash table 'hid'.
 *  Otherwise, returns NULL on error or end of list.
 *
 *  MUST call hash_first() before hash_next().
 */
hash_link *
hash_next(hash_table * hid)
{
    hash_link *p = hid->next;

    if (NULL == p)
        return NULL;

    /* pre-load the node that the following call will return */
    hid->next = p->next;

    if (NULL == hid->next)
        hash_next_bucket(hid);

    return p;
}

/**
 *  hash_last - resets hash traversal state to NULL
 *
 */
void
hash_last(hash_table * hid)
{
    assert(hid != NULL);
    hid->next = NULL;
    hid->current_slot = 0;
}

/**
 *  hash_remove_link - deletes the given hash_link node from the
 *  hash table 'hid'.  Does not free the item, only removes it
 *  from the list.
 *
 *  An assertion is triggered if the hash_link is not found in the
 *  list.
 */
void
hash_remove_link(hash_table * hid, hash_link * hl)
{
    assert(hl != NULL);
    int i = hid->hash(hl->key, hid->size);

    /* P addresses the pointer that refers to the current node, so the same
     * store unlinks a chain head and an interior node alike. */
    for (hash_link **P = &hid->buckets[i]; *P; P = &(*P)->next) {
        if (*P != hl)
            continue;

        *P = hl->next;

        /* keep the traversal cursor off the node being removed */
        if (hid->next == hl) {
            hid->next = hl->next;

            if (NULL == hid->next)
                hash_next_bucket(hid);
        }

        --hid->count;
        return;
    }

    assert(0);
}

/**
 *  hash_get_bucket - returns the head item of the bucket
 *  in the hash table 'hid'. Otherwise, returns NULL on error
 *  (bucket index out of range).
 */
hash_link *
hash_get_bucket(hash_table * hid, unsigned int bucket)
{
    if (bucket >= hid->size)
        return NULL;

    return (hid->buckets[bucket]);
}

/**
 *  hashFreeItems - collects up to hid->count links into a temporary array
 *  by walking the table, then calls 'free_func' on each collected link.
 *  The bucket array and the table itself are not modified here.
 */
void
hashFreeItems(hash_table * hid, HASHFREE * free_func)
{
    hash_link *l;
    int i = 0;
    hash_link **list = (hash_link **)xcalloc(hid->count, sizeof(hash_link *));
    hash_first(hid);

    while ((l = hash_next(hid)) && i < hid->count) {
        *(list + i) = l;
        ++i;
    }

    for (int j = 0; j < i; ++j)
        free_func(*(list + j));

    xfree(list);
}

/**
 *  hashFreeMemory - releases the bucket array and the table structure.
 *  A NULL 'hid' is ignored.  The links themselves are not touched.
 */
void
hashFreeMemory(hash_table * hid)
{
    if (hid == NULL)
        return;

    if (hid->buckets)
        xfree(hid->buckets);

    xfree(hid);
}

/* Candidate table sizes, the same values listed in hash.h. */
static const int hash_primes[] = {
    103,
    229,
    467,
    977,
    1979,
    4019,
    6037,
    7951,
    12149,
    16231,
    33493,
    65357
};

/**
 *  hashPrime - returns the entry of hash_primes whose logarithm is closest
 *  to log(n).  On a tie the later table entry wins.
 */
int
hashPrime(int n)
{
    int I = sizeof(hash_primes) / sizeof(int);
    int best_prime = hash_primes[0];
    double min = fabs(log((double) n) - log((double) hash_primes[0]));
    double d;

    for (int i = 0; i < I; ++i) {
        d = fabs(log((double) n) - log((double) hash_primes[i]));

        if (d > min)
            continue;

        min = d;
        best_prime = hash_primes[i];
    }

    return best_prime;
}

/**
 * return the key of a hash_link as a const string
 */
const char *
hashKeyStr(hash_link * hl)
{
    return (const char *) hl->key;
}

#if USE_HASH_DRIVER

/* Prints every link currently in the table using hash_first()/hash_next(). */
static void
hash_driver_walk(hash_table * hid)
{
    int i = 0;

    printf("walking hash table...\n");
    hash_first(hid);

    for (hash_link *walker = hash_next(hid); walker != NULL; walker = hash_next(hid)) {
        printf("item %5d: key: '%s' item: %p\n", i++, hashKeyStr(walker),
               (void *) walker);
    }

    printf("done walking hash table...\n");
}

/**
 *  hash-driver - Run with a big file as stdin to insert each line into the
 *  hash table, then prints the whole hash table, then deletes a random item,
 *  and prints the table again...
 *
 *  Uses hash_join(), hash_lookup() and hash_remove_link(); the earlier
 *  version called hash_insert()/hash_delete(), which this file does not
 *  define.
 */
int
main(void)
{
    LOCAL_ARRAY(char, buf, BUFSIZ);
    LOCAL_ARRAY(char, todelete, BUFSIZ);

    todelete[0] = '\0';
    printf("init\n");

    printf("creating hash table\n");
    hash_table *hid = hash_create((HASHCMP *) strcmp, 229, hash4);

    if (hid == NULL) {
        printf("hash_create error.\n");
        exit(1);
    }

    printf("done creating hash table: %p\n", (void *) hid);

    while (fgets(buf, BUFSIZ, stdin)) {
        /* strip the newline if present; safe for an empty or unterminated line */
        buf[strcspn(buf, "\n")] = '\0';

        hash_link *lnk = (hash_link *) xcalloc(1, sizeof(hash_link));
        lnk->key = xstrdup(buf);
        printf("Inserting '%s' for item %p to hash table: %p\n",
               buf, (void *) lnk, (void *) hid);
        hash_join(hid, lnk);

        if (random() % 17 == 0)
            strcpy(todelete, buf);
    }

    hash_driver_walk(hid);

    if (todelete[0]) {
        printf("deleting %s from %p\n", todelete, (void *) hid);
        hash_link *victim = hash_lookup(hid, todelete);

        if (victim == NULL) {
            printf("hash_delete error\n");
        } else {
            hash_remove_link(hid, victim);
            xfree(victim->key);
            xfree(victim);
        }
    }

    hash_driver_walk(hid);

    printf("driver finished.\n");
    exit(0);
}

#endif

下面具体分析：

hash表整体结构：

1、hash_create

/**
 *  hash_create - builds a new, empty hash table.
 *
 *  cmp_func  - key comparison callback stored in the table
 *  hash_sz   - number of buckets; 0 selects DEFAULT_HASH_SIZE
 *  hash_func - hashing callback stored in the table
 *
 *  Returns a pointer to the new table.  The xcalloc() results are used
 *  without a NULL check here.
 */
hash_table *
hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
{
    hash_table *hid = (hash_table *)xcalloc(1, sizeof(hash_table));

    hid->size = hash_sz ? (unsigned int) hash_sz
                        : (unsigned int) DEFAULT_HASH_SIZE;

    /* one chain head per bucket */
    hid->buckets = (hash_link **)xcalloc(hid->size, sizeof(hash_link *));

    hid->hash = hash_func;
    hid->cmp = cmp_func;

    /* no traversal in progress */
    hid->current_slot = 0;
    hid->next = NULL;

    return hid;
}

创建hash表。需要三个参数：cmp_func、hash_sz、hash_func，其中hash_sz用来表示创建的hash表的桶链表的大小，如果为0，则使用默认的大小DEFAULT_HASH_SIZE.

桶链表储存的数据类型为：hash_link * ，即它只存储hash_link节点的地址。初始化后的桶链表没有存储任何地址，全部为0。

current_slot = 0，即遍历游标当前所在的桶为buckets[0]；hid->next是遍历游标，指向hash_next()下一次将要返回的hash_link节点（而不是“下一个桶”），此时为NULL，表示还没有开始遍历。

2、hash_join

/** *  hash_join - joins a hash_link under its key lnk->key *  into the hash table 'hid'. * *  It does not copy any data into the hash table, only links pointers. *将hash节点链接到hash表中对应的桶节点。 */voidhash_join(hash_table * hid, hash_link * lnk){    int i;    i = hid->hash(lnk->key, hid->size);    lnk->next = hid->buckets[i];    hid->buckets[i] = lnk;    ++hid->count;}

首先利用函数hash计算出节点lnk应该插入到的桶号i，将lnk的next指针指向桶号i存储的链表的首节点，再将lnk节点的地址储存到桶号i内，lnk节点成为桶号i储存的链表的首节点（头插法），最后将hid->count加1。

3、hash_lookup

/**
 *  hash_lookup - finds the link stored under key 'k' in table 'hid'.
 *
 *  The key is hashed to pick a bucket, and that bucket's chain is walked
 *  until the table's compare function returns 0.  Returns the matching
 *  hash_link, or NULL when the chain has no match.  'k' must not be NULL
 *  (asserted).
 */
hash_link *
hash_lookup(hash_table * hid, const void *k)
{
    int b;
    hash_link *walker;

    PROF_start(hash_lookup);
    assert(k != NULL);

    b = hid->hash(k, hid->size);
    walker = hid->buckets[b];

    while (walker != NULL && (hid->cmp) (k, walker->key) != 0) {
        /* a node linked to itself would make this loop spin forever */
        assert(walker != walker->next);
        walker = walker->next;
    }

    PROF_stop(hash_lookup);
    return walker;
}

首先用hash函数由键k计算出对应的桶号b（注意k是键本身，而不是哈希值），walker指向桶b所存链表的首节点；然后沿链表依次用cmp函数比较k与walker->key，返回0表示匹配，此时返回该节点；走完整条链表仍未匹配则返回NULL。

4、hash_remove_link

/** *  hash_remove_link - deletes the given hash_link node from the *  hash table 'hid'.  Does not free the item, only removes it *  from the list. * *  An assertion is triggered if the hash_link is not found in the *  list. *将hash_link为hl的节点从桶链表中移除 */voidhash_remove_link(hash_table * hid, hash_link * hl){    assert(hl != NULL);    int i = hid->hash(hl->key, hid->size);    for (hash_link **P = &hid->buckets[i]; *P; P = &(*P)->next) {        if (*P != hl)            continue;        *P = hl->next;        if (hid->next == hl) {            hid->next = hl->next;            if (NULL == hid->next)                hash_next_bucket(hid);        }        --hid->count;        return;    }    assert(0);}

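移除时需要做两件事：

1、摘链：P是一个二级指针，指向“指向当前节点的那个指针”（初始为&hid->buckets[i]，之后为前驱节点next域的地址）。因此无论hl是链表的首节点还是中间节点，都只需执行*P = hl->next，即可把hl从链表中摘除。

2、修正遍历游标：如果hid->next恰好指向hl（即hl是hash_next()下一次将要返回的节点），则令hid->next = hl->next；若结果为NULL，再调用hash_next_bucket()移动到下一个非空桶。最后将hid->count减1；如果在链表中找不到hl，则触发assert(0)。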

5、hashFreeItems

//将所有hash_link节点集中到一起，集中释放存储空间voidhashFreeItems(hash_table * hid, HASHFREE * free_func){    hash_link *l;    int i = 0;    hash_link **list = (hash_link **)xcalloc(hid->count, sizeof(hash_link *));    hash_first(hid);    while ((l = hash_next(hid)) && i < hid->count) {        *(list + i) = l;        ++i;    }    for (int j = 0; j < i; ++j)        free_func(*(list + j));    xfree(list);}

根据hid->count大小分配存储空间来存储hash_link节点的地址。调用hash_first将hid->current_slot置为0，并使hid->next指向第一个非空桶的首节点（如果所有桶都为空，则hid->next为NULL）。然后反复调用hash_next取得hash表中的每一个hash_link节点，并将其地址保存到list中，最后对list中保存的每个地址统一调用free_func进行销毁。

本文为Eliot原创，转载请注明出处：http://blog.csdn.net/xyw_blog/article/details/9791221