Squid--hash代码分析
来源:互联网 发布:9块9淘宝包邮 编辑:程序博客网 时间:2024/05/17 09:11
#ifndef SQUID_HASH_H#define SQUID_HASH_H//几个函数和变量的别名typedef void HASHFREE(void *);typedef int HASHCMP(const void *, const void *);typedef unsigned int HASHHASH(const void *, unsigned int);typedef struct _hash_link hash_link;typedef struct _hash_table hash_table;//每个hash节点的数据结构struct _hash_link { void *key; hash_link *next;};//hash表的数据结构struct _hash_table { hash_link **buckets; //存储hash节点(hash_link)地址的桶链表 HASHCMP *cmp; //hash比较函数 HASHHASH *hash; //获取hash值函数 unsigned int size; //buckets桶链表的大小 unsigned int current_slot; //指向当前的桶 hash_link *next; //指向下一个桶(相对于current_slot的下一个桶) int count; //hash_table中已经存储的hash节点(hash_link)的数目};SQUIDCEXTERN hash_table *hash_create(HASHCMP *, int, HASHHASH *);SQUIDCEXTERN void hash_join(hash_table *, hash_link *);SQUIDCEXTERN void hash_remove_link(hash_table *, hash_link *);SQUIDCEXTERN int hashPrime(int n);SQUIDCEXTERN hash_link *hash_lookup(hash_table *, const void *);SQUIDCEXTERN void hash_first(hash_table *);SQUIDCEXTERN hash_link *hash_next(hash_table *);SQUIDCEXTERN void hash_last(hash_table *);SQUIDCEXTERN hash_link *hash_get_bucket(hash_table *, unsigned int);SQUIDCEXTERN void hashFreeMemory(hash_table *);SQUIDCEXTERN void hashFreeItems(hash_table *, HASHFREE *);SQUIDCEXTERN HASHHASH hash_string;SQUIDCEXTERN HASHHASH hash4;SQUIDCEXTERN const char *hashKeyStr(hash_link *);/* squid建议的hansh素数 * Here are some good prime number choices. It's important not to * choose a prime number that is too close to exact powers of 2. 
* * HASH_SIZE 103 // prime number < 128 * HASH_SIZE 229 // prime number < 256 * HASH_SIZE 467 // prime number < 512 * HASH_SIZE 977 // prime number < 1024 * HASH_SIZE 1979 // prime number < 2048 * HASH_SIZE 4019 // prime number < 4096 * HASH_SIZE 6037 // prime number < 6144 * HASH_SIZE 7951 // prime number < 8192 * HASH_SIZE 12149 // prime number < 12288 * HASH_SIZE 16231 // prime number < 16384 * HASH_SIZE 33493 // prime number < 32768 * HASH_SIZE 65357 // prime number < 65536 *///默认的桶链表大小#define DEFAULT_HASH_SIZE 7951 /* prime number < 8192 */#endif /* SQUID_HASH_H */
/*
 * DEBUG: section 00    Hash Tables
 * AUTHOR: Harvest Derived
 *
 * SQUID Web Proxy Cache          http://www.squid-cache.org/
 * ----------------------------------------------------------
 *
 *  Squid is the result of efforts by numerous individuals from
 *  the Internet community; see the CONTRIBUTORS file for full
 *  details.   Many organizations have provided support for Squid's
 *  development; see the SPONSORS file for full details.  Squid is
 *  Copyrighted (C) 2001 by the Regents of the University of
 *  California; see the COPYRIGHT file for full details.  Squid
 *  incorporates software developed and/or copyrighted by other
 *  sources; see the CREDITS file for full details.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 */

#include "squid.h"
#include "hash.h"
#include "profiler/Profiler.h"

#if HAVE_STDIO_H
#include <stdio.h>
#endif
#if HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if HAVE_STRING_H
#include <string.h>
#endif
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
/* NOTE(review): "GNUMALLLOC" (three L's) looks like a typo for
 * HAVE_GNUMALLOC_H; left unchanged here - confirm against the configure
 * macros before renaming. */
#if HAVE_GNUMALLLOC_H
#include <gnumalloc.h>
#elif HAVE_MALLOC_H
#include <malloc.h>
#endif
#if HAVE_ASSERT_H
#include <assert.h>
#endif
#if HAVE_MATH_H
#include <math.h>
#endif

static void hash_next_bucket(hash_table * hid);

/*
 * The two functions below are HASHHASH implementations, i.e. candidates for
 * the hash_func (third) argument of hash_create().  Both read the key as a
 * NUL-terminated byte string and return a value in [0, size).
 * 'size' must be non-zero because the result is reduced with '%'.
 */

/**
 * hash_string - XORs 271 * byte over every byte of the string, mixes in
 * 271 * length, and reduces the result modulo 'size'.
 */
unsigned int
hash_string(const void *data, unsigned int size)
{
    const unsigned char *s = static_cast<const unsigned char *>(data);
    unsigned int n = 0;
    unsigned int j = 0;     /* number of bytes seen */
    unsigned int i = 0;

    while (*s) {
        ++j;
        n ^= 271 * *s;
        ++s;
    }

    i = n ^ (j * 271);
    return i % size;
}

/* the following function(s) were adapted from
 *    usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */

/**
 * hash4 - Hash function from Chris Torek.
 *
 * Computes h = h * 33 + c over every character of the string (HASH4b) and
 * reduces the result modulo 'size'.  The work is unrolled eight steps at a
 * time: the switch consumes the (len % 8) leading characters, the loop then
 * consumes the remaining len / 8 groups of eight.
 */
unsigned int
hash4(const void *data, unsigned int size)
{
    const char *key = static_cast<const char *>(data);
    size_t loop;
    unsigned int h;
    size_t len;

#define HASH4a   h = (h << 5) - h + *key++;     /* h * 31 + c (unused variant) */
#define HASH4b   h = (h << 5) + h + *key++;     /* h * 33 + c */
#define HASH4 HASH4b

    h = 0;
    len = strlen(key);
    loop = len >> 3;

    switch (len & (8 - 1)) {
    case 0:
        break;
    case 7:
        HASH4;
        /* FALLTHROUGH */
    case 6:
        HASH4;
        /* FALLTHROUGH */
    case 5:
        HASH4;
        /* FALLTHROUGH */
    case 4:
        HASH4;
        /* FALLTHROUGH */
    case 3:
        HASH4;
        /* FALLTHROUGH */
    case 2:
        HASH4;
        /* FALLTHROUGH */
    case 1:
        HASH4;
    }

    while (loop) {
        --loop;
        HASH4;
        HASH4;
        HASH4;
        HASH4;
        HASH4;
        HASH4;
        HASH4;
        HASH4;
    }

    return h % size;
}

/**
 *  hash_create - creates a new, empty hash table.
 *
 *  cmp_func  - key comparison function; must return 0 for equal keys.
 *  hash_sz   - number of buckets; 0 selects DEFAULT_HASH_SIZE.  The value is
 *              cast to unsigned, so callers are expected to pass a
 *              non-negative size.
 *  hash_func - function mapping (key, size) to a bucket index.
 *
 *  Returns a pointer to the new table.  (It does not return a negative
 *  number on error; allocation goes through xcalloc().)
 */
hash_table *
hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
{
    hash_table *hid = (hash_table *)xcalloc(1, sizeof(hash_table));

    if (!hash_sz)
        hid->size = (unsigned int) DEFAULT_HASH_SIZE;
    else
        hid->size = (unsigned int) hash_sz;

    /* allocate and null the buckets */
    hid->buckets = (hash_link **)xcalloc(hid->size, sizeof(hash_link *));
    hid->cmp = cmp_func;
    hid->hash = hash_func;
    /* no traversal in progress, no links stored yet */
    hid->next = NULL;
    hid->current_slot = 0;
    hid->count = 0;
    return hid;
}

/**
 *  hash_join - joins a hash_link under its key lnk->key
 *  into the hash table 'hid'.
 *
 *  It does not copy any data into the hash table, only links pointers.
 *  The link is pushed onto the front of its bucket chain and the table's
 *  count is incremented.
 */
void
hash_join(hash_table * hid, hash_link * lnk)
{
    /* unsigned, matching the return type of HASHHASH */
    const unsigned int i = hid->hash(lnk->key, hid->size);
    lnk->next = hid->buckets[i];
    hid->buckets[i] = lnk;
    ++hid->count;
}

/**
 *  hash_lookup - locates the item stored under the key 'k' in the hash
 *  table 'hid'.  Returns a pointer to the matching hash_link on success;
 *  otherwise returns NULL.  'k' must not be NULL (asserted).
 */
hash_link *
hash_lookup(hash_table * hid, const void *k)
{
    PROF_start(hash_lookup);
    assert(k != NULL);
    const unsigned int b = hid->hash(k, hid->size);

    for (hash_link *walker = hid->buckets[b]; walker != NULL; walker = walker->next) {
        if ((hid->cmp) (k, walker->key) == 0) {
            PROF_stop(hash_lookup);
            return (walker);
        }

        /* a node linked to itself would make this loop spin forever */
        assert(walker != walker->next);
    }

    PROF_stop(hash_lookup);
    return NULL;
}

/*
 * Advances the traversal cursor to the head of the next non-empty bucket
 * after current_slot.  Does nothing if the cursor already points at a node;
 * leaves the cursor NULL if no further non-empty bucket exists.
 */
static void
hash_next_bucket(hash_table * hid)
{
    while (hid->next == NULL && ++hid->current_slot < hid->size)
        hid->next = hid->buckets[hid->current_slot];
}

/**
 *  hash_first - initializes the hash table for the hash_next()
 *  function.
 *
 *  Positions the cursor on the first node of the first non-empty bucket.
 *  Asserts that no traversal is in progress (hid->next is NULL), so a
 *  traversal abandoned part-way must be reset with hash_last() first.
 */
void
hash_first(hash_table * hid)
{
    assert(NULL == hid->next);
    hid->current_slot = 0;
    hid->next = hid->buckets[hid->current_slot];

    /* bucket 0 is empty: search forward for the first occupied bucket */
    if (NULL == hid->next)
        hash_next_bucket(hid);
}

/**
 *  hash_next - returns the next item in the hash table 'hid'.
 *  Otherwise, returns NULL on error or end of list.
 *
 *  MUST call hash_first() before hash_next().
 */
hash_link *
hash_next(hash_table * hid)
{
    hash_link *p = hid->next;

    if (NULL == p)
        return NULL;

    /* move the cursor on before handing 'p' back to the caller */
    hid->next = p->next;

    if (NULL == hid->next)
        hash_next_bucket(hid);

    return p;
}

/**
 *  hash_last - resets hash traversal state to NULL
 *
 */
void
hash_last(hash_table * hid)
{
    assert(hid != NULL);
    hid->next = NULL;
    hid->current_slot = 0;
}

/**
 *  hash_remove_link - deletes the given hash_link node from the
 *  hash table 'hid'.  Does not free the item, only removes it
 *  from the list.
 *
 *  An assertion is triggered if the hash_link is not found in the
 *  list.
 */
void
hash_remove_link(hash_table * hid, hash_link * hl)
{
    assert(hl != NULL);
    /* unsigned, matching the return type of HASHHASH */
    const unsigned int i = hid->hash(hl->key, hid->size);

    /* P addresses the pointer that points at the current node: first the
     * bucket head, then each predecessor's 'next'.  Overwriting *P therefore
     * unlinks a head node and a middle node in the same way. */
    for (hash_link **P = &hid->buckets[i]; *P; P = &(*P)->next) {
        if (*P != hl)
            continue;

        *P = hl->next;

        /* keep an in-progress traversal from returning the removed node */
        if (hid->next == hl) {
            hid->next = hl->next;

            if (NULL == hid->next)
                hash_next_bucket(hid);
        }

        --hid->count;
        return;
    }

    assert(0);
}

/**
 *  hash_get_bucket - returns the head item of the bucket
 *  in the hash table 'hid'. Otherwise, returns NULL on error
 *  (bucket index out of range).
 */
hash_link *
hash_get_bucket(hash_table * hid, unsigned int bucket)
{
    if (bucket >= hid->size)
        return NULL;

    return (hid->buckets[bucket]);
}

/**
 *  hashFreeItems - collects every hash_link of the table into a temporary
 *  array, then calls free_func on each collected link.
 *
 *  This function itself does not modify the bucket chains or hid->count.
 *  It uses hash_first(), so no traversal may be in progress when it is
 *  called.
 */
void
hashFreeItems(hash_table * hid, HASHFREE * free_func)
{
    hash_link *l;
    int i = 0;
    hash_link **list = (hash_link **)xcalloc(hid->count, sizeof(hash_link *));
    hash_first(hid);

    /* never store more than the hid->count slots that were allocated */
    while ((l = hash_next(hid)) && i < hid->count) {
        *(list + i) = l;
        ++i;
    }

    for (int j = 0; j < i; ++j)
        free_func(*(list + j));

    xfree(list);
}

/**
 *  hashFreeMemory - releases the bucket array and the table structure.
 *  The links themselves are not touched.  A NULL table is ignored.
 */
void
hashFreeMemory(hash_table * hid)
{
    if (hid == NULL)
        return;

    if (hid->buckets)
        xfree(hid->buckets);

    xfree(hid);
}

/* Candidate table sizes considered by hashPrime(). */
static const int hash_primes[] = {
    103,
    229,
    467,
    977,
    1979,
    4019,
    6037,
    7951,
    12149,
    16231,
    33493,
    65357
};

/**
 *  hashPrime - returns the entry of hash_primes[] whose logarithm is
 *  closest to log(n).  When two entries are equally close, the later
 *  (larger) entry is chosen.
 */
int
hashPrime(int n)
{
    const int nprimes = sizeof(hash_primes) / sizeof(hash_primes[0]);
    int best_prime = hash_primes[0];
    double min = fabs(log((double) n) - log((double) hash_primes[0]));
    double d;

    for (int i = 0; i < nprimes; ++i) {
        d = fabs(log((double) n) - log((double) hash_primes[i]));

        if (d > min)
            continue;

        min = d;
        best_prime = hash_primes[i];
    }

    return best_prime;
}

/**
 * return the key of a hash_link as a const string
 */
const char *
hashKeyStr(hash_link * hl)
{
    return (const char *) hl->key;
}

#if USE_HASH_DRIVER
/**
 *  hash-driver - Run with a big file as stdin to insert each line into the
 *  hash table, then prints the whole hash table, then deletes a random item,
 *  and prints the table again...
 *
 *  Uses only the API defined in this file: links are allocated here and
 *  attached with hash_join(); deletion is hash_lookup() followed by
 *  hash_remove_link().
 */
int
main(void)
{
    hash_table *hid;
    LOCAL_ARRAY(char, buf, BUFSIZ);
    LOCAL_ARRAY(char, todelete, BUFSIZ);
    hash_link *walker = NULL;
    int i;

    todelete[0] = '\0';
    printf("init\n");

    printf("creating hash table\n");

    if ((hid = hash_create((HASHCMP *) strcmp, 229, hash4)) == NULL) {
        printf("hash_create error.\n");
        exit(1);
    }

    printf("done creating hash table: %p\n", (void *) hid);

    while (fgets(buf, BUFSIZ, stdin)) {
        /* strip the trailing newline, if fgets() kept one */
        const size_t len = strlen(buf);

        if (len > 0 && buf[len - 1] == '\n')
            buf[len - 1] = '\0';

        hash_link *lnk = (hash_link *) xcalloc(1, sizeof(hash_link));
        lnk->key = xstrdup(buf);
        printf("Inserting '%s' for item %p to hash table: %p\n",
               buf, (void *) lnk, (void *) hid);
        hash_join(hid, lnk);

        if (random() % 17 == 0)
            strcpy(todelete, buf);
    }

    printf("walking hash table...\n");
    i = 0;
    hash_first(hid);

    while ((walker = hash_next(hid)) != NULL) {
        printf("item %5d: key: '%s' item: %p\n", i++,
               hashKeyStr(walker), (void *) walker);
    }

    printf("done walking hash table...\n");

    if (todelete[0]) {
        printf("deleting %s from %p\n", todelete, (void *) hid);
        walker = hash_lookup(hid, todelete);

        if (walker == NULL) {
            printf("hash_delete error\n");
        } else {
            hash_remove_link(hid, walker);
            xfree(walker->key);
            xfree(walker);
        }
    }

    printf("walking hash table...\n");
    i = 0;
    hash_first(hid);

    while ((walker = hash_next(hid)) != NULL) {
        printf("item %5d: key: '%s' item: %p\n", i++,
               hashKeyStr(walker), (void *) walker);
    }

    printf("done walking hash table...\n");

    printf("driver finished.\n");
    exit(0);
}
#endif
下面具体分析:
hash表整体结构:
1、hash_create
/** * hash_create - creates a new hash table, uses the cmp_func * to compare keys. Returns the identification for the hash table; * otherwise returns a negative number on error. * 创建hash表,返回hash_table的对象 */hash_table *hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func){ hash_table *hid = (hash_table *)xcalloc(1, sizeof(hash_table)); if (!hash_sz) hid->size = (unsigned int) DEFAULT_HASH_SIZE;//EFAULT_HASH_SIZE 7951 else hid->size = (unsigned int) hash_sz; /* allocate and null the buckets */ hid->buckets = (hash_link **)xcalloc(hid->size, sizeof(hash_link *)); hid->cmp = cmp_func; hid->hash = hash_func; hid->next = NULL; hid->current_slot = 0; return hid;}创建hash表。需要三个参数:cmp_func、hash_sz、hash_func,其中hash_sz用来表示创建的hash表的桶链表的大小,如果为0,则使用默认的大小DEFAULT_HASH_SIZE.
桶链表储存的数据类型为:hash_link * ,即它只存储hash_link节点的地址。初始化后的桶链表没有存储任何地址,全部为0。
current_slot = 0,遍历位置指向buckets[0];hid->next并不是"指向下一个桶的指针",而是遍历游标,即hash_next()下一次要返回的hash_link节点,初始为NULL,表示当前没有正在进行的遍历。
2、hash_join
/** * hash_join - joins a hash_link under its key lnk->key * into the hash table 'hid'. * * It does not copy any data into the hash table, only links pointers. *将hash节点链接到hash表中对应的桶节点。 */voidhash_join(hash_table * hid, hash_link * lnk){ int i; i = hid->hash(lnk->key, hid->size); lnk->next = hid->buckets[i]; hid->buckets[i] = lnk; ++hid->count;}首先利用函数hash找到节点link应该插入到的桶号i,将link的next指针指向桶号i存储的链表的首节点,再将link节点的地址储存到桶号i内,link节点成为桶号i储存的链表的首节点。
3、hash_lookup
/** * hash_lookup - locates the item under the key 'k' in the hash table * 'hid'. Returns a pointer to the hash bucket on success; otherwise * returns NULL. *在hash_table中定位哈希值为k的hash_link节点 */hash_link *hash_lookup(hash_table * hid, const void *k){ int b; PROF_start(hash_lookup); assert(k != NULL); b = hid->hash(k, hid->size); for (hash_link *walker = hid->buckets[b]; walker != NULL; walker = walker->next) { if ((hid->cmp) (k, walker->key) == 0) { PROF_stop(hash_lookup); return (walker); } assert(walker != walker->next); } PROF_stop(hash_lookup); return NULL;}首先用hid->hash对键k计算出桶号b(k是要查找的键,而不是哈希值),walker从buckets[b]所在链表的首节点开始,沿next指针逐个比较;当hid->cmp(k, walker->key)返回0时即找到目标节点并返回它,遍历完整条链表仍未找到则返回NULL。
4、hash_remove_link
/** * hash_remove_link - deletes the given hash_link node from the * hash table 'hid'. Does not free the item, only removes it * from the list. * * An assertion is triggered if the hash_link is not found in the * list. *将hash_link为hl的节点从桶链表中移除 */voidhash_remove_link(hash_table * hid, hash_link * hl){ assert(hl != NULL); int i = hid->hash(hl->key, hid->size); for (hash_link **P = &hid->buckets[i]; *P; P = &(*P)->next) { if (*P != hl) continue; *P = hl->next; if (hid->next == hl) { hid->next = hl->next; if (NULL == hid->next) hash_next_bucket(hid); } --hid->count; return; } assert(0);}移除分两种情况:
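1、hl为桶的首节点:此时P指向buckets[i],执行 *P = hl->next 后,桶头直接指向hl的下一个节点。
2、hl为中间节点:此时P指向前驱节点的next域,执行 *P = hl->next 后,前驱节点直接指向hl的下一个节点。此外,无论哪种情况,都要检查遍历游标hid->next(它是hash_next()下一次要返回的节点,而不是桶头):如果它正好指向hl,就把它后移到hl->next;若后移后为NULL,再调用hash_next_bucket()跳到后面第一个非空桶。最后将hid->count减1。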
5、hashFreeItems
//将所有hash_link节点集中到一起,集中释放存储空间voidhashFreeItems(hash_table * hid, HASHFREE * free_func){ hash_link *l; int i = 0; hash_link **list = (hash_link **)xcalloc(hid->count, sizeof(hash_link *)); hash_first(hid); while ((l = hash_next(hid)) && i < hid->count) { *(list + i) = l; ++i; } for (int j = 0; j < i; ++j) free_func(*(list + j)); xfree(list);}根据hid->count大小分配存储空间来存储hash_link节点的地址。调用hash_first将hid->current_slot置为0,并让hid->next指向第一个非空桶的首节点(buckets[0]为空时由hash_next_bucket向后查找)。随后反复调用hash_next取得表中的每一个hash_link节点,并将地址保存到list中,最后对list中的每个节点调用free_func统一释放。注意该函数本身并不修改buckets和hid->count。
本文为Eliot原创,转载请注明出处:http://blog.csdn.net/xyw_blog/article/details/9791221
- Squid--hash代码分析
- Squid 日志分析代码
- Squid mainInitialize()函数分析
- squid配置选项分析
- Squid服务日志分析
- sarg分析squid日志
- Hash代码
- Hash分析
- Hash分析
- VC 2010 非标准STL容器hash map代码分析
- Supersonic关于一个Hash Join的测试代码分析
- squid命中率分析参数注释
- Squid Coss文件系统技术分析
- 利用webalizer分析squid日志
- 利用webalizer分析squid日志
- squid源码分析1 ----EventLoop
- 利用 nginx url hash 提高squid服务器命中率
- 【代码克隆检测】基于K-gram hash 分析特征提取技术(代码篇)
- 从网络得到图片数据保存到手机中,
- Oracle 12c 新特性之 Multitenant Architecture (五)
- C++ STL 容器自定义内存分配器
- 算法导论 Exercises 22.5
- Can not find the tag library descriptor for "http://java.sun.com/jsp/jstl/core"
- Squid--hash代码分析
- 各种智能指针的介绍
- The Elements of Programming Style (编程风格的元素)读书总结
- 无法启动Eclipe,因找不到javaw.exe
- 实用技巧(1):eclipse配色插件
- 设计模式C++学习笔记之一(Strategy策略模式)
- 一篇帖子
- FTP命令大全
- java设计模式笔记之简单工厂