Redis源代码分析之二：散列表——Dict（上）

来源：互联网发布：6月淘客不计入淘宝权重编辑：程序博客网时间：2024/04/30 11:47

先介绍Redis散列表实现的几个重要数据结构：

字典项DictEntry：

typedef struct dictEntry {    void *key;    void *val;    struct dictEntry *next;} dictEntry;

字典类型DictType：

typedef struct dictType {    unsigned int (*hashFunction)(const void *key);    void *(*keyDup)(void *privdata, const void *key);    void *(*valDup)(void *privdata, const void *obj);    int (*keyCompare)(void *privdata, const void *key1, const void *key2);    void (*keyDestructor)(void *privdata, void *key);    void (*valDestructor)(void *privdata, void *obj);} dictType;

哈希表结构dictht：

/* This is our hash table structure. Every dictionary has two of this as we * implement incremental rehashing, for the old to the new table. */typedef struct dictht {    dictEntry **table;    unsigned long size;    unsigned long sizemask;    unsigned long used;} dictht;

字典结构dict：

typedef struct dict {    dictType *type;    void *privdata;    dictht ht[2];    int rehashidx; /* rehashing not in progress if rehashidx == -1 */    int iterators; /* number of iterators currently running */} dict;

字典迭代器dictIterator：

/* If safe is set to 1 this is a safe iteartor, that means, you can call * dictAdd, dictFind, and other functions against the dictionary even while * iterating. Otherwise it is a non safe iterator, and only dictNext() * should be called while iterating. */typedef struct dictIterator {    dict *d;    int table, index, safe;    dictEntry *entry, *nextEntry;} dictIterator;

然后介绍一下字典接口定义：

dict *dictCreate(dictType *type, void *privDataPtr);int dictExpand(dict *d, unsigned long size);int dictAdd(dict *d, void *key, void *val);int dictReplace(dict *d, void *key, void *val);int dictDelete(dict *d, const void *key);int dictDeleteNoFree(dict *d, const void *key);void dictRelease(dict *d);dictEntry * dictFind(dict *d, const void *key);void *dictFetchValue(dict *d, const void *key);int dictResize(dict *d);dictIterator *dictGetIterator(dict *d);dictIterator *dictGetSafeIterator(dict *d);dictEntry *dictNext(dictIterator *iter);void dictReleaseIterator(dictIterator *iter);dictEntry *dictGetRandomKey(dict *d);void dictPrintStats(dict *d);unsigned int dictGenHashFunction(const unsigned char *buf, int len);unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);void dictEmpty(dict *d);void dictEnableResize(void);void dictDisableResize(void);int dictRehash(dict *d, int n);int dictRehashMilliseconds(dict *d, int ms);

下面分析扩张或者创建哈希表的重要函数dictExpand：

/* Expand or create the hashtable */int dictExpand(dict *d, unsigned long size){    dictht n; /* the new hashtable */    unsigned long realsize = _dictNextPower(size);    /* the size is invalid if it is smaller than the number of     * elements already inside the hashtable */    if (dictIsRehashing(d) || d->ht[0].used > size)        return DICT_ERR;    /* Allocate the new hashtable and initialize all pointers to NULL */    n.size = realsize;    n.sizemask = realsize-1;    n.table = zcalloc(realsize*sizeof(dictEntry*));    n.used = 0;    /* Is this the first initialization? If so it's not really a rehashing     * we just set the first hash table so that it can accept keys. */    if (d->ht[0].table == NULL) {        d->ht[0] = n;        return DICT_OK;    }    /* Prepare a second hash table for incremental rehashing */    d->ht[1] = n;    d->rehashidx = 0;    return DICT_OK;}

对于传入的参数：新哈希表的大小size，首先调用内部函数_dictNextPower(size)取得大于size的最小2次幂整数，作为哈希表大小。掩码sizemask为size二进制表示长度减一的全1表示。调用内存管理函数zcalloc分配新哈希表的内存。

接下来，函数判断这是否是哈希表的首次初始化，这通过判断字典的哈希表数组ht的首个元素的dictEntry是否为空实现，如果为空，说明是首次初始化，则将该哈希表的size设为n，直接返回DICT_OK；否则，说明这是一次rehash，那么函数将准备第二个哈希表d->ht[1]，并将d的rehashidx设为0，准备进行后续的增量哈希，然后返回DICT_OK。

下面分析再哈希的实现dictRehash函数：

/* Performs N steps of incremental rehashing. Returns 1 if there are still * keys to move from the old to the new hash table, otherwise 0 is returned. * Note that a rehashing step consists in moving a bucket (that may have more * thank one key as we use chaining) from the old to the new hash table. */int dictRehash(dict *d, int n) {    if (!dictIsRehashing(d)) return 0;    while(n--) {        dictEntry *de, *nextde;        /* Check if we already rehashed the whole table... */        if (d->ht[0].used == 0) {            zfree(d->ht[0].table);            d->ht[0] = d->ht[1];            _dictReset(&d->ht[1]);            d->rehashidx = -1;            return 0;        }        /* Note that rehashidx can't overflow as we are sure there are more         * elements because ht[0].used != 0 */        while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;        de = d->ht[0].table[d->rehashidx];        /* Move all the keys in this bucket from the old to the new hash HT */        while(de) {            unsigned int h;            nextde = de->next;            /* Get the index in the new hash table */            h = dictHashKey(d, de->key) & d->ht[1].sizemask;            de->next = d->ht[1].table[h];            d->ht[1].table[h] = de;            d->ht[0].used--;            d->ht[1].used++;            de = nextde;        }        d->ht[0].table[d->rehashidx] = NULL;        d->rehashidx++;    }    return 1;}

首先判断这是否是一次合法的rehash调用，通过判断(ht)->rehashidx!=-1实现。

然后，进行n步rehash。其中的每一步都重复如下步骤：

(1) 检查我们是否已经rehash了整个哈希表（此时d->ht[0].used为0），如果是，析构旧的哈希表，将d->rehashidx置为-1。

(2) 遍历哈希表d->ht[0]，直到找到非空的字典项de，然后此后通过de->next继续遍历。在此之前，通过位操作dictHashKey(d, de->key) & d->ht[1].sizemask获得在新哈希表d->ht[1]中的索引h，并将该字典项复制到新哈希表d->ht[1]，同时更新两个哈希表的d->ht[i].used计数。然后将最初de对应的rehashidx对应的字典项标记为NULL，将->rehashidx加1，然后重复(1)层的循环。