redis源代码学习-哈希表的实现

来源：互联网发布：做广告的软件编辑：程序博客网时间：2024/06/03 22:48

在Redis中有专门的文件定义自己的数据结构，这篇我学习的是其中哈希表（dict）的实现，包括insert/delete/find/replace/get-random-element等操作。dict结构的主要目的是解决Redis中的数据查找问题。它利用哈希函数确定key的位置，并且拥有两张哈希表，在容量不够时自动扩容，并对当前哈希表中的数据进行rehash。但这个rehash操作不是一次完成的，而是分散到后续的各个操作中逐步完成，避免单次操作花费太多时间，从而保证操作的响应速度。

1.基本的结构体

哈希表的基本节点是dictEntry，它是一个key-value结构体。由于value的类型是不确定的，因而采用union（联合体）来定义；此外它还包含一个指向下一个节点的指针。

/* One key/value node stored in the hash table.
 *
 * The value is a union, so a single entry can hold either a pointer or an
 * inline 64-bit integer / double. Entries are linked through `next`. */
typedef struct dictEntry {
    void *key;                  /* key pointer */
    union {
        void *val;              /* generic pointer value */
        uint64_t u64;           /* unsigned 64-bit integer value */
        int64_t s64;            /* signed 64-bit integer value */
        double d;               /* double value */
    } v;
    struct dictEntry *next;     /* next entry in the list, or NULL */
} dictEntry;

哈希表还定义了一个类型结构dictType，采用函数指针的方式保存一组字典操作，主要是针对key和value值的各种操作（哈希计算、复制、比较、析构）。

/* Table of callbacks that customises how a dictionary treats its keys and
 * values. Each callback except hashFunction receives a `privdata` pointer as
 * its first argument. */
typedef struct dictType {
    /* Hash function: computes an integer hash for a key. */
    uint64_t (*hashFunction)(const void *key);

    /* Key duplication callback. */
    void *(*keyDup)(void *privdata, const void *key);

    /* Value duplication callback. */
    void *(*valDup)(void *privdata, const void *obj);

    /* Key comparison callback. */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);

    /* Key destructor. */
    void (*keyDestructor)(void *privdata, void *key);

    /* Value destructor. */
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

哈希表的结构体如下。通常一个字典中有两张表，用于实现rehash（即原有哈希表容量不够、需要扩容时，重新为每个元素分配位置）。dict是哈希字典，包含两张哈希表、字典类型指针以及私有数据指针；私有数据会在调用dictType中的回调函数时作为参数传给回调。dictIterator是字典迭代器，包含所属的字典、是否安全的标志、当前节点和下一个节点等字段。

/* A single hash table: the bucket array plus its bookkeeping counters. */
typedef struct dictht {
    dictEntry **table;          /* array of entry pointers, indexed by (hash & sizemask) */
    unsigned long size;         /* size of the hash table */
    unsigned long sizemask;     /* mask ANDed with a hash to obtain a table index */
    unsigned long used;         /* number of entries in use */
} dictht;

/* The dictionary itself: a callback table, a private data pointer and two
 * hash tables (the second one is used while rehashing). */
typedef struct dict {
    dictType *type;             /* key/value callbacks */
    void *privdata;             /* passed as first argument to the callbacks */
    dictht ht[2];               /* the two hash tables */
    long rehashidx;             /* rehashing not in progress if rehashidx == -1 */
    unsigned long iterators;    /* number of iterators currently running */
} dict;

/* Iterator state for walking a dictionary. */
typedef struct dictIterator {
    dict *d;                    /* dictionary being iterated */
    long index;
    int table, safe;
    dictEntry *entry, *nextEntry;
    /* unsafe iterator fingerprint for misuse detection. */
    long long fingerprint;
} dictIterator;

接着以宏的方式定义了字典中的一些基本操作，如设置值、释放（析构）值等，这种用宏来定义“函数”的写法值得学习。

/* Release the value held by `entry` by calling the dictionary's
 * valDestructor, if one is set; otherwise do nothing.
 *
 * Wrapped in do { ... } while(0) so that the expansion is a single statement:
 * a bare `if` here would capture an `else` that follows the macro call. */
#define dictFreeVal(d, entry) do { \
    if ((d)->type->valDestructor) \
        (d)->type->valDestructor((d)->privdata, (entry)->v.val); \
} while(0)

/* Store a void* value in `entry`: if the dictionary type defines a custom
 * valDup callback it is called and its result is stored, otherwise the
 * pointer itself is assigned. */
#define dictSetVal(d, entry, _val_) do { \
    if ((d)->type->valDup) \
        (entry)->v.val = (d)->type->valDup((d)->privdata, _val_); \
    else \
        (entry)->v.val = (_val_); \
} while(0)

/* Store a signed 64-bit integer directly in the entry's value union. */
#define dictSetSignedIntegerVal(entry, _val_) \
    do { (entry)->v.s64 = (_val_); } while(0)

/* Store an unsigned 64-bit integer directly in the entry's value union. */
#define dictSetUnsignedIntegerVal(entry, _val_) \
    do { (entry)->v.u64 = (_val_); } while(0)

/* Store a double directly in the entry's value union. */
#define dictSetDoubleVal(entry, _val_) \
    do { (entry)->v.d = (_val_); } while(0)

接着定义了一些基本的插入、删除、查找操作，可以看到在每个操作中都会先判断是否处于rehash过程中，若是，则将rehash向前推进，通过这样分散rehash操作。

/* Add an element to the target hash table.
 *
 * Returns DICT_OK if the key/value pair was inserted, or DICT_ERR if
 * dictAddRaw() returned NULL (the key already exists). */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *entry = dictAddRaw(d,key,NULL);

    if (!entry) return DICT_ERR;
    dictSetVal(d, entry, val);
    return DICT_OK;
}

/* Low level add or find:
 * This function adds the entry but instead of setting a value returns the
 * dictEntry structure to the user, that will make sure to fill the value
 * field as he wishes.
 *
 * This function is also directly exposed to the user API to be called
 * mainly in order to store non-pointers inside the hash value, example:
 *
 * entry = dictAddRaw(dict,mykey,NULL);
 * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
 *
 * Return values:
 *
 * If key already exists NULL is returned, and "*existing" is populated
 * with the existing entry if existing is not NULL.
 *
 * If key was added, the hash entry is returned to be manipulated by the caller.
 */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    int index;
    dictEntry *entry;
    dictht *ht;

    /* On every insertion, run one step of rehashing if a rehash is in
     * progress, so the rehash cost is spread across operations. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Get the index of the new element, or -1 if
     * the element already exists. */
    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
        return NULL;

    /* Allocate the memory and store the new entry.
     * Insert the element in top, with the assumption that in a database
     * system it is more likely that recently added entries are accessed
     * more frequently. */
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;   /* new entry becomes the head of the list */
    ht->used++;

    /* Set the hash entry fields. */
    dictSetKey(d, entry, key);
    return entry;
}

/* Add or Overwrite:
 * Add an element, discarding the old value if the key already exists.
 * Return 1 if the key was added from scratch, 0 if there was already an
 * element with such key and dictReplace() just performed a value update
 * operation. */
int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *entry, *existing, auxentry;

    /* Try to add the element. If the key
     * does not exist dictAddRaw will succeed. */
    entry = dictAddRaw(d,key,&existing);
    if (entry) {
        dictSetVal(d, entry, val);
        return 1;
    }

    /* Set the new value and free the old one. Note that it is important
     * to do that in this order, as the value may just be exactly the same
     * as the previous one. In this context, think to reference counting,
     * you want to increment (set), and then decrement (free), and not the
     * reverse. */
    auxentry = *existing;
    dictSetVal(d, existing, val);
    dictFreeVal(d, &auxentry);
    return 0;
}

/* Add or Find:
 * dictAddOrFind() is simply a version of dictAddRaw() that always
 * returns the hash entry of the specified key, even if the key already
 * exists and can't be added (in that case the entry of the already
 * existing key is returned.)
 *
 * See dictAddRaw() for more information. */
dictEntry *dictAddOrFind(dict *d, void *key) {
    dictEntry *entry, *existing;

    entry = dictAddRaw(d,key,&existing);
    return entry ? entry : existing;
}

/* Search and remove an element. This is an helper function for
 * dictDelete() and dictUnlink(), please check the top comment
 * of those functions.
 *
 * Returns the unlinked entry, or NULL if the key was not found. When
 * `nofree` is zero the entry has already been released with zfree() by the
 * time it is returned, so the result may only be compared against NULL and
 * must never be dereferenced. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    /* NOTE(review): dictType.hashFunction returns uint64_t, so storing the
     * hash in an unsigned int drops the upper bits. Left unchanged because
     * _dictKeyIndex() is not visible here and the insertion and lookup
     * paths must agree on the width - confirm before widening. */
    unsigned int h, idx;
    dictEntry *he, *prevHe;
    int table;

    if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;

    /* If a rehash is in progress, advance it by one step, as the other
     * operations do. */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);

    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        prevHe = NULL;
        while(he) {
            if (key==he->key || dictCompareKeys(d, key, he->key)) {
                /* Unlink the element from the list */
                if (prevHe)
                    prevHe->next = he->next;
                else
                    d->ht[table].table[idx] = he->next;
                if (!nofree) {
                    dictFreeKey(d, he);
                    dictFreeVal(d, he);
                    zfree(he);
                }
                d->ht[table].used--;
                return he;
            }
            prevHe = he;
            he = he->next;
        }
        /* The second table only needs to be searched while rehashing. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL; /* not found */
}

/* Remove an element, returning DICT_OK on success or DICT_ERR if the
 * element was not found. */
int dictDelete(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
}

/* Look up `key` in the dictionary.
 *
 * Returns the matching entry, or NULL if the dictionary is empty or the key
 * is not present. If a rehash is in progress, one rehash step is performed
 * first and both tables are searched. */
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *he;
    /* NOTE(review): dictType.hashFunction returns uint64_t, so storing the
     * hash in an unsigned int drops the upper bits. Left unchanged because
     * the insertion path is not fully visible here - confirm before
     * widening. */
    unsigned int h, idx, table;

    if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        while(he) {
            /* Pointer equality is tested first, then the key comparison. */
            if (key==he->key || dictCompareKeys(d, key, he->key))
                return he;
            he = he->next;
        }
        /* The second table only needs to be searched while rehashing. */
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}

/* Return the value stored under `key`, or NULL if the key is not found. */
void *dictFetchValue(dict *d, const void *key) {
    dictEntry *he;

    he = dictFind(d,key);
    return he ? dictGetVal(he) : NULL;
}

0 0