dict

来源：互联网发布：hadoop java获取文件编辑：程序博客网时间：2024/06/06 01:18

dict.h dict.c

dict是redis中定义的一个hashtable。
它的数据结构是这样的

typedef struct dictEntry {    void *key;    union {        void *val;        uint64_t u64;        int64_t s64;        double d;    } v;    struct dictEntry *next;} dictEntry;typedef struct dictType {    uint64_t (*hashFunction)(const void *key);    void *(*keyDup)(void *privdata, const void *key);    void *(*valDup)(void *privdata, const void *obj);    int (*keyCompare)(void *privdata, const void *key1, const void *key2);    void (*keyDestructor)(void *privdata, void *key);    void (*valDestructor)(void *privdata, void *obj);} dictType;typedef struct dictht {    dictEntry **table;    unsigned long size;    unsigned long sizemask;    unsigned long used;} dictht;typedef struct dict {    dictType *type;    void *privdata;    dictht ht[2];    long rehashidx;     unsigned long iterators; } dict;

dictEntry是hashtable中的节点，它保存了value和key，还有一个指向另一个dictEntry的指针，指向另一个和他被分到同一个数组节点的dictEntry。
dictType则保存了一些hashtable需要用到的函数的指针。
dictht则是整个hashtable中实际保存数据的表。这样的表在同一个hashtable中有两个。多出来的一个他们会在扩容的时候被用到，我们在后面会介绍。table字段则是一个指向dictEntry数组的指针，size是数组的大小，sizemask是给dictEntry安排位置的掩码，用来替代求余运算，used是当前装载的dictEntry数量。
正主dict包含了一个dictType，reshashidx表示扩容的进度，iterators表示dict的迭代器。

dict的初始化

static void _dictReset(dictht *ht){    ht->table = NULL;    ht->size = 0;    ht->sizemask = 0;    ht->used = 0;}dict *dictCreate(dictType *type,        void *privDataPtr){    dict *d = zmalloc(sizeof(*d));    _dictInit(d,type,privDataPtr);    return d;}/* Initialize the hash table */int _dictInit(dict *d, dictType *type,        void *privDataPtr){    _dictReset(&d->ht[0]);    _dictReset(&d->ht[1]);    d->type = type;    d->privdata = privDataPtr;    d->rehashidx = -1;    d->iterators = 0;    return DICT_OK;}

没什么特别的地方，初始化各字段。

int dictResize(dict *d){    int minimal;    //检查是否可以扩容    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;    //检查是否是dictht的初始化，如果是则按照dictht的最小空间扩容。    minimal = d->ht[0].used;    if (minimal < DICT_HT_INITIAL_SIZE)        minimal = DICT_HT_INITIAL_SIZE;    return dictExpand(d, minimal);}int dictExpand(dict *d, unsigned long size){    dictht n;     //获取扩容到目标，每次扩容到原来的2倍，_dictNextPower就是下一个2次幂    unsigned long realsize = _dictNextPower(size);    if (dictIsRehashing(d) || d->ht[0].used > size)        return DICT_ERR;    if (realsize == d->ht[0].size) return DICT_ERR;    n.size = realsize;    //reasize是2次幂，股realsize-1用二进制表示则每一位上都是1。    n.sizemask = realsize-1;    //给dictht中的dictEntry数组申请空间    n.table = zcalloc(realsize*sizeof(dictEntry*));    n.used = 0;    //如果是dictht的初始化，新生成的dictht则交给ht[0]指针    if (d->ht[0].table == NULL) {        d->ht[0] = n;        return DICT_OK;    }    //如果不是，则把新生成的dictht交给ht[1]指针    d->ht[1] = n;    d->rehashidx = 0;    return DICT_OK;}

dictResize是个很有意思的过程。它根据新的大小生成新的dictht然后直接把新的dict交给ht[1]指针，同时维护了两个表。然后通过dictRehash迁移数据

int dictRehash(dict *d, int n) {    //设置迁移数量    int empty_visits = n*10;    if (!dictIsRehashing(d)) return 0;    //迁移循环    while(n-- && d->ht[0].used != 0) {        dictEntry *de, *nextde;        assert(d->ht[0].size > (unsigned long)d->rehashidx);        while(d->ht[0].table[d->rehashidx] == NULL) {            d->rehashidx++;            if (--empty_visits == 0) return 1;        }        //找到一个非空的table节点        de = d->ht[0].table[d->rehashidx];        //把链表上的节点逐个迁移        while(de) {            unsigned int h;            nextde = de->next;            h = dictHashKey(d, de->key) & d->ht[1].sizemask;            de->next = d->ht[1].table[h];            d->ht[1].table[h] = de;            d->ht[0].used--;            d->ht[1].used++;            de = nextde;        }        //清空原table节点        d->ht[0].table[d->rehashidx] = NULL;        //迁移进度更新。        d->rehashidx++;    }    //检查是否迁移完成    if (d->ht[0].used == 0) {        zfree(d->ht[0].table);        d->ht[0] = d->ht[1];        _dictReset(&d->ht[1]);        d->rehashidx = -1;        return 0;    }    return 1;}

可以看到，这个迁移数据是有数量限制的，只迁移一定数量的数据，如果没迁完就先放着。
同时再有这两个函数

//获取当前时间long long timeInMilliseconds(void) {    struct timeval tv;    gettimeofday(&tv,NULL);    return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000);}int dictRehashMilliseconds(dict *d, int ms) {    long long start = timeInMilliseconds();    int rehashes = 0;    while(dictRehash(d,100)) {        rehashes += 100;        if (timeInMilliseconds()-start > ms) break;    }    return rehashes;}

可以看出来这两个函数是用来控制迁移的时间的。如果hashtable过大，则一次性迁移完需要的时间实在太多，长时间处理会影响整个系统的性能。
redis的解决方案是蚂蚁搬家式的。只在dictRehash中大规模的迁移一次，然后在之后的每次增删查改中都摊派一次迁移工作，直到整个dictht迁移完成。
可以看到在dictAdd， dictDelete， dictFind， dictGetEandomKey等函数中都包含了

if (dictIsRehashing(d)) _dictRehashStep(d);

他们都会在执行过程中完成一组迁移。
那么，如何保证在迁移完成前的正确性呢？
正确性就是靠dict中的rehashidx字段来保证的，它记录了迁移的进度。
如果在增删查改中，发现涉及的key的hash指向table中的第i项，那么就会先拿i和rehashidx比较，看看table中第i项的链表已经是否被迁移了，如果被迁移了，则到ht[1]中寻找，如果没有迁移，则到ht[0]中寻找。

其他的增删查改没什么特别之处。
整个hashtable的实现似乎和JDK的非常相似。唯一的区别是这里的hastable不会在链表过长的时候自动转换为红黑树。

阅读全文

0 0