【Redis源码剖析】

来源:互联网 发布:win7仿mac dock栏 编辑:程序博客网 时间:2024/06/05 14:40

今天我们来介绍Redis中一个比较简单的内置数据结构 – 整数集合intset。


1、intset存储结构

整数集合,顾名思义,就是存储一系列整数的集合。在redis中,intset结构体的定义如下:

/* Integer set: a header followed by a packed array of same-width integers. */
typedef struct intset {
    /* Element encoding: one of the INTSET_ENC_* values, i.e. the width in
     * bytes of every integer stored in contents. */
    uint32_t encoding;
    /* Number of elements currently stored. */
    uint32_t length;
    /* Raw element storage (flexible array member); the bytes are interpreted
     * as int16_t, int32_t or int64_t according to encoding. */
    int8_t contents[];
} intset;

其中:encoding表示intset存储的整型类型,共有3种不同编码:INTSET_ENC_INT16、INTSET_ENC_INT32、INTSET_ENC_INT64,分别表示int16_t、int32_t、int64_t三种不同类型的整数。一个intset中存储的整数都是相同类型的。length指明了intset中存放的元素个数。contents数组才是真正存放整数的地方,每个整数占用的字节数由encoding指出,可能是2字节、4字节、8字节。

intset的存储结构如下图:

这里写图片描述

2、元素存储特点

作为一个集合,判断集合中是否存在某个元素是一个常用的操作。在intset中如果通过顺序查找contents数组来实现必定带来低效率。为此,intset将其保存的元素按序(递增顺序)排列,通过二分法加快查找过程。

我们可以看看search方法的实现:

/* Binary-search the sorted set for "value".
 *
 * Returns 1 and stores the index of the match in *pos when the value is
 * present. Returns 0 otherwise, and stores in *pos the index at which the
 * value could be inserted while keeping the set sorted. "pos" may be NULL
 * when the caller does not need the position. */
static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
    uint32_t count = intrev32ifbe(is->length);
    int lo = 0;
    int hi = count-1;
    int probe = -1;
    int64_t seen = -1;
    uint8_t found;

    /* Nothing can be found in an empty set; insertion goes at index 0. */
    if (count == 0) {
        if (pos) *pos = 0;
        return 0;
    }

    /* Elements are sorted ascending, so comparing against the last and the
     * first element tells us right away whether the value lies outside the
     * stored range, and where it would have to be inserted. */
    if (value > _intsetGet(is,count-1)) {
        if (pos) *pos = count;
        return 0;
    }
    if (value < _intsetGet(is,0)) {
        if (pos) *pos = 0;
        return 0;
    }

    /* Classic bisection over [lo, hi]. */
    while (lo <= hi) {
        probe = ((unsigned int)lo + (unsigned int)hi) >> 1;
        seen = _intsetGet(is,probe);
        if (value == seen) break;
        if (value > seen)
            lo = probe+1;
        else
            hi = probe-1;
    }

    /* On a hit report the matching index, otherwise the insertion point. */
    found = (value == seen);
    if (pos) *pos = found ? probe : lo;
    return found;
}

3、intsetUpgradeAndAdd操作

既然intset中只能保存int16_t、int32_t、int64_t中一种类型的整数,那就可能存在这么一种情况:当前的intset存放int16_t的整型数,但想往其中插入一个int32_t的新值。此时需要对旧集合中的所有元素全部调整为int32_t编码,然后再插入新值。intsetUpgradeAndAdd函数就是用来处理这种情况的。

/* Re-encode the whole set with the wider encoding required by "value" and
 * then insert "value".
 *
 * The caller only gets here when the value does not fit the current
 * encoding, so the value is placed at one end of the array: at the front
 * when it is negative, at the back otherwise. Returns the (possibly moved)
 * set pointer. */
static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
    uint8_t old_enc = intrev32ifbe(is->encoding);
    uint8_t wide_enc = _intsetValueEncoding(value);
    int count = intrev32ifbe(is->length);
    /* 1 when a free slot must be left at index 0, 0 when it is at the end. */
    int shift = (value < 0);
    int idx;

    /* Switch to the new encoding first, then grow the buffer by one slot. */
    is->encoding = intrev32ifbe(wide_enc);
    is = intsetResize(is,intrev32ifbe(is->length)+1);

    /* Widen the elements from the last one down to the first: the widened
     * slots sit at higher addresses, so going back-to-front never overwrites
     * an element that has not been read yet. */
    for (idx = count-1; idx >= 0; idx--)
        _intsetSet(is,idx+shift,_intsetGetEncoded(is,idx,old_enc));

    /* Drop the new value into the slot left free at the front or the back. */
    if (!shift)
        _intsetSet(is,intrev32ifbe(is->length),value);
    else
        _intsetSet(is,0,value);

    /* Account for the newly added element. */
    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
    return is;
}
intset是一种比较简单的数据结构,下面是注释后的源码,供大家参考:

/* * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com> * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com> * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * *   * Redistributions of source code must retain the above copyright notice, *     this list of conditions and the following disclaimer. *   * Redistributions in binary form must reproduce the above copyright *     notice, this list of conditions and the following disclaimer in the *     documentation and/or other materials provided with the distribution. *   * Neither the name of Redis nor the names of its contributors may be used *     to endorse or promote products derived from this software without *     specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include "intset.h"#include "zmalloc.h"#include "endianconv.h"/* 整数集合intset中是以递增的形式存放整数的,可以加快查找速度 *//* Note that these encodings are ordered, so: * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. 
*//* 三种不同整数类型编码 */#define INTSET_ENC_INT16 (sizeof(int16_t))  // int16_t整型#define INTSET_ENC_INT32 (sizeof(int32_t))  // int32_t整型#define INTSET_ENC_INT64 (sizeof(int64_t))  // int64_t整型/* Return the required encoding for the provided value. *//* 返回存储给定整数值的编码方式 */static uint8_t _intsetValueEncoding(int64_t v) {    if (v < INT32_MIN || v > INT32_MAX)        return INTSET_ENC_INT64;    else if (v < INT16_MIN || v > INT16_MAX)        return INTSET_ENC_INT32;    else        return INTSET_ENC_INT16;}/* Return the value at pos, given an encoding. *//* 将intset.contents数组指定位置的数据按指定的编码方式解析并返回 */static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {    int64_t v64;    int32_t v32;    int16_t v16;    if (enc == INTSET_ENC_INT64) {        // 将pos位置开始的8个字节解析为int64_t整数        memcpy(&v64,((int64_t*)is->contents)+pos,sizeof(v64));        // 统一转换为小端模式        memrev64ifbe(&v64);        return v64;    } else if (enc == INTSET_ENC_INT32) {        // 将pos位置开始的4个字节解析为int32_t整数        memcpy(&v32,((int32_t*)is->contents)+pos,sizeof(v32));        // 统一转换为小端模式        memrev32ifbe(&v32);        return v32;    } else {        // 将pos位置开始的2个字节解析为int16_t整数        memcpy(&v16,((int16_t*)is->contents)+pos,sizeof(v16));        // 统一转换为小端模式        memrev16ifbe(&v16);        return v16;    }}/* Return the value at pos, using the configured encoding. *//* 获取intset.contents数组指定位置的数据,编码方式由intset.encoding指定。 */static int64_t _intsetGet(intset *is, int pos) {    return _intsetGetEncoded(is,pos,intrev32ifbe(is->encoding));}/* Set the value at pos, using the configured encoding. 
*//* 在intset.contents数组指定位置上设置数值,编码方式由intset.encoding指定。 */static void _intsetSet(intset *is, int pos, int64_t value) {    // 获取编码方式    uint32_t encoding = intrev32ifbe(is->encoding);    // 依据编码方式设置相应的值    if (encoding == INTSET_ENC_INT64) {        ((int64_t*)is->contents)[pos] = value;        memrev64ifbe(((int64_t*)is->contents)+pos);    } else if (encoding == INTSET_ENC_INT32) {        ((int32_t*)is->contents)[pos] = value;        memrev32ifbe(((int32_t*)is->contents)+pos);    } else {        ((int16_t*)is->contents)[pos] = value;        memrev16ifbe(((int16_t*)is->contents)+pos);    }}/* Create an empty intset. *//* 创建一个空的整数结合 */intset *intsetNew(void) {    intset *is = zmalloc(sizeof(intset));    // 默认存放int16_t整数    is->encoding = intrev32ifbe(INTSET_ENC_INT16);    is->length = 0;    return is;}/* Resize the intset *//* 调整intset的容量大小 */static intset *intsetResize(intset *is, uint32_t len) {    // 计算新增的字节数    uint32_t size = len*intrev32ifbe(is->encoding);    // 重新分配内存空间,realloc在原地分配    is = zrealloc(is,sizeof(intset)+size);    return is;}/* Search for the position of "value". Return 1 when the value was found and * sets "pos" to the position of the value within the intset. Return 0 when * the value is not present in the intset and sets "pos" to the position * where "value" can be inserted. *//* 查找函数:在整数集合intset中查找给定的整数,如果查找成功,则将其索引位置赋值给pos并返回1,    否则返回0,此时pos指向一个可插入新节点的位置 */static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {    int min = 0, max = intrev32ifbe(is->length)-1, mid = -1;    int64_t cur = -1;    /* The value can never be found when the set is empty */    // 如果intset为空,返回0    if (intrev32ifbe(is->length) == 0) {        if (pos) *pos = 0;        return 0;    } else {        /* Check for the case where we know we cannot find the value,         * but do know the insert position. 
*/        // intset中的元素是排好序的,判断第一个和最后一个元素的数值就可以确定给定数value有无可能        // 落在intset中,如果给定值不在该区间则无需继续查找        if (value > _intsetGet(is,intrev32ifbe(is->length)-1)) {            if (pos) *pos = intrev32ifbe(is->length);            return 0;        } else if (value < _intsetGet(is,0)) {            if (pos) *pos = 0;            return 0;        }    }    // 利用二分法查找一个插入位置    while(max >= min) {        mid = ((unsigned int)min + (unsigned int)max) >> 1;        cur = _intsetGet(is,mid);        if (value > cur) {            min = mid+1;        } else if (value < cur) {            max = mid-1;        } else {            break;        }    }    if (value == cur) {        // 查找到指定值        if (pos) *pos = mid;        return 1;    } else {        // 未找到给定值,返回插入位置        if (pos) *pos = min;        return 0;    }}/* Upgrades the intset to a larger encoding and inserts the given integer. *//* 使用更大的编码方式更新整个intset并插入一个数字。    如果当前的intset存放int16_t的整型数,如果要插入一个int32_t的新值,则需要对旧集合中的所有元素全部    调整为int32_t编码,然后再插入新值。这个函数就是用来处理这种情况的。*/static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {    // 旧的编码方式    uint8_t curenc = intrev32ifbe(is->encoding);    // 新的编码方式    uint8_t newenc = _intsetValueEncoding(value);    int length = intrev32ifbe(is->length);    // 插入的新值放在intset->contents数组的前面或后面,prepend用来保证留出一个空间来存放新值    int prepend = value < 0 ? 1 : 0;    /* First set new encoding and resize */    // 设置新的编码然后调整intset的容量大小    is->encoding = intrev32ifbe(newenc);    is = intsetResize(is,intrev32ifbe(is->length)+1);    /* Upgrade back-to-front so we don't overwrite values.     * Note that the "prepend" variable is used to make sure we have an empty     * space at either the beginning or the end of the intset. */    // 从后往前开始复制,这样就不会发生覆盖现象    while(length--)        _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));    /* Set the value at the beginning or the end. 
*/    // 插入新值,如果新值大于0,则追加到contents数组后面,否则插入到contents数组前边    if (prepend)        _intsetSet(is,0,value);    else        _intsetSet(is,intrev32ifbe(is->length),value);    // 更新长度信息    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);    return is;}/* 将from位置开始的数据移动到to位置开始后的空间,用户删除操作。    该操作结束后,contents数组尾端可能存在无效数据。 */static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {    void *src, *dst;    // 计算from位置开始共有多少个元素    uint32_t bytes = intrev32ifbe(is->length)-from;    uint32_t encoding = intrev32ifbe(is->encoding);    // 根据编码方式计算需要移动的字节数、目标位置和移动开始位置    if (encoding == INTSET_ENC_INT64) {        src = (int64_t*)is->contents+from;        dst = (int64_t*)is->contents+to;        bytes *= sizeof(int64_t);    } else if (encoding == INTSET_ENC_INT32) {        src = (int32_t*)is->contents+from;        dst = (int32_t*)is->contents+to;        bytes *= sizeof(int32_t);    } else {        src = (int16_t*)is->contents+from;        dst = (int16_t*)is->contents+to;        bytes *= sizeof(int16_t);    }    // 内存移动    memmove(dst,src,bytes);}/* Insert an integer in the intset *//* 往intset中添加一个元素 */intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {    // 获取存储待插入新值所需要的字节数,即编码方式    uint8_t valenc = _intsetValueEncoding(value);    uint32_t pos;    if (success) *success = 1;    /* Upgrade encoding if necessary. If we need to upgrade, we know that     * this value should be either appended (if > 0) or prepended (if < 0),     * because it lies outside the range of existing values. */    // 如果新值的编码方式大于当前intset的编码方式,需要扩展后再插入    if (valenc > intrev32ifbe(is->encoding)) {        /* This always succeeds, so we don't need to curry *success. */        return intsetUpgradeAndAdd(is,value);    } else {        /* Abort if the value is already present in the set.         * This call will populate "pos" with the right position to insert         * the value when it cannot be found. 
*/        // 判断待插入值是不是已经存在        if (intsetSearch(is,value,&pos)) {            if (success) *success = 0;            return is;        }        // 经过intsetSearch操作后,pos指向了一个可插入位置        is = intsetResize(is,intrev32ifbe(is->length)+1);        // 移动元素以保住在pos位置留出足够空间用来容纳新值        if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);    }    // 写入待插入整数值    _intsetSet(is,pos,value);    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);    // 更新长度信息    return is;}/* Delete integer from intset *//* 从intset中删除一个给定的整数,参数success用来标识是否执行了删除操作 */intset *intsetRemove(intset *is, int64_t value, int *success) {    uint8_t valenc = _intsetValueEncoding(value);    uint32_t pos;    if (success) *success = 0;    // 先在intset中查找给定值value,只有该值存在才能删除    if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) {        uint32_t len = intrev32ifbe(is->length);        /* We know we can delete */        if (success) *success = 1;        /* Overwrite value with tail and update length */        // 将待删除元素后面的元素移动到待删除元素当前的位置        if (pos < (len-1)) intsetMoveTail(is,pos+1,pos);        // 调整intset的内存空间大小        is = intsetResize(is,len-1);        // 更新长度信息        is->length = intrev32ifbe(len-1);    }    return is;}/* Determine whether a value belongs to this set *//* 判断intset中是否包含指定的值 */uint8_t intsetFind(intset *is, int64_t value) {    uint8_t valenc = _intsetValueEncoding(value);    return valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,NULL);}/* Return random member *//* 随机返回intset中的一个元素 */int64_t intsetRandom(intset *is) {    return _intsetGet(is,rand()%intrev32ifbe(is->length));}/* Sets the value to the value at the given position. When this position is * out of range the function returns 0, when in range it returns 1. 
*//* 返回intset中指定位置的元素,如果指定的位置超出contents的长度则返回0,否则返回1。 */uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {    // 判断pos是否合法    if (pos < intrev32ifbe(is->length)) {        // 调用_intsetGet函数返回相应的值        *value = _intsetGet(is,pos);        return 1;    }    return 0;}/* Return intset length *//* 返回intset中保存的元素个数 */uint32_t intsetLen(intset *is) {    return intrev32ifbe(is->length);}/* Return intset blob size in bytes. *//* 返回intset所占用的字节数 */size_t intsetBlobLen(intset *is) {    return sizeof(intset)+intrev32ifbe(is->length)*intrev32ifbe(is->encoding);}/* 下面是一些测试代码 */#ifdef INTSET_TEST_MAIN#include <sys/time.h>void intsetRepr(intset *is) {    int i;    for (i = 0; i < intrev32ifbe(is->length); i++) {        printf("%lld\n", (uint64_t)_intsetGet(is,i));    }    printf("\n");}void error(char *err) {    printf("%s\n", err);    exit(1);}void ok(void) {    printf("OK\n");}long long usec(void) {    struct timeval tv;    gettimeofday(&tv,NULL);    return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;}#define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))void _assert(char *estr, char *file, int line) {    printf("\n\n=== ASSERTION FAILED ===\n");    printf("==> %s:%d '%s' is not true\n",file,line,estr);}intset *createSet(int bits, int size) {    uint64_t mask = (1<<bits)-1;    uint64_t i, value;    intset *is = intsetNew();    for (i = 0; i < size; i++) {        if (bits > 32) {            value = (rand()*rand()) & mask;        } else {            value = rand() & mask;        }        is = intsetAdd(is,value,NULL);    }    return is;}void checkConsistency(intset *is) {    int i;    for (i = 0; i < (intrev32ifbe(is->length)-1); i++) {        uint32_t encoding = intrev32ifbe(is->encoding);        if (encoding == INTSET_ENC_INT16) {            int16_t *i16 = (int16_t*)is->contents;            assert(i16[i] < i16[i+1]);        } else if (encoding == INTSET_ENC_INT32) {            int32_t *i32 = (int32_t*)is->contents;            assert(i32[i] < 
i32[i+1]);        } else {            int64_t *i64 = (int64_t*)is->contents;            assert(i64[i] < i64[i+1]);        }    }}int main(int argc, char **argv) {    uint8_t success;    int i;    intset *is;    sranddev();    printf("Value encodings: "); {        assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);        assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16);        assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32);        assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32);        assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32);        assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);        assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);        assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);        assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);        assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);        ok();    }    printf("Basic adding: "); {        is = intsetNew();        is = intsetAdd(is,5,&success); assert(success);        is = intsetAdd(is,6,&success); assert(success);        is = intsetAdd(is,4,&success); assert(success);        is = intsetAdd(is,4,&success); assert(!success);        ok();    }    printf("Large number of random adds: "); {        int inserts = 0;        is = intsetNew();        for (i = 0; i < 1024; i++) {            is = intsetAdd(is,rand()%0x800,&success);            if (success) inserts++;        }        assert(intrev32ifbe(is->length) == inserts);        checkConsistency(is);        ok();    }    printf("Upgrade from int16 to int32: "); {        is = intsetNew();        is = intsetAdd(is,32,NULL);        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);        is = intsetAdd(is,65535,NULL);        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);        assert(intsetFind(is,32));        assert(intsetFind(is,65535));        checkConsistency(is);        is = intsetNew();     
   is = intsetAdd(is,32,NULL);        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);        is = intsetAdd(is,-65535,NULL);        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);        assert(intsetFind(is,32));        assert(intsetFind(is,-65535));        checkConsistency(is);        ok();    }    printf("Upgrade from int16 to int64: "); {        is = intsetNew();        is = intsetAdd(is,32,NULL);        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);        is = intsetAdd(is,4294967295,NULL);        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);        assert(intsetFind(is,32));        assert(intsetFind(is,4294967295));        checkConsistency(is);        is = intsetNew();        is = intsetAdd(is,32,NULL);        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);        is = intsetAdd(is,-4294967295,NULL);        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);        assert(intsetFind(is,32));        assert(intsetFind(is,-4294967295));        checkConsistency(is);        ok();    }    printf("Upgrade from int32 to int64: "); {        is = intsetNew();        is = intsetAdd(is,65535,NULL);        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);        is = intsetAdd(is,4294967295,NULL);        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);        assert(intsetFind(is,65535));        assert(intsetFind(is,4294967295));        checkConsistency(is);        is = intsetNew();        is = intsetAdd(is,65535,NULL);        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);        is = intsetAdd(is,-4294967295,NULL);        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);        assert(intsetFind(is,65535));        assert(intsetFind(is,-4294967295));        checkConsistency(is);        ok();    }    printf("Stress lookups: "); {        long num = 100000, size = 10000;        int i, bits = 20;        long long start;        is = createSet(bits,size);        checkConsistency(is);        start = 
usec();        for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);        printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);    }    printf("Stress add+delete: "); {        int i, v1, v2;        is = intsetNew();        for (i = 0; i < 0xffff; i++) {            v1 = rand() % 0xfff;            is = intsetAdd(is,v1,NULL);            assert(intsetFind(is,v1));            v2 = rand() % 0xfff;            is = intsetRemove(is,v2,NULL);            assert(!intsetFind(is,v2));        }        checkConsistency(is);        ok();    }}#endif





原创粉丝点击