memcached源码分析之hashtable

来源：互联网发布：suse linux 编辑：程序博客网时间：2024/04/28 01:01

memcached中hashtable部分的源码主要分布在assoc.h/c、hash.h/c中，总得来说代码比较简单，这里就稍微介绍一下。

hashtable通常包括哈希函数和解决冲突的方法两个最主要的因素，memcached使用的哈希函数为Bob Jenkins在1996年发明的，定义位于hash.h中，实现在hash.c中，作者与2006年时提出另一个新的hash算法，其具有更快的速度（近2倍）和更大的吞吐量，详情请参照：http://burtleburtle.net/bob/hash/doobs.html，这里不进行介绍了。

解决冲突的方法，memcached中采用了链地址法（或拉链法），从数据结构书中截取的一个采用链地址法(哈希函数为：key MOD 13)解决冲突的示意图：

所不同的是，memcached中定义了primary_hashtable和old_hashtable，当hashtable的填装因子（memcached中硬编码为 3/2，无法确定如何定值的），assoc_maintenance_thread线程会将old_hashtable中的items以hash_bulk_move个buckets为单位，逐步移到primary_hashtable中。

对外接口：

// 完成primary_hashtable的初始化

void assoc_init(void);

// 根据key和nkey查找对应的item

item *assoc_find(const char *key, const size_t nkey);

// 将item插入hashtable中

int assoc_insert(item *item);

// 从hashtable中删除key对应的item

void assoc_delete(const char *key, const size_t nkey);

// 下面两个函数分别为启动和结束hashtable维护的线程，如果不需要这个功能，可以不调用，就是会浪费primary_hashtable指针数组占用的内存资源

int start_assoc_maintenance_thread(void);
void stop_assoc_maintenance_thread(void);

void do_assoc_move_next_bucket(void); // 函数没有实现

实现部分：

首先介绍几个用到的变量：

static pthread_cond_t maintenance_cond; // 同步insert和hashtable维护线程的条件变量

           static unsigned int hashpower = 16;
           #define hashsize(n) ((ub4)1<<(n))              //hashtable初始化大小设置为2^16
           #define hashmask(n) (hashsize(n)-1)         // 初始hashmask=0x1111 1111 1111 1111，用来将哈希函数计算的结果映射到hashsize域内，hashmask二进制值永远是hashpower位1的串

static unsigned int hash_items = 0; // hashtable中存在的item数量

static bool expanding = false; // 是否正在扩展的flag

static unsigned int expand_bucket = 0; // 当前扩展的位置（old_hashtable中的索引）

[cpp] view plaincopy
// hashtable初始化
void assoc_init(void) {
    // 分配hashsize个buckets，每个bucket就是一个单向链表或null
    primary_hashtable = calloc(hashsize(hashpower), sizeof(void *));
    if (! primary_hashtable) {
        fprintf(stderr, "Failed to init hashtable.\n");
        exit(EXIT_FAILURE);
    }
}

[cpp] view plaincopy
根据key和nkey查找对应的item，不存在返回NULL
item *assoc_find(const char *key, const size_t nkey) {
    uint32_t hv = hash(key, nkey, 0);
    item *it;
    unsigned int oldbucket;

    // 如果正在扩展中，且hash值映射到尚未移入新primary_hashtable中的item，在old_hashtable查找，
    // 当前即将扩展索引expand_bucket位置的bucket，大于等于expand_bucket表示还在old_hashtable中
    if (expanding &&
        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
    {
        it = old_hashtable[oldbucket];
    } else {
        it = primary_hashtable[hv & hashmask(hashpower)];
    }

    item *ret = NULL;
    int depth = 0;
    while (it) {
        // 遍历单向链表，查找相应的item
        if ((nkey == it->nkey) && (memcmp(key, ITEM_key(it), nkey) == 0)) {
            ret = it;
            break;
        }
        it = it->h_next;
        ++depth;
    }
    MEMCACHED_ASSOC_FIND(key, nkey, depth);
    return ret;
}

[cpp] view plaincopy
/* returns the address of the item pointer before the key.  if *item == 0,
   the item wasn't found */

static item** _hashitem_before (const char *key, const size_t nkey) {
    uint32_t hv = hash(key, nkey, 0);
    item **pos;
    unsigned int oldbucket;

    if (expanding &&
        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
    {
        pos = &old_hashtable[oldbucket];
    } else {
        pos = &primary_hashtable[hv & hashmask(hashpower)];
    }

    while (*pos && ((nkey != (*pos)->nkey) || memcmp(key, ITEM_key(*pos), nkey))) {
        pos = &(*pos)->h_next;
    }
    return pos;
}

比较函数assoc_find和_hashitem_before，发现两者只是返回值不同，assoc_find使用了item指针（item*），而_hashitem_before使用了item指针的指针（item**），查找过程都是一样的，_hashitem_before只是在assoc_delete中调用，返回指针的指针，方便直接修改指向的地址，从而删除此item；使用指针同样可以达到效果，但是必须修改指向的item的值，相比较前者效率更高。

[cpp] view plaincopy
/* Note: this isn't an assoc_update.  The key must not already exist to call this */
// 上面英文注释已经说的很清楚了，插入的item的key必须在hashtable中不存在，否则assert错误
int assoc_insert(item *it) {
    uint32_t hv;
    unsigned int oldbucket;

    assert(assoc_find(ITEM_key(it), it->nkey) == 0);  /* shouldn't have duplicately named things defined */

    // 计算hash值
    hv = hash(ITEM_key(it), it->nkey, 0);
    if (expanding &&
        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
    {
        // 如果正在展开hashtable过程中，则将映射到expand_bucket之后的，
        // 即还没有展开的bucket部分，继续放在old_hashtable中，
        // 这样，当查询这些item时，从old_hashtable能够获得，参见assoc_find
        it->h_next = old_hashtable[oldbucket];
        old_hashtable[oldbucket] = it;
    } else {
        // 链表操作，放入链表起始位置
        it->h_next = primary_hashtable[hv & hashmask(hashpower)];
        primary_hashtable[hv & hashmask(hashpower)] = it;
    }

    hash_items++;
    if (! expanding && hash_items > (hashsize(hashpower) * 3) / 2) { //3、2不知道怎么定的
        assoc_expand(); // 启动扩展
    }

    MEMCACHED_ASSOC_INSERT(ITEM_key(it), it->nkey, hash_items);
    return 1;
}

[cpp] view plaincopy
// 从hashtable中删除键值为key的item
void assoc_delete(const char *key, const size_t nkey) {
    // 返回值是键值为key的item的指针的指针，感觉before用的不太恰当，开始以为是指向它前面的一个item
    item **before = _hashitem_before(key, nkey);

    if (*before) {
        item *nxt;
        hash_items--;
        /* The DTrace probe cannot be triggered as the last instruction
         * due to possible tail-optimization by the compiler
         */
        MEMCACHED_ASSOC_DELETE(key, nkey, hash_items);
        nxt = (*before)->h_next;
        (*before)->h_next = 0;   /* probably pointless, but whatever. */
        *before = nxt;
        return;
    }
    /* Note:  we never actually get here.  the callers don't delete things
       they can't find. */
    assert(*before != 0);
}

[cpp] view plaincopy
// 扩展hashtable（这里只分配新的hashtable指针数组，设置相应的标志等，
// 真正的扩展在assoc_maintenance_thread线程中完成）
/* grows the hashtable to the next power of 2. */
static void assoc_expand(void) {
    old_hashtable = primary_hashtable;

    // 扩大为原来的2倍
    primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *));
    if (primary_hashtable) {
        if (settings.verbose > 1)
            fprintf(stderr, "Hash table expansion starting\n");
        hashpower++;
        expanding = true;
        expand_bucket = 0;
        pthread_cond_signal(&maintenance_cond); // 设置条件变量
    } else {
        primary_hashtable = old_hashtable;
        /* Bad news, but we can keep running. */
    }
}

[cpp] view plaincopy
// 创建hashtable维护线程
int start_assoc_maintenance_thread() {
    int ret;
    // 如果环境变量MEMCACHED_HASH_BULK_MOVE设置，则使用此设置值
    // 维护线程中，每次扩展的粒度(每次hash_bulk_move个buckets)
    char *env = getenv("MEMCACHED_HASH_BULK_MOVE");
    if (env != NULL) {
        hash_bulk_move = atoi(env);
        if (hash_bulk_move == 0) {
            hash_bulk_move = DEFAULT_HASH_BULK_MOVE;
        }
    }
    if ((ret = pthread_create(&maintenance_tid, NULL,
                              assoc_maintenance_thread, NULL)) != 0) {
        fprintf(stderr, "Can't create thread: %s\n", strerror(ret));
        return -1;
    }
    return 0;
}
[cpp] view plaincopy
// 停止hashtable维护线程
void stop_assoc_maintenance_thread() {
    pthread_mutex_lock(&cache_lock);
    do_run_maintenance_thread = 0;          // 结束标志
    pthread_cond_signal(&maintenance_cond);
    pthread_mutex_unlock(&cache_lock);

    /* Wait for the maintenance thread to stop */
    pthread_join(maintenance_tid, NULL);
}
[cpp] view plaincopy
#define DEFAULT_HASH_BULK_MOVE 1
int hash_bulk_move = DEFAULT_HASH_BULK_MOVE;     // 数据转移粒度，即每次移动的bucket数量

// hashtable维护线程(hashtable扩展使用)
static void *assoc_maintenance_thread(void *arg) {

    // 维护线程运行标志
    while (do_run_maintenance_thread) {
        int ii = 0;

        /* Lock the cache, and bulk move multiple buckets to the new
         * hash table. */
        pthread_mutex_lock(&cache_lock);  // 互斥访问old_hashtable

        // 每次扩展hash_bulk_move(这里定义为1)个buckets到新的hashtable中
        for (ii = 0; ii < hash_bulk_move && expanding; ++ii) {
            item *it, *next;
            int bucket;

            // expand_bucket: 当前正在扩展的bucket索引
            for (it = old_hashtable[expand_bucket]; NULL != it; it = next) {
                next = it->h_next;

                bucket = hash(ITEM_key(it), it->nkey, 0) & hashmask(hashpower);
                it->h_next = primary_hashtable[bucket];
                primary_hashtable[bucket] = it;
            }

            old_hashtable[expand_bucket] = NULL;

            // 到达old_hashtable结尾，则扩展结束
            expand_bucket++;
            if (expand_bucket == hashsize(hashpower - 1)) {
                expanding = false;
                free(old_hashtable);
                if (settings.verbose > 1)
                    fprintf(stderr, "Hash table expansion done\n");
            }
        }

        if (!expanding) {
            /* We are done expanding.. just wait for next invocation */
            pthread_cond_wait(&maintenance_cond, &cache_lock);
        }

        pthread_mutex_unlock(&cache_lock);
    }
    return NULL;
}