Memcached内存管理源码阅读

来源：互联网发布：今年淘宝总额编辑：程序博客网时间：2024/05/01 18:09
memcached 之所以能够快速查找并良好地管理内存，得益于高效的 hash 查找和内存管理技巧。这两项功能主要由 assoc.c 和 slab.c 这两个文件实现。
下面逐行详细分析其中内存管理部分的代码实现。

slab.c

#define POWER_SMALLEST 1        // smallest valid slab class id (index into slabclass[]); id 0 is the error value
#define POWER_LARGEST  200        // largest possible slab class id; slabclass[] has POWER_LARGEST + 1 entries
#define POWER_BLOCK 1048576        // bytes in one slab page (1 MiB); also the chunk size of the largest class
#define CHUNK_ALIGN_BYTES 8        // chunk sizes are rounded up to a multiple of this many bytes
#define DONT_PREALLOC_SLABS        // when defined, slabs_preallocate() and its call site are compiled out

/* powers-of-N allocation structures */
/* Per-size-class bookkeeping. One slabclass_t tracks every slab page and every
   freed chunk that belongs to a single chunk size. */
typedef struct {
    unsigned int size;      /* sizes of items */        // chunk size in bytes for this class
    unsigned int perslab;   /* how many items per slab */    // number of `size`-byte chunks carved from one slab page

    void **slots;           /* list of item ptrs */        // array of chunks returned through do_slabs_free()
    unsigned int sl_total;  /* size of previous array */    // allocated capacity of slots[]
    unsigned int sl_curr;   /* first free slot */        // number of entries currently stored in slots[]

    void *end_page_ptr;         /* pointer to next free item at end of page, or 0 */        // next never-used chunk in the newest page
    unsigned int end_page_free; /* number of items remaining at end of last alloced page */    // never-used chunks left at end_page_ptr

    unsigned int slabs;     /* how many slabs were allocated for this class */            // count of pages stored in slab_list[]

    void **slab_list;       /* array of slab pointers */                    // start address of every page owned by this class
    unsigned int list_size; /* size of prev array */                        // allocated capacity of slab_list[]

    unsigned int killing;  /* index+1 of dying slab, or zero if none */
} slabclass_t;

static slabclass_t slabclass[POWER_LARGEST + 1];                        // one descriptor per slab class, indexed by class id
static size_t mem_limit = 0;                                    // byte budget set by slabs_init(); 0 disables the limit check
static size_t mem_malloced = 0;                                    // bytes accounted to slab pages so far
static int power_largest;                                    // highest class id in use, set by slabs_init()

static void *mem_base = NULL;                                    // start of the preallocated arena; NULL when pages are malloc'd one by one
static void *mem_current = NULL;                                // next unused byte inside the preallocated arena
static size_t mem_avail = 0;                                    // bytes still unused inside the preallocated arena

/*
 * Forward Declarations
 */
/* Adds one slab page to class `id`; returns 1 on success and 0 on failure. */
static int do_slabs_newslab(const unsigned int id);
/* Returns `size` bytes from the preallocated arena, or from malloc() when no arena exists. */
static void *memory_allocate(size_t size);

#ifndef DONT_PREALLOC_SLABS
/* Preallocate as many slab pages as possible (called from slabs_init)
   on start-up, so users don't get confused out-of-memory errors when
   they do have free (in-slab) space, but no space to make new slabs.
   if maxslabs is 18 (POWER_LARGEST - POWER_SMALLEST + 1), then all
   slab types can be made.  if max memory is less than 18 MB, only the
   smaller ones will be made.  */
/* NOTE(review): the "18" above does not match POWER_LARGEST (200) as defined
   in this file; the figure looks stale -- confirm before relying on it. */
static void slabs_preallocate (const unsigned int maxslabs);
#endif

/*
 * Maps an object size to the id of the smallest slab class whose chunk size
 * can hold it.
 *
 * Returns the class id to use when allocating/freeing memory for the object,
 * or 0 when the size is zero or larger than the biggest class.
 */
unsigned int slabs_clsid(const size_t size) {
    int cls;

    /* a zero-byte request is treated as an error */
    if (size == 0) {
        return 0;
    }

    /* walk the classes upwards until one is big enough */
    for (cls = POWER_SMALLEST; size > slabclass[cls].size; cls++) {
        if (cls == power_largest) {
            /* won't fit in the biggest slab */
            return 0;
        }
    }

    return cls;
}

/**

非常重要的函数,主要是初始化slabclass, 然后给每个slab分配内存,该函数在程序初始化时调用
**/

/**
 * Determines the chunk sizes and initializes the slab class descriptors
 * accordingly.
 */
void slabs_init(const size_t limit, const double factor, const bool prealloc) { //limit 表示可使用内存的大小, factor表示每个档次的slab的大小倍数,例如 slabclass[0] 的size 为128, slabclass[1]的size 为 128*factor
    int i = POWER_SMALLEST - 1;                            //i = 0;
    unsigned int size = sizeof(item) + settings.chunk_size;            //初始化size大小

    /* Factor of 2.0 means use the default memcached behavior */
    if (factor == 2.0 && size < 128)                        //采用memcache默认的分配方式, size=128
        size = 128;

    mem_limit = limit;                                //给mem_limit赋值

    if (prealloc) {                                //预先分配内存
        /* Allocate everything in a big chunk with malloc */
        mem_base = malloc(mem_limit);                        //一次性创建mem_limit,并赋值给全局变量mem_base
        if (mem_base != NULL) {
            mem_current = mem_base;                        //给全局变量mem_current, mem_avail赋值
            mem_avail = mem_limit;
        } else {
            fprintf(stderr, "Warning: Failed to allocate requested memory in"
                    " one large chunk.\nWill allocate in smaller chunks\n");    //分配内存失败，打印错误信息
        }
    }

    memset(slabclass, 0, sizeof(slabclass));                    //将全局数组slabclass初始化

    while (++i < POWER_LARGEST && size <= POWER_BLOCK / 2) {            //给每个slabclass初始化size和perslab值
        /* Make sure items are always n-byte aligned */
        if (size % CHUNK_ALIGN_BYTES)
            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);

        slabclass[i].size = size;
        slabclass[i].perslab = POWER_BLOCK / slabclass[i].size;            //此时并没有分配内存空间
        size *= factor;
        if (settings.verbose > 1) {
            fprintf(stderr, "slab class %3d: chunk size %6u perslab %5u\n",
                    i, slabclass[i].size, slabclass[i].perslab);
        }
    }

    power_largest = i;                                //数组的最后一个分配一个超大的size； 1m
    slabclass[power_largest].size = POWER_BLOCK;
    slabclass[power_largest].perslab = 1;                    //每次只分配一个

    /* for the test suite:  faking of how much we've already malloc'd */
    {
        char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
        if (t_initial_malloc) {
            mem_malloced = (size_t)atol(t_initial_malloc);
        }

    }

#ifndef DONT_PREALLOC_SLABS
    {
        char *pre_alloc = getenv("T_MEMD_SLABS_ALLOC");

        if (pre_alloc == NULL || atoi(pre_alloc) != 0) {
            slabs_preallocate(power_largest);                    //开始为每个slabcalss分配空间了
        }
    }
#endif
}

#ifndef DONT_PREALLOC_SLABS
/* Requests one slab page for each class, starting at POWER_SMALLEST, and
   stops after maxslabs requests or after class POWER_LARGEST, whichever
   comes first. The result of each request is ignored.

   Rationale kept from the original: giving every size class a page up
   front avoids the non-intuitive "SERVER_ERROR out of memory" replies that
   were the most common mailing-list question. */
static void slabs_preallocate (const unsigned int maxslabs) {
    unsigned int requested = 0;
    int cls = POWER_SMALLEST;

    while (cls <= POWER_LARGEST && requested < maxslabs) {
        do_slabs_newslab(cls);
        requested++;
        cls++;
    }
}
#endif

/* Makes sure slab_list[] of class `id` has room for one more page pointer.
   When the list is full it is reallocated to twice its capacity (16 entries
   the first time). Returns 1 when room is available, 0 if realloc failed;
   on failure the existing list is left untouched. */
static int grow_slab_list (const unsigned int id) {
    slabclass_t *cls = &slabclass[id];
    size_t capacity = 16;
    void *grown;

    /* still has a free entry: nothing to do */
    if (cls->slabs != cls->list_size) {
        return 1;
    }

    if (cls->list_size != 0) {
        capacity = cls->list_size * 2;
    }

    grown = realloc(cls->slab_list, capacity * sizeof(void *));
    if (grown == NULL) {
        return 0;
    }

    cls->list_size = capacity;
    cls->slab_list = grown;
    return 1;
}

/**
 * Adds one fresh slab page to class `id`.
 *
 * The checks run in this order and stop at the first failure:
 *   1. memory limit (only enforced once the class already owns a page),
 *   2. room in slab_list[] (grown if necessary),
 *   3. the page allocation itself.
 *
 * On success the page is zeroed, becomes the class's "end page" from which
 * chunks are handed out, is recorded in slab_list[], and its size is added
 * to mem_malloced. Returns 1 on success, 0 on failure.
 */
static int do_slabs_newslab(const unsigned int id) {
    slabclass_t *cls = &slabclass[id];
#ifdef ALLOW_SLABS_REASSIGN
    int len = POWER_BLOCK;
#else
    int len = cls->size * cls->perslab;
#endif
    char *page = NULL;
    int ok;

    /* over the limit is tolerated for a class that has no page yet */
    ok = !(mem_limit && mem_malloced + len > mem_limit && cls->slabs > 0);
    if (ok) {
        ok = (grow_slab_list(id) != 0);
    }
    if (ok) {
        page = memory_allocate((size_t)len);
        ok = (page != 0);
    }
    if (!ok) {
        MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
        return 0;
    }

    memset(page, 0, (size_t)len);

    /* every chunk of the new page is still unused */
    cls->end_page_ptr = page;
    cls->end_page_free = cls->perslab;

    cls->slab_list[cls->slabs] = page;
    cls->slabs++;
    mem_malloced += len;

    MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id);
    return 1;
}

/**
 * Hands out one chunk from slab class `id`.
 *
 * Sources are tried in this order: a chunk previously returned through
 * do_slabs_free() (slots[]), then the unused tail of the most recently
 * allocated page, and only when both are empty a newly allocated page.
 *
 * size - requested object size; used for the probes and, when built with
 *        USE_SYSTEM_MALLOC, as the malloc size.
 * id   - slab class id, POWER_SMALLEST..power_largest.
 *
 * Returns the chunk, or NULL when `id` is out of range or no memory is
 * available.
 */
/*@null@*/
void *do_slabs_alloc(const size_t size, unsigned int id) {
    slabclass_t *p;
    void *ret = NULL;

    if (id < POWER_SMALLEST || id > power_largest) {
        MEMCACHED_SLABS_ALLOCATE_FAILED(size, 0);
        return NULL;
    }

    p = &slabclass[id];
    /* the chunk on top of the free list must not be marked as in use */
    assert(p->sl_curr == 0 || ((item *)p->slots[p->sl_curr - 1])->slabs_clsid == 0);

#ifdef USE_SYSTEM_MALLOC
    /* bypass the slab machinery: allocate exactly what was asked for */
    if (mem_limit && mem_malloced + size > mem_limit) {
        MEMCACHED_SLABS_ALLOCATE_FAILED(size, id);
        return NULL;
    }
    ret = malloc(size);
    if (ret == NULL) {
        /* do not account for memory that was never obtained */
        MEMCACHED_SLABS_ALLOCATE_FAILED(size, id);
        return NULL;
    }
    mem_malloced += size;
    MEMCACHED_SLABS_ALLOCATE(size, id, 0, ret);
    return ret;
#endif

    /* fail unless we have space at the end of a recently allocated page,
       we have something on our freelist, or we could allocate a new page */
    if (! (p->end_page_ptr != NULL || p->sl_curr != 0 ||
           do_slabs_newslab(id) != 0)) {
        /* We don't have more memory available */
        ret = NULL;
    } else if (p->sl_curr != 0) {
        /* return off our freelist */
        ret = p->slots[--p->sl_curr];
    } else {
        /* if we recently allocated a whole page, return from that */
        assert(p->end_page_ptr != NULL);
        ret = p->end_page_ptr;
        if (--p->end_page_free != 0) {
            /* step through char *: arithmetic on void * is not ISO C */
            p->end_page_ptr = (char *)p->end_page_ptr + p->size;
        } else {
            /* page exhausted */
            p->end_page_ptr = NULL;
        }
    }

    if (ret) {
        MEMCACHED_SLABS_ALLOCATE(size, id, p->size, ret);
    } else {
        MEMCACHED_SLABS_ALLOCATE_FAILED(size, id);
    }

    return ret;
}
/**
 * Returns a chunk to slab class `id`.
 *
 * Nothing is handed back to the system here (unless built with
 * USE_SYSTEM_MALLOC): the pointer is pushed onto the class's slots[] array
 * so a later do_slabs_alloc() can reuse it. The array doubles in capacity
 * when full (16 entries the first time); if that realloc fails the function
 * returns without recording the chunk.
 *
 * ptr  - chunk being released; its slabs_clsid field must already be 0.
 * size - object size, used for the probe and USE_SYSTEM_MALLOC accounting.
 * id   - slab class id; out-of-range ids are ignored.
 */
void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
    slabclass_t *cls;

    assert(((item *)ptr)->slabs_clsid == 0);
    assert(id >= POWER_SMALLEST && id <= power_largest);
    if (!(id >= POWER_SMALLEST && id <= power_largest)) {
        return;
    }

    MEMCACHED_SLABS_FREE(size, id, ptr);
    cls = &slabclass[id];

#ifdef USE_SYSTEM_MALLOC
    mem_malloced -= size;
    free(ptr);
    return;
#endif

    /* need more space on the free list */
    if (cls->sl_curr == cls->sl_total) {
        int capacity = 16;  /* 16 is arbitrary */
        void **grown;

        if (cls->sl_total != 0) {
            capacity = cls->sl_total * 2;
        }
        grown = realloc(cls->slots, capacity * sizeof(void *));
        if (grown == NULL) {
            return;
        }
        cls->slots = grown;
        cls->sl_total = capacity;
    }

    cls->slots[cls->sl_curr] = ptr;
    cls->sl_curr++;
}

/* Builds the text reply for the slab statistics command.
 *
 * For every class that owns at least one page, seven "STAT <id>:<name>"
 * lines are written, followed by the active class count, the total bytes
 * accounted in mem_malloced, and a closing "END" line.
 *
 * buflen - out: length of the returned text in bytes (excluding the NUL);
 *          set to 0 when NULL is returned.
 *
 * Returns a malloc'd, NUL-terminated buffer the caller must free(), or NULL
 * when the buffer could not be allocated or formatting failed.
 *
 * Every write is bounded with snprintf, and the buffer is sized for the
 * widest values each field can print, so the output cannot overrun it.
 */
/*@null@*/
char* do_slabs_stats(int *buflen) {
    int i, total;
    /* 7 lines per class peak at roughly 240 bytes; trailer needs < 128 */
    const size_t cap = (size_t)power_largest * 256 + 128;
    size_t used = 0;
    char *buf = malloc(cap);

    *buflen = 0;
    if (buf == NULL) return NULL;

/* Appends formatted text; frees the buffer and bails out if it would not fit. */
#define SLABS_STATS_APPEND(...)                                         \
    do {                                                                \
        int n_ = snprintf(buf + used, cap - used, __VA_ARGS__);         \
        if (n_ < 0 || (size_t)n_ >= cap - used) {                       \
            free(buf);                                                  \
            return NULL;                                                \
        }                                                               \
        used += (size_t)n_;                                             \
    } while (0)

    total = 0;
    for(i = POWER_SMALLEST; i <= power_largest; i++) {
        slabclass_t *p = &slabclass[i];
        if (p->slabs != 0) {
            unsigned int perslab, slabs;

            slabs = p->slabs;
            perslab = p->perslab;

            SLABS_STATS_APPEND("STAT %d:chunk_size %u\r\n", i, p->size);
            SLABS_STATS_APPEND("STAT %d:chunks_per_page %u\r\n", i, perslab);
            SLABS_STATS_APPEND("STAT %d:total_pages %u\r\n", i, slabs);
            SLABS_STATS_APPEND("STAT %d:total_chunks %u\r\n", i, slabs*perslab);
            /* used = all chunks minus those on the free list and those never handed out */
            SLABS_STATS_APPEND("STAT %d:used_chunks %u\r\n", i, slabs*perslab - p->sl_curr - p->end_page_free);
            SLABS_STATS_APPEND("STAT %d:free_chunks %u\r\n", i, p->sl_curr);
            SLABS_STATS_APPEND("STAT %d:free_chunks_end %u\r\n", i, p->end_page_free);
            total++;
        }
    }
    SLABS_STATS_APPEND("STAT active_slabs %d\r\nSTAT total_malloced %llu\r\n", total, (unsigned long long)mem_malloced);
    SLABS_STATS_APPEND("END\r\n");

#undef SLABS_STATS_APPEND

    *buflen = (int)used;
    return buf;
}

#ifdef ALLOW_SLABS_REASSIGN
/* Blows away all the items in a slab class and moves its slabs to another
   class. This is only used by the "slabs reassign" command, for manual tweaking
   of memory allocation. It's disabled by default since it requires that all
   slabs be the same size (which can waste space for chunk size mantissas of
   other than 2.0).

   srcid - class giving up one page; dstid - class receiving it.

   1 = success
   0 = fail
   -1 = tried. busy. send again shortly.

   The page is walked through a char * cursor (a cast used as an lvalue is
   not valid C), and only whole chunks are visited, so a trailing fragment
   shorter than one chunk is never read or written as an item. */
int do_slabs_reassign(unsigned char srcid, unsigned char dstid) {
    char *slab, *slab_end;
    slabclass_t *p, *dp;
    char *iter;
    bool was_busy = false;

    if (srcid < POWER_SMALLEST || srcid > power_largest ||
        dstid < POWER_SMALLEST || dstid > power_largest)
        return 0;

    p = &slabclass[srcid];
    dp = &slabclass[dstid];

    /* fail if src still populating, or no slab to give up in src */
    if (p->end_page_ptr || ! p->slabs)
        return 0;

    /* fail if dst is still growing or we can't make room to hold its new one */
    if (dp->end_page_ptr || ! grow_slab_list(dstid))
        return 0;

    /* pick the first page as the victim unless one is already being drained */
    if (p->killing == 0) p->killing = 1;

    slab = p->slab_list[p->killing - 1];
    slab_end = slab + POWER_BLOCK;

    /* unlink every live item in the page; remember if any is still referenced */
    for (iter = slab; (size_t)(slab_end - iter) >= p->size; iter += p->size) {
        item *it = (item *)iter;
        if (it->slabs_clsid) {
            if (it->refcount) was_busy = true;
            item_unlink(it);
        }
    }

    /* go through free list and discard items that are no longer part of this slab */
    {
        int fi;
        for (fi = p->sl_curr - 1; fi >= 0; fi--) {
            char *slot = p->slots[fi];
            if (slot >= slab && slot < slab_end) {
                p->sl_curr--;
                /* fill the hole with the last entry, unless the hole was the last entry */
                if (p->sl_curr > fi) p->slots[fi] = p->slots[p->sl_curr];
            }
        }
    }

    if (was_busy) return -1;

    /* if good, now move it to the dst slab class */
    p->slab_list[p->killing - 1] = p->slab_list[p->slabs - 1];
    p->slabs--;
    p->killing = 0;
    dp->slab_list[dp->slabs++] = slab;
    dp->end_page_ptr = slab;
    dp->end_page_free = dp->perslab;
    /* this isn't too critical, but other parts of the code do asserts to
       make sure this field is always 0.  */
    for (iter = slab; (size_t)(slab_end - iter) >= dp->size; iter += dp->size) {
        ((item *)iter)->slabs_clsid = 0;
    }
    return 1;
}
#endif

/**
 * Low-level page allocator used by do_slabs_newslab().
 *
 * When no arena was preallocated (mem_base is NULL) the request goes
 * straight to malloc(). Otherwise `size` bytes are taken from the front of
 * the unused part of the arena, and the consumed amount is rounded up to
 * CHUNK_ALIGN_BYTES so the next allocation stays aligned.
 *
 * Returns the start of the block, or NULL when malloc fails or the arena
 * has fewer than `size` bytes left.
 */
static void *memory_allocate(size_t size) {
    void *ret;

    if (mem_base == NULL) {
        /* We are not using a preallocated large memory chunk */
        return malloc(size);
    }

    if (size > mem_avail) {
        return NULL;
    }

    ret = mem_current;

    /* mem_current pointer _must_ be aligned!!! */
    if (size % CHUNK_ALIGN_BYTES) {
        size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
    }

    /* advance through char *: arithmetic on void * is not ISO C */
    if (size < mem_avail) {
        mem_current = (char *)mem_current + size;
        mem_avail -= size;
    } else {
        /* rounding may exceed what is left: stop exactly at the arena end */
        mem_current = (char *)mem_current + mem_avail;
        mem_avail = 0;
    }

    return ret;
}