Memcached内存管理分析

来源：互联网发布：下载地毯设计软件编辑：程序博客网时间：2024/05/01 14:08

Memcached有自己的内存管理，避免了频繁使用malloc/free造成内存碎片的问题。在看源码之前，先大概了解一下Memcached内存管理的概念。
在Memcached中，会先分配一大块连续内存（默认为64M，即settings.maxbytes = 64 * 1024 * 1024），大小可以在启动时通过-m选项指定。一大块内存分配好后，会再细分为大小相同的Slab（默认为1M）；一个Slab再细分为更小的Chunk，在同一个Slab中，Chunk的大小相同；在每个Chunk中保存着item，item是真正存储数据的地方。

参考《Memcached全面剖析》（长野雅广、前坂徹 著，charlee 译）中的图片，来形象地看一下各个结构：

这里写图片描述

图中，Slab Class: N即为Slab，里面保存的是Chunk；不同Slab中Chunk的大小依次增长，增长的快慢是由参数settings.factor决定的，这个参数默认为1.25，可以通过-f指定。

下面来看一下Slab对应的结构体：

typedef struct {    unsigned int size;      /* sizes of items *///这里其实是Chunk的大小    unsigned int perslab;   /* how many items per slab *///一个Slab中包含多少个Chunk    void *slots;           /* list of item ptrs */    unsigned int sl_curr;   /* total free items in list */    unsigned int slabs;     /* how many slabs were allocated for this class */    void **slab_list;       /* array of slab pointers *///指针的指针，指向chunk数组    unsigned int list_size; /* size of prev array */    unsigned int killing;  /* index+1 of dying slab, or zero if none */    size_t requested; /* The number of requested bytes */} slabclass_t;static slabclass_t slabclass[MAX_NUMBER_OF_SLAB_CLASSES];//这里保存着Slab数组

来看一下item对应的数据结构：

typedef struct _stritem {    /* Protected by LRU locks */    struct _stritem *next;//next和prev指针，用在链表用。和slab的void* slots指针对应。    struct _stritem *prev;    /* Rest are protected by an item lock */    struct _stritem *h_next;    /* hash chain next */    rel_time_t      time;       /* least recent access */    rel_time_t      exptime;    /* expire time */    int             nbytes;     /* size of data */    unsigned short  refcount;//引用个数    uint8_t         nsuffix;    /* length of flags-and-length string */    uint8_t         it_flags;   /* ITEM_* above */    uint8_t         slabs_clsid;/* which slab class we're in */    uint8_t         nkey;       /* key length, w/terminating null and padding */    /* this odd type prevents type-punning issues when we do     * the little shuffle to save space when not using CAS. */    union {        uint64_t cas;        char end;    } data[];    /* if it_flags & ITEM_CAS we have 8 bytes CAS */    /* then null-terminated key */    /* then " flags length\r\n" (no terminating null) */    /* then data with terminating \r\n (no terminating null; it's binary!) */} item;

item是存储key-value的地方，前面是一些属性，用来管理存储空间；存储数据的地方为data[]。
下面通过这张图来看一下Slab、slabclass、item的关系：
这里写图片描述

从图中可以看出，slabclass数组保存着slabclass_t结构体，结构体中保存有指向item链表的指针slots，以及指向chunk数组的指针slab_list。

下面源码分析，包括分配一大块内存、把大块内存分解为chunk，分配item添加到slabclass结构体中。

从main函数开始看起：

//main函数中调用slabs_init(settings.maxbytes, settings.factor, preallocate);//具体实现在slabs.c中static void *mem_base = NULL;//分配内存起始地址static void *mem_current = NULL;//当前分配的地址static size_t mem_avail = 0;//内存剩余大小void slabs_init(const size_t limit, const double factor, const bool prealloc) {    int i = POWER_SMALLEST - 1;    //chunk的大小，还要包括item在内    unsigned int size = sizeof(item) + settings.chunk_size;    mem_limit = limit;//初始化mem_limit    if (prealloc) {//是否预分配        /* Allocate everything in a big chunk with malloc */        mem_base = malloc(mem_limit);        if (mem_base != NULL) {//分配成功，给全局变量赋值            mem_current = mem_base;            mem_avail = mem_limit;        } else {            fprintf(stderr, "Warning: Failed to allocate requested memory in"                    " one large chunk.\nWill allocate in smaller chunks\n");        }    }    memset(slabclass, 0, sizeof(slabclass));//slabclass是slabclass_t类型数组    while (++i < MAX_NUMBER_OF_SLAB_CLASSES-1 && size <= settings.item_size_max / factor) {        /* Make sure items are always n-byte aligned */        if (size % CHUNK_ALIGN_BYTES)%按照8字节对齐            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);        slabclass[i].size = size;        slabclass[i].perslab = settings.item_size_max / slabclass[i].size;//chunk的个数        size *= factor;//乘上增长因子，为下一个slab的chunk的size的值        if (settings.verbose > 1) {            fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",                    i, slabclass[i].size, slabclass[i].perslab);        }    }    //设置最后一个subclass    power_largest = i;    slabclass[power_largest].size = settings.item_size_max;    slabclass[power_largest].perslab = 1;//只有一个chunk，大小为1M    if (settings.verbose > 1) {        fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",                i, slabclass[i].size, slabclass[i].perslab);    }    /* for the test suite:  faking of how much we've already malloc'd */    {        char *t_initial_malloc = 
getenv("T_MEMD_INITIAL_MALLOC");//读取环境变量        if (t_initial_malloc) {            mem_malloced = (size_t)atol(t_initial_malloc);        }    }    if (prealloc) {        slabs_preallocate(power_largest);//进一步细分每个Slab，power_largest为最后一个Slab    }}static void slabs_preallocate (const unsigned int maxslabs) {    int i;    unsigned int prealloc = 0;    /* pre-allocate a 1MB slab in every size class so people don't get       confused by non-intuitive "SERVER_ERROR out of memory"       messages.  this is the most common question on the mailing       list.  if you really don't want this, you can rebuild without       these three lines.  */    for (i = POWER_SMALLEST; i < MAX_NUMBER_OF_SLAB_CLASSES; i++) {        if (++prealloc > maxslabs)            return;        if (do_slabs_newslab(i) == 0) {//分配一个新的slab            fprintf(stderr, "Error while preallocating slab memory!\n"                "If using -L or other prealloc options, max memory must be "                "at least %d megabytes.\n", power_largest);            exit(1);        }    }}

在函数do_slabs_newslab中会给slabclass_t的slab_list指针赋值，并把item加到slabclass_t的slots上：

static int do_slabs_newslab(const unsigned int id) {    slabclass_t *p = &slabclass[id];//第id个Slab的地址    int len = settings.slab_reassign ? settings.item_size_max        : p->size * p->perslab;    char *ptr;    if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0)) {//超出分配空间        mem_limit_reached = true;        MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);        return 0;    }    if ((grow_slab_list(id) == 0) ||//初始化slabclass_t中的slab_list指针        ((ptr = memory_allocate((size_t)len)) == 0)) {//给Memcached分配内存（前面可能已经预分配）        MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);        return 0;    }    memset(ptr, 0, (size_t)len);    split_slab_page_into_freelist(ptr, id);//把item加到slabclass_t的slot上    p->slab_list[p->slabs++] = ptr;    mem_malloced += len;    MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id);    return 1;}//给slab_list分配空间（增加空间）static int grow_slab_list (const unsigned int id) {    slabclass_t *p = &slabclass[id];    if (p->slabs == p->list_size) {        size_t new_size =  (p->list_size != 0) ? p->list_size * 2 : 16;        void *new_list = realloc(p->slab_list, new_size * sizeof(void *));        if (new_list == 0) return 0;        p->list_size = new_size;        p->slab_list = new_list;    }    return 1;}static void split_slab_page_into_freelist(char *ptr, const unsigned int id) {    slabclass_t *p = &slabclass[id];//取出第id个subclass_t    int x;    for (x = 0; x < p->perslab; x++) {//给subclass_t划分空间        do_slabs_free(ptr, 0, id);//向subclass_t添加空闲item        ptr += p->size;    }}

//给Memcached分配内存

static void *memory_allocate(size_t size) {    void *ret;    if (mem_base == NULL) {//如果没有预分配，这里分配        /* We are not using a preallocated large memory chunk */        ret = malloc(size);    } else {        ret = mem_current;        if (size > mem_avail) {            return NULL;        }        /* mem_current pointer _must_ be aligned!!! */        if (size % CHUNK_ALIGN_BYTES) {//按照8字节对齐            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);        }        mem_current = ((char*)mem_current) + size;        if (size < mem_avail) {            mem_avail -= size;        } else {            mem_avail = 0;        }    }    return ret;}

0 0