Linux memory management: the slab algorithm, slab initialization


I've been writing a toy operating system in my spare time and want to strengthen its memory management. The buddy-system page allocator is done, so the next step is a finer-grained allocator, since the buddy system hands out memory only in whole pages. The reference kernel is Linux 2.6.30; I haven't analyzed the source of newer versions, but the basic ideas of the algorithm should be much the same.

    The slab algorithm is an efficient memory allocator. It organizes frequently used block sizes, such as 32 bytes, 64 bytes, or the size of some commonly used structure, into kmem_cache structures. Objects that are allocated and freed often are kept in an array_cache, so for such hot objects both allocation and release cost O(1). The kmalloc function itself allocates from the slab layer. The overall layout is shown in the figure below: one slab consists of one or more pages, and the slab management structure either lives on those pages or is allocated dynamically from another kmem_cache.

Figure 1. The main structures of the slab allocator
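
Before walking the source, it helps to see the allocator from the client side. Below is a minimal kernel-style sketch of the public API; struct my_obj and the function names are hypothetical, while the kmem_cache_create/kmem_cache_alloc/kmem_cache_free calls and the single-argument constructor match the 2.6.30-era API:

#include <linux/slab.h>
#include <linux/errno.h>

/* Hypothetical example type; any frequently allocated struct works. */
struct my_obj {
	int id;
	char payload[40];
};

static struct kmem_cache *my_cachep;

/* Constructor, run as new slabs are populated (2.6.30 signature). */
static void my_obj_ctor(void *obj)
{
	struct my_obj *o = obj;
	o->id = -1;
}

static int my_objs_init(void)
{
	struct my_obj *o;

	/* One kmem_cache per object type: name, size, align, flags, ctor. */
	my_cachep = kmem_cache_create("my_obj", sizeof(struct my_obj),
				      0, SLAB_HWCACHE_ALIGN, my_obj_ctor);
	if (!my_cachep)
		return -ENOMEM;

	/* Hot path: served in O(1) from the per-CPU array_cache. */
	o = kmem_cache_alloc(my_cachep, GFP_KERNEL);
	if (o)
		kmem_cache_free(my_cachep, o);
	return 0;
}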

   Source analysis: to keep the focus on the core ideas of the algorithm, the kernel's various locks and debug-check code are skipped for now. We start with kmem_cache initialization; first, a few important structures.

/*
 * struct kmem_cache
 *
 * manages a cache.
 */
/* This structure is the core of the slab allocator: one instance is created
 * per object type, e.g. 32-byte objects, 64-byte objects, task_struct, etc. */
struct kmem_cache {
/* 1) per-cpu data, touched during every alloc/free */
	/* per-CPU cache arrays; on a uniprocessor build NR_CPUS is 1 */
	struct array_cache *array[NR_CPUS];
/* 2) Cache tunables. Protected by cache_chain_mutex */
	unsigned int batchcount;	/* objects moved per array_cache refill/flush */
	unsigned int limit;		/* capacity of the array_cache */
	unsigned int shared;		/* entries shared between CPUs (per-node shared array) */
	unsigned int buffer_size;	/* size of one object */
	u32 reciprocal_buffer_size;	/* reciprocal of buffer_size, for fast index math */
/* 3) touched by every alloc & free from the backend */
	unsigned int flags;		/* constant flags */
	unsigned int num;		/* # of objs per slab */
/* 4) cache_grow/shrink */
	/* order of pgs per slab (2^n) */
	unsigned int gfporder;
	/* force GFP flags, e.g. GFP_DMA */
	gfp_t gfpflags;
	size_t colour;			/* cache colouring range: colours per slab */
	unsigned int colour_off;	/* colour offset, equal to the cache line size */
	struct kmem_cache *slabp_cache;	/* cache for off-slab slab descriptors */
	unsigned int slab_size;		/* size of the slab management structure */
	unsigned int dflags;		/* dynamic flags */
	/* constructor func */
	void (*ctor)(void *obj);	/* object constructor callback, set by the user */
/* 5) cache creation/removal */
	const char *name;		/* name of this kmem_cache */
	struct list_head next;		/* linkage on the global cache_chain list */
	/*
	 * We put nodelists[] at the end of kmem_cache, because we want to size
	 * this array to nr_node_ids slots instead of MAX_NUMNODES
	 * (see kmem_cache_init())
	 * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
	 * is statically defined, so we reserve the max number of nodes.
	 */
	struct kmem_list3 *nodelists[MAX_NUMNODES];	/* the "three lists"; on UMA, MAX_NUMNODES is 1 */
	/*
	 * Do not add fields after nodelists[]
	 */
};
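
The reciprocal_buffer_size field exists so the slab code can turn an object's byte offset into an object index without a hardware division. Here is a user-space sketch of that multiply-and-shift trick, mirroring what lib/reciprocal_div.c did in kernels of this era; the constants are illustrative:

#include <stdio.h>
#include <stdint.h>

/* Precompute ceil(2^32 / k); valid for k > 1. */
static uint32_t reciprocal_value(uint32_t k)
{
	uint64_t val = (1ULL << 32) + (k - 1);
	return (uint32_t)(val / k);
}

/* a / k computed as a 64-bit multiply and a shift. */
static uint32_t reciprocal_divide(uint32_t a, uint32_t r)
{
	return (uint32_t)(((uint64_t)a * r) >> 32);
}

int main(void)
{
	uint32_t buffer_size = 96;		/* example object size */
	uint32_t r = reciprocal_value(buffer_size);
	uint32_t offset = 3 * buffer_size;	/* obj minus slab base */

	/* Object index without a divide: prints "index = 3". */
	printf("index = %u\n", reciprocal_divide(offset, r));
	return 0;
}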
/*
 * The slab lists for all objects.
 */
/* Links the three kinds of slab lists together. */
struct kmem_list3 {
	struct list_head slabs_partial;	/* partial list first, better asm code */ /* partially used slabs */
	struct list_head slabs_full;	/* fully used slabs */
	struct list_head slabs_free;	/* completely free slabs */
	unsigned long free_objects;	/* number of free objects */
	unsigned int free_limit;	/* upper bound on free objects kept around */
	unsigned int colour_next;	/* Per-node cache coloring */ /* next colour to use */
	spinlock_t list_lock;
	struct array_cache *shared;	/* shared per node */ /* per-node shared array on NUMA */
	struct array_cache **alien;	/* on other nodes */
	unsigned long next_reap;	/* updated without locking */
	int free_touched;		/* updated without locking */
};
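
The three lists exist so that allocation can prefer partially used slabs (best packing) and reaping can discard fully free ones. A small self-contained sketch of the relinking decision follows; slab_list_for is a hypothetical helper, since the kernel relinks slabs inline in its alloc/free paths:

#include <stdio.h>

enum slab_list { SLABS_FULL, SLABS_PARTIAL, SLABS_FREE };

/* Which list a slab belongs on, decided purely by its in-use count. */
static enum slab_list slab_list_for(unsigned int inuse, unsigned int num)
{
	if (inuse == num)
		return SLABS_FULL;	/* every object handed out */
	if (inuse == 0)
		return SLABS_FREE;	/* fully free: candidate for reaping */
	return SLABS_PARTIAL;		/* preferred source for new allocations */
}

int main(void)
{
	/* A 30-object slab as objects are handed out and returned. */
	printf("%d %d %d\n",
	       slab_list_for(0, 30),	/* SLABS_FREE    */
	       slab_list_for(7, 30),	/* SLABS_PARTIAL */
	       slab_list_for(30, 30));	/* SLABS_FULL    */
	return 0;
}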
/*
 * bootstrap: The caches do not work without cpuarrays anymore, but the
 * cpuarrays are allocated from the generic caches...
 */
#define BOOT_CPUCACHE_ENTRIES	1
struct arraycache_init {
	struct array_cache cache;
	void *entries[BOOT_CPUCACHE_ENTRIES];
};
struct array_cache {
	unsigned int avail;		/* objects currently cached */
	unsigned int limit;		/* capacity of entry[] */
	unsigned int batchcount;	/* transfer granularity on refill/flush */
	unsigned int touched;		/* set when the cache was recently used */
	spinlock_t lock;
	void *entry[];	/*
			 * Must have this definition in here for the proper
			 * alignment of array_cache. Also simplifies accessing
			 * the entries.
			 */
};
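
entry[] behaves as a LIFO stack, which is what makes the hot paths O(1) and keeps recently freed, cache-hot objects at the front. A minimal user-space model follows; the names and fixed capacity are illustrative, and the real code refills from and flushes to the kmem_list3 lists in batchcount-sized chunks:

#include <stdio.h>
#include <stddef.h>

struct ac_model {
	unsigned int avail;	/* objects currently cached */
	unsigned int limit;	/* capacity of entry[] */
	void *entry[16];	/* LIFO stack of object pointers */
};

/* O(1) allocation: pop the most recently freed, cache-hot object. */
static void *ac_alloc(struct ac_model *ac)
{
	if (ac->avail == 0)
		return NULL;	/* the real code refills from the slab lists */
	return ac->entry[--ac->avail];
}

/* O(1) free: push the object back onto the stack. */
static int ac_free(struct ac_model *ac, void *obj)
{
	if (ac->avail == ac->limit)
		return -1;	/* the real code flushes a batchcount chunk */
	ac->entry[ac->avail++] = obj;
	return 0;
}

int main(void)
{
	struct ac_model ac = { .avail = 0, .limit = 16 };
	int x;

	ac_free(&ac, &x);			/* "free" one object into the cache */
	printf("%s\n", ac_alloc(&ac) == &x ?	/* and get the same one back */
	       "LIFO hit" : "miss");
	return 0;
}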
The analysis below focuses on the UMA case.
/*
 * Initialisation.  Called after the page allocator have been initialised and
 * before smp_init().
 *
 * Initializes kmem_cache: mainly sets up the caches the slab allocator
 * itself needs during bootstrap.
 */
void kmem_cache_init(void)
{
	size_t left_over;		/* leftover bytes per slab */
	struct cache_sizes *sizes;	/* size table for the initial kmalloc caches */
	struct cache_names *names;	/* name table for the initial kmalloc caches */
	int i;
	int order;
	int node;

	/* non-NUMA build */
	/*
	if (num_possible_nodes() == 1) {
		use_alien_caches = 0;
		numa_platform = 0;
	}
	*/

	/* NUM_INIT_LISTS is 3 */
	for (i = 0; i < NUM_INIT_LISTS; i++) {
		kmem_list3_init(&initkmem_list3[i]);
		if (i < MAX_NUMNODES)
			cache_cache.nodelists[i] = NULL;	/* cache_cache is the cache of kmem_cache objects */
	}
	set_up_list3s(&cache_cache, CACHE_CACHE);

	/*
	 * Fragmentation resistance on low memory - only use bigger
	 * page orders on machines with more than 32MB of memory.
	 */
	if (num_physpages > (32 << 20) >> PAGE_SHIFT)
		slab_break_gfp_order = BREAK_GFP_ORDER_HI;

	/* Bootstrap is tricky, because several objects are allocated
	 * from caches that do not exist yet:
	 * 1) initialize the cache_cache cache: it contains the struct
	 *    kmem_cache structures of all caches, except cache_cache itself:
	 *    cache_cache is statically allocated.
	 *    Initially an __init data area is used for the head array and the
	 *    kmem_list3 structures, it's replaced with a kmalloc allocated
	 *    array at the end of the bootstrap.
	 * 2) Create the first kmalloc cache.
	 *    The struct kmem_cache for the new cache is allocated normally.
	 *    An __init data area is used for the head array.
	 * 3) Create the remaining kmalloc caches, with minimally sized
	 *    head arrays.
	 * 4) Replace the __init data head arrays for cache_cache and the first
	 *    kmalloc cache with kmalloc allocated arrays.
	 * 5) Replace the __init data for kmem_list3 for cache_cache and
	 *    the other cache's with kmalloc allocated memory.
	 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
	 */

	node = numa_node_id();	/* node = 0 */

	/* 1) create the cache_cache */
	INIT_LIST_HEAD(&cache_chain);			/* initialize the head of the kmem_cache list */
	list_add(&cache_cache.next, &cache_chain);	/* put cache_cache itself on the list */
	cache_cache.colour_off = cache_line_size();	/* colour offset of cache_cache: the cache line size */
	/* point cache_cache's per-cpu array at the static bootstrap array */
	cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
	/* point cache_cache's three-list structure at the static bootstrap lists */
	cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];

	/*
	 * struct kmem_cache size depends on nr_node_ids, which
	 * can be less than MAX_NUMNODES.
	 */
	/* object size of cache_cache, i.e. the size of struct kmem_cache */
	cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
				  1 * sizeof(struct kmem_list3 *);
	/* round the object size up to the cache line size */
	cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
					cache_line_size());
	/* reciprocal of the kmem_cache size */
	/*
	cache_cache.reciprocal_buffer_size =
		reciprocal_value(cache_cache.buffer_size);
	*/

	/* compute the page order, objects per slab and leftover bytes */
	for (order = 0; order < MAX_ORDER; order++) {
		cache_estimate(order, cache_cache.buffer_size,
			cache_line_size(), 0, &left_over, &cache_cache.num);
		if (cache_cache.num)
			break;
	}
	/* BUG_ON(!cache_cache.num); */
	cache_cache.gfporder = order;
	/*
	 * The number of colours is the leftover bytes divided by the cache
	 * line size: each slab's payload starts at a colour-sized offset
	 * within the leftover space, shifting at most up to left_over bytes.
	 */
	cache_cache.colour = left_over / cache_cache.colour_off;
	/*
	 * slab_size is the size of the slab management structure: the struct
	 * slab itself plus the object index array.  kmem_bufctl_t is one
	 * index element, e.g. a[1] = 2.
	 */
	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
				      sizeof(struct slab), cache_line_size());

	/* 2+3) create the kmalloc caches */
	sizes = malloc_sizes;
	names = cache_names;

	/*
	 * Initialize the caches that provide memory for the array cache and the
	 * kmem_list3 structures first.  Without this, further allocations will
	 * bug.
	 */
	/* create the cache for 32-byte objects (the array_cache size class) */
	sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
					sizes[INDEX_AC].cs_size,
					ARCH_KMALLOC_MINALIGN,
					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
					NULL);
	printf("sizes[INDEX_AC].cs_cachep:0x%p\n", sizes[INDEX_AC].cs_cachep);

	/* create the cache for 64-byte objects (the kmem_list3 size class) */
	if (INDEX_AC != INDEX_L3) {
		sizes[INDEX_L3].cs_cachep =
			kmem_cache_create(names[INDEX_L3].name,
				sizes[INDEX_L3].cs_size,
				ARCH_KMALLOC_MINALIGN,
				ARCH_KMALLOC_FLAGS|SLAB_PANIC,
				NULL);
	}

	/* early slab initialization is over */
	slab_early_init = 0;

	while (sizes->cs_size != ULONG_MAX) {
		/*
		 * For performance, all the general caches are L1 aligned.
		 * This should be particularly beneficial on SMP boxes, as it
		 * eliminates "false sharing".
		 * Note for systems short on memory removing the alignment will
		 * allow tighter packing of the smaller caches.
		 */
		if (!sizes->cs_cachep) {
			sizes->cs_cachep = kmem_cache_create(names->name,
					sizes->cs_size,
					ARCH_KMALLOC_MINALIGN,
					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
					NULL);
		}
		sizes++;
		names++;
	}

	/* 4) Replace the bootstrap head arrays */
	/* swap cache_cache's static bootstrap array_cache for memory managed by the slab itself */
	{
		struct array_cache *ptr;

		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);

		/* local_irq_disable(); */
		/* BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); */
		memcpy(ptr, cpu_cache_get(&cache_cache),
		       sizeof(struct arraycache_init));
		/*
		 * Do not assume that spinlocks can be initialized via memcpy:
		 */
		/* spin_lock_init(&ptr->lock); */

		cache_cache.array[smp_processor_id()] = ptr;
		/* local_irq_enable(); */

		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);

		/* local_irq_disable(); */
		/* BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) */
		/*        != &initarray_generic.cache); */
		/* likewise replace the array_cache of the INDEX_AC cache, i.e. the 32-byte cache */
		memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
		       sizeof(struct arraycache_init));
		/*
		 * Do not assume that spinlocks can be initialized via memcpy:
		 */
		/* spin_lock_init(&ptr->lock); */

		malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
		    ptr;
		/* local_irq_enable(); */
	}

	/* 5) Replace the bootstrap kmem_list3's */
	{
		int nid;

		for (nid = 0; nid < 1; nid++) {
			init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);

			init_list(malloc_sizes[INDEX_AC].cs_cachep,
				  &initkmem_list3[SIZE_AC + nid], nid);

			if (INDEX_AC != INDEX_L3) {
				init_list(malloc_sizes[INDEX_L3].cs_cachep,
					  &initkmem_list3[SIZE_L3 + nid], nid);
			}
		}
	}

	/* 6) resize the head arrays to their final sizes */
	{
		struct kmem_cache *cachep;

		/* mutex_lock(&cache_chain_mutex); */
		list_for_each_entry(cachep, &cache_chain, next) {
			if (enable_cpucache(cachep))
				assert(0);
		}
		/* mutex_unlock(&cache_chain_mutex); */
	}

	/* Annotate slab for lockdep -- annotate the malloc caches */
	/* init_lock_keys(); */

	/* Done! */
	g_cpucache_up = FULL;

	/*
	 * Register a cpu startup notifier callback that initializes
	 * cpu_cache_get for all new cpus
	 */
	/* register_cpu_notifier(&cpucache_notifier); */

	/*
	 * The reap timers are started later, with a module init call: That part
	 * of the kernel is not yet operational.
	 */
}
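
To make the cache_estimate() and colour arithmetic concrete, here is a self-contained user-space sketch of the on-slab case, where the slab header and one kmem_bufctl_t index per object share the page with the objects. The header and object sizes are made-up example values, and the fix-up loop simplifies what the kernel actually does:

#include <stdio.h>

#define PAGE_SIZE	4096u
#define CACHE_LINE	64u
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int order = 0;				/* one page per slab */
	unsigned int slab_bytes = PAGE_SIZE << order;
	unsigned int buffer_size = 128;			/* object size (example) */
	unsigned int bufctl = sizeof(unsigned int);	/* one kmem_bufctl_t index */
	unsigned int slab_hdr = 32;			/* sizeof(struct slab), example */

	/* First approximation: objects that fit beside the management data. */
	unsigned int num = (slab_bytes - slab_hdr) / (buffer_size + bufctl);

	/* Management area = slab header + one index per object, aligned. */
	unsigned int mgmt = ALIGN_UP(slab_hdr + num * bufctl, CACHE_LINE);
	while (mgmt + num * buffer_size > slab_bytes) {	/* shrink if alignment overflowed */
		num--;
		mgmt = ALIGN_UP(slab_hdr + num * bufctl, CACHE_LINE);
	}

	/* Leftover bytes fund the colouring: prints num=30 left_over=64 colours=1. */
	unsigned int left_over = slab_bytes - mgmt - num * buffer_size;
	printf("num=%u left_over=%u colours=%u\n",
	       num, left_over, left_over / CACHE_LINE);
	return 0;
}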




