bootmem allocator

来源:互联网 发布:软件开发的难度 编辑:程序博客网 时间:2024/05/18 01:06

       在系统启动阶段,buddy系统和slab分配器建立之前,系统的每个节点都拥有自己的bootmem allocator来实现内存的分配,当启动阶段结束后,bootmem allocator将被销毁,而相应的空闲内存会提交给buddy系统来管理,因此bootmem allocator所存在的时间是短暂的,它的宗旨是简单,而非高效!bootmem allocator的基本思想是在一个节点中建立一片位图区域,每一位对应该节点的低端内存的一个页框,通过一个bit来标记一个页的状态,实现页面的分配与回收。

        首先了解一下bootmem的核心数据结构

 

/*
 * Per-node bookkeeping for the boot-time memory allocator.
 *
 * The allocator tracks one bit per page frame in node_bootmem_map;
 * a set bit means the page is reserved, a clear bit means it is free.
 * Bitmap indices are page frame numbers relative to node_min_pfn.
 */
typedef struct bootmem_data {
	/* First page frame number covered by this node's bitmap. */
	unsigned long node_min_pfn;
	/* End page frame number of the covered range (ranges are checked against it). */
	unsigned long node_low_pfn;
	/* Virtual address of the bitmap itself. */
	void *node_bootmem_map;
	/*
	 * Byte offset (relative to the start of the node) at which the
	 * previous allocation ended; lets the next allocation reuse the
	 * tail of a partially used page.
	 */
	unsigned long last_end_off;
	/* Bitmap index at which the next allocation starts searching. */
	unsigned long hint_idx;
	/* Links this descriptor into the list of all nodes' bootmem data. */
	struct list_head list;
} bootmem_data_t;
  • node_min_pfn:节点的最小页框编号
  • node_low_pfn:节点的低端内存最大页框编号
  • node_bootmem_map:节点的位图起始地址
  • last_end_off:上一次分配结束位置的字节偏移(相对于节点起始地址)。如果它没有落在页边界上,说明上次分配的最后一页还有剩余空间,下一次分配可以接着使用这部分空间,从而减少碎片
  • hint_idx:下一次分配时开始搜索空闲页的位图索引(相对于node_min_pfn),每次分配后更新为本次分配末端的下一页,释放内存时会被下调
  • list:用于将该节点的bootmem链入所有节点的bootmem链表

     下面结合具体的代码就以下几个主要的方面介绍bootmem allocator的工作过程

      1.bootmem allocator的初始化

      2.bootmem allocator保留内存和释放内存

      3.bootmem allocator分配内存

      4.bootmem allocator的销毁

1.bootmem allocator的初始化

        在setup_arch()中,通过initmem_init()-->setup_bootmem_allocator()-->setup_node_bootmem()-->init_bootmem_node()来建立节点中的bootmem allocator。还有一个初始化函数是init_bootmem(),它和init_bootmem_node()一样,都是对init_bootmem_core()的封装,区别是init_bootmem()只针对单节点系统(固定使用NODE_DATA(0)),而init_bootmem_node()需要指定一个节点。后面的其他操作也都用到了类似的封装方法。

/*
 * init_bootmem_node - set up the boot memory allocator for one node
 * @pgdat: node whose bootmem data (pgdat->bdata) is initialised
 * @freepfn: page frame number where the bitmap is placed
 * @startpfn: first page frame number of the node
 * @endpfn: end page frame number of the node
 *
 * Thin wrapper around init_bootmem_core(); returns whatever the core
 * routine returns (the bitmap size in bytes).
 */
unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
				       unsigned long startpfn,
				       unsigned long endpfn)
{
	unsigned long mapsize;

	mapsize = init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);

	return mapsize;
}

/*
 * init_bootmem - set up the boot memory allocator on node 0
 * @start: page frame number where the bitmap is placed
 * @pages: number of pages, also used as the end page frame number
 *
 * Records the global low-memory PFN limits and then initialises the
 * bootmem data of NODE_DATA(0) for the range [0, pages).
 */
unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
{
	unsigned long mapsize;

	max_low_pfn = pages;
	min_low_pfn = start;

	mapsize = init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);

	return mapsize;
}

 

下面来看看bootmem初始化的核心函数init_bootmem_core()

/*
 * init_bootmem_core - initialise one node's bootmem descriptor and bitmap
 * @bdata: descriptor to fill in
 * @mapstart: page frame number at which the bitmap is stored
 * @start: first page frame number managed by this node
 * @end: end page frame number managed by this node
 *
 * Returns the size of the bitmap in bytes.
 */
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
					      unsigned long mapstart,
					      unsigned long start,
					      unsigned long end)
{
	unsigned long bitmap_bytes;

	/* May adjust start/end, so it has to run before they are recorded. */
	mminit_validate_memmodel_limits(&start, &end);

	/* The bitmap is addressed through its virtual address from now on. */
	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
	bdata->node_min_pfn = start;
	bdata->node_low_pfn = end;

	/* Add the descriptor to the list of bootmem nodes. */
	link_bootmem(bdata);

	/*
	 * Initially all pages are reserved - setup_arch() has to
	 * register free RAM areas explicitly.
	 */
	bitmap_bytes = bootmap_bytes(end - start);
	memset(bdata->node_bootmem_map, 0xff, bitmap_bytes);

	bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
		bdata - bootmem_node_data, start, mapstart, end, bitmap_bytes);

	return bitmap_bytes;
}

 

我们可以看到在init_bootmem_core()中,主要的工作就是初始化bdata中的变量,以及将位图全部置1,这些参数的确定是在前面列举的函数中完成的。

 

 

2.bootmem allocator保留内存和释放内存

           保留内存和释放内存是两个相对的概念:bootmem allocator分配出去的内存会被标记为保留状态,也就是对应的位图位都为1,这些内存在bootmem allocator销毁后不会被buddy系统接管;而释放内存很好理解,就是将相应的页面置于空闲状态(对应的位图位清0),这些页面可以被bootmem allocator分配,空闲的页面在bootmem allocator销毁后会被buddy系统接管。

       先来看看保留内存的处理,调用reserve_bootmem_node()函数可以将指定节点中的指定范围页面置为保留状态

/*
 * reserve_bootmem_node - mark a physical address range of a node as reserved
 * @pgdat: node the range belongs to
 * @physaddr: physical start address of the range
 * @size: length of the range in bytes
 * @flags: passed through to mark_bootmem_node()
 *
 * The byte range is converted to page frames with PFN_DOWN() for the
 * start and PFN_UP() for the end, so partially covered pages are
 * included.  Returns the result of mark_bootmem_node().
 */
int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
				unsigned long size, int flags)
{
	unsigned long first_pfn = PFN_DOWN(physaddr);
	unsigned long end_pfn = PFN_UP(physaddr + size);

	/* The literal 1 selects "reserve" rather than "free". */
	return mark_bootmem_node(pgdat->bdata, first_pfn, end_pfn, 1, flags);
}

 

下面来看核心函数mark_bootmem_node()

/*
 * mark_bootmem_node - reserve or free a page frame range on one node
 * @bdata: bootmem descriptor of the node
 * @start: first page frame number of the range
 * @end: end page frame number of the range
 * @reserve: non-zero to reserve the range, zero to free it
 * @flags: only used when reserving; forwarded to __reserve()
 *
 * The range must lie inside [node_min_pfn, node_low_pfn], otherwise
 * BUG_ON() fires.  Returns 0 when freeing, or the result of
 * __reserve() when reserving.
 */
static int __init mark_bootmem_node(bootmem_data_t *bdata,
				    unsigned long start, unsigned long end,
				    int reserve, int flags)
{
	unsigned long first_bit, end_bit;

	bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n",
		bdata - bootmem_node_data, start, end, reserve, flags);

	BUG_ON(start < bdata->node_min_pfn);
	BUG_ON(end > bdata->node_low_pfn);

	/* Bitmap indices are page frame numbers relative to the node start. */
	first_bit = start - bdata->node_min_pfn;
	end_bit = end - bdata->node_min_pfn;

	if (!reserve) {
		__free(bdata, first_bit, end_bit);
		return 0;
	}

	return __reserve(bdata, first_bit, end_bit, flags);
}

 

再看__reserve()

/*
 * __reserve - set the bitmap bits for the index range [sidx, eidx)
 * @bdata: bootmem descriptor of the node
 * @sidx: first bitmap index to reserve
 * @eidx: index one past the last bit to reserve
 * @flags: if BOOTMEM_EXCLUSIVE is set, an already reserved page is an error
 *
 * Returns 0 on success.  In exclusive mode, hitting a page that is
 * already reserved undoes the bits set so far and returns -EBUSY;
 * otherwise the double reservation is only logged.
 */
static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
			    unsigned long eidx, int flags)
{
	int exclusive = flags & BOOTMEM_EXCLUSIVE;
	unsigned long bit;

	bdebug("nid=%td start=%lx end=%lx flags=%x\n",
		bdata - bootmem_node_data,
		sidx + bdata->node_min_pfn,
		eidx + bdata->node_min_pfn,
		flags);

	for (bit = sidx; bit < eidx; bit++) {
		/* Bit was clear before: the page is now freshly reserved. */
		if (!test_and_set_bit(bit, bdata->node_bootmem_map))
			continue;

		if (exclusive) {
			/* Roll back the pages reserved by this call. */
			__free(bdata, sidx, bit);
			return -EBUSY;
		}

		bdebug("silent double reserve of PFN %lx\n",
			bit + bdata->node_min_pfn);
	}

	return 0;
}

 

可以看到,保留页面的关键操作就是调用test_and_set_bit()将位图的相关区域置1.

 

          释放内存和保留内存的过程基本相同,只不过传递给mark_bootmem_node()的reserve参数为0,表示释放相应页面,因此在mark_bootmem_node()中会调用__free()

/*
 * __free - clear the bitmap bits for the index range [sidx, eidx)
 * @bdata: bootmem descriptor of the node
 * @sidx: first bitmap index to free
 * @eidx: index one past the last bit to free
 *
 * Every page in the range must currently be reserved; freeing a page
 * whose bit is already clear triggers BUG().
 */
static void __init __free(bootmem_data_t *bdata,
			  unsigned long sidx, unsigned long eidx)
{
	unsigned long bit = sidx;

	bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data,
		sidx + bdata->node_min_pfn,
		eidx + bdata->node_min_pfn);

	/* Pull the search hint down so the freed pages are not skipped. */
	if (sidx < bdata->hint_idx)
		bdata->hint_idx = sidx;

	while (bit < eidx) {
		if (!test_and_clear_bit(bit, bdata->node_bootmem_map))
			BUG();
		bit++;
	}
}

__free()相较__reserve()多了一处对bdata->hint_idx的操作:如果被释放区域的起始索引低于hint_idx,就把hint_idx下调到该索引。这样做是为了保证下一次分配搜索时不会跳过刚释放的低地址空闲页,因为bootmem allocator在进行分配的时候,总是尽量从最低的空闲页开始分配

 

 

3.bootmem allocator分配内存

         bootmem allocator分配内存相对于前面的操作来说要复杂一些,这里面主要考虑的一个问题就是内存碎片。设我们的页面大小为4KB,假如我们上一次分配内存的范围是从第4个页面开始到第8个页面的2KB处,而这次搜索到的起始页是第9个页面,如果直接从第9个页面开始分配的话,那么第8个页面剩余的2KB就成了内存碎片,这样无疑会产生大量的浪费。这也是为什么我们之前介绍的bootmem关键数据结构中引入last_end_off这个变量:它记录了上次分配结束位置的字节偏移(相对于节点起始地址),如果该偏移没有对齐到页边界,就说明上次分配的最后一页还有剩余空间(本例中第8个页面还剩2KB)。那么如果这次搜索到的起始页正好是紧随其后的第9个页面,我们就要考虑将这2KB整合到这次分配中去。

       分配内存的核心函数是alloc_bootmem_core(),具体代码如下:

/*
 * alloc_bootmem_core - allocate zeroed memory from one node's bootmem bitmap
 * @bdata: bootmem descriptor of the node to allocate from
 * @size: number of bytes to allocate, must be non-zero
 * @align: required alignment in bytes, must be a power of two (or zero)
 * @goal: preferred physical start address, 0 for no preference
 * @limit: physical address the allocation must stay below, 0 for no limit
 *
 * Searches the bitmap for a run of free pages large enough for @size,
 * starting at the search hint when that lies above the requested start
 * and falling back to the requested start if nothing is found there.
 * When the previous allocation ended in the middle of the page right
 * before the run that was found, the new allocation starts in the
 * unused tail of that page instead of at the page boundary.
 *
 * Returns the virtual address of the zeroed region, or NULL if the
 * node has no bitmap or no suitable range exists.
 */
static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
					unsigned long size, unsigned long align,
					unsigned long goal, unsigned long limit)
{
	unsigned long fallback = 0;
	unsigned long min, max, start, sidx, midx, step;

	bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n",
		bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
		align, goal, limit);

	BUG_ON(!size);				/* empty allocations are a bug */
	BUG_ON(align & (align - 1));		/* alignment must be a power of two */
	BUG_ON(limit && goal + size > limit);	/* goal must fit below the limit */

	if (!bdata->node_bootmem_map)
		return NULL;

	/* Page frame range of this node. */
	min = bdata->node_min_pfn;
	max = bdata->node_low_pfn;

	/* From here on goal and limit are page frame numbers, not addresses. */
	goal >>= PAGE_SHIFT;
	limit >>= PAGE_SHIFT;

	if (limit && max > limit)
		max = limit;
	if (max <= min)
		return NULL;

	/* Search stride in pages: the alignment, but at least one page. */
	step = max(align >> PAGE_SHIFT, 1UL);

	/* Start at the goal if it lies inside the node, else at the node start. */
	if (goal && min < goal && goal < max)
		start = ALIGN(goal, step);
	else
		start = ALIGN(min, step);

	/* Convert to bitmap indices relative to the node start. */
	sidx = start - bdata->node_min_pfn;
	midx = max - bdata->node_min_pfn;

	/*
	 * The hint lies above the requested start: begin the search at the
	 * (aligned) hint and remember the requested start for a second pass.
	 */
	if (bdata->hint_idx > sidx) {
		/*
		 * Handle the valid case of sidx being zero and still
		 * catch the fallback below.
		 */
		fallback = sidx + 1;
		sidx = align_idx(bdata, bdata->hint_idx, step);
	}

	while (1) {
		int merge;
		void *region;
		unsigned long eidx, i, start_off, end_off;
find_block:
		/* Next free page at or after sidx, rounded to the stride. */
		sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
		sidx = align_idx(bdata, sidx, step);
		eidx = sidx + PFN_UP(size);

		if (sidx >= midx || eidx > midx)
			break;

		/* Every page of the candidate run has to be free. */
		for (i = sidx; i < eidx; i++)
			if (test_bit(i, bdata->node_bootmem_map)) {
				/* Restart behind the reserved page. */
				sidx = align_idx(bdata, i, step);
				if (sidx == i)
					sidx += step;
				goto find_block;
			}

		/*
		 * If the previous allocation ended inside a page (its end
		 * offset is not page aligned) and that page directly
		 * precedes the run just found, start in the unused tail of
		 * that page; otherwise start at the first page of the run.
		 */
		if (bdata->last_end_off & (PAGE_SIZE - 1) &&
				PFN_DOWN(bdata->last_end_off) + 1 == sidx)
			start_off = align_off(bdata, bdata->last_end_off, align);
		else
			start_off = PFN_PHYS(sidx);

		/*
		 * merge is 1 when the allocation starts in the page before
		 * sidx; that page is already reserved, so it is skipped when
		 * reserving below.
		 */
		merge = PFN_DOWN(start_off) < sidx;
		end_off = start_off + size;

		/* Remember where this allocation ends for the next caller. */
		bdata->last_end_off = end_off;
		bdata->hint_idx = PFN_UP(end_off);

		/*
		 * Reserve the area now:
		 */
		if (__reserve(bdata, PFN_DOWN(start_off) + merge,
				PFN_UP(end_off), BOOTMEM_EXCLUSIVE))
			BUG();

		/* start_off is relative to the node start; make it a pointer. */
		region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
				start_off);
		memset(region, 0, size);
		/*
		 * The min_count is set to 0 so that bootmem allocated blocks
		 * are never reported as leaks.
		 */
		kmemleak_alloc(region, size, 0, 0);
		return region;
	}

	/* Nothing above the hint: retry once from the requested start. */
	if (fallback) {
		sidx = align_idx(bdata, fallback - 1, step);
		fallback = 0;
		goto find_block;
	}

	return NULL;
}


 

4.bootmem allocator的销毁

           bootmem allocator销毁后,其空闲的内存将交由buddy system接管,核心函数为free_all_bootmem_core()

/*
 * free_all_bootmem_core - release a node's free pages and its bitmap
 * @bdata: bootmem descriptor of the node
 *
 * Walks the bitmap one machine word at a time and passes every page
 * whose bit is clear to __free_pages_bootmem(), then releases the pages
 * that hold the bitmap itself.
 *
 * Returns the number of pages released, or 0 if the node has no bitmap.
 */
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
	int word_aligned;
	struct page *page;
	unsigned long start, end, pages, released = 0;

	if (!bdata->node_bootmem_map)
		return 0;

	start = bdata->node_min_pfn;
	end = bdata->node_low_pfn;

	/*
	 * If the start is aligned to the machines wordsize, we might
	 * be able to free pages in bulks of that order.
	 */
	word_aligned = !(start & (BITS_PER_LONG - 1));

	bdebug("nid=%td start=%lx end=%lx aligned=%d\n",
		bdata - bootmem_node_data, start, end, word_aligned);

	/* Step 1: release the free pages, one bitmap word per iteration. */
	for (; start < end; start += BITS_PER_LONG) {
		unsigned long *map = bdata->node_bootmem_map;
		unsigned long idx = start - bdata->node_min_pfn;
		/* Inverted word: a set bit in vec marks a free page. */
		unsigned long vec = ~map[idx / BITS_PER_LONG];
		unsigned long off;

		/*
		 * Whole word free, start word-aligned and the word ends
		 * strictly before the node end: release it as one block of
		 * BITS_PER_LONG pages.
		 */
		if (word_aligned && vec == ~0UL &&
		    start + BITS_PER_LONG < end) {
			int order = ilog2(BITS_PER_LONG);

			__free_pages_bootmem(pfn_to_page(start), order);
			released += BITS_PER_LONG;
			continue;
		}

		/* Otherwise release page by page until no free bit is left. */
		for (off = 0; vec && off < BITS_PER_LONG; off++, vec >>= 1) {
			if (!(vec & 1))
				continue;

			page = pfn_to_page(start + off);
			__free_pages_bootmem(page, 0);
			released++;
		}
	}

	/* Step 2: release the pages that store the bitmap. */
	page = virt_to_page(bdata->node_bootmem_map);
	pages = bdata->node_low_pfn - bdata->node_min_pfn;
	pages = bootmem_bootmap_pages(pages);
	released += pages;
	for (; pages; pages--, page++)
		__free_pages_bootmem(page, 0);

	bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, released);

	return released;
}


 

	
				
		
原创粉丝点击