memory block

来源:互联网 发布:手机淘宝购物教程 编辑:程序博客网 时间:2024/06/06 04:51


日期:2017-07-017 | 内核版本:Linux-4.12 | 架构:X86 | 作者:lwhuq | GitHub:LinuxMemoryStudy | CSDN:Linux内存管理

1 Introduction

  在Linux内核早期启动阶段,Linux的内存管理模块还没有初始化完成,此时内核也需要一个简化的内存管理模块来满足内存分配请求。早期的内核中负责初始化阶段的内存分配器称为引导内存分配器(bootmem分配器)。bootmem分配器基于最先适配(first-fit)分配器的原理(这也是很多系统的内存分配所使用的原理),使用一个位图来管理页。最新的内核已过渡到使用memblock,详见patch。

  memblock和bootmem这两种机制对外提供的API是一致的,因此对使用者是透明的。内核通过编译选项CONFIG_NO_BOOTMEM来选择使用哪一种机制,相关规则定义在mm/Makefile#L46

ifdef CONFIG_NO_BOOTMEM
	obj-y += nobootmem.o
else
	obj-y += bootmem.o
endif

2 Data structure

  memblock的所有数据结构定义在include/linux/memblock.h。

  第一个数据结构的名字是memblock,定义在include/linux/memblock.h#L48

struct memblock {
	bool bottom_up;  /* is bottom up direction? 如果为true,从下往上(低地址到高地址)分配内存 */
	phys_addr_t current_limit; /* memblock分配时可使用的物理地址上限 */
	/* 三种不同内存类型:内存,预留,物理 */
	struct memblock_type memory;
	struct memblock_type reserved;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
	struct memblock_type physmem;
#endif
};
  第二个数据结构是memblock_type,定义在include/linux/memblock.h#L40
struct memblock_type {
	unsigned long cnt;	/* number of regions 内存区域的数目 */
	unsigned long max;	/* size of the allocated array 已分配的regions数组的容量(可容纳的region个数) */
	phys_addr_t total_size;	/* size of all regions 所有内存区域的总大小 */
	struct memblock_region *regions; /* 指向memblock_region数组 */
	char *name; /* 名字 */
};

  memblock_region结构用于描述memory region,定义在include/linux/memblock.h#L31

struct memblock_region {
	phys_addr_t base;
	phys_addr_t size;
	unsigned long flags;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
	int nid;
#endif
};
  memblock_region记录了当前memory region的起始地址、大小、标志和Node ID。标志的定义在include/linux/memblock.h#L24

/* Definition of memblock flags. */
enum {
	MEMBLOCK_NONE		= 0x0,	/* No special request */
	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
};


  总结来说,整个memory block的data structure布局如下

3 Memblock 初始化

  memblock结构的实例是一个同名的全局变量memblock(其regions指向的初始数组才是文件内的静态数组),定义在mm/memblock.c#L34

static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAPstatic struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;#endifstruct memblock memblock __initdata_memblock = {.memory.regions= memblock_memory_init_regions,.memory.cnt= 1,/* empty dummy entry */.memory.max= INIT_MEMBLOCK_REGIONS,.memory.name= "memory",.reserved.regions= memblock_reserved_init_regions,.reserved.cnt= 1,/* empty dummy entry */.reserved.max= INIT_MEMBLOCK_REGIONS,.reserved.name= "reserved",#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP.physmem.regions= memblock_physmem_init_regions,.physmem.cnt= 1,/* empty dummy entry */.physmem.max= INIT_PHYSMEM_REGIONS,.physmem.name= "physmem",#endif.bottom_up= false,.current_limit= MEMBLOCK_ALLOC_ANYWHERE,};

  • __initdata_memblock指定了结构储存位置,如果定义了CONFIG_ARCH_DISCARD_MEMBLOCK,则存放在__meminitdata
  • 每种memory type的cnt字段都初始化为1,对应一个占位用的空region(empty dummy entry)
  • 每种memory type的regions都指向全局静态数组。数组单元个数,memory和reserved初始化为INIT_MEMBLOCK_REGIONS,physical memory初始化为INIT_PHYSMEM_REGIONS。因此max字段也初始化同样的值
#define INIT_MEMBLOCK_REGIONS	128
#define INIT_PHYSMEM_REGIONS	4
  • bottom_up被初始化为false,说明内存分配默认是从高地址到低地址
  • current_limit被初始化为MEMBLOCK_ALLOC_ANYWHERE,即phys_addr_t所能表示的最大值,表示分配时不限制可使用的最高物理地址。
#define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)

4 Memblock APIs  

4.1 Add


  在bootmem.h中的相关APIs


4.1.1 memblock_add_range

定义在mm/memblock.c#L496

/** * memblock_add_range - add new memblock region * @type: memblock type to add new region into * @base: base address of the new region * @size: size of the new region * @nid: nid of the new region * @flags: flags of the new region * * Add new memblock region [@base,@base+@size) into @type.  The new region * is allowed to overlap with existing ones - overlaps don't affect already * existing regions.  @type is guaranteed to be minimal (all neighbouring * compatible regions are merged) after the addition. * * RETURNS: * 0 on success, -errno on failure. */int __init_memblock memblock_add_range(struct memblock_type *type,phys_addr_t base, phys_addr_t size,int nid, unsigned long flags){bool insert = false;phys_addr_t obase = base;phys_addr_t end = base + memblock_cap_size(base, &size);int idx, nr_new;struct memblock_region *rgn;if (!size)return 0;/* special case for empty array */if (type->regions[0].size == 0) {WARN_ON(type->cnt != 1 || type->total_size);type->regions[0].base = base;type->regions[0].size = size;type->regions[0].flags = flags;memblock_set_region_node(&type->regions[0], nid);type->total_size = size;return 0;}repeat:/* * The following is executed twice.  Once with %false @insert and * then with %true.  The first counts the number of regions needed * to accommodate the new area.  The second actually inserts them. */base = obase;nr_new = 0;for_each_memblock_type(type, rgn) {phys_addr_t rbase = rgn->base;phys_addr_t rend = rbase + rgn->size;if (rbase >= end)break;if (rend <= base)continue;/* * @rgn overlaps.  If it separates the lower part of new * area, insert that portion. 
*/if (rbase > base) {#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAPWARN_ON(nid != memblock_get_region_node(rgn));#endifWARN_ON(flags != rgn->flags);nr_new++;if (insert)memblock_insert_region(type, idx++, base,       rbase - base, nid,       flags);}/* area below @rend is dealt with, forget about it */base = min(rend, end);}/* insert the remaining portion */if (base < end) {nr_new++;if (insert)memblock_insert_region(type, idx, base, end - base,       nid, flags);}if (!nr_new)return 0;/* * If this was the first round, resize array and repeat for actual * insertions; otherwise, merge and return. */if (!insert) {while (type->cnt + nr_new > type->max)if (memblock_double_array(type, obase, size) < 0)return -ENOMEM;insert = true;goto repeat;} else {memblock_merge_regions(type);return 0;}}

  • 第一次循环(insert == false)只检查新区域与现有region的overlap情况,统计需要新增的region个数nr_new,并不做实际插入。如果nr_new为0(新区域已被现有region完全覆盖)则直接返回0;否则只要type->cnt + nr_new超过数组容量type->max,就反复调用memblock_double_array扩容(扩容失败返回-ENOMEM)。随后设置insert = true,goto到repeat执行第二次循环
  • 第二次循环中,执行insert == true代码块,调用memblock_insert_region插入region,最后调用memblock_merge_regions合并相邻region。
  • idx在声明处没有初始化,但它并不是依赖"默认值0"(局部变量没有默认值):在该版本内核中,for_each_memblock_type宏内部直接引用了名为idx的变量,在循环开始时把它置为0,并在每次迭代时递增。

4.2 Free and remove


4.3 Allocate


  • memblock的内存分配(allocate)本质上就是在memory类型中找到一段尚未被reserved的空闲范围,然后把这段范围添加到reserved类型的region中

5 memblock初始化

  X86_64架构内核从E820和EFI memmap得到boot内存信息,随后根据boot内存信息建立memblock结构。具体实现在setup_arch函数,定义在arch/x86/kernel/setup.c#L848

void __init setup_arch(char **cmdline_p){memblock_reserve(__pa_symbol(_text), (unsigned long)__bss_stop - (unsigned long)_text);#ifdef CONFIG_EFIif (efi_enabled(EFI_BOOT))efi_memblock_x86_reserve_range();#endif#ifdef CONFIG_MEMORY_HOTPLUG/* * Memory used by the kernel cannot be hot-removed because Linux * cannot migrate the kernel pages. When memory hotplug is * enabled, we should prevent memblock from allocating memory * for the kernel. * * ACPI SRAT records all hotpluggable memory ranges. But before * SRAT is parsed, we don't know about it. * * The kernel image is loaded into memory at very early time. We * cannot prevent this anyway. So on NUMA system, we set any * node the kernel resides in as un-hotpluggable. * * Since on modern servers, one node could have double-digit * gigabytes memory, we can assume the memory around the kernel * image is also un-hotpluggable. So before SRAT is parsed, just * allocate memory near the kernel image to try the best to keep * the kernel away from hotpluggable memory. */if (movable_node_is_enabled())memblock_set_bottom_up(true);#endif/* after early param, so could get panic from serial */memblock_x86_reserve_range_setup_data();/* * Need to conclude brk, before e820__memblock_setup() *  it could use memblock_find_in_range, could overlap with *  brk area. */reserve_brk();cleanup_highmap();memblock_set_current_limit(ISA_END_ADDRESS);e820__memblock_setup();}

  • 最后的e820__memblock_setup()真正完成把可用内存添加到memblock的初始化工作。在此之前的调用主要是通过memblock_reserve等把已被占用的内存范围(内核镜像、EFI相关区域、setup data、brk等)登记到reserved类型中,并设置分配方向(bottom_up)和current_limit

6 Reference

  • Linux kernel memory management Part 1 
  • 启动期间的内存管理之memblock分配器--Linux内存管理(十一)


原创粉丝点击