Memory: Reserved Memory and Contiguous Memory Allocation
Reserved Memory

宋宝华 (Barry Song)
When we work with ARM and other embedded Linux systems, one headache is that the GPU, Camera, HDMI and similar devices all need large amounts of physically contiguous memory reserved for them. Most of the time this memory sits unused, yet with the conventional approach it still has to be set aside up front. Marek Szyprowski and Michal Nazarewicz have now implemented a brand-new Contiguous Memory Allocator (CMA). With this mechanism we no longer have to reserve memory permanently: the memory stays available for normal use and is only handed to the Camera, HDMI and other devices when they actually need it. The basic code flow is analysed below.
Declaring Contiguous Memory
During kernel boot, arm_memblock_init() in arch/arm/mm/init.c calls dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));
The function lives in drivers/base/dma-contiguous.c:
/**
 * dma_contiguous_reserve() - reserve area for contiguous memory handling
 * @limit: End address of the reserved memory (optional, 0 for any).
 *
 * This function reserves memory from early allocator. It should be
 * called by arch specific code once the early allocator (memblock or bootmem)
 * has been activated and all other subsystems have already allocated/reserved
 * memory.
 */
void __init dma_contiguous_reserve(phys_addr_t limit)
{
    unsigned long selected_size = 0;

    pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);

    if (size_cmdline != -1) {
        selected_size = size_cmdline;
    } else {
#ifdef CONFIG_CMA_SIZE_SEL_MBYTES
        selected_size = size_bytes;
#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)
        selected_size = cma_early_percent_memory();
#elif defined(CONFIG_CMA_SIZE_SEL_MIN)
        selected_size = min(size_bytes, cma_early_percent_memory());
#elif defined(CONFIG_CMA_SIZE_SEL_MAX)
        selected_size = max(size_bytes, cma_early_percent_memory());
#endif
    }

    if (selected_size) {
        pr_debug("%s: reserving %ld MiB for global area\n", __func__,
                 selected_size / SZ_1M);
        dma_declare_contiguous(NULL, selected_size, 0, limit);
    }
}
The size_bytes used above is defined as:

static const unsigned long size_bytes = CMA_SIZE_MBYTES * SZ_1M;

By default CMA_SIZE_MBYTES is 16, taken from CONFIG_CMA_SIZE_MBYTES=16, i.e. a 16 MiB global area.
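size_cmdline, for its part, is filled in from the cma= kernel command-line option. From memory, the parsing code in dma-contiguous.c of that era looked roughly like this (treat it as a sketch rather than a verbatim quote):

static long size_cmdline = -1;

/* parse the "cma=" option on the kernel command line, e.g. cma=64M */
static int __init early_cma(char *p)
{
    pr_debug("%s(%s)\n", __func__, p);
    size_cmdline = memparse(p, &p);
    return 0;
}
early_param("cma", early_cma);

So booting with, say, cma=64M overrides the Kconfig-selected default.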
->

int __init dma_declare_contiguous(struct device *dev, unsigned long size,
                                  phys_addr_t base, phys_addr_t limit)
{
    ...
    /* Reserve memory */
    if (base) {
        if (memblock_is_region_reserved(base, size) ||
            memblock_reserve(base, size) < 0) {
            base = -EBUSY;
            goto err;
        }
    } else {
        /*
         * Use __memblock_alloc_base() since
         * memblock_alloc_base() panic()s.
         */
        phys_addr_t addr = __memblock_alloc_base(size, alignment, limit);
        if (!addr) {
            base = -ENOMEM;
            goto err;
        } else if (addr + size > ~(unsigned long)0) {
            memblock_free(addr, size);
            base = -EINVAL;
            goto err;
        } else {
            base = addr;
        }
    }

    /*
     * Each reserved area must be initialised later, when more kernel
     * subsystems (like slab allocator) are available.
     */
    r->start = base;
    r->size = size;
    r->dev = dev;
    cma_reserved_count++;
    pr_info("CMA: reserved %ld MiB at %08lx\n", size / SZ_1M,
            (unsigned long)base);

    /* Architecture specific contiguous memory fixup. */
    dma_contiguous_early_fixup(base, size);
    return 0;
err:
    pr_err("CMA: failed to reserve %ld MiB\n", size / SZ_1M);
    return base;
}
As this shows, the contiguous region is likewise carved out early in boot, obtained through __memblock_alloc_base().
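The "architecture specific contiguous memory fixup" called at the end of dma_declare_contiguous() merely records each area so that ARM can remap it later (from dma_contiguous_remap()). A from-memory sketch of the ARM side in arch/arm/mm/dma-mapping.c, so names and details may differ slightly:

struct dma_contig_early_reserve {
    phys_addr_t base;
    unsigned long size;
};

static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata;
static int dma_mmu_remap_num __initdata;

/* remember the area; dma_contiguous_remap() remaps it once paging is up */
void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
{
    dma_mmu_remap[dma_mmu_remap_num].base = base;
    dma_mmu_remap[dma_mmu_remap_num].size = size;
    dma_mmu_remap_num++;
}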
In addition, the core_initcall() in drivers/base/dma-contiguous.c causes cma_init_reserved_areas() to be called:
static int __init cma_init_reserved_areas(void)
{
    struct cma_reserved *r = cma_reserved;
    unsigned i = cma_reserved_count;

    pr_debug("%s()\n", __func__);

    for (; i; --i, ++r) {
        struct cma *cma;
        cma = cma_create_area(PFN_DOWN(r->start),
                              r->size >> PAGE_SHIFT);
        if (!IS_ERR(cma))
            dev_set_cma_area(r->dev, cma);
    }
    return 0;
}
core_initcall(cma_init_reserved_areas);
cma_create_area() calls cma_activate_area(), and for every pageblock in the area cma_activate_area() invokes:

init_cma_reserved_pageblock(pfn_to_page(base_pfn));

This function marks the whole pageblock as MIGRATE_CMA via set_pageblock_migratetype(page, MIGRATE_CMA):
#ifdef CONFIG_CMA
/* Free whole pageblock and set it's migration type to MIGRATE_CMA. */
void __init init_cma_reserved_pageblock(struct page *page)
{
    unsigned i = pageblock_nr_pages;
    struct page *p = page;

    do {
        __ClearPageReserved(p);
        set_page_count(p, 0);
    } while (++p, --i);

    set_page_refcounted(page);
    set_pageblock_migratetype(page, MIGRATE_CMA);
    __free_pages(page, pageblock_order);
    totalram_pages += pageblock_nr_pages;
}
#endif
The __free_pages(page, pageblock_order) call at the end eventually reaches __free_one_page(page, zone, order, migratetype), and the pages are added to the MIGRATE_CMA free_list:
list_add(&page->lru, &zone->free_area[order].free_list[migratetype]);
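For completeness, cma_activate_area(), which drives the per-pageblock initialisation above, iterates pageblock by pageblock. Reconstructed from memory of the dma-contiguous.c of that time (a sketch, details may differ):

static __init int cma_activate_area(unsigned long base_pfn, unsigned long count)
{
    unsigned long pfn = base_pfn;
    unsigned i = count >> pageblock_order;  /* number of pageblocks */
    struct zone *zone;

    WARN_ON_ONCE(!pfn_valid(pfn));
    zone = page_zone(pfn_to_page(pfn));

    do {
        unsigned j;
        base_pfn = pfn;
        /* every page of the pageblock must be valid and in one zone */
        for (j = pageblock_nr_pages; j; --j, pfn++) {
            WARN_ON_ONCE(!pfn_valid(pfn));
            if (page_zone(pfn_to_page(pfn)) != zone)
                return -EINVAL;
        }
        init_cma_reserved_pageblock(pfn_to_page(base_pfn));
    } while (--i);

    return 0;
}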
Allocating Contiguous Memory
Allocating contiguous memory still goes through the standard dma_alloc_coherent() and dma_alloc_writecombine() defined in arch/arm/mm/dma-mapping.c; both of them indirectly call dma_alloc_from_contiguous() in drivers/base/dma-contiguous.c.
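On the ARM side, the glue that converts the requested byte size into the page count and alignment order that CMA expects looks roughly like the following sketch of __alloc_from_contiguous() (reproduced from memory; __dma_clear_buffer() and __dma_remap() are helpers local to dma-mapping.c):

static void *__alloc_from_contiguous(struct device *dev, size_t size,
                                     pgprot_t prot, struct page **ret_page)
{
    unsigned long order = get_order(size);  /* alignment for the CMA bitmap */
    size_t count = size >> PAGE_SHIFT;      /* size in pages */
    struct page *page;

    page = dma_alloc_from_contiguous(dev, count, order);
    if (!page)
        return NULL;

    __dma_clear_buffer(page, size);
    __dma_remap(page, size, prot);          /* e.g. uncached or writecombine */

    *ret_page = page;
    return page_address(page);
}

The function it calls into is: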
struct page *dma_alloc_from_contiguous(struct device *dev, int count,
                                       unsigned int align)
->
struct page *dma_alloc_from_contiguous(struct device *dev, int count,
                                       unsigned int align)
{
    ...
    for (;;) {
        pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
                                            start, count, mask);
        if (pageno >= cma->count) {
            ret = -ENOMEM;
            goto error;
        }

        pfn = cma->base_pfn + pageno;
        ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
        if (ret == 0) {
            bitmap_set(cma->bitmap, pageno, count);
            break;
        } else if (ret != -EBUSY) {
            goto error;
        }
        pr_debug("%s(): memory range at %p is busy, retrying\n",
                 __func__, pfn_to_page(pfn));
        /* try again with a bit different memory target */
        start = pageno + mask + 1;
    }
    ...
}
->
int alloc_contig_range(unsigned long start, unsigned long end,
                       unsigned migratetype)

alloc_contig_range() needs to isolate the pages first; what the isolation achieves is best explained by the comment in the code:
    /*
     * What we do here is we mark all pageblocks in range as
     * MIGRATE_ISOLATE. Because of the way page allocator work, we
     * align the range to MAX_ORDER pages so that page allocator
     * won't try to merge buddies from different pageblocks and
     * change MIGRATE_ISOLATE to some other migration type.
     *
     * Once the pageblocks are marked as MIGRATE_ISOLATE, we
     * migrate the pages from an unaligned range (ie. pages that
     * we are interested in). This will put all the pages in
     * range back to page allocator as MIGRATE_ISOLATE.
     *
     * When this is done, we take the pages in range from page
     * allocator removing them from the buddy system. This way
     * page allocator will never consider using them.
     *
     * This lets us mark the pageblocks back as
     * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the
     * MAX_ORDER aligned range but not in the unaligned, original
     * range are put back to page allocator so that buddy can use
     * them.
     */
    ret = start_isolate_page_range(pfn_align_to_maxpage_down(start),
                                   pfn_align_to_maxpage_up(end),
                                   migratetype);
Put simply, the pages concerned are marked MIGRATE_ISOLATE, so the buddy system will not use them any more.
/*
 * start_isolate_page_range() -- make page-allocation-type of range of pages
 * to be MIGRATE_ISOLATE.
 * @start_pfn: The lower PFN of the range to be isolated.
 * @end_pfn: The upper PFN of the range to be isolated.
 * @migratetype: migrate type to set in error recovery.
 *
 * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
 * the range will never be allocated. Any free pages and pages freed in the
 * future will not be allocated again.
 *
 * start_pfn/end_pfn must be aligned to pageblock_order.
 * Returns 0 on success and -EBUSY if any part of range cannot be isolated.
 */
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
                             unsigned migratetype)
{
    unsigned long pfn;
    unsigned long undo_pfn;
    struct page *page;

    BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
    BUG_ON((end_pfn) & (pageblock_nr_pages - 1));

    for (pfn = start_pfn;
         pfn < end_pfn;
         pfn += pageblock_nr_pages) {
        page = __first_valid_page(pfn, pageblock_nr_pages);
        if (page && set_migratetype_isolate(page)) {
            undo_pfn = pfn;
            goto undo;
        }
    }
    return 0;
undo:
    for (pfn = start_pfn;
         pfn < undo_pfn;
         pfn += pageblock_nr_pages)
        unset_migratetype_isolate(pfn_to_page(pfn), migratetype);

    return -EBUSY;
}
Next, __alloc_contig_migrate_range() is called to isolate and migrate the pages in the range:
static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
{
    /* This function is based on compact_zone() from compaction.c. */

    unsigned long pfn = start;
    unsigned int tries = 0;
    int ret = 0;

    struct compact_control cc = {
        .nr_migratepages = 0,
        .order = -1,
        .zone = page_zone(pfn_to_page(start)),
        .sync = true,
    };
    INIT_LIST_HEAD(&cc.migratepages);

    migrate_prep_local();

    while (pfn < end || !list_empty(&cc.migratepages)) {
        if (fatal_signal_pending(current)) {
            ret = -EINTR;
            break;
        }

        if (list_empty(&cc.migratepages)) {
            cc.nr_migratepages = 0;
            pfn = isolate_migratepages_range(cc.zone, &cc,
                                             pfn, end);
            if (!pfn) {
                ret = -EINTR;
                break;
            }
            tries = 0;
        } else if (++tries == 5) {
            ret = ret < 0 ? ret : -EBUSY;
            break;
        }

        ret = migrate_pages(&cc.migratepages,
                            __alloc_contig_migrate_alloc,
                            0, false, true);
    }

    putback_lru_pages(&cc.migratepages);
    return ret > 0 ? 0 : ret;
}
migrate_pages() carries out the migration itself. During it, new pages are allocated through the __alloc_contig_migrate_alloc() callback passed in (a sketch of that callback follows the listing below), and the contents of the old pages are copied into the new ones:
int migrate_pages(struct list_head *from,
                  new_page_t get_new_page, unsigned long private, bool offlining,
                  bool sync)
{
    int retry = 1;
    int nr_failed = 0;
    int pass = 0;
    struct page *page;
    struct page *page2;
    int swapwrite = current->flags & PF_SWAPWRITE;
    int rc;

    if (!swapwrite)
        current->flags |= PF_SWAPWRITE;

    for (pass = 0; pass < 10 && retry; pass++) {
        retry = 0;
        list_for_each_entry_safe(page, page2, from, lru) {
            cond_resched();

            rc = unmap_and_move(get_new_page, private,
                                page, pass > 2, offlining,
                                sync);

            switch (rc) {
            case -ENOMEM:
                goto out;
            case -EAGAIN:
                retry++;
                break;
            case 0:
                break;
            default:
                /* Permanent failure */
                nr_failed++;
                break;
            }
        }
    }
    rc = 0;
    ...
}
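The callback that supplies migrate_pages() with destination pages is tiny; as far as the author recalls, in the original CMA patchset it looked roughly like this (a sketch, not a verbatim quote):

static struct page *
__alloc_contig_migrate_alloc(struct page *page, unsigned long private,
                             int **resultp)
{
    gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;

    /* keep highmem pages in highmem */
    if (PageHighMem(page))
        gfp_mask |= __GFP_HIGHMEM;

    return alloc_page(gfp_mask);
}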
The key function here is unmap_and_move(), defined in mm/migrate.c:
/*
 * Obtain the lock on page, remove all ptes and migrate the page
 * to the newly allocated page in newpage.
 */
static int unmap_and_move(new_page_t get_new_page, unsigned long private,
                          struct page *page, int force, bool offlining,
                          bool sync)
{
    int rc = 0;
    int *result = NULL;
    struct page *newpage = get_new_page(page, private, &result);
    int remap_swapcache = 1;
    int charge = 0;
    struct mem_cgroup *mem = NULL;
    struct anon_vma *anon_vma = NULL;
    ...
    /* charge against new page */
    charge = mem_cgroup_prepare_migration(page, newpage, &mem);
    ...
    if (PageWriteback(page)) {
        if (!force || !sync)
            goto uncharge;
        wait_on_page_writeback(page);
    }
    /*
     * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
     * we cannot notice that anon_vma is freed while we migrates a page.
     * This get_anon_vma() delays freeing anon_vma pointer until the end
     * of migration. File cache pages are no problem because of page_lock()
     * File Caches may use write_page() or lock_page() in migration, then,
     * just care Anon page here.
     */
    if (PageAnon(page)) {
        /*
         * Only page_lock_anon_vma() understands the subtleties of
         * getting a hold on an anon_vma from outside one of its mms.
         */
        anon_vma = page_lock_anon_vma(page);
        if (anon_vma) {
            /*
             * Take a reference count on the anon_vma if the
             * page is mapped so that it is guaranteed to
             * exist when the page is remapped later
             */
            get_anon_vma(anon_vma);
            page_unlock_anon_vma(anon_vma);
        } else if (PageSwapCache(page)) {
            /*
             * We cannot be sure that the anon_vma of an unmapped
             * swapcache page is safe to use because we don't
             * know in advance if the VMA that this page belonged
             * to still exists. If the VMA and others sharing the
             * data have been freed, then the anon_vma could
             * already be invalid.
             *
             * To avoid this possibility, swapcache pages get
             * migrated but are not remapped when migration
             * completes
             */
            remap_swapcache = 0;
        } else {
            goto uncharge;
        }
    }
    ...
    /* Establish migration ptes or remove ptes */
    try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);

skip_unmap:
    if (!page_mapped(page))
        rc = move_to_new_page(newpage, page, remap_swapcache);

    if (rc && remap_swapcache)
        remove_migration_ptes(page, page);

    /* Drop an anon_vma reference if we took one */
    if (anon_vma)
        drop_anon_vma(anon_vma);

uncharge:
    if (!charge)
        mem_cgroup_end_migration(mem, page, newpage, rc == 0);
unlock:
    unlock_page(page);
move_newpage:
    ...
}
Through unmap_and_move(), the old pages are thus migrated over to the new ones.
The next step is to reclaim pages; the point is to make sure that taking a chunk of contiguous memory does not leave the system starved of memory:
->
    /*
     * Reclaim enough pages to make sure that contiguous allocation
     * will not starve the system.
     */
    __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
->
/*
 * Trigger memory pressure bump to reclaim some pages in order to be able to
 * allocate 'count' pages in single page units. Does similar work as
 * __alloc_pages_slowpath() function.
 */
static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count)
{
    enum zone_type high_zoneidx = gfp_zone(gfp_mask);
    struct zonelist *zonelist = node_zonelist(0, gfp_mask);
    int did_some_progress = 0;
    int order = 1;
    unsigned long watermark;

    /*
     * Increase level of watermarks to force kswapd do his job
     * to stabilise at new watermark level.
     */
    __update_cma_watermarks(zone, count);

    /* Obey watermarks as if the page was being allocated */
    watermark = low_wmark_pages(zone) + count;
    while (!zone_watermark_ok(zone, 0, watermark, 0, 0)) {
        wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone));

        did_some_progress = __perform_reclaim(gfp_mask, order, zonelist,
                                              NULL);
        if (!did_some_progress) {
            /* Exhausted what can be done so it's blamo time */
            out_of_memory(zonelist, gfp_mask, order, NULL);
        }
    }

    /* Restore original watermark levels. */
    __update_cma_watermarks(zone, -count);

    return count;
}
Freeing Contiguous Memory
Freeing the memory is straightforward; it starts from arch/arm/mm/dma-mapping.c:
void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)

->
arch/arm/mm/dma-mapping.c:
static void __free_from_contiguous(struct device *dev, struct page *page,
                                   size_t size)
{
    __dma_remap(page, size, pgprot_kernel);
    dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
}
->
bool dma_release_from_contiguous(struct device *dev, struct page *pages,
                                 int count)
{
    ...
    free_contig_range(pfn, count);
    ...
}
->
void free_contig_range(unsigned long pfn, unsigned nr_pages)
{
    for (; nr_pages--; ++pfn)
        __free_page(pfn_to_page(pfn));
}
This hands the pages straight back to the buddy system.
The migratetype of Kernel Memory Allocations
When the kernel allocates memory, the request carries GFP_ flags, and these GFP_ flags are translated into a migratetype:
static inline int allocflags_to_migratetype(gfp_t gfp_flags)
{
    WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);

    if (unlikely(page_group_by_mobility_disabled))
        return MIGRATE_UNMOVABLE;

    /* Group based on mobility */
    return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
           ((gfp_flags & __GFP_RECLAIMABLE) != 0);
}
Later, when pages are actually allocated, the free_list matching that migratetype is searched:
    page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                                  zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
                                  preferred_zone, migratetype);
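This is also why CMA memory remains usable while no device owns it: MIGRATE_MOVABLE allocations are allowed to fall back onto the MIGRATE_CMA free_list. The fallback table in mm/page_alloc.c of that era read roughly as follows (quoted from memory, so treat it as a sketch):

/*
 * This array describes the order in which the free lists are fallen
 * back to when the lists for the desired migrate type are depleted.
 */
static int fallbacks[MIGRATE_TYPES][4] = {
    [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
    [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE, MIGRATE_RESERVE },
#ifdef CONFIG_CMA
    /* movable allocations may dip into MIGRATE_CMA pageblocks */
    [MIGRATE_MOVABLE]     = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
    [MIGRATE_CMA]         = { MIGRATE_RESERVE }, /* Never used */
#else
    [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
#endif
    [MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* Never used */
};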
Finally, the author also wrote a small test module through which the CMA functionality can be exercised at any time:
/*
 * kernel module helper for testing CMA
 *
 * Licensed under GPLv2 or later.
 */

#include <linux/module.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/dma-mapping.h>

#define CMA_NUM 10
static struct device *cma_dev;
static dma_addr_t dma_phys[CMA_NUM];
static void *dma_virt[CMA_NUM];

/* any read request will free coherent memory, eg.
 * cat /dev/cma_test
 */
static ssize_t
cma_test_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
    int i;

    for (i = 0; i < CMA_NUM; i++) {
        if (dma_virt[i]) {
            dma_free_coherent(cma_dev, (i + 1) * SZ_1M, dma_virt[i], dma_phys[i]);
            _dev_info(cma_dev, "free virt: %p phys: %p\n", dma_virt[i], (void *)dma_phys[i]);
            dma_virt[i] = NULL;
            break;
        }
    }
    return 0;
}

/*
 * any write request will alloc coherent memory, eg.
 * echo 0 > /dev/cma_test
 */
static ssize_t
cma_test_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
    int i;
    int ret;

    for (i = 0; i < CMA_NUM; i++) {
        if (!dma_virt[i]) {
            dma_virt[i] = dma_alloc_coherent(cma_dev, (i + 1) * SZ_1M, &dma_phys[i], GFP_KERNEL);

            if (dma_virt[i]) {
                void *p;
                /* touch every page in the allocated memory */
                for (p = dma_virt[i]; p < dma_virt[i] + (i + 1) * SZ_1M; p += PAGE_SIZE)
                    *(u32 *)p = 0;

                _dev_info(cma_dev, "alloc virt: %p phys: %p\n", dma_virt[i], (void *)dma_phys[i]);
            } else {
                dev_err(cma_dev, "no mem in CMA area\n");
                ret = -ENOMEM;
            }
            break;
        }
    }

    return count;
}

static const struct file_operations cma_test_fops = {
    .owner = THIS_MODULE,
    .read  = cma_test_read,
    .write = cma_test_write,
};

static struct miscdevice cma_test_misc = {
    .name = "cma_test",
    .fops = &cma_test_fops,
};

static int __init cma_test_init(void)
{
    int ret = 0;

    ret = misc_register(&cma_test_misc);
    if (unlikely(ret)) {
        pr_err("failed to register cma test misc device!\n");
        return ret;
    }
    cma_dev = cma_test_misc.this_device;
    cma_dev->coherent_dma_mask = ~0;
    _dev_info(cma_dev, "registered.\n");

    return ret;
}
module_init(cma_test_init);

static void __exit cma_test_exit(void)
{
    misc_deregister(&cma_test_misc);
}
module_exit(cma_test_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>");
MODULE_DESCRIPTION("kernel module to help the test of CMA");
MODULE_ALIAS("CMA test");
Allocate memory:

# echo 0 > /dev/cma_test

Free memory:

# cat /dev/cma_test