Memory: Reserved Memory and Contiguous Memory Allocation


Reserved memory

Song Baohua (Barry Song)

When working with ARM and other embedded Linux systems, one headache is that the GPU, Camera, HDMI and similar devices all need large contiguous memory regions reserved for them. This memory sits unused most of the time, yet the conventional approach is to reserve it up front anyway. Marek Szyprowski and Michal Nazarewicz have now implemented a brand-new Contiguous Memory Allocator (CMA). With this mechanism we no longer need a static reservation: the memory remains available to the rest of the system in normal operation, and is only handed over to the Camera, HDMI and similar devices when they actually need it. The basic code flow is analysed below.

Declaring contiguous memory

During kernel boot, arm_memblock_init() in arch/arm/mm/init.c calls dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));

This function lives in drivers/base/dma-contiguous.c:

/**
 * dma_contiguous_reserve() - reserve area for contiguous memory handling
 * @limit: End address of the reserved memory (optional, 0 for any).
 *
 * This function reserves memory from early allocator. It should be
 * called by arch specific code once the early allocator (memblock or bootmem)
 * has been activated and all other subsystems have already allocated/reserved
 * memory.
 */
void __init dma_contiguous_reserve(phys_addr_t limit)
{
        unsigned long selected_size = 0;

        pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);

        if (size_cmdline != -1) {
                selected_size = size_cmdline;
        } else {
#ifdef CONFIG_CMA_SIZE_SEL_MBYTES
                selected_size = size_bytes;
#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)
                selected_size = cma_early_percent_memory();
#elif defined(CONFIG_CMA_SIZE_SEL_MIN)
                selected_size = min(size_bytes, cma_early_percent_memory());
#elif defined(CONFIG_CMA_SIZE_SEL_MAX)
                selected_size = max(size_bytes, cma_early_percent_memory());
#endif
        }

        if (selected_size) {
                pr_debug("%s: reserving %ld MiB for global area\n", __func__,
                         selected_size / SZ_1M);

                dma_declare_contiguous(NULL, selected_size, 0, limit);
        }
}

size_bytes is defined as:

static const unsigned long size_bytes = CMA_SIZE_MBYTES * SZ_1M;

By default CMA_SIZE_MBYTES is 16, i.e. a 16 MiB area, which comes from CONFIG_CMA_SIZE_MBYTES=16.
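The size of the global area can also be chosen at configuration or boot time; size_cmdline in dma_contiguous_reserve() above is filled in from the cma= kernel command-line parameter. A sketch of the relevant settings (the values are examples, not recommendations):

CONFIG_CMA=y
CONFIG_CMA_SIZE_MBYTES=16
CONFIG_CMA_SIZE_SEL_MBYTES=y

or, overriding the build-time default on the kernel command line:

cma=64M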

->

int __init dma_declare_contiguous(struct device *dev, unsigned long size,
                                  phys_addr_t base, phys_addr_t limit)
{
        ...
        /* Reserve memory */
        if (base) {
                if (memblock_is_region_reserved(base, size) ||
                    memblock_reserve(base, size) < 0) {
                        base = -EBUSY;
                        goto err;
                }
        } else {
                /*
                 * Use __memblock_alloc_base() since
                 * memblock_alloc_base() panic()s.
                 */
                phys_addr_t addr = __memblock_alloc_base(size, alignment, limit);
                if (!addr) {
                        base = -ENOMEM;
                        goto err;
                } else if (addr + size > ~(unsigned long)0) {
                        memblock_free(addr, size);
                        base = -EINVAL;
                        goto err;
                } else {
                        base = addr;
                }
        }

        /*
         * Each reserved area must be initialised later, when more kernel
         * subsystems (like slab allocator) are available.
         */
        r->start = base;
        r->size = size;
        r->dev = dev;
        cma_reserved_count++;
        pr_info("CMA: reserved %ld MiB at %08lx\n", size / SZ_1M,
                (unsigned long)base);

        /* Architecture specific contiguous memory fixup. */
        dma_contiguous_early_fixup(base, size);
        return 0;
err:
        pr_err("CMA: failed to reserve %ld MiB\n", size / SZ_1M);
        return base;
}

As we can see, the contiguous memory region is also grabbed early during kernel boot, via __memblock_alloc_base().
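Note that dma_declare_contiguous() takes a struct device * as its first argument, so besides the global area (dev == NULL) a board or platform file can also reserve a device-private CMA area while memblock is still available; cma_init_reserved_areas() below then attaches each area to its device via dev_set_cma_area(). A minimal sketch, where my_camera_pdev is a hypothetical platform device:

/* from machine/board init code, early in boot; base = 0 and limit = 0
 * mean 'place it anywhere suitable' */
dma_declare_contiguous(&my_camera_pdev.dev, 16 * SZ_1M, 0, 0);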

Additionally:

The core_initcall() in drivers/base/dma-contiguous.c causes cma_init_reserved_areas() to be called:

static int __init cma_init_reserved_areas(void)
{
        struct cma_reserved *r = cma_reserved;
        unsigned i = cma_reserved_count;

        pr_debug("%s()\n", __func__);

        for (; i; --i, ++r) {
                struct cma *cma;
                cma = cma_create_area(PFN_DOWN(r->start),
                                      r->size >> PAGE_SHIFT);
                if (!IS_ERR(cma))
                        dev_set_cma_area(r->dev, cma);
        }
        return 0;
}
core_initcall(cma_init_reserved_areas);

cma_create_area() calls cma_activate_area(), and cma_activate_area() in turn invokes, for each pageblock in the reserved range:

init_cma_reserved_pageblock(pfn_to_page(base_pfn));

That function marks the pages as MIGRATE_CMA via set_pageblock_migratetype(page, MIGRATE_CMA):

#ifdef CONFIG_CMA
/* Free whole pageblock and set it's migration type to MIGRATE_CMA. */
void __init init_cma_reserved_pageblock(struct page *page)
{
        unsigned i = pageblock_nr_pages;
        struct page *p = page;

        do {
                __ClearPageReserved(p);
                set_page_count(p, 0);
        } while (++p, --i);

        set_page_refcounted(page);
        set_pageblock_migratetype(page, MIGRATE_CMA);
        __free_pages(page, pageblock_order);
        totalram_pages += pageblock_nr_pages;
}
#endif
The __free_pages(page, pageblock_order) call inside it eventually reaches __free_one_page(page, zone, order, migratetype), and the pages are put on the MIGRATE_CMA free list:

list_add(&page->lru, &zone->free_area[order].free_list[migratetype]);
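For reference, the buddy allocator keeps one free list per migratetype inside each order's free_area (include/linux/mmzone.h), and the list_add() above appends to the MIGRATE_CMA one:

struct free_area {
        struct list_head        free_list[MIGRATE_TYPES];
        unsigned long           nr_free;
};

With CONFIG_CMA enabled, MIGRATE_CMA is one of the MIGRATE_TYPES, so CMA-backed pages get their own set of free lists in every zone.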


Allocating contiguous memory

Contiguous memory is still allocated through the standard dma_alloc_coherent() and dma_alloc_writecombine() defined in arch/arm/mm/dma-mapping.c; both of them end up calling dma_alloc_from_contiguous() in drivers/base/dma-contiguous.c, whose prototype and body are shown below.
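On the dma-mapping.c side, the glue that turns the byte size requested from dma_alloc_coherent() into the page count and alignment order expected by dma_alloc_from_contiguous() looks roughly like the sketch below. Treat it as an approximation: the helper name __alloc_from_contiguous and its remap step are inferred from the __free_from_contiguous() counterpart shown later, not quoted from the source.

static void *__alloc_from_contiguous(struct device *dev, size_t size,
                                     pgprot_t prot, struct page **ret_page)
{
        unsigned long order = get_order(size);
        size_t count = size >> PAGE_SHIFT;
        struct page *page;

        /* ask CMA for 'count' pages, aligned to 2^order pages */
        page = dma_alloc_from_contiguous(dev, count, order);
        if (!page)
                return NULL;

        /* give the kernel linear mapping the requested attributes */
        __dma_remap(page, size, prot);

        *ret_page = page;
        return page_address(page);
}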

struct page *dma_alloc_from_contiguous(struct device *dev, int count,
                                       unsigned int align)

->


struct page *dma_alloc_from_contiguous(struct device *dev, int count,
                                       unsigned int align)
{
        ...

        for (;;) {
                pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
                                                    start, count, mask);
                if (pageno >= cma->count) {
                        ret = -ENOMEM;
                        goto error;
                }

                pfn = cma->base_pfn + pageno;
                ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
                if (ret == 0) {
                        bitmap_set(cma->bitmap, pageno, count);
                        break;
                } else if (ret != -EBUSY) {
                        goto error;
                }
                pr_debug("%s(): memory range at %p is busy, retrying\n",
                         __func__, pfn_to_page(pfn));
                /* try again with a bit different memory target */
                start = pageno + mask + 1;
        }
        ...
}
->

int alloc_contig_range(unsigned long start, unsigned long end,
                       unsigned migratetype)

alloc_contig_range() first needs to isolate the pages; what the isolation achieves is spelled out by the comment in its code:

        /*
         * What we do here is we mark all pageblocks in range as
         * MIGRATE_ISOLATE.  Because of the way page allocator work, we
         * align the range to MAX_ORDER pages so that page allocator
         * won't try to merge buddies from different pageblocks and
         * change MIGRATE_ISOLATE to some other migration type.
         *
         * Once the pageblocks are marked as MIGRATE_ISOLATE, we
         * migrate the pages from an unaligned range (ie. pages that
         * we are interested in).  This will put all the pages in
         * range back to page allocator as MIGRATE_ISOLATE.
         *
         * When this is done, we take the pages in range from page
         * allocator removing them from the buddy system.  This way
         * page allocator will never consider using them.
         *
         * This lets us mark the pageblocks back as
         * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the
         * MAX_ORDER aligned range but not in the unaligned, original
         * range are put back to page allocator so that buddy can use
         * them.
         */

        ret = start_isolate_page_range(pfn_align_to_maxpage_down(start),
                                       pfn_align_to_maxpage_up(end),
                                       migratetype);

Put simply, the pages in the range are marked as MIGRATE_ISOLATE, so the buddy system will no longer hand them out.


/*
 * start_isolate_page_range() -- make page-allocation-type of range of pages
 * to be MIGRATE_ISOLATE.
 * @start_pfn: The lower PFN of the range to be isolated.
 * @end_pfn: The upper PFN of the range to be isolated.
 * @migratetype: migrate type to set in error recovery.
 *
 * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
 * the range will never be allocated. Any free pages and pages freed in the
 * future will not be allocated again.
 *
 * start_pfn/end_pfn must be aligned to pageblock_order.
 * Returns 0 on success and -EBUSY if any part of range cannot be isolated.
 */
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
                             unsigned migratetype)
{
        unsigned long pfn;
        unsigned long undo_pfn;
        struct page *page;

        BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
        BUG_ON((end_pfn) & (pageblock_nr_pages - 1));

        for (pfn = start_pfn;
             pfn < end_pfn;
             pfn += pageblock_nr_pages) {
                page = __first_valid_page(pfn, pageblock_nr_pages);
                if (page && set_migratetype_isolate(page)) {
                        undo_pfn = pfn;
                        goto undo;
                }
        }
        return 0;
undo:
        for (pfn = start_pfn;
             pfn < undo_pfn;
             pfn += pageblock_nr_pages)
                unset_migratetype_isolate(pfn_to_page(pfn), migratetype);

        return -EBUSY;
}

Next, __alloc_contig_migrate_range() is called to isolate and migrate the pages in the range:

static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
{
        /* This function is based on compact_zone() from compaction.c. */

        unsigned long pfn = start;
        unsigned int tries = 0;
        int ret = 0;

        struct compact_control cc = {
                .nr_migratepages = 0,
                .order = -1,
                .zone = page_zone(pfn_to_page(start)),
                .sync = true,
        };
        INIT_LIST_HEAD(&cc.migratepages);

        migrate_prep_local();

        while (pfn < end || !list_empty(&cc.migratepages)) {
                if (fatal_signal_pending(current)) {
                        ret = -EINTR;
                        break;
                }

                if (list_empty(&cc.migratepages)) {
                        cc.nr_migratepages = 0;
                        pfn = isolate_migratepages_range(cc.zone, &cc,
                                                         pfn, end);
                        if (!pfn) {
                                ret = -EINTR;
                                break;
                        }
                        tries = 0;
                } else if (++tries == 5) {
                        ret = ret < 0 ? ret : -EBUSY;
                        break;
                }

                ret = migrate_pages(&cc.migratepages,
                                    __alloc_contig_migrate_alloc,
                                    0, false, true);
        }

        putback_lru_pages(&cc.migratepages);
        return ret > 0 ? 0 : ret;
}

Inside it, migrate_pages() performs the actual page migration. During migration, the __alloc_contig_migrate_alloc() callback passed in here is used to allocate new pages, and the contents of the old pages are copied over to the new ones (a sketch of that callback follows the listing):

int migrate_pages(struct list_head *from,
                new_page_t get_new_page, unsigned long private, bool offlining,
                bool sync)
{
        int retry = 1;
        int nr_failed = 0;
        int pass = 0;
        struct page *page;
        struct page *page2;
        int swapwrite = current->flags & PF_SWAPWRITE;
        int rc;

        if (!swapwrite)
                current->flags |= PF_SWAPWRITE;

        for(pass = 0; pass < 10 && retry; pass++) {
                retry = 0;

                list_for_each_entry_safe(page, page2, from, lru) {
                        cond_resched();

                        rc = unmap_and_move(get_new_page, private,
                                                page, pass > 2, offlining,
                                                sync);

                        switch(rc) {
                        case -ENOMEM:
                                goto out;
                        case -EAGAIN:
                                retry++;
                                break;
                        case 0:
                                break;
                        default:
                                /* Permanent failure */
                                nr_failed++;
                                break;
                        }
                }
        }
        rc = 0;
...
}
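The get_new_page callback passed in above is __alloc_contig_migrate_alloc(). All it has to do is hand back one replacement page for every page being migrated out of the CMA range; a sketch of such a callback (the exact GFP flags are an assumption):

static struct page *
__alloc_contig_migrate_alloc(struct page *page, unsigned long private,
                             int **resultp)
{
        gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;

        /* keep highmem pages in highmem */
        if (PageHighMem(page))
                gfp_mask |= __GFP_HIGHMEM;

        return alloc_page(gfp_mask);
}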
The unmap_and_move() function used there is the key piece; it is defined in mm/migrate.c:

/*
 * Obtain the lock on page, remove all ptes and migrate the page
 * to the newly allocated page in newpage.
 */
static int unmap_and_move(new_page_t get_new_page, unsigned long private,
            struct page *page, int force, bool offlining, bool sync)
{
    int rc = 0;
    int *result = NULL;
    struct page *newpage = get_new_page(page, private, &result);
    int remap_swapcache = 1;
    int charge = 0;
    struct mem_cgroup *mem = NULL;
    struct anon_vma *anon_vma = NULL;

    ...

    /* charge against new page */
    charge = mem_cgroup_prepare_migration(page, newpage, &mem);
    ...

    if (PageWriteback(page)) {
        if (!force || !sync)
            goto uncharge;
        wait_on_page_writeback(page);
    }
    /*
     * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
     * we cannot notice that anon_vma is freed while we migrates a page.
     * This get_anon_vma() delays freeing anon_vma pointer until the end
     * of migration. File cache pages are no problem because of page_lock()
     * File Caches may use write_page() or lock_page() in migration, then,
     * just care Anon page here.
     */
    if (PageAnon(page)) {
        /*
         * Only page_lock_anon_vma() understands the subtleties of
         * getting a hold on an anon_vma from outside one of its mms.
         */
        anon_vma = page_lock_anon_vma(page);
        if (anon_vma) {
            /*
             * Take a reference count on the anon_vma if the
             * page is mapped so that it is guaranteed to
             * exist when the page is remapped later
             */
            get_anon_vma(anon_vma);
            page_unlock_anon_vma(anon_vma);
        } else if (PageSwapCache(page)) {
            /*
             * We cannot be sure that the anon_vma of an unmapped
             * swapcache page is safe to use because we don't
             * know in advance if the VMA that this page belonged
             * to still exists. If the VMA and others sharing the
             * data have been freed, then the anon_vma could
             * already be invalid.
             *
             * To avoid this possibility, swapcache pages get
             * migrated but are not remapped when migration
             * completes
             */
            remap_swapcache = 0;
        } else {
            goto uncharge;
        }
    }

    ...
    /* Establish migration ptes or remove ptes */
    try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);

skip_unmap:
    if (!page_mapped(page))
        rc = move_to_new_page(newpage, page, remap_swapcache);

    if (rc && remap_swapcache)
        remove_migration_ptes(page, page);

    /* Drop an anon_vma reference if we took one */
    if (anon_vma)
        drop_anon_vma(anon_vma);

uncharge:
    if (!charge)
        mem_cgroup_end_migration(mem, page, newpage, rc == 0);
unlock:
    unlock_page(page);

move_newpage:
    ...
}
Through unmap_and_move(), the old pages are migrated over to the new ones.

Next, pages are reclaimed, so that the system is not left starved of memory just because a large contiguous chunk has been taken:

->

        /*
         * Reclaim enough pages to make sure that contiguous allocation
         * will not starve the system.
         */
        __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);

->

/*
 * Trigger memory pressure bump to reclaim some pages in order to be able to
 * allocate 'count' pages in single page units. Does similar work as
 * __alloc_pages_slowpath() function.
 */
static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count)
{
        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
        struct zonelist *zonelist = node_zonelist(0, gfp_mask);
        int did_some_progress = 0;
        int order = 1;
        unsigned long watermark;

        /*
         * Increase level of watermarks to force kswapd do his job
         * to stabilise at new watermark level.
         */
        __update_cma_watermarks(zone, count);

        /* Obey watermarks as if the page was being allocated */
        watermark = low_wmark_pages(zone) + count;
        while (!zone_watermark_ok(zone, 0, watermark, 0, 0)) {
                wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone));

                did_some_progress = __perform_reclaim(gfp_mask, order, zonelist,
                                                      NULL);
                if (!did_some_progress) {
                        /* Exhausted what can be done so it's blamo time */
                        out_of_memory(zonelist, gfp_mask, order, NULL);
                }
        }

        /* Restore original watermark levels. */
        __update_cma_watermarks(zone, -count);

        return count;
}


Releasing contiguous memory

Freeing the memory is also straightforward; the path is simply:

arch/arm/mm/dma-mapping.c:

void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
->

arch/arm/mm/dma-mapping.c:

static void __free_from_contiguous(struct device *dev, struct page *page,
                                   size_t size)
{
        __dma_remap(page, size, pgprot_kernel);
        dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
}

->

bool dma_release_from_contiguous(struct device *dev, struct page *pages,
                                 int count)
{
        ...
        free_contig_range(pfn, count);
        ...
}

->

void free_contig_range(unsigned long pfn, unsigned nr_pages)
{
        for (; nr_pages--; ++pfn)
                __free_page(pfn_to_page(pfn));
}
which hands the pages back to the buddy system. (The elided code in dma_release_from_contiguous() also clears the corresponding bits in cma->bitmap, so the range can be handed out again later.)


The migratetype of kernel memory allocations

When the kernel allocates memory it carries GFP_ flags, and the GFP_ flags can be converted into a migratetype:

static inline int allocflags_to_migratetype(gfp_t gfp_flags)
{
        WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);

        if (unlikely(page_group_by_mobility_disabled))
                return MIGRATE_UNMOVABLE;

        /* Group based on mobility */
        return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
                ((gfp_flags & __GFP_RECLAIMABLE) != 0);
}
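A quick worked example, assuming the usual enum ordering MIGRATE_UNMOVABLE = 0, MIGRATE_RECLAIMABLE = 1, MIGRATE_MOVABLE = 2: GFP_KERNEL carries neither __GFP_MOVABLE nor __GFP_RECLAIMABLE, so the expression evaluates to 0, i.e. MIGRATE_UNMOVABLE; GFP_HIGHUSER_MOVABLE carries __GFP_MOVABLE, so it evaluates to (1 << 1) | 0 = 2, i.e. MIGRATE_MOVABLE, which is the kind of allocation that MIGRATE_CMA pageblocks are allowed to satisfy.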
Later, when memory is actually allocated, the free_list that matches the migratetype is the one consulted:

        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                        zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
                        preferred_zone, migratetype);

In addition, I wrote a small test module which can be used to exercise CMA at any time:

/*
 * kernel module helper for testing CMA
 *
 * Licensed under GPLv2 or later.
 */

#include <linux/module.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/dma-mapping.h>

#define CMA_NUM  10
static struct device *cma_dev;
static dma_addr_t dma_phys[CMA_NUM];
static void *dma_virt[CMA_NUM];

/* any read request will free coherent memory, eg.
 * cat /dev/cma_test
 */
static ssize_t
cma_test_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
    int i;

    for (i = 0; i < CMA_NUM; i++) {
        if (dma_virt[i]) {
            dma_free_coherent(cma_dev, (i + 1) * SZ_1M, dma_virt[i], dma_phys[i]);
            _dev_info(cma_dev, "free virt: %p phys: %p\n", dma_virt[i], (void *)dma_phys[i]);
            dma_virt[i] = NULL;
            break;
        }
    }
    return 0;
}

/*
 * any write request will alloc coherent memory, eg.
 * echo 0 > /dev/cma_test
 */
static ssize_t
cma_test_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
    int i;
    int ret;

    for (i = 0; i < CMA_NUM; i++) {
        if (!dma_virt[i]) {
            dma_virt[i] = dma_alloc_coherent(cma_dev, (i + 1) * SZ_1M, &dma_phys[i], GFP_KERNEL);

            if (dma_virt[i]) {
                void *p;
                /* touch every page in the allocated memory */
                for (p = dma_virt[i]; p < dma_virt[i] + (i + 1) * SZ_1M; p += PAGE_SIZE)
                    *(u32 *)p = 0;

                _dev_info(cma_dev, "alloc virt: %p phys: %p\n", dma_virt[i], (void *)dma_phys[i]);
            } else {
                dev_err(cma_dev, "no mem in CMA area\n");
                ret = -ENOMEM;
            }
            break;
        }
    }

    return count;
}

static const struct file_operations cma_test_fops = {
    .owner =    THIS_MODULE,
    .read  =    cma_test_read,
    .write =    cma_test_write,
};

static struct miscdevice cma_test_misc = {
    .name = "cma_test",
    .fops = &cma_test_fops,
};

static int __init cma_test_init(void)
{
    int ret = 0;

    ret = misc_register(&cma_test_misc);
    if (unlikely(ret)) {
        pr_err("failed to register cma test misc device!\n");
        return ret;
    }
    cma_dev = cma_test_misc.this_device;
    cma_dev->coherent_dma_mask = ~0;
    _dev_info(cma_dev, "registered.\n");

    return ret;
}
module_init(cma_test_init);

static void __exit cma_test_exit(void)
{
    misc_deregister(&cma_test_misc);
}
module_exit(cma_test_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>");
MODULE_DESCRIPTION("kernel module to help the test of CMA");
MODULE_ALIAS("CMA test");

To allocate memory:

# echo 0 > /dev/cma_test
To free memory:

# cat /dev/cma_test
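To build the module out of tree, a one-line kbuild makefile is enough (a sketch; cma_test.c is whatever name the source above was saved under, and the kernel tree path must match your target):

obj-m := cma_test.o

then build it against the target kernel, for example:

make -C /path/to/your/kernel/tree M=$PWD ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- modules

After insmod cma_test.ko, the allocation and free messages printed by _dev_info() above can be seen with dmesg.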