vmalloc

/*************************************************************************/
The code below is taken from mm/vmalloc.c (a 3.x-era kernel); the snippets are abridged to show the allocation path.
/**
 *    vmalloc  -  allocate virtually contiguous memory
 *    @size:        allocation size
 *    Allocate enough pages to cover @size from the page level
 *    allocator and map them into contiguous kernel virtual space.
 *
 *    For tight control over page level allocator and protection flags
 *    use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
    return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM);
}
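A typical call site, for reference (a minimal sketch; the buffer and the size are made up for illustration):

    #include <linux/vmalloc.h>

    void *buf = vmalloc(64 * 1024);    /* 64 KiB, virtually contiguous */
    if (!buf)
        return -ENOMEM;
    /* buf is usable like any kernel buffer, but its backing pages are
     * not physically contiguous, so it must not be handed to DMA
     * engines that require physical contiguity */
    vfree(buf);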

static inline void *__vmalloc_node_flags(unsigned long size,
                    int node, gfp_t flags)
{
    return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
                    node, __builtin_return_address(0));
}

/*    Allocate enough pages to cover @size from the page level
 *    allocator with @gfp_mask flags.  Map them into contiguous
 *    kernel virtual space, using a pagetable protection of @prot.
 */
static void *__vmalloc_node(unsigned long size, unsigned long align,
                gfp_t gfp_mask, pgprot_t prot,
                int node, void *caller)
{
    return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
                gfp_mask, prot, node, caller);
}
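__vmalloc_node() pins the search to [VMALLOC_START, VMALLOC_END), the dedicated vmalloc arena of the kernel address space. On 32-bit x86 that arena begins VMALLOC_OFFSET (8 MB) above the direct mapping of lowmem, and the unmapped 8 MB gap catches stray accesses that run off the end of the direct map.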

void *__vmalloc_node_range(unsigned long size, unsigned long align,
            unsigned long start, unsigned long end, gfp_t gfp_mask,
            pgprot_t prot, int node, void *caller)
{
    struct vm_struct *area;
    void *addr;
    unsigned long real_size = size;

    area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST,
                  start, end, node, gfp_mask, caller);
    addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);

    /*
     * In this function, newly allocated vm_struct is not added
     * to vmlist at __get_vm_area_node(). so, it is added here.
     */
    insert_vmalloc_vmlist(area);

    /*
     * A ref_count = 3 is needed because the vm_struct and vmap_area
     * structures allocated in the __get_vm_area_node() function contain
     * references to the virtual address of the vmalloc'ed block.
     */
    kmemleak_alloc(addr, real_size, 3, gfp_mask);

    return addr;
}

This function breaks down into two parts: __get_vm_area_node() reserves a free range of kernel virtual addresses (a vmap_area plus its vm_struct descriptor), and __vmalloc_area_node() allocates the backing physical pages and maps them into that range.
static struct vm_struct *__get_vm_area_node(unsigned long size,
        unsigned long align, unsigned long flags, unsigned long start,
        unsigned long end, int node, gfp_t gfp_mask, void *caller)
{
    struct vmap_area *va;
    struct vm_struct *area;
    /* Allocate memory for the vm_struct descriptor itself */
    area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
    /*
     * We always allocate a guard page.
     */
    size += PAGE_SIZE;

    va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
    
    setup_vmalloc_vm(area, va, flags, caller);
}
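Note the size += PAGE_SIZE above: every allocation reserves one extra, never-mapped guard page at the end of its virtual range. vmalloc(4096) on a 4 KiB-page system therefore consumes 8192 bytes of virtual address space but only one physical page, and a sequential overrun past the mapped page faults immediately instead of silently scribbling on the next allocation.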

/*
 * Allocate a region of KVA of the specified size and alignment, within the
 * vstart and vend.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
                unsigned long align,
                unsigned long vstart, unsigned long vend,
                int node, gfp_t gfp_mask)
{
    struct vmap_area *va;
    struct rb_node *n;
    unsigned long addr;
    int purged = 0;
    struct vmap_area *first;

    /* (elided) va is kmalloc'ed, vmap_area_lock is taken, addr is set
     * to ALIGN(vstart, align), and the rbtree rooted at vmap_area_root
     * is walked down to "first", the lowest existing area above addr */

    /* from the starting point, walk areas until a suitable hole is found */
    while (addr + size > first->va_start && addr + size <= vend) {
        if (addr + cached_hole_size < first->va_start)
            cached_hole_size = first->va_start - addr;
        addr = ALIGN(first->va_end, align);
        if (addr + size - 1 < addr)
            goto overflow;

        n = rb_next(&first->rb_node);
        if (n)
            first = rb_entry(n, struct vmap_area, rb_node);
        else
            goto found;
    }

found:
    if (addr + size > vend)
        goto overflow;

    va->va_start = addr;
    va->va_end = addr + size;
    va->flags = 0;
    __insert_vmap_area(va);
    free_vmap_cache = &va->rb_node;
    spin_unlock(&vmap_area_lock);
    return va;

overflow:
    /* (elided) lazily-freed areas are purged once via
     * purge_vmap_area_lazy() and the search is retried; if it fails
     * again, va is freed and ERR_PTR(-EBUSY) is returned */
}
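The search is a plain first-fit walk over the address-sorted rbtree of existing vmap_areas; free_vmap_cache remembers where the last search ended so that the common case does not rescan from vstart every time.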

/* Tie the vm_struct and the vmap_area together */
static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
                  unsigned long flags, void *caller)
{
    vm->flags = flags;
    vm->addr = (void *)va->va_start;
    vm->size = va->va_end - va->va_start;
    vm->caller = caller;
    va->vm = vm;
    va->flags |= VM_VM_AREA;
}
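Setting VM_VM_AREA marks the vmap_area as backing a vmalloc-style mapping, which is what enables the reverse lookup from an address to its descriptor. A hypothetical check using the in-tree helper find_vm_area() (buf is assumed to be a vmalloc'ed pointer):

    struct vm_struct *vm = find_vm_area(buf);
    if (vm)
        pr_info("block %p: %lu bytes, allocated from %pS\n",
                vm->addr, vm->size, vm->caller);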

static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
                 pgprot_t prot, int node, void *caller)
{
    const int order = 0;
    struct page **pages;
    unsigned int nr_pages, array_size, i;
    gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;

    nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
    array_size = (nr_pages * sizeof(struct page *));

    area->nr_pages = nr_pages;
    pages = kmalloc_node(array_size, nested_gfp, node);

    area->pages = pages;

    for (i = 0; i < area->nr_pages; i++) {
        struct page *page;
        gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;

        page = alloc_page(tmp_mask);

        area->pages[i] = page;
    }
    map_vm_area(area, prot, &pages);

    return area->addr;
}
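Since each backing page comes from an independent alloc_page() call, the pages are in general scattered across physical memory; only the kernel virtual range is contiguous. When the physical page behind a given offset is needed later, vmalloc_to_page() walks the page tables to recover it. A sketch (buf is assumed to be a vmalloc'ed pointer, i a page index inside it):

    struct page *pg = vmalloc_to_page(buf + i * PAGE_SIZE);
    unsigned long pfn = page_to_pfn(pg);    /* physical frame number */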

int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
{
    unsigned long addr = (unsigned long)area->addr;
    unsigned long end = addr + area->size - PAGE_SIZE;
    int err;

    err = vmap_page_range(addr, end, prot, *pages);

    return err;
}

static int vmap_page_range(unsigned long start, unsigned long end,
               pgprot_t prot, struct page **pages)
{
    int ret;

    ret = vmap_page_range_noflush(start, end, prot, pages);
    flush_cache_vmap(start, end);
    return ret;
}
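flush_cache_vmap() only does real work on architectures with virtually indexed caches, where stale lines covering the new mapping must be written back or invalidated; on x86 it compiles away to a no-op.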

/*
 * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
 * will have pfns corresponding to the "pages" array.
 *
 * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
 */
static int vmap_page_range_noflush(unsigned long start, unsigned long end,
                   pgprot_t prot, struct page **pages)
{
    pgd_t *pgd;
    unsigned long next;
    unsigned long addr = start;
    int err = 0;
    int nr = 0;

    BUG_ON(addr >= end);
    pgd = pgd_offset_k(addr);
    do {
        next = pgd_addr_end(addr, end);
        err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
        if (err)
            return err;
    } while (pgd++, addr = next, addr != end);

    return nr;
}
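Each vmap_*_range() helper repeats this pattern one level further down the page-table hierarchy, allocating intermediate tables in init_mm as needed. As a sketch of the next level (following the 3.x source):

    static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
            unsigned long end, pgprot_t prot, struct page **pages, int *nr)
    {
        pud_t *pud;
        unsigned long next;

        pud = pud_alloc(&init_mm, pgd, addr);
        if (!pud)
            return -ENOMEM;
        do {
            next = pud_addr_end(addr, end);
            if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
                return -ENOMEM;
        } while (pud++, addr = next, addr != end);
        return 0;
    }

Finally, the two descriptor structures as dumped by the crash utility (the sizes reflect a 32-bit kernel):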

crash> vmap_area
struct vmap_area {
    unsigned long va_start;
    unsigned long va_end;
    unsigned long flags;
    struct rb_node rb_node;
    struct list_head list;
    struct list_head purge_list;
    struct vm_struct *vm;
    struct rcu_head rcu_head;
}
SIZE: 52

crash> vm_struct
struct vm_struct {
    struct vm_struct *next;
    void *addr;
    unsigned long size;
    unsigned long flags;
    struct page **pages;
    unsigned int nr_pages;
    phys_addr_t phys_addr;
    void *caller;
}
SIZE: 32
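At runtime the same bookkeeping is visible through /proc/vmallocinfo, which lists each mapping's virtual range, size, number of pages and the caller recorded in vm->caller.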