linux 0.11内核源码 malloc 的实现

来源:互联网 发布:visio mac版 下载 编辑:程序博客网 时间:2024/05/21 09:07
这里分析linux 0.11内核源码中, 给内核使用的malloc函数(注意,是给内核使用的,不是给应用程序使用的,不是glibc)

直接上图:
解释:
    这个图只以bucket_size=16的桶链表为例.
    第一个桶描述符对应的页面(buf)中, 大部分obj已经被分配出去, 只剩一个空闲obj.
    第二个桶描述符中有两个obj被分配出去.
    第三个是全新的桶描述符, 处于还没有任何obj被分配出去的状态.
    一个page中能存放的obj个数在图中没有体现出来, 因为数量太多不好画图; 例如bucket_size=16时, 一页(4096字节)可存放 4096/16 = 256 个obj.


/*
 * malloc.c --- a general purpose kernel memory allocator for Linux.
 * 
 * Written by Theodore Ts'o (tytso@mit.edu), 11/29/91
 *
 * This routine is written to be as fast as possible, so that it
 * can be called from the interrupt level.
 *
 * Limitations: maximum size of memory we can allocate using this routine
 * is 4k, the size of a page in Linux.
 *
 * The general game plan is that each page (called a bucket) will only hold
 * objects of a given size.  When all of the objects on a page are released,
 * the page can be returned to the general free pool.  When malloc() is
 * called, it looks for the smallest bucket size which will fulfill its
 * request, and allocate a piece of memory from that bucket pool.
 *
 * Each bucket has as its control block a bucket descriptor which keeps 
 * track of how many objects are in use on that page, and the free list
 * for that page.  Like the buckets themselves, bucket descriptors are
 * stored on pages requested from get_free_page().  However, unlike buckets,
 * pages devoted to bucket descriptor pages are never released back to the
 * system.  Fortunately, a system should probably only need 1 or 2 bucket
 * descriptor pages, since a page can hold 256 bucket descriptors (which
 * corresponds to 1 megabyte worth of bucket pages.)  If the kernel is using 
 * that much allocated memory, it's probably doing something wrong.  :-)
 *
 * Note: malloc() and free() both call get_free_page() and free_page()
 * in sections of code where interrupts are turned off, to allow
 * malloc() and free() to be safely called from an interrupt routine.
 * (We will probably need this functionality when networking code,
 * particularly things like NFS, is added to Linux.)  However, this
 * presumes that get_free_page() and free_page() are interrupt-level
 * safe, which they may not be once paging is added.  If this is the
 * case, we will need to modify malloc() to keep a few unused pages
 * "pre-allocated" so that it can safely draw upon those pages if
 *  it is called from an interrupt routine.
 *
 *  Another concern is that get_free_page() should not sleep; if it 
 * does, the code is carefully ordered so as to avoid any race 
 * conditions.  The catch is that if malloc() is called re-entrantly, 
 * there is a chance that unnecessary pages will be grabbed from the 
 * system.  Except for the pages for the bucket descriptor page, the 
 * extra pages will eventually get released back to the system, though,
 * so it isn't all that bad.
 */


#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/system.h>

/*
 * Bucket descriptor: the control block for one page ("bucket") that has
 * been carved into equally sized objects.
 *
 * The size note below holds for 32-bit pointers and 16-bit shorts.
 */
struct bucket_desc { /* 16 bytes */
    void *page;                 /* start address of the page backing this bucket */
    struct bucket_desc *next;   /* next descriptor on the same chain (a per-size
                                   chain, or the free-descriptor list) */
    void *freeptr;              /* first free object in the page; 0 when none is left */
    unsigned short refcnt;      /* number of objects currently handed out */
    unsigned short bucket_size; /* size in bytes of every object in this bucket */
};

/*
 * Directory entry: one per supported object size, pointing at the chain
 * of bucket descriptors that serve that size.
 */
struct _bucket_dir { /* 8 bytes */
    /* Object size, in bytes, served by the buckets on this chain. */
    int size;
    /* Head of the bucket descriptor chain for this size; 0 when empty. */
    struct bucket_desc *chain;
};


/*
 * The following is where we store a pointer to the first bucket
 * descriptor for a given size.  
 *
 * If it turns out that the Linux kernel allocates a lot of objects of a
 * specific size, then we may want to add that specific size to this list,
 * since that will allow the memory to be allocated more efficiently.
 * However, since an entire page must be dedicated to each specific size
 * on this list, some amount of temperance must be exercised here.
 *
 * Note that this list *must* be kept in order.
 */
/*
 * Global directory of bucket chains, one entry per object size.
 * Entries are listed in ascending size order and every chain starts
 * out empty.  The table is terminated by an entry whose size is 0.
 */
struct _bucket_dir bucket_dir[] = {
    {   16, 0 },
    {   32, 0 },
    {   64, 0 },
    {  128, 0 },
    {  256, 0 },
    {  512, 0 },
    { 1024, 0 },
    { 2048, 0 },
    { 4096, 0 },
    {    0, 0 }    /* End of list marker */
};


/*
 * This contains a linked list of free bucket descriptor blocks
 */
// Global head of the linked list of unused bucket descriptors; starts out
// empty (null) and is refilled by init_bucket_desc().
struct bucket_desc *free_bucket_desc = (struct bucket_desc *) 0;

/*
 * Grab one page from get_free_page(), treat it as an array of bucket
 * descriptors, chain those descriptors together and push the whole run
 * onto the front of the free_bucket_desc list.  Panics if no page is
 * available.
 */
static inline void init_bucket_desc()
{
    struct bucket_desc *head, *cur;
    int    remaining;

    /* One fresh page becomes PAGE_SIZE/sizeof(struct bucket_desc) descriptors. */
    head = (struct bucket_desc *) get_free_page();
    cur = head;
    if (!cur)
        panic("Out of memory in init_bucket_desc()");

    /* Point every descriptor except the last at its right-hand neighbour. */
    remaining = PAGE_SIZE/sizeof(struct bucket_desc);
    while (remaining > 1) {
        cur->next = cur + 1;
        cur++;
        remaining--;
    }

    /*
     * The global list is only touched here, after get_free_page() has
     * returned, to avoid race conditions in case get_free_page() sleeps
     * and this routine gets called again in the meantime.
     */
    cur->next = free_bucket_desc;    /* last new descriptor -> old list */
    free_bucket_desc = head;
}


void *malloc(unsigned int len)
{
    struct _bucket_dir    *bdir;
    struct bucket_desc    *bdesc;
    void            *retval;


    /*
     * First we search the bucket_dir to find the right bucket change
     * for this request.
     */
    // 根据len大小,找到一个best-fit最佳大小的桶索引
    for (bdir = bucket_dir; bdir->size; bdir++)
        if (bdir->size >= len)
            break;
    if (!bdir->size) {
        printk("malloc called with impossibly large argument (%d)\n",
            len);
        panic("malloc: bad arg");
    }
    /*
     * Now we search for a bucket descriptor which has free space
     */
    cli();    /* Avoid race conditions */
    // 找到桶索引了, 继续找该索引下挂载的一桶链表,看哪个桶中有空闲的obj
    for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) 
        if (bdesc->freeptr)
            break;
    /*
     * If we didn't find a bucket with free space, then we'll 
     * allocate a new one.
     */
    if (!bdesc) {
        char        *cp;
        int        i;

        // 从空闲桶描述符链表中摘下一个
        if (!free_bucket_desc)    
            init_bucket_desc();
        bdesc = free_bucket_desc;
        free_bucket_desc = bdesc->next;

        // 初始化桶描述符
        bdesc->refcnt = 0;
        bdesc->bucket_size = bdir->size; // 该桶中的buf的obj大小固定为bdir->size

        // 新分配一页
        // freeptr 指向第一个obj
        bdesc->page = bdesc->freeptr = (void *) cp = get_free_page();
        if (!cp)
            panic("Out of memory in kernel malloc()");
        /* Set up the chain of free objects */
        // 这里又是非常具有技巧性的代码
        for (i=PAGE_SIZE/bdir->size; i > 1; i--) {
            *((char **) cp) = cp + bdir->size; // 将当前obj的开头(cp指向的内存)指向下一个obj的地址(cp+bdir->size)
            cp += bdir->size; // cp指向下一个obj
        }
        // 最后一个obj的的开头指向0. 后面没obj了.
        *((char **) cp) = 0;

        // 把该桶描述符挂载具有相同size的桶描述符链表里.
        bdesc->next = bdir->chain; /* OK, link it in! */
        bdir->chain = bdesc;
    }
    // 找到了. 把freeptr指向的obj分配出去,
    retval = (void *) bdesc->freeptr;
    // 从空闲obj链表上取下,freeptr指向obj指向的下一个空闲obj
    bdesc->freeptr = *((void **) retval);
    bdesc->refcnt++;
    sti();    /* OK, we're safe again */
    return(retval);
}


/*
 * Here is the free routine.  If you know the size of the object that you
 * are freeing, then free_s() will use that information to speed up the
 * search for the bucket descriptor.
 * 
 * We will #define a macro so that "free(x)" is becomes "free_s(x, 0)"
 */
void free_s(void *obj, int size)
{
    void        *page;
    struct _bucket_dir    *bdir;
    struct bucket_desc    *bdesc, *prev;


    /* Calculate what page this object lives in */
    // 得到obj所在的页面起始地址
    page = (void *)  ((unsigned long) obj & 0xfffff000);

    /* Now search the buckets looking for that page */
    for (bdir = bucket_dir; bdir->size; bdir++) {
        prev = 0;
        /* If size is zero then this conditional is always false */
        if (bdir->size < size)
            continue;

        // 找到了obj的大小所在桶索引
        for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
            // 根据页面地址确定落在了哪个桶里
            if (bdesc->page == page) 
                goto found;
            prev = bdesc;
        }
    }
    panic("Bad address passed to kernel free_s()");
found:
    cli(); /* To avoid race conditions */
    // 此时已经找到了obj所在的桶描述符,归还obj到桶的空闲obj链表中
    // 把obj挂载在空闲链表的头部
    *((void **)obj) = bdesc->freeptr;
    bdesc->freeptr = obj;
    bdesc->refcnt--;
   
    // 如果当前桶中的page已经全部空闲. 没有分配出去的obj了. 那么释放page吧
    if (bdesc->refcnt == 0) {
        /*
         * We need to make sure that prev is still accurate.  It
         * may not be, if someone rudely interrupted us....
         */
        // 保险起见,这里有做了一次prev的检查, 如果有问题, 则重新计算prev
        // 因为prev是在关中断cli()之前就做的,所以,有可能被改变.
        // 这里在做一次检查,如果对的,那么就简单了,直接跳过.
        if ((prev && (prev->next != bdesc)) ||
            (!prev && (bdir->chain != bdesc)))
            for (prev = bdir->chain; prev; prev = prev->next)
                if (prev->next == bdesc)
                    break;
        // 把当前桶描述符从中桶链表中去掉
        if (prev)
            prev->next = bdesc->next;
        else {
            if (bdir->chain != bdesc)
                panic("malloc bucket chains corrupted");
            bdir->chain = bdesc->next;
        }
        // 释放桶申请的buf
        free_page((unsigned long) bdesc->page);
        // 把桶描述符加入到空闲桶描述符链表中.
        bdesc->next = free_bucket_desc;
        free_bucket_desc = bdesc;
    }
    sti();
    return;
}
原创粉丝点击