Linux 内核学习之内存寻址(二) Linux内存寻址

来源:互联网 发布:unity后期美工师招聘 编辑:程序博客网 时间:2024/05/17 06:03


Linux以非常有限的方式使用分段。2.6 版本的Linux只在x86体系结构要求时才使用分段。

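| 段 | Base | G | Limit | S | Type | DPL | D/B | P |
|---|---|---|---|---|---|---|---|---|
| 用户代码段 | 0x00000000 | 1 | 0xfffff | 1 | 10 | 3 | 1 | 1 |
| 用户数据段 | 0x00000000 | 1 | 0xfffff | 1 | 2 | 3 | 1 | 1 |
| 内核代码段 | 0x00000000 | 1 | 0xfffff | 1 | 10 | 0 | 1 | 1 |
| 内核数据段 | 0x00000000 | 1 | 0xfffff | 1 | 2 | 0 | 1 | 1 |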


每个CPU都有一个全局描述符表(GDT),其中包含18个段描述符,以及14个空的、未使用的或保留的项。



从 2.6.11 版本开始,Linux采用四级分页模型(为了同时兼容32位和64位体系结构):

  • 页全局目录 Page Global Directory
  • 页上级目录 Page Upper Directory
  • 页中间级目录 Page Middle Directory
  • 页表 Page Table


1. 32位系统无PAE,二级目录 页全局+页表+offset;
2. 32位系统带PAE,三级目录 页全局+页中间级+页表+offset;
3. 64位系统采用三级还是四级依赖于对线性地址的划分。
注:开启大页(4MB or 2MB)的话,页表和offset合并为offset.


一般来说,Linux内核安装在RAM中从物理地址0x0010 0000开始的地方,也就是说从第二MB开始。典型的配置所得到的内核可以安装在小于3MB的RAM中。(这么做是因为PC体系结构,RAM的前1MB留供BIOS使用)。

1. 内核创建一个有限的地址空间,包括内核的代码段和数据段、初始页表和用于存放动态数据结构的共128KB大小的空间。这个空间只够内核装入RAM和对其初始化的核心数据结构。
2. 内核充分利用剩余的RAM并适当建立分页页表。

下面代码是内核初始化页表的源代码(Linux kernel 4.0)。为了在开启分页前后都能寻址——既能通过与物理地址相同的线性地址(恒等映射),也能通过 PAGE_OFFSET 以上的内核线性地址访问同一段内存——页目录项的 0 和 768、1 和 769 …… 分别指向同一页表(未开启PAE,且 PAGE_OFFSET = 0xc0000000 时)。

/*
 * PAGE_TABLE_SIZE(pages): page-table size, counted in pages, for mapping
 * `pages` pages (the result is shifted left by PAGE_SHIFT below).
 * When a PMD level exists (PTRS_PER_PMD > 1) the count is
 * pages / PTRS_PER_PMD plus PTRS_PER_PGD; otherwise it is
 * pages / PTRS_PER_PGD.
 */
#if PTRS_PER_PMD > 1
#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
#else
#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
#endif

/*
 * Number of possible pages in the lowmem region.
 *
 * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
 * gas warning about overflowing shift count when gas has been compiled
 * with only a host target support using a 32-bit type for internal
 * representation.
 */
LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)

/* Enough space to fit pagetables for the low memory linear map */
MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT

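可以算出 MAPPING_BEYOND_END = 0x100000(1MB)。推导(未开启PAE,取 __PAGE_OFFSET = 0xc0000000、PAGE_SHIFT = 12、PTRS_PER_PGD = 1024):LOWMEM_PAGES = (0x100000000 - 0xc0000000) >> 12 = 0x40000 页;PAGE_TABLE_SIZE(LOWMEM_PAGES) = 0x40000 / 1024 = 256 页;256 << 12 = 0x100000。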

/*
 * Initialize page tables.  This creates a PDE and a set of page
 * tables, which are located immediately beyond __brk_base.  The variable
 * _brk_end is set up to point to the first "safe" location.
 * Mappings are created both at virtual address 0 (identity mapping)
 * and PAGE_OFFSET for up to _end.
 *
 * Register roles in both variants below:
 *   %edi  write cursor into the page tables being built (physical address)
 *   %edx  cursor into the directory (PMD for PAE, PGD otherwise)
 *   %eax  next page-table entry value: physical page address + attribute bits
 *   %ecx  scratch / inner loop counter
 *   %ebp  end-of-mapping limit, in the same "address + attributes" form as %eax
 */
#ifdef CONFIG_X86_PAE

    /*
     * In PAE mode initial_page_table is statically defined to contain
     * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
     * entries). The identity mapping is handled by pointing two PGD entries
     * to the first kernel PMD.
     *
     * Note the upper half of each PMD or PTE are always zero at this stage.
     */

#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */

    xorl %ebx,%ebx                      /* %ebx is kept at zero */

    movl $pa(__brk_base), %edi
    movl $pa(initial_pg_pmd), %edx
    movl $PTE_IDENT_ATTR, %eax
10:
    leal PDE_IDENT_ATTR(%edi),%ecx      /* Create PMD entry */
    movl %ecx,(%edx)                    /* Store PMD entry */
                                        /* Upper half already zero */
    addl $8,%edx                        /* PAE entries are 8 bytes wide */
    movl $512,%ecx                      /* 512 entries per page table */
11:
    stosl                               /* low 32 bits of the PTE */
    xchgl %eax,%ebx                     /* swap in the zero kept in %ebx */
    stosl                               /* high 32 bits of the PTE = 0 */
    xchgl %eax,%ebx                     /* restore the running PTE value */
    addl $0x1000,%eax                   /* next 4 KiB physical page */
    loop 11b

    /*
     * End condition: we must map up to the end + MAPPING_BEYOND_END.
     */
    movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
    cmpl %ebp,%eax
    jb 10b                              /* unsigned: loop while %eax < %ebp */
1:
    addl $__PAGE_OFFSET, %edi
    movl %edi, pa(_brk_end)
    shrl $12, %eax
    movl %eax, pa(max_pfn_mapped)

    /* Do early initialization of the fixmap area */
    movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
    movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)

#else   /* Not PAE */

/*
 * Byte offset, inside the page directory, of the entry that maps
 * __PAGE_OFFSET: (address >> 22) selects the entry and each entry is
 * 4 bytes, hence >> 20.  With __PAGE_OFFSET = 0xc0000000 this is 0xc00,
 * i.e. entry 0xc00 / 4 = 768.
 */
page_pde_offset = (__PAGE_OFFSET >> 20);

    /*
     * %edi = physical address of __brk_base, where the page tables are
     * written.  %edx = physical address of initial_page_table, the page
     * directory, which is a different location from __brk_base.
     * (Article author's note: __brk_base takes the place of what older
     * kernels called pg0 -- not verifiable from this snippet.)
     */
    movl $pa(__brk_base), %edi
    movl $pa(initial_page_table), %edx
    movl $PTE_IDENT_ATTR, %eax          /* first PTE: physical page 0 + attributes */
10:
    /*
     * Build the directory entry for the page table about to be filled.
     * The article quotes PDE_IDENT_ATTR as 0x063
     * (PRESENT+RW+DIRTY+ACCESSED).
     */
    leal PDE_IDENT_ATTR(%edi),%ecx      /* Create PDE entry */
    movl %ecx,(%edx)                    /* Store identity PDE entry */
    /*
     * The same page table is also installed page_pde_offset bytes further
     * on, so it is reachable both through the identity mapping and through
     * the PAGE_OFFSET mapping (entries 0/768, 1/769, ... when
     * __PAGE_OFFSET = 0xc0000000).
     */
    movl %ecx,page_pde_offset(%edx)     /* Store kernel PDE entry */
    addl $4,%edx                        /* advance to the next directory entry */
    movl $1024, %ecx                    /* one page table = 1024 entries */
11:
    stosl                               /* store %eax at (%edi), %edi += 4 */
    addl $0x1000,%eax                   /* next 4 KiB physical page, same attributes */
    loop 11b                            /* repeat until %ecx reaches 0 */

    /*
     * End condition: we must map up to the end + MAPPING_BEYOND_END.
     *
     * PTE_IDENT_ATTR is added to the limit because %eax carries the same
     * attribute bits, which makes the two values directly comparable.
     */
    movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
    cmpl %ebp,%eax
    jb 10b                              /* unsigned: build another table while %eax < %ebp */

    addl $__PAGE_OFFSET, %edi           /* end of the tables as a virtual address */
    movl %edi, pa(_brk_end)             /* record it in _brk_end */
    shrl $12, %eax                      /* drop offset/attribute bits: page frame number */
    movl %eax, pa(max_pfn_mapped)       /* record it in max_pfn_mapped */

    /* Do early initialization of the fixmap area */
    movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
    /* 0xffc = 1023 * 4: the last entry of the 1024-entry directory */
    movl %eax,pa(initial_page_table+0xffc)
#endif

内核编译后的可执行代码中的地址都是虚拟地址,也就是说地址都大于0xc0000000,目标是要内核映象在虚拟内核空间中运行。在分页机制开启前这些地址是无效的,不能直接被送到CPU的外部地址总线上用于寻址对应的物理内存。

/*
 * enable_paging: switch the CPU from physical to paged addressing.
 *
 * Loads the physical address of initial_page_table into %cr3, writes
 * CR0_STATE (defined elsewhere; per the original comment it sets the PG
 * bit) into %cr0, performs a far jump, then rebases the stack pointer.
 * Clobbers %eax; modifies %esp.
 */
enable_paging:

/*
 * Enable paging
 */
    movl $pa(initial_page_table), %eax
    movl %eax,%cr3          /* set the page table pointer.. */
    movl $CR0_STATE,%eax
    movl %eax,%cr0          /* ..and set paging (PG) bit */
    ljmp $__BOOT_CS,$1f     /* Clear prefetch and normalize %eip */
1:
    /* Shift the stack pointer to a virtual address */
    addl $__PAGE_OFFSET, %esp
/*
 * BSS section
 *
 * Page-aligned, zero-filled storage for the early paging structures.
 * Each ".fill repeat,size,value" emits `repeat` items of `size` bytes.
 */
__PAGE_ALIGNED_BSS
    .align PAGE_SIZE
#ifdef CONFIG_X86_PAE
initial_pg_pmd:
    .fill 1024*KPMDS,4,0    /* 4096 bytes per kernel PMD */
#else
ENTRY(initial_page_table)
    .fill 1024,4,0          /* 1024 four-byte directory entries */
#endif
initial_pg_fixmap:
    .fill 1024,4,0          /* page table used for the fixmap area */
ENTRY(empty_zero_page)
    .fill 4096,1,0          /* 4096 zero bytes */
ENTRY(swapper_pg_dir)
    .fill 1024,4,0
0 0