arm linux 启动代码分析（二）

来源：互联网　发布：mac checksum　编辑：程序博客网　时间：2024/04/29 09:43

昨天分析了一下SEP4020 LINUX的zImage的加载引导过程，zImage其实主要就是在重定位代码，然后就是将我们的Image镜像搬运到0x30008000位置，然后向Image传递r0=0,r1=体系架构号，r2=参数列表基址，然后就跳到了Image去执行系统启动了，不过这中间在解压缩内核和搬运过程中是要开MMU和CACHE的，所以其中有相当的篇幅是开这两个玩意的，下面就代码一句一句来介绍分析吧，代码位置在/arch/arm/boot/compressed/head.S中：

.section ".start", #alloc, #execinstr

* sort out different calling conventions

.align

start:

.type start,#function

.rept 8 /*重复定义8次下面的指令，也就是空出中断向量表的位置*/

mov r0, r0 /*就是nop指令*/

.endr

b 1f

.word 0x016f2818 @ 辅助引导程序的幻数

.word start @ 加载运行zImage的绝对地址，start表示赋的初值

.word _edata @ zImage end address zImage结尾地址，_edata是在vmlinux.lds.S中定义的，表示init,text,data三个段的结束位置（155行）

1: mov r7, r1 @ save architecture ID 把r1中的体系结构ID保存到r7

mov r8, r2 @ save atags pointer 把r2中的参数列表（atags）指针保存到r8，r0始终为0

* Booting from Angel - need to enter SVC mode and disable

* FIQs/IRQs (numeric definitions from angel arm.h source).

* We only do this if we were in user mode on entry.

mrs r2, cpsr @ get current mode

tst r2, #3 @ not user?，tst实际上是相与

bne not_angel

mov r0, #0x17 @ angel_SWIreason_EnterSVC，向SWI中传递参数

swi 0x123456 @ angel_SWI_ARM这个是让用户空间调到SVC空间，这个会从前面0x0008处重新执行

not_angel: /*表示非用户模式，可以直接关闭中断*/

mrs r2, cpsr @ turn off interrupts to

orr r2, r2, #0xc0 @ prevent angel from running关闭中断

msr cpsr_c, r2

/* 注意这里可能需要做cache刷新和其他工作 */

/* 链接的时候，这里可以插入一些体系结构相关的代码，但是应该保留r7 r8 */

/*读入地址表。因为我们的代码可以在任何地址执行，也就是位置无关代码（PIC），所以我们需要加上一个偏移量。

下面有每一个列表项的具体意义。

LC0是表的首项，它本身就是在此head.S中定义的

.type LC0, #object

LC0:

.word LC0 @ r1 LC0表的起始位置

.word __bss_start @ r2 bss段的起始地址

.word _end @ r3 zImage（bss）连接的结束地址在vmlinux.lds.S中定义

.word zreladdr @ r4 zImage的连接地址，我们在mach-sep4020/makefile.boot中定义的

.word _start @ r5 zImage的基地址，bootp/init.S中的_start函数，主要起传递参数作用

.word _got_start @ r6 GOT（全局偏移表）起始地址，_got_start是在compressed/vmlinux.lds.in中定义的

.word _got_end @ ip GOT结束地址

.word user_stack+4096 @ sp 用户栈底 user_stack是紧跟在bss段的后面的，在compressed/vmlinux.lds.in中定义的

在本head.S的末尾定义了zImage的临时栈空间，在这里分配了4K的空间用来做堆栈。

.section ".stack", "w"

user_stack:

.space 4096

GOT表的初值是连接器指定的，当时程序并不知道代码在哪个地址执行。如果当前运行的地址已经和表上的地址不一样，还要修正GOT表。*/

.text

adr r0, LC0

ldmia r0, {r1, r2, r3, r4, r5, r6, ip, sp}

@ r0是运行时地址，而r1是链接时地址，它们俩表示的都是LC0表的起始位置，因此两者之差就是运行地址与链接地址之间的偏移量，修正了这个偏移量才可以运行“地址相关的代码”

subs r0, r0, r1 @ calculate the delta offset（subs会更新标志位，供后面的beq判断偏移量是否为零）

@ if delta is zero, we are

beq not_relocated @ running at the address we

@ were linked at.若相等则不用重定位了。

* 偏移量不为零，说明运行在不同的地址，那么需要修正几个指针

* r5 – zImage基地址

* r6 – GOT（全局偏移表）起始地址

* ip – GOT结束地址

add r5, r5, r0 /*加上偏移量*/

add r6, r6, r0

add ip, ip, r0 /*ip即是r12*/

* If we're running fully PIC === CONFIG_ZBOOT_ROM = n,

* we need to fix up pointers into the BSS region.

* 这时需要修正BSS区域的指针，我们平台适用。

* r2 – BSS 起始地址

* r3 – BSS 结束地址

* sp – 堆栈指针

add r2, r2, r0

add r3, r3, r0

add sp, sp, r0

* 重新定位GOT表中所有的项.

1: ldr r1, [r6, #0] @ relocate entries in the GOT

add r1, r1, r0 @ table. This fixes up the

str r1, [r6], #4 @ C references.

cmp r6, ip

blo 1b

not_relocated: mov r0, #0 /*清除bss段*/

1: str r0, [r2], #4 @ clear bss

str r0, [r2], #4

cmp r2, r3

blo 1b

@ @ 正如下面的注释所说，C环境我们已经设置好了。下面我们要打开cache和mmu。为什么要这样做呢？

@ 这只是一个解压程序呀？为了速度。那为什么要开mmu呢，而且只是做一个平板式的映射？还是为了速度。

@ 如果不开mmu的话，就只能打开icache。因为不开mmu的话就无法实现内存管理，而io区是决不能开dcache的。

/* 这时C运行环境应该已经配置好了。

* 打开cache，设置一些指针，开始解压vmlinux

bl cache_on

/************************************进入cache_on函数******************************************************/

* Turn on the cache. We need to setup some page tables so that we

* can have both the I and D caches on.

* We place the page tables 16k down from the kernel execution address,

* and we hope that nothing else is using it. If we're using it, we

* will go pop!

* On entry,

* r4 = kernel execution address

* r6 = processor ID

* r7 = architecture number

* r8 = atags pointer

* r9 = run-time address of "start" (???)

* On exit,

* r1, r2, r3, r9, r10, r12 corrupted

* This routine must preserve:

* r4, r5, r6, r7, r8

.align 5

cache_on: mov r3, #8 @ cache_on function

b call_cache_fn

call_cache_fn: adr r12, proc_types

mrc p15, 0, r6, c0, c0 @ get processor ID

1: ldr r1, [r12, #0] @ get value

ldr r2, [r12, #4] @ get mask

eor r1, r1, r6 @ (real ^ match)将从c0中读出的cpu id与下面的proc_types表中的cpu系列进行比较可得到其属于哪个系列的

tst r1, r2 @ & mask

addeq pc, r12, r3 @ 如果是这个系列的cpu调用其cache打开函数

add r12, r12, #4*5

b 1b

.type proc_types, #object

proc_types:

.word 0x41560600 @ ARM6/610

.word 0xffffffe0

b __arm6_cache_off @ works, but slow

b __arm6_cache_off

mov pc, lr

.word 0x00000000 @ old ARM ID

.word 0x0000f000

mov pc, lr

.word 0x41007000 @ ARM7/710

.word 0xfff8fe00

b __arm7_cache_off

mov pc, lr

.word 0x41807200 @ ARM720T (writethrough)

.word 0xffffff00

b __armv4_cache_on

b __armv4_cache_off

mov pc, lr

.size proc_types, . - proc_types

__armv4_cache_on:

mov r12, lr /*在后面的cache_on函数返回的时候会用到的*/

bl __setup_mmu

/************************************进入__setup_mmu函数******************************************************/

__setup_mmu: sub r3, r4, #16384 @ Page directory size（16k），r4是内核执行地址（zreladdr，即0x30008000），再减16k即是0x30004000

bic r3, r3, #0xff @ Align the pointer

bic r3, r3, #0x3f00

* Initialise the page tables, turning on the cacheable and bufferable

* bits for the RAM area only.

* 在这里只建立了一级虚实映射，是虚实一一映射的，映射的大小为4GB

mov r0, r3

mov r9, r0, lsr #18

mov r9, r9, lsl #18 @ start of RAM，当前可用sdram的起始地址（以256k为边界）

add r10, r9, #0x10000000 @ a reasonable RAM size，这里假设的可用ram大小为256M

mov r1, #0x12 /*0x12：bit4置1，bit[1:0]=10b，表示这是一个段（section）描述符*/

orr r1, r1, #3 << 10 /*段描述符的AP为11b*/

add r2, r3, #16384 /*段描述符的空间大小为16k*/

1: cmp r1, r9 @ if virt > start of RAM只有虚空间在sdram中才是可cache和可buffer

orrhs r1, r1, #0x0c @ set cacheable, bufferable

cmp r1, r10 @ if virt > end of RAM

bichs r1, r1, #0x0c @ clear cacheable, bufferable

str r1, [r0], #4 @ 1:1 mapping

add r1, r1, #1048576 /*每次描述符的地址内容是自加0x100000*/

teq r0, r2

bne 1b

* 在这里如果我们是从flash上直接启动，我们也可以将flash这块空间映射为可cache和可buffer的，这样可以加快这段代码的运行速度

mov r1, #0x1e

orr r1, r1, #3 << 10

mov r2, pc, lsr #20

orr r1, r1, r2, lsl #20

add r0, r3, r2, lsl #2

str r1, [r0], #4

add r1, r1, #1048576

str r1, [r0]

mov pc, lr

/************************************从__setup_mmu函数返回****************************************************/

mov r0, #0

mcr p15, 0, r0, c7, c10, 4 @ 排空（drain）write buffer

mcr p15, 0, r0, c8, c7, 0 @ 失效I/D TLBs

mrc p15, 0, r0, c1, c0, 0 @ read control reg

orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement

orr r0, r0, #0x0030

bl __common_cache_on

/************************************进入__common_cache_on函数********************************************/

__common_cache_on:

mov r1, #-1

mcr p15, 0, r3, c2, c0, 0 @ load page table pointer

mcr p15, 0, r1, c3, c0, 0 @ load domain access control所有域都是可读可写

mcr p15, 0, r0, c1, c0, 0 @ load control register赋值cp15的控制寄存器，这时候开MMU，cache

mov pc, lr

/******************************从__common_cache_on函数返回***************************************************/

mov r0, #0

mcr p15, 0, r0, c8, c7, 0 @ 失效I/D TLBs

mov pc, r12

/******************************从cache_on函数返回*************************************************************/

mov r1, sp @ malloc space above stack

add r2, sp, #0x10000 @ 64k max解压缩的缓冲区

0 0