基于ARM平台分析Linux系统调用过程
来源:互联网 发布:测试端口是否开启 编辑:程序博客网 时间:2024/05/24 06:30
基于ARM平台分析Linux系统调用过程
Linux 3.10.44 + ARM V7 + Android:4.4.2
一 open
以open系统调用为例在bionic中的入口代码位于bionic/libc/bionic/open.c中,源代码如下:
/*
 * open() - bionic libc entry point for the open system call.
 *
 * Always ORs O_LARGEFILE into flags. The optional third (variadic) argument
 * is read as the file mode only when O_CREAT is set; otherwise mode stays 0.
 * The work is then delegated to the __open() stub and its result returned.
 */
int open(const char *pathname, int flags, ...)
{
mode_t mode = 0;
flags |= O_LARGEFILE;
/* The mode argument is only fetched when the call may create a file. */
if (flags & O_CREAT)
{
va_list args;
va_start(args, flags);
/* The mode is read back from the variadic list as an int, then cast to mode_t. */
mode = (mode_t) va_arg(args, int);
va_end(args);
}
return __open(pathname, flags, mode);
}
仅在创建新文件时,mode参数才有作用。
二 __open
代码位于bionic/libc/arch-arm/syscalls/__open.S中,源代码如下:
/*
 * __open - user-space stub that issues the open system call.
 *
 * r7 is used to carry the system call number (__NR_open) across the swi;
 * its previous value is parked in ip and restored afterwards.
 * After the swi, r0 is checked against the error range: if it is not an
 * error value the stub returns to the caller, otherwise r0 is negated and
 * control is transferred to __set_errno.
 */
ENTRY(__open)
/* Preserve the caller's r7 in ip while r7 holds the syscall number. */
mov ip, r7
ldr r7, =__NR_open
/* Trap into the kernel. */
swi #0
/* Restore the caller's r7. */
mov r7, ip
/* Compare r0 with -(MAX_ERRNO + 1); unsigned lower-or-same means "not an error". */
cmn r0, #(MAX_ERRNO + 1)
bxls lr
/* Error path: make the code positive and hand it to __set_errno
 * (presumably stores it in errno -- __set_errno is not shown here). */
neg r0, r0
b __set_errno
END(__open)
首先将r7内容保存到ip寄存器中,将系统调用号放到R7中,然后调用SWI(软中断,已更新成SVC)指令陷入内核模式,后边的处理就交给内核了。从内核返回后首先做的就是恢复R7寄存器,然后判断调用是否成功,如果失败的话设置errno。
三 vector_swi
异常处理的代码位于arch/arm/kernel/entry-armv.S,摘取如下:
/*
 * Exception vector table. Each entry branches to the stub for that vector,
 * except the third entry (offset 0x08), which loads pc from the word stored
 * at __vectors_start + 0x1000. According to the surrounding article that
 * word holds the address of vector_swi.
 */
__vectors_start:
W(b) vector_rst
W(b) vector_und
W(ldr) pc, __vectors_start + 0x1000
W(b) vector_pabt
W(b) vector_dabt
W(b) vector_addrexcptn
W(b) vector_irq
W(b) vector_fiq
在0x08偏移处的指令是将__vectors_start + 0x1000地址中的内容复制给PC,那么该地址是什么值呢?参考该文件下边的内容,该操作执行后会调用到vector_swi中执行。
/*
 * Head of the .stubs section: the very first word is the address of
 * vector_swi.
 */
.section .stubs, "ax", %progbits
__stubs_start:
@ This must be the first word
.word vector_swi
vector_swi源代码位于arch/arm/kernel/entry-common.S
S_FRAME_SIZE及S_PC定义在arch/arm/kernel/asm-offsets.c和arch/arm/include/uapi/asm/ptrace.h中,参考如下定义,整个frame包含18个寄存器,除了r0~r15外再增加cpsr和原r0。
/*
 * Saved register frame: 18 words. This definition of struct pt_regs is only
 * compiled when __KERNEL__ is not defined.
 */
#ifndef __KERNEL__
struct pt_regs {
long uregs[18];
};
#endif /* __KERNEL__ */
/*
 * Named accessors for the frame slots:
 *   uregs[0..10]  r0 - r10
 *   uregs[11..15] fp, ip, sp, lr, pc
 *   uregs[16]     cpsr
 *   uregs[17]     ORIG_r0 (the original r0)
 */
#define ARM_cpsr uregs[16]
#define ARM_pc uregs[15]
#define ARM_lr uregs[14]
#define ARM_sp uregs[13]
#define ARM_ip uregs[12]
#define ARM_fp uregs[11]
#define ARM_r10 uregs[10]
#define ARM_r9 uregs[9]
#define ARM_r8 uregs[8]
#define ARM_r7 uregs[7]
#define ARM_r6 uregs[6]
#define ARM_r5 uregs[5]
#define ARM_r4 uregs[4]
#define ARM_r3 uregs[3]
#define ARM_r2 uregs[2]
#define ARM_r1 uregs[1]
#define ARM_r0 uregs[0]
#define ARM_ORIG_r0 uregs[17]
以下是SWI handler去掉一些宏开关,在CONFIG_OABI_COMPAT和CONFIG_ARM_THUMB都设置为Y情况下的代码。
/*=============================================================================
* SWI handler
*-----------------------------------------------------------------------------
* Outline of the excerpt below:
*   1. Reserve S_FRAME_SIZE bytes on the stack and save r0-r12, the calling
*      sp/lr, the calling PC, the saved PSR and the original r0 into it.
*   2. Clear fp, enable IRQs, and load the thread_info pointer into tsk.
*   3. Work out whether the call is EABI or old-ABI by inspecting the swi
*      instruction, and select the matching syscall table.
*   4. Branch to __sys_trace if syscall work flags are set; otherwise, for an
*      in-range syscall number, jump through the table with lr set to
*      ret_fast_syscall.
*   5. Out-of-range numbers go to arm_syscall or sys_ni_syscall.
*   6. Label 9001 is the fixup target for a fault while loading the swi
*      instruction.
*/
.align 5
ENTRY(vector_swi)
// First, save the register state
sub sp, sp, #S_FRAME_SIZE
stmia sp, {r0 - r12} @ Calling r0 - r12
ARM( add r8, sp, #S_PC )
ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr
THUMB( mov r8, sp )
THUMB( store_user_sp_lr r8, r10, S_SP ) @ calling sp, lr
mrs r8, spsr @ called from non-FIQ mode, so ok.
str lr, [sp, #S_PC] @ Save calling PC
str r8, [sp, #S_PSR] @ Save CPSR
str r0, [sp, #S_OLD_R0] @ Save OLD_R0
zero_fp // clear fp
// Left as an open item by the article author (not analysed here)
#ifdef CONFIG_ALIGNMENT_TRAP
ldr ip, __cr_alignment
ldr ip, [ip]
mcr p15, 0, ip, c1, c0 @ update control register
#endif
// Enable interrupts
enable_irq
// Call the kernel's user-mode tracking hook
ct_user_exit
// Get the thread_info pointer into the r9 register (tsk)
get_thread_info tsk
/*
* Get the system call number.
*/
/*
* If we have CONFIG_OABI_COMPAT then we need to look at the swi
* value to determine if it is an EABI or an old ABI call.
*/
tst r8, #PSR_T_BIT
movne r10, #0 @ no thumb OABI emulation
USER( ldreq r10, [lr, #-4] ) @ get SWI instruction
ARM_BE8(rev r10, r10) @ little endian instruction
adr tbl, sys_call_table @ load syscall table pointer
/*
* If the swi argument is zero, this is an EABI call and we do nothing.
*
* If this is an old ABI call, get the syscall number into scno and
* get the old ABI syscall table address.
*/
bics r10, r10, #0xff000000
eorne scno, r10, #__NR_OABI_SYSCALL_BASE
ldrne tbl, =sys_oabi_call_table
local_restart:
ldr r10, [tsk, #TI_FLAGS] @ check for syscall tracing
stmdb sp!, {r4, r5} @ push fifth and sixth args
tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls?
bne __sys_trace
cmp scno, #NR_syscalls @ check upper syscall limit
// Set the return address to ret_fast_syscall
adr lr, BSYM(ret_fast_syscall) @ return address
// For an ordinary (in-range) system call, jump to its handler via the table
ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine
// The code below handles the special (out-of-range) system calls
add r1, sp, #S_OFF
2: mov why, #0 @ no longer a real syscall
cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back
bcs arm_syscall
b sys_ni_syscall @ not private func
/*
* We failed to handle a fault trying to access the page
* containing the swi instruction, but we're not really in a
* position to return -EFAULT. Instead, return back to the
* instruction and re-enter the user fault handling path trying
* to page it in. This will likely result in sending SEGV to the
* current task.
*/
// Fault fixup: subtract 4 from lr so that the system call is issued again
9001:
sub lr, lr, #4
str lr, [sp, #S_PC]
b ret_fast_syscall
ENDPROC(vector_swi)
四 关于USER宏的作用
USER( ldreq r10, [lr, #-4] ) @ get SWI instruction
/*
 * USER(x...): emit the instruction(s) x at local label 9999 and add the
 * address pair (9999b, 9001f) to the __ex_table section. Per the surrounding
 * article, a fault on x then resumes at the code after the next 9001 label.
 */
#define USER(x...) \
9999: x; \
.pushsection __ex_table,"a"; \
.align 3; \
.long 9999b,9001f; \
.popsection
ldreq r10, [lr, #-4]在读取用户空间的swi指令时,存在该页尚未换入的可能,这样直接访问就可能
导致page fault。USER宏的作用是在__ex_table中为这条指令增加一个异常修复表项,表示该指
令出错后会跳转到9001标签后的代码继续执行。
以下是这个patch的原始说明:
ARM: 7748/1: oabi: handle faults when loading swi instruction from userspace
Running an OABI_COMPAT kernel on an SMP platform can lead to fun and
games with page aging.
If one CPU issues a swi instruction immediately before another CPU
decides to mkold the page containing the swi instruction, then we will
fault attempting to load the instruction during the vector_swi handler
in order to retrieve its immediate field. Since this fault is not
currently dealt with by our exception tables, this results in a panic:
Unable to handle kernel paging request at virtual address 4020841c
pgd = c490c000
[4020841c] *pgd=84451831, *pte=bf05859d, *ppte=00000000
Internal error: Oops: 17 [#1] PREEMPT SMP ARM
Modules linked in: hid_sony(O)
CPU: 1 Tainted: G W O (3.4.0-perf-gf496dca-01162-gcbcc62b #1)
PC is at vector_swi+0x28/0x88
LR is at 0x40208420
This patch wraps all of the swi instruction loads with the USER macro
and provides a shared exception table entry which simply rewinds the
saved user PC and returns from the system call (without setting tbl, so
there's no worries with tracing or syscall restarting). Returning to
userspace will re-enter the page fault handler, from where we will
probably send SIGSEGV to the current task.
Reported-by: Wang, Yalin <yalin.wang@sonymobile.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
从以上说明看,问题发生在一个核刚执行完swi陷入内核后,另一个核就把包含swi指令的页标记为old
(mkold,页老化),这个时候内核读取swi指令就会触发缺页,而异常表中没有对应的表项,于是发生oops错误。
增加了异常处理后,如果读取时发生page fault,那么首先会产生data abort,在异常处理函数中通过
异常表跳转到9001处执行,这段代码会将lr(保存的用户PC)减4再返回到用户空间,这样用户空间会
再次执行swi指令;如果此时该页仍不可访问,则产生缺页异常再次陷入内核,由内核将该页换入,
然后再次执行swi陷入内核完成系统调用。
五 ret_fast_syscall
系统调用执行完毕后就会通过调用ret_fast_syscall返回到用户空间。
.align 5
/*
* This is the fast syscall return path. We do as little as
* possible here, and this includes saving r0 back into the SVC
* stack.
*
* Steps visible below: disable IRQs, load the thread flags from tsk and
* test them against _TIF_WORK_MASK; if any are set, branch to
* fast_work_pending. Otherwise run the pre-return hooks and restore the
* user registers.
*/
ret_fast_syscall:
UNWIND(.fnstart )
UNWIND(.cantunwind )
disable_irq @ disable interrupts
ldr r1, [tsk, #TI_FLAGS]
tst r1, #_TIF_WORK_MASK
// Check whether other work must be done before returning to user space, e.g. rescheduling to another process
bne fast_work_pending
//
asm_trace_hardirqs_on
/* perform architecture specific actions before user return */
arch_ret_to_user r1, lr
ct_user_enter
// Return to user space
restore_user_regs fast = 1, offset = S_OFF
UNWIND(.fnend )
六 系统调用过程中的异常
因为在vector_swi中会通过zero_fp将fp设置为0,且对于内核态vector_swi是调用的起点,
所以在vector_swi中发生异常后是没有调用栈的,且提示“no frame pointer”
Unable to handle kernel paging request at virtual address 400da2dc
pgd = d91c4000
[400da2dc] *pgd=1fead831, *pte=1793c59d, *ppte=00000000
Internal error: Oops: 17 [#1] PREEMPT SMP ARM
Modules linked in:
CPU: 1 PID: 2204 Comm: ActivityManager Not tainted 3.10.24-00056-g2526063 #1
task: d9353a00 ti: d3c4a000 task.ti: d3c4a000
PC is at vector_swi+0x2c/0x58
LR is at 0x400da2e0
pc : [<c000e2ec>] lr : [<400da2e0>] psr: 600f0093
sp : d3c4bfb0 ip : 40108384 fp : 00000000
r10: 000000a4 r9 : 629e2a98 r8 : 200f0010
r7 : 00000003 r6 : ada00009 r5 : 5fbd69d4 r4 : 5b505c80
r3 : 00000000 r2 : 000000ff r1 : 5fbd69d4 r0 : 000000a4
Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user
Control: 10c5387d Table: 1fdc406a DAC: 00000015
PC: 0xc000e26c:
e26c e16ff001 f57ff01f e95d7fff e1a00000 e28dd00c e1b0f00e eb0122cd e3550000
e28c 11a00004 128fe000 11a0f005 e1a096ad e1a09689 eaffffeb e320f000 e320f000
e2ac e320f000 e320f000 e320f000 e320f000 e320f000 e24dd048 e88d1fff e28d803c
e2cc e9486000 e14f8000 e58de03c e58d8040 e58d0044 e3a0b000 e3180020 13a0a000
e2ec 051ea004 e59fc0a8 e59cc000 ee01cf10 f1080080 e1a096ad e1a09689 e28f8098
e30c e3daa4ff 122a7609 159f8088 e599a000 e92d0030 e31a0c0f 1a000008 e3570f5f
e32c e24fef4d 3798f107 e28d1008 e3a08000 e357080f e2270000 2a0013df ea00eb4e
e34c e1a01007 e28d0008 eb000b1e e28fe024 e1a07000 e28d1008 e3570f5f 3891007f
SP: 0xd3c4bf30:
bf30 d3c4bf54 d3c4bf40 c0101090 c00f1b54 00000000 000000a4 c000e2ec 600f0093
bf50 ffffffff d3c4bf9c 00000000 d3c4bf68 c000dd58 c000849c 000000a4 5fbd69d4
bf70 000000ff 00000000 5b505c80 5fbd69d4 ada00009 00000003 200f0010 629e2a98
bf90 000000a4 00000000 40108384 d3c4bfb0 400da2e0 c000e2ec 600f0093 ffffffff
bfb0 000000a4 5fbd69d4 000000ff 00000000 5b505c80 5fbd69d4 ada00009 00000003
bfd0 ebc00001 629e2a98 000000a4 00000000 40108384 5fbd69b0 401f5527 400da2e0
bff0 200f0010 000000a4 ff566758 59777795 00000000 00000002 bf000000 d93b3a00
c010 c0a5161c 00000002 00000015 d93b3a00 00000004 d3c4c000 c0a469b0 c0b052d8
Process ActivityManager (pid: 2204, stack limit = 0xd3c4a238)
Stack: (0xd3c4bfb0 to 0xd3c4c000)
bfa0: 000000a4 5fbd69d4 000000ff 00000000
bfc0: 5b505c80 5fbd69d4 ada00009 00000003 ebc00001 629e2a98 000000a4 00000000
bfe0: 40108384 5fbd69b0 401f5527 400da2e0 200f0010 000000a4 ff566758 59777795
Backtrace: no frame pointer
Code: e58d0044 e3a0b000 e3180020 13a0a000 (051ea004)
---[ end trace 10d7d4bd070793====1401323113.82689
除vector_swi中会将fp设置为0外,还有几个地方也会设置fp为0:
1. 不合法的异常入口
2. fiq入口
3. 在user模式进入各种异常时
这些地方的统一特点是对于内核来讲这些都是调用的起点,所以fp为0是正常的。
/*
 * common_invalid - shared tail of the invalid-exception entry paths.
 *
 * Clears fp, loads three words from the address in r0 into r4-r6, stores
 * r4 at [sp] and r5-r7 (with r7 = -1) starting at sp + S_PC, then calls
 * bad_mode with r0 = sp.
 * NOTE(review): this is a partial excerpt -- the ENDPROC below names
 * __und_invalid, whose entry label is not visible here.
 */
common_invalid:
zero_fp
ldmia r0, {r4 - r6}
add r0, sp, #S_PC @ here for interlock avoidance
mov r7, #-1 @ "" "" "" ""
str r4, [sp] @ save preserved r0
stmia r0, {r5 - r7} @ lr_<exception>,
@ cpsr_<exception>, "old_r0"
mov r0, sp
b bad_mode
ENDPROC(__und_invalid)
因为在调用内核中的API处理函数前,lr寄存器会被设置成ret_fast_syscall函数所在的位置,所以
如果这个时候内核发生异常,那么调用栈的最后是ret_fast_syscall,实际上这个是不对的,因为
调用系统函数的起点是vector_swi,这个地方只是显示问题。
Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000100
CPU: 3 PID: 1 Comm: init Tainted: G W 3.10.44-g516054c #1
Backtrace:
[<c0013410>] (dump_backtrace+0x0/0x10c) from [<c0013748>] (show_stack+0x18/0x1c)
r6:e1660000 r5:c0a1bab4 r4:c0d8dae8 r3:00000000
[<c0013730>] (show_stack+0x0/0x1c) from [<c083aac0>] (dump_stack+0x20/0x28)
[<c083aaa0>] (dump_stack+0x0/0x28) from [<c0836ac8>] (panic+0x98/0x1fc)
[<c0836a30>] (panic+0x0/0x1fc) from [<c002a10c>] (do_exit+0x7d8/0x930)
r0:c0a1bab4[2014:06:27 01:38:10][pid:1,cpu3,init]
r7:e02c9e00
[<c0029934>] (do_exit+0x0/0x930) from [<c002a3e0>] (do_group_exit+0x44/0xb8)
r7:000000f8
[<c002a39c>] (do_group_exit+0x0/0xb8) from [<c002a46c>] (__wake_up_parent+0x0/0x28)
r7:000000f8 r6:beac5d54 r5:00000000 r4:beac5c34
[<c002a454>] (SyS_exit_group+0x0/0x18) from [<c000e880>] (ret_fast_syscall+0x0/0x30)
遗留问题:
1. 以上的分析都是假设在内核中不能处理缺页异常,且如果在异常处理表中有处理表项就不会发生
oops异常,如果没有就会发生。这个未分析相关的代码。
2. asm_trace_hardirqs_on的具体作用
- 基于ARM平台分析Linux系统调用过程
- 基于arm的linux系统调用分析
- Linux内核中断处理过程分析-基于arm平台
- linux系统调用过程解析(基于ARM处理器)
- ARM Linux系统调用过程
- Arm linux 系统调用分析
- 《ARM与Linux些许问题》第四章:ARM平台系统调用原理分析
- 《ARM与Linux些许问题》第四章:ARM平台系统调用原理分析
- 基于S3C2410-ARM Linux启动过程分析
- Linux系统调用过程分析
- Linux系统调用过程分析
- Linux系统调用过程分析
- Linux系统调用过程分析
- ARM Linux系统调用详细分析
- 基于Linux系统的ARM平台QT移植
- arm linux中断向量注册分析,linu系统调用分析
- ARM Linux系统调用
- ARM Linux系统调用
- 桥接模式处理下机计算金额问题
- WDF驱动模型
- Android事件处理
- 对微信的一点研究
- 经典SQL例题
- 基于ARM平台分析Linux系统调用过程
- window.moveTo方法
- android:layout_weight的真实含义
- C专家编程学习笔记
- LeetCode-Merge Intervals
- IFeatureSelection接口
- HttpTunnel技术介绍
- 黑马程序员_异常
- LeetCode - Array - Maximum Subarray