android 抓取crash信息流程简介
来源:互联网 发布:牡丹江管理局电视网络 编辑:程序博客网 时间:2024/05/21 22:32
通过上一篇博客我们知道,在linker 完成自身重定位,在对可执行程序进行重定位的过程中,会初始化debuggerd,也就是注册异常处理函数,在程序发生异常的时候抓取异常信息。
4185 /*4186 * This code is called after the linker has linked itself and4187 * fixed it's own GOT. It is safe to make references to externs4188 * and other non-local data at this point.4189 */4190 static ElfW(Addr) __linker_init_post_relocation(KernelArgumentBlock& args, ElfW(Addr) linker_base) {...4202 debuggerd_init(); //进行debuggerd 的初始化...4389 TRACE("[ Ready to execute \"%s\" @ %p ]", si->get_realpath(), reinterpret_cast<void*>(si->entry));4390 return si->entry;4391 }
追踪debuggerd_init() 这个函数
bionic/linker/debugger.cpp
__LIBC_HIDDEN__ void debuggerd_init() {/* * bionic/libc/kernel/uapi/asm-generic/signal.h * struct sigaction { * __sighandler_t sa_handler; //信号对应的处理函数 * unsigned long sa_flags; * #ifdef SA_RESTORER * __sigrestore_t sa_restorer; //处理完成之后的返回函数,一般不设置,kernel会在设置 * #endif * sigset_t sa_mask; * };*/303 struct sigaction action;304 memset(&action, 0, sizeof(action));305 sigemptyset(&action.sa_mask);306 action.sa_sigaction = debuggerd_signal_handler; // debuggerd_signal_handler 就是处理函数307 action.sa_flags = SA_RESTART | SA_SIGINFO;308 309 // Use the alternate signal stack if available so we can catch stack overflows.310 action.sa_flags |= SA_ONSTACK; //使用独立的栈空间311 312 sigaction(SIGABRT, &action, nullptr);313 sigaction(SIGBUS, &action, nullptr);314 sigaction(SIGFPE, &action, nullptr);315 sigaction(SIGILL, &action, nullptr);316 sigaction(SIGSEGV, &action, nullptr);317 #if defined(SIGSTKFLT)318 sigaction(SIGSTKFLT, &action, nullptr);319 #endif320 sigaction(SIGTRAP, &action, nullptr);321 }
debuggerd_signal_handler 就是程序收到SIGABRT,SIGBUS,SIGFPE,SIGILL,SIGSEGV等 这几个信号时,会调用的处理函数。
bionic/libc/bionic/sigaction.cpp
36 extern "C" int __rt_sigaction(int, const struct __kernel_sigaction*, struct __kernel_sigaction*, size_t);37 38 int sigaction(int signal, const struct sigaction* bionic_new_action, struct sigaction* bionic_old_action) {39 __kernel_sigaction kernel_new_action;40 if (bionic_new_action != NULL) {41 kernel_new_action.sa_flags = bionic_new_action->sa_flags;42 kernel_new_action.sa_handler = bionic_new_action->sa_handler;43 kernel_new_action.sa_mask = bionic_new_action->sa_mask;44 #if defined(SA_RESTORER)45 kernel_new_action.sa_restorer = bionic_new_action->sa_restorer;46 #if defined(__aarch64__)47 // arm64 has sa_restorer, but unwinding works best if you just let the48 // kernel supply the default restorer from [vdso]. gdb doesn't care, but49 // libgcc needs the nop that the kernel includes before the actual code.50 // (We could add that ourselves, but why bother?)51 #else52 if (!(kernel_new_action.sa_flags & SA_RESTORER)) {53 kernel_new_action.sa_flags |= SA_RESTORER;54 kernel_new_action.sa_restorer = &__restore_rt; //用户空间处理函数执行完后返回内核空间的函数55 }56 #endif57 #endif58 }59 60 __kernel_sigaction kernel_old_action;61 int result = __rt_sigaction(signal,62 (bionic_new_action != NULL) ? &kernel_new_action : NULL,63 (bionic_old_action != NULL) ? &kernel_old_action : NULL,64 sizeof(sigset_t));65 66 if (bionic_old_action != NULL) {67 bionic_old_action->sa_flags = kernel_old_action.sa_flags;68 bionic_old_action->sa_handler = kernel_old_action.sa_handler;69 bionic_old_action->sa_mask = kernel_old_action.sa_mask;70 #if defined(SA_RESTORER)71 bionic_old_action->sa_restorer = kernel_old_action.sa_restorer;72 #endif73 }74 75 return result;76 }
这个函数中把传入的参数sigaction 转成__kernel_sigaction类型,这两个结构体其实是一样的,然后调用__rt_sigaction()注册。
bionic/libc/arch-arm64/syscalls/__rt_sigaction.S
3 #include <private/bionic_asm.h>4 5 ENTRY(__rt_sigaction)6 mov x8, __NR_rt_sigaction7 svc #0 //系统调用8 9 cmn x0, #(MAX_ERRNO + 1)10 cneg x0, x0, hi11 b.hi __set_errno_internal12 13 ret14 END(__rt_sigaction)15 .hidden __rt_sigaction__rt_sigaction()是一个系统调用,kernel 中对应的处理函数是do_sigaction(),系统调用的过程在fork() 对应的博客中有详细分析,这里不再分析,所以我们理所当然地认为调用了__rt_sigaction()函数后,就会跑到kernel中的do_sigaction()。
linux-4.10/kernel/signal.c
3065 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)3066 {/* * linux-4.10/include/linux/signal.h * struct sigaction { // sigaction 在kernel 中的定义 * #ifndef __ARCH_HAS_IRIX_SIGACTION * __sighandler_tsa_handler; * unsigned longsa_flags; * #else * unsigned intsa_flags; * __sighandler_tsa_handler; * #endif * #ifdef __ARCH_HAS_SA_RESTORER * __sigrestore_t sa_restorer; * #endif * sigset_tsa_mask;/* mask last for extensibility */ * }; * * struct k_sigaction { // k_sigaction在kernel 中的定义 * struct sigaction sa; //相当于把sigaction 的成员搬到这里 * #ifdef __ARCH_HAS_KA_RESTORER * __sigrestore_t ka_restorer; * #endif * };*/3067 struct task_struct *p = current, *t; //p 当前进程的task_struct 结构体,也就是PCB3068 struct k_sigaction *k;3069 sigset_t mask;3070 3071 if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig)))3072 return -EINVAL;3073 /* * linux-4.10/include/linux/sched.h * struct task_struct { * .../* signal handlers * struct signal_struct *signal; * struct sighand_struct *sighand; //保存信号相关的处理函数 * * sigset_t blocked, real_blocked; * sigset_t saved_sigmask;// restored if set_restore_sigmask() was used * struct sigpending pending; * * unsigned long sas_ss_sp; //信号处理函数独立的堆栈 * size_t sas_ss_size; //堆栈的大小 * unsigned sas_ss_flags; //相关标志位 * ... 
* } * * struct sighand_struct { * atomic_tcount; * struct k_sigactionaction[_NSIG]; #define _NSIG64 * spinlock_tsiglock; * wait_queue_head_tsignalfd_wqh; * };*/3074 k = &p->sighand->action[sig-1]; // struct k_sigaction指针,可以认为是对应信号的处理函数3075 3076 spin_lock_irq(&p->sighand->siglock);3077 if (oact)3078 *oact = *k; //指向之前注册的处理函数3079 3080 sigaction_compat_abi(act, oact);3081 3082 if (act) {3083 sigdelsetmask(&act->sa.sa_mask,3084 sigmask(SIGKILL) | sigmask(SIGSTOP));3085 *k = *act; //将user space 也就是应用程序中的handle 函数保存(注册进来)...3097 if (sig_handler_ignored(sig_handler(p, sig), sig)) { //handler 为 0或者 1的会被特殊处理3098 sigemptyset(&mask);3099 sigaddset(&mask, sig);3100 flush_sigqueue_mask(&mask, &p->signal->shared_pending);3101 for_each_thread(p, t)3102 flush_sigqueue_mask(&mask, &t->pending);3103 }3104 }3105 3106 spin_unlock_irq(&p->sighand->siglock);3107 return 0;3108 }
56 static void __user *sig_handler(struct task_struct *t, int sig) //返回之前的handle指针57 {58 return t->sighand->action[sig - 1].sa.sa_handler;59 }6061 static int sig_handler_ignored(void __user *handler, int sig)62 {63 /* Is it explicitly or implicitly ignored? */64 return handler == SIG_IGN || // #define SIG_IGN((__force __sighandler_t)1)/* ignore signal */65 (handler == SIG_DFL && sig_kernel_ignore(sig)); //#define SIG_DFL((__force __sighandler_t)0)/* default signal handling */66 }
经过上面的流程,程序中SIGABRT,SIGBUS,SIGFPE,SIGILL,SIGSEGV等信号对应的处理函数就注册完成了。接下来我们分析一下程序出现异常时,信号被处理的流程。
linux-4.10/arch/arm64/kernel/entry.S
317 ENTRY(vectors)…327 328 ventryel0_sync// Synchronous 64-bit EL0 //用户空间访问非法地址后走这里329 ventryel0_irq// IRQ 64-bit EL0330 ventryel0_fiq_invalid// FIQ 64-bit EL0331 ventryel0_error_invalid// Error 64-bit EL0…344 END(vectors)
当程序在运行过程中访问到非法地址,比如空指针,或者未映射的地址,就会被处理器捕获到异常,走到对应的异常处理函数。怎么走到异常处理函数,其实很好理解,处理器捕获到异常后会跳到一个约定好的地址,kernel在初始化的时候往这些地址写对应的处理函数地址,这样就能走到处理函数中了。
512 * EL0 mode handlers.513 */514 .align6515 el0_sync:516 kernel_entry 0 //保存用户空间的寄存器517 mrsx25, esr_el1// read the syndrome register518 lsrx24, x25, #ESR_ELx_EC_SHIFT// exception class519 cmpx24, #ESR_ELx_EC_SVC64// SVC in 64-bit state520 b.eqel0_svc521 cmpx24, #ESR_ELx_EC_DABT_LOW// data abort in EL0 //走这条flow522 b.eqel0_da523 cmpx24, #ESR_ELx_EC_IABT_LOW// instruction abort in EL0524 b.eqel0_ia
发生异常时,处理器会把一些信息保存到对应的寄存器,具体是哪个寄存器,会保存什么样的信息,这里不详细介绍,异常处理函数根据寄存器中的信息,再跳到更加细化的处理函数中,程序访问非法地址,会走el0_da。
589 el0_da:590 /*591 * Data abort handling592 */593 mrsx26, far_el1594 // enable interrupts before calling the main handler595 enable_dbg_and_irq596 ct_user_exit597 bicx0, x26, #(0xff << 56) // x0 = x26 & 0x00ffffffffffffff,即清除地址的最高8位598 movx1, x25 // x1 = x25599 movx2, sp // x2 = sp600 bldo_mem_abort //x0 , x1 , x2作为do_mem_abort() 的三个参数601 bret_to_user
linux-4.10/arch/arm64/mm/fault.c
568 /*569 * Dispatch a data abort to the relevant handler.570 */571 asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,572 struct pt_regs *regs)573 {574 const struct fault_info *inf = fault_info + (esr & 63);575 struct siginfo info;576 577 if (!inf->fn(addr, esr, regs)) //如果是缺页异常,尝试修复,成功就直接返回578 return;579 580 pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n",581 inf->name, esr, addr);582 583 info.si_signo = inf->sig;584 info.si_errno = 0;585 info.si_code = inf->code;586 info.si_addr = (void __user *)addr;587 arm64_notify_die("", regs, &info, esr); 588 }
进入到do_mem_abort()后,会先调用对应的fault 处理函数(inf->fn)尝试处理,比如缺页异常,处理成功就直接返回;如果处理失败,也就是确实访问了非法地址,就会走到arm64_notify_die()。
linux-4.10/arch/arm64/kernel/traps.c
298 void arm64_notify_die(const char *str, struct pt_regs *regs,299 struct siginfo *info, int err)300 {301 if (user_mode(regs)) { //如果是用户空间的进程执行导致的异常302 current->thread.fault_address = 0;303 current->thread.fault_code = err;304 force_sig_info(info->si_signo, info, current); 305 } else { //否则是kernel 里面的异常,走这里,最后会走到panic306 die(str, regs, err);307 }308 }
如果是用户空间的程序访问了非法地址,会调用force_sig_info()发送信号给对应程序。
linux-4.10/kernel/signal.c
1165 int1166 force_sig_info(int sig, struct siginfo *info, struct task_struct *t)1167 {1168 unsigned long int flags;1169 int ret, blocked, ignored;1170 struct k_sigaction *action;1171 1172 spin_lock_irqsave(&t->sighand->siglock, flags);1173 action = &t->sighand->action[sig-1]; //保存在task_struct 里面 k_sigaction1174 ignored = action->sa.sa_handler == SIG_IGN;1175 blocked = sigismember(&t->blocked, sig); //测试参数sig 代表的信号是否已加入至参数set信号集里. 如果信号集里已有该信号则返回1,否则返回0。1176 if (blocked || ignored) {1177 action->sa.sa_handler = SIG_DFL;1178 if (blocked) {1179 sigdelset(&t->blocked, sig);1180 recalc_sigpending_and_wake(t);1181 }1182 }1183 if (action->sa.sa_handler == SIG_DFL)1184 t->signal->flags &= ~SIGNAL_UNKILLABLE;1185 ret = specific_send_sig_info(sig, info, t);1186 spin_unlock_irqrestore(&t->sighand->siglock, flags);1187 1188 return ret;1189 }
接着走到specific_send_sig_info()
1134 static int1135 specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)1136 {1137 return send_signal(sig, info, t, 0);1138 }
specific_send_sig_info() 接着调用到send_signal()
1082 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,1083 int group)1084 {1085 int from_ancestor_ns = 0;1086 1087 #ifdef CONFIG_PID_NS1088 from_ancestor_ns = si_fromuser(info) &&1089 !task_pid_nr_ns(current, task_active_pid_ns(t));1090 #endif1091 1092 return __send_signal(sig, info, t, group, from_ancestor_ns);1093 }
send_signal() 也没有太多的处理逻辑,继续调用到__send_signal(),传入的group 参数的值是0。
978 static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,979 int group, int from_ancestor_ns)980 {...989 if (!prepare_signal(sig, t, //对特殊信号做相应处理990 from_ancestor_ns || (info == SEND_SIG_FORCED)))991 goto ret;992 993 pending = group ? &t->signal->shared_pending : &t->pending; //这时group 为0,所以信号挂到线程t 私有的pending 队列,而不是线程组共享的shared_pending 队列...1073 out_set:1074 signalfd_notify(t, sig); // Deliver the signal to listening signalfd1075 sigaddset(&pending->signal, sig);1076 complete_signal(sig, t, group); //继续往下走到这里1077 ret:1078 trace_signal_generate(sig, info, t, group, result);1079 return ret;1080 }
__send_signal()里面做了各种各样的check,因为我们这里主要是熟悉这个流程,对于里面的具体细节,不做过多的介绍,需要了解的同学可以自己看源码。
876 static void complete_signal(int sig, struct task_struct *p, int group)877 {878 struct signal_struct *signal = p->signal;879 struct task_struct *t;880 881 /*882 * Now find a thread we can wake up to take the signal off the queue.883 *884 * If the main thread wants the signal, it gets first crack.885 * Probably the least surprising to the average bear.886 */887 if (wants_signal(sig, p))888 t = p;889 else if (!group || thread_group_empty(p))890 /*891 * There is just one thread and it does not need to be woken.892 * It will dequeue unblocked signals before it runs again.893 */894 return;895 else {896 /*897 * Otherwise try to find a suitable thread.898 */899 t = signal->curr_target;900 while (!wants_signal(sig, t)) {901 t = next_thread(t);902 if (t == signal->curr_target)903 /*904 * No thread needs to be woken.905 * Any eligible threads will see906 * the signal in the queue soon.907 */908 return;909 }910 signal->curr_target = t;911 }912 913 /*914 * Found a killable thread. If the signal will be fatal,915 * then start taking the whole group down immediately.916 */917 if (sig_fatal(p, sig) &&918 !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) &&919 !sigismember(&t->real_blocked, sig) &&920 (sig == SIGKILL || !t->ptrace)) {921 /*922 * This signal will be fatal to the whole group.923 */924 if (!sig_kernel_coredump(sig)) {925 /*926 * Start a group exit and wake everybody up.927 * This way we don't have other threads928 * running and doing things after a slower929 * thread has the fatal signal pending.930 */931 signal->flags = SIGNAL_GROUP_EXIT;932 signal->group_exit_code = sig;933 signal->group_stop_count = 0;934 t = p;935 do {936 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);937 sigaddset(&t->pending.signal, SIGKILL);938 signal_wake_up(t, 1);939 } while_each_thread(p, t);940 return;941 }942 }943 944 /*945 * The signal is already in the shared-pending queue.946 * Tell the chosen thread to wake up and dequeue it.947 */948 signal_wake_up(t, sig == 
SIGKILL);949 return;950 }
complete_signal() 对信号进行了进一步处理,最后调用到signal_wake_up()
linux-4.10/include/uapi/asm-generic/signal.h
10 #define SIGHUP 111 #define SIGINT 212 #define SIGQUIT 313 #define SIGILL 4 //执行了非法指令. 通常是因为可执行文件本身出现错误, 或者试图执行数据段. 堆栈溢出时也有可能产生这个信号。14 #define SIGTRAP 515 #define SIGABRT 616 #define SIGIOT 6 //调用abort函数生成的信号17 #define SIGBUS 718 #define SIGFPE 819 #define SIGKILL 9 //用来立即结束程序的运行. 本信号不能被阻塞、处理和忽略。20 #define SIGUSR11021 #define SIGSEGV11 //试图访问未分配给自己的内存, 或试图往没有写权限的内存地址写数据.22 #define SIGUSR21223 #define SIGPIPE13 //管道破裂。这个信号通常在进程间通信产生24 #define SIGALRM1425 #define SIGTERM1526 #define SIGSTKFLT 1627 #define SIGCHLD1728 #define SIGCONT18 //让一个停止(stopped)的进程继续执行. 本信号不能被阻塞.29 #define SIGSTOP1930 #define SIGTSTP20
上面给出了一些信号对应的值,在实际中,遇到最多的情况就是SIGSEGV,也就是访问了非法地址,应该90%以上是这种情况。
linux-4.10/include/linux/sched.h
3520 static inline void signal_wake_up(struct task_struct *t, bool resume)3521 {3522 signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);3523 }
现在考虑的是SIGSEGV 信号的情况,所以上面传下来的resume 为0,调用到signal_wake_up_state() 传入的第二个参数也是0
linux-4.10/kernel/signal.c
645 void signal_wake_up_state(struct task_struct *t, unsigned int state)646 {647 set_tsk_thread_flag(t, TIF_SIGPENDING);648 /*649 * TASK_WAKEKILL also means wake it up in the stopped/traced/killable650 * case. We don't check t->state here because there is a race with it651 * executing another processor and just now entering stopped state.652 * By using wake_up_state, we ensure the process will wake up and653 * handle its death signal.654 */655 if (!wake_up_state(t, state | TASK_INTERRUPTIBLE)) 656 kick_process(t);657 }
走了一圈,又回到signal.c 这个文件里面的代码,set_tsk_thread_flag()会在thread_info.flags 中置上TIF_SIGPENDING 这个标志位,wake_up_state()和kick_process()就不深入了解了。
linux-4.10/arch/arm64/kernel/entry.S
589 el0_da:590 /*591 * Data abort handling592 */593 mrsx26, far_el1594 // enable interrupts before calling the main handler595 enable_dbg_and_irq596 ct_user_exit597 bicx0, x26, #(0xff << 56)598 movx1, x25599 movx2, sp//bl (表示带返回值的跳转) 带链接的跳转。 首先将当前指令的下一条指令地址保存在LR寄存器,然后跳转的lable//b 表示无条件跳转600 bldo_mem_abort //do_mem_abort在kernel 走了一圈,返回了601 bret_to_user //
重新回到entry.S 的el0_da 代码块,do_mem_abort跑完了之后,会继续往下,跑到ret_to_user
/*770 * "slow" syscall return path.771 */772 ret_to_user:773 disable_irq// disable interrupts774 ldrx1, [tsk, #TSK_TI_FLAGS] //tsk.reqx28 current thread_info x1 = thread_info.flags775 andx2, x1, #_TIF_WORK_MASK // x2 = x1 & _TIF_WORK_MASK 也就是取出thread_info.flags 的标志位776 cbnzx2, work_pending // 如果x2 != 0 跳到 work_pending 777 finish_ret_to_user:778 enable_step_tsk x1, x2779 kernel_exit 0780 ENDPROC(ret_to_user)
前面说了set_tsk_thread_flag()会把thread_info.flags设置成TIF_SIGPENDING(增加这个flag),所以这里会走到work_pending。
758 /*759 * Ok, we need to do extra processing, enter the slow path.760 */761 work_pending:762 movx0, sp// 'regs'763 bldo_notify_resume764 #ifdef CONFIG_TRACE_IRQFLAGS765 bltrace_hardirqs_on// enabled while in userspace766 #endif767 ldrx1, [tsk, #TSK_TI_FLAGS]// re-check for single-step768 bfinish_ret_to_user
继续往下会调用到do_notify_resume()函数
linux-4.10/arch/arm64/kernel/signal.c
402 asmlinkage void do_notify_resume(struct pt_regs *regs,403 unsigned int thread_flags)404 {405 /*406 * The assembly code enters us with IRQs off, but it hasn't407 * informed the tracing code of that for efficiency reasons.408 * Update the trace code with the current status.409 */410 trace_hardirqs_off();411 do {412 if (thread_flags & _TIF_NEED_RESCHED) {413 schedule();414 } else {415 local_irq_enable();416 417 if (thread_flags & _TIF_UPROBE)418 uprobe_notify_resume(regs);419 420 if (thread_flags & _TIF_SIGPENDING) //走的是这里421 do_signal(regs);422 423 if (thread_flags & _TIF_NOTIFY_RESUME) {424 clear_thread_flag(TIF_NOTIFY_RESUME);425 tracehook_notify_resume(regs);426 }427 428 if (thread_flags & _TIF_FOREIGN_FPSTATE)429 fpsimd_restore_current_state();430 }431 432 local_irq_disable();433 thread_flags = READ_ONCE(current_thread_info()->flags);434 } while (thread_flags & _TIF_WORK_MASK);435 }
通过前面的了解,我们知道thread_flags & _TIF_SIGPENDING 这个条件是成立的,所以继续走到do_signal(regs);
static void do_signal(struct pt_regs *regs)332 {...366 /*367 * Get the signal to deliver. When running under ptrace, at this point368 * the debugger may change all of our registers.369 */370 if (get_signal(&ksig)) {...385 handle_signal(&ksig, regs);386 return;387 }...399 restore_saved_sigmask();400 }
接着会调用handle_signal() 函数
285 /*286 * OK, we're invoking a handler287 */288 static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)289 {290 struct task_struct *tsk = current;291 sigset_t *oldset = sigmask_to_save();292 int usig = ksig->sig;293 int ret;294 295 /*296 * Set up the stack frame297 */298 if (is_compat_task()) {299 if (ksig->ka.sa.sa_flags & SA_SIGINFO)300 ret = compat_setup_rt_frame(usig, ksig, oldset, regs);301 else302 ret = compat_setup_frame(usig, ksig, oldset, regs);303 } else {304 ret = setup_rt_frame(usig, ksig, oldset, regs); //走这里305 }306 307 /*308 * Check that the resulting registers are actually sane.309 */310 ret |= !valid_user_regs(&regs->user_regs, current);311 312 /*313 * Fast forward the stepping logic so we step into the signal314 * handler.315 */316 if (!ret)317 user_fastforward_single_step(tsk);318 319 signal_setup_done(ret, ksig, 0);320 }
我们重点看setup_rt_frame()做了什么。
250 static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,251 struct pt_regs *regs)252 {253 struct rt_sigframe __user *frame;254 int err = 0;255 256 frame = get_sigframe(ksig, regs); //获取用户空间处理信号的栈257 if (!frame)258 return 1;259 260 __put_user_error(0, &frame->uc.uc_flags, err);261 __put_user_error(NULL, &frame->uc.uc_link, err);262 263 err |= __save_altstack(&frame->uc.uc_stack, regs->sp);264 err |= setup_sigframe(frame, regs, set);265 if (err == 0) {266 setup_return(regs, &ksig->ka, frame, usig);267 if (ksig->ka.sa.sa_flags & SA_SIGINFO) {268 err |= copy_siginfo_to_user(&frame->info, &ksig->info);269 regs->regs[1] = (unsigned long)&frame->info;270 regs->regs[2] = (unsigned long)&frame->uc;271 }272 }273 274 return err;275 }setup_rt_frame()会为用户空间执行信号处理函数准备好栈和相关参数。
232 static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,233 void __user *frame, int usig)234 {235 __sigrestore_t sigtramp;236 237 regs->regs[0] = usig;238 regs->sp = (unsigned long)frame;239 regs->regs[29] = regs->sp + offsetof(struct rt_sigframe, fp);240 regs->pc = (unsigned long)ka->sa.sa_handler; //之前注册的用户空间处理函数241 242 if (ka->sa.sa_flags & SA_RESTORER)243 sigtramp = ka->sa.sa_restorer;244 else245 sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); 246 247 regs->regs[30] = (unsigned long)sigtramp; //用户空间的处理函数执行完后,会调用这个函数再返回内核空间248 }
regs->pc 设置成之前用户空间传下来的处理函数,所以返回用户空间时会执行处理函数;regs->regs[30](也就是LR)设置成sigtramp 的地址,用户空间执行完处理函数后,会通过这个函数再返回内核空间。
linux-4.10/arch/arm64/kernel/vdso/vdso.lds.S
/*96 * Make the sigreturn code visible to the kernel.97 */98 VDSO_sigtramp= __kernel_rt_sigreturn;
linux-4.10/arch/arm64/kernel/vdso/sigreturn.S
28 ENTRY(__kernel_rt_sigreturn)29 .cfi_startproc30 .cfi_signal_frame31 .cfi_def_cfax29, 032 .cfi_offsetx29, 0 * 833 .cfi_offsetx30, 1 * 834 movx8, #__NR_rt_sigreturn35 svc#036 .cfi_endproc37 ENDPROC(__kernel_rt_sigreturn)所以上面的sigtramp 里面插入了一个系统调用__NR_rt_sigreturn,也就是信号处理后返回。
bionic/linker/debugger.cpp
302 __LIBC_HIDDEN__ void debuggerd_init() {...306 action.sa_sigaction = debuggerd_signal_handler;...321 }
再回到debuggerd_init()中,kernel 将信号发送出来后,经过许多环节的处理,会返回用户空间,调用debuggerd_signal_handler()这个处理函数。
262 static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void*) {...271 send_debuggerd_packet(info);...294 int rc = syscall(SYS_rt_tgsigqueueinfo, getpid(), gettid(), signal_number, info);295 if (rc != 0) {296 __libc_format_log(ANDROID_LOG_FATAL, "libc", "failed to resend signal during crash: %s",297 strerror(errno));298 _exit(0);299 }300 }这里会调用send_debuggerd_packet() 向debuggerd进程发送信息。
208 static void send_debuggerd_packet(siginfo_t* info) {...226 int s = socket_abstract_client(DEBUGGER_SOCKET_NAME, SOCK_STREAM | SOCK_CLOEXEC); //创建socket连接227 if (s == -1) {228 __libc_format_log(ANDROID_LOG_FATAL, "libc", "Unable to open connection to debuggerd: %s",229 strerror(errno));230 return;231 }232 233 // debuggerd knows our pid from the credentials on the234 // local socket but we need to tell it the tid of the crashing thread.235 // debuggerd will be paranoid and verify that we sent a tid236 // that's actually in our process.237 debugger_msg_t msg;238 msg.action = DEBUGGER_ACTION_CRASH;239 msg.tid = gettid(); //消息中有tid,也就是出现异常的线程号240 msg.abort_msg_address = reinterpret_cast<uintptr_t>(g_abort_message);241 msg.original_si_code = (info != nullptr) ? info->si_code : 0;242 ret = TEMP_FAILURE_RETRY(write(s, &msg, sizeof(msg))); //将消息通过socket 发送给debuggerd...255 close(s);256 }
接着debuggerd 会收到发送过来的消息
system/core/debuggerd/debuggerd.cpp
921 int main(int argc, char** argv) {922 union selinux_callback cb;923 if (argc == 1) {924 cb.func_audit = audit_callback;925 selinux_set_callback(SELINUX_CB_AUDIT, cb);926 cb.func_log = selinux_log_callback;927 selinux_set_callback(SELINUX_CB_LOG, cb);928 return do_server();929 }930 931 bool dump_backtrace = false;932 bool have_tid = false;933 pid_t tid = 0;934 for (int i = 1; i < argc; i++) {935 if (!strcmp(argv[i], "-b")) {936 dump_backtrace = true;937 } else if (!have_tid) {938 tid = atoi(argv[i]);939 have_tid = true;940 } else {941 usage();942 return 1;943 }944 }945 if (!have_tid) {946 usage();947 return 1;948 }949 return do_explicit_dump(tid, dump_backtrace);950 }
debuggerd进程运行起来之后,会根据参数走不同的流程,如果是默认的走do_server(),也就是创建一个socket server,等待client端连接,处理相应的请求。另一种情况就是我们手动调用它dump 某个进程的backtrace,前面有讲过,adb shell debuggerd -b pid .
842 static int do_server() {843 // debuggerd crashes can't be reported to debuggerd.844 // Reset all of the crash handlers.845 signal(SIGABRT, SIG_DFL);846 signal(SIGBUS, SIG_DFL);847 signal(SIGFPE, SIG_DFL);848 signal(SIGILL, SIG_DFL);849 signal(SIGSEGV, SIG_DFL);850 #ifdef SIGSTKFLT851 signal(SIGSTKFLT, SIG_DFL);852 #endif853 signal(SIGTRAP, SIG_DFL);854 855 // Ignore failed writes to closed sockets856 signal(SIGPIPE, SIG_IGN);857 858 // Block SIGCHLD so we can sigtimedwait for it.859 sigset_t sigchld;860 sigemptyset(&sigchld);861 sigaddset(&sigchld, SIGCHLD);862 sigprocmask(SIG_SETMASK, &sigchld, nullptr);863 864 int s = socket_local_server(SOCKET_NAME, ANDROID_SOCKET_NAMESPACE_ABSTRACT, //创建socket ,充当server端865 SOCK_STREAM | SOCK_CLOEXEC);866 if (s == -1) return 1;867 868 // Fork a process that stays root, and listens on a pipe to pause and resume the target.869 if (!start_signal_sender()) {870 ALOGE("debuggerd: failed to fork signal sender");871 return 1;872 }873 874 ALOGI("debuggerd: starting\n");875 876 for (;;) { //循环等待877 sockaddr_storage ss;878 sockaddr* addrp = reinterpret_cast<sockaddr*>(&ss);879 socklen_t alen = sizeof(ss);880 881 ALOGV("waiting for connection\n");882 int fd = accept4(s, addrp, &alen, SOCK_CLOEXEC); //等待client端来连接883 if (fd == -1) {884 ALOGE("accept failed: %s\n", strerror(errno));885 continue;886 }887 888 handle_request(fd); //处理client端的请求889 }890 return 0;891 }
do_server() 会先把debuggerd 自身的crash 信号处理函数恢复为默认处理(SIG_DFL),因为debuggerd 自己crash 时无法再上报给debuggerd;然后创建socket,充当server,调用accept4()等待client端来连接,收到连接后,调用handle_request(fd)处理。
801 static void handle_request(int fd) {802 ALOGV("handle_request(%d)\n", fd);803 804 ScopedFd closer(fd);805 debugger_request_t request;806 memset(&request, 0, sizeof(request));807 int status = read_request(fd, &request); //读取client端发生过来的消息808 if (status != 0) {809 return;810 }...831 // Fork a child to handle the rest of the request.832 pid_t fork_pid = fork();833 if (fork_pid == -1) {834 ALOGE("debuggerd: failed to fork: %s\n", strerror(errno));835 } else if (fork_pid == 0) {836 worker_process(fd, request); //fork 出子进程来处理837 } else {838 monitor_worker_process(fork_pid, request);839 }840 }
handle_request()会把client 端,也就是信号处理函数中发送过来的信息读出来,然后创建出一个子进程继续处理。
565 static void worker_process(int fd, debugger_request_t& request) {...598 // Attach to the target process.599 if (!ptrace_attach_thread(request.pid, request.tid)) { //ptrace 进程600 ALOGE("debuggerd: ptrace attach failed: %s", strerror(errno));601 exit(1);602 }...608 if (request.action == DEBUGGER_ACTION_CRASH) { //通过前面的代码我们知道action是 DEBUGGER_ACTION_CRASH609 pid_t pid;610 uid_t uid;611 gid_t gid;612 if (get_process_info(request.tid, &pid, &uid, &gid) != 0) {613 ALOGE("debuggerd: failed to get process info for tid '%d'", request.tid);614 exit(1);615 }...624 }625 626 // Don't attach to the sibling threads if we want to attach gdb.627 // Supposedly, it makes the process less reliable.628 bool attach_gdb = should_attach_gdb(request);629 if (attach_gdb) {630 // Open all of the input devices we need to listen for VOLUMEDOWN before dropping privileges.631 if (init_getevent() != 0) {632 ALOGE("debuggerd: failed to initialize input device, not waiting for gdb");633 attach_gdb = false;634 }636 }...662 int crash_signal = SIGKILL;663 succeeded = perform_dump(request, fd, tombstone_fd, backtrace_map.get(), siblings, //dump 异常进程的 寄存器 和backtrace 信息664 &crash_signal, amfd_data.get());...692 for (pid_t sibling : siblings) {693 ptrace(PTRACE_DETACH, sibling, 0, 0); //ptrace DETACH694 }...717 718 close(amfd); //关闭socket连接719 720 exit(!succeeded);721 }worker_process()中,会PTRACE_ATTACH 上发生异常的进程,然后dump 出进程的信息用于debug,最后PTRACE_DETACH 该进程,关于ptrace 的功能,这里不再介绍,用户空间调用ptrace 实际上是系统调用的接口,真正的实现在kernel中。
483 static bool perform_dump(const debugger_request_t& request, int fd, int tombstone_fd,484 BacktraceMap* backtrace_map, const std::set<pid_t>& siblings,485 int* crash_signal, std::string* amfd_data) {...492 while (true) {493 int signal = wait_for_signal(request.tid, &total_sleep_time_usec);494 switch (signal) {495 case -1:496 ALOGE("debuggerd: timed out waiting for signal");497 return false;498 ...517 case SIGABRT:518 case SIGBUS:519 case SIGFPE:520 case SIGILL:521 case SIGSEGV:522 #ifdef SIGSTKFLT523 case SIGSTKFLT:524 #endif525 case SIGSYS:526 case SIGTRAP:527 ALOGV("stopped -- fatal signal\n");528 *crash_signal = signal;529 engrave_tombstone(tombstone_fd, backtrace_map, request.pid, request.tid, siblings, signal,530 request.original_si_code, request.abort_msg_address, amfd_data);531 break;532 533 default:534 ALOGE("debuggerd: process stopped due to unexpected signal %d\n", signal);535 break;536 }537 break;538 }539 540 return true;541 }
ptrace 上出现异常的进程后,该进程会重新跑起来,还是跑出现异常的那段代码,所以又会发生异常,但是这时候的异常信号不会发送给debuggerd_signal_handler()处理函数,而是给当前的debuggerd进程。debuggerd进程收到信号后会调用engrave_tombstone().
system/core/debuggerd/tombstone.cpp688 void engrave_tombstone(int tombstone_fd, BacktraceMap* map, pid_t pid, pid_t tid,689 const std::set<pid_t>& siblings, int signal, int original_si_code,690 uintptr_t abort_msg_address, std::string* amfd_data) {691 log_t log;692 log.current_tid = tid;693 log.crashed_tid = tid;694 695 if (tombstone_fd < 0) {696 ALOGE("debuggerd: skipping tombstone write, nothing to do.\n");697 return;698 }699 700 log.tfd = tombstone_fd;701 log.amfd_data = amfd_data;702 dump_crash(&log, map, pid, tid, siblings, signal, original_si_code, abort_msg_address);703 }
走到dump_crash() dump相关信息
607 // Dumps all information about the specified pid to the tombstone.608 static void dump_crash(log_t* log, BacktraceMap* map, pid_t pid, pid_t tid,609 const std::set<pid_t>& siblings, int signal, int si_code,610 uintptr_t abort_msg_address) {611 // don't copy log messages to tombstone unless this is a dev device612 char value[PROPERTY_VALUE_MAX];613 property_get("ro.debuggable", value, "0");614 bool want_logs = (value[0] == '1');615 616 _LOG(log, logtype::HEADER,617 "*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n");618 dump_header_info(log);619 dump_thread(log, pid, tid, map, signal, si_code, abort_msg_address, true);620 if (want_logs) {621 dump_logs(log, pid, 5);622 }623 624 if (!siblings.empty()) {625 for (pid_t sibling : siblings) {626 dump_thread(log, pid, sibling, map, 0, 0, 0, false);627 }628 }629 630 if (want_logs) {631 dump_logs(log, pid, 0);632 }633 }
- android 抓取crash信息流程简介
- Android Crash抓取处理
- Android Crash抓取处理
- 抓取crash
- Android Crash日志抓取及保存
- Android平台抓取native crash log
- Android Crash处理流程分析
- Android crash 默认处理流程
- Android 抓取log信息
- android 自己采集crash信息
- android crash / debug 堆栈信息
- Android应用Crash信息收集
- Android程序崩溃 crash信息
- 保留所有Android crash信息
- Windows Phone & Windows App应用程序崩溃crash信息抓取方法
- Android和IOS获取crash信息
- android crash的log信息自动保存
- Android APP 获取crash异常信息
- Android端关于HTTPS的认证---->可用
- Struts2结合Bootstrap-Table分页的使用
- 深入理解c语言指针-第六章
- docker项目开发部署
- 二进制转十进制快速方法
- android 抓取crash信息流程简介
- 对一个人身高体重进行排序(结构体)
- 目录---FlappyBird游戏开发教程
- 南京邮电大学单片机实验一:使用集成开发环境Keil进行程序开发
- 《程序员的成长课》新书日销量排名第4
- 勾股定理
- 对象引用与对象的区别
- docker在windows中安装
- 建立五个员工链表(头插法)