Android——coredump 配置

来源:互联网 发布:公司内部聊天软件 编辑:程序博客网 时间:2024/05/16 05:45

撰写不易,转载需注明出处:http://blog.csdn.net/jscese/article/details/46699117 ,本文来自【jscese】的博客!


kernel支持:

由linux支持,进程崩溃时记录存储堆栈空间,寄存器等相关内容,保留致命现场数据,便于分析查找根源。
编译项:

Symbol: COREDUMP [=y]
Type  : boolean
Prompt: Enable core dump support
  Location:
(1) -> Userspace binary formats
  Defined at fs/Kconfig.binfmt:182

/kernel/signal.c中的信号处理函数:get_signal_to_deliver中有这么一段:

        if (sig_kernel_coredump(signr)) {            if (print_fatal_signals)                print_fatal_signal(info->si_signo);            proc_coredump_connector(current);            /*             * If it was able to dump core, this kills all             * other threads in the group and synchronizes with             * their demise.  If we lost the race with another             * thread getting here, it set group_exit_code             * first and our do_group_exit call below will use             * that value and ignore the one we pass it.             */            do_coredump(info);        }

其中 sig_kernel_coredump(signr) 宏展开如下:

(((signr) < 32) && ((1UL << ((signr) - 1)) & ((\        (1UL << ((3) - 1))   |  (1UL << ((4) - 1))    | \    (1UL << ((5) - 1))   |  (1UL << ((6) - 1))   | \        (1UL << ((8) - 1))    |  (1UL << ((11) - 1))   | \    (1UL << ((10) - 1))    |  (1UL << ((12) - 1))    | \        (1UL << ((24) - 1))   |  (1UL << ((25) - 1))   | \    (1UL << ((7) - 1))      

可对照 signal 列表查看上面各编号对应的信号。

do_coredump函数实现在/fs/coredump.c

void do_coredump(siginfo_t *siginfo){...struct mm_struct *mm = current->mm;struct linux_binfmt * binfmt;...    struct coredump_params cprm = {        .siginfo = siginfo,        .regs = signal_pt_regs(),        .limit = rlimit(RLIMIT_CORE),  //获取当前进程的rlimit        /*         * We must use the same mm->flags while dumping core to avoid         * inconsistency of bit flags, since this flag is not protected         * by any locks.         */        .mm_flags = mm->flags,    };...binfmt = mm->binfmt;...    if (cprm.limit < binfmt->min_coredump)    //判断rlimit 必须大于定的一个最小值:             goto fail_unlock;...}
这个最小值定义在 binfmt_elf.c 中:

static struct linux_binfmt elf_format = {
    .module     = THIS_MODULE,
    .load_binary    = load_elf_binary,
    .load_shlib = load_elf_library,
    .core_dump  = elf_core_dump,
    .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define ELF_EXEC_PAGESIZE 4096

kernel 部分先记录这些准备信息,如有需要可再深入查看 coredump 时抓取、存储数据的细节。


Android native 层面配置

上patch:

diff --git a/init.{ro.hardware}.rc b/init.{ro.hardware}.rcindex 8571839..a161071 100755--- a/init.{ro.hardware}.rc+++ b/init.ro.hardware.rc@@ -106,6 +106,10 @@ on fsmount debugfs none /sys/kernel/debug+    # config coredump+   mkdir /data/coredump 0777 system system+   checkenable /data/coredump/enable+   # enddiff --git a/init/builtins.c b/init/builtins.cindex 81f9b2c..c21a192 100755--- a/init/builtins.c+++ b/init/builtins.c@@ -949,3 +949,60 @@ int do_wait(int nargs, char **args)     } else         return -1; }++/*(jiangbin: command check enable from file;operation )*/+int do_checkenable(int nargs, char **args) {++   int fd,len;+   char buf[10];+++   ERROR("do_checkenable in init");+    if (nargs == 2) {++       if((fd = open(args[1], O_RDONLY|O_CREAT, 0664)) < 0)+       {+           return -1;+       }++       len = read(fd, buf, sizeof buf);+       if (len < 0) {+           close (fd);+           return -1;+       }+       close (fd);+       buf[len] = '\0';+       if(atoi(buf)==1) /*is enable*/+       {+           if(strcmp(args[1],"/data/coredump/enable")==0)+           {+               struct rlimit coredump;+               memset(&coredump, 0, sizeof(struct rlimit));+               coredump.rlim_cur = RLIM_INFINITY;+               coredump.rlim_max = RLIM_INFINITY;+               if(setrlimit(RLIMIT_CORE, &coredump)==0)+               {+                   NOTICE("in init command do_checkenable coredump cur==%lu , max==%lu pid==%lu\n",coredump.rlim_cur,coredump.rlim_max, getpid());+               }else+               {+                   ERROR("setrlimit unlimit fail");+                   return -1;+               }+               int iret=0;+               iret=write_file("/proc/sys/kernel/core_pattern","/data/coredump/core.%e.%p.%s");+               iret=write_file("/proc/sys/fs/suid_dumpable","1");+               return iret;+++           }++       }+++       return 0;++    }+    return -1;++}+/*end*/diff --git a/init/init_parser.c b/init/init_parser.cold 
mode 100644new mode 100755index 6466db2..569b910--- a/init/init_parser.c+++ b/init/init_parser.c@@ -89,6 +89,7 @@ static int lookup_keyword(const char *s)         if (!strcmp(s, "hown")) return K_chown;         if (!strcmp(s, "hmod")) return K_chmod;         if (!strcmp(s, "ritical")) return K_critical;+        if (!strcmp(s, "heckenable")) return K_checkenable;/*jiangbin add for checkenable*/         break;     case 'd':         if (!strcmp(s, "isabled")) return K_disabled;diff --git a/init/keywords.h b/init/keywords.hold mode 100644new mode 100755index 2d97e5b..cf8792d--- a/init/keywords.h+++ b/init/keywords.h@@ -41,6 +41,7 @@ int do_loglevel(int nargs, char **args); int do_load_persist_props(int nargs, char **args); int do_load_all_props(int nargs, char **args); int do_wait(int nargs, char **args);+int do_checkenable(int nargs, char **args); #define __MAKE_KEYWORD_ENUM__ #define KEYWORD(symbol, flags, nargs, func) K_##symbol, enum {@@ -104,6 +105,7 @@ enum {     KEYWORD(load_persist_props,    COMMAND, 0, do_load_persist_props)     KEYWORD(load_all_props,        COMMAND, 0, do_load_all_props)     KEYWORD(ioprio,      OPTION,  0, 0)+    KEYWORD(checkenable,        COMMAND, 1, do_checkenable) #ifdef __MAKE_KEYWORD_ENUM__     KEYWORD_COUNT, };

这里添加了一个 checkenable 命令作为开关:只有当 /data/coredump/enable 文件的内容为 1 时才会开启 coredump,方便修改操作。

最终生成的 core 文件路径为 /data/coredump/core.%e.%p.%s,core_pattern 中各格式符的含义如下:

%p 出Core进程的PID
%u 出Core进程的UID
%s 造成Core的signal号
%t 出Core的时间,从1970-01-01 00:00:00开始的秒数
%e 出Core进程对应的可执行文件名

可使用 ulimit -c 查看当前 core 文件大小限制(即 RLIMIT_CORE)。


Android Application 层配置:

按道理,init 进程按照上面那样设置之后,它的子进程 zygote 应该也具备 coredump 能力,zygote fork 出来的 app 进程自然也是。
但是发现 zygote 在 fork 进程之后,还会执行下面这个函数:

    private static void callPostForkChildHooks(int debugFlags, String instructionSet) {        long startTime = SystemClock.elapsedRealtime();        VM_HOOKS.postForkChild(debugFlags, instructionSet);        checkTime(startTime, "Zygote.callPostForkChildHooks");    }

中调用到 dalvik.system.ZygoteHooks 中进一步初始化
libcore/dalvik/src/main/java/dalvik/system/ZygoteHooks.java
native:
/art/runtime/native/dalvik_system_ZygoteHooks.cc

调用逻辑不多描述,其中会进入这个函数:

static void EnableDebugger() {  // To let a non-privileged gdbserver attach to this  // process, we must set our dumpable flag.#if defined(HAVE_PRCTL)  if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {    PLOG(ERROR) << "prctl(PR_SET_DUMPABLE) failed for pid " << getpid();  }#endif  // We don't want core dumps, though, so set the core dump size to 0.  rlimit rl;  rl.rlim_cur = 0;  rl.rlim_max = RLIM_INFINITY;  if (setrlimit(RLIMIT_CORE, &rl) == -1) {    PLOG(ERROR) << "setrlimit(RLIMIT_CORE) failed for pid " << getpid();  }}

所以 zygote fork 出来的进程,其 RLIMIT_CORE 的当前值(rlim_cur)最终都被设置为 0,
导致无法 coredump。

解决办法patch如下:

diff --git a/core/jni/com_android_internal_os_Zygote.cpp b/core/jni/com_android_internal_os_Zygote.cppold mode 100644new mode 100755index 4f5e08b..f9782c3--- a/core/jni/com_android_internal_os_Zygote.cpp+++ b/core/jni/com_android_internal_os_Zygote.cpp@@ -35,6 +35,7 @@ #include <sys/utsname.h> #include <sys/wait.h>+#include <cutils/properties.h> #include <cutils/fs.h> #include <cutils/multiuser.h>@@ -165,6 +166,26 @@ static void SetGids(JNIEnv* env, jintArray javaGids) {   } }-       delete se_name; ++/*actions_code(jiangbin:native interface to setrlimit for app process:coredump)*/+static void Zygote_nativesetrlimit()+{+   struct rlimit coredump;+   memset(&coredump, 0, sizeof(struct rlimit));+   coredump.rlim_cur = RLIM_INFINITY;+   coredump.rlim_max = RLIM_INFINITY;+   if(setrlimit(RLIMIT_CORE, &coredump)==0)+   {+       ALOGD("in zygotejni setunlimit cur==%lu , max==%lu pid==%lu\n",coredump.rlim_cur,coredump.rlim_max, getpid());+   }else+   {+       ALOGE("setrlimit unlimit fail in zygotejni");+   }+}+/*end*/+++ // Sets the resource limits via setrlimit(2) for the values in the // two-dimensional array of integers that's passed in. The second dimension // contains a tuple of length 3: (resource, rlim_cur, rlim_max). NULL is@@ -577,6 +598,17 @@ static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArra       ALOGE("Error calling post fork hooks.");       RuntimeAbort(env);     }++    /*actions_code(jiangbin:to setrlimit for app process:coredump after PostForkChildHooks)*/+    char propcoredump[PROPERTY_VALUE_MAX];+    property_get("persist.sys.zygotedump", propcoredump, "");+   if(strstr(propcoredump,se_name_c_str)!=NULL)+   {+      Zygote_nativesetrlimit();+   }+   /*end*/+       delete se_name; +   } else if (pid > 0) {     // the parent process   }

同样留有开关用于控制:只有当属性 persist.sys.zygotedump 的值中包含该进程的 se_name 时,才会为它解除 core 大小限制。
下篇记录 coredump 文件解析流程

1 0
原创粉丝点击