android—init进程如何重启service

来源:互联网 发布:长城网络还原大师bios 编辑:程序博客网 时间:2024/06/09 20:02

在《android—init.rc的读取》中介绍过,init进程会启动很多native的service,这些service如果不是oneshot的,当service出现异常挂掉后,init需要将其重新启动起来,那么具体是如何操作的?其实主要是借助了信号和socket来实现。

在init的main()函数中,首先进行了signal相关的初始化,设置了init对SIGCHLD(native的service都是在init中通过fork新建的子进程,子进程挂掉后会给init发送SIGCHLD信号)的信号处理函数,

queue_builtin_action(signal_init_action, "signal_init");static int signal_init_action(int nargs, char **args){    signal_init();    return 0;}void signal_init(void){    int s[2];    struct sigaction act;    memset(&act, 0, sizeof(act));    act.sa_handler = sigchld_handler;    act.sa_flags = SA_NOCLDSTOP;    sigaction(SIGCHLD, &act, 0);    //创建一个socketpair,一个读fd一个写fd    /* create a signalling mechanism for the sigchld handler */    if (socketpair(AF_UNIX, SOCK_STREAM, 0, s) == 0) {        signal_fd = s[0];        signal_recv_fd = s[1];        fcntl(s[0], F_SETFD, FD_CLOEXEC);        fcntl(s[0], F_SETFL, O_NONBLOCK);        fcntl(s[1], F_SETFD, FD_CLOEXEC);        fcntl(s[1], F_SETFL, O_NONBLOCK);    }    handle_signal();}

SIGCHLD的信号处理函数sigchld_handler()所做的事情,就是向signal_fd中写数据;由于signal_fd和signal_recv_fd是同一个socketpair的两端,这时signal_recv_fd就会收到数据。那么,init肯定在某处对这个signal_recv_fd进行了poll。

//接收到SIGCHLD的处理函数//往signal_fd里随便写个东西,socket pair的另外一端马上就能受到static void sigchld_handler(int s){    write(signal_fd, &s, 1);}

在init的for循环中,发现确实对signal_recv_fd进行了poll,

        if (!signal_fd_init && get_signal_fd() > 0) {            //信号处理函数在子进程挂掉后会给signal_fd写东西            //这时候socketpair的对端,signal_recv_fd会受到,这里监听了该signal_recv_fd            ufds[fd_count].fd = get_signal_fd();            ufds[fd_count].events = POLLIN;            ufds[fd_count].revents = 0;            fd_count++;            signal_fd_init = 1;        }int get_signal_fd(){    return signal_recv_fd;}

当signal_recv_fd描述符poll触发返回时,执行handle_signal()函数,

        /*
         * Excerpt from the loop in init's main(): dispatch every descriptor
         * that poll() reported as readable to its handler.
         */
        for (i = 0; i < fd_count; i++) {
            /*
             * Test the POLLIN bit instead of comparing for equality:
             * poll() may set POLLHUP/POLLERR together with POLLIN, and an
             * exact comparison would then skip a readable descriptor.
             */
            if (ufds[i].revents & POLLIN) {
                if (ufds[i].fd == get_property_set_fd())
                    handle_property_set_fd();
                else if (ufds[i].fd == get_keychord_fd())
                    handle_keychord();
                else if (ufds[i].fd == get_signal_fd())
                    /* A child exited: reap it and mark services for restart. */
                    handle_signal();
            }
        }

而handle_signal()的实现如下,

void handle_signal(void){    char tmp[32];   //从signal_recv_fd中读数据,    /* we got a SIGCHLD - reap and restart as needed */    read(signal_recv_fd, tmp, sizeof(tmp));    while (!wait_for_one_process(0))        ;}

wait_for_one_process的主要功能是回收已退出的子进程,然后将需要重启的service的状态设置为SVC_RESTARTING。同时需要注意,它还会执行该service的onrestart选项中配置的命令,通常就是去重启与之相关的其他service,

/*    这个函数基本都是返回0*/static int wait_for_one_process(int block){    pid_t pid;    int status;    struct service *svc;    struct socketinfo *si;    time_t now;    struct listnode *node;    struct command *cmd;    //等待子进程执行完退出    //通过pid找到service,重启service不在这里,这里只负责启动service下面的onrestart    while ( (pid = waitpid(-1, &status, block ? 0 : WNOHANG)) == -1 && errno == EINTR );    if (pid <= 0) return -1;    INFO("waitpid returned pid %d, status = %08x\n", pid, status);    svc = service_find_by_pid(pid);    if (!svc) {        ERROR("untracked pid %d exited\n", pid);        return 0;    }    NOTICE("process '%s', pid %d exited\n", svc->name, pid);    //    if (!(svc->flags & SVC_ONESHOT) || (svc->flags & SVC_RESTART)) {        kill(-pid, SIGKILL);        NOTICE("process '%s' killing any children in process group\n", svc->name);    }    /* remove any sockets we may have created */    for (si = svc->sockets; si; si = si->next) {        char tmp[128];        snprintf(tmp, sizeof(tmp), ANDROID_SOCKET_DIR"/%s", si->name);        unlink(tmp);    }    svc->pid = 0;    svc->flags &= (~SVC_RUNNING);        /* oneshot processes go into the disabled state on exit,         * except when manually restarted. 
*/    if ((svc->flags & SVC_ONESHOT) && !(svc->flags & SVC_RESTART)) {        svc->flags |= SVC_DISABLED;    }        /* disabled and reset processes do not get restarted automatically */    if (svc->flags & (SVC_DISABLED | SVC_RESET) )  {        notify_service_state(svc->name, "stopped");        return 0;    }    now = gettime();    //如果service是critical的,崩了4次,还有个4分钟后,android就重启进入recovery了    if ((svc->flags & SVC_CRITICAL) && !(svc->flags & SVC_RESTART)) {        if (svc->time_crashed + CRITICAL_CRASH_WINDOW >= now) {            if (++svc->nr_crashed > CRITICAL_CRASH_THRESHOLD) {                ERROR("critical process '%s' exited %d times in %d minutes; "                      "rebooting into recovery mode\n", svc->name,                      CRITICAL_CRASH_THRESHOLD, CRITICAL_CRASH_WINDOW / 60);                android_reboot(ANDROID_RB_RESTART2, 0, "recovery");                return 0;            }        } else {            svc->time_crashed = now;            svc->nr_crashed = 1;        }    }    svc->flags &= (~SVC_RESTART);    svc->flags |= SVC_RESTARTING;    /* Execute all onrestart commands for this service. */    //重启这个service下的onrestart这个action下的所有commands    list_for_each(node, &svc->onrestart.commands) {        cmd = node_to_item(node, struct command, clist);        cmd->func(cmd->nargs, cmd->args);    }    notify_service_state(svc->name, "restarting");    return 0;}

上面只是设置了service的状态为SVC_RESTARTING,真正重启的地方在main函数中的for循环的restart_processes(),

restart_processes();static void restart_processes(){    process_needs_restart = 0;    //只会去重启service状态是SVC_RESTARTING的service,    service_for_each_flags(SVC_RESTARTING,                           restart_service_if_needed);}void service_for_each_flags(unsigned matchflags,                            void (*func)(struct service *svc)){    struct listnode *node;    struct service *svc;    list_for_each(node, &service_list) {        svc = node_to_item(node, struct service, slist);        if (svc->flags & matchflags) {            func(svc);        }    }}static void restart_service_if_needed(struct service *svc){    time_t next_start_time = svc->time_started + 5;    if (next_start_time <= gettime()) {        svc->flags &= (~SVC_RESTARTING);        //重启service        service_start(svc, NULL);        return;    }    if ((next_start_time < process_needs_restart) ||        (process_needs_restart == 0)) {        process_needs_restart = next_start_time;    }}

至此,init就完成了对service的重启,保证了某些关键service一直运行。

0 0
原创粉丝点击