kernel对R状态task在4s不被调度的检测

来源:互联网 发布:arp欺骗工具 windows 编辑:程序博客网 时间:2024/05/17 02:30
内核死锁分为D状态和R状态,之前介绍过D状态了,接下来看看kernel对R状态死锁的检测。其入口函数如下:void __init lockup_detector_init(void){// 设置检测的频率,默认是每4s检测一次set_sample_period();//R状态检测会为每个cpu 创建一个thread,来检测当前cpu是否处于R状态死锁,因此这里的watchdog_cpumask 决定要在哪些cpu上创建thread#ifdef CONFIG_NO_HZ_FULLif (tick_nohz_full_enabled()) {pr_info("Disabling watchdog on nohz_full cores by default\n");cpumask_copy(&watchdog_cpumask, housekeeping_mask);} elsecpumask_copy(&watchdog_cpumask, cpu_possible_mask);#elsecpumask_copy(&watchdog_cpumask, cpu_possible_mask);#endifif (watchdog_enabled)watchdog_enable_all_cpus();}为啥是4s检测一次呢?int __read_mostly watchdog_thresh = 10;static int get_softlockup_thresh(void){return watchdog_thresh * 2;}static void set_sample_period(void){/* * convert watchdog_thresh from seconds to ns * the divide by 5 is to give hrtimer several chances (two * or three with the current relation between the soft * and hard thresholds) to increment before the * hardlockup detector generates a warning */sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);watchdog_update_hrtimer_threshold(sample_period);}可以看出sample_period=10*2*1s/5 =4s。回到lockup_detector_init 函数中,最终通过watchdog_enable_all_cpus 来在每个cpu上创建thread来检测R状态死锁:static int watchdog_enable_all_cpus(void){int err = 0;//第一次进来watchdog_running 为0,因此调用smpboot_register_percpu_thread_cpumask 在watchdog_cpumask 表示的cpu上创建thread,默认watchdog_cpumask 就是cpu_possible_mask,即所有possible的cpuif (!watchdog_running) {err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,     &watchdog_cpumask);if (err)pr_err("Failed to create watchdog threads, disabled\n");elsewatchdog_running = 1;} else {/* * Enable/disable the lockup detectors or * change the sample period 'on the fly'. 
*///更新定时器的到期时间err = update_watchdog_all_cpus();if (err) {watchdog_disable_all_cpus();pr_err("Failed to update lockup detectors, disabled\n");}}if (err)watchdog_enabled = 0;return err;}我们先看看watchdog_threads:static struct smp_hotplug_thread watchdog_threads = {.store= &softlockup_watchdog,.thread_should_run= watchdog_should_run,.thread_fn= watchdog,.thread_comm= "watchdog/%u",.setup= watchdog_enable,.cleanup= watchdog_cleanup,.park= watchdog_disable,.unpark= watchdog_enable,};smpboot_register_percpu_thread_cpumask 会为每个cpu创建thread,正常情况下会先调用watchdog_threads的setup来初始化:static void watchdog_enable(unsigned int cpu){//每个cpu都有一个高精度定时器指针hrtimerstruct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);//初始化定时器,并设置定时器到期处理函数为watchdog_timer_fn/* kick off the timer for the hardlockup detector */hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);hrtimer->function = watchdog_timer_fn;/* Enable the perf event */watchdog_nmi_enable(cpu);//开始定时器工作/* done here because hrtimer_start can only pin to smp_processor_id() */hrtimer_start(hrtimer, ns_to_ktime(sample_period),      HRTIMER_MODE_REL_PINNED);/* initialize timestamp *///将这个thread的优先级设置为最高,只有最高才能不被其他thread 抢占watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);//执行首次喂狗动作__touch_watchdog();}我们看看如何喂狗的?/* Commands for resetting the watchdog */static void __touch_watchdog(void){__this_cpu_write(watchdog_touch_ts, get_timestamp());}原来也是把时间get_timestamp()写入每个cpu 变量。smpboot_register_percpu_thread_cpumask 创建的thread执行完成setup后,就会调用thread_should_run 来判断thread是否要继续运行:static int watchdog_should_run(unsigned int cpu){return __this_cpu_read(hrtimer_interrupts) !=__this_cpu_read(soft_lockup_hrtimer_cnt);}从这里可以看出只有hrtimer_interrupts 不等于 soft_lockup_hrtimer_cnt才是正常的情况,这说明hrtimer_interrupts 一直在更新,系统没有处于R状态死锁。而hrtimer_interrupts 是在高精度定时器的回调函数watchdog_timer_fn->watchdog_interrupt_count 中更新的:static void watchdog_interrupt_count(void){__this_cpu_inc(hrtimer_interrupts);}smpboot_register_percpu_thread_cpumask 调用thread_should_run 返回true后,就会调用thread_fn 
函数来运行thread:static void watchdog(unsigned int cpu){__this_cpu_write(soft_lockup_hrtimer_cnt, __this_cpu_read(hrtimer_interrupts));__touch_watchdog();}可以看到这个函数会调用__touch_watchdog 来喂狗,然后会将hrtimer_interrupts的值写到soft_lockup_hrtimer_cnt 中。这样如果系统处于R状态死锁了,就会导致高精度定时器不会到期,那下一次调用thread_should_run的时候由于hrtimer_interrupts 没有更新就会导致thread_should_run 返回false,就不会调用watchdog来喂狗,这样就等于检测到R状态死锁。检测到死锁后,会在下一次高精度定时器到期后在watchdog_timer_fn 中处理:static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer){//死锁后这个duration 就不为0(其值是已经卡住的秒数,也就是下面log中%us打印的值)duration = is_softlockup(touch_ts);if (unlikely(duration)) {/* * If a virtual machine is stopped by the host it can look to * the watchdog like a soft lockup, check to see if the host * stopped the vm before we issue the warning */if (kvm_check_and_clear_guest_paused())return HRTIMER_RESTART;/* only warn once */if (__this_cpu_read(soft_watchdog_warn) == true) {/* * When multiple processes are causing softlockups the * softlockup detector only warns on the first one * because the code relies on a full quiet cycle to * re-arm.  The second process prevents the quiet cycle * and never gets reported.  Use task pointers to detect * this. */if (__this_cpu_read(softlockup_task_ptr_saved) !=    current) {__this_cpu_write(soft_watchdog_warn, false);__touch_watchdog();}return HRTIMER_RESTART;}if (softlockup_all_cpu_backtrace) {/* Prevent multiple soft-lockup reports if one cpu is already * engaged in dumping cpu back traces */if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {/* Someone else will report us. Let's give up */__this_cpu_write(soft_watchdog_warn, true);return HRTIMER_RESTART;}}就会看到下面这段log:pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! 
[%s:%d]\n",smp_processor_id(), duration,current->comm, task_pid_nr(current));__this_cpu_write(softlockup_task_ptr_saved, current);print_modules();print_irqtrace_events(current);if (regs)show_regs(regs);elsedump_stack();if (softlockup_all_cpu_backtrace) {/* Avoid generating two back traces for current * given that one is already made above */trigger_allbutself_cpu_backtrace();clear_bit(0, &soft_lockup_nmi_warn);/* Barrier to sync with other cpus */smp_mb__after_atomic();}add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);if (softlockup_panic)panic("softlockup: hung tasks");__this_cpu_write(soft_watchdog_warn, true);} else__this_cpu_write(soft_watchdog_warn, false);return HRTIMER_RESTART;}有人可能有疑问,为啥都R状态死锁了,还可以执行定时器的到期函数?这是因为我们在watchdog_enable 中将优先级设置到最高了。

阅读全文
0 0
原创粉丝点击