Kernel 中断分析六——softirq

来源:互联网 发布:cocos2d js 教程 mac 编辑:程序博客网 时间:2024/06/08 01:22

Abstract

目前kernel中的中断机制主要有top half、bottom half(softirq、tasklet、workqueue)、threaded irq handler。top half不用赘述,这里把threaded irq handler与bottom half区分开,是因为它们有以下区别:
1、调度方式
threaded irq handler被系统调度器调度,bottom half被top half调度(也可通过ksoftirqd被schedule)。

2、运行上下文
threaded irq handler运行在process context中
bottom half:softirq和tasklet运行在interrupt context,workqueue运行在process context。

当然,他们有共同之处,将中断的部分或者大部分工作延迟执行
啰嗦一句,系统调用属于异常处理程序,通过软件中断(software interrupt,不是softirq)实现,运行在process context。

本篇主要分析bottom half中的softirq。

softirq

软中断类型

379 /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high380    frequency threaded job scheduling. For almost all the purposes381    tasklets are more than enough. F.e. all serial device BHs et382    al. should be converted to tasklets, not to softirqs.383  */384385 enum386 {387     HI_SOFTIRQ=0,388     TIMER_SOFTIRQ,389     NET_TX_SOFTIRQ,390     NET_RX_SOFTIRQ,391     BLOCK_SOFTIRQ,392     BLOCK_IOPOLL_SOFTIRQ,393     TASKLET_SOFTIRQ,394     SCHED_SOFTIRQ,395     HRTIMER_SOFTIRQ,396     RCU_SOFTIRQ,    /* Preferable RCU should always be the last softirq */397398     NR_SOFTIRQS399 };

系统已经定义好10种软中断类型,优先级依次降低,并且提示开发人员avoid to allocate new softirqs,开发最常使用的是HI_SOFTIRQ和TASKLET_SOFTIRQ。

软中断向量表

和硬中断的静态线性向量表类似,软中断也有个向量表,数组大小当然是10.
每个数组元素(struct softirq_action)封装了一个软中断处理函数。

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;408 /* softirq mask and active fields moved to irq_cpustat_t in409  * asm/hardirq.h to get better cache usage.  KAO410  */411412 struct softirq_action413 {414     void    (*action)(struct softirq_action *);415 };

软中断初始化

每种软中断在初始化时都会调用open_softirq,来初始化软中断处理函数,以TASKLET_SOFTIRQ为例:

631 void __init softirq_init(void)632 {633     int cpu;634635     for_each_possible_cpu(cpu) {636         per_cpu(tasklet_vec, cpu).tail =637             &per_cpu(tasklet_vec, cpu).head;638         per_cpu(tasklet_hi_vec, cpu).tail =639             &per_cpu(tasklet_hi_vec, cpu).head;640     }641642     open_softirq(TASKLET_SOFTIRQ, tasklet_action);643     open_softirq(HI_SOFTIRQ, tasklet_hi_action);644 }430 void open_softirq(int nr, void (*action)(struct softirq_action *))431 {432     softirq_vec[nr].action = action;433 }

软中断触发

API

触发软中断的API为raise_softirq_irqoff

446 void __tasklet_schedule(struct tasklet_struct *t)447 {448     unsigned long flags;449450     local_irq_save(flags);451     t->next = NULL;452     *__this_cpu_read(tasklet_vec.tail) = t;453     __this_cpu_write(tasklet_vec.tail, &(t->next));454     raise_softirq_irqoff(TASKLET_SOFTIRQ);455     local_irq_restore(flags);456 }457 EXPORT_SYMBOL(__tasklet_schedule);

有一个细节,调用raise_softirq_irqoff之前需要禁用本地cpu中断,那么是不是说执行softirq的过程中中断一直是禁止的呢?当然不是,我们看后面的分析。
raise_softirq_irqoff

395 /*396  * This function must run with irqs disabled!397  */398 inline void raise_softirq_irqoff(unsigned int nr)399 {400     __raise_softirq_irqoff(nr);------------------------1401402     /*403      * If we're in an interrupt or softirq, we're done404      * (this also catches softirq-disabled code). We will405      * actually run the softirq once we return from406      * the irq or softirq.407      *408      * Otherwise we wake up ksoftirqd to make sure we409      * schedule the softirq soon.410      */411     if (!in_interrupt())-------------------------------2412         wakeup_softirqd();413 }

1 . __raise_softirq_irqoff
调用or_softirq_pending,将irq_stat.__softirq_pending的对应软中断bit位置1,关于irq_stat.__softirq_pending,见appendix–__softirq_pending。

#define or_softirq_pending(x)   this_cpu_or(irq_stat.__softirq_pending, (x))

2 . in_interrupt判断此时是否处于中断上下文,具体判断的逻辑见appendix–in_interrupt。
如果处于中断上下文,那么不做任何处理,因为当前中断结束之后会处理pending的softirq。
否则,唤醒ksoftirqd线程,处理软中断。

可以看到,这里并没有真正触发软中断,而是pending软中断,等待后续处理(或者中断退出时处理,或者主动唤醒ksoftirqd处理)。所以之前的怀疑(软中断执行过程中禁用本地CPU中断)不成立。

触发时机

这里写图片描述

如图,kernel中触发软中断主要有以下几种情况

  1. irq_exit,退出中断时会检测有没有pending的软中断
  2. local_bh_enable
  3. netif_rx_ni

处理软中断的流程最终都会调用到__do_softirq,不同情况下有直接调用__do_softirq,也有通过唤醒ksoftirqd来调用。

除了以上三种情况,在别的资料和blog上还有:
4. SMP中,处理完处理器间中断时
5. 在使用APIC的系统处理完本地中断时

本篇主要分析前两种情况。

irq_exit

最常见的调用流程是do_IRQ在退出中断时调用irq_exit。

374 /*375  * Exit an interrupt context. Process softirqs if needed and possible:376  */377 void irq_exit(void)378 {379 #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED380         local_irq_disable();381 #else382         WARN_ON_ONCE(!irqs_disabled());383 #endif384385         account_irq_exit_time(current);386         preempt_count_sub(HARDIRQ_OFFSET);-----------------------1387         if (!in_interrupt() && local_softirq_pending())----------2388                 invoke_softirq();389390         tick_irq_exit();391         rcu_irq_exit();392         trace_hardirq_exit(); /* must be last! */393 }
  1. 将硬件中断在preempt count中的计数减少,前面已经处理完硬件中断。
  2. 若当前不在中断上下文且有软中断pending,那么调用invoke_softirq
338 static inline void invoke_softirq(void)339 {340         if (!force_irqthreads) {341 #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK342                 /*343                  * We can safely execute softirq on the current stack if344                  * it is the irq stack, because it should be near empty345                  * at this stage.346                  */347                 __do_softirq();348 #else349                 /*350                  * Otherwise, irq_exit() is called on the task stack that can351                  * be potentially deep already. So call softirq in its own stack352                  * to prevent from any overrun.353                  */354                 do_softirq_own_stack();355 #endif356         } else {357                 wakeup_softirqd();358         }359 }

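需要注意,force_irqthreads默认为假,只有在启动参数中指定threadirqs(强制中断线程化)时才为真。因此默认配置下,中断退出时会直接执行__do_softirq(或经do_softirq_own_stack切换到独立的栈上执行);只有开启强制线程化时,才会在这里唤醒ksoftirqd进程。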

ksoftirqd

kernel中每个cpu都定义了一个ksoftirqd进程,用来处理软中断。

 57 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
749 static struct smp_hotplug_thread softirq_threads = {750         .store                  = &ksoftirqd,751         .thread_should_run      = ksoftirqd_should_run,752         .thread_fn              = run_ksoftirqd,753         .thread_comm            = "ksoftirqd/%u",754 };

该线程被唤醒后会调用run_ksoftirqd

run_ksoftirqd

651 static void run_ksoftirqd(unsigned int cpu)652 {653         local_irq_disable();-------------------------1654         if (local_softirq_pending()) {---------------2655                 /*656                  * We can safely run softirq on inline stack, as we are not deep657                  * in the task stack here.658                  */659                 __do_softirq();660                 local_irq_enable();-----------------3661                 cond_resched();662663                 preempt_disable();664                 rcu_note_context_switch(cpu);665                 preempt_enable();666667                 return;668         }669         local_irq_enable();670 }
  1. 进入ksoftirqd时先关闭中断,对应下文的3,很容易让人猜测,软中断处理过程中是不是对应CPU的中断是全部mask的?显然不是,在__do_softirq中还有对中断屏蔽的操作;
  2. 如果当前有软中断pending,那么调用__do_softirq

__do_softirq

225 asmlinkage void __do_softirq(void)226 {227         unsigned long end = jiffies + MAX_SOFTIRQ_TIME;228         unsigned long old_flags = current->flags;229         int max_restart = MAX_SOFTIRQ_RESTART;230         struct softirq_action *h;231         bool in_hardirq;232         __u32 pending;233         int softirq_bit;234         int cpu;235236         /*237          * Mask out PF_MEMALLOC s current task context is borrowed for the238          * softirq. A softirq handled such as network RX might set PF_MEMALLOC239          * again if the socket is related to swap240          */241         current->flags &= ~PF_MEMALLOC;----------------------------1242243         pending = local_softirq_pending();-------------------------2244         account_irq_enter_time(current);---------------------------3245246         __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);------------4247         in_hardirq = lockdep_softirq_start();248249         cpu = smp_processor_id();----------------------------------5250 restart:251         /* Reset the pending bitmask before enabling irqs */252         set_softirq_pending(0);-----------------------------------6253254         local_irq_enable();----------------------------------------7255256         h = softirq_vec;-------------------------------------------8257258         while ((softirq_bit = ffs(pending))) {----------------------9259                 unsigned int vec_nr;260                 int prev_count;261262                 h += softirq_bit - 1;------------------------------9.1263264                 vec_nr = h - softirq_vec;265                 prev_count = preempt_count();---------------------9.2266267                 kstat_incr_softirqs_this_cpu(vec_nr);268269                 trace_softirq_entry(vec_nr);270                 h->action(h);--------------------------------------9.3271                 trace_softirq_exit(vec_nr);272                 if (unlikely(prev_count != preempt_count())) {-----9.4273                         
pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",274                                vec_nr, softirq_to_name[vec_nr], h->action,275                                prev_count, preempt_count());276                         preempt_count_set(prev_count);277                 }278                 rcu_bh_qs(cpu);279                 h++;-----------------------------------------------9.5280                 pending >>= softirq_bit;----------------------------9.6281         }282283         local_irq_disable();----------------------------------------10284285         pending = local_softirq_pending();-------------------------11286         if (pending) {287                 if (time_before(jiffies, end) && !need_resched() &&----11.1288                     --max_restart)289                         goto restart;290291                 wakeup_softirqd();--------------------------------11.2292         }293294         lockdep_softirq_end(in_hardirq);295         account_irq_exit_time(current);296         __local_bh_enable(SOFTIRQ_OFFSET);-----------------------12297         WARN_ON_ONCE(in_interrupt());298         tsk_restore_flags(current, old_flags, PF_MEMALLOC);299 }
  1. 这里为什么要清除PF_MEMALLOC?这样分配内存时就会受到buddy system的限制。
  2. 获取当前pending的所有软中断
  3. 计算进入软中断的时间,便于出错时统计。
  4. disable当前CPU的软中断,所以__do_softirq时不响应新来的软中断(个人理解)
    再来看所谓的disable软中断__local_bh_disable_ip,就是将preempt count中SOFTIRQ_OFFSET对应的bit加1,不用说,此时是禁止schedule的。还记得前面判断的in_interrupt吗?所以如果in_interrupt为真,那么此时可能其他软中断正在被处理。
  5. 获取当前CPU编号
  6. 清除irq_stat.__softirq_pending中所有pending的软中断
  7. enable当前cpu中断,所以软中断只是在前期关闭硬件中断,后期主要流程中可以被硬中断打断
  8. 获取softirq_vec,软中断向量表
  9. 从pending软中断中找第一个软中断,所以软中断的执行是有优先级的。但是从第四步可以看出,不能被新的软中断打断,即软中断不能打断软中断和硬中断
    9.1 获取对应的软中断向量和向量编号vec_nr
    9.2 获取preempt count
    9.3 调用软中断向量的中断处理函数
    9.4 执行软中断处理函数前后preempt count发生了改变,
    我们知道preempt count是percpu变量,
    DECLARE_PER_CPU(int, __preempt_count);

    说明该cpu上发生了软中断或者硬中断或者NMI或者在软中断处理函数中发生了schedule,而我们在之前已经禁用了软中断,并且如果发生了硬中断,该CPU应该会被硬中断打断并进入硬中断的处理流程,所以很大可能是在软中断处理函数中进行了调度,比如睡眠引发调度(以上个人理解)。这时kernel抛出一个错误信息提示用户。
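    9.4(续) 如果只是发生了硬中断,我们知道硬中断的优先级比软中断高,会先被处理;而硬中断进入和退出时对preempt count的加减是成对的(退出时见irq_exit中的preempt_count_sub(HARDIRQ_OFFSET)),返回后preempt count不变,因此不会触发这里的报错。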
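    9.5 h指向下一个软中断向量。这里的自加并非多余:9.6中pending会右移softirq_bit位,下一次ffs(pending)返回的是相对于"刚处理完的bit之后"的偏移,所以h必须先前进到刚处理的向量的下一个,下一次循环中的 h += softirq_bit - 1 才能定位到正确的向量。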
    9.6 便于下一次循环获取下一个软中断bit位

  10. 这里禁用当前CPU的中断是为了下面可能要操作 wakeup_softirqd

  11. 在step 9中,依次遍历并执行了__softirq_pending中所有的软中断,借用网上的一张图,布局如下
    这里写图片描述
    在执行软中断处理函数的过程中,可能会有新的软中断触发,虽然不会打断当前正在执行的软中断处理流程,但是其对应的标志位会被set,比如我们之前分析的__raise_softirq_irqoff—>or_softirq_pending。
    此时kernel会处理这些新来的软中断,如果此时处理软中断的时间没有超时且没有其他进程等待调度且step 6-9没有连续处理10次,那么继续restart重复处理;否则,kernel认为这段时间的软中断触发过于频繁,继续处理会给系统workload带来较大负担,其他进程得不到调度执行,影响了整体的performance,所以将后续到达的软中断延迟执行,即放到下一次ksoftirqd被调度的时候执行。

local_bh_enable

顾名思义,enable bottom half。看了一下,local_bh_enable的调用大都出现在net相关的驱动,由于对net缺乏了解,此处只分析local_bh_enable与softirq的相关流程。

 30 static inline void local_bh_enable(void) 31 { 32         __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); 33 }143 void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)144 {145         WARN_ON_ONCE(in_irq() || irqs_disabled());----------------------1146 #ifdef CONFIG_TRACE_IRQFLAGS147         local_irq_disable();148 #endif149         /*150          * Are softirqs going to be turned on now:151          */152         if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)153                 trace_softirqs_on(ip);154         /*155          * Keep preemption disabled until we are done with156          * softirq processing:157          */158         preempt_count_sub(cnt - 1);------------------------------------2159160         if (unlikely(!in_interrupt() && local_softirq_pending())) {161                 /*162                  * Run softirq if any pending. And do it in its own stack163                  * as we may be calling this deep in a task call stack already.164                  */165                 do_softirq();166         }167168         preempt_count_dec();--------------------------------------------3169 #ifdef CONFIG_TRACE_IRQFLAGS170         local_irq_enable();171 #endif172         preempt_check_resched();173 }174 EXPORT_SYMBOL(__local_bh_enable_ip);
  1. 开头先check一下,in_irq其实是判断当前是否处于hard irq的context中,显然如果有硬中断正需要处理,应该硬中断优先。另外还会判断当前是否禁止了中断(irqs_disabled)。
  2. 如果当前不在某个中断的中断上下文且有软中断pending,那么调用do_softirq。

do_softirq

301 asmlinkage void do_softirq(void)302 {303         __u32 pending;304         unsigned long flags;305306         if (in_interrupt())----------------------1307                 return;308309         local_irq_save(flags);-------------------2310311         pending = local_softirq_pending();------3312313         if (pending)314                 do_softirq_own_stack();----------4315316         local_irq_restore(flags);---------------5317 }
  1. 再check一下~
  2. 关闭本地CPU中断
  3. 获得pending的软中断bit位
  4. 调用do_softirq_own_stack
  5. 开本地CPU中断

do_softirq_own_stack

148 void do_softirq_own_stack(void)149 {150         struct thread_info *curctx;151         union irq_ctx *irqctx;152         u32 *isp;153154         curctx = current_thread_info();155         irqctx = __this_cpu_read(softirq_ctx);156         irqctx->tinfo.task = curctx->task;157         irqctx->tinfo.previous_esp = current_stack_pointer;158159         /* build the stack frame on the softirq stack */160         isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));161162         call_on_stack(__do_softirq, isp);163 }

这边其实就是保存当前进程的上下文,切换到softirq的栈,前面已经分析过hardirq和softirq在每个CPU各有自己的栈。

比较奇怪的是,运行run_ksoftirqd的时候没有看到栈的切换,希望能有大神解惑。

Appendix

__softirq_pending

每个cpu定义了一个irq_stat变量

24 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);

类型为irq_cpustat_t,其中__softirq_pending表示有哪些softirq已经pending等待处理。

  7 typedef struct {  8     unsigned int __softirq_pending;  9     unsigned int __nmi_count;   /* arch dependent */ 10 #ifdef CONFIG_X86_LOCAL_APIC 11     unsigned int apic_timer_irqs;   /* arch dependent */ 12     unsigned int irq_spurious_count; 13     unsigned int icr_read_retry_count; 14 #endif 15 #ifdef CONFIG_HAVE_KVM 16     unsigned int kvm_posted_intr_ipis; 17 #endif 18     unsigned int x86_platform_ipis; /* arch dependent */ 19     unsigned int apic_perf_irqs; 20     unsigned int apic_irq_work_irqs; 21 #ifdef CONFIG_SMP 22     unsigned int irq_resched_count; 23     unsigned int irq_call_count; 24     /* 25      * irq_tlb_count is double-counted in irq_call_count, so it must be 26      * subtracted from irq_call_count when displaying irq_call_count 27      */ 28     unsigned int irq_tlb_count; 29 #endif 30 #ifdef CONFIG_X86_THERMAL_VECTOR 31     unsigned int irq_thermal_count; 32 #endif 33 #ifdef CONFIG_X86_MCE_THRESHOLD 34     unsigned int irq_threshold_count; 35 #endif 36 } ____cacheline_aligned irq_cpustat_t; 37

in_interrupt

所谓中断上下文,必然是包含了各种中断情况,如HARDIRQ、SOFTIRQ、NMI(x86上才有NMI)。
在x86 kernel 中断分析三——中断处理流程中,我们分析了preempt_count的组成结构。判断当前是否处于中断上下文,就是看preempt_count中对应的bit是否有效。

 66 #define in_interrupt()      (irq_count()) 55 #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ 56                  | NMI_MASK))
 25 EXPORT_PER_CPU_SYMBOL(irq_stat);424 void __raise_softirq_irqoff(unsigned int nr)425 {426     trace_softirq_raise(nr);427     or_softirq_pending(1UL << nr);428 }
0 0
原创粉丝点击