tasklet、工作队列 - [linux内核]

来源：互联网发布：云商网络编辑：程序博客网时间：2024/04/30 12:48

注：软中断、tasklet、工作队列、等待队列、完成量关系
tasklet 依靠软中断实现
工作队列[工作者线程可能睡眠在该cpu的工作队列->wait_queue_head_t中]、完成量依靠等待队列实现
工作队列[queue_delayed_work]、等待队列[等待资源指定时间]、完成量[等待操作指定时间] 依靠时间管理
1 tasklet原理和执行切入点
tasklet是I/O驱动程序中实现可延迟函数的首选方法，tasklet建立在两个叫做
HI_SOFTIRQ和TASKLET_SOFTIRQ的软中断之上，几个tasklet可以和同一个软中断相关联，每个tasklet都有自己的执行函数。这两个软中断没有任何区别，只是调用的优先级不同。tasklet和高优先级tasklet分别存放在两个内核全局变量tasklet_vec和tasklet_hi_vec数组中，下面只分析tasklet_hi_vec
tasklet_hi_vec是每cpu变量组成的数组：struct tasklet_head tasklet_hi_vec[NR_CPUS] ;
/*
 * Per-CPU list of pending tasklets.
 *
 * head points at the first queued tasklet_struct (NULL when the list is
 * empty). tail holds the address of the next pointer of the last element,
 * or the address of head itself when the list is empty, so a tasklet is
 * appended with "*tail = t; tail = &t->next;" without walking the list.
 */
struct tasklet_head
{
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};
这样每次添加一个元素，只需要通过*tail=new tasklet_struct即可把新tasklet_struct添加到末尾，tail可以重新指向新tasklet_struct的next元素
以高优先级tasklet为例，在do_softirq执行时，调用其softirq_action结构中的action函数，执行如下
/*
 * Softirq handler for the high-priority tasklet list of the current CPU.
 *
 * The whole per-CPU list is detached while local interrupts are disabled and
 * is then walked with interrupts enabled again. A tasklet that cannot be run
 * right now (tasklet_trylock() failed, or its count is non-zero, i.e. it is
 * disabled) is appended back to tasklet_hi_vec and HI_SOFTIRQ is raised again
 * so that it gets another chance later.
 *
 * @a: the softirq_action this handler was invoked through (not used here).
 */
static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	/* Take over the complete pending list and leave an empty one behind. */
	local_irq_disable();
	list = __get_cpu_var(tasklet_hi_vec).head;
	__get_cpu_var(tasklet_hi_vec).head = NULL;
	__get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		/*
		 * Per the original annotation, a successful trylock marks the
		 * tasklet as TASKLET_STATE_RUNNING.
		 */
		if (tasklet_trylock(t)) {
			/* count == 0: the tasklet is not disabled. */
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);	/* run the tasklet function */
				tasklet_unlock(t);
				continue;		/* go on with the next tasklet */
			}
			tasklet_unlock(t);
		}

		/*
		 * t could not be run (locked elsewhere or disabled): put it back
		 * at the end of tasklet_hi_vec. "*tail = t" stores t in the next
		 * pointer of the current last element (or in head when the list
		 * is empty); tail then points at t->next, which is NULL.
		 */
		local_irq_disable();
		t->next = NULL;
		*__get_cpu_var(tasklet_hi_vec).tail = t;
		__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
		/* Raise HI_SOFTIRQ again so the re-queued tasklet is retried. */
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}
2 tasklet结构简介
struct tasklet_struct
{
struct tasklet_struct *next;
unsigned long state;   //TASKLET_STATE_SCHED,TASKLET_STATE_RUNNING
注：TASKLET_STATE_SCHED表明tasklet已被插入到tasklet_vec或者tasklet_hi_vec数组中的一个链表中
        TASKLET_STATE_RUNNING表明tasklet正在被调度器调度执行，防止在smp中被并发调度
atomic_t count;
//非0时禁止tasklet执行，由tasklet_disable_nosync()或者tasklet_disable()，tasklet_enable()完成
void (*func)(unsigned long);
unsigned long data;
};
3 激活自己的tasklet
3.1
在自己编写驱动，使用tasklet机制时，需要调用tasklet_hi_schedule（）函数激活自己的tasklet
/*
 * Queue tasklet t at the end of the current CPU's high-priority tasklet list
 * (tasklet_hi_vec) and raise HI_SOFTIRQ so that the list gets processed.
 * The list update and the raise are done with local interrupts disabled.
 */
void __tasklet_hi_schedule(struct tasklet_struct *t)
{ unsigned long flags;

local_irq_save(flags); // disable local interrupts, remembering the previous state
t->next = NULL;
*__get_cpu_var(tasklet_hi_vec).tail = t;  // append t to the tail of this CPU's tasklet_hi_vec list
__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
raise_softirq_irqoff(HI_SOFTIRQ);     // raise the HI_SOFTIRQ softirq
local_irq_restore(flags); // restore the saved interrupt state (IF flag)
}
3.2 激活该类型的软中断必须在禁止中断情况下实现
/*
* This function must run with irqs disabled!
*/
inline void raise_softirq_irqoff(unsigned int nr)
{
	/*
	 * Mark softirq nr as pending: this sets bit nr in the
	 * __softirq_pending bitmap of the current CPU's irq_stat entry
	 * (per the original annotation, a 32-bit map with one bit per
	 * softirq number).
	 */
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 *
	 * In other words: inside interrupt or softirq handling the pending
	 * softirqs are processed automatically on the way out; otherwise the
	 * kernel thread is woken up and ends up running the handler (for
	 * HI_SOFTIRQ that is tasklet_hi_action()).
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}
3.3 __raise_softirq_irqoff执行分析
/*
 * Mark softirq number nr as pending on the current CPU by OR-ing bit nr into
 * the __softirq_pending word of the per-CPU irq_stat entry.
 *
 * The do { } while (0) wrapper deliberately carries no trailing semicolon:
 * the caller supplies it, which keeps the macro usable as the single
 * statement of an if/else branch.
 */
#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0)
#define or_softirq_pending(x)  (local_softirq_pending() |= (x))
#define local_softirq_pending() __get_cpu_var(irq_stat).__softirq_pending
tasklet在执行时如果已经被另外cpu执行或者被禁止，则可以重新激活软中断
4 tasklet实例展示：

5 工作队列
工作队列在linux2.6内核被引入，允许内核函数被激活，稍后由一种叫做工作者线程events的特殊内核线程执行
5-1 工作队列与软中断比较：
1 软中断主要运行在中断上下文中，工作队列运行在进程上下文中
2 中断上下文中不能发生进程切换，不能阻塞，工作队列可以
3 软中断函数[执行时不可能有任何正在运行的进程]和工作队列函数[有内核线程执行]都不能访问用户态地址空间
5-2 工作队列结构体
1 每个工作队列在每个cpu上都有一个该结构
/*
* The per-CPU workqueue (if single thread, we always use the first
* possible cpu).
*/
struct cpu_workqueue_struct {

spinlock_t lock; // taken (with interrupts off) around manipulation of worklist

struct list_head worklist; // list head that work_struct items are linked onto through their entry field
wait_queue_head_t more_work; // the worker thread sleeps here while it waits for work
struct work_struct * current_work; // the work item currently being executed on this CPU

struct workqueue_struct * wq; // back pointer to the owning workqueue
struct task_struct *thread;      // the worker thread
} ____cacheline_aligned;
2 工作队列结构
/*
* The externally visible workqueue abstraction is an array of
* per-CPU workqueues:
*/
struct workqueue_struct {
	/* Per-CPU part: every CPU has one cpu_workqueue_struct. */
	struct cpu_workqueue_struct *cpu_wq;
	struct list_head list;
	/* Name of the workqueue. */
	const char *name;
	/* Whether this workqueue has only a single worker thread. */
	int singlethread;
	/* Freeze threads during suspend (suspend/hibernation related). */
	int freezeable;
	int rt;
#ifdef CONFIG_LOCKDEP
	struct lockdep_map lockdep_map;
#endif
};
3 标准任务结构
/*
 * Standard work item: one deferred function call that can be queued on a
 * workqueue.
 */
struct work_struct
{
	/*
	 * Pointer-sized word associated with this work item. Because it holds
	 * a pointer, its lowest two bits are zero and are reused as flag bits
	 * (e.g. WORK_STRUCT_PENDING).
	 */
	atomic_long_t data;
	/* Links the item into a cpu_workqueue_struct's worklist. */
	struct list_head entry;
	/* Function to run; it is called with the work_struct itself. */
	work_func_t func;
};
5-3 工作队列函数
0 创建任务
INIT_WORK(work_struct ws,func)
1 创建工作队列
struct workqueue_struct * create_workqueue(const char* name,int singlethread)
如果singlethread=0，该函数对每个cpu创建一个工作者线程，命名方式如下：name/0 name/1 ... name/n
如果singlethread=1，该函数在第一个cpu上创建一个工作者线程
2 撤销工作队列
destroy_workqueue(struct workqueue_struct*)
3 将任务插入到工作队列中：所有任务都必须通过work_struct结构的实例插入到工作队列中
int fastcall queue_work(struct workqueue_struct* wq,struct work_struct* work)
int fastcall queue_delayed_work(struct workqueue_struct* wq,struct delayed_work* work,long delay)
4 内核已经实现了标准的工作队列，内核线程称为events，工作队列称为keventd_wq
static struct workqueue_struct *keventd_wq __read_mostly;
keventd_wq = create_workqueue("events"); 可以在/boot/System.map-`uname -r`中查到
内核可以使用下列函数将新工作添加到工作队列中
int schedule_work(struct work_struct* work)
int schedule_delayed_work(struct delayed_work* work,unsigned long delay)
int schedule_delayed_work_on(int cpu,struct delayed_work* work,unsigned long delay)
int flush_scheduled_work()
向工作队列中添加工作后，就会唤起该工作者线程执行，其执行worker_thread()
5 等待工作队列中所有挂起函数执行完毕
flush_workqueue(struct workqueue_struct* wq)
5-5   函数剖析 queue_work
1 queue_work 将任务加入到工作队列的某个cpu项中，并唤醒该cpu对应的工作者线程
/**
 * queue_work - submit a work item to a workqueue
 * @wq: workqueue that should run the item
 * @work: work item to submit
 *
 * The item is queued for the CPU the call is made on: the CPU number comes
 * from get_cpu() and is released again with put_cpu() once the item has been
 * handed to queue_work_on(). If that CPU dies, the work can be processed by
 * another CPU.
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	const int queued = queue_work_on(get_cpu(), wq, work);

	put_cpu();
	return queued;
}
2 将任务加入到工作队列的当前cpu上
/**
 * queue_work_on - submit a work item to the workqueue of one specific CPU
 * @cpu: number of the CPU whose per-CPU queue receives the item
 * @wq: workqueue to use
 * @work: work item to submit
 *
 * The caller has to make sure that @cpu cannot go away.
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int
queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
	/* The pending bit was already set: the item is queued, nothing to do. */
	if (test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
		return 0;

	/* We own the pending bit now, so the item must not be on any list. */
	BUG_ON(!list_empty(&work->entry));
	__queue_work(wq_per_cpu(wq, cpu), work);
	return 1;
}
3 将任务加入到工作队列的当前cpu对应的工作队列数组项中
/*
 * Put work on the worklist of the given per-CPU workqueue. The list is
 * modified under cwq->lock with interrupts disabled.
 */
static void __queue_work(struct cpu_workqueue_struct *cwq,
			 struct work_struct *work)
{
	unsigned long irq_state;

	debug_work_activate(work);

	/* Interrupts off + lock held: protects the worklist. */
	spin_lock_irqsave(&cwq->lock, irq_state);
	insert_work(cwq, work, &cwq->worklist);
	spin_unlock_irqrestore(&cwq->lock, irq_state);
}
4
/*
 * Link work into the list headed by head and wake up the worker thread that
 * sleeps on cwq->more_work.
 *
 * @cwq:  per-CPU workqueue the item is bound to
 * @work: item to insert
 * @head: list head the item is added in front of (i.e. at the tail of that
 *        list)
 */
static void insert_work(struct cpu_workqueue_struct *cwq,
			struct work_struct *work, struct list_head *head)
{
	trace_workqueue_insertion(cwq->thread, work);

	/*
	 * Store the cpu_workqueue_struct address in work->data. According to
	 * the original annotation set_wq_data() does:
	 *
	 *   new = (unsigned long) cwq | (1UL << WORK_STRUCT_PENDING);
	 *   new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work);
	 *   atomic_long_set(&work->data, new);
	 */
	set_wq_data(work, cwq);
	/*
	 * Ensure that we get the right work->data if we see the
	 * result of list_add() below, see try_to_grab_pending().
	 */
	smp_wmb();
	list_add_tail(&work->entry, head);
	/* Wake the worker thread that is sleeping while waiting for work. */
	wake_up(&cwq->more_work);
	/*
	 * Note from the original annotation (cut off in the source text):
	 * flush_workqueue() can be executed from process context.
	 */
}
5-6 函数剖析---工作者线程循环执行
/*
 * Main loop of a workqueue worker thread. __cwq is the cpu_workqueue_struct
 * this thread serves. The thread sleeps on cwq->more_work until work is
 * queued (or it is being frozen or stopped) and then processes the list with
 * run_workqueue(). Returns 0 once kthread_should_stop() reports true.
 */
static int worker_thread(void *__cwq)
{
struct cpu_workqueue_struct *cwq = __cwq;
DEFINE_WAIT(wait); // declare the wait-queue entry used for sleeping

if (cwq->wq->freezeable)
set_freezable();

for (;;) {
  prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
// sets this worker thread's state and hooks it, through wait, onto cpu_workqueue_struct->more_work
  if (!freezing(current) &&      // not freezing, not asked to stop and no work queued: go to sleep and reschedule
      !kthread_should_stop() &&
      list_empty(&cwq->worklist))
   schedule();
  finish_wait(&cwq->more_work, &wait); // take the thread off the more_work wait queue again

try_to_freeze();

if (kthread_should_stop())
break;

run_workqueue(cwq);
}

return 0;
}
执行任务函数
/*
 * Execute every work item that is queued on cwq->worklist.
 *
 * Each item is removed from the list under cwq->lock; the lock is dropped
 * while the item's function runs and is taken again before the next item is
 * looked at. If a work function returns in atomic context or with locks
 * still held, a diagnostic is printed.
 */
static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
	spin_lock_irq(&cwq->lock);
	while (!list_empty(&cwq->worklist)) {
		struct work_struct *work = list_entry(cwq->worklist.next,
						struct work_struct, entry);
		work_func_t f = work->func;
#ifdef CONFIG_LOCKDEP
		/*
		 * It is permissible to free the struct work_struct
		 * from inside the function that is called from it,
		 * this we need to take into account for lockdep too.
		 * To avoid bogus "held lock freed" warnings as well
		 * as problems when looking into work->lockdep_map,
		 * make a copy and use that here.
		 */
		struct lockdep_map lockdep_map = work->lockdep_map;
#endif
		trace_workqueue_execution(cwq->thread, work);
		debug_work_deactivate(work);
		cwq->current_work = work;
		list_del_init(cwq->worklist.next);
		spin_unlock_irq(&cwq->lock);

		BUG_ON(get_wq_data(work) != cwq);
		work_clear_pending(work);	/* clear the pending state */
		lock_map_acquire(&cwq->wq->lockdep_map);
		lock_map_acquire(&lockdep_map);
		f(work);	/* the work function receives the work_struct itself */
		lock_map_release(&lockdep_map);
		lock_map_release(&cwq->wq->lockdep_map);

		if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
			printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
					"%s/0x%08x/%d\n",
					current->comm, preempt_count(),
					task_pid_nr(current));
			printk(KERN_ERR "    last function: ");
			print_symbol("%s\n", (unsigned long)f);
			debug_show_held_locks(current);
			dump_stack();
		}

		spin_lock_irq(&cwq->lock);
		cwq->current_work = NULL;
	}
	spin_unlock_irq(&cwq->lock);
}
5-7函数剖析工作在工作队列中延迟固定时间执行，解开神秘面纱 queue_delayed_work 分析
0 结构分析
/*
 * A work item together with the timer that delays its queueing.
 */
struct delayed_work
{
	struct work_struct work;	/* the work item that gets queued */
	struct timer_list timer;	/* timer that fires after the delay */
};
1
/**
 * queue_delayed_work - submit a work item to a workqueue after a delay
 * @wq: workqueue to use
 * @dwork: delayable work item to submit
 * @delay: number of jiffies to wait before the item is queued
 *
 * A @delay of 0 queues the embedded work item right away through
 * queue_work(); any other value goes through queue_delayed_work_on() with
 * -1 as the CPU argument.
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_delayed_work(struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	/* Real delay requested: the item is queued only after delay jiffies. */
	if (delay != 0)
		return queue_delayed_work_on(-1, wq, dwork, delay);

	/* No delay: behaves like an ordinary work item. */
	return queue_work(wq, &dwork->work);
}
2 主函数--延期delayed jiffies后将任务放入该cpu对应的任务链表中
/**
* queue_delayed_work_on - queue work on specific CPU after delay
* @cpu: CPU number to execute work on
* @wq: workqueue to use
* @dwork: work to queue
* @delay: number of jiffies to wait before queueing
*
* Returns 0 if @work was already on a queue, non-zero otherwise.
*/
int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	int ret = 0;
	struct timer_list *timer = &dwork->timer;
	struct work_struct *work = &dwork->work;

	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
		BUG_ON(timer_pending(timer));
		BUG_ON(!list_empty(&work->entry));

		timer_stats_timer_set_start_info(&dwork->timer);

		/*
		 * This stores cwq for the moment, for the timer_fn: work->data
		 * points at the cpu_workqueue_struct so that the timer function
		 * can find the workqueue once the delay has elapsed.
		 */
		set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id()));
		timer->expires = jiffies + delay;
		timer->data = (unsigned long)dwork;
		timer->function = delayed_work_timer_fn;
		/*
		 * Note from the original annotation: delayed work builds on the
		 * kernel timer mechanism. Every pending timer is kept, through
		 * its timer_list, in kernel lists ordered by expiry time; the
		 * timer interrupt processes the timers that are due and calls
		 * timer->function(timer->data). So after delay jiffies
		 * delayed_work_timer_fn() runs and puts the work item on the
		 * per-CPU work list.
		 */

		if (unlikely(cpu >= 0))
			add_timer_on(timer, cpu);
		else
			add_timer(timer);
		ret = 1;
	}
	return ret;
}
3 时钟中断唤醒该时钟结构后的执行函数
/*
 * Timer callback of a delayed work item. __data carries the address of the
 * struct delayed_work. The cpu_workqueue_struct stored in work->data is used
 * to find the workqueue, and the work item is then queued on that
 * workqueue's per-CPU queue for the CPU the timer fired on.
 */
static void delayed_work_timer_fn(unsigned long __data)
{
	struct delayed_work *expired = (struct delayed_work *)__data;
	struct work_struct *work = &expired->work;
	/* This pointer was saved in work_struct->data when the timer was set up. */
	struct cpu_workqueue_struct *saved_cwq = get_wq_data(work);

	__queue_work(wq_per_cpu(saved_cwq->wq, smp_processor_id()), work);
}
6 工作队列实现举例