Linux等待队列waitqueue
来源:互联网 发布:win7 443端口服务 编辑:程序博客网 时间:2024/05/20 10:21
waitqueue
- waitqueue
- 创建一个等待队列
- 让当前进程开始等待
- 1 wait_event
- 2 wait_event_timeout
- 3 wait_event_interruptible
- 4 wait_event_interruptible_timeout
- 唤醒等待队列上的进程
- waitqueue
内核中提供了等待队列,作用是实现阻塞操作。等待队列用于使进程等待某一特定的事件发生而无需频繁的轮询,进程在等待期间睡眠,在某些事件发生时,由内核自动唤醒。
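首先,Linux中所有的进程都是由task_struct这个结构管理。在生成进程的时候会分配一个task_struct结构,之后将通过这个结构对进程进行管理。task_struct位于独立的连续区间。task_struct结构中有一个state成员,常见的有下面几种状态:TASK_RUNNING(可运行/正在运行)、TASK_INTERRUPTIBLE(可中断睡眠,可被信号唤醒)、TASK_UNINTERRUPTIBLE(不可中断睡眠,不响应信号)、__TASK_STOPPED(已停止)、__TASK_TRACED(被跟踪)。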
1. 创建一个等待队列
Linux内核中,wait_queue_head_t代表一个等待队列头,wait_queue_head_t数据结构如下:
struct __wait_queue_head { spinlock_t lock; // 自旋锁,确保对链表操作的原子性 struct list_head task_list; // 链表};typedef struct __wait_queue_head wait_queue_head_t;
等待队列中每个元素用wait_queue_t来表示,wait_queue_t数据结构如下:
typedef struct __wait_queue wait_queue_t;struct __wait_queue { unsigned int flags; // WQ_FLAG_EXCLUSIVE-表示等待进程想要被独占地唤醒; 0-可以和其他进程一起唤醒。#define WQ_FLAG_EXCLUSIVE 0x01 // 在结构体中定义宏跟一般的宏没区别,这里表示flags会用到该宏,提高直观性。 void *private; // 指向等待进程的task_struct地址 wait_queue_func_t func; // 用于唤醒被挂起任务的回调函数 struct list_head task_list; // 链表元素,用于链接到wait_queue_head_t中的task_list链表中};
① 可以调用init_waitqueue_head接口来初始化此队列,init_waitqueue_head主要是将wait_queue_head_t结构体中的两个成员进行初始化。
staitc wait_queue_head_t prod_wq;init_waitqueue_head(&prod_wq);#define init_waitqueue_head(q) \ do { \ static struct lock_class_key __key; \ \ __init_waitqueue_head((q), #q, &__key); \ } while (0)void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key){ spin_lock_init(&q->lock); // 初始化自旋锁 lockdep_set_class_and_name(&q->lock, key, name); //和防止死锁有关 INIT_LIST_HEAD(&q->task_list); // 初始化链表}
② 也可以使用DECLARE_WAIT_QUEUE_HEAD来定义和初始化等待队列头。
/*
 * Static initialiser for a wait queue head: an unlocked spinlock and a
 * task_list whose two pointers both refer back to the list itself.
 */
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
	.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
	.task_list = { &(name).task_list, &(name).task_list } }

/* Define a wait_queue_head_t called `name` and initialise it in one step. */
#define DECLARE_WAIT_QUEUE_HEAD(name) \
	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
③ 定义和初始化等待队列项
/*
 * Static initialiser for a wait queue entry: `private` is set to tsk, the
 * wake callback is default_wake_function, and the list node starts out
 * unlinked (both pointers NULL).
 */
#define __WAITQUEUE_INITIALIZER(name, tsk) { \
	.private = tsk, \
	.func = default_wake_function, \
	.task_list = { NULL, NULL } }

/* Define a wait_queue_t called `name` for task `tsk`. */
#define DECLARE_WAITQUEUE(name, tsk) \
	wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
2. 让当前进程开始等待
内核提供了如下的接口来让当前进程在条件不满足的情况下,阻塞等待:
wait_event(wq, condition)
wait_event_timeout(wq, condition, timeout)
wait_event_interruptible(wq, condition)
wait_event_interruptible_timeout(wq, condition, timeout)
2.1 wait_event
wait_event的实现如下:
/*
 * Queue `wait` on `q` as an exclusive waiter: set WQ_FLAG_EXCLUSIVE on the
 * entry and insert it with __add_wait_queue_tail() while holding q->lock
 * with interrupts saved/disabled.
 */
void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	wait->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	__add_wait_queue_tail(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);

/*
 * Prepare the current task to sleep on `q` as a non-exclusive waiter.
 * Clears WQ_FLAG_EXCLUSIVE, links `wait` into the queue only if it is not
 * already linked, and sets the current task state to `state`. The list
 * insertion and the state change both happen under q->lock.
 */
void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
	unsigned long flags;

	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	if (list_empty(&wait->task_list))
		__add_wait_queue(q, wait);
	set_current_state(state);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);

/*
 * Undo prepare_to_wait(): put the current task back into TASK_RUNNING and
 * unlink `wait` from `q` if it is still linked.
 */
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	__set_current_state(TASK_RUNNING);
	/*
	 * We can check for list emptiness outside the lock
	 * IFF:
	 *  - we use the "careful" check that verifies both
	 *    the next and prev pointers, so that there cannot
	 *    be any half-pending updates in progress on other
	 *    CPU's that we haven't seen yet (and that might
	 *    still change the stack area).
	 * and
	 *  - all other users take the lock (ie we can only
	 *    have _one_ other CPU that looks at or modifies
	 *    the list).
	 */
	if (!list_empty_careful(&wait->task_list)) {
		spin_lock_irqsave(&q->lock, flags);
		list_del_init(&wait->task_list);
		spin_unlock_irqrestore(&q->lock, flags);
	}
}

/*
 * Slow path of wait_event(): define an on-stack entry with DEFINE_WAIT(),
 * then loop { prepare_to_wait(TASK_UNINTERRUPTIBLE); re-check `condition`;
 * schedule() } until `condition` is true, and finally call finish_wait().
 * `condition` is evaluated once per loop iteration.
 */
#define __wait_event(wq, condition) \
do { \
	DEFINE_WAIT(__wait); \
	\
	for (;;) { \
		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
		if (condition) \
			break; \
		schedule(); \
	} \
	finish_wait(&wq, &__wait); \
} while (0)

/*
 * Sleep in TASK_UNINTERRUPTIBLE until `condition` becomes true.
 * Fast path: if `condition` already holds, leave without touching the queue.
 */
#define wait_event(wq, condition) \
do { \
	if (condition) \
		break; \
	__wait_event(wq, condition); \
} while (0)
里面有个宏定义即DEFINE_WAIT,详细如下:
/*
 * Define a wait_queue_t called `name` for the current task: `private` is
 * `current`, the wake callback is `function`, and the list node is
 * initialised with LIST_HEAD_INIT on its own task_list member.
 */
#define DEFINE_WAIT_FUNC(name, function) \
	wait_queue_t name = { \
		.private = current, \
		.func = function, \
		.task_list = LIST_HEAD_INIT((name).task_list), \
	}

/* DEFINE_WAIT_FUNC() with autoremove_wake_function as the wake callback. */
#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
可以看到private成员是当前task对象的地址current, func成员是autoremove_wake_function(在下面wake_up时再做说明)。
所以整个wait_event的逻辑就是:
① 首先判断条件是否满足,如果满足,直接退出;如果不满足,调用__wait_event
② __wait_event中首先基于当前进程构建一个等待队列项;然后进入死循环:
- 调用prepare_to_wait,该函数将新建的等待队列项加入到等待队列中,并修改当前任务的state为TASK_UNINTERRUPTIBLE;(注,该函数flags的结果必然是0,也就是说这个函数是将非独占进程添加到等待队列当中。而add_wait_queue_exclusive函数则是将独占进程添加到等待队列的尾部,也就是说一个等待队列,非独占进程总是在前面,独占进程总是在后面)
- 判断condition条件,满足就退出循环,不满足继续
- 调用schedule()进行任务调度后,重新开始循环
③ 退出循环后调用finish_wait,将当前任务的state设置为TASK_RUNNING,并将新建的等待队列项从等待队列中删除。
2.2 wait_event_timeout
wait_event_timeout 的实现如下:
/*
 * Give up the CPU until woken or until `timeout` jiffies have elapsed.
 *
 * - timeout == MAX_SCHEDULE_TIMEOUT: plain schedule(), no timer is armed,
 *   and `timeout` is returned unchanged.
 * - timeout < 0: logs an error, dumps the stack, sets the task state back
 *   to TASK_RUNNING and returns 0.
 * - otherwise: arms an on-stack timer whose callback is process_timeout
 *   with `current` as its argument, calls schedule(), then removes the
 *   timer and returns the remaining jiffies (expire - jiffies), clamped
 *   to 0 when negative.
 */
signed long __sched schedule_timeout(signed long timeout)
{
	struct timer_list timer;
	unsigned long expire;

	switch (timeout) {
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * These two special cases are useful to be comfortable
		 * in the caller. Nothing more. We could take
		 * MAX_SCHEDULE_TIMEOUT from one of the negative value
		 * but I'd like to return a valid offset (>=0) to allow
		 * the caller to do everything it want with the retval.
		 */
		schedule();
		goto out;
	default:
		/*
		 * Another bit of PARANOID. Note that the retval will be
		 * 0 since no piece of kernel is supposed to do a check
		 * for a negative retval of schedule_timeout() (since it
		 * should never happens anyway). You just have the printk()
		 * that will tell you if something is gone wrong and where.
		 */
		if (timeout < 0) {
			printk(KERN_ERR "schedule_timeout: wrong timeout "
				"value %lx\n", timeout);
			dump_stack();
			current->state = TASK_RUNNING;
			goto out;
		}
	}

	/* Absolute expiry time in jiffies. */
	expire = timeout + jiffies;

	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
	__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
	schedule();
	del_singleshot_timer_sync(&timer);

	/* Remove the timer from the object tracker */
	destroy_timer_on_stack(&timer);

	timeout = expire - jiffies;

 out:
	return timeout < 0 ? 0 : timeout;
}

/*
 * Slow path of wait_event_timeout(). `ret` holds the remaining timeout on
 * entry and is updated from schedule_timeout() on every iteration. The loop
 * ends when `condition` is true or when the remaining time reaches 0.
 * If the time ran out but `condition` is true at that point, `ret` is set
 * to 1 so that the caller does not see a timeout.
 */
#define __wait_event_timeout(wq, condition, ret) \
do { \
	DEFINE_WAIT(__wait); \
	\
	for (;;) { \
		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
		if (condition) \
			break; \
		ret = schedule_timeout(ret); \
		if (!ret) \
			break; \
	} \
	if (!ret && (condition)) \
		ret = 1; \
	finish_wait(&wq, &__wait); \
} while (0)

/*
 * Sleep in TASK_UNINTERRUPTIBLE until `condition` is true or `timeout`
 * jiffies elapse. Evaluates to the value left in __ret: the original
 * `timeout` if `condition` already held, otherwise whatever
 * __wait_event_timeout() stored (0 on timeout, > 0 otherwise).
 */
#define wait_event_timeout(wq, condition, timeout) \
({ \
	long __ret = timeout; \
	if (!(condition)) \
		__wait_event_timeout(wq, condition, __ret); \
	__ret; \
})
wait_event_timeout 和 wait_event逻辑类似,就一个地方差异较大,即schedule_timeout。
schedule_timeout中构建了一个定时器,该定时器到期后将调用process_timeout(通过中断的形式),传入的参数则是当前进程的指针current。然后调用schedule,等待调度器回到该位置(由于任务状态为TASK_UNINTERRUPTIBLE,不能通过调度或信号回到该位置)。这个时候就有两种情况(唤醒在后面wake_up部分详细说明):
① 超时了,调用process_timeout函数,该函数调用wake_up_process函数,核心代码类似wake_up_xxx(current)
② 在其他任务中调用了wake_up_xxx(wq)函数,将任务状态修改为TASK_RUNNING
一旦任务状态为TASK_RUNNING,就又回到了cpu的run queue中,可以通过调度回到函数中的schedule位置。
wait_event_timeout 返回值如下:
- 大于0: 表示condition满足,返回值表示距离设定超时还有多久(jiffies)
- 等于0: 表示超时发生
2.3 wait_event_interruptible
wait_event_interruptible 的实现如下:
/*
 * Slow path of wait_event_interruptible(). Each iteration queues the task
 * in TASK_INTERRUPTIBLE and re-checks `condition`. If no signal is pending
 * it calls schedule() and loops again; if a signal is pending it stores
 * -ERESTARTSYS in `ret` and leaves the loop. `ret` is left untouched when
 * the loop ends because `condition` became true.
 */
#define __wait_event_interruptible(wq, condition, ret) \
do { \
	DEFINE_WAIT(__wait); \
	\
	for (;;) { \
		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
		if (condition) \
			break; \
		if (!signal_pending(current)) { \
			schedule(); \
			continue; \
		} \
		ret = -ERESTARTSYS; \
		break; \
	} \
	finish_wait(&wq, &__wait); \
} while (0)

/*
 * Sleep in TASK_INTERRUPTIBLE until `condition` is true or a signal is
 * pending. Evaluates to 0 when `condition` is satisfied and to
 * -ERESTARTSYS when the wait was cut short by a signal.
 */
#define wait_event_interruptible(wq, condition) \
({ \
	int __ret = 0; \
	if (!(condition)) \
		__wait_event_interruptible(wq, condition, __ret); \
	__ret; \
})
wait_event_interruptible的实现和wait_event类似,区别是多了一个signal_pending操作。
signal_pending检查给定进程是否有信号需要处理,返回0表示没有信号需要处理。
所以此时退出循环的条件是: 满足 condition 和 有信号 两者之一就行 (如果执行到schedule,需要另外一个进程调用wake_up_xxx(&wq)操作,或者该进程收到了信号,将任务加入到run queue中。)
wait_event_interruptible 返回值如下:
- -ERESTARTSYS: 表示被信号激活唤醒。该错误的意思表示发生系统调用,任务正处在睡眠状态,等wakeup之后,会重新调用一次系统调用。
- 等于0: 表示condition满足
2.4 wait_event_interruptible_timeout
wait_event_interruptible_timeout 的实现如下:
/*
 * Slow path of wait_event_interruptible_timeout(). `ret` holds the
 * remaining timeout on entry. The loop ends when:
 *  - `condition` is true (ret keeps the remaining time),
 *  - schedule_timeout() returns 0 (time ran out), or
 *  - a signal is pending (ret = -ERESTARTSYS).
 * If the time ran out but `condition` is true at that point, `ret` is set
 * to 1 so that the caller does not see a timeout.
 */
#define __wait_event_interruptible_timeout(wq, condition, ret) \
do { \
	DEFINE_WAIT(__wait); \
	\
	for (;;) { \
		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
		if (condition) \
			break; \
		if (!signal_pending(current)) { \
			ret = schedule_timeout(ret); \
			if (!ret) \
				break; \
			continue; \
		} \
		ret = -ERESTARTSYS; \
		break; \
	} \
	if (!ret && (condition)) \
		ret = 1; \
	finish_wait(&wq, &__wait); \
} while (0)

/*
 * Sleep in TASK_INTERRUPTIBLE until `condition` is true, `timeout` jiffies
 * elapse, or a signal is pending. Evaluates to the value left in __ret:
 * > 0 when `condition` is satisfied, 0 on timeout, -ERESTARTSYS on signal.
 */
#define wait_event_interruptible_timeout(wq, condition, timeout) \
({ \
	long __ret = timeout; \
	if (!(condition)) \
		__wait_event_interruptible_timeout(wq, condition, __ret); \
	__ret; \
})
wait_event_interruptible_timeout的实现和上面wait_event等类似,退出循环的条件是: 满足 condition、timeout 和 有信号 三者之一(如果执行到schedule,需要另外一个进程调用wake_up_xxx(&wq)操作,或者超时了,或者该进程收到了信号,这三者都会将任务加入到run queue中。)。
wait_event_interruptible_timeout 返回值如下:
- -ERESTARTSYS: 表示被信号激活唤醒
- 大于0: 表示condition满足,返回值表示距离设定超时还有多久(jiffies)
- 等于0: 表示超时发生
3. 唤醒等待队列上的进程
内核提供了如下接口来唤醒等待队列上的进程:
/*
 * Wake-up entry points. Each one forwards to __wake_up(), __wake_up_locked()
 * or __wake_up_sync() with a task-state mask (TASK_NORMAL or
 * TASK_INTERRUPTIBLE) and an exclusive-waiter count: 1, a caller-supplied
 * nr, or 0 for the *_all variants.
 */
#define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
#define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
#define wake_up_all(x)			__wake_up(x, TASK_NORMAL, 0, NULL)
#define wake_up_locked(x)		__wake_up_locked((x), TASK_NORMAL, 1)
#define wake_up_all_locked(x)		__wake_up_locked((x), TASK_NORMAL, 0)

#define wake_up_interruptible(x)	__wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_interruptible_nr(x, nr)	__wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
#define wake_up_interruptible_all(x)	__wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
#define wake_up_interruptible_sync(x)	__wake_up_sync((x), TASK_INTERRUPTIBLE, 1)
可以看到这些接口调用了三个函数__wake_up,__wake_up_locked,__wake_up_sync。先看看__wake_up的实现:
/*
 * Walk every entry on q->task_list and invoke its wake callback.
 * The walk stops early once nr_exclusive entries that (a) reported a
 * successful wake-up and (b) carry WQ_FLAG_EXCLUSIVE have been handled.
 * With nr_exclusive == 0 the decrement never reaches zero on the first
 * pass, so the whole list is walked.
 *
 * The caller in this listing (__wake_up) holds q->lock around the call.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
			int nr_exclusive, int wake_flags, void *key)
{
	wait_queue_t *curr, *next;

	/* _safe variant: the callback may unlink `curr` from the list. */
	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
		/* Read flags before the callback runs. */
		unsigned flags = curr->flags;

		if (curr->func(curr, mode, wake_flags, key) &&
				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
			break;
	}
}

/*
 * Wake waiters on `q` whose state matches `mode`: takes q->lock with
 * interrupts saved/disabled and runs __wake_up_common() with wake_flags 0.
 */
void __wake_up(wait_queue_head_t *q, unsigned int mode,
			int nr_exclusive, void *key)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	__wake_up_common(q, mode, nr_exclusive, 0, key);
	spin_unlock_irqrestore(&q->lock, flags);
}
可以看到__wake_up会调到__wake_up_common函数,该函数的逻辑是,遍历等待队列上的wait_queue_t结构体,进行如下的操作:
① 获取curr->flags值放入flags中。
② 进行判断,如果同时满足三个条件就退出循环。(注对于if来说,如果前面有一项不满足,后续的判断就不会做)
第一个条件是curr->func的返回结果,依据前面的说明,该函数实际上就是autoremove_wake_function,其详细说明如下,如果返回1,表明已经将相关的任务加入到cpu的run queue,并修改任务的状态成功。依据前面定义的wait_event_xxx的实现,该项正常来说均返回1。
第二个条件是flags & WQ_FLAG_EXCLUSIVE,如果该wait_queue_t是独占的,就为真。对于一个任务队列来说,只有前面的非互斥项执行curr->func之后,才轮到互斥进程,也只有互斥进程flags & WQ_FLAG_EXCLUSIVE才为真。
第三个条件是!--nr_exclusive,如果nr_exclusive为0,依据常理,该项始终为假;如果nr_exclusive为1,则第一次就为真;如果nr_exclusive为一个正整数nr,则第nr次,该项为真。
所以可以得出:
wake_up 唤醒全部的非独占任务,唤醒一个独占任务。
wake_up_nr 唤醒全部的非独占任务,唤醒nr个独占任务。
wake_up_all 唤醒全部的非独占任务,唤醒全部独占任务。
wake_up_interruptible_xxx等函数类似上面。
static inttry_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags){ unsigned long flags; int cpu, success = 0; /* * If we are going to wake up a thread waiting for CONDITION we * need to ensure that CONDITION=1 done by the caller can not be * reordered with p->state check below. This pairs with mb() in * set_current_state() the waiting thread does. */ smp_mb__before_spinlock(); raw_spin_lock_irqsave(&p->pi_lock, flags); if (!(p->state & state)) goto out; success = 1; /* we're going to change ->state */ cpu = task_cpu(p); /* 获取最后执行该任务的CPU */ if (p->on_rq && ttwu_remote(p, wake_flags)) goto stat; /* support smp 在很多架构上还不支持smp可以忽略此处 * 判断是否要将任务转移到另外一个CPU的执行队列上,负载均衡 * /#ifdef CONFIG_SMP /* * If the owning (remote) cpu is still in the middle of schedule() with * this task as prev, wait until its done referencing the task. */ while (p->on_cpu) cpu_relax(); /* * Pairs with the smp_wmb() in finish_lock_switch(). */ smp_rmb(); p->sched_contributes_to_load = !!task_contributes_to_load(p); p->state = TASK_WAKING; if (p->sched_class->task_waking) p->sched_class->task_waking(p); cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); if (task_cpu(p) != cpu) { wake_flags |= WF_MIGRATED; set_task_cpu(p, cpu); }#endif /* CONFIG_SMP */ ttwu_queue(p, cpu);stat: ttwu_stat(p, cpu, wake_flags);out: raw_spin_unlock_irqrestore(&p->pi_lock, flags); return success;}int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, void *key){ return try_to_wake_up(curr->private, mode, wake_flags);}int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key){ int ret = default_wake_function(wait, mode, sync, key); if (ret) // 如果返回真,将该任务从等待队列中移除。 list_del_init(&wait->task_list); return ret;}
以上是autoremove_wake_function的实现,具体看try_to_wake_up函数。
该函数有3个参数:
- p 任务结构体指针
- state 需要唤醒的进程状态掩码,即需要唤醒符合该状态掩码的进程
- wake_flags 此处等待队列传过来的值为0。表示是同步唤醒sync,还是异步唤醒 async;
- Linux等待队列waitqueue
- 等待队列(waitqueue)
- 推后执行_等待队列(waitqueue)
- linux等待队列
- LINUX 等待队列
- LINUX内核-等待队列
- linux等待队列
- linux 内核等待队列
- linux 等待队列
- linux 等待队列
- Linux等待队列简介
- linux 等待队列
- linux 等待队列
- linux 之 等待队列
- linux 等待队列
- linux等待队列
- linux 等待队列
- linux 线程等待队列
- Android进程绝杀技--forceStop
- vim编辑只读文档时无法保存
- Java Android 编程规范
- 单点登录简介
- 170614 杂项-.NET平台基本概念
- Linux等待队列waitqueue
- 89C52单片机之流水灯中断
- 数据结构—栈的应用
- openlayer3热力图的实现 Heatmap
- Manifest merger failed with multiple errors, see logs问题处理
- 记录C++学习bug历程
- Android7.0中文文档(API)-- Toolbar
- java集合类的一些内建函数分析
- 单链表习题(进阶二)——复杂链表复制