Source Code Analysis: wait queue


In the kernel, wait queues are the usual mechanism for asynchronous notification when a specific event occurs.
The approach works as follows (a minimal usage sketch follows this list):
1. For each event of interest (readable, writable, and so on), maintain a wait queue, which is essentially a doubly linked list.
2. Every entity that cares about the event joins that queue as a wait queue entry.
3. Each entry records which entity (process or thread) is waiting, plus a callback function.
4. When the event occurs, the corresponding wait queue is walked and the callback registered in each entry is invoked.
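
A minimal usage sketch of this pattern (not taken from the kernel source; my_waitq and my_flag are illustrative names for a driver's queue and event flag):

#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(my_waitq);   /* the event's wait queue */
static int my_flag;                         /* the condition being waited on */

/* Consumer side: sleep (TASK_UNINTERRUPTIBLE) until the event has happened. */
void my_wait_for_event(void)
{
    wait_event(my_waitq, my_flag != 0);
}

/* Producer side (e.g. an interrupt handler): publish the event, then wake
 * every waiter queued on my_waitq. */
void my_post_event(void)
{
    my_flag = 1;
    wake_up(&my_waitq);
}

The sections below walk through the structures and the init/add/wait/wake-up helpers that such a sketch relies on.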

1. Structure

// include/linux/wait.h

// wait queue head
typedef struct __wait_queue_head wait_queue_head_t;
struct __wait_queue_head {
    spinlock_t lock;
    struct list_head task_list;
};

// wait queue entry
typedef struct __wait_queue wait_queue_t;
struct __wait_queue {
    unsigned int flags;
#define WQ_FLAG_EXCLUSIVE   0x01
    void *private;
    // callback invoked when the event occurs
    wait_queue_func_t func;
    struct list_head task_list;
};

// prototype of the callback
typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
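
As a side note (a hedged sketch, not part of the excerpt above): include/linux/wait.h also provides declaration macros that build these structures statically; my_wq below is an illustrative name.

#include <linux/wait.h>

/* Defines a wait_queue_head_t whose spinlock and (empty, self-pointing)
 * task_list are initialized at compile time, so no init_waitqueue_head()
 * call is needed for it. */
static DECLARE_WAIT_QUEUE_HEAD(my_wq);

DECLARE_WAITQUEUE(name, task) plays the same role for a wait_queue_t: it defines an entry whose private field points at the given task and whose func is default_wake_function.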

2. Init

// include/linux/wait.h
extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *);

#define init_waitqueue_head(q)              \
    do {                        \
        static struct lock_class_key __key; \
                            \
        __init_waitqueue_head((q), #q, &__key); \
    } while (0)

// kernel/sched/wait.c
void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
{
    spin_lock_init(&q->lock);
    lockdep_set_class_and_name(&q->lock, key, name);
    INIT_LIST_HEAD(&q->task_list);
}

// include/linux/list.h
static inline void INIT_LIST_HEAD(struct list_head *list)
{
    WRITE_ONCE(list->next, list);
    list->prev = list;
}

// include/linux/wait.h
static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
{
    q->flags    = 0;
    q->private  = p;
    q->func     = default_wake_function;
}

// include/linux/wait.h
static inline void
init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
{
    q->flags    = 0;
    q->private  = NULL;
    q->func     = func;
}
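
The difference between the two initializers above is what ends up in func. A sketch with hypothetical my_* names (not from the kernel source): init_waitqueue_entry() is for entries that should wake a task, while init_waitqueue_func_entry() lets code such as epoll hook a custom callback matching wait_queue_func_t.

#include <linux/wait.h>
#include <linux/sched.h>

static int my_wake_callback(wait_queue_t *wait, unsigned mode,
                            int flags, void *key)
{
    /* React to the event here (e.g. kick off deferred work). The return
     * value feeds the exclusive-wakeup accounting in __wake_up_common():
     * non-zero means "this waiter was handled". */
    return 1;
}

static void my_init_entries(wait_queue_t *task_entry, wait_queue_t *func_entry)
{
    init_waitqueue_entry(task_entry, current);               /* wake this task */
    init_waitqueue_func_entry(func_entry, my_wake_callback); /* run a callback */
}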

3. Add && Remove

// include/linux/wait.h
static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
{
    list_add(&new->task_list, &head->task_list);
}

/*
 * include/linux/wait.h
 * Used for wake-one threads:
 */
static inline void
__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
    wait->flags |= WQ_FLAG_EXCLUSIVE;
    __add_wait_queue(q, wait);
}

// include/linux/wait.h
static inline void __add_wait_queue_tail(wait_queue_head_t *head,
                     wait_queue_t *new)
{
    list_add_tail(&new->task_list, &head->task_list);
}

// include/linux/wait.h
static inline void
__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
    wait->flags |= WQ_FLAG_EXCLUSIVE;
    __add_wait_queue_tail(q, wait);
}

// include/linux/wait.h
static inline void
__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
{
    list_del(&old->task_list);
}

// kernel/sched/wait.c
void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
    unsigned long flags;

    wait->flags &= ~WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&q->lock, flags);
    __add_wait_queue(q, wait);
    spin_unlock_irqrestore(&q->lock, flags);
}

// kernel/sched/wait.c
void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
    unsigned long flags;

    wait->flags |= WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&q->lock, flags);
    __add_wait_queue_tail(q, wait);
    spin_unlock_irqrestore(&q->lock, flags);
}

// kernel/sched/wait.c
void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
    unsigned long flags;

    spin_lock_irqsave(&q->lock, flags);
    __remove_wait_queue(q, wait);
    spin_unlock_irqrestore(&q->lock, flags);
}
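
With these helpers, the classic open-coded sleep looks roughly like the sketch below (my_wq and my_condition are illustrative; some other context is assumed to set my_condition and then call wake_up(&my_wq)). Setting the task state before re-checking the condition is what keeps a wake-up that arrives between the check and schedule() from being lost.

#include <linux/wait.h>
#include <linux/sched.h>

static wait_queue_head_t my_wq;   /* assume init_waitqueue_head(&my_wq) ran at setup */
static int my_condition;          /* set elsewhere, followed by wake_up(&my_wq) */

void my_wait_for_condition(void)
{
    wait_queue_t wait;

    init_waitqueue_entry(&wait, current);       /* bind the entry to this task */
    add_wait_queue(&my_wq, &wait);              /* join the queue */
    for (;;) {
        set_current_state(TASK_INTERRUPTIBLE);  /* mark ourselves as sleeping ... */
        if (my_condition)                       /* ... then re-check the event */
            break;
        schedule();                             /* really sleep until woken */
    }
    set_current_state(TASK_RUNNING);
    remove_wait_queue(&my_wq, &wait);           /* leave the queue */
}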

4. Wait

// include/linux/wait.h

/*
 * The below macro ___wait_event() has an explicit shadow of the __ret
 * variable when used from the wait_event_*() macros.
 *
 * This is so that both can use the ___wait_cond_timeout() construct
 * to wrap the condition.
 *
 * The type inconsistency of the wait_event_*() __ret variable is also
 * on purpose; we use long where we can return timeout values and int
 * otherwise.
 */
#define ___wait_event(wq, condition, state, exclusive, ret, cmd)    \
({                                  \
    __label__ __out;                        \
    wait_queue_t __wait;                        \
    long __ret = ret;   /* explicit shadow */           \
                                    \
    init_wait_entry(&__wait, exclusive ? WQ_FLAG_EXCLUSIVE : 0);    \
    for (;;) {                          \
        long __int = prepare_to_wait_event(&wq, &__wait, state);\
                                    \
        if (condition)                      \
            break;                      \
                                    \
        if (___wait_is_interruptible(state) && __int) {     \
            __ret = __int;                  \
            goto __out;                 \
        }                           \
                                    \
        cmd;                            \
    }                               \
    finish_wait(&wq, &__wait);                  \
__out:  __ret;                              \
})

#define __wait_event(wq, condition)                 \
    (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,  \
                schedule())

/**
 * wait_event - sleep until a condition gets true
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 *
 * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
 * @condition evaluates to true. The @condition is checked each time
 * the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 */
#define wait_event(wq, condition)                   \
do {                                    \
    might_sleep();                          \
    if (condition)                          \
        break;                          \
    __wait_event(wq, condition);                    \
} while (0)

#define __io_wait_event(wq, condition)                  \
    (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,  \
                io_schedule())

/*
 * io_wait_event() -- like wait_event() but with io_schedule()
 */
#define io_wait_event(wq, condition)                    \
do {                                    \
    might_sleep();                          \
    if (condition)                          \
        break;                          \
    __io_wait_event(wq, condition);                 \
} while (0)

#define __wait_event_freezable(wq, condition)               \
    ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,      \
                schedule(); try_to_freeze())

/**
 * wait_event_freezable - sleep (or freeze) until a condition gets true
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 *
 * The process is put to sleep (TASK_INTERRUPTIBLE -- so as not to contribute
 * to system load) until the @condition evaluates to true. The
 * @condition is checked each time the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 */
#define wait_event_freezable(wq, condition)             \
({                                  \
    int __ret = 0;                          \
    might_sleep();                          \
    if (!(condition))                       \
        __ret = __wait_event_freezable(wq, condition);      \
    __ret;                              \
})

#define __wait_event_timeout(wq, condition, timeout)            \
    ___wait_event(wq, ___wait_cond_timeout(condition),      \
              TASK_UNINTERRUPTIBLE, 0, timeout,         \
              __ret = schedule_timeout(__ret))

/**
 * wait_event_timeout - sleep until a condition gets true or a timeout elapses
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 * @timeout: timeout, in jiffies
 *
 * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
 * @condition evaluates to true. The @condition is checked each time
 * the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 *
 * Returns:
 * 0 if the @condition evaluated to %false after the @timeout elapsed,
 * 1 if the @condition evaluated to %true after the @timeout elapsed,
 * or the remaining jiffies (at least 1) if the @condition evaluated
 * to %true before the @timeout elapsed.
 */
#define wait_event_timeout(wq, condition, timeout)          \
({                                  \
    long __ret = timeout;                       \
    might_sleep();                          \
    if (!___wait_cond_timeout(condition))               \
        __ret = __wait_event_timeout(wq, condition, timeout);   \
    __ret;                              \
})

#define __wait_event_freezable_timeout(wq, condition, timeout)      \
    ___wait_event(wq, ___wait_cond_timeout(condition),      \
              TASK_INTERRUPTIBLE, 0, timeout,           \
              __ret = schedule_timeout(__ret); try_to_freeze())

/*
 * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid
 * increasing load and is freezable.
 */
#define wait_event_freezable_timeout(wq, condition, timeout)        \
({                                  \
    long __ret = timeout;                       \
    might_sleep();                          \
    if (!___wait_cond_timeout(condition))               \
        __ret = __wait_event_freezable_timeout(wq, condition, timeout); \
    __ret;                              \
})
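
A hedged usage sketch of wait_event_timeout(); my_wq, my_data_ready and the 500 ms timeout are illustrative, not from the kernel source.

#include <linux/wait.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wq);
static int my_data_ready;

int my_wait_for_data(void)
{
    long left;

    /* Sleeps in TASK_UNINTERRUPTIBLE until my_data_ready becomes non-zero or
     * the timeout expires; 0 means the condition was still false when the
     * timeout elapsed, a positive value means it became true in time. */
    left = wait_event_timeout(my_wq, my_data_ready, msecs_to_jiffies(500));
    return left ? 0 : -ETIMEDOUT;
}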

5. Wake Up

/**
 * __wake_up - wake up threads blocked on a waitqueue.
 * @q: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: is directly passed to the wakeup function
 *
 * It may be assumed that this function implies a write memory barrier before
 * changing the task state if and only if any tasks are woken up.
 */
void __wake_up(wait_queue_head_t *q, unsigned int mode,
            int nr_exclusive, void *key)
{
    unsigned long flags;

    spin_lock_irqsave(&q->lock, flags);
    __wake_up_common(q, mode, nr_exclusive, 0, key);
    spin_unlock_irqrestore(&q->lock, flags);
}

/*
 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
 * number) then we wake all the non-exclusive tasks and one exclusive task.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
 * zero in this (rare) case, and we handle it by continuing to scan the queue.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
            int nr_exclusive, int wake_flags, void *key)
{
    wait_queue_t *curr, *next;

    list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
        unsigned flags = curr->flags;

        if (curr->func(curr, mode, wake_flags, key) &&
                (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
            break;
    }
}
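
For reference, the common wake-up wrappers funnel into __wake_up() with different nr_exclusive values; the expansions noted in the comments are as I recall them from include/linux/wait.h, and my_wq is an illustrative queue pointer.

void my_wake_examples(wait_queue_head_t *my_wq)
{
    /* nr_exclusive == 1: wake every non-exclusive waiter plus at most one
     * entry that was added with add_wait_queue_exclusive() */
    wake_up(my_wq);         /* ~ __wake_up(my_wq, TASK_NORMAL, 1, NULL) */

    /* nr_exclusive == 0: wake everything on the queue */
    wake_up_all(my_wq);     /* ~ __wake_up(my_wq, TASK_NORMAL, 0, NULL) */
}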

6. Summary

The life cycle of a wait queue: create a head and entries -> init (init_waitqueue_head(), init_waitqueue_entry() / init_waitqueue_func_entry()) -> add or remove entries (add_wait_queue(), remove_wait_queue()) -> wait (wait_event() and its variants) -> wake up (wake_up() -> __wake_up_common(), which walks the list and invokes each entry's callback).