Linux工作队列workqueue实现分析

来源：互联网发布：心事谁人知二胡编辑：程序博客网时间：2024/05/16 10:18

本文档的Copyleft归rosetta所有，使用GPL发布，可以自由拷贝、转载，转载时请保持文档的完整性。
参考资料：《Linux内核设计与实现》第3版 LKD3e、linux-2.6.27

        工作队列子系统是一个用于创建内核线程的接口，通过它创建的线程负责执行由内核其它部分排到队列里的任务。这些内核线程称为工作者线程。工作队列子系统提供了一个缺省的工作者线程来处理工作。一般使用缺省线程即可，但当处理密集型和性能要求严格的任务时，创建拥有自己的工作者线程比较好。（引自LKD3e）

        这个接口就是create_workqueue(),它返回一个struct workqueue_struct 结构指针。
        /*
         * The externally visible workqueue abstraction is an array of
         * per-CPU workqueues:
         *
         * One workqueue_struct is allocated per create_workqueue() call; it owns
         * a per-CPU set of cpu_workqueue_struct instances reached via cpu_wq.
         */
        struct workqueue_struct {
            struct cpu_workqueue_struct *cpu_wq; /* per-CPU data, from alloc_percpu() */
            struct list_head list;               /* link in the global "workqueues" list */
            const char *name;                    /* used when naming the worker threads */
            int singlethread;                    /* non-zero: only singlethread_cpu gets a thread */
            int freezeable;     /* Freeze threads during suspend */
        #ifdef CONFIG_LOCKDEP
            struct lockdep_map lockdep_map;      /* acquired around every work function call */
        #endif
        };
        上面注释：外部可见的工作队列抽象是由per-CPU的工作队列组成的数组，这个数组为结构体 cpu_workqueue_struct 。所以每种任务，它有一个自己的工作队列（struct workqueue_struct ），这个工作队列如果需要的话会为每个CPU创建对应的工作者线程。也就是说每个CPU，每个工作者线程对应一个cpu_workqueue_struct。

        /*
         * The per-CPU workqueue (if single thread, we always use the first
         * possible cpu).
         *
         * Each instance is the private data of one worker thread: the thread
         * sleeps on more_work and drains worklist in run_workqueue().
         */
        struct cpu_workqueue_struct {

            spinlock_t lock;                    /* held by run_workqueue() while it touches the fields below */

            struct list_head worklist;          /* pending work_struct entries (linked via work->entry) */
            wait_queue_head_t more_work;        /* worker_thread() waits here for new work */
            struct work_struct *current_work;   /* item being executed right now, NULL otherwise */

            struct workqueue_struct *wq;        /* back-pointer to the owning workqueue */
            struct task_struct *thread;         /* worker kthread, set by create_workqueue_thread() */

            int run_depth;      /* Detect run_workqueue() recursion depth */
        } ____cacheline_aligned;
        刚才说了，每种任务，它都有一个自己的工作队列，这种任务的抽象就是为每个CPU创建一个处理这种任务的工作者线程（当然这是需要的情况下，如果不需要则会使用默认的工作者线程events/n,n为CPU编号），那么这个wq就是关联到自己的工作队列workqueue_struct。所有的工作者线程都是用普通的内核线程实现的，由worker_thread()函数完成。
当为一个CPU创建完一个线程后，这个线程执行死循环开始休眠，当有操作插入到队列时，线程被唤醒并执行。（LKD3e）

        对应的具体工作由work_struct结构表示：
        /*
         * One unit of deferred work queued on a cpu_workqueue_struct.
         *
         * NOTE(review): run_workqueue() reads work->lockdep_map under
         * CONFIG_LOCKDEP, so the complete definition presumably carries an
         * extra lockdep_map member that this excerpt omits -- confirm against
         * the kernel headers.
         */
        struct work_struct {
            atomic_long_t data;     /* read via get_wq_data() / work_clear_pending(); presumably owner cwq plus flag bits -- confirm */
            struct list_head entry; /* link in cpu_workqueue_struct.worklist */
            work_func_t func;       /* handler, invoked as func(work) by run_workqueue() */
        };
        由list_head可知，它是个双向链表，每个结点为一个work_struct结构类型。每个CPU上的每种类型的队列都对应这样一个链表。当一个工作线程被唤醒时，它会执行这个链表上的所有工作；每项工作在执行之前会先从链表上移除相应的work_struct，然后再调用它的处理函数；当链表上不再有对象时就继续休眠。
        总的来说就是每种任务（可以理解成为处理不同数据结构），有一个workqueue_struct。每个CPU有多个工作者线程，
        每个线程处理相应的任务。处理过程最终调用的是func。至于func是怎么赋值的可参考下面代码实现。

        下面再看下linux内核具体实现，看一下create_workqueue是怎样创建工作者线程的,代码有很多内核同步机制，暂时不关注。
        /*
         * create_workqueue(name) - create a workqueue with singlethread == 0 and
         * freezeable == 0.
         */
        #define create_workqueue(name) __create_workqueue((name), 0, 0)

        /*
         * __create_workqueue(name, singlethread, freezeable) - statement
         * expression that evaluates to the result of __create_workqueue_key().
         *
         * Every expansion site gets its own static lock_class_key. The lock name
         * passed along is @name itself when it is a compile-time constant,
         * otherwise the stringified macro argument.
         *
         * When reached through create_workqueue(), singlethread is 0.
         *
         * Keep comments out of the continuation lines: the backslash must be the
         * very last character on the line, otherwise the macro body is cut short.
         */
        #define __create_workqueue(name, singlethread, freezeable) \
        ({                              \
            static struct lock_class_key __key;         \
            const char *__lock_name;                \
                                        \
            if (__builtin_constant_p(name))             \
                __lock_name = (name);               \
            else                            \
                __lock_name = #name;                \
                                        \
            __create_workqueue_key((name), (singlethread),      \
                           (freezeable), &__key,        \
                           __lock_name);            \
        })

        /*
         * __create_workqueue_key - allocate a workqueue and create its worker
         * thread(s).
         *
         * @name:         stored in wq->name
         * @singlethread: non-zero to create a single worker on singlethread_cpu
         * @freezeable:   stored in wq->freezeable
         * @key, @lock_name: handed to lockdep_init_map() for wq->lockdep_map
         *
         * Returns the new workqueue, or NULL when either allocation fails or
         * when thread creation reported an error (in that case the partially
         * built workqueue is torn down with destroy_workqueue() first).
         *
         * NOTE(review): start_workqueue_thread() is called even when the
         * preceding create_workqueue_thread() failed; presumably it tolerates a
         * cwq without a thread -- confirm against its definition.
         */
        struct workqueue_struct *__create_workqueue_key(const char *name,
                               int singlethread,
                               int freezeable,
                               struct lock_class_key *key,
                               const char *lock_name)
        {
           struct workqueue_struct *wq;
           struct cpu_workqueue_struct *cwq;
           int err = 0, cpu;

           wq = kzalloc(sizeof(*wq), GFP_KERNEL);
           if (!wq)
               return NULL;

           wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);/* allocate one cpu_workqueue_struct per CPU */
        /* For background on per-CPU data, see the companion article "Linux per-CPU implementation analysis". */
           if (!wq->cpu_wq) {
               kfree(wq);
               return NULL;
           }

           wq->name = name;
           lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
           wq->singlethread = singlethread;
           wq->freezeable = freezeable;
           INIT_LIST_HEAD(&wq->list);

           if (singlethread) {
               cwq = init_cpu_workqueue(wq, singlethread_cpu);
               err = create_workqueue_thread(cwq, singlethread_cpu);
               start_workqueue_thread(cwq, -1);
           } else {/* create_workqueue() passes singlethread == 0, so it takes this branch */
               cpu_maps_update_begin();
               /*
               * We must place this wq on list even if the code below fails.
               * cpu_down(cpu) can remove cpu from cpu_populated_map before
               * destroy_workqueue() takes the lock, in that case we leak
               * cwq[cpu]->thread.
               */
               spin_lock(&workqueue_lock);
               list_add(&wq->list, &workqueues);
               spin_unlock(&workqueue_lock);
               /*
               * We must initialize cwqs for each possible cpu even if we
               * are going to call destroy_workqueue() finally. Otherwise
               * cpu_up() can hit the uninitialized cwq once we drop the
               * lock.
               */
               for_each_possible_cpu(cpu) {/* visit every possible CPU */
                   cwq = init_cpu_workqueue(wq, cpu);
                   /* after the first error, or for offline CPUs, only initialize */
                   if (err || !cpu_online(cpu))
                       continue;
                   err = create_workqueue_thread(cwq, cpu);/* create the kernel thread for this online CPU */
                   start_workqueue_thread(cwq, cpu);
               }
               cpu_maps_update_done();
           }

           if (err) {
               destroy_workqueue(wq);
               wq = NULL;
           }
           return wq;
        }

        /*
         * create_workqueue_thread - create the worker kthread that serves @cwq.
         *
         * The new thread runs worker_thread() with @cwq as its argument, so each
         * thread's private data is its cpu_workqueue_struct. The name format is
         * "%s" for a single-threaded workqueue and "%s/%d" otherwise, with
         * wq->name and @cpu supplied as the format arguments.
         *
         * Returns 0 after recording the task in cwq->thread, or the error code
         * carried by the pointer that kthread_create() returned.
         */
        static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
        {
            struct workqueue_struct *owner = cwq->wq;
            struct task_struct *task;
            const char *name_fmt;

            if (is_single_threaded(owner))
                name_fmt = "%s";
            else
                name_fmt = "%s/%d";

            task = kthread_create(worker_thread, cwq, name_fmt, owner->name, cpu);
            if (IS_ERR(task)) {
                /*
                 * Bailing out here is safe because no work_struct can have
                 * been queued on this cwq yet: when called from
                 * __create_workqueue nobody else can see the wq, and when
                 * called for CPU_UP_PREPARE the cpu is not yet in
                 * cpu_online_map.
                 */
                return PTR_ERR(task);
            }

            cwq->thread = task;
            return 0;
        }

        /*
         * worker_thread - main loop of every workqueue worker thread.
         *
         * @__cwq: the cpu_workqueue_struct this thread serves.
         *
         * The thread marks itself freezable if the workqueue asks for it, sets
         * its nice value to -5, then loops: it sleeps on cwq->more_work while
         * there is nothing to do, and otherwise hands the pending work to
         * run_workqueue(). The loop ends, and 0 is returned, once
         * kthread_should_stop() reports true.
         */
        static int worker_thread(void *__cwq) /* body of the worker thread once it has been created */
        {
            struct cpu_workqueue_struct *cwq = __cwq;
            DEFINE_WAIT(wait);

            if (cwq->wq->freezeable)
                set_freezable();

            set_user_nice(current, -5);

            for (;;) {
                /* register on the wait queue before testing the conditions below */
                prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
                if (!freezing(current) &&
                    !kthread_should_stop() &&
                    list_empty(&cwq->worklist))/* nothing is queued on this cwq */
                    schedule();/* go to sleep */
                finish_wait(&cwq->more_work, &wait);

                try_to_freeze();

                if (kthread_should_stop())
                    break;

                run_workqueue(cwq);/* this is where the queued work actually gets executed */
            }

            return 0;
        }

        /*
         * run_workqueue - execute every work item currently queued on @cwq.
         *
         * cwq->lock is taken with spin_lock_irq() while the list and
         * current_work are manipulated and is dropped around each call to the
         * work function. run_depth counts nested invocations; a depth above 3
         * is reported together with a stack dump.
         *
         * After each work function returns, a leaked atomic context or held
         * lock (in_atomic() or lockdep_depth(current) > 0) is reported along
         * with the offending function.
         */
        static void run_workqueue(struct cpu_workqueue_struct *cwq)
        {
            spin_lock_irq(&cwq->lock);
            cwq->run_depth++;
            if (cwq->run_depth > 3) {
                /* morton gets to eat his hat */
                printk("%s: recursion depth exceeded: %d\n",
                    __func__, cwq->run_depth);
                dump_stack();
            }
            while (!list_empty(&cwq->worklist)) {/* keep going until this cwq's list of pending work is empty */
                struct work_struct *work = list_entry(cwq->worklist.next,
                                struct work_struct, entry);/* first pending work_struct on the list */
                work_func_t f = work->func;/* remember the handler; it is called as f(work) below */
        #ifdef CONFIG_LOCKDEP
                /*
                 * It is permissible to free the struct work_struct
                 * from inside the function that is called from it,
                 * this we need to take into account for lockdep too.
                 * To avoid bogus "held lock freed" warnings as well
                 * as problems when looking into work->lockdep_map,
                 * make a copy and use that here.
                 */
                struct lockdep_map lockdep_map = work->lockdep_map;
        #endif

                /* publish the running item and unlink it before the lock is dropped */
                cwq->current_work = work;
                list_del_init(cwq->worklist.next);
                spin_unlock_irq(&cwq->lock);

                BUG_ON(get_wq_data(work) != cwq);
                work_clear_pending(work);
                lock_map_acquire(&cwq->wq->lockdep_map);
                lock_map_acquire(&lockdep_map);
                f(work);/* finally run the work function */
                lock_map_release(&lockdep_map);
                lock_map_release(&cwq->wq->lockdep_map);

                if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
                    printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
                            "%s/0x%08x/%d\n",
                            current->comm, preempt_count(),
                                task_pid_nr(current));
                    printk(KERN_ERR "    last function: ");
                    print_symbol("%s\n", (unsigned long)f);
                    debug_show_held_locks(current);
                    dump_stack();
                }

                spin_lock_irq(&cwq->lock);
                cwq->current_work = NULL;
            }
            cwq->run_depth--;
            spin_unlock_irq(&cwq->lock);
        }