Linux工作队列workqueue实现分析

来源:互联网 发布:心事谁人知二胡 编辑:程序博客网 时间:2024/05/16 10:18
本文档的Copyleft归rosetta所有,使用GPL发布,可以自由拷贝、转载,转载时请保持文档的完整性。
参考资料:《Linux内核设计与实现》第3版 LKD3e、linux-2.6.27

        工作队列子系统是一个用于创建内核线程的接口,通过它创建的线程负责执行由内核其它部分排到队列里的任务。这些内核线程称为工作者线程。工作队列子系统提供了一个缺省的工作者线程来处理工作。一般使用缺省线程即可,但当处理密集型和性能要求严格的任务时,创建自己专用的工作者线程比较好。(引自LKD3e)

        这个接口就是create_workqueue(),它返回一个struct workqueue_struct 结构指针。
        /*
         * The externally visible workqueue abstraction is an array of
         * per-CPU workqueues:
         */
        struct workqueue_struct {
            /* Per-CPU cpu_workqueue_struct storage, obtained from alloc_percpu(). */
            struct cpu_workqueue_struct *cpu_wq;
            /* Link used to put this workqueue on the global "workqueues" list. */
            struct list_head list;
            /* Name passed by the creator; also used to name the worker threads. */
            const char *name;
            /* Non-zero: only one worker is set up, on singlethread_cpu. */
            int singlethread;
            int freezeable;     /* Freeze threads during suspend */
        #ifdef CONFIG_LOCKDEP
            /* Lockdep map acquired around every work function run on this queue. */
            struct lockdep_map lockdep_map;
        #endif
        };
        上面注释的意思是:外部可见的工作队列抽象是一个由per-CPU工作队列组成的数组,数组元素的类型为结构体 cpu_workqueue_struct。所以每种任务都有一个自己的工作队列(struct workqueue_struct),这个工作队列在需要时会为每个CPU创建对应的工作者线程。也就是说,对每个CPU上的每个工作者线程,都有一个cpu_workqueue_struct与之对应。

        /*
         * The per-CPU workqueue (if single thread, we always use the first
         * possible cpu).
         */
        struct cpu_workqueue_struct {
        
            /* Held by run_workqueue() while it touches worklist, current_work and run_depth. */
            spinlock_t lock;
        
            /* Pending work_struct items, linked through work_struct.entry. */
            struct list_head worklist;
            /* Wait queue the worker thread sleeps on while there is nothing to do. */
            wait_queue_head_t more_work;
            /* Item whose function is currently executing, NULL otherwise. */
            struct work_struct *current_work;
        
            /* Back-pointer to the owning workqueue. */
            struct workqueue_struct *wq;
            /* Worker task created by create_workqueue_thread(). */
            struct task_struct *thread;
        
            int run_depth;      /* Detect run_workqueue() recursion depth */
        } ____cacheline_aligned;
        刚才说了,每种任务,它都有一个自己的工作队列,这种任务的抽象就是为每个CPU创建一个处理这种任务的工作者线程(当然这是需要的情况下,如果不需要则会使用默认的工作者线程events/n,n为CPU编号),那么这个wq就是关联到自己的工作队列workqueue_struct。所有的工作者线程都是用普通的内核线程实现的,由worker_thread()函数完成。
当为一个CPU创建完一个线程后,这个线程执行死循环开始休眠,当有操作插入到队列时,线程被唤醒并执行。(LKD3e)

        具体的工作由work_struct结构表示:
        struct work_struct {
            atomic_long_t data;
            struct list_head entry;
            work_func_t func;
        };
        由list_head可知,它是个双向链表,每个结点为一个work_struct结构类型。每个CPU上的每种类型的队列都对应这样一个链表。当一个工作线程被唤醒时,它会执行这个链表上的所有工作;每项工作在执行之前,会先把相应的work_struct从链表上移除(见下面run_workqueue()中的list_del_init());当链表上不再有对象时就继续休眠。
        总的来说就是每种任务(可以理解成为处理不同数据结构),有一个workqueue_struct。每个CPU有多个工作者线程,
        每个线程处理相应的任务。处理过程最终调用的是func。至于func是怎么赋值的可参考下面代码实现。

        下面再看下linux内核具体实现,看一下create_workqueue是怎样创建工作者线程的,代码有很多内核同步机制,暂时不关注。
        #define create_workqueue(name) __create_workqueue((name), 0, 0)
        #define __create_workqueue(name, singlethread, freezeable)  \//singlethread为0
        ({                              \
            static struct lock_class_key __key;         \
            const char *__lock_name;                \
                                        \
            if (__builtin_constant_p(name))             \
                __lock_name = (name);               \
            else                            \
                __lock_name = #name;                \
                                        \
            __create_workqueue_key((name), (singlethread),      \
                           (freezeable), &__key,        \
                           __lock_name);            \
        })

        /*
         * Allocate and set up a workqueue together with its worker thread(s).
         *
         * @name:         workqueue name, stored in wq->name
         * @singlethread: non-zero to create a single worker on singlethread_cpu
         *                instead of one worker per online CPU
         * @freezeable:   stored in wq->freezeable
         * @key:          lockdep class key for wq->lockdep_map
         * @lock_name:    lockdep name for wq->lockdep_map
         *
         * Returns the new workqueue, or NULL if an allocation fails or if thread
         * creation reported an error (the workqueue is destroyed in that case).
         */
        struct workqueue_struct *__create_workqueue_key(const char *name,
                                int singlethread,
                                int freezeable,
                                struct lock_class_key *key,
                                const char *lock_name)
        {
            struct workqueue_struct *wq;
            struct cpu_workqueue_struct *cwq;
            int err = 0, cpu;
        
            wq = kzalloc(sizeof(*wq), GFP_KERNEL);
            if (!wq)
                return NULL;
        
            wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);//allocate one cpu_workqueue_struct per CPU
        //for background on per-CPU data, see the article "Linux per-CPU implementation analysis"
            if (!wq->cpu_wq) {
                kfree(wq);
                return NULL;
            }
        
            wq->name = name;
            lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
            wq->singlethread = singlethread;
            wq->freezeable = freezeable;
            INIT_LIST_HEAD(&wq->list);
        
            if (singlethread) {
                /*
                 * NOTE(review): start_workqueue_thread() is called even when
                 * create_workqueue_thread() returned an error; presumably it
                 * tolerates a cwq without a thread -- confirm.
                 */
                cwq = init_cpu_workqueue(wq, singlethread_cpu);
                err = create_workqueue_thread(cwq, singlethread_cpu);
                start_workqueue_thread(cwq, -1);
            } else {//create_workqueue() reaches here: it passes singlethread == 0
                cpu_maps_update_begin();
                /*
                 * We must place this wq on list even if the code below fails.
                 * cpu_down(cpu) can remove cpu from cpu_populated_map before
                 * destroy_workqueue() takes the lock, in that case we leak
                 * cwq[cpu]->thread.
                 */
                spin_lock(&workqueue_lock);
                list_add(&wq->list, &workqueues);
                spin_unlock(&workqueue_lock);
                /*
                 * We must initialize cwqs for each possible cpu even if we
                 * are going to call destroy_workqueue() finally. Otherwise
                 * cpu_up() can hit the uninitialized cwq once we drop the
                 * lock.
                 */
                for_each_possible_cpu(cpu) {//walk every possible CPU
                    cwq = init_cpu_workqueue(wq, cpu);
                    /* After the first failure, keep initializing cwqs but create no more threads. */
                    if (err || !cpu_online(cpu))
                        continue;
                    err = create_workqueue_thread(cwq, cpu);//create the worker kernel thread for this online CPU
                    start_workqueue_thread(cwq, cpu);
                }
                cpu_maps_update_done();
            }
        
            if (err) {
                destroy_workqueue(wq);
                wq = NULL;
            }
            return wq;
        }

        static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
        {
            struct workqueue_struct *wq = cwq->wq;
            const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d";
            struct task_struct *p;
        
            p = kthread_create(worker_thread, cwq, fmt, wq->name, cpu);//为每个CPU创建一个线程,每个线程的数据对应于 //结构体cpu_workqueue_struct
            /*
             * Nobody can add the work_struct to this cwq,
             *  if (caller is __create_workqueue)
             *      nobody should see this wq
             *  else // caller is CPU_UP_PREPARE
             *      cpu is not on cpu_online_map
             * so we can abort safely.
             */
            if (IS_ERR(p))
                return PTR_ERR(p);
        
            cwq->thread = p;
        
            return 0;
        }

        /*
         * Body of a workqueue worker kernel thread.
         *
         * @__cwq: the cpu_workqueue_struct this thread serves.
         *
         * Loops until kthread_should_stop() is true: sleeps on cwq->more_work
         * while the worklist is empty, then drains it with run_workqueue().
         * Always returns 0.
         */
        static int worker_thread(void *__cwq) //the worker thread that ends up being created
        {   
            struct cpu_workqueue_struct *cwq = __cwq;
            DEFINE_WAIT(wait);
        
            if (cwq->wq->freezeable)
                set_freezable();
        
            set_user_nice(current, -5);
            
            for (;;) {
                /*
                 * Get onto the wait queue in TASK_INTERRUPTIBLE state first,
                 * then test the conditions, and only then give up the CPU.
                 */
                prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
                if (!freezing(current) &&
                    !kthread_should_stop() &&
                    list_empty(&cwq->worklist))//nothing queued on this cpu_workqueue_struct
                    schedule();//go to sleep
                finish_wait(&cwq->more_work, &wait);
        
                try_to_freeze();
        
                if (kthread_should_stop())
                    break;
        
                run_workqueue(cwq);//this is where the queued work actually gets executed
            }
        
            return 0;
        }

        /*
         * Run every work item currently queued on @cwq.
         *
         * cwq->lock is held while the list, current_work and run_depth are
         * manipulated and is dropped around each call to the work function.
         * After each call the function checks for a leaked lock or atomic
         * context and reports it.
         */
        static void run_workqueue(struct cpu_workqueue_struct *cwq)
        {
            spin_lock_irq(&cwq->lock);
            cwq->run_depth++;
            if (cwq->run_depth > 3) {
                /* morton gets to eat his hat */
                printk("%s: recursion depth exceeded: %d\n",
                    __func__, cwq->run_depth);
                dump_stack();
            }
            while (!list_empty(&cwq->worklist)) {//keep going until this cwq has no queued work items left
                struct work_struct *work = list_entry(cwq->worklist.next,
                                struct work_struct, entry);//take the first work_struct on the list
                work_func_t f = work->func;//save the handler while the lock is still held
        #ifdef CONFIG_LOCKDEP
                /*
                 * It is permissible to free the struct work_struct
                 * from inside the function that is called from it,
                 * this we need to take into account for lockdep too.
                 * To avoid bogus "held lock freed" warnings as well
                 * as problems when looking into work->lockdep_map,
                 * make a copy and use that here.
                 */
                struct lockdep_map lockdep_map = work->lockdep_map;
        #endif
        
                /* Unlink the item before running it, then drop the lock for the call. */
                cwq->current_work = work;
                list_del_init(cwq->worklist.next);
                spin_unlock_irq(&cwq->lock);
        
                BUG_ON(get_wq_data(work) != cwq);
                work_clear_pending(work);
                lock_map_acquire(&cwq->wq->lockdep_map);
                lock_map_acquire(&lockdep_map);
                f(work);//finally invoke the work function
                lock_map_release(&lockdep_map);
                lock_map_release(&cwq->wq->lockdep_map);
        
                /* From here on only the saved f is used, never *work. */
                if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
                    printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
                            "%s/0x%08x/%d\n",
                            current->comm, preempt_count(),
                                task_pid_nr(current));
                    printk(KERN_ERR "    last function: ");
                    print_symbol("%s\n", (unsigned long)f);
                    debug_show_held_locks(current);
                    dump_stack();
                }
        
                spin_lock_irq(&cwq->lock);
                cwq->current_work = NULL;
            }
            cwq->run_depth--;
            spin_unlock_irq(&cwq->lock);
        }