ovs记录

来源：互联网发布：js中的event对象编辑：程序博客网时间：2024/05/02 04:52

kthread_create与kernel_thread的区别

从表面上来看，这两个函数非常的类似，但是实现却是相差甚远。
kthread_create是通过work_queue来实现的，kernel_thread是通过do_fork来实现的。

kernel thread可以用kernel_thread创建，但是在执行函数里面必须用daemonize释放资源并挂到init下，还需要用 completion等待这一过程的完成。

kthread_create是比较正牌的创建函数，这个不必要调用daemonize，用这个创建的kernel thread都挂在了kthread线程下。

可以在非内核线程中调用kernel_thread, 但这样创建的线程必须在自己调用daemonize(...)来释放资源，成为真正的内核线程。

#include <linux/kernel.h>
#include <linux/module.h>
/*
 * Thread body for the kernel_thread() demo.
 *
 * Calls daemonize("mythread") first, then runs five rounds; each round
 * logs current->mm and current->active_mm and sleeps in
 * TASK_INTERRUPTIBLE for up to 10 * HZ jiffies.
 *
 * @dummy: unused.
 * Always returns 0.
 */
static int noop(void *dummy)
{
        int round;

        daemonize("mythread");
        for (round = 0; round < 5; round++) {
                printk("current->mm = %p\n", current->mm);
                printk("current->active_mm = %p\n", current->active_mm);
                /* the state must be set before schedule_timeout() so the task really sleeps */
                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(10 * HZ);
        }
        return 0;
}
/*
 * Module init hook: spawn one kernel thread running noop().
 *
 * Returns 0 on success. If kernel_thread() fails, its negative result is
 * returned so the module load fails instead of silently succeeding
 * without a thread.
 */
static int test_init(void)
{
        int pid;

        pid = kernel_thread(noop, NULL, CLONE_KERNEL | SIGCHLD);
        if (pid < 0)
                return pid;
        return 0;
}
/*
 * Module exit hook: empty, so nothing is stopped or waited for on unload.
 * NOTE(review): noop() runs five rounds of up to 10 * HZ jiffies each;
 * unloading the module before it returns presumably leaves that thread
 * executing code of an unloaded module -- confirm, and wait for the
 * thread here if so.
 */
static void test_exit(void) {}
/* Register the load / unload entry points of the module. */
module_init(test_init);
module_exit(test_exit);

“mythread”就是给这个内核线程取的名字，可以用ps -A来查看。
schedule()用于进程调度，可以理解为放弃CPU的使用权.

kthread_create创建线程

1 使用kthread_create创建线程：
struct task_struct *kthread_create(int (*threadfn)(void *data),

                                            void *data,
                                            const char *namefmt, ...);
这个函数可以像printk一样传入某种格式的线程名
线程创建后，不会马上运行，而是需要将kthread_create() 返回的task_struct指针传给wake_up_process()，然后通过此函数运行线程。
2. 当然，还有一个创建并启动线程的函数：kthread_run
   struct task_struct *kthread_run(int (*threadfn)(void *data),
                                    void *data,
                                    const char *namefmt, ...);
3. 线程一旦启动起来后，会一直运行，除非该线程主动调用do_exit函数，或者其他的进程调用kthread_stop函数，结束线程的运行。
    int kthread_stop(struct task_struct *thread);
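kthread_stop() 会设置线程的停止标志（使 kthread_should_stop() 返回真）并唤醒该线程，然后一直等待线程函数返回，最后返回线程函数的返回值。
如果线程函数正在处理一个非常重要的任务，它不会被强行中断。当然，如果线程函数永远不返回并且不检查 kthread_should_stop()，它将永远都不会停止，kthread_stop() 也会一直阻塞。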
参考：Kernel threads made easy

view plaincopy to clipboardprint?
#include <linux/kthread.h>
static struct task_struct * _task;
static struct task_struct * _task2;
static struct task_struct * _task3;
/*
 * Body shared by the demo kernel threads.
 *
 * Each invocation takes the next value of a function-static counter as
 * its id, then logs one "sleeping" line about once per HZ jiffies until
 * kthread_should_stop() becomes true.
 *
 * @data: unused.
 * Returns 0 (the value kthread_stop() reports to its caller).
 */
static int thread_func(void *data)
{
        static int i = 0;       /* shared by every thread running this function */
        int j = 0;              /* per-thread iteration counter */
        int k;                  /* id of this thread */

        /*
         * NOTE(review): i is updated without any locking; threads started
         * back to back may observe the same value -- confirm whether that
         * matters for the demo.
         */
        i++;
        k = i;
        printk("thread_func %d started\n", i);
        while(!kthread_should_stop())
        {
                /*
                 * Sleep in TASK_INTERRUPTIBLE for at most HZ jiffies. This
                 * replaces the deprecated interruptible_sleep_on_timeout()
                 * on a private wait queue that nobody ever woke; the
                 * wake-up issued by kthread_stop() ends the sleep early.
                 */
                schedule_timeout_interruptible(HZ);
                printk("[%d]sleeping..%d\n", k, j++);
        }
        return 0;
}
void my_start_thread(void)
{

        //_task = kthread_create(thread_func, NULL, "thread_func2");
        //wake_up_process(_task);
        _task = kthread_run(thread_func, NULL, "thread_func2");
        _task2 = kthread_run(thread_func, NULL, "thread_func2");
        _task3 = kthread_run(thread_func, NULL, "thread_func2");
        if (!IS_ERR(_task))
        {
                printk("kthread_create done\n");
        }
        else
        {
                printk("kthread_create error\n");
        }
}
void my_end_thread(void)
{
        int ret = 0;
        ret = kthread_stop(_task);
        printk("end thread. ret = %d\n" , ret);
        ret = kthread_stop(_task2);
        printk("end thread. ret = %d\n" , ret);
        ret = kthread_stop(_task3);
        printk("end thread. ret = %d\n" , ret);
}
#include <linux/kthread.h>
static struct task_struct * _task;
static struct task_struct * _task2;
static struct task_struct * _task3;
/*
 * Body shared by the demo kernel threads.
 *
 * Each invocation takes the next value of a function-static counter as
 * its id, then logs one "sleeping" line about once per HZ jiffies until
 * kthread_should_stop() becomes true.
 *
 * @data: unused.
 * Returns 0 (the value kthread_stop() reports to its caller).
 */
static int thread_func(void *data)
{
        static int i = 0;       /* shared by every thread running this function */
        int j = 0;              /* per-thread iteration counter */
        int k;                  /* id of this thread */

        /*
         * NOTE(review): i is updated without any locking; threads started
         * back to back may observe the same value -- confirm whether that
         * matters for the demo.
         */
        i++;
        k = i;
        printk("thread_func %d started\n", i);
        while(!kthread_should_stop())
        {
                /*
                 * Sleep in TASK_INTERRUPTIBLE for at most HZ jiffies. This
                 * replaces the deprecated interruptible_sleep_on_timeout()
                 * on a private wait queue that nobody ever woke; the
                 * wake-up issued by kthread_stop() ends the sleep early.
                 */
                schedule_timeout_interruptible(HZ);
                printk("[%d]sleeping..%d\n", k, j++);
        }
        return 0;
}
void my_start_thread(void)
{

        //_task = kthread_create(thread_func, NULL, "thread_func2");
        //wake_up_process(_task);
        _task = kthread_run(thread_func, NULL, "thread_func2");
        _task2 = kthread_run(thread_func, NULL, "thread_func2");
        _task3 = kthread_run(thread_func, NULL, "thread_func2");
        if (!IS_ERR(_task))
        {
                printk("kthread_create done\n");
        }
        else
        {
                printk("kthread_create error\n");
        }
}
void my_end_thread(void)
{
        int ret = 0;
        ret = kthread_stop(_task);
        printk("end thread. ret = %d\n" , ret);
        ret = kthread_stop(_task2);
        printk("end thread. ret = %d\n" , ret);
        ret = kthread_stop(_task3);
        printk("end thread. ret = %d\n" , ret);
}

在执行kthread_stop的时候，目标线程必须没有退出，否则会Oops。原因很容易理解，当目标线程退出的时候，其对应的task结构也变得无效，kthread_stop引用该无效task结构就会出错。

为了避免这种情况，需要确保线程没有退出，其方法如代码中所示：

thread_func()

{

// do your work here

// wait to exit

while(!thread_could_stop())

{

wait();

}

exit_code()

{

kthread_stop(_task); //发信号给task，通知其可以退出了

}

这种退出机制很温和，一切尽在thread_func()的掌控之中，线程在退出时可以从容地释放资源，而不是莫名其妙地被人“暗杀”

wait_event_interruptible()函数分析

1 related structures
Linux-2.6提供如下关于等待队列的操作:
(1) 定义"等待队列头",
wait_queue_head_t my_queue;

defined in linux/wait.h

50 struct __wait_queue_head {
51 spinlock_t lock;
52 struct list_head task_list;
53 };
54 typedef struct __wait_queue_head wait_queue_head_t;

(2) 初始化"等待队列头"
init_waitqueue_head(&my_queue);

defined in kernel/wait.c

13 void init_waitqueue_head(wait_queue_head_t *q)
14 {
15 spin_lock_init(&q->lock);
16 INIT_LIST_HEAD(&q->task_list);
17 }

定义和初始化的快捷方式:
DECLARE_WAIT_QUEUE_HEAD(my_queue);

linux/wait.h

70 #define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
71 .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
72 .task_list = { &(name).task_list, &(name).task_list } }
74 #define DECLARE_WAIT_QUEUE_HEAD(name) \
75 wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)

(3) 定义等待队列
DECLARE_WAITQUEUE(name, tsk);
定义并初始化一个名为name的等待队列(wait_queue_t);

linux/wait.h
32 struct __wait_queue {
33 unsigned int flags;
34 #define WQ_FLAG_EXCLUSIVE 0x01
35 void *private;
36 wait_queue_func_t func;
37 struct list_head task_list;
38 };

28 typedef struct __wait_queue wait_queue_t;

62 #define __WAITQUEUE_INITIALIZER(name, tsk) { \
63 .private = tsk, \
64 .func = default_wake_function, \
65 .task_list = { NULL, NULL } }
66
67 #define DECLARE_WAITQUEUE(name, tsk) \
68 wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)

2 specific analysis

wait_event_interruptible()。该函数修改task的状态为TASK_INTERRUPTIBLE，意味着该进程将不会继续运行直到被唤醒，然后被添加到等待队列wq中。

在wait_event_interruptible()中首先判断condition是不是已经满足，如果是则直接返回0，否则调用__wait_event_interruptible()，并用__ret来存放返回值

---------------------------------------------------------------

/*
 * wait_event_interruptible - sleep until @condition becomes true
 * @wq:        wait queue head to wait on
 * @condition: C expression to wait for
 *
 * Evaluates to 0 without sleeping when @condition already holds.
 * Otherwise the value comes from __wait_event_interruptible(), which
 * stores its result in __ret.
 *
 * Every line of a multi-line macro must end in a backslash.
 */
#define wait_event_interruptible(wq, condition) \
({ \
        int __ret = 0; \
        if (!(condition)) \
                __wait_event_interruptible(wq, condition, __ret); \
        __ret; \
})

wait_event_interruptible() --> __wait_event_interruptible()

__wait_event_interruptible()首先定义并初始化一个wait_queue_t变量__wait，其中数据为当前进程current，并把__wait入队。

在无限循环中，__wait_event_interruptible()将本进程置为可中断的挂起状态，反复检查condition是否成立，如果成立
则退出，如果不成立则继续休眠；条件满足后，即把本进程运行状态置为运行态，并将__wait从等待队列中清除掉，从而进程能够调度运行。如果进程当前有
异步信号(POSIX的)，则返回-ERESTARTSYS。

----------------------------------------------------------------

/*
 * __wait_event_interruptible - slow path of wait_event_interruptible()
 * @wq:        wait queue head to wait on
 * @condition: C expression to wait for
 * @ret:       lvalue that receives -ERESTARTSYS when a signal is pending
 *
 * Declares a wait entry for the current task, then loops: queue the
 * entry and switch to TASK_INTERRUPTIBLE, leave when @condition holds,
 * otherwise call schedule() unless a signal is pending. In that case
 * @ret is set to -ERESTARTSYS and the loop ends. finish_wait() runs on
 * every exit path.
 */
#define __wait_event_interruptible(wq, condition, ret) \
do { \
        DEFINE_WAIT(__wait); \
 \
        for (;;) { \
                prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
                if (condition) \
                        break; \
                if (!signal_pending(current)) { \
                        schedule(); \
                        continue; \
                } \
                ret = -ERESTARTSYS; \
                break; \
        } \
        finish_wait(&wq, &__wait); \
} while (0)

__wait_event_interruptible() --> DEFINE_WAIT(name)

/usr/src/linux-2.6.21.5/include/linux/wait.h

---------------------------------------------------------

/*
 * DEFINE_WAIT - declare and initialise a wait queue entry called @name
 *
 * The entry refers to the current task (.private = current), uses
 * autoremove_wake_function as its wake callback, and starts with a
 * task_list that is initialised by LIST_HEAD_INIT on itself.
 */
#define DEFINE_WAIT(name) \
        wait_queue_t name = { \
                .private = current, \
                .func = autoremove_wake_function, \
                .task_list = LIST_HEAD_INIT((name).task_list), \
        }

wait_queue_t

---------------------------------------------------------

/* wait_queue_t is the short name used for one wait queue entry. */
typedef struct __wait_queue wait_queue_t;

/* One entry on a wait queue. */
struct __wait_queue {

/* Bit flags for this entry; WQ_FLAG_EXCLUSIVE below is one of them. */
unsigned int flags;

#define WQ_FLAG_EXCLUSIVE 0x01

/* Opaque per-entry pointer; DEFINE_WAIT() stores `current` here. */
void *private;

/* Wake callback of this entry (DEFINE_WAIT() uses autoremove_wake_function). */
wait_queue_func_t func;

/* List linkage that places this entry on a wait queue head's task_list. */
struct list_head task_list;

};

__wait_event_interruptible() --> prepare_to_wait()

/*
 * prepare_to_wait - queue @wait on @q and set the current task's state
 * @q:     wait queue head
 * @wait:  wait queue entry of the caller
 * @state: task state to switch to (e.g. TASK_INTERRUPTIBLE)
 *
 * The entry is added only if it is not linked already, so the function
 * can be called repeatedly from a wait loop.
 */
void fastcall
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
/* This is a non-exclusive wait: clear the exclusive flag. */
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
/* The queue is modified under q->lock, with the interrupt state saved in flags. */
spin_lock_irqsave(&q->lock, flags);
/* An empty task_list means the entry is not linked into a queue yet. */
if (list_empty(&wait->task_list))
__add_wait_queue(q, wait);
/* NOTE(review): is_sync_wait() is defined outside this excerpt; the task state only changes when it returns true. */
if (is_sync_wait(wait))
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
}

124 static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
125 {
126 list_add(&new->task_list, &head->task_list);
127 }

Here we can see clearly that this function simply links the variable
wait (of type wait_queue_t) into the queue headed by q (of type wait_queue_head_t).

description of list_add:
that essentially invokes the __list_add(new , head, head->next), which implements the task
of inserting new between head and head->next

104 void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
105 {
106 unsigned long flags;
107
108 __set_current_state(TASK_RUNNING);
109 /*
110 * We can check for list emptiness outside the lock
111 * IFF:
112 * - we use the "careful" check that verifies both
113 * the next and prev pointers, so that there cannot
114 * be any half-pending updates in progress on other
115 * CPU's that we haven't seen yet (and that might
116 * still change the stack area.
117 * and
118 * - all other users take the lock (ie we can only
119 * have _one_ other CPU that looks at or modifies
120 * the list).
121 */
122 if (!list_empty_careful(&wait->task_list)) {
123 spin_lock_irqsave(&q->lock, flags);
124 list_del_init(&wait->task_list);
125 spin_unlock_irqrestore(&q->lock, flags);
126 }
127 }
128 EXPORT_SYMBOL(finish_wait);

/* Head of one hash bucket: a single pointer to the first node, with no tail pointer. */
struct hlist_head {
     struct hlist_node *first;
};
/* Node of a hash-bucket list. */
struct hlist_node {
     /*
      * next:  the following node.
      * pprev: address of the pointer that points at this node, i.e. the
      *        previous node's next field or, for the first node, the
      *        head's first field.
      */
     struct hlist_node *next, **pprev;
};
一般hash表都比较大，所以设计成单链表能节省几乎一半空间。这并不奇怪。奇怪的是hlist_node的pprev域。pprev域被设计成hlist_node的二级指针，而且指向前一个节点的next域（或者对于第一个节点，指向表头的first域）。为什么要设计成这样？
考虑普通的单链表，如下：
struct node {
     struct node *next;
};
对于这样的链表，在指定节点(a)后插入节点(b)很容易：b->next = a->next; a->next = b;
但是要在指定节点（a）前插入（b）则很麻烦：先通过while(i->next != a) i = i->next;得到a的前一个节点，然后再进行i->next = b; b->next = a;进行插入；这就花费一定的时间用来搜索a的前一个节点。
linux要节省这部分时间。分析一下，搜索的目的是修改上一个节点的next域。为了省去搜索，hlist_node便增加了pprev域指向上一个节点的next域。要修改上一个节点的next，可以通过 *pprev = ... 快速地修改。
总结一下，引入pprev域是为了优化单链表的前插操作。

Linux里实现了一种用于hash表的链表头结构hlist_head.

/* Head of one hash bucket: a single pointer to the first node, with no tail pointer. */
struct hlist_head {
struct hlist_node *first;
};

/* Node of a hash-bucket list. */
struct hlist_node {
/*
 * next:  the following node.
 * pprev: address of the previous node's next pointer, which lets a node
 *        be unlinked from the middle of the list.
 */
struct hlist_node *next, **pprev;
};

因为hash表头数组是一次性分配的,表头体积减小50%也就意味着,整个hash表为空时,体积减小了一半.
基于hash表多数情况下是稀疏的,这么做很大地改善了空间利用率.

但代价也是明显的,作为hash表项的链表变成了单链表,无法在O(1)时间内找到表尾元素,也就不支持表尾的删除和添加.

链表本身的体积并没有减小. 只是访问方式不同了. 之所以在单链表里保存两个指针,就是为了支持在链表中间删除节点.
这里的pprev指针指向的就是前一个节点的next,所以它是个 **
这样,只要你有某个节点的指针,不需要求hash值和查找,就可以将他从单链表中删除,也即从hash表中删除.

linux内核中的Hlist与List_head结构

List_Head

操作系统内核经常需要维护数据结构。内核有标准的循环链表、双向链表的实现。在<linux/list.h>文件中定义了一个list_head类型简单结构：

struct list_head {

struct list_head *next, *prev;

};

通用链表的常用用途是将某一个数据结构本身串成链表，或将某些链表与一个数据结构联系起来，这两种情况实质上都是由结构list_head组成链表，只是list_head所“背负”的负载不一样。下面分别举例说明这两种用途。

以下示例说明了如何将某一个数据结构本身串成链表，并对链表进行操作，同时还说明list_head结构的实现与使用。

示例：将某一个数据结构本身串成链表。

（1）加入list_head结构成员。

假设有一个example_struct结构需连接成链表，因而在其结构里面加上list_head成员，就组成了结构链表，如下：

struct example_struct {

struct list_head list;

int priority;

……//其他成员

};

在example_struct结构中的list成员，用来将example_struct结构串成链表。可理解为list_head“背负”的负载是example_struct结构。

（2）创建list_head结构。

使用前必须申请链表头并用 INIT_LIST_HEAD 宏来初始化链表头。可使用两种方法。

方法1：

struct list_head example_list;

INIT_LIST_HEAD(&example_list);

方法2：

LIST_HEAD(example_list);

其中，这两个宏在include/linux/list.h中定义如下：

/*
 * LIST_HEAD - define a struct list_head variable called @name and
 * initialise it with LIST_HEAD_INIT(name).
 *
 * The definition spans two lines, so the first must end in a backslash.
 */
#define LIST_HEAD(name) \
        struct list_head name = LIST_HEAD_INIT(name)

/*
 * INIT_LIST_HEAD - initialise a list head at run time
 * @ptr: pointer to the struct list_head to initialise
 *
 * Points both next and prev back at the head itself, i.e. the list is
 * empty afterwards. Wrapped in do { } while (0) so it behaves as one
 * statement.
 */
#define INIT_LIST_HEAD(ptr) do { \
        (ptr)->next = (ptr); (ptr)->prev = (ptr); \
} while (0)

宏定义INIT_LIST_HEAD初始化了链表头，即向前、向后的指针都指向链表头。这样，就已初始化了一个example_list的链表头，以后就可以向链表中增加链表元素了。

（3）链表与用户结构连接。

list_entry宏将example_list链表与example_struct结构类型连接起来。

有两项链表的链表头

List_entry宏的效果

含list_head的定制结构

list_head结构

空链表

<linux/list. h>中的链表

下面这个代码行就是从example_list链表中得到节点对应的example_struct结构指针，其中ptr是example_list链表中的指针，如ptr = example_list.next。

struct example_struct *node =

list_entry(ptr, struct example_struct, list);

在上面代码行中的宏定义list_entry将一个 list_head结构指针映射回一个指向结构example_struct的指针，即得到list_head的宿主结构。下面分析这个宏定义（在include/linux/list.h中）：

/*
 * list_entry - get the structure that embeds a given list node
 * @ptr:    pointer to the embedded struct list_head
 * @type:   type of the structure that embeds it
 * @member: name of the struct list_head member within @type
 *
 * Thin wrapper around container_of().
 */
#define list_entry(ptr, type, member) \
        container_of(ptr, type, member)

list_entry的功能是得到链表中节点的结构，它的参数含义为：

ØØ ptr是链表中的一个struct list_head结构元素指针。

ØØ type是用户定义的结构类型，其中，包含struct list_head结构成员。

ØØ member用户定义结构中的struct list_head结构成员名字。

在include/linux/kernel.h中有container_of的定义，参数含义与list_entry中一致，container_of得到list的容器结构，即含有list成员的结构type。container_of的定义如下：

/*
 * container_of - map a pointer to a member back to its containing structure
 * @ptr:    pointer to the member
 * @type:   type of the containing structure
 * @member: name of the member within @type
 *
 * First line of the body: __mptr receives @ptr typed as a pointer to the
 * member's own type. Second line: subtracting the member's offset from
 * __mptr yields the address of the enclosing @type.
 *
 * The explanations live up here because comments cannot sit between the
 * continuation lines of a macro.
 */
#define container_of(ptr, type, member) ({ \
        const typeof( ((type *)0)->member ) *__mptr = (ptr); \
        (type *)( (char *)__mptr - offsetof(type,member) );})

在include/linux/stddef.h中有宏offsetof的定义，列出如下：

/* offsetof - byte offset of field `member` inside structure type `type`: the member's address when the structure is imagined at address 0. */
#define offsetof(type, member) ((size_t)&(((type *)0)->member))

offsetof宏对于上述示例的展开及分析是：&((struct example_struct *)0)->list表示当结构example_struct正好在地址0上时其成员list的地址，即成员位移。

（4）遍历链表

下面使用list_entry 宏遍历链表得到链表指针，再从链表指针映射回对应结构example_struct的指针。然后，对其成员priority进行操作。函数example_add_entry的功能是给链表加入新的结构成员。

void example_add_entry(struct example_struct *new)

{

struct list_head *ptr;

struct example_struct *entry;

　　//遍历链表

for (ptr = exmple_list.next; ptr != &exmple_list; ptr = ptr->next) {

//映射回对应结构example_struct的指针

entry = list_entry(ptr, struct todo_struct, list);

if (entry->priority < new->priority) {

list_add_tail(&new->list, ptr);

return;

}

list_add_tail(&new->list, &exmple_struct)

}

示例：将某些链表与一个数据结构联系起来。

函数new_inode为给定的超级块分配一个新的节点，并将新的节点加到链表inode_in_use和sb->s_inodes中，从而在两个链表中链接了新的节点。一个是以inode_in_use为链表头的全局的节点链表；一个是超级块结构为链表头的节点链表。

fs/inode.c

extern struct list_head inode_in_use;

/*
 * new_inode - allocate an inode for super block @sb and link it into two lists
 * @sb: super block the inode belongs to
 *
 * On successful allocation, and while holding inode_lock, the inode is
 * added to the global inode_in_use list and to @sb's s_inodes list. It
 * then receives the next inode number and has i_state cleared.
 *
 * Returns whatever alloc_inode() returned; the linking is skipped when
 * that is NULL.
 */
struct inode *new_inode(struct super_block *sb)
{
        static unsigned long last_ino;
        struct inode * inode;

        spin_lock_prefetch(&inode_lock);

        inode = alloc_inode(sb);
        if (inode) {
                spin_lock(&inode_lock);
                inodes_stat.nr_inodes++;
                /* add the inode to the inode_in_use list */
                list_add(&inode->i_list, &inode_in_use);
                /* add the inode to the super block's inode list */
                list_add(&inode->i_sb_list, &sb->s_inodes);
                inode->i_ino = ++last_ino;
                inode->i_state = 0;
                spin_unlock(&inode_lock);
        }
        return inode;
}
/*
 * Presumably labels of the article's accompanying figure, which were
 * interleaved with the code above: s_inodes, i_sb_list, list_head,
 * *next, *prev, *next, Super_block, inode.
 */

在include/linux/list.h中还定义了下面操作链表的函数。

ØØ list_add(struct list_head *new, struct list_head *head)；这个函数在链表头后面添加新的元素。如果是在链表的头部添加元素，就可以用来建立栈。还需要注意的是，head并不一定非得是链表的第一项，如果传递了一个恰巧位于链表中间某处的list_head结构，新入口会立即排在它的后面。因为Linux链表是循环的，链表头通常与其他入口没有本质区别。

ØØ list_add_tail(struct list_head *new, struct list_head *head)；在给定链表头的前面增加一个新的元素，即在链表的末尾添加。可使用list_add_tail建立“先入先出”队列。

ØØ list_del(struct list_head *entry)；从链表中将给定的入口删除。

ØØ list_empty(struct list_head *head)；如果给定的链表是空的，就返回一个非零值。

ØØ list_splice(struct list_head *list, struct list_head *head)；这个函数通过在head的后面插入list来合并两个链表。

Hlist

在include/linux/list.h中有list链表与hlist哈希链表结构的定义，下面都列出它们的定义，可以对比一下：

struct list_head {struct list_head *next, *prev; };

struct hlist_head { struct hlist_node *first; };

struct hlist_node { struct hlist_node *next, **pprev; };

双头（next，prev）的双链表对于Hash表来说“过于浪费”，因而另行设计了一套Hash表专用的hlist数据结构——单指针表头双循环链表，hlist的表头仅有一个指向首节点的指针，而没有指向尾节点的指针，这样在可能是海量的Hash表中存储的表头就能减少一半的空间消耗。

pprev因为hlist不是一个完整的循环链表而不得不使用。在list中，表头和节点是同一个数据结构，直接用prev没问题；在hlist中，表头没有prev，也没有next，只有一个first。为了能统一地修改表头的first指针，即表头的first指针必须修改指向新插入的节点，hlist就设计了pprev。hlist节点的pprev不再是指向前一个节点的指针，而是指向前一个节点（可能是表头）中的next（对于表头则是first）指针（struct hlist_node **pprev），从而在表头插入的操作可以通过一致的“*(node->pprev)”访问和修改前节点的next（或first）指针。

下面是hlist中常用的几个宏：

/* Static initialiser for an empty hlist head (first == NULL). */
#define HLIST_HEAD_INIT { .first = NULL }

/* Define an hlist_head variable called `name`, initialised as empty. */
#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL }

/* Run-time initialisation of the hlist head that ptr points to. */
#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)

/* Reset the node that ptr points to: both next and pprev become NULL. */
#define INIT_HLIST_NODE(ptr) ((ptr)->next = NULL, (ptr)->pprev = NULL)

下面只列出hlist_add_before操作函数，其他hlist链表操作函数操作方法类似。这个函数中的参数next不能为空。它在next前面加入了n节点。函数的实现与list中对应函数类似。

/*
 * hlist_add_before - insert node @n immediately before node @next
 * @n:    node to insert
 * @next: node that will follow @n; it is dereferenced unconditionally,
 *        so it must not be NULL
 */
static inline void hlist_add_before(struct hlist_node *n, struct hlist_node *next)

{

/* n takes over next's back-link: the pointer that used to point at next. */
n->pprev = next->pprev;

n->next = next;

/* next is now reached through n->next. */
next->pprev = &n->next;

/* Redirect the pointer that used to point at next so that it points at n. */
*(n->pprev) = n;

}

为了让问题更好地得到解决，也为提问尽点义务，在抛出我的问题前先简单介绍一下Linux下slab高速缓存的使用方法。

在内核编程中，可能经常会有一些数据结构需要反复使用和释放，按照通常的思路，可能是使用kmalloc和kfree来实现。
但是这种方式效率不高，Linux为我们提供了更加高效的方法——Slab高速缓存管理器
通过先使用kmem_cache_create函数创建一个高速缓存的头指针——在内核中是struct kmem_cache结构，具体用法可以这样：

struct kmem_cache *cachep = NULL;

cachep = kmem_cache_create("cache_name", sizeof(struct yourstruct), 0, SLAB_HWCACHE_ALIGN,NULL, NULL);

这样我们就获得了一个可用的cachep头指针。

当需要分配一个struct yourstruct的结构体空间时，我们只需要调用kmem_cache_alloc函数，就可以获得一个足够我们使用的空间的指针（为什么我要说足够呢？因为刚才的声明中我使用了一个标志——SLAB_HWCACHE_ALIGN，这个标志会让分配的空间对于硬件来说是对齐的，而不一定恰好等于sizeof(struct yourstruct)的结果）。范例代码如下：

struct yourstruct *bodyp = NULL;

bodyp = (struct yourstruct *) kmem_cache_alloc(cachep, GFP_ATOMIC & ~__GFP_DMA);

这样就可以使用bodyp指针所对应的空间存贮你需要的结构体信息了。
当用完结束后，我们需要释放空间，如何操作呢？代码很简单：

kmem_cache_free(cachep, bodyp);