深入理解linux内核链表

来源：互联网发布：阿里云租服务器多少钱编辑：程序博客网时间：2024/06/06 12:41

一、深入理解linux内核链表

通常，链表数据结构至少应包含两个域：数据域和指针域，数据域用于存储数据，指针域用于建立与下一个节点的联系。但在Linux内核链表中，不是在链表结构中包含数据，而是在各种特定数据结构中包含链表节点。在linux中许多大规模的数据就是通过内嵌链表，将数据很好的组织起来的，给遍历、查询等相关处理提供了方便。

1、定义

所在文件：./include/linux/types.h
/*
 * Link node of a doubly linked list. It carries no payload of its own;
 * it is embedded as a member of the structure that needs to be linked.
 */
struct list_head {
	struct list_head *next;	/* following node */
	struct list_head *prev;	/* preceding node */
};

所在文件：./include/linux/list.h 。
/*
 * LIST_HEAD_INIT(name) is an initializer that points both members of the
 * list head `name` at `name` itself. LIST_HEAD(name) defines a
 * struct list_head variable called `name` and initializes it that way,
 * i.e. the head's prev and next both refer to the head.
 */
#define LIST_HEAD_INIT(name) { &(name), &(name) }
#define LIST_HEAD(name) \
struct list_head name = LIST_HEAD_INIT(name)
/* Turn @list into an empty list: both of its links refer back to @list. */
static inline void INIT_LIST_HEAD(struct list_head *list)
{
	list->next = list->prev = list;
}

2、对链表的一些操作接口定义：

a）插入
所在文件：./include/linux/list.h
/*
 * Link @new in between two nodes.
 * Parameters:
 *   new:  the node being inserted
 *   prev: the node that will precede @new
 *   next: the node that will follow @new
 */
static inline void __list_add(struct list_head *new,
         struct list_head *prev,
         struct list_head *next)
{
  next->prev = new;
  new->next = next;
  new->prev = prev;
  prev->next = new;
}

/*
 * Insert @new right after @head, i.e. between @head and the node that
 * currently follows it. Handy for building stacks.
 */
static inline void list_add(struct list_head *new, struct list_head *head)
{
	struct list_head *first = head->next;

	__list_add(new, head, first);
}

/**
* Insert @new right before @head, i.e. between the node that currently
* precedes @head and @head itself. Handy for building queues.
*/
static inline void list_add_tail(struct list_head *new, struct list_head *head)
{
	struct list_head *last = head->prev;

	__list_add(new, last, head);
}
b）删除
/*
 * Unlink @entry by handing its two neighbours to __list_del().
 * This function itself does not write to @entry's own links.
 */
static inline void __list_del_entry(struct list_head *entry)
{
	struct list_head *before = entry->prev;
	struct list_head *after = entry->next;

	__list_del(before, after);
}

/*
 * Remove @entry from its list. The entry's prev and next are not set to
 * NULL: prev is set to LIST_POISON2 and next to LIST_POISON1, two special
 * values chosen so that a node which is no longer on a list cannot be
 * followed -- accessing LIST_POISON1 or LIST_POISON2 triggers a page fault.
 */
static inline void list_del(struct list_head *entry)
{
	struct list_head *before = entry->prev;
	struct list_head *after = entry->next;

	__list_del(before, after);
	entry->prev = LIST_POISON2;
	entry->next = LIST_POISON1;
}

c) 搬移
Linux提供了将原本属于一个链表的节点移动到另一个链表的操作，并根据插入到新链表的位置分为两类：

/*
 * Move @list from the list it is currently on to the front of another
 * list, i.e. to the position immediately after @head.
 */
static inline void list_move(struct list_head *list, struct list_head *head)
{ /* Unlink @list via the delete helper above, then insert it right after @head. */
__list_del_entry(list);/* unlink only; the re-insertion below rewrites @list's links */
list_add(list, head);
}
/*
 * Move @list from the list it is currently on to the tail of another
 * list, i.e. to the position immediately before @head.
 */
static inline void list_move_tail(struct list_head *list, struct list_head *head)
{
	__list_del_entry(list);
	list_add_tail(list, head);
}
例如:list_move(&new_sockopt.list,&nf_sockopts)会把new_sockopt从它所在的链表上删除，并将其再链入
nf_sockopts的表头。

d) 合并

除了针对节点的插入、删除操作，Linux链表还提供了整个链表的插入功能：
/*
 * Insert the nodes of @list (not the head @list itself) between @prev and
 * @next: the first node of @list is linked after @prev and the last node
 * of @list is linked before @next. @list itself is only read.
 * Reads list->next and list->prev unconditionally, so the caller decides
 * whether an empty @list is acceptable.
 */
static inline void __list_splice(const struct list_head *list,
struct list_head *prev,
struct list_head *next)
{
struct list_head *first = list->next;
struct list_head *last = list->prev;

/* hook the front of the spliced chain onto @prev */
first->prev = prev;
prev->next = first;

/* hook the back of the spliced chain onto @next */
last->next = next;
next->prev = last;
}

/**
* list_splice - join two lists, this is designed for stacks
* @list: the new list to add.
* @head: the place to add it in the first list.
*
* Does nothing when @list is empty; otherwise the nodes of @list are
* inserted between @head and the node that currently follows @head.
*/
static inline void list_splice(const struct list_head *list,
struct list_head *head)
{
	if (list_empty(list))
		return;

	__list_splice(list, head, head->next);
}

假设当前有两个链表，表头分别是list1和list2（都是struct list_head变量），当调用list_splice(&list1,&list2)时，只要list1非空，list1链表的内容将被挂接在list2链表上，位于list2和list2.next（原list2表的第一个节点）之间。新list2链表将以原list1表的第一个节点为首节点，而尾节点不变。

3、遍历

输入参数：pos：指向宿主结构体的指针，用作循环游标
     head：链表头（struct list_head指针）
     member：宿主结构体中内嵌的struct list_head成员的名字，一般为list。
/*
 * list_for_each_entry - iterate over the structures linked on a list
 * @pos:    pointer to the containing structure, used as the loop cursor
 * @head:   the list head
 * @member: name of the struct list_head member inside the structure
 *
 * Starts at the entry containing (head)->next and stops when the cursor's
 * member is the head itself; prefetch() is called on the next node on
 * every iteration.
 */
#define list_for_each_entry(pos, head, member)    \
for (pos = list_entry((head)->next, typeof(*pos), member); \
      prefetch(pos->member.next), &pos->member != (head); \
      pos = list_entry(pos->member.next, typeof(*pos), member))

要了解这个循环，得熟悉下面这个非常重要的定义：

/*container_of这个宏用处是：通过一个结构体的成员的地址来获取这个结构指针。
输入参数：ptr:指向成员变量的指针。
type：结构体的类型。
member：成员变量在结构体的名字。

在遍历过程中使用了prefetch，其功能是数据的预先读取。当你确认后继续要读取某内存的信息的时候，可以采用prefetch将这一块数据读取到cache之中，以便后继快速访问。我的理解是条件不止这些，如果后继读取的内容仅仅被访问一次，那么是否prefetch也就没有意义了。后继访问的内存单元多次被读写才有意义。对于上述的list操作，在循环内部往往还会访问next指向的structure的内容，因此访问多次是可以期待的。
在这里，typeof()是gcc的扩展，和sizeof()类似，比如：typeof（x）返回的是 x的类型。

*/
/*
 * container_of - get a pointer to the structure that contains a member
 * @ptr:    pointer to the member
 * @type:   type of the containing structure
 * @member: name of the member within @type
 *
 * __mptr is declared as a pointer to the member's type and initialized
 * from @ptr; offsetof(type, member) bytes are then subtracted from it and
 * the result is cast to (type *).
 * Uses typeof and a ({ ... }) statement expression.
 */
#define container_of(ptr, type, member) ({ \
const typeof( ((type *)0)->member ) *__mptr = (ptr); \
(type *)( (char *)__mptr - offsetof(type,member) );})

/*
 * Byte offset of MEMBER within a structure of type TYPE. The trick is the
 * cast of 0 to TYPE *: with the structure notionally starting at address 0,
 * the address of MEMBER equals its offset from the start of TYPE.
 */
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)

/* 先定义一个__mptr变量，这个变量的类型为type类型结构体的成员member的类型，并把ptr赋值给它。这样就获取了这个成员的地址。
*/
const typeof( ((type *)0)->member ) *__mptr = (ptr);

/*用上面定义的__mptr的值（也就是成员member的地址）减去member在类型为type的结构体中的偏移，就得到了外围结构体的起始地址，然后用（type *）进行强制转换，这样就得到了指向外围结构体的指针*/
(type *)( (char *)__mptr - offsetof(type,member) );})

4. 遍历的安全保障（摘抄http://www.ibm.com/developerworks/cn/linux/kernel/l-chain/）

在并发执行的环境下，链表操作通常都应该考虑同步安全性问题，为了方便，Linux将这一操作留给应用自己处理。Linux链表自己考虑的安全性主要有两个方面：

a) list_empty()判断

基本的list_empty()仅以头指针的next是否指向自己来判断链表是否为空，Linux链表另行提供了一个list_empty_careful()宏，它同时判断头指针的next和prev，仅当两者都指向自己时才返回真。这主要是为了应付另一个cpu正在处理同一个链表而造成next、prev不一致的情况。但代码注释也承认，这一安全保障能力有限：除非其他cpu的链表操作只有list_del_init()，否则仍然不能保证安全，也就是说，还是需要加锁保护。

b) 遍历时节点删除

前面介绍了用于链表遍历的几个宏，它们都是通过移动pos指针来达到遍历的目的。但如果遍历的操作中包含删除pos指针所指向的节点，pos指针的移动就会被中断，因为list_del(pos)将把pos的next、prev分别置成LIST_POISON1和LIST_POISON2的特殊值。当然，调用者完全可以自己缓存next指针使遍历操作能够连贯起来，但为了编程的一致性，Linux链表仍然提供了两个对应于基本
遍历操作的"_safe"接口：list_for_each_safe(pos, n, head)、list_for_each_entry_safe(pos, n, head, member)，它们要
求调用者另外提供一个与pos同类型的指针n，在for循环中暂存pos下一个节点的地址，避免因pos节点被释放而造成的断链。

1.创建双向链表（doubly linkedlist）：

INIT_LIST_HEAD(struct list_head*list)

代码如下：

/* Initialize @list as an empty list whose next and prev both refer to itself. */
static inline void INIT_LIST_HEAD(struct list_head *list)
{
	struct list_head *self = list;

	self->next = self;
	self->prev = self;
}
将List的头和尾都指向自身。

2. 添加内容到双向链表：

2.1: 平常的添加：

2.1.1:将新项目添加到list的头部（head之后第一个位置）。注意，此处head是指此双向链表头。

void list_add(struct list_head *new, struct list_head *head)

将参数一（new）添加到head之后。它调用

__list_add(new, head,head->next);也就是说，把new添加到head和head->next之间。

static inline void __list_add(structlist_head *new,
struct list_head *prev,
struct list_head *next) //它只是将new添加到prev和next之间
{
next->prev = new;
new->next = next;
new->prev = prev;
prev->next = new;
}

2.1.2：将新项目添加到双向链表最后一个位置（也就是head的prev）。注意此处head表示list头。

static inline void list_add_tail(structlist_head *new, struct list_head *head)
{
__list_add(new, head->prev,head);
}

则将new添加到head->prev和head之间了。

2.2:读拷贝更新（rcu）模式的添加（smp_wmb()）（请看背景知识）

2.2.1: 将新项目加到已知的prev和next之间：

static inline void __list_add_rcu(structlist_head * new,
struct list_head * prev, structlist_head * next)
{
new->next = next;
new->prev = prev;
smp_wmb();
next->prev = new;
prev->next = new;
}//此处注意：smp_wmb();smp_wmb()防止编译器和CPU优化代码执行的顺序。在这里，smp_wmb保证在它之前的两行代码执行完了之后再执行后两行

2.2.2:将新项目添加到list的头部（head之后第一个位置）。注意，此处head是指此双向链表头。

static inline void list_add_rcu(structlist_head *new, struct list_head *head)
{
__list_add_rcu(new, head,head->next);
}

2.2.3：将新项目添加到双向链表最后一个位置（也就是head的prev）。注意此处head表示list头。

/*
 * RCU-mode insertion at the end of the list: @new goes between
 * head->prev and @head.
 */
static inline void list_add_tail_rcu(struct list_head *new,
				     struct list_head *head)
{
	__list_add_rcu(new, head->prev, head);
}

3. 从双向链表删除项目：

3.1:基本删除函数：

static inline void __list_del(structlist_head * prev, struct list_head * next)
{
next->prev = prev;
prev->next = next;
}//只是将前一个和后一个互指

3.2:删除指定项：

static inline void list_del(structlist_head *entry)
{
__list_del(entry->prev,entry->next);
entry->next = LIST_POISON1;
entry->prev = LIST_POISON2;
}

3.3: 安全的删除指定项：

static inline void list_del_rcu(structlist_head *entry)
{
__list_del(entry->prev,entry->next);
entry->prev = LIST_POISON2;
}

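说明：list_del_rcu()只把entry->prev置为LIST_POISON2，而保留entry->next不变。这是因为删除发生时，可能仍有RCU读者正停留在该节点上，它们需要沿next指针继续向后遍历；节点本身要等宽限期（grace period）结束、确认没有读者再引用之后才能释放。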

3.4：删除并初始化某一项：

static inline void list_del_init(structlist_head *entry)
{
__list_del(entry->prev,entry->next);
INIT_LIST_HEAD(entry);
}

4.替换某项：

4.1 使用new 替换 old:

static inline void list_replace(structlist_head *old,
structlist_head *new)
{
new->next =old->next;
new->next->prev =new;
new->prev =old->prev;
new->prev->next =new;
}

4.2 替换并初始化：

/*
 * Replace @old with @new in the list, then re-initialize @old so that it
 * becomes an empty list of its own.
 */
static inline void list_replace_init(struct list_head *old,
				     struct list_head *new)
{
	list_replace(old, new);
	INIT_LIST_HEAD(old);
}

4.3：安全替换：

static inline void list_replace_rcu(structlist_head *old,
structlist_head *new)
{
new->next =old->next;
new->prev =old->prev;
smp_wmb();
new->next->prev =new;
new->prev->next =new;
old->prev = LIST_POISON2;
}

5. 移动项：

5.1移动到头部

static inline void list_move(structlist_head *list, struct list_head *head)
{
__list_del(list->prev,list->next);
list_add(list, head);
}

5.2移动到尾部
static inline void list_move_tail(structlist_head *list,
struct list_head *head)
{
__list_del(list->prev,list->next);
list_add_tail(list, head);
}

6.测试项目是否为最后一项：

static inline int list_is_last(conststruct list_head *list,
conststruct list_head *head)
{
return list->next == head;
}

7. 测试list是否为空：

static inline int list_empty(const structlist_head *head)
{
return head->next == head;
}

8. 两个链表连接起来：

8.1：将list链表连接到head链表头部：

static inline void __list_splice(structlist_head *list,
struct list_head *head)
{
struct list_head *first =list->next;
struct list_head *last =list->prev;
struct list_head *at =head->next;

first->prev =head;
head->next = first;

last->next = at;
at->prev = last;
}

8.2：连接

/*
 * Join @list onto the front of @head. Nothing happens when @list is
 * empty. @list itself is not re-initialized here.
 */
static inline void list_splice(struct list_head *list, struct list_head *head)
{
	if (!list_empty(list))
		__list_splice(list, head);
}

8.3：连接并初始化：

将list连接到head头部，再将list初始化：

/*
 * Join @list onto the front of @head and then re-initialize @list as an
 * empty list. Nothing happens when @list is already empty.
 */
static inline void list_splice_init(struct list_head *list,
				    struct list_head *head)
{
	if (!list_empty(list)) {
		__list_splice(list, head);
		INIT_LIST_HEAD(list);
	}
}

9.一些有用的宏：

9.1得到 list_entry(ptr, type, member)

简单的讲，这个宏的作用是：通过结构（type）中的某个变量(member)的指针（ptr）获取结构本身的指针.

也就是说，type中包含一个成员变量member.且某个结构体实体中member的指针为ptr.则list_entry（）则返回的是：这个结构体实体的指针。至于如何做到的，请看背景知识3---container_of。

9.2:list_first_entry(ptr, type,member)
得到ptr链表中第一个节点所在的struct实体（ptr为链表头，即取ptr->next对应的结构体）。

9.3: list_for_each(pos,head)

/*
 * list_for_each - iterate over the nodes of a list
 * @pos:  struct list_head pointer used as the loop cursor
 * @head: the list head
 *
 * Starts at (head)->next and stops once the cursor is back at the head;
 * prefetch() is called on the following node on each iteration.
 * Lines are continued with a backslash, as the preprocessor requires.
 */
#define list_for_each(pos, head) \
	for (pos = (head)->next; prefetch(pos->next), pos != (head); \
	     pos = pos->next)

它其实就是一个for循环，循环双向链表一圈。

prefetch()是缓存预取（cache prefetch）技术，用于提前把后面要访问的数据读入CPU缓存，不用深究。

下面几个宏与之类似：

__list_for_each(pos, head) //不用档案快取技术的循环

list_for_each_prev(pos, head) //向前循环

9.4: list_for_each_entry(pos,head, member)

这个宏是双向链表中最常用的，也是最有用的。表示从以head为头的双向循环列表中，一个一个拿出包含此list项目的结构体（pos的类型），并放到pos中。

/*
 * list_for_each_entry - iterate over the structures linked on a list
 * @pos:    pointer to the containing structure, used as the loop cursor
 * @head:   the list head
 * @member: name of the struct list_head member inside the structure
 *
 * Starts at the entry containing (head)->next and stops when the cursor's
 * member is the head itself; prefetch() is called on the next node on
 * every iteration.
 * Lines are continued with a backslash, as the preprocessor requires.
 */
#define list_for_each_entry(pos, head, member) \
	for (pos = list_entry((head)->next, typeof(*pos), member); \
	     prefetch(pos->member.next), &pos->member != (head); \
	     pos = list_entry(pos->member.next, typeof(*pos), member))

因为有上面list_entry()的铺垫，所以非常简单。

参数一：pos就是一个结构体指针。这个结构体中会包含成员变量member.

参数二：head就是一个双向链表头。

参数三：pos结构体中的成员变量名。

pos = list_entry((head)->next, typeof(*pos),member)：pos得到双向链表中第一个链表被包含的结构体实体。

&pos->member !=(head)：此结构体中的链表不是头。

pos = list_entry(pos->member.next, typeof(*pos),member): pos得到双向链表中下一个结构体实体。

Linux kernel 中双向循环链表的使用：

在Linux内核链表中，需要用链表组织起来的数据通常会包含一个struct list_head成员，各个结构体都通过这个list成员组织在一个链表中。

例如：在hid-core.c中，要组织一个report链表。

于是，首先使用

1）

INIT_LIST_HEAD(&device->report_enum[i].report_list);

struct hid_report {
struct list_head list;
unsignedid;
unsignedtype;
struct hid_field*field[HID_MAX_FIELDS];
unsignedmaxfield;
unsignedsize;
struct hid_device*device;
};

这就是需要用链表组织起来的数据通常会包含一个struct list_head成员。

2）。

list_add_tail(&report->list,&report_enum->report_list);

将report类型的项目添加到刚才初始化的list中。

3).

list_for_each_entry(report,&hid->report_enum[HID_INPUT_REPORT].report_list,list)

遍历 hid->report_enum[HID_INPUT_REPORT].report_list，从其中一个一个得到report.放到report中。

背景知识：

背景知识一：typeof:

typeof不是标准C的运算符，这是gcc的一个扩展.

它与sizeof() 语义类似，sizeof(exp)返回的是exp的长度，而typeof(exp)返回的是exp的类型。

例1：

int a;

typeof(&a) b;

因为a 为int型。所以&a为int*.

也就是说b 为int* 类型。

例2：

/* Example type: ngate names the structure, pngate a pointer to it. */
typedef struct {
	int size;
	char t;
} ngate;

typedef ngate *pngate;

typeof(((ngate *)0)->t) w;

这其实就是表示，w 的类型为：ngate的t的类型。

在这里0并不是真正的变量，可以把它理解为一个替代使用的符号。其意思更可以理解为一个被赋值了的变量。因为typeof只取表达式的类型、并不对表达式求值，所以这个数可以不是0，随便什么数字都可以。

背景知识二：offsetof

kernel中定义如下：

/*
 * Byte offset of MEMBER from the start of a structure of type TYPE:
 * the address of MEMBER in a TYPE notionally located at address 0,
 * converted to size_t.
 */
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)

与上面所述类似，（TYPE *）0 表示：0是指向TYPE的指针。

则 &（TYPE *）0->MEMBER表示：TYPE类型的实体0的变量MEMBER的地址，因为从0开始，所以它的地址就成为offset.再用size_t强制转换，就是从struct头到成员变量MEMBER的offset.

背景知识三：container_of(ptr, type,member)

Kernel中如下定义：

/*
 * container_of - get a pointer to the structure that contains a member
 * @ptr:    pointer to the member
 * @type:   type of the containing structure
 * @member: name of the member within @type
 *
 * __mptr is declared as a pointer to the member's type and initialized
 * from @ptr; offsetof(type, member) bytes are then subtracted from it and
 * the result is cast to (type *).
 * Uses typeof and a ({ ... }) statement expression; lines are continued
 * with a backslash, as the preprocessor requires.
 */
#define container_of(ptr, type, member) ({ \
	const typeof(((type *)0)->member) *__mptr = (ptr); \
	(type *)((char *)__mptr - offsetof(type, member)); })

（type *）0: 表明某个实体为type类型的。

(（type *）0)->member表明这个实体的某个成员变量。

typeof((（type *）0)->member) *__mptr表明定了一个指向此成员变量类型的指针。

offsetof(type,member)表明成员变量member到结构体类型type头的offset.

(type *)( (char*)__mptr - offsetof(type,member)则表明：返回的是一个指向type的指针，此指针指向一个type类型的实体。而参数ptr则是这个实体中的某一个成员变量位置。

背景知识四：RCU（Read-Copy Update）

RCU是2.5/2.6内核中引入的新技术，它通过延迟写操作来提高同步性能。

系统中数据读取操作远多于写操作，而rwlock机制在smp环境下随着处理机增多性能会迅速下降。针对这一应用背景，IBM Linux技术中心的Paul E. McKenney提出了"读拷贝更新"的技术，并将其应用于Linux内核中。RCU技术的核心是写操作分为写-更新两步，允许读操作在任何时候无阻访问，当系统有写操作时，更新动作一直延迟到对该数据的所有读操作完成为止。