Linux内核链表实现剖析

来源：互联网发布：iphone清除数据后开机编辑：程序博客网时间：2024/04/27 20:27

Linux内核使用环形双向链表，无所谓头结点和尾节点。

内核链表详细信息见 include/linux/list.h 。

1. 定义和初始化内核链表

/*
 * Link node of the circular doubly linked list.  It holds no payload of
 * its own; it is embedded inside the structure that carries the data.
 */
struct list_head {
	struct list_head *prev;	/* neighbour in the backward direction */
	struct list_head *next;	/* neighbour in the forward direction */
};

list_head不包含数据，一般内嵌于其它数据结构中。

定义list_head

/* Example data structure that embeds a list_head so it can be put on a list. */
struct my_struct {
	struct list_head list;	/* links this object into a list */
	unsigned long dog;
	void *cat;
};

静态初始化链表

struct my_struct *p ;

p = kmalloc( sizeof(struct my_struct) , GFP_KERNEL) ;

if ( !p )

return -ENOMEM ;

p->dog = 0 ;

p->cat = NULL ;

p->list = LIST_HEAD_INIT( p->list ) ; // #define LIST_HEAD_INIT(name) { &(name), &(name) }

或者

struct my_struct mine = {

.list = LIST_HEAD_INIT( mine->list ) ;

.dog = 0 ;

.cat = NULL ;

} ;

或者

static LIST_HEAD(list);	/* definition and initialization in one step */

/* Define a struct list_head called @name, initialized with LIST_HEAD_INIT(). */
#define LIST_HEAD(name) struct list_head name = LIST_HEAD_INIT(name)

动态初始化链表

struct my_struct *p ;

..................

INIT_LIST_HEAD( &p->list ) ;

/**
 * INIT_LIST_HEAD - initialize a list_head at run time
 * @list: the list_head to initialize
 *
 * Makes both the next and the prev link of @list point at @list itself.
 */
static inline void INIT_LIST_HEAD(struct list_head *list)
{
	struct list_head *const self = list;

	self->next = self;
	self->prev = self;
}

2. 操作链表

2.1判断链表

/**

*list_empty - test whether a list is empty

*@head : the list to test

static inline int list_empty( const struct list_head * head )

{

return head->next == head ;

}

/**

* list_is_last - test whether @list is the last entry in list @head

* @list : the entry to test

* @head : the head of the list

static inline int list_is_last( const struct list_head *list ,

const struct list_head *head )

{

return list->next == head ;

}

2.2插入

/**

*Insert a new entry between two known consecutive entries.

*This is only for internal list manipulation where we know

*the prev/next entries already!

static inline void __list_add( struct list_head *new ,

struct list_head *prev ,

struct list_head *next )

{

next->prev = new ;

new->next = next ;

new->prev = prev ;

prev->next = new ;

}

/**

*list_add - add a new entry

*@new : new entry to be added

*@head : list head to add it after

*Insert a new entry after the specified head .

*This is good for implementing stacks .

static inline void list_add( struct list_head * new ,struct list_head *new )

{

__list_add( new , head , head->next ) ;

}

/**

* list_add_tail - add a new entry

* @new : new entry to be added

* @head : list head to add it before

* Insert a new entry before the specified head .

* This is useful for implementing queues .

static inline void list_add_tail ( struct list_head *new , struct list_head *head )

{

__list_add( new , head->prev , head ) ;

}

对链表的插入有两种：在表头插入和在表尾插入。

例：

struct my_struct new_my_struct;

/* initialize new_my_struct here */

LIST_HEAD(list);

/* link new_my_struct in directly after the head of list */
list_add(&new_my_struct.list, &list);

2.3 删除

/**

* Delete a list entry by making the prev / next entries

* point to each other.

* This is only for internal list manipulation where we know

* the prev/next entries already!

static inline void __list_del( struct list_head * prev , struct list_head * next )

{

next->prev = prev ;

prev->next = next ;

}

/**

* list_del - deletes entry from list.

* @entry : the element to delete from the list.

* Note : list_empty() on entry does not return true after this , the entry is

* in an undefined state.

static inline void list_del( struct list_head *entry )

{

__list_del( entry->prev , entry->next ) ;

entry->next = LIST_POISON1 ;

entry->prev = LIST_POISON2 ;

}

例：

/* unlink new_my_struct from the list it is on */
list_del(&new_my_struct.list);

2.4 替换

/**

* list_replace - replace old entry by new one

* @old : the element to be replaced

* @new : the new element to insert

* If @old was empty , it will be overwritten. ？不会oops？

static inline void list_replace( struct list_head *old ,

struct list_head *new)

{

new->next = old->next ;

new->next->prev = new ;

new->prev = old->prev ;

new->prev->next = new ;

}

/**
 * list_replace_init - replace old entry by new one and reinitialize the old one
 * @old: the element to be replaced
 * @new: the new element to insert
 *
 * After the replacement @old is reset with INIT_LIST_HEAD().
 */
static inline void list_replace_init(struct list_head *old,
				     struct list_head *new)
{
	list_replace(old, new);
	INIT_LIST_HEAD(old);
}

/**

* list_del_init - deletes entry from list and reinitialize it .

* @entry : the element to delete from the list .

static inline void list_del_init ( struct list_head *entry )

{

__list_del( entry->prev , entry->next ) ;

INIT_LIST_HEAD( entry ) ;

}

2.5 搬移

/**

* list_move - delete from the list and add as another's head

* @list : the entry to move

* @head : the head that will precede our entry

static inline void list_move ( struct list_head *list , struct list_head *head )

{

__list_del( list->prev , list->next ) ;

list_add( list , head ) ;

}

/**

* list_move_tail - delete from one list and add as another's tail

* @list : the entry to move

* @head : the head that will follow our entry

static inline void list_move_tail(struct list_head *list ,

struct list_head *head )

{

__list_del(list->prev , list->next ) ;

list_add_tail( list , head ) ;

}

2.6合并

把第一个链表合并到第二个链表。注意，第一个链表首节点被放弃。

/**

* list_splice - join two lists , this is designed for stacks

* @list : the new list to add.

* @head : the place to add the first list.

static inline void list_splice( const struct list_head *list ,

struct list_head *head )

{

if( !list_empty( list ) )

__list_splice( list , head , head->next ) ;

}

static inline void __list_splice( const struct list_head *list ,

struct list_head *prev ,

struct list_head *next )

{

struct list_head *first = list->next ;

struct list_head *last = list->prev ;

first->prev = prev ;

prev->next = first ;

last->next = next ;

next->prev = last ;

}

/**

* list_splice_tail - join two lists , each list being a queue

* @list : the new list to add.

* @head : the place to add it in the first list .

static inline void list_splice_tail( struct list_head *list ,

struct list_head *head )

{

if( ! list_empty( list ) )

__list_splice( list , head ->prev , head ) ;

}

3. 遍历

/**
 * list_entry - get the struct for this entry
 * @ptr:	the &struct list_head pointer.
 * @type:	the type of the struct this is embedded in.
 * @member:	the name of the list_struct within the struct.
 */
#define list_entry(ptr, type, member) \
	container_of(ptr, type, member)

例：

// mine为初始化了得my_struct结构体

struct my_struct *p = list_entry( &mine . list , my_struct , list ) ;

/**
 * container_of - cast a member of a structure out to the containing structure
 * @ptr:	the pointer to the member.
 * @type:	the type of the container struct this is embedded in.
 * @member:	the name of the member within the struct.
 *
 * Uses the GCC extensions typeof() and statement expressions.  __mptr is
 * declared with the member's own type, so passing a @ptr of a different
 * pointer type draws a compiler diagnostic.  No space may separate the
 * macro name from its parameter list, otherwise the macro becomes
 * object-like.
 */
#define container_of(ptr, type, member) ({			\
	const typeof(((type *)0)->member) *__mptr = (ptr);	\
	(type *)((char *)__mptr - offsetof(type, member)); })

/*
 * offsetof - byte offset of MEMBER inside TYPE, computed as the address of
 * the member in an imaginary TYPE object placed at address 0.  Guarded so
 * it does not redefine an offsetof() already supplied by <stddef.h>.
 */
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif

这里使用的是一个利用编译器技术的小技巧，即先求得结构成员在结构中的偏移量，然后根据成员变量的地址反过来得出属主结构变量的地址。

container_of()和offsetof()并不仅用于链表操作，这里最有趣的地方是((type *)0)->member，它将0地址强制"转换"为type结构的指针，再访问到type结构中的member成员。在container_of宏中，它用来给typeof()提供参数（typeof()是gcc的扩展，和sizeof()类似），以获得member成员的数据类型；在offsetof()中，这个member成员的地址实际上就是type数据结构中member成员相对于结构变量的偏移量。

/**
 * list_for_each - iterate over a list
 * @pos:	the &struct list_head to use as a loop cursor.
 * @head:	the head for your list.
 *
 * Starts at the entry after @head and stops once the cursor is back at
 * @head, so @head itself is never visited.  @head is parenthesized in the
 * comparison so that an expression argument expands correctly.
 */
#define list_for_each(pos, head) \
	for (pos = (head)->next; prefetch(pos->next), pos != (head); \
	     pos = pos->next)

它实际上是一个for循环，利用传入的pos作为循环变量，从表头head开始，逐项向后（next方向）移动pos，直至又回到head（prefetch()可以不考虑，用于预取以提高遍历速度）。

例：

/*
 * Walk the list of my_struct objects using mine.list as the list head.
 * list_for_each() begins at head->next and ends on reaching the head, so
 * mine itself is not visited by the loop body.
 */
struct list_head *p;

list_for_each(p, &mine.list) {
	struct my_struct *ptr = list_entry(p, struct my_struct, list);

	/* dog is an unsigned long, hence %lu rather than %ld */
	printk(KERN_ALERT" the number of dog : %lu " , ptr->dog);
}

大多数情况下，遍历链表的时候都需要获得链表节点数据项，也就是说list_for_each()和list_entry()总是同时使用。

/**
 * list_for_each_entry - iterate over list of given type
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 *
 * Every line of the definition except the last ends in a backslash, and
 * no space separates the macro name from its parameter list.
 */
#define list_for_each_entry(pos, head, member)				\
	for (pos = list_entry((head)->next, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = list_entry(pos->member.next, typeof(*pos), member))

list_for_each_entry相当于list_for_each和list_entry的结合，可以更加方便的使用。

4. 安全性考虑

在并发执行的环境下，链表操作通常都应该考虑同步安全性问题，为了方便，Linux将这一操作留给应用自己处理。Linux链表自己考虑的安全性主要有两个方面：

a) list_empty()判断

基本的list_empty()仅以头指针的next是否指向自己来判断链表是否为空，Linux链表另行提供了一个list_empty_careful()宏，它同时判断头指针的next和prev，仅当两者都指向自己时才返回真。这主要是为了应付另一个cpu正在处理同一个链表而造成next、prev不一致的情况。但代码注释也承认，这一安全保障能力有限：除非其他cpu的链表操作只有list_del_init()，否则仍然不能保证安全，也就是说，还是需要加锁保护。

b) 遍历时节点删除

前面介绍了用于链表遍历的几个宏，它们都是通过移动pos指针来达到遍历的目的。但如果遍历的操作中包含删除pos指针所指向的节点，pos指针的移动就会被中断，因为list_del(pos)将把pos的next、prev置成LIST_POISON1和LIST_POISON2的特殊值。

当然，调用者完全可以自己缓存next指针使遍历操作能够连贯起来，但为了编程的一致性，Linux链表仍然提供了两个对应于基本遍历操作的"_safe"接口：list_for_each_safe(pos, n, head)、list_for_each_entry_safe(pos, n, head, member)，它们要求调用者另外提供一个与pos同类型的指针n，在for循环中暂存pos下一个节点的地址，避免因pos节点被释放而造成的断链。

扩展

1. hlist

list和hlist
图6 list和hlist

精益求精的Linux链表设计者（因为list.h没有署名，所以很可能就是Linus Torvalds）认为双头（next、prev）的双链表对于HASH表来说"过于浪费"，因而另行设计了一套用于HASH表应用的hlist数据结构--单指针表头双循环链表，从上图可以看出，hlist的表头仅有一个指向首节点的指针，而没有指向尾节点的指针，这样在可能是海量的HASH表中存储的表头就能减少一半的空间消耗。

因为表头和节点的数据结构不同，插入操作如果发生在表头和首节点之间，以往的方法就行不通了：表头的first指针必须修改指向新插入的节点，却不能使用类似list_add()这样统一的描述。为此，hlist节点的prev不再是指向前一个节点的指针，而是指向前一个节点（可能是表头）中的next（对于表头则是first）指针（struct hlist_node **pprev），从而在表头插入的操作可以通过一致的"*(node->pprev)"访问和修改前驱节点的next（或first）指针。

2. read-copy update

在Linux链表功能接口中还有一系列以"_rcu"结尾的宏，与以上介绍的很多函数一一对应。RCU（Read-Copy Update）是2.5/2.6内核中引入的新技术，它通过延迟写操作来提高同步性能。

我们知道，系统中数据读取操作远多于写操作，而rwlock机制在smp环境下随着处理机增多性能会迅速下降。针对这一应用背景，IBM Linux技术中心的Paul E. McKenney提出了"读拷贝更新"的技术，并将其应用于Linux内核中。RCU技术的核心是写操作分为写-更新两步，允许读操作在任何时候无阻访问，当系统有写操作时，更新动作一直延迟到对该数据的所有读操作完成为止。

参考：http://www.ibm.com/developerworks/cn/linux/kernel/l-chain/ 很详细的一个帖子