Linux内核中通知块操作

来源:互联网 发布:仿阿里巴巴网站源码 编辑:程序博客网 时间:2024/05/15 12:13
Linux内核中通知块操作本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝,转载,转载时请保持文档的完整性,严禁用于任何商业用途。msn:yfydz_no1@hotmail.com来源:http://yfydz.cublog.cn1. 前言notify是Linux内核中一种常用的事件回调处理机制,提供了基于优先级的回调链表处理功能。以下内核代码版本为2.6.19.2。2. 数据结构/* include/linux/notifier.h */// 基本的通知块结构struct notifier_block {// 回调函数int (*notifier_call)(struct notifier_block *, unsigned long, void *);// 链表中的下一个结构, 这是一个单向链表struct notifier_block *next;// 该块的优先级, 在链表中各个块是按此优先级值进行排序的, 值大的在链表前, 表明// 相应回调函数执行的顺序int priority;};除了基本的通知块结构, 还定义了一些扩展的通知块结构:// 原子通知头结构, 增加了一个锁来保证操作的原子性struct atomic_notifier_head {spinlock_t lock;struct notifier_block *head;};// 阻塞通知头结构, 增加了一个读写信号灯struct blocking_notifier_head {struct rw_semaphore rwsem;struct notifier_block *head;};// 原始通知头结构, 就是一个通知块指针struct raw_notifier_head {struct notifier_block *head;};// srcu: Sleepable Read-Copy Update mechanism// srcu通知头结构, 增加了锁和srcu结构struct srcu_notifier_head {struct mutex mutex;struct srcu_struct srcu;struct notifier_block *head;};以下是一些宏来初始化各种类型的通知头结构, 一般在程序中使用:#define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \  spin_lock_init(&(name)->lock); \  (name)->head = NULL;  \} while (0)#define BLOCKING_INIT_NOTIFIER_HEAD(name) do { \  init_rwsem(&(name)->rwsem); \  (name)->head = NULL;  \} while (0)#define RAW_INIT_NOTIFIER_HEAD(name) do { \  (name)->head = NULL;  \} while (0)以下这些宏也是用来初始化各种类型的通知头结构, 但是在参数定义时使用:#define ATOMIC_NOTIFIER_INIT(name) {    \  .lock = __SPIN_LOCK_UNLOCKED(name.lock), \  .head = NULL }#define BLOCKING_NOTIFIER_INIT(name) {    \  .rwsem = __RWSEM_INITIALIZER((name).rwsem), \  .head = NULL }#define RAW_NOTIFIER_INIT(name) {    \  .head = NULL }注意, 没有定义srcu通知头结构的初始化, 因为srcu是不能静态初始化的.以下这些宏用来直接定义通知头结构:#define ATOMIC_NOTIFIER_HEAD(name)    \struct atomic_notifier_head name =   \  ATOMIC_NOTIFIER_INIT(name)#define BLOCKING_NOTIFIER_HEAD(name)    \struct blocking_notifier_head name =   \  BLOCKING_NOTIFIER_INIT(name)#define RAW_NOTIFIER_HEAD(name)     \struct raw_notifier_head name =    \  RAW_NOTIFIER_INIT(name)3. 
基本通知块操作函数关于通知块的基本操作函数都在kernel/sys.c中定义3.1 登记该函数将一个通知块结构挂接到指定的通知链表/** Notifier chain core routines.  The exported routines below* are layered on top of these, with appropriate locking added.*/// nl是链表头块的地址, n是要添加到该链表的通知块static int notifier_chain_register(struct notifier_block **nl,  struct notifier_block *n){// 使用的是dummy header算法, 即使刚开始时链表为空也不用显式判断区分while ((*nl) != NULL) {// 判断优先权值, 优先权值越大位置越靠前  if (n->priority > (*nl)->priority)   break;  nl = &((*nl)->next);}// 将节点n链接到链表nl中的合适位置n->next = *nl;// 使用rcu处理函数保证SMP下的安全性, 相当于加上锁再赋值rcu_assign_pointer(*nl, n);return 0;}3.2 撤销该函数将一个通知块结构从通知链表中拆除:// nl是链表头块的地址, n是要删除的通知块static int notifier_chain_unregister(struct notifier_block **nl,  struct notifier_block *n){while ((*nl) != NULL) {// 地址匹配, 进行拆除操作  if ((*nl) == n) {// *nl=n->next的安全赋值操作,相当于将节点从链表断开   rcu_assign_pointer(*nl, n->next);   return 0;  }  nl = &((*nl)->next);}return -ENOENT;}3.3 回调函数处理该函数执行链表中各节点的回调函数:// nl通常是通知块链表头的地址, val和v是传给回调函数的参数static int __kprobes notifier_call_chain(struct notifier_block **nl,  unsigned long val, void *v){int ret = NOTIFY_DONE;struct notifier_block *nb, *next_nb;// 安全地获取通知块指针nb = rcu_dereference(*nl);// 链表循环while (nb) {// 找下一个块  next_nb = rcu_dereference(nb->next);// 调用回调函数  ret = nb->notifier_call(nb, val, v);// 如果返回停止标志, 不执行后续结构  if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)   break;// 循环到下一节点  nb = next_nb;}return ret;}4. 扩展的通知块操作扩展的通知块操作功能和基本通知块类似, 但使用了扩展的结构中的参数保证操作的安全4.1 原子通知块4.1.1 登记/** Atomic notifier chain routines.  
Registration and unregistration* use a spinlock, and call_chain is synchronized by RCU (no locks).*//*** atomic_notifier_chain_register - Add notifier to an atomic notifier chain* @nh: Pointer to head of the atomic notifier chain* @n: New entry in notifier chain** Adds a notifier to an atomic notifier chain.** Currently always returns zero.*/// 只是在基本通知登记操作前后加锁解锁进行保护int atomic_notifier_chain_register(struct atomic_notifier_head *nh,  struct notifier_block *n){unsigned long flags;int ret;// 加锁spin_lock_irqsave(&nh->lock, flags);ret = notifier_chain_register(&nh->head, n);// 解锁spin_unlock_irqrestore(&nh->lock, flags);return ret;}EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);4.1.2 撤销/*** atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain* @nh: Pointer to head of the atomic notifier chain* @n: Entry to remove from notifier chain** Removes a notifier from an atomic notifier chain.** Returns zero on success or %-ENOENT on failure.*/// 只是在基本通知块撤销操作前后加锁解锁进行保护int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,  struct notifier_block *n){unsigned long flags;int ret;// 加锁spin_lock_irqsave(&nh->lock, flags);ret = notifier_chain_unregister(&nh->head, n);// 解锁spin_unlock_irqrestore(&nh->lock, flags);// 同步rcu, 等待一个grace periodsynchronize_rcu();return ret;}EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);4.1.3 原子回调这个函数是在原子操作上下文中调用, 是不能阻塞的int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,  unsigned long val, void *v){int ret;// 禁止了抢占rcu_read_lock();// 使用基本通知块回调ret = notifier_call_chain(&nh->head, val, v);// 允许抢占rcu_read_unlock();return ret;}EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);4.2 可阻塞通知块4.2.1 登记int blocking_notifier_chain_register(struct blocking_notifier_head *nh,  struct notifier_block *n){int ret;/*  * This code gets used during boot-up, when task switching is  * not yet working and interrupts must remain disabled.  At  * such times we must not call down_write().  
*/// 这是内核启动时就进行调用了, 虽然可能性很小, 直接执行基本登记函数// 不用处理信号灯, 因为此时是不能阻塞if (unlikely(system_state == SYSTEM_BOOTING))  return notifier_chain_register(&nh->head, n);// 使用信号灯进行同步, 可能阻塞down_write(&nh->rwsem);// 基本登记函数ret = notifier_chain_register(&nh->head, n);up_write(&nh->rwsem);return ret;}EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);4.2.2 撤销该函数是在进程处理过程中调用,可阻塞:int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,  struct notifier_block *n){int ret;/*  * This code gets used during boot-up, when task switching is  * not yet working and interrupts must remain disabled.  At  * such times we must not call down_write().  */// 这是内核启动时就进行调用了, 虽然可能性很小, 直接执行基本撤销函数// 不用处理信号灯, 因为此时是不能阻塞if (unlikely(system_state == SYSTEM_BOOTING))  return notifier_chain_unregister(&nh->head, n);// 使用信号灯进行同步, 可能阻塞down_write(&nh->rwsem);// 基本撤销函数ret = notifier_chain_unregister(&nh->head, n);up_write(&nh->rwsem);return ret;}EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);4.2.3 回调在进程上下文中调用, 可以阻塞:int blocking_notifier_call_chain(struct blocking_notifier_head *nh,  unsigned long val, void *v){int ret;// 信号灯同步down_read(&nh->rwsem);// 进行基本回调处理ret = notifier_call_chain(&nh->head, val, v);up_read(&nh->rwsem);return ret;}EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);4.3 原始通知块操作和基本通知块操作完全相同:int raw_notifier_chain_register(struct raw_notifier_head *nh,  struct notifier_block *n){return notifier_chain_register(&nh->head, n);}EXPORT_SYMBOL_GPL(raw_notifier_chain_register);int raw_notifier_chain_unregister(struct raw_notifier_head *nh,  struct notifier_block *n){return notifier_chain_unregister(&nh->head, n);}EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);int raw_notifier_call_chain(struct raw_notifier_head *nh,  unsigned long val, void *v){return notifier_call_chain(&nh->head, val, v);}EXPORT_SYMBOL_GPL(raw_notifier_call_chain);4.4 SRCU通知块操作4.4.1 登记必须在进程的上下文中调用, 和blocking通知类似int srcu_notifier_chain_register(struct srcu_notifier_head *nh,  struct notifier_block *n){int ret;/*  * This 
code gets used during boot-up, when task switching is  * not yet working and interrupts must remain disabled.  At  * such times we must not call mutex_lock().  */if (unlikely(system_state == SYSTEM_BOOTING))  return notifier_chain_register(&nh->head, n);mutex_lock(&nh->mutex);ret = notifier_chain_register(&nh->head, n);mutex_unlock(&nh->mutex);return ret;}EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);4.4.2 撤销必须在进程的上下文中调用, 和blocking通知类似int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,  struct notifier_block *n){int ret;/*  * This code gets used during boot-up, when task switching is  * not yet working and interrupts must remain disabled.  At  * such times we must not call mutex_lock().  */if (unlikely(system_state == SYSTEM_BOOTING))  return notifier_chain_unregister(&nh->head, n);mutex_lock(&nh->mutex);ret = notifier_chain_unregister(&nh->head, n);mutex_unlock(&nh->mutex);synchronize_srcu(&nh->srcu);return ret;}EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);4.4.3 回调在进程的上下文中调用, 可以阻塞:int srcu_notifier_call_chain(struct srcu_notifier_head *nh,  unsigned long val, void *v){int ret;int idx;// 使用srcu读锁来加锁idx = srcu_read_lock(&nh->srcu);ret = notifier_call_chain(&nh->head, val, v);srcu_read_unlock(&nh->srcu, idx);return ret;}EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);4.4.4 初始化因为SRCU通知不能通过宏来初始化, 必须要专门定义一个初始化函数来初始化srcu的通知块参数:void srcu_init_notifier_head(struct srcu_notifier_head *nh){// 初始化锁mutex_init(&nh->mutex);// 初始化srcu结构if (init_srcu_struct(&nh->srcu) < 0)  BUG();nh->head = NULL;}EXPORT_SYMBOL_GPL(srcu_init_notifier_head);5. 
应用下面以连接跟踪中的事件处理来说明, 就是通过通知块来实现的:初始化定义一个静态的原子通知头参数:/* net/ipv4/netfilter/ip_conntrack_core.c */ATOMIC_NOTIFIER_HEAD(ip_conntrack_chain);连接跟踪的事件处理函数, 实际就是通知回调函数:/* include/linux/netfilter_ipv4/ip_conntrack.h */// 连接事件处理static inline void ip_conntrack_event(enum ip_conntrack_events event,          struct ip_conntrack *ct){// 判断连接是否合法if (is_confirmed(ct) && !is_dying(ct))// 调用原子通知回调函数, 执行登记的回调函数  atomic_notifier_call_chain(&ip_conntrack_chain, event, ct);}连接跟踪相关事件的登记和撤销:/* include/linux/netfilter_ipv4/ip_conntrack.h */// 就是标准的原子通知登记和撤销函数static inline int ip_conntrack_register_notifier(struct notifier_block *nb){return atomic_notifier_chain_register(&ip_conntrack_chain, nb);}static inline int ip_conntrack_unregister_notifier(struct notifier_block *nb){return atomic_notifier_chain_unregister(&ip_conntrack_chain, nb);}在net/ipv4/netfilter/ip_conntrack_netlink.c中定义了netlink通知回调函数:static struct notifier_block ctnl_notifier = {.notifier_call = ctnetlink_conntrack_event,};......ret = ip_conntrack_register_notifier(&ctnl_notifier);......这样, 在任何地方调用ip_conntrack_event()函数时就会调用到该netlink通知回调函数.6. 结论notify通知块处理是内核中的一种回调处理机制, 一般不是直接调用原始的通知处理函数, 而是根据要完成的功能, 如事件回调, 重启回调等重新定义新的处理函数, 然后在必要的地方调用相应的回调包装函数就可以实现回调。

0 0