linux路由内核实现分析(一)----邻居子节点

来源:互联网 发布:js flash播放器代码 编辑:程序博客网 时间:2024/04/28 23:50

有三种路由结构:

 

1,neigh_table{}结构和neighbour{}结构

 存储和本机物理上相邻的主机的地址信息表,通常称为邻居子节点,指的是和本机相邻、只有

 一跳的机器.其中neigh_table{}是组织邻居项的表结构,neighbour{}表示一个相邻的机器节点

 

2,路由规则的存储,决定了到达一个网络地址必须经过怎样的路由,使用fib_table来表示

 

3,提供了路由地址的缓存机制,使用rtable链表来表示.

neigh_table结构

 

/*
 * struct neigh_table - one neighbour table.
 *
 * Owns the hash table of struct neighbour entries (hash_buckets), the key
 * length and hash callback used to find them, and the rwlock that
 * neigh_lookup() and neigh_create() take around bucket access.
 * Field notes marked NOTE(review) are not established by the code shown
 * alongside this definition and should be confirmed against the kernel tree.
 */
struct neigh_table

{

       /* Link to another neigh_table. NOTE(review): presumably chains the
        * tables registered by different protocols - confirm. */
       struct neigh_table   *next;

       /* Address family served by this table. */
       int                  family;

       /* NOTE(review): presumably the allocation size of one entry
        * (struct neighbour plus its trailing key) - confirm. */
       int                  entry_size;

       /* Length in bytes of a lookup key; passed to memcmp()/memcpy() on
        * neighbour->primary_key by neigh_lookup() and neigh_create(). */
       int                  key_len;

       /* Hash callback over (key, device); callers mask the result with
        * hash_mask to select a bucket. */
       __u32                   (*hash)(const void *pkey, const struct net_device *);

       /* Optional hook run by neigh_create() on a freshly filled entry;
        * a negative return value aborts the creation. */
       int                  (*constructor)(struct neighbour *);

       /* NOTE(review): hooks and redo callback for pneigh_entry (presumably
        * proxy entries) - not exercised by the code shown here. */
       int                  (*pconstructor)(struct pneigh_entry *);

       void               (*pdestructor)(struct pneigh_entry *);

       void               (*proxy_redo)(struct sk_buff *skb);

       /* Name string of the table. */
       char               *id;

       /* Parameter block embedded in the table. NOTE(review): presumably the
        * defaults that individual neighbour->parms start from - confirm. */
       struct neigh_parms parms;

      

       /* NOTE(review): garbage-collection tunables; their exact semantics
        * are not visible in the code shown here. */
       int                  gc_interval;

       int                  gc_thresh1;

       int                  gc_thresh2;

       int                  gc_thresh3;

       unsigned long         last_flush;

       struct timer_list     gc_timer;

       struct timer_list     proxy_timer;

       struct sk_buff_head       proxy_queue;

       /* Entry counter; neigh_create() grows the hash table when this
        * exceeds hash_mask + 1. */
       atomic_t         entries;

       /* Taken for reading by neigh_lookup() and for writing by
        * neigh_create() around every hash_buckets access. */
       rwlock_t        lock;

       unsigned long         last_rand;

       struct kmem_cache              *kmem_cachep;

       /* NOTE(review): presumably the counters bumped by
        * NEIGH_CACHE_STAT_INC(tbl, ...) - confirm. */
       struct neigh_statistics    *stats;

       /* Array of bucket heads; each bucket is a singly linked chain
        * threaded through neighbour->next. */
       struct neighbour     **hash_buckets;

       /* ANDed with the hash value to get a bucket index; neigh_create()
        * treats hash_mask + 1 as the current number of buckets. */
       unsigned int           hash_mask;

       __u32                   hash_rnd;

       unsigned int           hash_chain_gc;

       struct pneigh_entry **phash_buckets;

       /* Only present when the kernel is built with CONFIG_PROC_FS. */
#ifdef CONFIG_PROC_FS

       struct proc_dir_entry     *pde;

#endif

};

 

struct proc_dir_entry     *pde

这个成员是linux 2.6中为支持proc文件系统而添加的,但是我没有找到proc文件系统中

与邻居节点直接相关的信息,估计是和其他结构一起向用户态提供了路由信息,有可能

是输出到/proc/net/route里面.(补注:在2.6内核的邻居表初始化代码中,pde对应的是/proc/net/stat/下以表的id命名的统计文件,例如ARP表的/proc/net/stat/arp_cache.)

 

struct neigh_table *next; //指向下一个邻居表;内核把各协议的邻居表(例如IPv4的arp_tbl)串成一条链表,它并不表示ARP报文到达的下一台机器

 

int family;//地址族,由上层协议决定,对于IPv4(ARP)而言就是 AF_INET

 

int entry_size; //入口长度,也就是一个邻居结构的大小,初始化为sizeof(neighbour)+4(4为一个IP地址的长度)

 

//哈希关键值长度 IP地址的长度,为4

int  key_len;

 

__u32 (*hash)(const void *pkey, const struct net_device *); //用于在这个neigh_table中存放和检索neighbour的哈希函数

 

//允许邻居的上限,根据网络的类型,大小会有所变化,例如C类地址,邻居限制就应该小于255

int gc_thresh3

 

//哈希数组,邻居都存入其中;在一个neigh_table里面,最多可以有32个neighbour结构的链表(注:这是固定大小哈希表时的数字;从下文neigh_create可以看到,邻居数超过桶数时会调用neigh_hash_grow把桶数翻倍).

struct neighbour  **hash_buckets;

atomic_t entries; //整个neigh_table中邻居的数量(原子计数)

unsigned int hash_mask; //哈希数组大小的掩码

 

neighbour结构

 

/*
 * struct neighbour - one entry of a neigh_table.
 *
 * An entry is identified by the pair (dev, primary_key): neigh_lookup() and
 * neigh_create() compare both when walking a hash bucket.
 * Field notes marked NOTE(review) are not established by the code shown
 * alongside this definition and should be confirmed against the kernel tree.
 */
struct neighbour

{

       /* Next entry in the same hash bucket chain. */
       struct neighbour     *next;

       /* Table this entry belongs to. */
       struct neigh_table   *tbl;

       /* Parameter block; neigh_create() reads neigh_setup,
        * base_reachable_time and dead from it. */
       struct neigh_parms *parms;

       /* Device the neighbour is reached through; set by neigh_create(),
        * which also calls dev_hold() on it. */
       struct net_device    *dev;

       unsigned long         used;

       /* Timestamp in jiffies; neigh_create() initialises it to
        * jiffies - 2 * parms->base_reachable_time. */
       unsigned long         confirmed;

       unsigned long         updated;

       __u8                     flags;

       /* NOTE(review): presumably the neighbour-unreachability-detection
        * state of the entry - confirm. */
       __u8                     nud_state;

       __u8                     type;

       /* Cleared by neigh_create() once the entry is linked into a bucket. */
       __u8                     dead;

       atomic_t         probes;

       rwlock_t        lock;

       /* Link-layer address buffer: MAX_ADDR_LEN rounded up to a multiple
        * of sizeof(unsigned long). */
       unsigned char        ha[(MAX_ADDR_LEN+sizeof(unsigned long)-1)&~(sizeof(unsigned long)-1)];

       struct hh_cache            *hh;

       /* NOTE(review): presumably the reference count manipulated by
        * neigh_hold()/neigh_release() - confirm. */
       atomic_t         refcnt;

       /* Transmit hook for packets sent to this neighbour. */
       int                  (*output)(struct sk_buff *skb);

       struct sk_buff_head       arp_queue;

       struct timer_list      timer;

       /* Operations table for this entry (for example arp_hh_ops). */
       struct neigh_ops     *ops;

       /* Trailing key storage; neigh_create() copies tbl->key_len bytes of
        * the lookup key here. */
       u8                  primary_key[0];

};

 

struct neigh_table *tbl;//所在的邻居表,指向上层的neigh_table结构

struct net_device *dev;//邻居所对应的网络设备接口指针

 

int (*output)(struct sk_buff *skb);//找到合适的邻居节点之后,系统将调用这个函数指针,

                             使用结构中的dev设备,将数据包发送出去,如果协议

               族是AF_INET,将调用dev_queue_xmit函数发送数据

 

u8 primary_key[0];//哈希关键字

 

// This table (taken from net/ipv4/arp.c, per the article) supplies the
// function pointers used for an AF_INET neighbour:
//  - .output and .connected_output both point at neigh_resolve_output;
//  - .hh_output and .queue_xmit both point at dev_queue_xmit.
// NOTE(review): the "hh" in the name presumably refers to the hardware
// header cache (struct hh_cache) - confirm against net/ipv4/arp.c.

static struct neigh_ops arp_hh_ops = {

       .family =        AF_INET,

       .solicit =         arp_solicit,

       .error_report =              arp_error_report,

       .output =        neigh_resolve_output,

       .connected_output =      neigh_resolve_output,

       .hh_output =          dev_queue_xmit,

       .queue_xmit =        dev_queue_xmit,

};

邻居节点相关的操作:

 

查找到路由后,会调用arp_bind_neighbour绑定一个邻居项

 

int arp_bind_neighbour(struct dst_entry *dst)

{

       struct net_device *dev = dst->dev;

       struct neighbour *n = dst->neighbour;

 

       if (dev == NULL)

              return -EINVAL;

      

       //如果这个邻居不存在,则执行__neigh_lookup_errno

 

       if (n == NULL) {

              __be32 nexthop = ((struct rtable*)dst)->rt_gateway;

              if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT))

                     nexthop = 0;

              n = __neigh_lookup_errno(

 

      //ATM网络和以太网络调用了不同的neigh_table,作为以太网络将调用&arp_tbl作为neigh_table的入口

         #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)

                  dev->type == ARPHRD_ATM ? clip_tbl_hook :

          #endif

                  &arp_tbl, &nexthop, dev);

              if (IS_ERR(n))

                     return PTR_ERR(n);

              dst->neighbour = n;

       }

       return 0;

}

__neigh_lookup_errno函数

 

/*
 * __neigh_lookup_errno - find the neighbour for (pkey, dev), creating it if absent.
 *
 * @tbl:  neighbour table to search.
 * @pkey: lookup key.
 * @dev:  device the neighbour is reached through.
 *
 * Returns whatever neigh_lookup() found; when that is NULL, returns the
 * result of neigh_create(), which may be an ERR_PTR() value - callers are
 * expected to test the result with IS_ERR().
 */
static inline struct neighbour *
__neigh_lookup_errno(struct neigh_table *tbl, const void *pkey,
		     struct net_device *dev)
{
	struct neighbour *found = neigh_lookup(tbl, pkey, dev);

	/* Fall back to creating a new entry only when the lookup missed. */
	return found ? found : neigh_create(tbl, pkey, dev);
}

 

neigh_lookup函数

 

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,

                            struct net_device *dev)

{

       struct neighbour *n;

       int key_len = tbl->key_len;

       u32 hash_val = tbl->hash(pkey, dev);

 

       NEIGH_CACHE_STAT_INC(tbl, lookups);

 

       read_lock_bh(&tbl->lock);

       //以下代码可以看出,通过指定的neigh_table入口,找到hash_buckets,

       //因为所有的neighbour链表是经过哈希的,所以再通过传入的哈希值作为

  //下标最后找到链表头,然后在往下遍历,直到找到相对应的neighbour结构

       //为止

 

       for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {

              if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {

                     neigh_hold(n);

                     NEIGH_CACHE_STAT_INC(tbl, hits);

                     break;

              }

       }

       read_unlock_bh(&tbl->lock);

       return n;

}

 

 

先到邻居表中寻找对应的邻居项,如果不存在,则新建一项。继续跟进,
新建邻居项时调用的是neigh_create函数

 

struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,

                            struct net_device *dev)

{

       u32 hash_val;

       int key_len = tbl->key_len;

       int error;

       struct neighbour *n1, *rc, *n = neigh_alloc(tbl);

 

       if (!n) {

              rc = ERR_PTR(-ENOBUFS);

              goto out;

       }

       //每个neighbour的哈希就是在这里计算的,实际上我们可以看出,

  //所谓的哈希值就是目的IP

       memcpy(n->primary_key, pkey, key_len);

       n->dev = dev;

       dev_hold(dev);

 

      

       if (tbl->constructor && (error = tbl->constructor(n)) < 0) {

              rc = ERR_PTR(error);

              goto out_neigh_release;

       }

 

      

       if (n->parms->neigh_setup &&

           (error = n->parms->neigh_setup(n)) < 0) {

              rc = ERR_PTR(error);

              goto out_neigh_release;

       }

 

       n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

 

       write_lock_bh(&tbl->lock);

      

       if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))

              neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);

 

       hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;

 

       if (n->parms->dead) {

              rc = ERR_PTR(-EINVAL);

              goto out_tbl_unlock;

       }

       //查找所添加的邻居是否已经存在

       for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {

              if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {

                     neigh_hold(n1);

                     rc = n1;

                     goto out_tbl_unlock;

              }

       }

 

       n->next = tbl->hash_buckets[hash_val];

       tbl->hash_buckets[hash_val] = n;

       n->dead = 0;

       neigh_hold(n);

       write_unlock_bh(&tbl->lock);

       NEIGH_PRINTK2("neigh %p is created./n", n);

       rc = n;

out:

       return rc;

out_tbl_unlock:

       write_unlock_bh(&tbl->lock);

out_neigh_release:

       neigh_release(n);

       goto out;

}

原创粉丝点击