Linux内核网络部分数据包流程

来源:互联网 发布:数据解决方案 编辑:程序博客网 时间:2024/04/30 21:52

1.   Linux内核网络部分数据包流程

1.1.      数据结构

1.1.1.            ethhdr

/*
 * Ethernet (link-layer) frame header. The mac.ethernet member of
 * struct sk_buff points at one of these.
 */
struct ethhdr

{

       /* eth_type_trans() tests bit 0 of the first byte, then compares the
        * whole address against dev->broadcast and dev->dev_addr. */
       unsigned char h_dest[ETH_ALEN];     /* destination eth addr    */

       unsigned char h_source[ETH_ALEN];  /* source ether addr       */

       /* eth_type_trans() reads this through ntohs() and returns it
        * unconverted when the converted value is >= 1536. */
       unsigned short       h_proto;         /* packet type ID field   */

};

1.1.2.            iphdr

/*
 * Network-layer IP header. The nh.iph member of struct sk_buff points at
 * one of these.
 *
 * The first byte holds two 4-bit fields (ihl and version); which one is
 * declared first depends on __LITTLE_ENDIAN_BITFIELD / __BIG_ENDIAN_BITFIELD.
 * If neither macro is defined the build stops with an #error.
 */
struct iphdr {

#if defined(__LITTLE_ENDIAN_BITFIELD)

       /* ihl: ip_local_deliver_finish() multiplies this by 4 to get the
        * number of bytes it pulls off the front of the skb. */
       __u8       ihl:4,

              version:4;

#elif defined (__BIG_ENDIAN_BITFIELD)

       __u8       version:4,

             ihl:4;

#else

#error     "Please fix <asm/byteorder.h>"

#endif

       __u8       tos;

       __u16     tot_len;

       __u16     id;

       __u16     frag_off;

       __u8       ttl;

       __u8       protocol;

       __u16     check;

       __u32     saddr;

       __u32     daddr;

       /*The options start here. */

};

1.1.3.            tcphdr

/*
 * Transport-layer TCP header. The h.th member of struct sk_buff points at
 * one of these.
 *
 * After ack_seq comes a single 16-bit bitfield group holding res1 (4 bits),
 * doff (4 bits) and eight 1-bit flags. The declaration order of that group
 * is reversed between the __LITTLE_ENDIAN_BITFIELD and __BIG_ENDIAN_BITFIELD
 * branches; if neither macro is defined the build stops with an #error.
 */
struct tcphdr {

       __u16     source;

       __u16     dest;

       __u32     seq;

       __u32     ack_seq;

#if defined(__LITTLE_ENDIAN_BITFIELD)

       __u16     res1:4,

              doff:4,

              fin:1,

              syn:1,

              rst:1,

              psh:1,

              ack:1,

              urg:1,

              ece:1,

              cwr:1;

#elif defined(__BIG_ENDIAN_BITFIELD)

       __u16     doff:4,

              res1:4,

              cwr:1,

              ece:1,

              urg:1,

              ack:1,

              psh:1,

              rst:1,

              syn:1,

              fin:1;

#else

#error     "Adjust your <asm/byteorder.h> defines"

#endif    

       __u16     window;

       __u16     check;

       __u16     urg_ptr;

};

1.1.4.            sk_buff

/*
 * Socket buffer: the per-packet structure that the driver and protocol
 * code excerpts in this document read and update.
 *
 * Layout constraints visible here: next/prev must remain the first two
 * members, and several members exist only under CONFIG_NETFILTER,
 * CONFIG_NETFILTER_DEBUG, CONFIG_HIPPI or CONFIG_NET_SCHED.
 */
struct sk_buff {

       /* These two members must be first. */

       struct sk_buff * next;                  /* Next buffer in list                           */

       struct sk_buff * prev;                  /* Previous buffer in list                     */

 

       struct sk_buff_head * list;            /* List we are on                         */

       struct sock     *sk;               /* Socket we are owned by                */

       struct timeval  stamp;                   /* Time we arrived                      */

       /* The e100 driver excerpt sets this to bdp->device in
        * e100_add_skb_to_end(). */
       struct net_device  *dev;             /* Device we arrived on/are leaving by         */

#define HAVE_SKBUFF_PHYSINOUTDEV

       struct net_device  *physindev;   /* Physical device we arrived on          */

       struct net_device  *physoutdev; /* Physical device we will leave by              */

 

       /* Transport layer header */
       /* net_rx_action() sets h.raw (together with nh.raw) to skb->data;
        * ip_local_deliver_finish() sets h.raw to skb->data again after
        * pulling ihl*4 bytes. */

       union

       {

              struct tcphdr *th;

              struct udphdr       *uh;

              struct icmphdr      *icmph;

              struct igmphdr     *igmph;

              struct iphdr   *ipiph;

              struct spxhdr *spxh;

              unsigned char       *raw;

       } h;

 

       /* Network layer header */
       /* net_rx_action() sets nh.raw to skb->data. */

       union

       {

              struct iphdr   *iph;

              struct ipv6hdr       *ipv6h;

              struct arphdr *arph;

              struct ipxhdr *ipxh;

              unsigned char       *raw;

       } nh;

 

       /* Link layer header */
       /* eth_type_trans() sets mac.raw to skb->data before it pulls
        * dev->hard_header_len bytes. */

       union

       {     

             struct ethhdr *ethernet;

             unsigned char      *raw;

       } mac;

 

       struct  dst_entry *dst;

 

       /*

        * This is the control buffer. It is free to use for every

        * layer. Please put your private variables there. If you

        * want to keep them across layers you have to do a skb_clone()

        * first. This is owned by whoever has the skb queued ATM.

        */

       char              cb[48];   

 

       /* __pskb_pull() subtracts the pulled byte count from len. */
       unsigned int   len;                /* Length of actual data                */

      unsigned int        data_len;

       unsigned int    csum;                   /* Checksum                              */

       /* pkt_type is assigned in eth_type_trans(); ip_rcv() drops packets
        * whose pkt_type is PACKET_OTHERHOST. ip_summed is assigned by the
        * e100 receive path (CHECKSUM_NONE when checksum offload is off). */
       unsigned char        __unused,              /* Dead field, may be reused                */

                     cloned,          /* head may be cloned (check refcnt to be sure). */

                    pkt_type,              /* Packet class                                   */

                    ip_summed;           /* Driver fed us an IP checksum                 */

       __u32            priority;          /* Packet queueing priority                  */

       atomic_t  users;                    /* User count - see datagram.c,tcp.c          */

       /* The e100 receive path stores the return value of
        * eth_type_trans() here. */
       unsigned short       protocol;        /* Packet protocol from driver.           */

       unsigned short       security;         /* Security level of packet                   */

       unsigned int    truesize;         /* Buffer size                                    */

 

       unsigned char *head;                   /* Head of buffer                        */

       /* __pskb_pull() advances data by the pulled byte count. */
       unsigned char       *data;                   /* Data head pointer                            */

       unsigned char *tail;                     /* Tail pointer                              */

       unsigned char        *end;                    /* End pointer                             */

 

       void              (*destructor)(struct sk_buff *); /* Destruct function             */

#ifdef CONFIG_NETFILTER

       /* Can be used for communication between hooks. */

        unsigned long  nfmark;

       /* Cache info */

       __u32            nfcache;

       /* Associated connection, if any */
       /* Assigned in resolve_normal_ct() and ip_conntrack_attach(). */

       struct nf_ct_info *nfct;

#ifdef CONFIG_NETFILTER_DEBUG

        unsigned int nf_debug;

#endif

#endif /*CONFIG_NETFILTER*/

 

#if defined(CONFIG_HIPPI)

       union{

              __u32     ifield;

       } private;

#endif

 

#ifdef CONFIG_NET_SCHED

       __u32           tc_index;               /* traffic control index */

#endif

};

 

 

 

1.2.      sk_buff的成员相关的代码片断

1.2.1.            关于sk_buff->data

网卡驱动中创建skb结构的代码片断:

if (new_skb == NULL) {

 

            new_skb = (struct sk_buff *) dev_alloc_skb(skb_size);

     }

………

rx_struct->skb = new_skb;

rx_struct->dma_addr = pci_map_single(bdp->pdev, new_skb->data,

                                      sizeof (rfd_t),

                                             PCI_DMA_FROMDEVICE);

 

 

网卡驱动中接收数据包的代码片断:

rx_struct = list_entry(bdp->active_rx_list.next,

                                 struct rx_list_elem, list_elem);

skb = rx_struct->skb;

 

rfd = RFD_POINTER(skb, bdp);  /* locate RFD within skb */

 

pci_dma_sync_single(bdp->pdev, rx_struct->dma_addr,

                              bdp->rfd_size, PCI_DMA_FROMDEVICE);

………

/* set the protocol */

skb->protocol = eth_type_trans(skb, dev);

 

/* set the checksum info */

if (bdp->flags & DF_CSUM_OFFLOAD) {

if (bdp->rev_id >= D102_REV_ID) {

                          skb->ip_summed = e100_D102_check_checksum(rfd);

                   } else {

                          skb->ip_summed = e100_D101M_checksum(bdp, skb);

                   }

            } else {

                   skb->ip_summed = CHECKSUM_NONE;

     }

 

1.2.2.            关于sk_buff->dev

e100_alloc_skbs函数中不仅调用e100_alloc_skb,同时也调用e100_add_skb_to_end函数。在e100_add_skb_to_end函数中有以下代码:

(rx_struct->skb)->dev = bdp->device;

这决定了该数据包是从哪个网卡接收上来的。

 

1.2.3.            关于sk_buff->mac

上面说到网卡驱动接收数据包,其中函数eth_type_trans是关键,该函数代码如下:

unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev)

{

     struct ethhdr *eth;

     unsigned char *rawp;

    

     skb->mac.raw=skb->data;

     skb_pull(skb,dev->hard_header_len);

     eth= skb->mac.ethernet;

    

     if(*eth->h_dest&1)

     {

            if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)

                   skb->pkt_type=PACKET_BROADCAST;

            else

                   skb->pkt_type=PACKET_MULTICAST;

     }

    

     /*

      *    This ALLMULTI check should be redundant by 1.4

      *    so don't forget to remove it.

      *

      *    Seems, you forgot to remove it. All silly devices

      *    seems to set IFF_PROMISC.

      */

      

     else if(1 /*dev->flags&IFF_PROMISC*/)

     {

            if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN))

                   skb->pkt_type=PACKET_OTHERHOST;

     }

    

     if (ntohs(eth->h_proto) >= 1536)

            return eth->h_proto;

           

     rawp = skb->data;

    

     /*

      *    This is a magic hack to spot IPX packets. Older Novell breaks

      *    the protocol design and runs IPX over 802.3 without an 802.2 LLC

      *    layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This

      *    won't work for fault tolerant netware but does for the rest.

      */

     if (*(unsigned short *)rawp == 0xFFFF)

            return htons(ETH_P_802_3);

           

     /*

      *    Real 802.2 LLC

      */

     return htons(ETH_P_802_2);

}

 

1.2.4.            关于sk_buff->pkt_type

sk_buff->pkt_type的值由上面可以看出是在eth_type_trans函数中赋值的,可是这个值是干什么用的呢?

在内核ip_rcv函数中有这样的代码:

       unsigned char * pos = (unsigned char*)(skb->nh.iph);

      

       if (skb->pkt_type == PACKET_OTHERHOST)

              goto drop;      //丢弃了

 

       IP_INC_STATS_BH(IpInReceives);

 

1.2.5.            关于sk_buff->nh和sk_buff->h

net_rx_action函数中有以下代码:

………

skb->h.raw = skb->nh.raw = skb->data;

………

 

ip_local_deliver_finish函数中有以下代码:

………

int ihl = skb->nh.iph->ihl*4;

………

if (!pskb_may_pull(skb, ihl))

       goto out;

__skb_pull(skb, ihl);

………

skb->h.raw = skb->data;

………

阅读下面两个函数的代码是有帮助的。

/*
 * __pskb_pull - drop len bytes from the front of skb.
 *
 * When len exceeds skb_headlen(skb), __pskb_pull_tail() is first called
 * with the difference; if that returns NULL, this returns NULL and skb->len
 * and skb->data are not modified here. Otherwise skb->len is reduced by len,
 * skb->data is advanced by len, and the new skb->data is returned.
 *
 * len is not checked against skb->len here; pskb_pull() does that check.
 *
 * NOTE(review): the return type is char * although skb->data is declared
 * unsigned char * and pskb_pull() returns unsigned char * - confirm whether
 * this signedness mismatch is intended.
 */
static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len)
{
       if (len > skb_headlen(skb)) {
              if (__pskb_pull_tail(skb, len - skb_headlen(skb)) == NULL)
                     return NULL;
       }

       skb->len -= len;
       skb->data += len;
       return skb->data;
}

 

/*
 * pskb_pull - length-checked wrapper around __pskb_pull().
 *
 * Returns NULL when len is greater than skb->len; otherwise returns
 * whatever __pskb_pull(skb, len) returns.
 */
static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len)
{
       if (len <= skb->len)
              return __pskb_pull(skb, len);

       return NULL;
}

1.2.6.            关于sk_buff->nfct

这个成员用于连接跟踪。

resolve_normal_ct函数中有以下代码:

………

skb->nfct = &h->ctrack->infos[*ctinfo];

………

 

ip_conntrack_attach函数中有以下代码:

……….

skb->nfct = &ct->infos[ctinfo];

……….

 

 

 

1.3.      IP数据包流程

1.3.1.            网卡驱动到内核的流程

 

如图所示,当网卡收到数据包并产生中断后,内核调用中断处理函数,在中断处理函数中分配生成skb结构,并获得数据,然后调用netif_rx函数。在netif_rx函数中主要是把skb放入softnet_data中并产生NET_RX_SOFTIRQ软中断。

 

1.3.2.            内核中IP层的流程

net_rx_action是NET_RX_SOFTIRQ软中断的处理函数。


 

 


 转发数据包的流程

 

1.3.3.            本地数据包流程