【Linux4.1.12源码分析】协议栈gro收包之TCP处理

来源:互联网 发布:阿里云香港机房 编辑:程序博客网 时间:2024/06/14 00:53

TCP 的 GRO 实现定义在 tcpv4_offload 对象中:

/*
 * Offload callback table for TCP over IPv4: wires the GSO segmentation
 * handler and the two GRO handlers (receive and complete) into a
 * net_offload descriptor.
 */
static const struct net_offload tcpv4_offload = {
	.callbacks = {
		.gso_segment	= tcp4_gso_segment,
		.gro_receive	= tcp4_gro_receive,
		.gro_complete	= tcp4_gro_complete,
	},
};
tcp4_gro_receive函数

/*
 * IPv4 front end of TCP GRO receive.
 *
 * @head: list of packets currently held for aggregation
 * @skb:  the newly received packet
 *
 * Validates the TCP checksum (only when the packet is not already marked
 * for flush) and then defers to the IP-version-independent
 * tcp_gro_receive().  Returns NULL when checksum validation fails,
 * otherwise whatever tcp_gro_receive() returns.
 */
static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	/* Don't bother verifying checksum if we're going to flush anyway. */
	if (!NAPI_GRO_CB(skb)->flush) {
		if (skb_gro_checksum_validate(skb, IPPROTO_TCP,
					      inet_gro_compute_pseudo)) {
			/*
			 * Validation failed: mark the packet for flush so it
			 * is not aggregated (per the original author's note
			 * it is then handed up the stack as-is).
			 */
			NAPI_GRO_CB(skb)->flush = 1;
			return NULL;
		}
	}

	/* Generic TCP GRO handling, independent of the IP version. */
	return tcp_gro_receive(head, skb);
}
tcp_gro_receive函数

/*
 * Try to merge a newly received TCP segment into a packet of the same
 * flow that is already held on the GRO list.
 *
 * @head: list of held packets (walked via ->next)
 * @skb:  the newly received segment
 *
 * Returns NULL when no held packet has to be flushed, or a pointer to the
 * list slot of the held same-flow packet that should be flushed.  As a
 * side effect NAPI_GRO_CB(skb)->flush is OR-ed with 1 when the new segment
 * itself must not be held for further aggregation.
 */
struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	struct sk_buff **pp = NULL;
	struct sk_buff *p;
	struct tcphdr *th;
	struct tcphdr *th2;
	unsigned int len;
	unsigned int thlen;
	__be32 flags;
	unsigned int mss = 1;
	unsigned int hlen;
	unsigned int off;
	int flush = 1;	/* stays 1 on every early "goto out" below */
	int i;

	/* Locate the fixed-size part of the TCP header. */
	off = skb_gro_offset(skb);
	hlen = off + sizeof(*th);
	th = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, hlen)) {
		th = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!th))
			goto out;
	}

	/* Full TCP header length in bytes (doff counts 32-bit words). */
	thlen = th->doff * 4;
	if (thlen < sizeof(*th))
		goto out;

	/* Re-fetch the header so that the options are covered as well. */
	hlen = off + thlen;
	if (skb_gro_header_hard(skb, hlen)) {
		th = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!th))
			goto out;
	}

	/* Advance the GRO offset past the TCP header, onto the payload. */
	skb_gro_pull(skb, thlen);

	len = skb_gro_len(skb);		/* payload length of the new segment */
	flags = tcp_flag_word(th);

	/* Walk the held packets looking for one of the same flow. */
	for (; (p = *head); head = &p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		th2 = tcp_hdr(p);

		/*
		 * Compare source and destination port with one 32-bit load
		 * starting at ->source; any difference means another flow.
		 */
		if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		/* p (and *head) now designate the held same-flow packet. */
		goto found;
	}

	/* No same-flow packet: p is NULL here. */
	goto out_check_final;

found:
	/* Include the IP ID check below from the inner most IP hdr */
	/* Start from the flush bits recorded on the held packet (set by lower layers). */
	flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id;
	/* A segment carrying CWR is never merged. */
	flush |= (__force int)(flags & TCP_FLAG_CWR);
	/* Any flag difference other than CWR/FIN/PSH prevents merging. */
	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
	/* The acknowledgment numbers must be identical. */
	flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
	/* The TCP options must be identical, compared 32 bits at a time. */
	for (i = sizeof(*th); i < thlen; i += 4)
		flush |= *(u32 *)((u8 *)th + i) ^
			 *(u32 *)((u8 *)th2 + i);

	mss = tcp_skb_mss(p);

	/*
	 * len is unsigned, so (len - 1) >= mss is true both when the payload
	 * is larger than mss and when it is empty (len - 1 wraps around).
	 */
	flush |= (len - 1) >= mss;
	/* Non-zero unless the new segment starts exactly where the held data ends. */
	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);

	/* Merge only when nothing above objected; the merge itself may also fail. */
	if (flush || skb_gro_receive(head, skb)) {
		/* With mss == 1 the "len < mss" test below only fires for len == 0. */
		mss = 1;
		goto out_check_final;
	}

	/* Merged: propagate FIN/PSH of the new segment to the held packet. */
	p = *head;
	th2 = tcp_hdr(p);
	tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);

out_check_final:
	/* A short segment (typically the last one of a burst) should be delivered soon. */
	flush = len < mss;
	/* Any of these five flags also forces delivery. */
	flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
					TCP_FLAG_RST | TCP_FLAG_SYN |
					TCP_FLAG_FIN));

	/*
	 * p is non-NULL only when a same-flow packet was found.  Ask the
	 * caller to flush it when the new segment was not merged into it
	 * (skb's same_flow is set to 1 only by a successful
	 * skb_gro_receive(); presumably it is 0 before - confirm in the
	 * caller) or when flush is set.
	 */
	if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
		pp = head;

out:
	/* Record whether the new segment itself must not be held. */
	NAPI_GRO_CB(skb)->flush |= (flush != 0);

	return pp;
}
skb_gro_receive函数

/*
 * Append the payload of @skb to the held packet *@head.
 *
 * @head: list slot of the held packet that @skb belongs to
 * @skb:  the newly received packet (GRO offset already past its headers)
 *
 * Three strategies are used, in this order of preference:
 *   1. headlen <= offset:  the whole payload lives in page frags, which are
 *      appended to the frag array of the last skb of the held packet;
 *   2. skb->head_frag set: the linear payload is turned into one frag and
 *      appended together with skb's own frags;
 *   3. otherwise (or when the frag array would overflow): skb is chained
 *      onto the held packet's frag_list.
 *
 * Returns 0 on success, or -E2BIG when the merged length would reach
 * 65536 bytes.
 */
int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
	unsigned int offset = skb_gro_offset(skb);
	unsigned int headlen = skb_headlen(skb);
	unsigned int len = skb_gro_len(skb);
	struct sk_buff *lp, *p = *head;	/* p: held packet of the same flow */
	unsigned int delta_truesize;

	/* Refuse to grow the held packet to 65536 bytes or more. */
	if (unlikely(p->len + len >= 65536))
		return -E2BIG;

	/* Last skb of the held packet (per the original note, initially p itself). */
	lp = NAPI_GRO_CB(p)->last;
	pinfo = skb_shinfo(lp);

	if (headlen <= offset) {
		/* The linear area holds headers only; all payload is in frags. */
		skb_frag_t *frag;
		skb_frag_t *frag2;
		int i = skbinfo->nr_frags;
		int nr_frags = pinfo->nr_frags + i;	/* frag count after merging */

		/* Frag array would overflow: fall back to frag_list chaining. */
		if (nr_frags > MAX_SKB_FRAGS)
			goto merge;

		/* offset now counts header bytes that sit inside the first frag. */
		offset -= headlen;
		pinfo->nr_frags = nr_frags;
		skbinfo->nr_frags = 0;

		/*
		 * Copy skb's frags to the tail of lp's array, last one first.
		 * NOTE(review): the do/while runs at least once, so this path
		 * relies on skb having at least one frag - confirm.
		 */
		frag = pinfo->frags + nr_frags;
		frag2 = skbinfo->frags + i;
		do {
			*--frag = *--frag2;
		} while (--i);

		/* frag now points at skb's first frag: skip the header bytes in it. */
		frag->page_offset += offset;
		skb_frag_size_sub(frag, offset);

		/* all fragments truesize : remove (head size + sk_buff) */
		delta_truesize = skb->truesize -
				 SKB_TRUESIZE(skb_end_offset(skb));

		/* skb no longer owns any paged data. */
		skb->truesize -= skb->data_len;
		skb->len -= skb->data_len;
		skb->data_len = 0;

		/* skb has been absorbed; flag it to be freed. */
		NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE;
		goto done;
	} else if (skb->head_frag) {
		/*
		 * The linear area is page backed (per the original author's
		 * note, skbs built by the ixgbe driver have head_frag set).
		 */
		int nr_frags = pinfo->nr_frags;
		skb_frag_t *frag = pinfo->frags + nr_frags;
		struct page *page = virt_to_head_page(skb->head);	/* page backing the linear area */
		unsigned int first_size = headlen - offset;	/* payload bytes in the linear area */
		unsigned int first_offset;

		/* One extra frag for the linear payload plus skb's own frags. */
		if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
			goto merge;

		/* Offset of the payload within that page. */
		first_offset = skb->data -
			       (unsigned char *)page_address(page) +
			       offset;

		pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;

		/* New frag describing the payload held in skb's linear area. */
		frag->page.p	  = page;
		frag->page_offset = first_offset;
		skb_frag_size_set(frag, first_size);

		/* Append skb's own frags right behind it. */
		memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
		/* We dont need to clear skbinfo->nr_frags here */

		delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
		/* skb has been absorbed and its head page taken over. */
		NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
		goto done;
	}

merge:
	/* Fallback: chain the whole skb onto the held packet's frag_list. */
	delta_truesize = skb->truesize;
	if (offset > headlen) {
		/* Part of the headers sits in frags[0]: trim it from that frag. */
		unsigned int eat = offset - headlen;

		skbinfo->frags[0].page_offset += eat;
		skb_frag_size_sub(&skbinfo->frags[0], eat);
		skb->data_len -= eat;
		skb->len -= eat;
		offset = headlen;
	}

	/* Make skb->data point at the payload. */
	__skb_pull(skb, offset);

	if (NAPI_GRO_CB(p)->last == p)
		/* Nothing chained yet: skb becomes the first frag_list entry. */
		skb_shinfo(p)->frag_list = skb;
	else
		/* Otherwise link skb after the current last chained skb. */
		NAPI_GRO_CB(p)->last->next = skb;
	NAPI_GRO_CB(p)->last = skb;
	/*
	 * NOTE(review): helper defined elsewhere; presumably it marks skb's
	 * header as released rather than freeing the linear area - confirm.
	 */
	__skb_header_release(skb);
	/* Make lp == p so that only p's counters are updated below. */
	lp = p;

done:
	/* One more segment aggregated (later copied to gso_segs by tcp_gro_complete()). */
	NAPI_GRO_CB(p)->count++;
	p->data_len += len;
	p->truesize += delta_truesize;
	p->len += len;
	if (lp != p) {
		/* The payload went into lp's frag array: account for it there too. */
		lp->data_len += len;
		lp->truesize += delta_truesize;
		lp->len += len;
	}
	/* Tell the caller that skb has been merged into a held packet. */
	NAPI_GRO_CB(skb)->same_flow = 1;
	return 0;
}


tcp4_gro_complete函数

static int tcp4_gro_complete(struct sk_buff *skb, int thoff){const struct iphdr *iph = ip_hdr(skb);struct tcphdr *th = tcp_hdr(skb);th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,//刷新check值  iph->daddr, 0);skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;//置GSO_TCPV4标记return tcp_gro_complete(skb);}
tcp_gro_complete函数

int tcp_gro_complete(struct sk_buff *skb){struct tcphdr *th = tcp_hdr(skb);skb->csum_start = (unsigned char *)th - skb->head;//设置ip_summed及相关值skb->csum_offset = offsetof(struct tcphdr, check);skb->ip_summed = CHECKSUM_PARTIAL;skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;//设置segsif (th->cwr)skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;//如果当前报文携带cwr标记,则携带SKB_GSO_TCP_ECN标记return 0;}

tcp4_gro_receive实现了报文合并,根据报文的不同,有几种可能:

1)找到同流报文,但合并失败(例如合并后长度达到或超过65536,或者序号不连续、ack_seq/TCP选项不一致等导致flush非0),该同流报文将会被提交给协议栈;当前报文若自身flush为0,则在mac层被放到gro_list;

2)找到同流报文,合并成功,当前报文未携带相关flag,将不会有报文被提交到协议栈;

3)找到同流报文,合并成功,当前报文携带相关flag,同流报文和当前报文一起被提交到协议栈;

4)未找到同流报文,当前报文携带相关flag,当前报文将被提交给协议栈;

5)未找到同流报文,当前报文未携带相关flag,当前报文将被保存到gro_list中;

0 0
原创粉丝点击