sendmsg在传输层上的实现(二)

来源:互联网 发布:华为软件开发工程师 编辑:程序博客网 时间:2024/06/06 00:27

tcp_write_xmit()将发送队列上的skb发送出去。

static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle){struct tcp_sock *tp = tcp_sk(sk);struct sk_buff *skb;unsigned int tso_segs, sent_pkts;int cwnd_quota;int result;/* If we are closed, the bytes will have to remain here. * In time closedown will finish, we empty the write queue and all * will be happy. */if (unlikely(sk->sk_state == TCP_CLOSE))return 0;sent_pkts = 0;//已发送总段数,初始为0;        //发送MUT探测包/* Do MTU probing. */if ((result = tcp_mtu_probe(sk)) == 0) {return 0;} else if (result > 0) {sent_pkts = 1;}        //如果发送队列不为空,则准备发送数据while ((skb = sk->sk_send_head)) {unsigned int limit;                //初始化TSO分段信息tso_segs = tcp_init_tso_segs(sk, skb, mss_now);BUG_ON(!tso_segs);                //检测拥塞窗口大小cwnd_quota = tcp_cwnd_test(tp, skb);if (!cwnd_quota)break;if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))break;if (tso_segs == 1) {if (unlikely(!tcp_nagle_test(tp, skb, mss_now,     (tcp_skb_is_last(sk, skb) ?      nonagle : TCP_NAGLE_PUSH))))break;} else {if (tcp_tso_should_defer(sk, tp, skb))break;}                //根据TSO进行分段limit = mss_now;if (tso_segs > 1) {limit = tcp_window_allows(tp, skb,  mss_now, cwnd_quota);if (skb->len < limit) {unsigned int trim = skb->len % mss_now;if (trim)limit = skb->len - trim;}}if (skb->len > limit &&    unlikely(tso_fragment(sk, skb, limit, mss_now)))break;TCP_SKB_CB(skb)->when = tcp_time_stamp;//标记发送时间if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC)))//发送TCP段,第三个参数1表示skb的克隆。break;/* Advance the send_head.  This one is sent out. * This call will increment packets_out. */update_send_head(sk, tp, skb);//更新发送队列头指针,snd_nxt,计数未确认报文段数,计时重传超时定时器。tcp_minshall_update(tp, mss_now, skb);sent_pkts++;//更新发送总段数}if (likely(sent_pkts)) {tcp_cwnd_validate(sk, tp);return 0;}return !tp->packets_out && sk->sk_send_head;}

tcp_transmit_skb()负责构建TCP首部,并将报文段输出到网络层:

//clone_it非零表示发送前先生成skb的副本(skb已被克隆过时用pskb_copy复制,否则用skb_clone克隆),为零则直接发送传入的skb;gfp_mask表示克隆或复制时的内存分配方式

/*
 * Build the TCP header for one segment and hand it to the network layer
 * through icsk->icsk_af_ops->queue_xmit().
 *
 * @sk:       sending socket
 * @skb:      segment to transmit; must be non-NULL with a non-zero
 *            tcp_skb_pcount() (enforced with BUG_ON)
 * @clone_it: non-zero to transmit a duplicate of @skb (pskb_copy() when
 *            the skb is already cloned, skb_clone() otherwise); zero to
 *            transmit @skb itself
 * @gfp_mask: allocation flags used for the clone/copy
 *
 * Returns -ENOBUFS when the clone/copy fails.  Otherwise returns the
 * queue_xmit() result when it is <= 0; for a positive result
 * tcp_enter_cwr() is called and NET_XMIT_CN is reported as 0 while any
 * other positive value is returned unchanged.
 */
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet;
	struct tcp_sock *tp;
	struct tcp_skb_cb *tcb;
	int tcp_header_size;
	struct tcphdr *th;
	int sysctl_flags;
	int err;

	BUG_ON(!skb || !tcp_skb_pcount(skb));

	/* If congestion control is doing timestamping, we must
	 * take such a timestamp before we potentially clone/copy.
	 */
	if (icsk->icsk_ca_ops->rtt_sample)
		__net_timestamp(skb);

	if (likely(clone_it)) {
		if (unlikely(skb_cloned(skb)))
			skb = pskb_copy(skb, gfp_mask);
		else
			skb = skb_clone(skb, gfp_mask);
		if (unlikely(!skb))
			return -ENOBUFS;
	}

	inet = inet_sk(sk);
	tp = tcp_sk(sk);
	tcb = TCP_SKB_CB(skb);
	tcp_header_size = tp->tcp_header_len;

#define SYSCTL_FLAG_TSTAMPS	0x1
#define SYSCTL_FLAG_WSCALE	0x2
#define SYSCTL_FLAG_SACK	0x4

	/* Adjust the header length according to the kind of segment. */
	sysctl_flags = 0;
	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
		/* SYN segment: base header plus the MSS option. */
		tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
		if (sysctl_tcp_timestamps) {
			/* Timestamp option enabled. */
			tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
			sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
		}
		if (sysctl_tcp_window_scaling) {
			/* Window scale option enabled. */
			tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
			sysctl_flags |= SYSCTL_FLAG_WSCALE;
		}
		if (sysctl_tcp_sack) {
			/* SACK-permitted option enabled; extra header space
			 * is added only when timestamps are not in use.
			 */
			sysctl_flags |= SYSCTL_FLAG_SACK;
			if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
				tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
		}
	} else if (unlikely(tp->rx_opt.eff_sacks)) {
		/* A SACK is 2 pad bytes, a 2 byte header, plus
		 * 2 32-bit sequence numbers for each SACK block.
		 */
		tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
				    (tp->rx_opt.eff_sacks *
				     TCPOLEN_SACK_PERBLOCK));
	}

	/* Nothing is in flight: tell congestion control that
	 * transmission is starting.
	 */
	if (tcp_packets_in_flight(tp) == 0)
		tcp_ca_event(sk, CA_EVENT_TX_START);

	/* Reserve room for the TCP header in front of the payload. */
	th = (struct tcphdr *) skb_push(skb, tcp_header_size);
	skb->h.th = th;

	/* Make this socket the owner of the skb. */
	skb_set_owner_w(skb, sk);

	/* Build TCP header and checksum it. */
	th->source		= inet->sport;
	th->dest		= inet->dport;
	th->seq			= htonl(tcb->seq);
	th->ack_seq		= htonl(tp->rcv_nxt);
	/* 16-bit word at byte offset 12: header length in 32-bit words in
	 * the top four bits, OR'd with the control flags.
	 */
	*(((__u16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
					tcb->flags);

	/* Advertise the receive window. */
	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
		/* RFC1323: The window in SYN & SYN/ACK segments
		 * is never scaled.
		 */
		th->window	= htons(tp->rcv_wnd);
	} else {
		th->window	= htons(tcp_select_window(sk));
	}
	th->check		= 0;
	th->urg_ptr		= 0;

	/* Set the urgent pointer and URG flag when in urgent mode and
	 * snd_up lies within 0xFFFF bytes after this segment's start.
	 */
	if (unlikely(tp->urg_mode &&
		     between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) {
		th->urg_ptr		= htons(tp->snd_up-tcb->seq);
		th->urg			= 1;
	}

	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
		/* Build the header options of a SYN segment. */
		tcp_syn_build_options((__u32 *)(th + 1),
				      tcp_advertise_mss(sk),
				      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
				      (sysctl_flags & SYSCTL_FLAG_SACK),
				      (sysctl_flags & SYSCTL_FLAG_WSCALE),
				      tp->rx_opt.rcv_wscale,
				      tcb->when,
				      tp->rx_opt.ts_recent);
	} else {
		/* Build the header options of a non-SYN segment. */
		tcp_build_and_update_options((__u32 *)(th + 1),
					     tp, tcb->when);
		TCP_ECN_send(sk, tp, skb, tcp_header_size);
	}

	/* Compute the checksum. */
	icsk->icsk_af_ops->send_check(sk, skb->len, skb);

	/* Segment carries an ACK.  Per the original annotation this
	 * decrements the quick-ACK count and stops the delayed-ACK timer
	 * -- confirm against tcp_event_ack_sent().
	 */
	if (likely(tcb->flags & TCPCB_FLAG_ACK))
		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));

	/* Segment is longer than its header, i.e. it carries payload. */
	if (skb->len != tcp_header_size)
		tcp_event_data_sent(tp, skb, sk);

	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
		TCP_INC_STATS(TCP_MIB_OUTSEGS);

	/* Hand the segment to the network layer through queue_xmit; per
	 * the original annotation, for TCP this hook is ip_queue_xmit.
	 */
	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
	if (likely(err <= 0))
		return err;

	tcp_enter_cwr(sk);

	/* NET_XMIT_CN is special. It does not guarantee,
	 * that this packet is lost. It tells that device
	 * is about to start to drop packets or already
	 * drops some packets of the same priority and
	 * invokes us to send less aggressively.
	 */
	return err == NET_XMIT_CN ? 0 : err;

#undef SYSCTL_FLAG_TSTAMPS
#undef SYSCTL_FLAG_WSCALE
#undef SYSCTL_FLAG_SACK
}


0 0
原创粉丝点击