12.3 拥塞控制流程

来源:互联网 发布:高端男士护肤品知乎 编辑:程序博客网 时间:2024/06/05 10:00

12.3.1 TCP拥塞状态

        TCP拥塞状态共有5个:

/*
 * Congestion-control states of a TCP sender.
 *
 * Each state has a matching TCPF_CA_* bit mask (1 << state) so that a set
 * of states can be tested with a single bitwise AND.
 */
enum tcp_ca_state {
    /* Initial state, which is also the normal state. */
    TCP_CA_Open = 0,
#define TCPF_CA_Open    (1<<TCP_CA_Open)
    /*
     * Entered when congestion is first detected through SACK blocks or
     * duplicate ACKs; the congestion window is left unchanged.
     */
    TCP_CA_Disorder = 1,
#define TCPF_CA_Disorder (1<<TCP_CA_Disorder)
    /*
     * Congestion Window Reduced: the window is being reduced but nothing is
     * retransmitted; set on local congestion or on receipt of an ECN.
     */
    TCP_CA_CWR = 2,
#define TCPF_CA_CWR (1<<TCP_CA_CWR)
    /*
     * The window is reduced until it reaches ssthresh and may not grow;
     * data is retransmitted.
     */
    TCP_CA_Recovery = 3,
#define TCPF_CA_Recovery (1<<TCP_CA_Recovery)
    /*
     * All sent data is marked lost, the window is reduced to one segment,
     * and the sender then grows it again with slow start.
     */
    TCP_CA_Loss = 4
#define TCPF_CA_Loss    (1<<TCP_CA_Loss)
};

        Open:是初始状态,也是正常状态

        Disorder:当第一次由于SACK块或重复确认而检测到拥塞时进入此状态;此状态下拥塞窗口不变,TCP需要保持网络中的包的数量不变;TCP在进入Recovery状态之前要进入本状态

        CWR(Congestion Window Reduced):此状态下TCP会减小拥塞窗口,但不会重传已发送数据;这个状态在本地拥塞或收到显式拥塞通告(ECN)后设置

        Recovery:减小拥塞窗口直至到达ssthresh但不能增加拥塞窗口,会重传数据

        Loss:所有已发送数据都会被标记为丢失,拥塞窗口减小到一个报文段,然后数据发送端使用慢启动算法增大拥塞窗口。这个状态下不能使用快速重传算法

12.3.2 拥塞窗口

        TCP在调用tcp_write_xmit函数发送数据时会检查拥塞窗口:

1811 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,1812                int push_one, gfp_t gfp)1813 { ...1842         cwnd_quota = tcp_cwnd_test(tp, skb);1843         if (!cwnd_quota) {    //拥塞窗口不允许发送数据1844             if (push_one == 2)  //发送丢失探测报文是允许的1845                 /* Force out a loss probe pkt. */1846                 cwnd_quota = 1;1847             else    //其它的报文不允许1848                 break;1849         }...
         tcp_cwnd_test函数用来检查拥塞窗口是否允许发送数据:

1407 static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,1408                      const struct sk_buff *skb)1409 {1410     u32 in_flight, cwnd;1411 1412     /* Don't be strict about the congestion window for the final FIN.  */1413     if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&1414         tcp_skb_pcount(skb) == 1)   1415         return 1;    //不需要分段的带FIN标记位的报文是不受拥塞窗口限制的1416 1417     in_flight = tcp_packets_in_flight(tp);  //得到在网络中的报文数量1418     cwnd = tp->snd_cwnd;1419     if (in_flight < cwnd)1420         return (cwnd - in_flight);    //当前拥塞窗口即允许发送的报文数量,减去在网络中的报文数量就是现在允许发送的数量1421 1422     return 0;1423 }
        可见拥塞窗口的值保存在tp->snd_cwnd中,这个值由拥塞控制算法来计算。

12.3.3 拥塞控制的起点

        TCP的拥塞控制是从ACK的处理开始的:

3325 static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) 3326 {3327     struct inet_connection_sock *icsk = inet_csk(sk);3328     struct tcp_sock *tp = tcp_sk(sk);3329     u32 prior_snd_una = tp->snd_una;3330     u32 ack_seq = TCP_SKB_CB(skb)->seq;3331     u32 ack = TCP_SKB_CB(skb)->ack_seq;3332     bool is_dupack = false;3333     u32 prior_in_flight;3334     u32 prior_fackets;3335     int prior_packets = tp->packets_out;3336     int prior_sacked = tp->sacked_out;3337     int pkts_acked = 0;3338     int previous_packets_out = 0;3339 3340     /* If the ack is older than previous acks3341      * then we can probably ignore it.3342      */3343     if (before(ack, prior_snd_una)) {3344         /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */3345         if (before(ack, prior_snd_una - tp->max_window)) {3346             tcp_send_challenge_ack(sk);3347             return -1;3348         }3349         goto old_ack;3350     }3351 3352     /* If the ack includes data we haven't sent yet, discard3353      * this segment (RFC793 Section 3.9).3354      */3355     if (after(ack, tp->snd_nxt))3356         goto invalid_ack;...3374     if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {    //处于快速处理路径并且有新被确认的数据3375         /* Window is constant, pure forward advance.3376          * No more checks are required.3377          * Note, we use the fact that SND.UNA>=SND.WL2.3378          */3379         tcp_update_wl(tp, ack_seq);3380         tp->snd_una = ack;3381         flag |= FLAG_WIN_UPDATE;3382 3383         tcp_ca_event(sk, CA_EVENT_FAST_ACK);  //处理快速ACK拥塞事件3384 3385         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);3386     } else {  //处于慢速处理路径或ack_seq号与之前重复3387         if (ack_seq != TCP_SKB_CB(skb)->end_seq)3388             flag |= FLAG_DATA;  //包中有数据3389         else3390             NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);3391 3392         flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);3393 3394         if 
(TCP_SKB_CB(skb)->sacked)3395             flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);3396 3397         if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))    //TCP开启了ECN功能且在ACK中发现了ecn标记3398             flag |= FLAG_ECE;3399 3400         tcp_ca_event(sk, CA_EVENT_SLOW_ACK);    //处理慢速ACK拥塞事件3401     }...3409     if (!prior_packets)3410         goto no_queue;...3413     previous_packets_out = tp->packets_out;3414     flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);3415 3416     pkts_acked = previous_packets_out - tp->packets_out;3417 3418     if (tcp_ack_is_dubious(sk, flag)) {3419         /* Advance CWND, if state allows this. */3420         if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))3421             tcp_cong_avoid(sk, ack, prior_in_flight);3422         is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));3423         tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,3424                       prior_packets, is_dupack, flag);3425     } else {3426         if (flag & FLAG_DATA_ACKED)3427             tcp_cong_avoid(sk, ack, prior_in_flight);3428     }3429 3430     if (tp->tlp_high_seq)3431         tcp_process_tlp_ack(sk, ack, flag);3432 3433     if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {3434         struct dst_entry *dst = __sk_dst_get(sk);3435         if (dst)3436             dst_confirm(dst);3437     }3438 3439     if (icsk->icsk_pending == ICSK_TIME_RETRANS)3440         tcp_schedule_loss_probe(sk);3441     return 1;3442 3443 no_queue:3444     /* If data was DSACKed, see if we can undo a cwnd reduction. */3445     if (flag & FLAG_DSACKING_ACK)  3446         tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,3447                       prior_packets, is_dupack, flag);3448     /* If this ack opens up a zero window, clear backoff.  
It was3449      * being used to time the probes, and is probably far higher than3450      * it needs to be for normal retransmission.3451      */3452     if (tcp_send_head(sk))3453         tcp_ack_probe(sk);3454 3455     if (tp->tlp_high_seq)3456         tcp_process_tlp_ack(sk, ack, flag);3457     return 1;3458 3459 invalid_ack:3460     SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);3461     return -1;3462 3463 old_ack:3464     /* If data was SACKed, tag it and see if we should send more data.3465      * If data was DSACKed, see if we can undo a cwnd reduction.3466      */3467     if (TCP_SKB_CB(skb)->sacked) { 3468         flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);3469         tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,3470                       prior_packets, is_dupack, flag);3471     }3472 3473     SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);3474     return 0;


        To be continued...



0 0