linux拥塞控制状态机

来源:互联网 发布:承德县大数据招聘信息 编辑:程序博客网 时间:2024/06/13 07:08

        当sender收到ACK后,会根据该ACK的情况对拥塞控制状态进行转换,其实现函数为tcp_fastretrans_alert。

拥塞状态一共分5种,它们之间的转换关系如下图:

Open状态:

常态,正常慢启动或者拥塞避免。

Disorder状态:

当sender收到重复ACK(dupack)或者SACK(选择性确认)时,会进入Disorder状态。拥塞窗口不做调整,每个新到达的确认触发一个新包的发送;

CWR状态:

发送方收到显式拥塞通知(ECN)后,sender采用保守方式,不立即将拥塞窗口减半,而是每隔一个ACK(即每收到两个ACK)将拥塞窗口减一,直到减为原来的一半。

Recovery状态:

Reno算法中,收到三个重复的ACK,或者支持SACK时,SACK段足够多,在支持FACK时,如果highest SACK足够大,都会进入Recovery状态。拥塞窗口的变化类似于CWR状态,直到降为ssthresh,且不会增大。

Loss状态:

当RTO超时,或者接收到的ACK已经被之前的SACK确认,则会进入Loss状态。拥塞窗口设置为1,按慢启动增大。


内核版本:2.6.18

拥塞状态的处理

当sender收到ACK时,如果判断ACK是可疑的,则会进行状态处理。可疑条件:

(1)拥塞控制状态不是Open;

(2)显式拥塞通知或者有数据SACKED;

(3)ACK没有携带数据,没有窗口更新,也没有确认新数据(即FLAG_NOT_DUP中的标志位均未置位,是一个重复ACK)。

变量说明:

packets_out:

sender在snd.una之后已经发送的包数;

sacked_out:

被SACK块确认的包数;

fackets_out:

最高SACK块与snd.una之差;

left_out:

已经离开网络的包数,包括被sack包和丢失包;

/*
 * tcp_fastretrans_alert - run the congestion-control state machine for one
 * incoming ACK.
 *
 * @sk:            socket the ACK belongs to
 * @prior_snd_una: compared with tp->snd_una to tell whether this ACK moved
 *                 the left edge (presumably snd_una as it was before the ACK
 *                 was processed)
 * @prior_packets: tp->packets_out is subtracted from it to obtain the number
 *                 of newly acked packets (presumably packets_out before the
 *                 ACK was processed)
 * @flag:          FLAG_* bits describing the ACK
 *
 * The function works through steps A-F below: it forbids undo on ECE, checks
 * for SACK reneging, marks lost segments, synchronizes left_out, checks
 * whether the current state can be left because high_seq was ACKed, and then
 * processes the (possibly new) state.  Unless one of the branches returns
 * early it finishes by updating the loss scoreboard, reducing cwnd and
 * retransmitting.
 */
static void
tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
		      int prior_packets, int flag)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	/* Duplicate ACK: snd_una did not move and no FLAG_NOT_DUP bit is set. */
	int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP));

	/* Some technical things:
	 * 1. Reno does not count dupacks (sacked_out) automatically.
	 *    With nothing outstanding, sacked_out (and therefore fackets_out
	 *    below) must be zero.
	 */
	if (!tp->packets_out)
		tp->sacked_out = 0;
	/* 2. SACK counts snd_fack in packets inaccurately. */
	if (tp->sacked_out == 0)
		tp->fackets_out = 0;

	/* Now state machine starts.
	 * A. ECE, hence prohibit cwnd undoing, the reduction is required.
	 *    prior_ssthresh holds the ssthresh saved for a later undo;
	 *    zero means "undo not allowed".
	 */
	if (flag&FLAG_ECE)
		tp->prior_ssthresh = 0;

	/* B. In all the states check for reneging SACKs.
	 *    tcp_check_sack_reneging() is not shown here; presumably it
	 *    handles a reneging receiver the same way as a timeout.
	 */
	if (tp->sacked_out && tcp_check_sack_reneging(sk))
		return;

	/* C. Process data loss notification, provided it is valid.
	 *    Only acted on while below high_seq, outside Open, and when
	 *    fackets_out exceeds the tolerated reordering (tp->reordering,
	 *    presumably maintained by the SACK processing code).
	 */
	if ((flag&FLAG_DATA_LOST) &&
	    before(tp->snd_una, tp->high_seq) &&
	    icsk->icsk_ca_state != TCP_CA_Open &&
	    tp->fackets_out > tp->reordering) {
		/* Tag segments at the head of the retransmit queue as LOST. */
		tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
	}

	/* D. Synchronize left_out to current state. */
	tcp_sync_left_out(tp);

	/* E. Check state exit conditions. State can be terminated
	 *    when high_seq is ACKed.
	 */
	if (icsk->icsk_ca_state == TCP_CA_Open) {
		/* NOTE(review): in the collapsed listing this check followed a
		 * "//" marker, so it may have been commented out on purpose;
		 * it is live code in upstream 2.6.18 - confirm.
		 */
		if (!sysctl_tcp_frto)
			BUG_TRAP(tp->retrans_out == 0);
		tp->retrans_stamp = 0;
	} else if (!before(tp->snd_una, tp->high_seq)) {
		/* high_seq has been ACKed. */
		switch (icsk->icsk_ca_state) {
		case TCP_CA_Loss:
			icsk->icsk_retransmits = 0;
			/* Try to leave Loss; non-zero means "hold the state". */
			if (tcp_try_undo_recovery(sk, tp))
				return;
			break;

		case TCP_CA_CWR:
			/* CWR is to be held something *above* high_seq
			 * is ACKed for CWR bit to reach receiver.
			 */
			if (tp->snd_una != tp->high_seq) {
				tcp_complete_cwr(sk);
				tcp_set_ca_state(sk, TCP_CA_Open);
			}
			break;

		case TCP_CA_Disorder:
			tcp_try_undo_dsack(sk, tp);
			if (!tp->undo_marker ||
			    /* For SACK case do not Open to allow to undo
			     * catching for all duplicate ACKs.
			     */
			    IsReno(tp) || tp->snd_una != tp->high_seq) {
				tp->undo_marker = 0;
				tcp_set_ca_state(sk, TCP_CA_Open);
			}
			break;

		case TCP_CA_Recovery:
			/* Reno: drop the emulated SACK count (presumably
			 * resets sacked_out to zero).
			 */
			if (IsReno(tp))
				tcp_reset_reno_sack(tp);
			if (tcp_try_undo_recovery(sk, tp))
				return;
			tcp_complete_cwr(sk);
			break;
		}
	}

	/* F. Process state. */
	switch (icsk->icsk_ca_state) {
	case TCP_CA_Recovery:
		if (prior_snd_una == tp->snd_una) {
			/* Reno emulates SACK: a dupack counts as one more
			 * sacked segment.
			 */
			if (IsReno(tp) && is_dupack)
				tcp_add_reno_sack(sk);
		} else {
			/* Partial ACK: number of packets newly acked. */
			int acked = prior_packets - tp->packets_out;

			if (IsReno(tp))
				tcp_remove_reno_sacks(sk, tp, acked);
			/* Try to undo the cwnd reduction; the result decides
			 * whether the scoreboard is updated further below.
			 */
			is_dupack = tcp_try_undo_partial(sk, tp, acked);
		}
		break;

	case TCP_CA_Loss:
		if (flag&FLAG_DATA_ACKED)
			icsk->icsk_retransmits = 0;
		if (!tcp_try_undo_loss(sk, tp)) {
			/* Undo is not possible: moderate cwnd (presumably
			 * cwnd = min(cwnd, in_flight + 3)) and retransmit
			 * the lost segments.
			 */
			tcp_moderate_cwnd(tp);
			tcp_xmit_retransmit_queue(sk);
			return;
		}
		if (icsk->icsk_ca_state != TCP_CA_Open)
			return;
		/* Loss is undone; fall through to processing in Open state. */
	default:
		if (IsReno(tp)) {
			/* New data acked: restart the emulated SACK count. */
			if (tp->snd_una != prior_snd_una)
				tcp_reset_reno_sack(tp);
			/* Dupack: count one more emulated SACK. */
			if (is_dupack)
				tcp_add_reno_sack(sk);
		}

		/* In Disorder, D-SACKs covering every retransmission allow
		 * the cwnd reduction to be undone.
		 */
		if (icsk->icsk_ca_state == TCP_CA_Disorder)
			tcp_try_undo_dsack(sk, tp);

		/* Not yet time for Recovery: settle in Open or Disorder
		 * (decided inside tcp_try_to_open(), not shown here).
		 */
		if (!tcp_time_to_recover(sk, tp)) {
			tcp_try_to_open(sk, tp, flag);
			return;
		}

		/* MTU probe failure: don't reduce cwnd */
		if (icsk->icsk_ca_state < TCP_CA_CWR &&
		    icsk->icsk_mtup.probe_size &&
		    tp->snd_una == tp->mtu_probe.probe_seq_start) {
			tcp_mtup_probe_failed(sk);
			/* Restores the reduction we did in tcp_mtup_probe() */
			tp->snd_cwnd++;
			tcp_simple_retransmit(sk);
			return;
		}

		/* Otherwise enter Recovery state */
		if (IsReno(tp))
			NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY);
		else
			NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY);

		tp->high_seq = tp->snd_nxt;
		tp->prior_ssthresh = 0;
		tp->undo_marker = tp->snd_una;
		tp->undo_retrans = tp->retrans_out;

		if (icsk->icsk_ca_state < TCP_CA_CWR) {
			/* Remember the current ssthresh for a later undo,
			 * unless ECE forbids undoing.
			 */
			if (!(flag&FLAG_ECE))
				tp->prior_ssthresh = tcp_current_ssthresh(sk);
			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
			TCP_ECN_queue_cwr(tp);
		}

		tp->bytes_acked = 0;
		tp->snd_cwnd_cnt = 0;
		tcp_set_ca_state(sk, TCP_CA_Recovery);
	}

	/* On a dupack, or when the head of the retransmit queue has timed
	 * out, refresh the loss scoreboard.
	 */
	if (is_dupack || tcp_head_timedout(sk, tp))
		tcp_update_scoreboard(sk, tp);
	/* Presumably lowers cwnd by one on every second ACK until it reaches
	 * ssthresh (tcp_cwnd_down() is not shown here).
	 */
	tcp_cwnd_down(sk);
	/* Retransmit the segments marked lost. */
	tcp_xmit_retransmit_queue(sk);
}

拥塞状态撤销

//从Loss状态撤销;

第一种情况:当high_seq被确认,Loss->Open

static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp){if (tcp_may_undo(tp)) {//测试是否可以从Loss状态撤销,撤销条件:1、撤销重传包为0;2、没有重传;3、重传了还没有确认/* Happy end! We did not retransmit anything * or our original transmission succeeded. */DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");tcp_undo_cwr(sk, 1);//撤销拥塞窗口减少;if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);elseNET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO);tp->undo_marker = 0;}if (tp->snd_una == tp->high_seq && IsReno(tp)) {//对于reno算法,必须high_seq后的包被ACK,才能撤销Loss状态,防止假重传(RFC2582)/* Hold old state until something *above* high_seq * is ACKed. For Reno it is MUST to prevent false * fast retransmits (RFC2582). SACK TCP is safe. */tcp_moderate_cwnd(tp);return 1;}tcp_set_ca_state(sk, TCP_CA_Open);//进入Open状态;return 0;}

第二种情况:收到新的确认,尝试Loss->Open

static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp){if (tcp_may_undo(tp)) {//尝试是否可以撤销;struct sk_buff *skb;sk_stream_for_retrans_queue(skb, sk) {TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;//遍历重传队列,取消LOST标志;}clear_all_retrans_hints(tp);DBGUNDO(sk, tp, "partial loss");tp->lost_out = 0;tp->left_out = tp->sacked_out;tcp_undo_cwr(sk, 1);//撤销拥塞窗口减少;NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);inet_csk(sk)->icsk_retransmits = 0;tp->undo_marker = 0;if (!IsReno(tp))tcp_set_ca_state(sk, TCP_CA_Open);return 1;}return 0;}
//撤销拥塞窗口减少
static void tcp_undo_cwr(struct sock *sk, const int undo){struct tcp_sock *tp = tcp_sk(sk);if (tp->prior_ssthresh) {const struct inet_connection_sock *icsk = inet_csk(sk);if (icsk->icsk_ca_ops->undo_cwnd)tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);elsetp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {tp->snd_ssthresh = tp->prior_ssthresh;//恢复到原来的ssthresh值;TCP_ECN_withdraw_cwr(tp);}} else {tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);}tcp_moderate_cwnd(tp);tp->snd_cwnd_stamp = tcp_time_stamp;/* There is something screwy going on with the retrans hints after   an undo */clear_all_retrans_hints(tp);}


//CWR状态撤销,结束拥塞窗口减少。

static inline void tcp_complete_cwr(struct sock *sk){struct tcp_sock *tp = tcp_sk(sk);tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);//将拥塞窗口设为当前拥塞窗口与ssthresh的最小值。tp->snd_cwnd_stamp = tcp_time_stamp;tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);}
//Disorder状态撤销

static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp){if (tp->undo_marker && !tp->undo_retrans) {//如果DSACK确认所有重传包DBGUNDO(sk, tp, "D-SACK");tcp_undo_cwr(sk, 1);//撤销拥塞窗口减少;tp->undo_marker = 0;//重传起始位为0;NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);}}
//Recovery状态撤销

第一种情况:high_seq被确认,Recovery->Open;

tcp_try_undo_recovery:已经分析过,不再重述;

第二种情况:如果ACK确认了新包;

static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,int acked){/* Partial ACK arrived. Force Hoe's retransmit. */int failed = IsReno(tp) || tp->fackets_out>tp->reordering;if (tcp_may_undo(tp)) {//尝试是否可以撤销/* Plain luck! Hole if filled with delayed * packet, rather than with a retransmit. */if (tp->retrans_out == 0)tp->retrans_stamp = 0;tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);DBGUNDO(sk, tp, "Hoe");tcp_undo_cwr(sk, 0);//撤销拥塞窗口减少NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);/* So... Do not make Hoe's retransmit yet. * If the first packet was delayed, the rest * ones are most probably delayed as well. */failed = 0;}return failed;}



0 0
原创粉丝点击