linux拥塞控制状态机
来源:互联网 发布:承德县大数据招聘信息 编辑:程序博客网 时间:2024/06/13 07:08
当sender收到确认后,会根据确认的情况进行拥塞控制状态的转换,其实现函数为tcp_fastretrans_alert。
拥塞状态一共分5种,它们之间的转换关系如下图:
Open状态:
常态,正常慢启动或者拥塞避免。
Disorder状态:
当sender收到重复ACK(dupack)或者SACK(选择性确认)时,会进入Disorder状态。拥塞窗口不做调整,每个新到的确认触发一个新包的发送;
CWR状态:
发送方收到显式拥塞信息后,sender采用保守方式,不立即减少拥塞窗口,而是每隔一个ACK将拥塞窗口减一,直到减为一半。
Recovery状态:
Reno算法中,收到三个重复的ACK,或者支持SACK时,SACK段足够多,在支持FACK时,如果highest SACK足够大,都会进入Recovery状态。拥塞窗口的变化类似于CWR状态,直到降为ssthresh,且不会增大。
Loss状态:
当RTO超时,或者接收到的ACK已经被之前的SACK确认,则会进入Loss状态。拥塞窗口设置为1,按慢启动增大。
内核版本:2.6.18
拥塞状态的处理
当sender收到ACK时,如果判断ACK是可疑的,则会进行状态处理。可疑条件:
(1)拥塞控制状态不是Open;
(2)显式拥塞通知或者有数据SACKED;
(3)ACK没有负载,没有窗口更新,也没有确认新数据(即该ACK为重复确认)。
变量说明:
packets_out:
sender在snd.una之后已经发送的包数;
sacked_out:
被SACK块确认的包数;
fackets_out:
最高SACK块与snd.una之差;
left_out:
已经离开网络的包数,包括被sack包和丢失包;
/*
 * tcp_fastretrans_alert - run the congestion-control state machine for an ACK.
 *
 * @sk:            socket being processed
 * @prior_snd_una: compared with tp->snd_una to tell whether this ACK advanced
 *                 snd_una (presumably snd_una as it was before the ACK)
 * @prior_packets: used as "prior_packets - tp->packets_out" to count the
 *                 packets acknowledged by this ACK (presumably packets_out as
 *                 it was before the ACK)
 * @flag:          FLAG_* bits describing the ACK; FLAG_NOT_DUP, FLAG_ECE,
 *                 FLAG_DATA_LOST and FLAG_DATA_ACKED are tested here
 *
 * Steps A-E sanitise the counters and check whether the current state can be
 * left; step F processes the (possibly new) state and, unless it returns
 * early, ends by updating the scoreboard, lowering cwnd and retransmitting.
 */
static void tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
				  int prior_packets, int flag)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	/* Is this ACK a duplicate ACK? */
	int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP));

	/* With nothing outstanding, sacked_out and fackets_out must be 0. */
	/* Some technical things:
	 * 1. Reno does not count dupacks (sacked_out) automatically. */
	if (!tp->packets_out)
		tp->sacked_out = 0;
	/* 2. SACK counts snd_fack in packets inaccurately. */
	if (tp->sacked_out == 0)
		tp->fackets_out = 0;

	/* Now state machine starts.
	 * A. ECE, hence prohibit cwnd undoing, the reduction is required.
	 *
	 * prior_ssthresh keeps the old ssthresh for a later undo; setting it
	 * to 0 forbids the undo.
	 */
	if (flag&FLAG_ECE)
		tp->prior_ssthresh = 0;

	/* B. In all the states check for reneging SACKs.
	 *
	 * Article note: if the data acknowledged now had already been SACKed,
	 * the receiver is congested or buggy; this is handled like a timeout.
	 */
	if (tp->sacked_out && tcp_check_sack_reneging(sk))
		return;

	/* C. Process data loss notification, provided it is valid.
	 *
	 * Article note: loss is detected in two ways. (1) The packet with the
	 * highest sequence number at the time of congestion (high_seq) gets
	 * SACKed. (2) A retransmission is lost: snd_nxt is recorded when the
	 * packet is retransmitted, and if that gets SACKed the retransmitted
	 * packet is considered lost.
	 *
	 * reordering is the tolerated reordering degree; the article says it
	 * is updated while processing SACK options.
	 */
	if ((flag&FLAG_DATA_LOST) &&
	    before(tp->snd_una, tp->high_seq) &&
	    icsk->icsk_ca_state != TCP_CA_Open &&
	    tp->fackets_out > tp->reordering) {
		/* Tag the lost packets at the head of the retransmit queue
		 * with LOST in the scoreboard. */
		tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
	}

	/* D. Synchronize left_out to current state. */
	tcp_sync_left_out(tp);

	/* E. Check state exit conditions. State can be terminated
	 *    when high_seq is ACKed. */
	if (icsk->icsk_ca_state == TCP_CA_Open) {
		/* NOTE(review): the article shows
		 * "//if (!sysctl_tcp_frto) BUG_TRAP(tp->retrans_out == 0);"
		 * on a collapsed line, so it is unclear how much of it was
		 * commented out. The guard is kept active here, which is
		 * believed to match upstream 2.6.18 - confirm against
		 * net/ipv4/tcp_input.c.
		 */
		if (!sysctl_tcp_frto)
			BUG_TRAP(tp->retrans_out == 0);
		tp->retrans_stamp = 0;
	} else if (!before(tp->snd_una, tp->high_seq)) {
		/* high_seq has been ACKed. */
		switch (icsk->icsk_ca_state) {
		case TCP_CA_Loss:
			icsk->icsk_retransmits = 0;	/* reset retransmit count */
			/* Try to recover from the Loss state. */
			if (tcp_try_undo_recovery(sk, tp))
				return;
			break;

		case TCP_CA_CWR:
			/* CWR is to be held something *above* high_seq
			 * is ACKed for CWR bit to reach receiver. */
			if (tp->snd_una != tp->high_seq) {
				/* Data above high_seq was ACKed: finish the
				 * reduction and go back to Open. */
				tcp_complete_cwr(sk);
				tcp_set_ca_state(sk, TCP_CA_Open);
			}
			break;

		case TCP_CA_Disorder:
			tcp_try_undo_dsack(sk, tp);
			if (!tp->undo_marker ||
			    /* For SACK case do not Open to allow to undo
			     * catching for all duplicate ACKs. */
			    IsReno(tp) || tp->snd_una != tp->high_seq) {
				tp->undo_marker = 0;
				tcp_set_ca_state(sk, TCP_CA_Open);
			}
			break;

		case TCP_CA_Recovery:
			/* Reno: reset the emulated sacked_out to 0. */
			if (IsReno(tp))
				tcp_reset_reno_sack(tp);
			if (tcp_try_undo_recovery(sk, tp))
				return;
			tcp_complete_cwr(sk);
			break;
		}
	}

	/* F. Process state. */
	switch (icsk->icsk_ca_state) {
	case TCP_CA_Recovery:
		if (prior_snd_una == tp->snd_una) {
			/* Reno got a duplicate ACK: emulate SACK
			 * (article note: sacked_out++ and reordering update). */
			if (IsReno(tp) && is_dupack)
				tcp_add_reno_sack(sk);
		} else {
			/* Number of packets acknowledged by this ACK. */
			int acked = prior_packets - tp->packets_out;

			/* Reno: adjust the duplicate-ACK count. */
			if (IsReno(tp))
				tcp_remove_reno_sacks(sk, tp, acked);
			/* Try to undo the cwnd reduction on a partial ACK. */
			is_dupack = tcp_try_undo_partial(sk, tp, acked);
		}
		break;

	case TCP_CA_Loss:
		if (flag&FLAG_DATA_ACKED)
			icsk->icsk_retransmits = 0;	/* reset retransmit count */
		if (!tcp_try_undo_loss(sk, tp)) {
			/* Loss cannot be undone: moderate cwnd (article note:
			 * cwnd = min(cwnd, in_flight + 3)) and retransmit the
			 * lost segments. */
			tcp_moderate_cwnd(tp);
			tcp_xmit_retransmit_queue(sk);
			return;
		}
		if (icsk->icsk_ca_state != TCP_CA_Open)
			return;
		/* Loss is undone; fall through to processing in Open state. */

	default:
		if (IsReno(tp)) {
			/* New data was ACKed: sacked_out = 0. */
			if (tp->snd_una != prior_snd_una)
				tcp_reset_reno_sack(tp);
			/* Duplicate ACK: sacked_out++. */
			if (is_dupack)
				tcp_add_reno_sack(sk);
		}

		/* If D-SACKs covered every retransmission, try to undo the
		 * cwnd reduction. */
		if (icsk->icsk_ca_state == TCP_CA_Disorder)
			tcp_try_undo_dsack(sk, tp);

		/* Not yet time for Recovery: try to move to Open or Disorder
		 * instead. */
		if (!tcp_time_to_recover(sk, tp)) {
			tcp_try_to_open(sk, tp, flag);
			return;
		}

		/* MTU probe failure: don't reduce cwnd */
		if (icsk->icsk_ca_state < TCP_CA_CWR &&
		    icsk->icsk_mtup.probe_size &&
		    tp->snd_una == tp->mtu_probe.probe_seq_start) {
			tcp_mtup_probe_failed(sk);
			/* Restores the reduction we did in tcp_mtup_probe() */
			tp->snd_cwnd++;
			tcp_simple_retransmit(sk);
			return;
		}

		/* Otherwise enter Recovery state */

		if (IsReno(tp))
			NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY);
		else
			NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY);

		tp->high_seq = tp->snd_nxt;
		tp->prior_ssthresh = 0;
		tp->undo_marker = tp->snd_una;
		tp->undo_retrans = tp->retrans_out;

		if (icsk->icsk_ca_state < TCP_CA_CWR) {
			/* Remember the current ssthresh unless ECE forbids
			 * undoing. */
			if (!(flag&FLAG_ECE))
				tp->prior_ssthresh = tcp_current_ssthresh(sk);
			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
			TCP_ECN_queue_cwr(tp);
		}

		tp->bytes_acked = 0;
		tp->snd_cwnd_cnt = 0;
		tcp_set_ca_state(sk, TCP_CA_Recovery);
	}

	/* On a duplicate ACK, or when the head of the retransmit queue has
	 * timed out, refresh the loss scoreboard. */
	if (is_dupack || tcp_head_timedout(sk, tp))
		tcp_update_scoreboard(sk, tp);
	/* Article note: cwnd drops by one on every second ACK until it
	 * reaches ssthresh. */
	tcp_cwnd_down(sk);
	/* Retransmit the packets marked lost. */
	tcp_xmit_retransmit_queue(sk);
}
拥塞状态撤销
//从Loss状态撤销;
第一种情况:当high_seq被确认,Loss->Open
static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp){if (tcp_may_undo(tp)) {//测试是否可以从Loss状态撤销,撤销条件:1、撤销重传包为0;2、没有重传;3、重传了还没有确认/* Happy end! We did not retransmit anything * or our original transmission succeeded. */DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");tcp_undo_cwr(sk, 1);//撤销拥塞窗口减少;if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);elseNET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO);tp->undo_marker = 0;}if (tp->snd_una == tp->high_seq && IsReno(tp)) {//对于reno算法,必须high_seq后的包被ACK,才能撤销Loss状态,防止假重传(RFC2582)/* Hold old state until something *above* high_seq * is ACKed. For Reno it is MUST to prevent false * fast retransmits (RFC2582). SACK TCP is safe. */tcp_moderate_cwnd(tp);return 1;}tcp_set_ca_state(sk, TCP_CA_Open);//进入Open状态;return 0;}
第二种情况:收到新的确认,尝试Loss->Open
static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp){if (tcp_may_undo(tp)) {//尝试是否可以撤销;struct sk_buff *skb;sk_stream_for_retrans_queue(skb, sk) {TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;//遍历重传队列,取消LOST标志;}clear_all_retrans_hints(tp);DBGUNDO(sk, tp, "partial loss");tp->lost_out = 0;tp->left_out = tp->sacked_out;tcp_undo_cwr(sk, 1);//撤销拥塞窗口减少;NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);inet_csk(sk)->icsk_retransmits = 0;tp->undo_marker = 0;if (!IsReno(tp))tcp_set_ca_state(sk, TCP_CA_Open);return 1;}return 0;}
//撤销拥塞窗口减少
static void tcp_undo_cwr(struct sock *sk, const int undo){struct tcp_sock *tp = tcp_sk(sk);if (tp->prior_ssthresh) {const struct inet_connection_sock *icsk = inet_csk(sk);if (icsk->icsk_ca_ops->undo_cwnd)tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);elsetp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {tp->snd_ssthresh = tp->prior_ssthresh;//恢复到原来的ssthresh值;TCP_ECN_withdraw_cwr(tp);}} else {tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);}tcp_moderate_cwnd(tp);tp->snd_cwnd_stamp = tcp_time_stamp;/* There is something screwy going on with the retrans hints after an undo */clear_all_retrans_hints(tp);}
//CWR状态撤销,结束拥塞窗口减少。
static inline void tcp_complete_cwr(struct sock *sk){struct tcp_sock *tp = tcp_sk(sk);tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);//将拥塞窗口设为当前拥塞窗口与ssthresh的最小值。tp->snd_cwnd_stamp = tcp_time_stamp;tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);}//Disorder状态撤销
static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp){if (tp->undo_marker && !tp->undo_retrans) {//如果DSACK确认所有重传包DBGUNDO(sk, tp, "D-SACK");tcp_undo_cwr(sk, 1);//撤销拥塞窗口减少;tp->undo_marker = 0;//重传起始位为0;NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);}}//Recovery状态撤销
第一种情况:high_seq被确认,Recovery->Open;
tcp_try_undo_recovery:已经分析过,不再重述;
第二种情况:如果ACK确认了新包;
static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,int acked){/* Partial ACK arrived. Force Hoe's retransmit. */int failed = IsReno(tp) || tp->fackets_out>tp->reordering;if (tcp_may_undo(tp)) {//尝试是否可以撤销/* Plain luck! Hole if filled with delayed * packet, rather than with a retransmit. */if (tp->retrans_out == 0)tp->retrans_stamp = 0;tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);DBGUNDO(sk, tp, "Hoe");tcp_undo_cwr(sk, 0);//撤销拥塞窗口减少NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);/* So... Do not make Hoe's retransmit yet. * If the first packet was delayed, the rest * ones are most probably delayed as well. */failed = 0;}return failed;}
- linux拥塞控制状态机
- Linux 内核网络协议栈 ------ TCP拥塞状态机 tcp_fastretrans_alert
- Linux TCP拥塞控制中undo操作
- 拥塞控制
- 拥塞控制
- 拥塞控制
- 拥塞控制
- 拥塞控制
- 拥塞控制
- 拥塞控制
- 拥塞控制
- TCP:浅析拥塞控制窗口、慢启动、拥塞避免在linux内核中的实现
- 网络拥塞控制TCP拥塞控制算法
- linux内核工程导论-网络:tcp拥塞控制
- Linux Kernel 4.9中BBR拥塞控制算法的优势
- TCP拥塞状态机的实现(上)
- TCP拥塞状态机的实现(中)
- TCP拥塞状态机的实现(下)
- linux下ipcs和ipcrm命令详解
- .net CsvHelper 2.0
- BITMAP
- dom4j基本使用_xpath基本使用_junit测试
- 开发经验_2014/11/13
- linux拥塞控制状态机
- 广州电子地图下载 来自谷歌电子地图库 地图展示15、17、19级
- 八数码(hdu 1043)
- java--生成实体类方法
- RDLC8.0 报表完美解决方案
- 我的新机器配置列表(每次换电脑或者装机都可能更新)windows版..
- 一个计算机专业学生几年的Java编程经验汇总
- 重新打开Eclipse出现java.lang.NullPointerException的问题解决
- php5.3之命名空间