9.10 TIME_WAIT定时器
来源:互联网 发布:tao淘宝网 编辑:程序博客网 时间:2024/06/05 14:59
9.10.1 Why
当socket进入TIME_WAIT状态后,TIME_WAIT定时器启动。在超时之前,替代socket的tw sock会处理旧连接中的包,阻止其危害新连接。定时器超时后,tw sock被删除,并释放其占用的端口号。
9.10.2 When
TIME_WAIT定时器的安装由tcp_time_wait函数完成,调用tcp_time_wait函数的时机有:
(1)在TCP_FIN_WAIT2状态下socket关闭,没有用TCP_LINGER2选项将tp->linger2设置为小于0且tcp_fin_time的大小小于等于TCP_TIMEWAIT_LEN:
2059 void tcp_close(struct sock *sk, long timeout)2060 {...2183 if (sk->sk_state == TCP_FIN_WAIT2) {2184 struct tcp_sock *tp = tcp_sk(sk);2185 if (tp->linger2 < 0) {...2190 } else {2191 const int tmo = tcp_fin_time(sk);2192 2193 if (tmo > TCP_TIMEWAIT_LEN) {2194 inet_csk_reset_keepalive_timer(sk,2195 tmo - TCP_TIMEWAIT_LEN);2196 } else {2197 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);2198 goto out;2199 }2200 }...(2)TCP_FIN_WAIT2状态下收到FIN并发送ACK后:
3783 static void tcp_fin(struct sock *sk)3784 {...3818 case TCP_FIN_WAIT2:3819 /* Received a FIN -- send ACK and enter TIME_WAIT. */3820 tcp_send_ack(sk);3821 tcp_time_wait(sk, TCP_TIME_WAIT, 0);(3)孤儿socket在TCP_FIN_WAIT1状态下收到ACK时,满足:
1)没有用TCP_LINGER2选项将tp->linger2设置为小于0
2)tcp_fin_time的大小小于等于TCP_TIMEWAIT_LEN:
3)ACK中没有数据或数据全是旧的
4)ACK中没有FIN标记并且socket没有被应用进程锁定
5600 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,5601 const struct tcphdr *th, unsigned int len)5602 {...5751 case TCP_FIN_WAIT1:...5780 if (!sock_flag(sk, SOCK_DEAD))5781 /* Wake up lingering close() */5782 sk->sk_state_change(sk);5783 else {5784 int tmo;5785 5786 if (tp->linger2 < 0 ||5787 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&5788 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {5789 tcp_done(sk);5790 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);5791 return 1;5792 }5793 5794 tmo = tcp_fin_time(sk);5795 if (tmo > TCP_TIMEWAIT_LEN) {5796 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);5797 } else if (th->fin || sock_owned_by_user(sk)) {5798 /* Bad case. We could lose such FIN otherwise.5799 * It is not a big problem, but it looks confusing5800 * and not so rare event. We still can lose it now,5801 * if it spins in bh_lock_sock(), but it is really5802 * marginal case.5803 */5804 inet_csk_reset_keepalive_timer(sk, tmo);5805 } else {5806 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);5807 goto discard;5808 }...(4)TCP在TCP_CLOSING状态下收到ACK时:
5600 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,5601 const struct tcphdr *th, unsigned int len)5602 {...5813 case TCP_CLOSING:5814 if (tp->snd_una == tp->write_seq) {5815 tcp_time_wait(sk, TCP_TIME_WAIT, 0);5816 goto discard;5817 }...(5)FIN_WAIT2定时器超时时,没有用TCP_LINGER2选项将tp->linger2设置为小于0且tcp_fin_time的大小大于TCP_TIMEWAIT_LEN:
558 static void tcp_keepalive_timer (unsigned long data)559 {...578 if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {579 if (tp->linger2 >= 0) {580 const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;581 582 if (tmo > 0) {583 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);584 goto out;585 }...tcp_time_wait函数会调用inet_twsk_schedule函数安装TIME_WAIT定时器:
266 void tcp_time_wait(struct sock *sk, int state, int timeo)267 {...327 __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); //将tw sock放入ESTABLISHED hash表和bind hash表中,将sk从ESTABLISHED hash表中移除328 329 /* Get the TIME_WAIT timeout firing. */330 if (timeo < rto)331 timeo = rto;332 333 if (recycle_ok) {334 tw->tw_timeout = rto;335 } else {336 tw->tw_timeout = TCP_TIMEWAIT_LEN;337 if (state == TCP_TIME_WAIT)338 timeo = TCP_TIMEWAIT_LEN;339 }340 341 inet_twsk_schedule(tw, &tcp_death_row, timeo,342 TCP_TIMEWAIT_LEN);343 inet_twsk_put(tw);...
__inet_twsk_hashdance函数将tw_sock加入到bind hash表和ESTABLISHED hash表中,这样在tw_sock被删除之前,相应的IP|端口对既不允许bind,也不允许用于建立新连接:
126 void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,127 struct inet_hashinfo *hashinfo)128 {129 const struct inet_sock *inet = inet_sk(sk); 130 const struct inet_connection_sock *icsk = inet_csk(sk);131 struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);132 spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);133 struct inet_bind_hashbucket *bhead;134 /* Step 1: Put TW into bind hash. Original socket stays there too.135 Note, that any socket with inet->num != 0 MUST be bound in136 binding cache, even if it is closed.137 */138 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,139 hashinfo->bhash_size)]; 140 spin_lock(&bhead->lock);141 tw->tw_tb = icsk->icsk_bind_hash;142 WARN_ON(!icsk->icsk_bind_hash);143 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); //加入到bind hash表中144 spin_unlock(&bhead->lock);145 146 spin_lock(lock);...153 inet_twsk_add_node_rcu(tw, &ehead->twchain); //加入到ESTABLISHED hash表中154 155 /* Step 3: Remove SK from established hash. */156 if (__sk_nulls_del_node_init_rcu(sk))157 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);...167 atomic_add(1 + 1 + 1, &tw->tw_refcnt);168 169 spin_unlock(lock);170 }这样,在应用进程使用bind系统调用绑定与tw_sock相同的IP|端口对时内核会用到inet_csk_bind_conflict函数,但由于成功匹配到bind hash表中的tw_sock,会导致冲突,无法bind(详见2.2 Bind系统调用)。而在建立连接时,inet_hash_connect函数会调用__inet_check_established检查即将建立的连接是否与已建立的连接冲突:
311 static int __inet_check_established(struct inet_timewait_death_row *death_row,312 struct sock *sk, __u16 lport,313 struct inet_timewait_sock **twp)314 {...335 sk_nulls_for_each(sk2, node, &head->twchain) {336 if (sk2->sk_hash != hash)337 continue;338 339 if (likely(INET_TW_MATCH(sk2, net, acookie,340 saddr, daddr, ports, dif))) { //地址|端口匹配341 tw = inet_twsk(sk2);342 if (twsk_unique(sk, sk2, twp)) //调用tcp_twsk_unique判断是否冲突343 goto unique; //不冲突344 else345 goto not_unique; //冲突346 }347 }348 tw = NULL;...359 unique:...376 if (twp) {377 *twp = tw; //交给调用者处理378 } else if (tw) {379 /* Silly. Should hash-dance instead... */380 inet_twsk_deschedule(tw, death_row);381 382 inet_twsk_put(tw);383 }384 return 0;385 386 not_unique:387 spin_unlock(lock);388 return -EADDRNOTAVAIL;389 }tcp_twsk_unique函数
109 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 110 { 111 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); 112 struct tcp_sock *tp = tcp_sk(sk);... 125 if (tcptw->tw_ts_recent_stamp && //开启时间戳选项且在TIME_WAIT状态下收到过包 126 (twp == NULL || (sysctl_tcp_tw_reuse && 127 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { 128 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 129 if (tp->write_seq == 0) 130 tp->write_seq = 1; 131 tp->rx_opt.ts_recent = tcptw->tw_ts_recent; 132 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 133 sock_hold(sktw); 134 return 1; 135 } 136 137 return 0; 138 }可见,当:
(1)__inet_check_established函数的调用者不需要返回tw_sock的时候(即twp == NULL为真),或
(2)应用进程设置了net.ipv4.tcp_tw_reuse内核选项允许tw_sock重用时,
tcp_twsk_unique函数会返回1,即不冲突。不冲突时如果是(1),则__inet_check_established函数会释放tw_sock;否则会将tw_sock返回给调用者inet_hash_connect函数处理。在不冲突时,情况(1)发生时到底意味着什么?情况(1)没有发生时inet_hash_connect函数用tw_sock干什么?来看代码:
589 int inet_hash_connect(struct inet_timewait_death_row *death_row,590 struct sock *sk)591 {592 return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),593 __inet_check_established, __inet_hash_nolisten);594 }看来__inet_check_established函数的使用者是__inet_hash_connect函数:
477 int __inet_hash_connect(struct inet_timewait_death_row *death_row,478 struct sock *sk, u32 port_offset,479 int (*check_established)(struct inet_timewait_death_row *,480 struct sock *, __u16, struct inet_timewait_sock **),481 int (*hash)(struct sock *sk, struct inet_timewait_sock *twp))482 {...491 if (!snum) {...520 if (!check_established(death_row, sk,521 port, &tw))522 goto ok;...544 ok:545 hint += i;546 547 /* Head lock still held and bh's disabled */548 inet_bind_hash(sk, tb, port);549 if (sk_unhashed(sk)) {550 inet_sk(sk)->inet_sport = htons(port);551 twrefcnt += hash(sk, tw); //将sk加入到ESTABLISHED hash表中,将tw_sock从这个表中摘出552 }553 if (tw)554 twrefcnt += inet_twsk_bind_unhash(tw, hinfo); //将tw_sock从bind hash表中摘出555 spin_unlock(&head->lock);556 557 if (tw) {558 inet_twsk_deschedule(tw, death_row); //释放tw_sock559 while (twrefcnt) {560 twrefcnt--;561 inet_twsk_put(tw);562 }563 }564 565 ret = 0;566 goto out;567 }568 569 head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];570 tb = inet_csk(sk)->icsk_bind_hash;571 spin_lock_bh(&head->lock);572 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { //绑定到这个IP|port对的只有这一个socket573 hash(sk, NULL);574 spin_unlock_bh(&head->lock);575 return 0;576 } else {577 spin_unlock(&head->lock);578 /* No definite answer... Walk to established hash table */579 ret = check_established(death_row, sk, snum, NULL);580 out:581 local_bh_enable();582 return ret;583 }584 }
只有在要绑定的端口非0时,情况(1)才会发生,这意味着应用进程在调用connect系统调用之前已经成功地使用了bind系统调用;既然bind时不冲突,那么在connect时直接将tw_sock释放即可。而情况(1)没有发生时,tw_sock也会被释放并从hash表中摘出。
tcp_death_row的定义为:
35 struct inet_timewait_death_row tcp_death_row = { 36 .sysctl_max_tw_buckets = NR_FILE * 2, 37 .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, 38 .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock), 39 .hashinfo = &tcp_hashinfo, 40 .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, 41 (unsigned long)&tcp_death_row), 42 .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work, 43 inet_twdr_twkill_work), 44 /* Short-time timewait calendar */ 45 46 .twcal_hand = -1, 47 .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, 48 (unsigned long)&tcp_death_row), 49 };inet_twsk_schedule函数:
340 void inet_twsk_schedule(struct inet_timewait_sock *tw,341 struct inet_timewait_death_row *twdr,342 const int timeo, const int timewait_len)343 { 344 struct hlist_head *list;345 int slot;346 ... //计算tw sock加入到time_wait定时器链表中的位置,slot越大则超时时间越长371 slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;372 373 spin_lock(&twdr->death_lock);374375 /* Unlink it, if it was scheduled */376 if (inet_twsk_del_dead_node(tw))//已经在time_wait定时器链表中了,则摘除377 twdr->tw_count--;378 else379 atomic_inc(&tw->tw_refcnt);380381 if (slot >= INET_TWDR_RECYCLE_SLOTS) { //超时时间过长,使用慢速定时器382 /* Schedule to slow timer */383 if (timeo >= timewait_len) {384 slot = INET_TWDR_TWKILL_SLOTS - 1;385 } else {386 slot = DIV_ROUND_UP(timeo, twdr->period);387 if (slot >= INET_TWDR_TWKILL_SLOTS)388 slot = INET_TWDR_TWKILL_SLOTS - 1;389 }390 tw->tw_ttd = jiffies + timeo;391 slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);392 list = &twdr->cells[slot]; //添加tw_sock到twdr->cells中393 } else { //超时时间短的都放入再生定时器中394 tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);395396 if (twdr->twcal_hand < 0) { //再生定时器未设置或已经超时397 twdr->twcal_hand = 0;398 twdr->twcal_jiffie = jiffies; //记录初次设置定时器的时间399 twdr->twcal_timer.expires = twdr->twcal_jiffie +400 (slot << INET_TWDR_RECYCLE_TICK);401 add_timer(&twdr->twcal_timer);//设置再生定时器402 } else {403 if (time_after(twdr->twcal_timer.expires,404 jiffies + (slot << INET_TWDR_RECYCLE_TICK))) //再生定时器未超时405 mod_timer(&twdr->twcal_timer,406 jiffies + (slot << INET_TWDR_RECYCLE_TICK));//设置再生超时定时器407 slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);408 }409 list = &twdr->twcal_row[slot]; //添加tw_sock到twdr->twcal_row中410 }411412 hlist_add_head(&tw->tw_death_node, list);//加入到time_wait定时器链表中413414 if (twdr->tw_count++ == 0)//加入之前time_wait定时器链表中没有成员415 mod_timer(&twdr->tw_timer, jiffies + twdr->period); //设置慢速定时器416 spin_unlock(&twdr->death_lock);417 }
371:按照超时时间长短划分slot:0 jiffies为slot 0,1-2^INET_TWDR_RECYCLE_TICK jiffies为slot 1,2^INET_TWDR_RECYCLE_TICK + 1 -2^(INET_TWDR_RECYCLE_TICK + 1)为slot 2...每个slot 的时间长度是2^INET_TWDR_RECYCLE_TICK个jiffies。
386:按照超时时间长短划分slot,每个slot的时间长度是twdr->period。
可见TIME_WAIT定时器包含2个定时器结构:twcal_timer和tw_timer。其中twcal_timer的超时时间较短,被称为“再生定时器”。
tw_timer的超时时间是TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS(即7.5s),删除的条件有:
(1)在应用进程使用connect系统调用绑定IP|端口时匹配到tw_sock,但判定不冲突时(详见__inet_twsk_hashdance函数相关分析);
(2)调用inet_twsk_deschedule删除一个tw_sock,如果tw队列中没有成员,则禁用tw_timer:
326 void inet_twsk_deschedule(struct inet_timewait_sock *tw,327 struct inet_timewait_death_row *twdr)328 { 329 spin_lock(&twdr->death_lock);330 if (inet_twsk_del_dead_node(tw)) {331 inet_twsk_put(tw);332 if (--twdr->tw_count == 0) //tw队列为空333 del_timer(&twdr->tw_timer); //删除tw_timer 334 } 335 spin_unlock(&twdr->death_lock);336 __inet_twsk_kill(tw, twdr->hashinfo);337 }
__inet_twsk_kill会将tw_sock从bind hash表和ESTABLISHED hash表中删除:
70 static void __inet_twsk_kill(struct inet_timewait_sock *tw, 71 struct inet_hashinfo *hashinfo) 72 { 73 struct inet_bind_hashbucket *bhead; 74 int refcnt; 75 /* Unlink from established hashes. */ 76 spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); 77 78 spin_lock(lock); 79 refcnt = inet_twsk_unhash(tw); //从ESTABLISHED hash表中删除 80 spin_unlock(lock); 81 82 /* Disassociate with bind bucket. */ 83 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, 84 hashinfo->bhash_size)]; 85 86 spin_lock(&bhead->lock); 87 refcnt += inet_twsk_bind_unhash(tw, hashinfo); //从bind hash表中删除 88 spin_unlock(&bhead->lock); 89 90 #ifdef SOCK_REFCNT_DEBUG 91 if (atomic_read(&tw->tw_refcnt) != 1) { 92 pr_debug("%s timewait_sock %p refcnt=%d\n", 93 tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); 94 } 95 #endif 96 while (refcnt) { 97 inet_twsk_put(tw); 98 refcnt--; 99 }100 }
(3)twcal_timer超时时调用inet_twdr_twcal_tick删除tw_sock,如果tw队列中没有成员,则禁用tw_timer.
再生定时器不会被删除,其超时时间为slot * 2^INET_TWDR_RECYCLE_TICK。INET_TWDR_RECYCLE_TICK的定义如下:
41 #if HZ <= 16 || HZ > 4096 42 # error Unsupported: HZ <= 16 or HZ > 4096 43 #elif HZ <= 32 44 # define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) 45 #elif HZ <= 64 46 # define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) 47 #elif HZ <= 128 48 # define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) 49 #elif HZ <= 256 50 # define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) 51 #elif HZ <= 512 52 # define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) 53 #elif HZ <= 1024 54 # define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) 55 #elif HZ <= 2048 56 # define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) 57 #else 58 # define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) 59 #endif
如果jiffies每1ms加1(即HZ=1000),则INET_TWDR_RECYCLE_TICK的值为7;如果timeo的值为60s(通常是最大值),则slot的值为469,那么再生定时器的最大超时时间约为60s。如果1ms <= timeo <= 128ms,则slot = 1,再生定时器的最小超时时间为128ms(即1 << INET_TWDR_RECYCLE_TICK个jiffies)。
9.10.3 What
twcal_timer对应的超时函数是inet_twdr_twcal_tick:
420 void inet_twdr_twcal_tick(unsigned long data)421 {422 struct inet_timewait_death_row *twdr;423 int n, slot;424 unsigned long j;425 unsigned long now = jiffies;426 int killed = 0;427 int adv = 0;428429 twdr = (struct inet_timewait_death_row *)data;430431 spin_lock(&twdr->death_lock);432 if (twdr->twcal_hand < 0)//再生超时定时器未设置或已经超时433 goto out;434435 slot = twdr->twcal_hand;436 j = twdr->twcal_jiffie; //获取初次设置定时器的时间437438 for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { //遍历所有时隙439 if (time_before_eq(j, now)) { //已经超时440 struct hlist_node *safe;441 struct inet_timewait_sock *tw;442443 inet_twsk_for_each_inmate_safe(tw, safe,444 &twdr->twcal_row[slot]) { //遍历一个时隙中的所有节点 445 __inet_twsk_del_dead_node(tw); //删除定时节点446 __inet_twsk_kill(tw, twdr->hashinfo);//将tw sock移出TCP ESTABLISH hash表...450 inet_twsk_put(tw); 451 killed++; //记录已删除的节点的数量452 }453 } else {//尚未超时454 if (!adv) { 455 adv = 1; 456 twdr->twcal_jiffie = j; //更新尚未超时的时间起点 457 twdr->twcal_hand = slot; //更新尚未超时的时隙起点 458 }459460 if (!hlist_empty(&twdr->twcal_row[slot])) {461 mod_timer(&twdr->twcal_timer, j);462 goto out;463 }464 }465 j += 1 << INET_TWDR_RECYCLE_TICK;466 slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1); //进入下一个时隙467 }468 twdr->twcal_hand = -1; //标记再生定时器已经超时469470 out:471 if ((twdr->tw_count -= killed) == 0)472 del_timer(&twdr->tw_timer);473 #ifndef CONFIG_NET_NS474 NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed);475 #endif476 spin_unlock(&twdr->death_lock);477 }439-451:再生定时器会将所有落入相同时隙(slot)的节点做同样的对待,它的基本动作是超时则删除,否则再次设置再生定时器
慢速定时器tw_timer对应的超时函数是inet_twdr_hangman:
262 void inet_twdr_hangman(unsigned long data)263 {264 struct inet_timewait_death_row *twdr;265 unsigned int need_timer;266267 twdr = (struct inet_timewait_death_row *)data;268 spin_lock(&twdr->death_lock);269270 if (twdr->tw_count == 0) //没有tw_sock271 goto out;272273 need_timer = 0;274 if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { //删除慢速定时器链表中的节点及其对应的tw_sock275 twdr->thread_slots |= (1 << twdr->slot); //将当前slot的值标记下来276 schedule_work(&twdr->twkill_work); //若杀死了过多的tw_sock,则将没有删除完毕则将任务放入工作者队列中由工作者进程完成277 need_timer = 1;278 } else { //没有杀死过多的tw_sock279 /* We purged the entire slot, anything left? */280 if (twdr->tw_count) //还有tw_sock281 need_timer = 1; //还要继续设置tw_timer282 twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); //进入下一个slot283 }284 if (need_timer)285 mod_timer(&twdr->tw_timer, jiffies + twdr->period);286 out:287 spin_unlock(&twdr->death_lock);288inet_twdr_hangman每次超时只处理一个slot,然后再设置tw_timer在经过twdr->period的时间后再超时处理下一个slot。由于相邻slot的超时时间差正好是一个twdr->period,故所有slot都能得到及时的处理。
inet_twdr_do_twkill_work函数删除慢速定时器链表中的节点及其对应的tw_sock:
215 static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,216 const int slot)217 {218 struct inet_timewait_sock *tw;219 unsigned int killed;220 int ret;221222 /* NOTE: compare this to previous version where lock223 * was released after detaching chain. It was racy,224 * because tw buckets are scheduled in not serialized context225 * in 2.3 (with netfilter), and with softnet it is common, because226 * soft irqs are not sequenced.227 */228 killed = 0;229 ret = 0;230 rescan:231 inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) { //遍历慢速超时队列232 __inet_twsk_del_dead_node(tw);233 spin_unlock(&twdr->death_lock);234 __inet_twsk_kill(tw, twdr->hashinfo);235 #ifdef CONFIG_NET_NS236 NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED);237 #endif238 inet_twsk_put(tw);239 killed++; 240 spin_lock(&twdr->death_lock);241 if (killed > INET_TWDR_TWKILL_QUOTA) { //杀戮过重242 ret = 1;243 break;244 }245 246 /* While we dropped twdr->death_lock, another cpu may have247 * killed off the next TW bucket in the list, therefore248 * do a fresh re-read of the hlist head node with the249 * lock reacquired. We still use the hlist traversal250 * macro in order to get the prefetches.251 */252 goto rescan;253 }254255 twdr->tw_count -= killed;256 #ifndef CONFIG_NET_NS257 NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed);258 #endif259 return ret;260 }inet_twdr_twkill_work函数是twdr->twkill_work对应的工作者线程处理函数,用于将inet_twdr_do_twkill_work函数未完成的屠杀进行到底:
291 void inet_twdr_twkill_work(struct work_struct *work)292 { 293 struct inet_timewait_death_row *twdr =294 container_of(work, struct inet_timewait_death_row, twkill_work);295 int i;296 297 BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) >298 (sizeof(twdr->thread_slots) * 8));299 300 while (twdr->thread_slots) {301 spin_lock_bh(&twdr->death_lock);302 for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {303 if (!(twdr->thread_slots & (1 << i))) //slot i不需要处理304 continue;305306 while (inet_twdr_do_twkill_work(twdr, i) != 0) { //循环一直到杀光为止307 if (need_resched()) {308 spin_unlock_bh(&twdr->death_lock);309 schedule();310 spin_lock_bh(&twdr->death_lock);311 }312 }313 314 twdr->thread_slots &= ~(1 << i); //已经杀光此slot了315 }316 spin_unlock_bh(&twdr->death_lock);317 }318 }问题:慢速定时器超时时如果释放的tw_sock超出限制为什么要将任务转移到工作者线程中完成呢?
答案(个人理解):Linux定时器是在软中断上下文执行,如果运行时间过长会导致当前CPU的其它任务无法执行,有违公平性。而工作者线程的优先级较低,运行的时间长一点也没关系。
- 9.10 TIME_WAIT定时器
- TIME_WAIT
- TIME_WAIT
- TIME_WAIT
- TIME_WAIT
- TIME_WAIT
- TIME_WAIT
- TIME_WAIT
- Time_Wait
- time_wait
- TIME_WAIT
- TIME_WAIT
- TIME_WAIT 状态
- 关于TIME_WAIT
- TIME_WAIT是什么?
- TIME_WAIT状态
- TIME_WAIT解决方法
- TIME_WAIT状态
- 直方图均衡
- 黑马程序员 异常处理的学习笔记
- 探究Android SQLite3多线程
- OpenGL--混合
- hdu 1253 胜利大逃亡
- 9.10 TIME_WAIT定时器
- 第六周项目六:复数模板类(2)
- github 多人协作
- Android 自定义实现switch开关按钮
- UITableView
- hdu 2722 Here We Go(relians) Again(恶心读入+最短路floyd)
- Python optionParser模块的使用方法
- python中read() readline()以及readlines()用法
- 自定义输出流状态