TCP三步握手建立连接(2)-----被动连接方发送SYN/ACK
来源:互联网 发布:linux vncserver 编辑:程序博客网 时间:2024/05/13 07:57
本过程分析的基础建立在本地TCP已经调用了listen进入了监听状态,至于数据包如何进入tcp这里暂且不表。
tcp数据包的入口函数tcp_v4_rcv,该函数在检验数据包的正确性后,找到对应的INET SOCKET,对于SYN包,
找到的是对应的Listen状态的sock。后面的处理无论是数据包进入backlog,还是prequeue,最后都会进入
tcp_v4_do_rcv函数进行处理。
- /* The socket must have it's spinlock held when we get
- * here.
- *
- * We have a potential double-lock case here, so even when
- * doing backlog processing we use the BH locking scheme.
- * This is because we cannot sleep with the original spinlock
- * held.
- *
- * Dispatches one received segment according to sk->sk_state:
- *   - TCP_ESTABLISHED: handed to tcp_rcv_established() (fast path,
- *     taken before the length/checksum test below);
- *   - TCP_LISTEN: tcp_v4_hnd_req() selects the socket to use; a socket
- *     other than the listener is processed via tcp_child_process();
- *   - anything else (and a listener that was returned unchanged):
- *     tcp_rcv_state_process().
- * A non-zero result from any of these handlers makes this function send
- * a reset via tcp_v4_send_reset() and free the skb.
- * Every exit path visible here returns 0.
- */
- int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
- {
- struct sock *rsk;
- #ifdef CONFIG_TCP_MD5SIG
- /*
- * We really want to reject the packet as early as possible
- * if:
- * o We're expecting an MD5'd packet and this is no MD5 tcp option
- * o There is an MD5 option and we're not expecting one
- */
- if (tcp_v4_inbound_md5_hash(sk, skb))
- goto discard;
- #endif
- /* This path handles segments once the connection is established */
- if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
- TCP_CHECK_TIMER(sk);
- if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
- rsk = sk;
- goto reset;
- }
- TCP_CHECK_TIMER(sk);
- return 0;
- }
- if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
- goto csum_err;
- /* Segments arriving on a listening socket take this path. The original
- * note says "both SYN and SYN/ACK"; NOTE(review): for a listener the
- * second segment is presumably the handshake-completing ACK rather than
- * a SYN/ACK - confirm.
- */
- if (sk->sk_state == TCP_LISTEN) {
- struct sock *nsk = tcp_v4_hnd_req(sk, skb);
- if (!nsk)
- goto discard;
- if (nsk != sk) { /* a socket other than the listener was returned */
- if (tcp_child_process(sk, nsk, skb)) {
- rsk = nsk;
- goto reset;
- }
- return 0;
- }
- }
- TCP_CHECK_TIMER(sk);
- if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
- rsk = sk;
- goto reset;
- }
- TCP_CHECK_TIMER(sk);
- return 0;
- reset:
- tcp_v4_send_reset(rsk, skb);
- discard:
- kfree_skb(skb);
- /* Be careful here. If this function gets more complicated and
- * gcc suffers from register pressure on the x86, sk (in %ebx)
- * might be destroyed here. This current version compiles correctly,
- * but you have been warned.
- */
- return 0;
- csum_err: /* too short or bad checksum: count the error, then drop */
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
- goto discard;
- }
现在进入tcp_v4_hnd_req函数,该函数处理listen sock的连接请求。
- static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
- { /* Decide which socket should process a segment received on listener sk.
- * Returns, in order of preference:
- *   - the result of tcp_check_req() when a pending request matches;
- *   - a matching socket from the established table, returned with
- *     bh_lock_sock() held, or NULL if that match is in TCP_TIME_WAIT;
- *   - otherwise sk itself (possibly replaced by cookie_v4_check()
- *     when CONFIG_SYN_COOKIES is set).
- */
- struct tcphdr *th = tcp_hdr(skb);
- const struct iphdr *iph = ip_hdr(skb);
- struct sock *nsk;
- struct request_sock **prev;
- /* Find possible connection requests. */
- struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
- iph->saddr, iph->daddr);
- if (req)
- return tcp_check_req(sk, skb, req, prev);
- nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
- th->source, iph->daddr, th->dest, inet_iif(skb));
- if (nsk) {
- if (nsk->sk_state != TCP_TIME_WAIT) {
- bh_lock_sock(nsk); /* returned locked; the caller is presumably expected to unlock - confirm */
- return nsk;
- }
- inet_twsk_put(inet_twsk(nsk)); /* TIME_WAIT match: put it back and have the caller discard */
- return NULL;
- }
- #ifdef CONFIG_SYN_COOKIES
- /* Only a segment with ACK set and neither RST nor SYN goes through the cookie check */
- if (!th->rst && !th->syn && th->ack)
- sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
- #endif
- return sk;
- }
很显然,由于这是该连接的第一个SYN包,listen sock的请求队列和已建立连接的hash表中都还没有此次请求的信息,而SYN包也不会进入syn cookie的校验分支,所以该函数直接返回参数中传入的sk。
然后在tcp_v4_do_rcv中进入tcp_rcv_state_process。
tcp_rcv_state_process函数是个状态机。对该函数我们分开来说,对于此次连接进入下面的case语句中。
- case TCP_LISTEN:
- /* An ACK received in LISTEN state: return 1 (tcp_v4_do_rcv sends a reset on a non-zero result) */
- if (th->ack)
- return 1;
- if (th->rst)
- goto discard;
- /* Handle a new connection request */
- if (th->syn) {
- if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
- return 1;
- /* Now we have several options: In theory there is
- * nothing else in the frame. KA9Q has an option to
- * send data with the syn, BSD accepts data with the
- * syn up to the [to be] advertised window and
- * Solaris 2.1 gives you a protocol error. For now
- * we just ignore it, that fits the spec precisely
- * and avoids incompatibilities. It would be nice in
- * future to drop through and process the data.
- * (Any data carried in the SYN is dropped here, although some protocols allow sending data with the SYN.)
- * Now that TTCP is starting to be used we ought to
- * queue this data.
- * But, this leaves one open to an easy denial of
- * service attack, and SYN cookies can't defend
- * against this problem. So, we drop the data
- * in the interest of security over speed unless
- * it's still in use.
- */
- kfree_skb(skb);
- return 0;
- }
- goto discard;
icsk_af_ops是面向连接的SOCKET中与地址族相关的特定操作函数集,在TCP(IPv4)连接中对应的是
- struct inet_connection_sock_af_ops ipv4_specific = { /* IPv4 operations table reached through icsk->icsk_af_ops */
- .queue_xmit = ip_queue_xmit,
- .send_check = tcp_v4_send_check,
- .rebuild_header = inet_sk_rebuild_header,
- .conn_request = tcp_v4_conn_request, /* invoked from the TCP_LISTEN case when a SYN arrives */
- .syn_recv_sock = tcp_v4_syn_recv_sock,
- .remember_stamp = tcp_v4_remember_stamp,
- .net_header_len = sizeof(struct iphdr),
- .setsockopt = ip_setsockopt,
- .getsockopt = ip_getsockopt,
- .addr2sockaddr = inet_csk_addr2sockaddr,
- .sockaddr_len = sizeof(struct sockaddr_in),
- .bind_conflict = inet_csk_bind_conflict,
- #ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_ip_setsockopt,
- .compat_getsockopt = compat_ip_getsockopt,
- #endif
- };
所以对应的函数是tcp_v4_conn_request
- /*
- * tcp_v4_conn_request - handle a SYN received on a listening IPv4 socket.
- * @sk:  the listening socket
- * @skb: the received SYN segment
- *
- * Allocates a request_sock describing the pending connection, chooses the
- * initial sequence number, sends the SYN/ACK and, unless a syncookie was
- * used, hashes the request onto the listener with TCP_TIMEOUT_INIT.
- *
- * Every exit path visible here returns 0; a dropped request only releases
- * what had been acquired so far (route entry and/or request_sock).
- */
- int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
- {
- struct inet_request_sock *ireq;
- struct tcp_options_received tmp_opt;
- struct request_sock *req;
- __be32 saddr = ip_hdr(skb)->saddr;
- __be32 daddr = ip_hdr(skb)->daddr;
- __u32 isn = TCP_SKB_CB(skb)->when;
- struct dst_entry *dst = NULL;
- #ifdef CONFIG_SYN_COOKIES
- int want_cookie = 0;
- #else
- #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
- #endif
- /* Never answer to SYNs send to broadcast or multicast */
- if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
- goto drop;
- /* TW buckets are converted to open requests without
- * limitations, they conserve resources and peer is
- * evidently real one.
- */
- if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
- #ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies) {
- want_cookie = 1;
- } else
- #endif
- goto drop;
- }
- /* Accept backlog is full. If we have already queued enough
- * of warm entries in syn queue, drop request. It is better than
- * clogging syn queue with openreqs with exponentially increasing
- * timeout.
- */
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
- goto drop;
- /* Allocate a request_sock */
- req = inet_reqsk_alloc(&tcp_request_sock_ops);
- if (!req)
- goto drop;
- #ifdef CONFIG_TCP_MD5SIG
- tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
- #endif
- tcp_clear_options(&tmp_opt);
- tmp_opt.mss_clamp = 536;
- tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
- /* Parse the TCP options */
- tcp_parse_options(skb, &tmp_opt, 0);
- if (want_cookie && !tmp_opt.saw_tstamp)
- tcp_clear_options(&tmp_opt);
- if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
- /* Some OSes (unknown ones, but I see them on web server, which
- * contains information interesting only for windows'
- * users) do not send their stamp in SYN. It is easy case.
- * We simply do not advertise TS support.
- */
- tmp_opt.saw_tstamp = 0;
- tmp_opt.tstamp_ok = 0;
- }
- tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
- /* Save the connection information into the request */
- tcp_openreq_init(req, &tmp_opt, skb);
- ireq = inet_rsk(req);
- ireq->loc_addr = daddr;
- ireq->rmt_addr = saddr;
- ireq->no_srccheck = inet_sk(sk)->transparent;
- ireq->opt = tcp_v4_save_options(sk, skb);
- if (security_inet_conn_request(sk, skb, req))
- goto drop_and_free;
- if (!want_cookie)
- TCP_ECN_create_request(req, tcp_hdr(skb));
- if (want_cookie) {
- #ifdef CONFIG_SYN_COOKIES
- syn_flood_warning(skb);
- req->cookie_ts = tmp_opt.tstamp_ok;
- #endif
- isn = cookie_v4_init_sequence(sk, skb, &req->mss);
- } else if (!isn) {
- struct inet_peer *peer = NULL;
- /* VJ's idea. We save last timestamp seen
- * from the destination in peer table, when entering
- * state TIME-WAIT, and check against it before
- * accepting new connection request.
- *
- * If "isn" is not zero, this request hit alive
- * timewait bucket, so that all the necessary checks
- * are made in the function processing timewait state.
- */
- if (tmp_opt.saw_tstamp &&
- tcp_death_row.sysctl_tw_recycle &&
- (dst = inet_csk_route_req(sk, req)) != NULL &&
- (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
- peer->v4daddr == saddr) {
- if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
- (s32)(peer->tcp_ts - req->ts_recent) >
- TCP_PAWS_WINDOW) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
- goto drop_and_release;
- }
- }
- /* Kill the following clause, if you dislike this way. */
- else if (!sysctl_tcp_syncookies &&
- (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (sysctl_max_syn_backlog >> 2)) &&
- (!peer || !peer->tcp_ts_stamp) &&
- (!dst || !dst_metric(dst, RTAX_RTT))) {
- /* Without syncookies last quarter of
- * backlog is filled with destinations,
- * proven to be alive.
- * It means that we continue to communicate
- * to destinations, already remembered
- * to the moment of synflood.
- */
- /* The message must end in a real newline escape ("\n"), not the two characters "/n". */
- LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
- &saddr, ntohs(tcp_hdr(skb)->source));
- goto drop_and_release;
- }
- isn = tcp_v4_init_sequence(skb);
- }
- tcp_rsk(req)->snt_isn = isn;
- /* Send the SYN/ACK. If sending fails, or a syncookie was used
- * (want_cookie), free the request instead of queuing it.
- */
- if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
- goto drop_and_free;
- inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- return 0;
- drop_and_release:
- dst_release(dst);
- drop_and_free:
- reqsk_free(req);
- drop:
- return 0;
- }
linux中的做法是:监听方对于新建的连接,在3步握手完成前不建立对应的INET SOCKET,而是建立一个request_sock结构。该结构保存了
连接双方的相关信息,相互的结构关系如下图
另外tcp中对synflood的应对机制也是在这里实现的(前提是系统启用了syn_cookies)。在没有触发syn_cookies的情况下,该函数会建立
一个request_sock结构,并加入到hash表中;而当请求队列已满且启用了syn_cookies时,该函数虽然也会临时分配一个request_sock,但并不把它加入hash表,而是运用syn_cookies机制产生一个isn,发送syn/ack
包后就释放该结构并直接返回。当下次client发送ACK包过来时,在tcp_v4_hnd_req函数中调用cookie_v4_check,该函数在验证通过后建立INET socket结构。
主动连接方收到SYNACK包后,也进入tcp_rcv_state_process,不过它有自己对应的INET SOCKET,并且处于SYN_SENT状态
- case TCP_SYN_SENT:
- queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
- if (queued >= 0) /* 0 or 1: the helper fully handled the segment; pass its result up */
- return queued;
- /* Do step6 onward by hand. Reached only when the helper returned a
- * negative value (it returns -1 after sending the ACK immediately).
- */
- tcp_urg(sk, skb, th);
- __kfree_skb(skb);
- tcp_data_snd_check(sk);
- return 0;
tcp_rcv_synsent_state_process函数处理在SYN_SENT状态的SOCKET。
- static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
- struct tcphdr *th, unsigned len)
- { /* Process a segment received while sk is in SYN_SENT.
- * Return values visible in this function:
- *    0  the skb was freed here (segment dropped, or the ACK was deferred);
- *   -1  the connection is established and the ACK was sent immediately;
- *       the skb is not freed here;
- *    1  the segment was rejected (reset_and_undo).
- */
- struct tcp_sock *tp = tcp_sk(sk);
- struct inet_connection_sock *icsk = inet_csk(sk);
- int saved_clamp = tp->rx_opt.mss_clamp; /* restored on the *_undo paths */
- tcp_parse_options(skb, &tp->rx_opt, 0);
- if (th->ack) {
- /* rfc793:
- * "If the state is SYN-SENT then
- * first check the ACK bit
- * If the ACK bit is set
- * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
- * a reset (unless the RST bit is set, if so drop
- * the segment and return)"
- *
- * We do not send data with SYN, so that RFC-correct
- * test reduces to:
- */
- if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
- goto reset_and_undo;
- /* If a timestamp echo is present, check that it lies between tp->retrans_stamp and tcp_time_stamp (original note: retrans_stamp records the time of the last retransmission) */
- if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
- !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
- tcp_time_stamp)) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
- goto reset_and_undo;
- }
- /* Now ACK is acceptable.
- *
- * "If the RST bit is set
- * If the ACK was acceptable then signal the user "error:
- * connection reset", drop the segment, enter CLOSED state,
- * delete TCB, and return."
- */
- if (th->rst) {
- tcp_reset(sk);
- goto discard;
- }
- /* rfc793:
- * "fifth, if neither of the SYN or RST bits is set then
- * drop the segment and return."
- *
- * See note below!
- * --ANK(990513)
- */
- if (!th->syn)
- goto discard_and_undo;
- /* rfc793:
- * "If the SYN bit is on ...
- * are acceptable then ...
- * (our SYN has been ACKed), change the connection
- * state to ESTABLISHED..."
- */
- TCP_ECN_rcv_synack(tp, th);
- /* Original note: snd_wl1 records the peer's segment sequence number at the time of the window update */
- tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
- /* Process the ACK; note that the last argument is FLAG_SLOWPATH */
- tcp_ack(sk, skb, FLAG_SLOWPATH);
- /* Ok.. it's good. Set up sequence numbers and
- * move to established.
- */
- /* Original note: rcv_wup is the value of rcv_nxt recorded at the time of the window update */
- tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
- tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
- /* RFC1323: The window in SYN & SYN/ACK segments is
- * never scaled.
- */
- tp->snd_wnd = ntohs(th->window);
- tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq);
- /* Window scaling was not negotiated: zero both scale factors and cap window_clamp at 65535 */
- if (!tp->rx_opt.wscale_ok) {
- tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
- tp->window_clamp = min(tp->window_clamp, 65535U);
- }
- /* Timestamps agreed: the header length now includes the timestamp option (original note: every segment sent from now on carries it) */
- if (tp->rx_opt.saw_tstamp) {
- tp->rx_opt.tstamp_ok = 1;
- tp->tcp_header_len =
- sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
- tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
- tcp_store_ts_recent(tp);
- } else {
- tp->tcp_header_len = sizeof(struct tcphdr);
- }
- /* Original note: FACK is an improved recovery algorithm built on top of SACK */
- if (tcp_is_sack(tp) && sysctl_tcp_fack)
- tcp_enable_fack(tp);
- /* Prepare MTU and MSS for data transfer */
- tcp_mtup_init(sk);
- tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- tcp_initialize_rcv_mss(sk);
- /* Remember, tcp_poll() does not lock socket!
- * Change state from SYN-SENT only after copied_seq
- * is initialized. */
- tp->copied_seq = tp->rcv_nxt;
- smp_mb();
- tcp_set_state(sk, TCP_ESTABLISHED);
- security_inet_conn_established(sk, skb);
- /* Make sure socket is routed, for correct metrics. */
- icsk->icsk_af_ops->rebuild_header(sk);
- tcp_init_metrics(sk);
- tcp_init_congestion_control(sk);
- /* Prevent spurious tcp_cwnd_restart() on first data
- * packet.
- */
- tp->lsndtime = tcp_time_stamp;
- tcp_init_buffer_space(sk);
- if (sock_flag(sk, SOCK_KEEPOPEN))
- inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
- /* With a zero send window scale, turn on the fast path (header prediction); otherwise clear pred_flags */
- if (!tp->rx_opt.snd_wscale)
- __tcp_fast_path_on(tp, tp->snd_wnd);
- else
- tp->pred_flags = 0;
- if (!sock_flag(sk, SOCK_DEAD)) {
- sk->sk_state_change(sk);
- sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
- }
- /* Third-step ACK of the handshake: deferred (so it can go out with data) when a write is pending, defer-accept is set or pingpong mode is on; otherwise sent immediately */
- if (sk->sk_write_pending ||
- icsk->icsk_accept_queue.rskq_defer_accept ||
- icsk->icsk_ack.pingpong) {
- /* Save one ACK. Data will be ready after
- * several ticks, if write_pending is set.
- *
- * It may be deleted, but with this feature tcpdumps
- * look so _wonderfully_ clever, that I was not able
- * to stand against the temptation 8) --ANK
- */
- inet_csk_schedule_ack(sk);
- icsk->icsk_ack.lrcvtime = tcp_time_stamp;
- icsk->icsk_ack.ato = TCP_ATO_MIN;
- tcp_incr_quickack(sk);
- tcp_enter_quickack_mode(sk);
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
- TCP_DELACK_MAX, TCP_RTO_MAX);
- discard: /* shared exit: also the target of the goto discard jumps elsewhere in this function */
- __kfree_skb(skb);
- return 0;
- } else {
- tcp_send_ack(sk);
- }
- return -1;
- }
- /* No ACK in the segment */
- if (th->rst) {
- /* rfc793:
- * "If the RST bit is set
- *
- * Otherwise (no ACK) drop the segment and return."
- */
- goto discard_and_undo;
- }
- /* PAWS check. */
- if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
- tcp_paws_check(&tp->rx_opt, 0))
- goto discard_and_undo;
- /* Simultaneous open: reply with a SYN/ACK */
- if (th->syn) {
- /* We see SYN without ACK. It is attempt of
- * simultaneous connect with crossed SYNs.
- * Particularly, it can be connect to self.
- */
- tcp_set_state(sk, TCP_SYN_RECV);
- if (tp->rx_opt.saw_tstamp) {
- tp->rx_opt.tstamp_ok = 1;
- tcp_store_ts_recent(tp);
- tp->tcp_header_len =
- sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
- } else {
- tp->tcp_header_len = sizeof(struct tcphdr);
- }
- tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
- tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
- /* RFC1323: The window in SYN & SYN/ACK segments is
- * never scaled.
- */
- tp->snd_wnd = ntohs(th->window);
- tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
- tp->max_window = tp->snd_wnd;
- TCP_ECN_rcv_syn(tp, th);
- tcp_mtup_init(sk);
- tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- tcp_initialize_rcv_mss(sk);
- tcp_send_synack(sk);
- #if 0
- /* Note, we could accept data and URG from this segment.
- * There are no obstacles to make this.
- *
- * However, if we ignore data in ACKless segments sometimes,
- * we have no reasons to accept it sometimes.
- * Also, seems the code doing it in step6 of tcp_rcv_state_process
- * is not flawless. So, discard packet for sanity.
- * Uncomment this return to process the data.
- */
- return -1;
- #else
- goto discard;
- #endif
- }
- /* "fifth, if neither of the SYN or RST bits is set then
- * drop the segment and return."
- */
- discard_and_undo: /* forget the options parsed from this segment, restore mss_clamp, then free the skb */
- tcp_clear_options(&tp->rx_opt);
- tp->rx_opt.mss_clamp = saved_clamp;
- goto discard;
- reset_and_undo: /* same undo, but return 1 without freeing the skb here */
- tcp_clear_options(&tp->rx_opt);
- tp->rx_opt.mss_clamp = saved_clamp;
- return 1;
- }
接下来继续第3步,也就是上面主动连接方发出的ACK到达对方(被动连接方)之后的处理。
- TCP三步握手建立连接(2)-----被动连接方发送SYN/ACK
- TCP三步握手建立连接(1)-----主动连接syn包发送
- TCP建立连接需要三步握手的原因
- TCP连接建立系列 — 客户端发送SYN段
- SYN(synchronous)是TCP/IP建立连接时使用的握手信号
- TCP三次握手建立连接与四次握手终止连接及sep和ack号的正确理解
- TCP三次握手建立连接
- TCP三次握手建立连接
- TCP三次握手建立连接
- TCP三次握手建立连接
- tcp三次握手建立连接
- TCP三次握手建立连接
- TCP三次握手建立连接
- TCP建立连接三次握手
- TCP连接:SYN ACK RST UTG PSH FIN
- TCP连接:SYN ACK RST UTG PSH FIN
- TCP学习(2)--TCP连接的建立(三次握手)
- TCP 3 way handshake (三向握手,三次握手)--TCP连接建立过程
- MySQL基础(1)
- POJ 2374
- epoll 内核分析
- java.lang.ThreadGroup
- TCP三步握手建立连接(1)-----主动连接syn包发送
- TCP三步握手建立连接(2)-----被动连接方发送SYN/ACK
- 【Java学习之一】Eclipse断点调试
- 神奇的苹果浏览器
- FileZilla Server的虚拟目录设置
- Redis:Cannot assign requested address的解决办法
- [Leetcode 135, Hard] Candy
- 找实习
- LeetCode题解:Partition List
- LeetCode题解:Binary Tree Preorder Traversal