libnids中TCP/IP栈实现细节分析（上）——TCP会话重组

来源：互联网 发布：js string转jsonarray 编辑：程序博客网 时间：2024/05/18 01:31

dccmx 于 2011年三月 7日发表 | 最后修改于 2011年三月 22日

libnids是网络安全方面的一个库，可以用来检测网络上的攻击行为。其中最有价值的部分是，它模拟了Linux内核中3层和4层的协议栈，可以为我们进一步研究Linux内核中的TCP/IP协议栈提供一些有价值的参考。这里简单谈谈这个库中模拟3、4层协议的实现细节（在继续读下去之前，有必要复习一下TCP/IP协议相关理论，主要是滑动窗口协议）。这里送上一张网上到处都有的TCP状态转换图，算是开胃小菜：

在TCP/IP协议栈中，3层对应的是IP层，4层对应TCP层，在这里，从3层到4层转化主要做了两件重要的事情：IP分片重组和TCP会话重组。本篇先分析其中TCP会话重组的部分（自顶向下嘛，哈哈）。

OK，先看下重要的数据结构，在tcp.h中：

// One buffered TCP segment; a cut-down imitation of the kernel's sk_buff.
// tcp_queue() allocates one of these for every segment that arrives ahead of
// the expected sequence number and links it into the receiver's list.
struct skbuff {
  // The ever-present next/prev pair: this is a doubly linked queue.
  // Each TCP session (ip:port <-> ip:port) keeps two skbuff queues, one per direction.
  // Each skbuff corresponds to one IP packet seen on the wire; a TCP stream is
  // simply one IP packet after another.
  struct skbuff *next;
  struct skbuff *prev;
  
  void *data;       // heap copy of the segment payload (malloc'ed and filled in tcp_queue)
  u_int len;        // payload length in bytes (tcp_queue stores datalen here)
  u_int truesize;   // tcp_queue stores skblen here and adds it to the half-stream's rmem_alloc
  u_int urg_ptr;    // urgent pointer from the TCP header, converted with ntohs
  
  char fin;         // th_flags & TH_FIN of the segment
  char urg;         // th_flags & TH_URG of the segment
  u_int seq;        // sequence number of the segment in host byte order
  u_int ack;        // NOTE(review): never written by the code shown in this article
};

这个结构体就是模仿的内核中的sk_buff结构体，只不过比内核中的要小很多（你懂的，因为这里只做会话重组）。

下面是在nids.h中的定义：

// Identifies one TCP connection (the "ip:port <-> ip:port" pair of endpoints).
// NOTE(review): the field meanings below are inferred from the names; the code
// that fills them is not part of this listing, so the byte order is not known here.
struct tuple4
{
u_short source; // presumably the source (client) port -- TODO confirm
u_short dest;   // presumably the destination (server) port -- TODO confirm
u_int saddr;    // presumably the source IPv4 address -- TODO confirm
u_int daddr;    // presumably the destination IPv4 address -- TODO confirm
};

这是用来表示一个TCP连接的，不解释。

// "Half" of a TCP session: everything tracked for one direction of the stream.
struct half_stream
{
  char state;        // TCP state of this endpoint (TCP_SYN_SENT, TCP_SYN_RECV, TCP_ESTABLISHED, FIN_SENT, ...)
  char collect;      // raised by a callback that wants this side's data; process_tcp compares it before/after each callback
  char collect_urg;  // same as collect, for urgent data

  char *data;        // data that has already been assembled in sequence order
  int offset;
  int count;         // number of bytes held in data
  int count_new;     // number of bytes in data that have not been handed to the callbacks yet
  int bufsize;
  int rmem_alloc;    // sum of truesize over the skbuffs queued in list (maintained by tcp_queue)

  int urg_count;
  u_int acked;
  u_int seq;
  u_int ack_seq;
  u_int first_data_seq;
  u_char urgdata;
  u_char count_new_urg;
  u_char urg_seen;
  u_int urg_ptr;
  u_short window;    // advertised window, taken from th_win with ntohs
  u_char ts_on;      // whether the TCP timestamp option is in use
  u_char wscale_on;  // whether the window-scale option is in use
  u_int curr_ts;
  u_int wscale;      // multiplier applied to window in the in-window check; set to 1 when scaling is off

  // Queue of buffered packets that arrived out of order (head and tail).
  struct skbuff *list;
  struct skbuff *listtail;
};

这个是用来表示“半个TCP会话”，其实就是一个方向上的TCP流。

还有：

// A complete TCP session: the connection endpoints plus one half_stream per direction.
struct tcp_stream
{
  struct tuple4 addr;            // endpoints of the connection
  char nids_state;               // session state seen by callbacks; process_tcp sets NIDS_JUST_EST, NIDS_DATA, NIDS_RESET and NIDS_CLOSE
  struct lurker_node *listeners; // callbacks attached to this stream; process_tcp walks the list to notify them
  struct half_stream client;     // the side process_tcp treats as sender when from_client is set
  struct half_stream server;     // the opposite side
  struct tcp_stream *next_node;  // presumably hash-bucket chain links -- TODO confirm
  struct tcp_stream *prev_node;
  int hash_index;                // presumably this stream's bucket in tcp_stream_table -- TODO confirm
  struct tcp_stream *next_time;  // NOTE(review): next_time/prev_time/read/user are not touched by the code shown in this article
  struct tcp_stream *prev_time;
  int read;
  struct tcp_stream *next_free;  // link of the free list that tcp_init threads through streams_pool
  void *user;
};

显然，这是用来表示一个完整的TCP会话了，最后是static struct tcp_stream **tcp_stream_table;一个TCP会话指针的数组，其实就是hash表了。

下面来看处理过程，先是初始化：

int tcp_init(int size){
  ...
  //初始化全局tcp会话哈希表
  tcp_stream_table_size = size;
  tcp_stream_table =calloc(tcp_stream_table_size,sizeof(char*));
  if(!tcp_stream_table) {
    nids_params.no_mem("tcp_init");
    return-1;
  }
  
  //设置最大会话数，为了哈希的效率，哈希表的元素个数上限设为3/4表大小
  max_stream = 3 * tcp_stream_table_size / 4;
  
  //先将max_stream个tcp会话结构体申请好，放着（避免后面陆陆续续申请浪费时间）。
  streams_pool = (structtcp_stream *)malloc((max_stream + 1) *sizeof(structtcp_stream));
  if(!streams_pool) {
    nids_params.no_mem("tcp_init");
    return-1;
  }
  
  //ok，将这个数组初始化成链表
  for(i = 0; i < max_stream; i++)
    streams_pool[i].next_free = &(streams_pool[i + 1]);
  streams_pool[max_stream].next_free = 0;
  free_streams = streams_pool;
  
  ...
  return0;
}

很简单，做了两件事：1.初始化tcp会话哈希表。2.初始化会话池。这个初始化函数只在库初始化时执行一次。

初始化完成之后，就进入了pcap_loop中了，nids中的回调函数是nids_pcap_handler，在这个函数里面做了些ip分片重组（等下篇再说）后（tcp包）便来到了process_tcp函数，这里tcp会话重组开始了。来看看。

// Feeds one TCP packet into session tracking and reassembly.
//   data   - start of the IP header; the TCP header follows ip_hl 32-bit words later
//   skblen - passed through to tcp_queue, which records it as the buffered packet's truesize
// The function tracks the three-way handshake, resets, teardown and window
// state, notifies the registered callbacks, and hands payload to tcp_queue.
// The listing is abridged: "..." stands for omitted code, which presumably
// holds the declarations of a_tcp, snd, rcv, from_client, datalen and tmp_ts.
void process_tcp(u_char * data, int skblen){
  // Locate the IP header and the TCP header behind it.
  struct ip *this_iphdr = (struct ip *)data;
  struct tcphdr *this_tcphdr = (struct tcphdr *)(data + 4 * this_iphdr->ip_hl);

  ... // sanity-check code omitted here

  // Look the packet up in the stream hash table (this is where a_tcp comes from).
  // If there is no matching stream, decide whether a new one must be created.
  if (!(a_tcp = find_stream(this_tcphdr, this_iphdr, &from_client))) {
    // Is this the first packet of a session, i.e. a SYN without ACK and RST
    // sent by the client? If so the client is opening a connection, so
    // register a new stream.
    if ((this_tcphdr->th_flags & TH_SYN) &&
        !(this_tcphdr->th_flags & TH_ACK) &&
        !(this_tcphdr->th_flags & TH_RST))
      add_new_tcp(this_tcphdr, this_iphdr);
    // Any other packet of an unknown stream is ignored.
    return;
  }

  // Stream found: pick sender (snd) and receiver (rcv) from the packet direction.
  if (from_client) {
    snd = &a_tcp->client;
    rcv = &a_tcp->server;
  }
  else {
    rcv = &a_tcp->client;
    snd = &a_tcp->server;
  }

  // A SYN on an already known stream.
  if ((this_tcphdr->th_flags & TH_SYN)) {
    // The client's bare SYN was handled above when the stream was created, so
    // the only SYN accepted here is the server's SYN+ACK while the client is
    // in TCP_SYN_SENT and the server still in TCP_CLOSE. Anything else is dropped.
    if (from_client || a_tcp->client.state != TCP_SYN_SENT ||
        a_tcp->server.state != TCP_CLOSE || !(this_tcphdr->th_flags & TH_ACK))
      return;

    // Drop the packet if it does not acknowledge the client's sequence number.
    if (a_tcp->client.seq != ntohl(this_tcphdr->th_ack))
      return;

    // This is the second handshake packet: initialise the server side
    // (state, sequence numbers, window).
    a_tcp->server.state = TCP_SYN_RECV;
    a_tcp->server.seq = ntohl(this_tcphdr->th_seq) + 1;
    a_tcp->server.first_data_seq = a_tcp->server.seq;
    a_tcp->server.ack_seq = ntohl(this_tcphdr->th_ack);
    a_tcp->server.window = ntohs(this_tcphdr->th_win);

    // TCP options. Timestamps first: they stay enabled only if both sides use them.
    if (a_tcp->client.ts_on) {
      a_tcp->server.ts_on = get_ts(this_tcphdr, &a_tcp->server.curr_ts);
      if (!a_tcp->server.ts_on)
        a_tcp->client.ts_on = 0;
    } else
      a_tcp->server.ts_on = 0;
    // Then window scaling: if either side lacks it, both factors fall back to 1.
    if (a_tcp->client.wscale_on) {
      a_tcp->server.wscale_on = get_wscale(this_tcphdr, &a_tcp->server.wscale);
      if (!a_tcp->server.wscale_on) {
        a_tcp->client.wscale_on = 0;
        a_tcp->client.wscale  = 1;
        a_tcp->server.wscale = 1;
      }
    } else {
      a_tcp->server.wscale_on = 0;
      a_tcp->server.wscale = 1;
    }
    // SYN handled.
    return;
  }

  if (
    /* not an empty packet carrying exactly the expected sequence number ... */
    !(!datalen && ntohl(this_tcphdr->th_seq) == rcv->ack_seq)
    && /* ... and the packet lies outside the receiver's current window: */
    (
      /* it starts at or beyond the right edge of the window, or */
      !before(ntohl(this_tcphdr->th_seq), rcv->ack_seq + rcv->window * rcv->wscale) ||
      /* it ends before the left edge of the window */
      before(ntohl(this_tcphdr->th_seq) + datalen, rcv->ack_seq)
    )
  )
    // Out-of-window packet: drop it.
    return;

  // RST: the connection is being torn down. Let the listeners see the data
  // collected so far (state NIDS_RESET), then destroy the stream.
  if ((this_tcphdr->th_flags & TH_RST)) {
    if (a_tcp->nids_state == NIDS_DATA) {
      struct lurker_node *i;

      a_tcp->nids_state = NIDS_RESET;
      // Invoke every listener callback registered on this stream, passing the
      // stream and the address of that listener's private data slot.
      for (i = a_tcp->listeners; i; i = i->next)
        (i->item) (a_tcp, &i->data);
    }
    nids_free_tcp_stream(a_tcp);
    return;
  }

  /* PAWS (protection against wrapped sequence numbers) check: drop packets
   * whose timestamp is older than the last one recorded for the sender. */
  if (rcv->ts_on && get_ts(this_tcphdr, &tmp_ts) &&
      before(tmp_ts, snd->curr_ts))
    return;

  // ACK handling, part 1: the third handshake packet.
  if ((this_tcphdr->th_flags & TH_ACK)) {

    // Sent by the client while both sides are still in their handshake states.
    if (from_client && a_tcp->client.state == TCP_SYN_SENT &&
        a_tcp->server.state == TCP_SYN_RECV) {

      // The acknowledged number matches as well, so this is the third
      // handshake packet: the connection is established.
      if (ntohl(this_tcphdr->th_ack) == a_tcp->server.seq) {
        a_tcp->client.state = TCP_ESTABLISHED;               // client state
        a_tcp->client.ack_seq = ntohl(this_tcphdr->th_ack);  // client ack number
        {
          struct proc_node *i;
          struct lurker_node *j;
          void *data;   // per-callback private pointer; shadows the data parameter

          a_tcp->server.state = TCP_ESTABLISHED;             // server state
          a_tcp->nids_state = NIDS_JUST_EST;                 // state shown to the callbacks below

          // Announce the new connection to every registered TCP callback.
          // A callback that raises one of the collect flags becomes a
          // listener of this stream.
          for (i = tcp_procs; i; i = i->next) {
            char whatto = 0;
            char cc = a_tcp->client.collect;
            char sc = a_tcp->server.collect;
            char ccu = a_tcp->client.collect_urg;
            char scu = a_tcp->server.collect_urg;

            (i->item) (a_tcp, &data);   // the callback itself
            if (cc < a_tcp->client.collect)
              whatto |= COLLECT_cc;
            if (ccu < a_tcp->client.collect_urg)
              whatto |= COLLECT_ccu;
            if (sc < a_tcp->server.collect)
              whatto |= COLLECT_sc;
            if (scu < a_tcp->server.collect_urg)
              whatto |= COLLECT_scu;
            if (nids_params.one_loop_less) {
              if (a_tcp->client.collect >= 2) {
                a_tcp->client.collect = cc;
                whatto &= ~COLLECT_cc;
              }
              if (a_tcp->server.collect >= 2) {
                a_tcp->server.collect = sc;
                whatto &= ~COLLECT_sc;
              }
            }
            if (whatto) {
              // Push a listener node for this callback onto the stream's list.
              j = mknew(struct lurker_node);
              j->item = i->item;
              j->data = data;
              j->whatto = whatto;
              j->next = a_tcp->listeners;
              a_tcp->listeners = j;
            }
          }
          // Nobody is interested in this stream: stop tracking it.
          if (!a_tcp->listeners) {
            nids_free_tcp_stream(a_tcp);
            return;
          }
          a_tcp->nids_state = NIDS_DATA;
        }
      }
      // return;   (disabled in the listing, so the handshake ACK also runs through the code below)
    }
  }
  // Handshake handling ends here.

  // ACK handling, part 2: acknowledgements and connection teardown.
  if ((this_tcphdr->th_flags & TH_ACK)) {

    // Let handle_ack update the sender's acknowledgement bookkeeping.
    handle_ack(snd, ntohl(this_tcphdr->th_ack));

    // Advance the teardown state; once both sides are FIN_CONFIRMED, tell the
    // listeners the connection is closed and free the stream.
    if (rcv->state == FIN_SENT)
      rcv->state = FIN_CONFIRMED;
    if (rcv->state == FIN_CONFIRMED && snd->state == FIN_CONFIRMED) {
      struct lurker_node *i;

      a_tcp->nids_state = NIDS_CLOSE;
      for (i = a_tcp->listeners; i; i = i->next)
        (i->item) (a_tcp, &i->data);
      nids_free_tcp_stream(a_tcp);
      return;
    }
  }

  // Packets that carry payload and/or a FIN are handed to tcp_queue, which
  // feeds them into the receiver's data.
  if (datalen + (this_tcphdr->th_flags & TH_FIN) > 0)
    tcp_queue(a_tcp, this_tcphdr, snd, rcv,
              (char *) (this_tcphdr) + 4 * this_tcphdr->th_off,
              datalen, skblen);
  // Remember the window advertised by the sender.
  snd->window = ntohs(this_tcphdr->th_win);

  // When more than 65535 bytes are accounted to the receiver's packet queue,
  // call prune_queue on it.
  if (rcv->rmem_alloc > 65535)
    prune_queue(rcv, this_tcphdr);
  // A stream without listeners is released.
  if (!a_tcp->listeners)
    nids_free_tcp_stream(a_tcp);
}

好了，tcp包的基本处理流程就这些了，主要做了连接的建立、释放、状态迁移这些工作，下面看看连接的缓冲区是如何维护的（主要就是如何更新的）。来看tcp_queue函数：

staticvoid
tcp_queue(structtcp_stream * a_tcp,structtcphdr * this_tcphdr,
      structhalf_stream * snd,structhalf_stream * rcv,
      char*data,intdatalen, int skblen
      )
{
  u_int this_seq = ntohl(this_tcphdr->th_seq);
  structskbuff *pakiet, *tmp;
  
  /*
   * Did we get anything new to ack?
   */
  //EXP_SEQ是目前已集齐的数据流水号，我们希望收到从这里开始的数据
  //先判断数据是不是在EXP_SEQ之前开始
  if(!after(this_seq, EXP_SEQ)) {
    //再判断数据长度是不是在EXP_SEQ之后，如果是，说明有新数据，否则是重发的包，无视之
    if(after(this_seq + datalen + (this_tcphdr->th_flags & TH_FIN), EXP_SEQ)) {
      /* the packet straddles our window end */
      get_ts(this_tcphdr, &snd->curr_ts);
      //ok，更新集齐的数据区，值得一提的是add_from_skb函数一旦发现集齐了一段数据之后
      //便立刻调用notify函数，在notify函数里面将数据推给回调方
      add_from_skb(a_tcp, rcv, snd, (u_char *)data, datalen, this_seq,
           (this_tcphdr->th_flags & TH_FIN),
           (this_tcphdr->th_flags & TH_URG),
           ntohs(this_tcphdr->th_urp) + this_seq - 1);
      /*
       * Do we have any old packets to ack that the above
       * made visible? (Go forward from skb)
       */
      //此时EXP_SEQ有了变化了，看看缓冲区里的包有没有符合条件能用同样的方法处理掉的
      //有就处理掉，然后释放
      pakiet = rcv->list;
      while(pakiet) {
    if(after(pakiet->seq, EXP_SEQ))
      break;
    if(after(pakiet->seq + pakiet->len + pakiet->fin, EXP_SEQ)) {
      add_from_skb(a_tcp, rcv, snd, pakiet->data,
               pakiet->len, pakiet->seq, pakiet->fin, pakiet->urg,
               pakiet->urg_ptr + pakiet->seq - 1);
        }
    rcv->rmem_alloc -= pakiet->truesize;
    if(pakiet->prev)
      pakiet->prev->next = pakiet->next;
    else
      rcv->list = pakiet->next;
    if(pakiet->next)
      pakiet->next->prev = pakiet->prev;
    else
      rcv->listtail = pakiet->prev;
    tmp = pakiet->next;
    free(pakiet->data);
    free(pakiet);
    pakiet = tmp;
      }
    }
    else
      return;
  }
  //这里说明现在这个包是个乱序到达的（数据开始点超过了EXP_SEQ），放到缓冲区等待处理，注意保持缓冲区有序
  else{
    structskbuff *p = rcv->listtail;
  
    pakiet = mknew(structskbuff);
    pakiet->truesize = skblen;
    rcv->rmem_alloc += pakiet->truesize;
    pakiet->len = datalen;
    pakiet->data =malloc(datalen);
    if(!pakiet->data)
      nids_params.no_mem("tcp_queue");
    memcpy(pakiet->data, data, datalen);
    pakiet->fin = (this_tcphdr->th_flags & TH_FIN);
    /* Some Cisco - at least - hardware accept to close a TCP connection
     * even though packets were lost before the first TCP FIN packet and
     * never retransmitted; this violates RFC 793, but since it really
     * happens, it has to be dealt with... The idea is to introduce a 10s
     * timeout after TCP FIN packets were sent by both sides so that
     * corresponding libnids resources can be released instead of waiting
     * for retransmissions which will never happen.  -- Sebastien Raveau
     */
    if(pakiet->fin) {
      snd->state = TCP_CLOSING;
      if(rcv->state == FIN_SENT || rcv->state == FIN_CONFIRMED)
    add_tcp_closing_timeout(a_tcp);
    }
    pakiet->seq = this_seq;
    pakiet->urg = (this_tcphdr->th_flags & TH_URG);
    pakiet->urg_ptr = ntohs(this_tcphdr->th_urp);
    for(;;) {
      if(!p || !after(p->seq, this_seq))
    break;
      p = p->prev;
    }
    if(!p) {
      pakiet->prev = 0;
      pakiet->next = rcv->list;
      if(rcv->list)
         rcv->list->prev = pakiet;
      rcv->list = pakiet;
      if(!rcv->listtail)
    rcv->listtail = pakiet;
    }
    else{
      pakiet->next = p->next;
      p->next = pakiet;
      pakiet->prev = p;
      if(pakiet->next)
    pakiet->next->prev = pakiet;
      else
    rcv->listtail = pakiet;
    }
  }
}

好了，主体函数就这些了，其他的函数比较简单，不解释。协议栈模拟，考验的就是细心周到啊。