ip层和4层的接口实现分析
来源:互联网 发布:合并会计报表软件 编辑:程序博客网 时间:2024/05/21 14:48
首先来看一下基于3层的ipv4以及ipv6实现的一些4层的协议:
这里要注意并没有IGMPv6,这是因为在ipv6中,它是作为ICMPv6的一部分实现的.
首先我们要知道,输入数据包的ip头中的protocol域标识了将要传递到的4层协议.
我们这里主要介绍的是ip数据包从3层传递到4层的接口(也就是输入帧接口).而输出帧的处理,我前面的blog都已经有介绍,想了解的话,可以去看前面的blog.
先来看主要的数据结构,然后我们会分析ip_local_deliver_finish函数(也就是3层处理的出口函数).
在内核中,每一个4层协议都是一个net_protocol结构体,而内核会在启动的时候将所有的4层协议都注册到一个数组inet_protos中,然后根据数据包ip头中的protocol域来得到相应的handler函数:
- struct net_protocol {
- ///协议的处理函数,也就是将要处理输入数据报的4层协议的处理函数.
- int (*handler)(struct sk_buff *skb);
- ///协议的错误处理函数.
- void (*err_handler)(struct sk_buff *skb, u32 info);
- ///gso相关的两个函数.
- int (*gso_send_check)(struct sk_buff *skb);
- struct sk_buff *(*gso_segment)(struct sk_buff *skb,
- int features);
-
- ///主要是被ipsec所使用的两个域
- unsigned int no_policy:1,
- netns_ok:1;
- };
L4的协议号都定义在linux/in.h这个文件中,都是以IPPROTO开头的一些枚举常量.由于ip头中的4层协议域是8位,因此4层协议的最大数值也就是255.而在内核中,255是raw ip,即IPPROTO_RAW:
- enum {
- IPPROTO_IP = 0, /* Dummy protocol for TCP */
- IPPROTO_ICMP = 1, /* Internet Control Message Protocol */
- IPPROTO_IGMP = 2, /* Internet Group Management Protocol */
- IPPROTO_IPIP = 4, /* IPIP tunnels (older KA9Q tunnels use 94) */
- IPPROTO_TCP = 6, /* Transmission Control Protocol */
- IPPROTO_EGP = 8, /* Exterior Gateway Protocol */
- IPPROTO_PUP = 12, /* PUP protocol */
- IPPROTO_UDP = 17, /* User Datagram Protocol */
- IPPROTO_IDP = 22, /* XNS IDP protocol */
- IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */
- IPPROTO_RSVP = 46, /* RSVP protocol */
- IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */
-
- IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */
-
- IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */
- IPPROTO_AH = 51, /* Authentication Header protocol */
- IPPROTO_BEETPH = 94, /* IP option pseudo header for BEET */
- IPPROTO_PIM = 103, /* Protocol Independent Multicast */
-
- IPPROTO_COMP = 108, /* Compression Header protocol */
- IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */
- IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */
-
- IPPROTO_RAW = 255, /* Raw IP packets */
- IPPROTO_MAX
- };
这里要注意,上面列出的协议并不是所有的都在内核态处理,其中一些(例如IPPROTO_RSVP)经常是在用户态处理的.
内核是通过inet_add_protocol来添加协议到inet_protos数组中的,相应的还有一个删除方法,我们先来看inet_protos的结构:
这里要注意的就是读写inet_protos时,使用的是自旋锁,而只读时,使用的是RCU(Read-Copy Update).
然后来看inet_add_protocol的源码:
- struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp;
-
-
- ///这里只是举两个例子,tcp和udp的协议注册函数.我们这次暂时就不分析tcp和udp的处理函数了(我会在3层结束后,分析4层源码)
- static struct net_protocol tcp_protocol = {
- .handler = tcp_v4_rcv,
- .err_handler = tcp_v4_err,
- .gso_send_check = tcp_v4_gso_send_check,
- .gso_segment = tcp_tso_segment,
- .no_policy = 1,
- .netns_ok = 1,
- };
-
- static struct net_protocol udp_protocol = {
- .handler = udp_rcv,
- .err_handler = udp_err,
- .no_policy = 1,
- .netns_ok = 1,
- };
-
-
- int inet_add_protocol(struct net_protocol *prot, unsigned char protocol)
- {
- int hash, ret;
-
- ///计算当前协议在数组中的slot.
- hash = protocol & (MAX_INET_PROTOS - 1);
-
- ///使用自旋锁.
- spin_lock_bh(&inet_proto_lock);
- if (inet_protos[hash]) {
- ret = -1;
- } else {
- ///将相应的prot添加到数组
- inet_protos[hash] = prot;
- ret = 0;
- }
- spin_unlock_bh(&inet_proto_lock);
- return ret;
- }
然后这些协议的注册都是在内核boot的时候在inet_init中完成的,下面就是inet_init的代码片段:
- static int __init inet_init(void)
- {
- ...........................................
- /*
- * Add all the base protocols.
- */
-
- if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
- printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
- if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
- printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
- if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
- printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
- #ifdef CONFIG_IP_MULTICAST
- if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
- printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
- #endif
-
- ..................................
- }
知道协议如何注册之后,我们来分析ip_local_deliver_finish函数,来看3层是如何将数据包发送到4层的.
1 我们知道linux支持raw数据包的发送,因此在这里会对raw socket进行特殊处理,它会clone一份数据包然后传递给相应的raw处理函数,然后再继续后面的处理.
2 ipsec.如果相应的4层协议没有设置no_policy,这时还需要先进行相应的ipsec策略检查(xfrm4_policy_check),检查通过后再传给4层处理.看下面的代码:
- static int ip_local_deliver_finish(struct sk_buff *skb)
- {
-
- ///取出相应的net信息.
- struct net *net = dev_net(skb->dev);
- ///下面两个主要是调整data指针,使data指针指向4层的数据开始处.
- __skb_pull(skb, ip_hdrlen(skb));
- skb_reset_transport_header(skb);
-
- ///加rcu锁.
- rcu_read_lock();
- {
- ///取出ip头中的协议.
- int protocol = ip_hdr(skb)->protocol;
- int hash, raw;
- struct net_protocol *ipprot;
-
- resubmit:
- ///得到raw socket, 如果不是raw socket,则返回0.
- raw = raw_local_deliver(skb, protocol);
-
- ///计算4层协议的slot.
- hash = protocol & (MAX_INET_PROTOS - 1);
- ///rcu读取相应的协议处理结构.
- ipprot = rcu_dereference(inet_protos[hash]);
- ///主要是判断ipprot是否有被当前主机注册.
- if (ipprot != NULL && (net == &init_net || ipprot->netns_ok)) {
- int ret;
-
- ///判断ipsec,并进行相关处理.
- if (!ipprot->no_policy) {
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- kfree_skb(skb);
- goto out;
- }
- nf_reset(skb);
- }
- ///调用handler,进入相应的4层协议的处理.
- ret = ipprot->handler(skb);
- if (ret < 0) {
- protocol = -ret;
- goto resubmit;
- }
- IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
- }
- ................................................
- out:
- rcu_read_unlock();
-
- return 0;
- }
最后来看一下raw socket的处理,通过上面我们知道,会调用raw_local_deliver来进行raw socket的相关处理(如果没有raw socket,则会直接返回):
当应用程序使用raw ip socket时,它只需要传递给内核协议id(4层的协议),以及目的地址.因此这里存取sock的hash表使用的key就是4层协议id.
- ///相应的hash表,保存raw socket.
- struct raw_hashinfo {
- rwlock_t lock;
- struct hlist_head ht[RAW_HTABLE_SIZE];
- };
-
- static struct raw_hashinfo raw_v4_hashinfo = {
- .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
- };
-
-
-
- int raw_local_deliver(struct sk_buff *skb, int protocol)
- {
- int hash;
- struct sock *raw_sk;
- ///通过协议计算hash值(使用4层协议id).
- hash = protocol & (RAW_HTABLE_SIZE - 1);
- ///得到相应的raw_sk.
- raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
-
- /* If there maybe a raw socket we must check - if not we
- * don't care less
- */
- ///交给raw socket的处理函数,raw_v4_input中会clone一个skb,然后交给最后的raw_rcv函数去处理最终的数据包.
- if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
- raw_sk = NULL;
-
- return raw_sk != NULL;
-
- }
- struct net_protocol {
- ///协议的处理函数,也就是将要处理输入数据报的4层协议的处理函数.
- int (*handler)(struct sk_buff *skb);
- ///协议的错误处理函数.
- void (*err_handler)(struct sk_buff *skb, u32 info);
- ///gso相关的两个函数.
- int (*gso_send_check)(struct sk_buff *skb);
- struct sk_buff *(*gso_segment)(struct sk_buff *skb,
- int features);
- ///主要是被ipsec所使用的两个域
- unsigned int no_policy:1,
- netns_ok:1;
- };
- enum {
- IPPROTO_IP = 0, /* Dummy protocol for TCP */
- IPPROTO_ICMP = 1, /* Internet Control Message Protocol */
- IPPROTO_IGMP = 2, /* Internet Group Management Protocol */
- IPPROTO_IPIP = 4, /* IPIP tunnels (older KA9Q tunnels use 94) */
- IPPROTO_TCP = 6, /* Transmission Control Protocol */
- IPPROTO_EGP = 8, /* Exterior Gateway Protocol */
- IPPROTO_PUP = 12, /* PUP protocol */
- IPPROTO_UDP = 17, /* User Datagram Protocol */
- IPPROTO_IDP = 22, /* XNS IDP protocol */
- IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */
- IPPROTO_RSVP = 46, /* RSVP protocol */
- IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */
- IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */
- IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */
- IPPROTO_AH = 51, /* Authentication Header protocol */
- IPPROTO_BEETPH = 94, /* IP option pseudo header for BEET */
- IPPROTO_PIM = 103, /* Protocol Independent Multicast */
- IPPROTO_COMP = 108, /* Compression Header protocol */
- IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */
- IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */
- IPPROTO_RAW = 255, /* Raw IP packets */
- IPPROTO_MAX
- };
- struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp;
- ///这里只是举两个例子,tcp和udp的协议注册函数.我们这次暂时就不分析tcp和udp的处理函数了(我会在3层结束后,分析4层源码)
- static struct net_protocol tcp_protocol = {
- .handler = tcp_v4_rcv,
- .err_handler = tcp_v4_err,
- .gso_send_check = tcp_v4_gso_send_check,
- .gso_segment = tcp_tso_segment,
- .no_policy = 1,
- .netns_ok = 1,
- };
- static struct net_protocol udp_protocol = {
- .handler = udp_rcv,
- .err_handler = udp_err,
- .no_policy = 1,
- .netns_ok = 1,
- };
- int inet_add_protocol(struct net_protocol *prot, unsigned char protocol)
- {
- int hash, ret;
- ///计算当前协议在数组中的slot.
- hash = protocol & (MAX_INET_PROTOS - 1);
- ///使用自旋锁.
- spin_lock_bh(&inet_proto_lock);
- if (inet_protos[hash]) {
- ret = -1;
- } else {
- ///将相应的prot添加到数组
- inet_protos[hash] = prot;
- ret = 0;
- }
- spin_unlock_bh(&inet_proto_lock);
- return ret;
- }
- static int __init inet_init(void)
- {
- ...........................................
- /*
- * Add all the base protocols.
- */
- if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
- printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
- if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
- printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
- if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
- printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
- #ifdef CONFIG_IP_MULTICAST
- if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
- printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
- #endif
- ..................................
- }
- static int ip_local_deliver_finish(struct sk_buff *skb)
- {
- ///取出相应的net信息.
- struct net *net = dev_net(skb->dev);
- ///下面两个主要是调整data指针,使data指针指向4层的数据开始处.
- __skb_pull(skb, ip_hdrlen(skb));
- skb_reset_transport_header(skb);
- ///加rcu锁.
- rcu_read_lock();
- {
- ///取出ip头中的协议.
- int protocol = ip_hdr(skb)->protocol;
- int hash, raw;
- struct net_protocol *ipprot;
- resubmit:
- ///得到raw socket, 如果不是raw socket,则返回0.
- raw = raw_local_deliver(skb, protocol);
- ///计算4层协议的slot.
- hash = protocol & (MAX_INET_PROTOS - 1);
- ///rcu读取相应的协议处理结构.
- ipprot = rcu_dereference(inet_protos[hash]);
- ///主要是判断ipprot是否有被当前主机注册.
- if (ipprot != NULL && (net == &init_net || ipprot->netns_ok)) {
- int ret;
- ///判断ipsec,并进行相关处理.
- if (!ipprot->no_policy) {
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- kfree_skb(skb);
- goto out;
- }
- nf_reset(skb);
- }
- ///调用handler,进入相应的4层协议的处理.
- ret = ipprot->handler(skb);
- if (ret < 0) {
- protocol = -ret;
- goto resubmit;
- }
- IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
- }
- ................................................
- out:
- rcu_read_unlock();
- return 0;
- }
- ///相应的hash表,保存raw socket.
- struct raw_hashinfo {
- rwlock_t lock;
- struct hlist_head ht[RAW_HTABLE_SIZE];
- };
- static struct raw_hashinfo raw_v4_hashinfo = {
- .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
- };
- int raw_local_deliver(struct sk_buff *skb, int protocol)
- {
- int hash;
- struct sock *raw_sk;
- ///通过协议计算hash值(使用4层协议id).
- hash = protocol & (RAW_HTABLE_SIZE - 1);
- ///得到相应的raw_sk.
- raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
- /* If there maybe a raw socket we must check - if not we
- * don't care less
- */
- ///交给raw socket的处理函数,raw_v4_input中会clone一个skb,然后交给最后的raw_rcv函数去处理最终的数据包.
- if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
- raw_sk = NULL;
- return raw_sk != NULL;
- }
- ip层和4层的接口实现分析
- ip层和4层的接口实现分析
- ip层和4层的接口实现分析
- 《TCP-IP详解 卷2:实现》学习笔记—接口层分析
- IP 层分析
- HAL向上层提供接口的分析
- 《TCP/IP详解卷2:实现》笔记--接口层:以太网和环回
- OSI七层和TCP/IP四层的关系
- 《TCP/IP详解卷2:实现》笔记--接口层
- lwip中ip层的实现浅析
- 第二十二章 TCP/IP层的实现
- 在Linux下实现comer的TCP/IP协议栈--网络接口层
- IP层数据包的分片和重组
- 搭建你的Spring.Net+Nhibernate+Asp.Net Mvc 框架 (三)实现数据库接口层和业务逻辑层
- LWIP之IP层实现
- IP层实现1--初始化
- IP层实现2--gro
- 网络层-4、IP地址和IP子网
- 转载:java IO流详解
- GDB多进程调试(转)
- 数据结构学习笔记(3.线性表之静态链表及柔性数组)
- 数据库中的日期转换
- 轻听-最好的解压助眠医疗轻音乐集
- ip层和4层的接口实现分析
- sgu491
- 第五章 初始化与清理
- 如何监控插入RICHEDIT中位图对象的鼠标点击事件?
- VIM 高校实用配置实践
- 第六章 访问权限与控制
- 对猿友们说几句话,杂谈
- Android组件之评分组件
- linux内核中socket的实现