IP层实现1--初始化
来源:互联网 发布:python游戏开发百度云 编辑:程序博客网 时间:2024/06/16 01:18
IP头部:
[ include/uapi/linux/ip.h ]
/* IPv4 header; any option bytes follow the fixed fields below. */
struct iphdr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8	ihl:4,		/* header length, counted in 32-bit words */
		version:4;	/* IP version (4 or 6) */
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u8	version:4,
		ihl:4;
#else
#error	"Please fix <asm/byteorder.h>"
#endif
	__u8	tos;		/* Type of Service; rarely used */
	__be16	tot_len;	/* packet length in bytes, header included */
	__be16	id;		/* identification; central to fragmentation */
	__be16	frag_off;	/* DF (Don't Fragment); MF (More Fragments); Fragment Offset */
	__u8	ttl;		/* time to live, default 64 */
	__u8	protocol;	/* upper-layer protocol */
	__sum16	check;		/* IP header checksum */
	__be32	saddr;		/* source address */
	__be32	daddr;		/* destination address */
	/* The options start here (IP options). */
};
在IP层之上的协议(TCP,UDP),都要支持socket接口的调用。socket提供了一个结构,用来提供各接口:
[ include/net/sock.h ]
/* Per-protocol operations exposed to the socket layer (excerpt; the
 * remaining members are elided by the article).
 */
struct proto {
	void	(*close)(struct sock *sk, long timeout);
	int	(*connect)(struct sock *sk, struct sockaddr *uaddr, int addr_len);
	...
};
然后又提供了一个全局列表,所有支持socket的协议都注册到此列表上:
[ net/core/sock.c ]
/* Global list of registered struct proto entries; proto_register() links
 * each protocol in through prot->node.
 */
static LIST_HEAD(proto_list);
注册函数为:
[ net/core/sock.c ]
/*
 * proto_register - add a protocol to the global proto_list
 * @prot:       protocol to register
 * @alloc_slab: when non-zero, create the protocol's slab caches first
 *
 * Returns 0 on success, or -ENOBUFS when a cache or a cache name could not
 * be allocated (everything allocated so far is released again).
 */
int proto_register(struct proto *prot, int alloc_slab)
{
	if (alloc_slab) {
		/* Main sock cache: named after the protocol (e.g. "TCP"),
		 * object size taken from prot->obj_size
		 * (e.g. for TCP, .obj_size = sizeof(struct tcp_sock)).
		 */
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | prot->slab_flags,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		/* Optional request-sock operations (the original note
		 * describes them as the socket's response/ack handling).
		 */
		if (prot->rsk_prot != NULL) {
			/* cache name */
			prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
			if (prot->rsk_prot->slab_name == NULL)
				goto out_free_sock_slab;

			/* Request-sock cache: name e.g. "request_sock_TCP",
			 * object size taken from rsk_prot->obj_size (e.g. for
			 * TCP, .obj_size = sizeof(struct tcp_request_sock)).
			 */
			prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL);

			if (prot->rsk_prot->slab == NULL) {
				pr_crit("%s: Can't create request sock SLAB cache!\n",
					prot->name);
				goto out_free_request_sock_slab_name;
			}
		}

		/* Optional operations for sockets in the TIMEWAIT state */
		if (prot->twsk_prot != NULL) {
			/* cache name */
			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);

			if (prot->twsk_prot->twsk_slab_name == NULL)
				goto out_free_request_sock_slab;

			/* Timewait cache: name e.g. "tw_sock_TCP", object
			 * size taken from twsk_prot->twsk_obj_size (e.g. for
			 * TCP, .obj_size = sizeof(struct tcp_timewait_sock)).
			 */
			prot->twsk_prot->twsk_slab =
				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0,
						  SLAB_HWCACHE_ALIGN |
							prot->slab_flags,
						  NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	mutex_lock(&proto_list_mutex);
	list_add(&prot->node, &proto_list);	/* link the protocol into the global list */
	/* Per the original note: a bitmap holds one bit per protocol so that
	 * "is this protocol in use" can be looked up quickly; the bit index
	 * is stored in prot->inuse_idx, and a per-CPU array indexed the same
	 * way tracks how many socks use the protocol.
	 * NOTE(review): assign_proto_idx() is not shown here - confirm.
	 */
	assign_proto_idx(prot);
	mutex_unlock(&proto_list_mutex);
	return 0;

	/* Error unwinding: each label falls through into the next one, so
	 * resources are released in reverse order of allocation.
	 */
out_free_timewait_sock_slab_name:
	kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
	if (prot->rsk_prot && prot->rsk_prot->slab) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		prot->rsk_prot->slab = NULL;
	}
out_free_request_sock_slab_name:
	if (prot->rsk_prot)
		kfree(prot->rsk_prot->slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
out:
	return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);
对于IP来说,socket对应的family为PF_INET,与之对应的结构为:
[ net/ipv4/af_inet.c ]
/* Family descriptor for PF_INET, handed to sock_register() by inet_init(). */
static const struct net_proto_family inet_family_ops = {
	.family = PF_INET,
	.create = inet_create,	/* called when a socket is created */
	.owner	= THIS_MODULE,
};
对于socket支持的family,都有一个对应的net_proto_family结构,同样,内核提供一个全局数组:
[ net/socket.c ]
/* Registered families, indexed by family number; sock_register() fills
 * the slots.
 */
static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
NPROTO为socket支持的family的总数。用下面的函数进行注册:
[ net/socket.c ]
/*
 * sock_register - install a family descriptor in net_families[]
 * @ops: descriptor to install at index ops->family
 *
 * Returns 0 on success, -ENOBUFS when ops->family >= NPROTO, or -EEXIST
 * when the slot is already occupied.
 */
int sock_register(const struct net_proto_family *ops)
{
	int err;

	if (ops->family >= NPROTO) {
		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
		return -ENOBUFS;
	}

	spin_lock(&net_family_lock);
	if (rcu_dereference_protected(net_families[ops->family],
				      lockdep_is_held(&net_family_lock)))
		err = -EEXIST;
	else {
		/* store ops in the global array */
		rcu_assign_pointer(net_families[ops->family], ops);
		err = 0;
	}
	spin_unlock(&net_family_lock);

	/* NOTE(review): this message is printed on the -EEXIST path too. */
	printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
	return err;
}
EXPORT_SYMBOL(sock_register);
同一个family的socket有很多类型(如:SOCK_STREAM,SOCK_DGRAM,SOCK_RAW),对IP层来说,所有这些类型都保存在全局数组inetsw中,数组的每一项是对应类型的链表:
[ net/ipv4/af_inet.c ]
/* The inetsw table contains everything that inet_create needs to
 * build a new socket. One list head per socket type (index < SOCK_MAX).
 */
static struct list_head inetsw[SOCK_MAX];
内核提供了一个初始化列表:
[ net/ipv4/af_inet.c ]
/* Upon startup we insert all the elements in inetsw_array[] into
 * the linked list inetsw.
 */
static struct inet_protosw inetsw_array[] =
{
	{
		.type =       SOCK_STREAM,
		.protocol =   IPPROTO_TCP,
		.prot =       &tcp_prot,
		.ops =        &inet_stream_ops,
		.no_check =   0,
		.flags =      INET_PROTOSW_PERMANENT |
			      INET_PROTOSW_ICSK,
	},

	{
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_UDP,
		.prot =       &udp_prot,
		.ops =        &inet_dgram_ops,
		.no_check =   UDP_CSUM_DEFAULT,
		.flags =      INET_PROTOSW_PERMANENT,
	},

	{
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_ICMP,
		.prot =       &ping_prot,
		.ops =        &inet_dgram_ops,
		.no_check =   UDP_CSUM_DEFAULT,
		.flags =      INET_PROTOSW_REUSE,
	},

	{
		.type =       SOCK_RAW,
		.protocol =   IPPROTO_IP,	/* wild card */
		.prot =       &raw_prot,
		.ops =        &inet_sockraw_ops,
		.no_check =   UDP_CSUM_DEFAULT,
		.flags =      INET_PROTOSW_REUSE,
	}
};
将socket类型注册到全局数组inetsw,调用下面函数:
[ net/ipv4/af_inet.c ]
/*
 * inet_register_protosw - link a socket-type entry into inetsw[p->type]
 * @p: entry to register
 *
 * Rejected (with a pr_err message and no insertion) when p->type is not
 * below SOCK_MAX, or when a permanent entry with the same protocol number
 * is already on the list. No status is returned to the caller.
 */
void inet_register_protosw(struct inet_protosw *p)
{
	struct list_head *lh;
	struct inet_protosw *answer;
	/* Protocol number (TCP, UDP, ...); not to be confused with
	 * p->type (SOCK_STREAM, SOCK_DGRAM, ...).
	 */
	int protocol = p->protocol;
	struct list_head *last_perm;

	spin_lock_bh(&inetsw_lock);

	if (p->type >= SOCK_MAX)
		goto out_illegal;

	/* If we are trying to override a permanent protocol, bail. */
	answer = NULL;
	last_perm = &inetsw[p->type];	/* list head for this socket type */
	list_for_each(lh, &inetsw[p->type]) {	/* walk the entries of this type */
		answer = list_entry(lh, struct inet_protosw, list);

		/* Check only the non-wild match. */
		if (INET_PROTOSW_PERMANENT & answer->flags) {
			/* Permanent protocols are unremovable. */
			if (protocol == answer->protocol)	/* same protocol number */
				break;
			last_perm = lh;
		}

		answer = NULL;
	}
	if (answer)	/* a permanent entry for this protocol already exists */
		goto out_permanent;

	/* Add the new entry after the last permanent entry if any, so that
	 * the new entry does not override a permanent entry when matched with
	 * a wild-card protocol. But it is allowed to override any existing
	 * non-permanent entry.  This means that when we remove this entry, the
	 * system automatically returns to the old behavior.
	 *
	 * When no permanent entry was seen, last_perm is still the list head,
	 * so the entry goes to the front of the list.
	 */
	list_add_rcu(&p->list, last_perm);
out:
	spin_unlock_bh(&inetsw_lock);

	return;

out_permanent:
	pr_err("Attempt to override permanent protocol %d\n", protocol);
	goto out;

out_illegal:
	pr_err("Ignoring attempt to register invalid socket type %d\n",
	       p->type);
	goto out;
}
内核支持不同的协议,如UDP,TCP。当数据到达IP层后,要根据上层协议的类型调用不同的接收函数,内核通过下面的方式处理这种情况:
- 定义一个结构封装各函数:
[ include/net/protocol.h ]
/* This is used to register protocols. */
struct net_protocol {
	void			(*early_demux)(struct sk_buff *skb);
	/* NOTE(review): presumably the per-protocol receive function the
	 * article refers to - confirm against the callers.
	 */
	int			(*handler)(struct sk_buff *skb);
	void			(*err_handler)(struct sk_buff *skb, u32 info);
	unsigned int		no_policy:1,
				netns_ok:1,	/* must be set, or inet_add_protocol() refuses the entry */
				/* does the protocol do more stringent
				 * icmp tag validation than simple
				 * socket lookup?
				 */
				icmp_strict_tag_validation:1;
};
- 定义一个全局数组(以协议号为下标),所有协议都注册到此数组
[ net/ipv4/protocol.c ]
/* Registered protocol handlers, indexed by protocol number;
 * inet_add_protocol() fills the slots.
 */
const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
[ include/net/protocol.h ]
/* This is one larger than the largest protocol value that can be
 * found in an ipv4 or ipv6 header.  Since in both cases the protocol
 * value is presented in a __u8, this is defined to be 256.
 */
#define MAX_INET_PROTOS		256
以下函数用来向全局数组inet_protos注册net_protocol结构:
[ net/ipv4/protocol.c ]
/*
 * inet_add_protocol - claim slot @protocol of inet_protos[] for @prot
 * @prot:     handler table to install
 * @protocol: protocol number, used as the array index
 *
 * Returns -EINVAL when prot->netns_ok is not set, 0 when the slot was
 * empty and now holds @prot, or -1 when the slot was already taken.
 */
int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
{
	if (!prot->netns_ok) {
		pr_err("Protocol %u is not namespace aware, cannot register.\n",
			protocol);
		return -EINVAL;
	}

	/* cmpxchg() yields the previous slot value: NULL means we installed
	 * @prot, anything else means the slot was already occupied.
	 */
	return !cmpxchg((const struct net_protocol **)&inet_protos[protocol],
			NULL, prot) ? 0 : -1;
}
EXPORT_SYMBOL(inet_add_protocol);
所有接收的包都有不同的类型,如IP,802.3,ARP,IPv6等,当接收到不同类型的包后,要调用不同的处理函数,内核通过下面的方式处理这种情况。
- 定义一个结构,用来将类型和函数对应起来
[ include/linux/netdevice.h ]
/* Binds a link-layer packet type to the function that handles it. */
struct packet_type {
	__be16			type;	/* This is really htons(ether_type). Packet type */
	struct net_device	*dev;	/* NULL is wildcarded here. Associated network device */
	int			(*func) (struct sk_buff *,
					 struct net_device *,
					 struct packet_type *,
					 struct net_device *);
	bool			(*id_match)(struct packet_type *ptype,
					    struct sock *sk);
	void			*af_packet_priv;
	struct list_head	list;	/* linkage used by dev_add_pack() */
};
- 定义一个全局列表,所有packet_type类型为ETH_P_ALL(接收所有类型的包)的都挂在此列表上
[ net/core/dev.c ]
/* Taps: handlers whose type is htons(ETH_P_ALL) are linked here
 * (see ptype_head()).
 */
struct list_head ptype_all __read_mostly;
- 定义一个哈希表,其中的key为包的类型
[ net/core/dev.c ]
/* Hash buckets for all other handlers; the bucket index is
 * ntohs(type) & PTYPE_HASH_MASK (see ptype_head()).
 */
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
[ include/linux/netdevice.h ]
/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16. Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *             sure which should go first, but I bet it won't make much
 *             difference if we are running VLANs.  The good news is that
 *             this protocol won't be in the list unless compiled in, so
 *             the average user (w/out VLANs) will not be adversely affected.
 *             --BLG
 *
 *		0800	IP
 *		8100    802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */
#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
以下函数用来向全局列表注册packet_type类型:
[ net/core/dev.c ]
/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *							--ANK (980803)
 */

/* Pick the list a packet_type belongs on: ptype_all for ETH_P_ALL,
 * otherwise the ptype_base bucket selected by the low bits of the type.
 */
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
	if (pt->type == htons(ETH_P_ALL))	/* handler wants every packet type */
		return &ptype_all;
	else
		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */
void dev_add_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);	/* list to attach to */

	spin_lock(&ptype_lock);
	list_add_rcu(&pt->list, head);	/* link pt into that list */
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
对于IP包,定义了如下的类型:
[ net/ipv4/af_inet.c ]
/* Link-layer packet type for IPv4: ETH_P_IP frames are handled by ip_rcv. */
static struct packet_type ip_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),	/* Internet Protocol packet */
	.func = ip_rcv,
};
它会在初始化IP模块时注册到全局列表当中去。其中的ip_rcv就是接收数据包的函数。下面就可以看IP层的初始化:
[ net/ipv4/af_inet.c ]
/*
 * inet_init - bring up the IPv4 stack
 *
 * Returns 0 on success. On failure returns -EINVAL (rc's initial value)
 * when the reserved-ports bitmap cannot be allocated, or the error from
 * proto_register(); protos registered so far are unregistered again.
 * Failures after that point are only logged (or panic, for ICMP).
 */
static int __init inet_init(void)
{
	struct inet_protosw *q;
	struct list_head *r;
	int rc = -EINVAL;

	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));

	/* 65536 / 8 = 8192 zero-filled bytes */
	sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
	if (!sysctl_local_reserved_ports)
		goto out;

	rc = proto_register(&tcp_prot, 1);	/* register the TCP proto */
	if (rc)
		goto out_free_reserved_ports;

	rc = proto_register(&udp_prot, 1);	/* register the UDP proto */
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);	/* register the RAW proto */
	if (rc)
		goto out_unregister_udp_proto;

	rc = proto_register(&ping_prot, 1);	/* register the PING proto */
	if (rc)
		goto out_unregister_raw_proto;

	/*
	 *	Tell SOCKET that we are alive...
	 */

	/* register the PF_INET family; the result is deliberately ignored */
	(void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
	ip_static_sysctl_init();	/* sysctl registration (route related, per the original note) */
#endif

	/*
	 *	Add all the base protocols.
	 *	inet_protos is the global array of supported protocols.
	 */

	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)	/* ICMP */
		pr_crit("%s: Cannot add ICMP protocol\n", __func__);
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)	/* UDP */
		pr_crit("%s: Cannot add UDP protocol\n", __func__);
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)	/* TCP */
		pr_crit("%s: Cannot add TCP protocol\n", __func__);
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)	/* IGMP */
		pr_crit("%s: Cannot add IGMP protocol\n", __func__);
#endif

	/* Register the socket-side information for inet_create.
	 * inetsw holds one list per socket type; start with every list empty.
	 */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	/* populate inetsw from inetsw_array */
	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 *	Set the ARP module up
	 */

	arp_init();

	/*
	 *	Set the IP module up
	 */

	ip_init();

	tcp_v4_init();

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/* Setup UDP memory threshold */
	udp_init();

	/* Add UDP-Lite (RFC 3828) */
	udplite4_register();

	ping_init();

	/*
	 *	Set the ICMP layer up
	 */

	if (icmp_init() < 0)
		panic("Failed to create the ICMP control socket.\n");

	/*
	 *	Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
#endif
	/*
	 *	Initialise per-cpu ipv4 mibs
	 */

	if (init_ipv4_mibs())
		pr_crit("%s: Cannot init ipv4 mibs\n", __func__);

	ipv4_proc_init();

	ipfrag_init();

	dev_add_pack(&ip_packet_type);	/* register the ETH_P_IP packet type */

	rc = 0;
out:
	return rc;
	/* Error unwinding: labels fall through, undoing the registrations in
	 * reverse order before jumping back to out.
	 */
out_unregister_raw_proto:
	proto_unregister(&raw_prot);
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
out_free_reserved_ports:
	kfree(sysctl_local_reserved_ports);
	goto out;
}

fs_initcall(inet_init);	/* inet_init is invoked during system initialization */
为提高接收和发送的效率,尤其是在大负载下的效率,内核作了特别的处理
[ net/ipv4/af_inet.c ]
/*
 * ipv4_offload_init - register the IPv4 offload handlers
 *
 * UDP/TCP offload registration failures are only logged; always returns 0.
 */
static int __init ipv4_offload_init(void)
{
	/*
	 * Add offloads
	 */
	if (udpv4_offload_init() < 0)
		pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
	if (tcpv4_offload_init() < 0)
		pr_crit("%s: Cannot add TCP protocol offload\n", __func__);

	dev_add_offload(&ip_packet_offload);
	inet_add_offload(&ipip_offload, IPPROTO_IPIP);
	return 0;
}

fs_initcall(ipv4_offload_init);	/* ipv4_offload_init is invoked during kernel initialization */
所有接收的包都有不同的类型,如IP,802.3,ARP,IPv6等,每种类型都有对应的packet_offload类型。其中IP层处理的数据包的类型是ETH_P_IP,对应结构为ip_packet_offload
[ net/ipv4/af_inet.c ]
/*
 *	IP protocol layer initialiser
 *
 *	Offload callbacks for ETH_P_IP packets; registered through
 *	dev_add_offload() by ipv4_offload_init().
 */
static struct packet_offload ip_packet_offload __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),
	.callbacks = {
		.gso_send_check = inet_gso_send_check,
		.gso_segment = inet_gso_segment,
		.gro_receive = inet_gro_receive,
		.gro_complete = inet_gro_complete,
	},
};
内核申明一个全局链表offload_base,通过下面的函数将packet_offload注册到链表中:
[ net/core/dev.c ]
/* Global list of registered packet_offload entries. */
static struct list_head offload_base __read_mostly;

/**
 *	dev_add_offload - register offload handlers
 *	@po: protocol offload declaration
 *
 *	Add protocol offload handlers to the networking stack. The passed
 *	&proto_offload is linked into kernel lists and may not be freed until
 *	it has been removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new offload handlers (until the next received packet).
 */
void dev_add_offload(struct packet_offload *po)
{
	struct list_head *head = &offload_base;	/* the global list */

	spin_lock(&offload_lock);
	list_add_rcu(&po->list, head);
	spin_unlock(&offload_lock);
}
EXPORT_SYMBOL(dev_add_offload);
上面提到内核为支持不同的协议,如UDP,TCP,申明了全局数组inet_protos,内核用相同的方法处理大负载:
[ net/ipv4/protocol.c ]
/* Registered offload handlers, indexed by protocol number. */
const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;

/*
 * inet_add_offload - claim slot @protocol of inet_offloads[] for @prot
 *
 * Returns 0 when the slot was empty and now holds @prot, -1 when it was
 * already occupied.
 */
int inet_add_offload(const struct net_offload *prot, unsigned char protocol)
{
	/* cmpxchg() yields the previous slot value; NULL means we installed @prot */
	return !cmpxchg((const struct net_offload **)&inet_offloads[protocol],
			NULL, prot) ? 0 : -1;
}
EXPORT_SYMBOL(inet_add_offload);
这样就把不同协议的net_offload结构注册到了全局数组inet_offloads中了。而对于IP层对应的结构为:
[ net/ipv4/af_inet.c ]
/* Offload callbacks registered for IPPROTO_IPIP by ipv4_offload_init();
 * only the GSO callbacks are set.
 */
static const struct net_offload ipip_offload = {
	.callbacks = {
		.gso_send_check = inet_gso_send_check,
		.gso_segment	= inet_gso_segment,
	},
};
0 0
- IP层实现1--初始化
- LWIP之IP层实现
- IP层实现2--gro
- TCP/IP协议--IP层ip_local_deliver实现
- ip层以上协议的初始化以及套接字相关
- lwip中ip层的实现浅析
- IP层实现3-接收数据
- 第二十二章 TCP/IP层的实现
- ip层和4层的接口实现分析
- ip层和4层的接口实现分析
- tilera netlib应用层协议栈-IP层实现方式
- ip层和4层的接口实现分析
- TCP/IP协议栈初始化(十一)完结篇-完成IP层与网卡的连接
- IP层
- TCP&IP协议: 4层协议栈架构的初始化过程
- Linux netfilter 学习笔记 之八 ip层netfilter的连接跟踪模块初始化
- Linux netfilter 学习笔记 之十一 ip层netfilter的NAT模块初始化以及NAT原理
- Linux netfilter 学习笔记 之十一 ip层netfilter的NAT模块初始化以及NAT原理
- wampserver多站点配置
- 0xCCCCCCCCC的内存不能访问的原因
- css clear:left 的本质
- 移动Web——CSS为Retina屏幕替换图片
- 多重继承的应用《教师类和干部类》
- IP层实现1--初始化
- 39个让你受益的HTML5教程
- CyberDuck是一个很棒的 FTP SFTP图形化工具
- ①-- > java中常用的方法
- 网页游戏常见外挂原理及防御
- C++学习笔记25 类模板的特化
- C语言堆栈入门——堆和栈的区别
- VS2012未能正确加载“Microsoft.VisualStudio.Editor.Implementation.EditorPackage”包
- 驾校一点通电脑版2015 v1.5 最新版