学习linux协议栈关键数据结构
来源:互联网 发布:python与量化投资 编辑:程序博客网 时间:2024/05/17 22:22
0x01 缘由
从大学做算法题开始,老师就一直强调数据结构的重要性:良好的数据结构设计会使程序设计更加合理和健壮。
0x02 关键结构
先摘录一个图,了解各数据结构之间的关系。--摘录于《Linux TCP IP 协议栈分析.pdf》
1.struct sk_buff - socket buffer
struct sk_buff { /* 这两个结构必须放在此结构的前面,主要方便数据的强制转换*/ struct sk_buff *next; //双向链表结构,指向下一个sk_buff struct sk_buff *prev; //指向前一个sk_buff结构 struct sock *sk; //这个指针指向一个套接字sock数据结构。当数据在本地产生或者本地进程接受时,需要这个指针;里面的数据会有tcp/udp和用户态程序使用。如果是转发此指针为NULL。后续详解。 ktime_t tstamp; //包到达的时间戳 struct net_device *dev; //网络设备,哪个网卡、虚拟网卡,后续结构详解。 unsigned long _skb_dst; /* * 这是控制缓冲区。 每层都可以自由使用 请把您的私有变量放在那里。 如果你想让他们跨层,你必须先做一个skb_clone()。 这是由谁拥有skb排队的ATM拥有。 */ char cb[48]; unsigned int len, //数据包的全部数据长度,包括data指向的数据和end后面的分片的数据的总长 data_len; //本分片所包含的数据长度 __u16 mac_len, //mac包头长度 hdr_len; //硬件头部长度 union { __wsum csum; struct { __u16 csum_start; __u16 csum_offset; }; }; //校验和 __u32 priority; //QoS等级 kmemcheck_bitfield_begin(flags1); __u8 local_df:1, cloned:1, ip_summed:2, nohdr:1, nfctinfo:3; __u8 pkt_type:3, // 根据L2层帧的目的地址进行类型划分。 fclone:2, //sk_buff克隆状态 ipvs_property:1, //IP虚拟服务器属性 peeked:1, //这个数据包已经被看到了,所以已经做了统计,不要再做了 nf_trace:1; //netfilter 包记录标识 __be16 protocol:16; //从L2层设备驱动看使用在下一个较高层的协议。 kmemcheck_bitfield_end(flags1); void (*destructor)(struct sk_buff *skb); //析构函数#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack *nfct; skb与连接的关系 struct sk_buff *nfct_reasm; netfilter conntrack重组指针#endif#ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge;//桥接帧数据#endif int iif; //到达的设备的索引,网卡索引编号#ifdef CONFIG_NET_SCHED __u16 tc_index; //流量控制索引#ifdef CONFIG_NET_CLS_ACT __u16 tc_verd; //流量控制决定#endif#endif kmemcheck_bitfield_begin(flags2); __u16 queue_mapping:16; //多队列网卡设备的映射关系#ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; //路由类型,来自链路层;#endif kmemcheck_bitfield_end(flags2); /* 0/14 bit hole */#ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; //被DMA相关函数完成的相关操作cookeie#endif#ifdef CONFIG_NETWORK_SECMARK __u32 secmark; //安全相关标记#endif __u32 mark; //通用标记 __u16 vlan_tci; //vlan标签控制信息 sk_buff_data_t transport_header; //传输层头 sk_buff_data_t network_header; //网络层头 sk_buff_data_t mac_header; //以太网层头 /* These elements must be at the end, see alloc_skb() for details. 
*/ sk_buff_data_t tail; sk_buff_data_t end; unsigned char *head, *data; //head和end指向的是数据区的开端和尾、,data和tail指向的是实际数据的开头和结尾 unsigned int truesize; //此缓冲区总大小,包括sk_buff。sk_buff只不过是个指针的集合,他所指的才是真正的数据区,所以是两部分。 atomic_t users; //引用计数,使用这个sk_buff的使用者的数目,可能有多个函数要使用同一个sk_buff所以防止提前释放掉,设置此计数};
2.struct sock - sockets的网络层描述
struct sock { sock_common __sk_common; //套接口在网络层的最小表示#define sk_node __sk_common.skc_node#define sk_nulls_node __sk_common.skc_nulls_node#define sk_refcnt __sk_common.skc_refcnt#define sk_copy_start __sk_common.skc_hash#define sk_hash __sk_common.skc_hash#define sk_family __sk_common.skc_family#define sk_state __sk_common.skc_state#define sk_reuse __sk_common.skc_reuse#define sk_bound_dev_if __sk_common.skc_bound_dev_if#define sk_bind_node __sk_common.skc_bind_node#define sk_prot __sk_common.skc_prot#define sk_net __sk_common.skc_net kmemcheck_bitfield_begin(flags); unsigned int sk_shutdown : 2, //是一组标志位,SEND_SHUTDOWN and/or RCV_SHUTDOWN。 sk_no_check : 2, //不对包进行检查标识 sk_userlocks : 4, // %SO_SNDBUF 和 %SO_RCVBUF 缓存设置锁 sk_protocol : 8, sk_type : 16; kmemcheck_bitfield_end(flags); int sk_rcvbuf; //接收缓存区大小 socket_lock_t sk_lock; //同步锁 /* * The backlog queue is special, it is always used with * the per-socket spinlock held and requires low latency * access. Therefore we special case it's implementation. */ struct { struct sk_buff *head; struct sk_buff *tail; } sk_backlog; //总是被自旋锁持有 wait_queue_head_t *sk_sleep; //在队列中等待的socket struct dst_entry *sk_dst_cache; //目的地址的缓存#ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2];#endif rwlock_t sk_dst_lock; //目的缓存读写锁 atomic_t sk_rmem_alloc; //表示接收队列已提交的字节数。 atomic_t sk_wmem_alloc; //表示发送队列已提交的字节数。 atomic_t sk_omem_alloc; //用“O”或“other”做选项 int sk_sndbuf; struct sk_buff_head sk_receive_queue; //表示接收的数据包的队列。 struct sk_buff_head sk_write_queue; //表示发送的数据包的队列。#ifdef CONFIG_NET_DMA struct sk_buff_head sk_async_wait_queue; //DMA复制数据包#endif int sk_wmem_queued; //维持的队列大小 int sk_forward_alloc; //转发空间分配 gfp_t sk_allocation; //分配空间的模式 int sk_route_caps; //路由容量 int sk_gso_type; //GSO type (e.g. 
%SKB_GSO_TCPV4) unsigned int sk_gso_max_size;//最大的GSO段大小 int sk_rcvlowat; // unsigned long sk_flags; //%SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, %SO_OOBINLINE 标识设定, %SO_TIMESTAMPING 标识设定 unsigned long sk_lingertime; //SO_LINGER设定 struct sk_buff_head sk_error_queue; //非常少用 struct proto *sk_prot_creator; //原始socket创建器 rwlock_t sk_callback_lock; // int sk_err, sk_err_soft; atomic_t sk_drops; //raw/udp drop计数 unsigned short sk_ack_backlog; //当前监听队列数 unsigned short sk_max_ack_backlog; //在listen()中设置的数目 __u32 sk_priority; //优先级 struct ucred sk_peercred; long sk_rcvtimeo; long sk_sndtimeo; struct sk_filter *sk_filter; //socket 过滤结构 void *sk_protinfo; //私有区域 struct timer_list sk_timer; //socket清理定时器 ktime_t sk_stamp; //最后一包接收时间 struct socket *sk_socket; //IO信号 void *sk_user_data; //RPC层私有数据 struct page *sk_sndmsg_page; //sndmsg缓存 struct sk_buff *sk_send_head; //转发数据头 __u32 sk_sndmsg_off; // sndmsg缓存偏移 int sk_write_pending;#ifdef CONFIG_SECURITY void *sk_security;#endif __u32 sk_mark; /* XXX 4 bytes hole on 64 bit */ void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk, int bytes); void (*sk_write_space)(struct sock *sk); void (*sk_error_report)(struct sock *sk); int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb); void (*sk_destruct)(struct sock *sk);};struct sock_common { unsigned short skc_family; /*地址族*/ volatile unsigned char skc_state; /*连接状态*/ unsigned char skc_reuse; /*SO_REUSEADDR设置*/ int skc_bound_dev_if; struct hlist_node skc_node; struct hlist_node skc_bind_node; /*哈希表相关*/ atomic_t skc_refcnt; /*引用计数*/};
3.struct net_device
struct net_device{ char name[IFNAMSIZ]; //网络设备名称,如eth0 struct hlist_node name_hlist; //这个字段用于构建网络设备名的哈希散列表,而struct net中的name_hlist就指向每个哈希散列表的链表头; char *ifalias; //网络设备的别名; /*网络设备内存映射时在主机中的内存区域*/ unsigned long mem_end; /* 共享内存结束 */ unsigned long mem_start; /* 共享内存开始 */ unsigned long base_addr; /* 网络设备I/O基地址 */ unsigned int irq; /* 设备终端号*/ unsigned char if_port; /* 传输介质,如双绞线、同轴电缆等,在多端口设备中指定使用哪个端口*/ unsigned char dma; /* DMA通道*/ unsigned long state; /* 网络设备物理上的工作状态 */ struct list_head dev_list; //网络设备链表 struct list_head napi_list;//支持NAPI传输的网络设备链表 /* Net device features */ unsigned long features; //设备硬件功能特性#define NETIF_F_SG 1 /* Scatter/gather IO. */#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */#define NETIF_F_IPV6_CSUM 16 /* Can checksum TCP/UDP over IPV6 */#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */#define NETIF_F_GSO 2048 /* Enable software GSO. */#define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. 
Please */ /* do not use LLTX in new drivers */#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */#define NETIF_F_GRO 16384 /* Generic receive offload */#define NETIF_F_LRO 32768 /* large receive offload *//* the GSO_MASK reserves bits 16 through 23 */#define NETIF_F_FCOE_CRC (1 << 24) /* FCoE CRC32 */#define NETIF_F_SCTP_CSUM (1 << 25) /* SCTP checksum offload */#define NETIF_F_FCOE_MTU (1 << 26) /* Supports max FCoE MTU, 2158 bytes*/ /* Segmentation offload features */#define NETIF_F_GSO_SHIFT 16#define NETIF_F_GSO_MASK 0x00ff0000#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)#define NETIF_F_UFO (SKB_GSO_UDP << NETIF_F_GSO_SHIFT)#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)#define NETIF_F_TSO_ECN (SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT)#define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT)#define NETIF_F_FSO (SKB_GSO_FCOE << NETIF_F_GSO_SHIFT) /* List of features with software fallbacks. */#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6)#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) /* * If one device supports one of these features, then enable them * for all in netdev_increment_features. */#define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ NETIF_F_SG | NETIF_F_HIGHDMA | \ NETIF_F_FRAGLIST) /* Interface index. Unique device identifier */ int ifindex;//标识网络设备的唯一索引号 int iflink;//用于虚拟网络设备 struct net_device_stats stats; //统计信息#ifdef CONFIG_WIRELESS_EXT /* List of functions to handle Wireless Extensions (instead of ioctl). * See <net/iw_handler.h> for details. Jean II */ const struct iw_handler_def * wireless_handlers; /* Instance data managed by the core of Wireless Extensions. 
*/ struct iw_public_data * wireless_data;#endif /* Management operations */ const struct net_device_ops *netdev_ops;//网络设备驱动程序需要实现的一组操作函数 const struct ethtool_ops *ethtool_ops;//支持ethtool功能的一组操作函数 /* Hardware header description */ const struct header_ops *header_ops;//数据链路层协议头相关的一组操作函数 unsigned int flags; /* 它们的可能取值定义在linux-2.6.38.8/include/linux/if.h文件中。*/ unsigned short gflags; unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */ unsigned short padded; /* 分配net_device结构体及其私有数据时为对齐所需的填充位数目*/ unsigned char operstate; /*//RFC 2863操作状态 */ unsigned char link_mode; /* 映射到RFC2863兼容状态的策略 */ unsigned mtu; /* MTU */ unsigned short type; /* 网络设备硬件类型,如10Mbps以太网ARPHRD_ETHER */ unsigned short hard_header_len; /* 硬件数据帧头的长度,以太网为14字节 */ /* extra head- and tailroom the hardware may need, but not in all cases * can this be guaranteed, especially tailroom. Some cases also use * LL_MAX_HEADER instead to allocate the skb. */ unsigned short needed_headroom; //分配套接字缓冲区时预留空间的长度 unsigned short needed_tailroom; struct net_device *master; /* 分组状态 */ /* 硬件(如MAC)地址长度以及设备的硬件地址 */ unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ struct netdev_hw_addr_list uc; /* 网络设备硬件地址组成的链表 */ int uc_promisc; //混杂模式时的单播地址个数 spinlock_t addr_list_lock;//防止单播地址链表和组播地址链表被并发访问的自旋锁 struct dev_addr_list *mc_list; /* Multicast mac addresses */ int mc_count; /* Number of installed mcasts */ unsigned int promiscuity; //混杂模式的计数器 unsigned int allmulti;//监听所有组播地址 /* 网络层协议特定数据 */#ifdef CONFIG_NET_DSA void *dsa_ptr; /* dsa specific data */#endif void *atalk_ptr; /* AppleTalk link */ void *ip_ptr; /* IPv4 specific data */ void *dn_ptr; /* DECnet specific data */ void *ip6_ptr; /* IPv6 specific data */ void *ec_ptr; /* Econet specific data */ void *ax25_ptr; /* AX.25 specific data */ struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data, assign before registering *//* * 
Cache line mostly used on receive path (including eth_type_trans()) */ unsigned long last_rx; /* 最后接收数据包的时间 */ /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; /* hw address, (before bcast because most packets are unicast) */ struct netdev_hw_addr_list dev_addrs; /* //网络设备硬件地址组成的链表 */ unsigned char broadcast[MAX_ADDR_LEN]; /* //广播地址 */ struct netdev_queue rx_queue; //接收队列,RPS(Receive Packet Steering)特性 struct netdev_queue *_tx ____cacheline_aligned_in_smp; /* Number of TX queues allocated at alloc_netdev_mq() time */ unsigned int num_tx_queues; //发送队列 /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; /* root qdisc from userspace point of view */ struct Qdisc *qdisc; unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock;/* * One part is mostly used on xmit path (device) */ /* These may be needed for future network-power-down code. */ /* * trans_start here is expensive for high speed devices on SMP, * please use netdev_queue->trans_start instead. 
*/ unsigned long trans_start; /* //最近传送数据包的时间 */ int watchdog_timeo; //发生传输超时时,设置的标志 struct timer_list watchdog_timer;//网络层设置的传送数据包超时的时钟 /* Number of references to this device */ atomic_t refcnt ____cacheline_aligned_in_smp; /* delayed register/unregister */ struct list_head todo_list;//延迟注册/注销的网络设备链表 /* device index hash chain */ struct hlist_node index_hlist;//以索引号为关键字的网络设备哈希链表 struct net_device *link_watch_next; /* register/unregister state machine */ enum { NETREG_UNINITIALIZED=0, NETREG_REGISTERED, /* completed register_netdevice */ NETREG_UNREGISTERING, /* called unregister_netdevice */ NETREG_UNREGISTERED, /* completed unregister todo */ NETREG_RELEASED, /* called free_netdev */ NETREG_DUMMY, /* dummy device for NAPI poll */ } reg_state; //设备注册/注销状态机 /* Called from unregister, can be used to call free_netdev */ void (*destructor)(struct net_device *dev);#ifdef CONFIG_NETPOLL struct netpoll_info *npinfo;//NETPOLL相关信息 #endif#ifdef CONFIG_NET_NS /* Network namespace this network device is inside */ struct net *nd_net; //网络命名空间#endif /* mid-layer private */ void *ml_priv; //中间层的私有数据 /* bridge stuff */ struct net_bridge_port *br_port; //桥接模式 /* macvlan */ struct macvlan_port *macvlan_port; /* GARP */ struct garp_port *garp_port; /* class/net/name entry */ struct device dev; //在sysfs文件系统中输出网络设备信息 /* space for optional statistics and wireless sysfs groups */ const struct attribute_group *sysfs_groups[3]; /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; //rtnetlink操作函数 /* VLAN feature mask */ unsigned long vlan_features; //虚拟局域网相关 /* for setting kernel sock attribute on TCP connection setup */#define GSO_MAX_SIZE 65536 unsigned int gso_max_size; //GSO最大值 #ifdef CONFIG_DCB /* Data Center Bridging netlink ops */ struct dcbnl_rtnl_ops *dcbnl_ops; //DCB操作函数 #endif#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) /* max exchange id for FCoE LRO by ddp */ unsigned int fcoe_ddp_xid;#endif};
参考:http://blog.sina.com.cn/s/blog_636a55070101qfse.html
0x03 总结
在学习过程中,发现相关代码都有良好的注释。仅仅需要做下翻译,然后理解其目的。
阅读全文
0 0
- 学习linux协议栈关键数据结构
- Linux TCP/IP 协议栈的关键数据结构Socket Buffer
- Linux TCP/IP 协议栈的关键数据结构Socket Buffer
- Linux TCP/IP 协议栈的关键数据结构Socket
- (转贴)Linux TCP/IP 协议栈的关键数据结构Socket Buffer(sk_buff )
- Linux TCP/IP 协议栈的关键数据结构Socket Buffer(sk_buff )
- [转]Linux TCP/IP 协议栈的关键数据结构Socket Buffer(sk_buff )
- Linux TCP/IP协议栈的关键数据结构Socket Buffer(sk_buff )
- Linux TCP/IP 协议栈的关键数据结构Socket Buffer(sk_buff )
- Linux TCP/IP 协议栈的关键数据结构Socket Buffer(sk_buff )
- Linux TCP/IP 协议栈的关键数据结构Socket Buffer(sk_buff )
- Linux TCP/IP 协议栈的关键数据结构Socket Buffer(sk_buff )
- LINUX TCP/IP 协议栈的关键数据结构SOCKET BUFFER(SK_BUFF )
- Linux TCP/IP 协议栈的关键数据结构Socket Buffer(sk_buff )
- LINUX TCP/IP 协议栈的关键数据结构SOCKET BUFFER(SK_BUFF )
- Linux TCP/IP 协议栈的关键数据结构Socket Buffer(sk_buff )
- [转]Linux TCP/IP 协议栈的关键数据结构Socket Buffer(sk_buff )
- Linux TCP/IP 协议栈的关键数据结构Socket Buffer(sk_buff )
- C++之线程池的原理及创建
- 分享七天轻松拿到公众号原创+赞赏
- Android 仿微信语音聊天音量大小显示控件
- Ajax的Json操作
- html css3不拉伸图片显示效果,类似淘宝的
- 学习linux协议栈关键数据结构
- Project Euler 11-15题
- 实践中学帆软(一)
- Vim常用命令
- 【Android】permissions4m 库解决系统运行时权限使用记录
- JAVA编程思想学习 --- 第八章 (对象的容纳)
- 搭建vsftpd服务器及简单配置
- 引用表格数据
- HDOJ HDU 1109 Run Away