linux网络协议栈-socket

来源:互联网 发布:java模拟器安卓版5.1 编辑:程序博客网 时间:2024/05/22 03:10

1、TCP/IP参考模型
为了实现各种网络的互连,国际标准化组织(ISO)制定了开放式系统互连(OSI)参考模型。尽管OSI的体系结构从理论上讲是比较完整的,但实际上,完全符合OSI各层协议的商用产品却很少进入市场。而使用TCP/IP协议的产品却大量涌入市场,几乎所有的工作站都配有TCP/IP协议,使得TCP/IP成为计算机网络事实上的国际标准。
[图:TCP/IP参考模型示意图(原文插图缺失)]

2、套接字(socket)
socket是操作系统的重要组成部分之一,它是网络应用程序的基础。从层次上来说,它位于应用层与传输层之间,是操作系统为应用程序员提供的API,通过它,应用程序可以访问传输层协议。
1、socket 位于传输层协议之上,屏蔽了不同网络协议之间的差异;
2、socket是网络编程的入口,它提供了大量的系统调用,构成了网络程序的主体;
3、在Linux系统中,socket属于文件系统的一部分,网络通信可以被看作是对文件的读写,使得我们对网络的控制和对文件的控制一样方便。
[图:套接字(socket)示意图(原文插图缺失)]

2.1、套接字地址
在传输层上,通信端点可由Internet上3个参数描述:所用的协议、IP地址和端口号。这些内容由sockaddr描述:

//usr/include/sys/socket.htypedef unsigned short    sa_family_t;//通用socket地址struct sockaddr {    sa_family_t    sa_family;    /* address family, AF_xxx,协议簇*/    char        sa_data[14];    /* 14 bytes of protocol address    */};//usr/include/netinet/in.h//INET地址簇的socket地址struct in_addr {                    __u32 s_addr;};struct sockaddr_in   {   sa_family_t            sin_family;      /* Address family: AF_INET */   unsigned short int     sin_port;        /* Port number,端口*/   struct in_addr         sin_addr;        /* Internet address,IP地址*/   /* Pad to size of 'struct sockaddr' . */   unsigned char sin_zero[sizeof (struct sockaddr) -                          sizeof (sa_family_t) -                          sizeof (uint16_t) -                          sizeof (struct in_addr)];};

Linux 支持的套接字地址族:

| 套接字地址族 | 描述 |
| --- | --- |
| UNIX | UNIX 域套接字 |
| INET | 通过 TCP/IP 协议支持的 Internet 地址族 |
| AX25 | Amateur radio X25 |
| APPLETALK | Appletalk DDP |
| IPX | Novell IPX |
| X25 | X25 |

Linux 所支持的BSD套接字类型:

| BSD 套接字类型 | 描述 |
| --- | --- |
| 流(stream) | 这种套接字提供了可靠的双向顺序数据流,可保证数据不会在传输过程中丢失、破坏或重复出现。流套接字通过 INET 地址族的 TCP 协议实现。 |
| 数据报(datagram) | 这种套接字也提供双向的数据传输,但是并不对数据的传输提供担保,也就是说,数据可能会以错误的顺序传递,甚至丢失或破坏。这种类型的套接字通过 INET 地址族的 UDP 协议实现。 |
| 原始(raw) | 利用这种类型的套接字,进程可以直接访问底层协议(因此称为原始)。例如,可在某个以太网设备上打开原始套接字,然后获取原始的 IP 数据传输信息。 |
| 可靠发送的消息 | 和数据报套接字类似,但保证数据被正确传输到目的端。 |
| 顺序数据包 | 和流套接字类似,但数据包大小是固定的。 |
| 数据包(packet) | 这并不是标准的 BSD 套接字类型,它是 Linux 专有的 BSD 套接字扩展,可允许进程直接在设备级访问数据包。 |

2.2、套接字操作
套接字(更确切的说是BSD套接字)为应用程序提供了基本的API,这些API是编写网络应用程序的基础。

[图:套接字操作(API)示意图(原文插图缺失)]

3、套接字的实现
套接字最先是在UNIX的BSD版本实现的,所以也叫做BSD套接字,它隐藏了各个协议之间的差异,并向上提供统一的接口。Linux中实现套接字的基本结构:
[图:Linux中实现套接字的基本结构(原文插图缺失)]

3.1、BSD套接字
3.1.1、核心数据结构
为了实现BSD套接字,内核提供一个重要的数据结构struct socket,它的定义如下:

 /* BSD socket (include/linux/net.h) */
struct socket {
    socket_state        state;  /* socket state */
    unsigned long        flags;
    struct proto_ops    *ops; /* table of operations */
    struct fasync_struct    *fasync_list;
    struct file        *file; /* every BSD socket has an inode; the file object ties the socket to it */
    struct sock        *sk; /* internal socket structure, specific to the protocol family (e.g. PF_INET) */
    wait_queue_head_t    wait;
    short            type;    /* socket type, e.g. SOCK_STREAM, SOCK_DGRAM, SOCK_RAW, SOCK_RDM, SOCK_SEQPACKET, and SOCK_PACKET */
    unsigned char        passcred;
};

/* Table of operations for a BSD socket. */
struct proto_ops {
    int        family;
    struct module    *owner;
    int        (*release)   (struct socket *sock);
    int        (*bind)         (struct socket *sock,
                      struct sockaddr *myaddr,
                      int sockaddr_len);
    int        (*connect)   (struct socket *sock,
                      struct sockaddr *vaddr,
                      int sockaddr_len, int flags);
    int        (*socketpair)(struct socket *sock1,
                      struct socket *sock2);
    int        (*accept)    (struct socket *sock,
                      struct socket *newsock, int flags);
    int        (*getname)   (struct socket *sock,
                      struct sockaddr *addr,
                      int *sockaddr_len, int peer);
    unsigned int    (*poll)         (struct file *file, struct socket *sock,
                      struct poll_table_struct *wait);
    int        (*ioctl)     (struct socket *sock, unsigned int cmd,
                      unsigned long arg);
    int        (*listen)    (struct socket *sock, int len);
    int        (*shutdown)  (struct socket *sock, int flags);
    int        (*setsockopt)(struct socket *sock, int level,
                      int optname, char __user *optval, int optlen);
    int        (*getsockopt)(struct socket *sock, int level,
                      int optname, char __user *optval, int __user *optlen);
    int        (*sendmsg)   (struct kiocb *iocb, struct socket *sock,
                      struct msghdr *m, size_t total_len);
    int        (*recvmsg)   (struct kiocb *iocb, struct socket *sock,
                      struct msghdr *m, size_t total_len,
                      int flags);
    int        (*mmap)         (struct file *file, struct socket *sock,
                      struct vm_area_struct * vma);
    ssize_t        (*sendpage)  (struct socket *sock, struct page *page,
                      int offset, size_t size, int flags);
};

/* BSD socket states. */
typedef enum {
    SS_FREE = 0,            /* not allocated        */
    SS_UNCONNECTED,            /* unconnected to any socket    */
    SS_CONNECTING,            /* in process of connecting    */
    SS_CONNECTED,            /* connected to socket        */
    SS_DISCONNECTING        /* in process of disconnecting    */
} socket_state;

3.1.2、BSD套接字初始化

//net/socket.c//BSD套接字的初始化void __init sock_init(void){    int i;    /*     *    Initialize all address (protocol) families.      */    for (i = 0; i < NPROTO; i++)         net_families[i] = NULL; //协议簇数组初始化    /*     *    Initialize sock SLAB cache.     */     //分配sock缓存    sk_init();#ifdef SLAB_SKB    /*     *    Initialize skbuff SLAB cache      */    skb_init();#endif    /*     *    Initialize the protocols module.      */    init_inodecache();    //注册sockfs文件系统    register_filesystem(&sock_fs_type);    //安装sockfs    sock_mnt = kern_mount(&sock_fs_type);    /* The real protocol initialization is performed when     *  do_initcalls is run.       */#ifdef CONFIG_NETFILTER    netfilter_init();#endif}//net/socket.c//sockfs文件系统的安装点static struct vfsmount *sock_mnt;//sockfs文件系统类型static struct file_system_type sock_fs_type = {    .name =        "sockfs",    .get_sb =    sockfs_get_sb,    .kill_sb =    kill_anon_super,}; //地址簇及协议信息static struct net_proto_family *net_families[NPROTO]; 

sock_init在系统初始化时被调用:

[图:sock_init的调用路径(原文插图缺失)]

3.1.3、BSD套接字的系统调用
实际上,在 i386 等体系结构上,Linux内核只提供了一个与套接字相关的系统调用入口,即sys_socketcall,应用程序的所有套接字调用都会映射到这个系统调用上。

//BSD套接字调用入口(net/socket.c)asmlinkage long sys_socketcall(int call, unsigned long __user *args){    unsigned long a[6];    unsigned long a0,a1;    int err;    if(call<1||call>SYS_RECVMSG)        return -EINVAL;    /* copy_from_user should be SMP safe. */    if (copy_from_user(a, args, nargs[call]))//从用户区拷贝参数        return -EFAULT;    a0=a[0];    a1=a[1];    switch(call)  //调用相应的函数    {        case SYS_SOCKET:            err = sys_socket(a0,a1,a[2]);            break;        case SYS_BIND:            err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]);            break;        case SYS_CONNECT:            err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);            break;        case SYS_LISTEN:            err = sys_listen(a0,a1);            break;        case SYS_ACCEPT:            err = sys_accept(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);            break;        case SYS_GETSOCKNAME:            err = sys_getsockname(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);            break;        case SYS_GETPEERNAME:            err = sys_getpeername(a0, (struct sockaddr __user *)a1, (int __user *)a[2]);            break;        case SYS_SOCKETPAIR:            err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]);            break;        case SYS_SEND:            err = sys_send(a0, (void __user *)a1, a[2], a[3]);            break;        case SYS_SENDTO:            err = sys_sendto(a0,(void __user *)a1, a[2], a[3],                     (struct sockaddr __user *)a[4], a[5]);            break;        case SYS_RECV:            err = sys_recv(a0, (void __user *)a1, a[2], a[3]);            break;        case SYS_RECVFROM:            err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],                       (struct sockaddr __user *)a[4], (int __user *)a[5]);            break;        case SYS_SHUTDOWN:            err = sys_shutdown(a0,a1);            break;        case SYS_SETSOCKOPT:            err = sys_setsockopt(a0, a1, a[2], (char __user 
*)a[3], a[4]);            break;        case SYS_GETSOCKOPT:            err = sys_getsockopt(a0, a1, a[2], (char __user *)a[3], (int __user *)a[4]);            break;        case SYS_SENDMSG:            err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]);            break;        case SYS_RECVMSG:            err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]);            break;        default:            err = -EINVAL;            break;    }    return err;}//include/asm/unistd.h#define __NR_socketcall        102  //系统调用号

下面来看一下sys_socket的实现:

//net/socket.c/*创建socket**首先建立一个socket数据结构,然后将其“映射”到一个已打开的文件.*/asmlinkage long sys_socket(int family, int type, int protocol){    int retval;    struct socket *sock;    //创建socket    retval = sock_create(family, type, protocol, &sock);    if (retval < 0)        goto out;    //将socket映射到文件描述符    retval = sock_map_fd(sock);    if (retval < 0)        goto out_release;out:    /* It may be already another descriptor 8) Not kernel problem. */    return retval;out_release:    sock_release(sock);    return retval;}int sock_create(int family, int type, int protocol, struct socket **res){    return __sock_create(family, type, protocol, res, 0);}static int __sock_create(int family, int type, int protocol, struct socket **res, int kern){    int i;    int err;    struct socket *sock;    /*     *    Check protocol is in range     */     //检查协议是否可用    if (family < 0 || family >= NPROTO)        return -EAFNOSUPPORT;    if (type < 0 || type >= SOCK_MAX)        return -EINVAL;    /* Compatibility.       This uglymoron is moved from INET layer to here to avoid       deadlock in module load.     */    if (family == PF_INET && type == SOCK_PACKET) {        static int warned;         if (!warned) {            warned = 1;            printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);        }        family = PF_PACKET;    }    err = security_socket_create(family, type, protocol, kern);    if (err)        return err;#if defined(CONFIG_KMOD)    /* Attempt to load a protocol module if the find failed.      *      * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user      * requested real, full-featured networking support upon configuration.     * Otherwise module support will break!     
*/    if (net_families[family]==NULL)    {        request_module("net-pf-%d",family);    }#endif    net_family_read_lock();    if (net_families[family] == NULL) {        i = -EAFNOSUPPORT;        goto out;    }/* *    Allocate the socket and allow the family to set things up. if *    the protocol is 0, the family is instructed to select an appropriate *    default. */    //从sockfs分配一个inode,并为之分配一个套接字结构    if (!(sock = sock_alloc()))     {        printk(KERN_WARNING "socket: no more sockets\n");        i = -ENFILE;        /* Not exactly a match, but its the                       closest posix thing */        goto out;    }    //设置类型    sock->type  = type;    /*     * We will call the ->create function, that possibly is in a loadable     * module, so we have to bump that loadable module refcnt first.     */    i = -EAFNOSUPPORT;    if (!try_module_get(net_families[family]->owner))        goto out_release;    //调用具体协议的create函数    if ((i = net_families[family]->create(sock, protocol)) < 0)        goto out_module_put;    /*     * Now to bump the refcnt of the [loadable] module that owns this     * socket at sock_release time we decrement its refcnt.     */    if (!try_module_get(sock->ops->owner)) {        sock->ops = NULL;        goto out_module_put;    }    /*     * Now that we're done with the ->create function, the [loadable]     * module can have its refcnt decremented     */    module_put(net_families[family]->owner);    *res = sock;    security_socket_post_create(sock, family, type, protocol, kern);out:    net_family_read_unlock();    return i;out_module_put:    module_put(net_families[family]->owner);out_release:    sock_release(sock);    goto out;}///////////////////////////////////////////////////////////int sock_map_fd(struct socket *sock){    int fd;    struct qstr this;    char name[32];    /*     *    Find a file descriptor suitable for return to the user.      
*/    //分配一个没有使用的描述符    fd = get_unused_fd();    if (fd >= 0) {        struct file *file = get_empty_filp();        if (!file) {            put_unused_fd(fd);            fd = -ENFILE;            goto out;        }        sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);        this.name = name;        this.len = strlen(name);        this.hash = SOCK_INODE(sock)->i_ino;        //从sockfs文件系统中分配一个目录项对象        file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);        if (!file->f_dentry) {            put_filp(file);            put_unused_fd(fd);            fd = -ENOMEM;            goto out;        }        file->f_dentry->d_op = &sockfs_dentry_operations;        //将目录项对象与sock的索引节点关联起来        d_add(file->f_dentry, SOCK_INODE(sock));        file->f_vfsmnt = mntget(sock_mnt);        file->f_mapping = file->f_dentry->d_inode->i_mapping;        //设置sock对应的文件对象        sock->file = file;        //设置文件对象的操作函数        file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;        file->f_mode = FMODE_READ | FMODE_WRITE;        file->f_flags = O_RDWR;        file->f_pos = 0;        fd_install(fd, file);    }out:    return fd;}

3.2、INET套接字
INET套接字就是支持 Internet 地址族的套接字,它位于TCP、UDP等传输层协议之上,BSD套接字之下,如下:
[图:INET套接字所处层次示意图(原文插图缺失)]
3.2.1、数据结构

/* include/net/sock.h */
/* Protocol-specific socket structure. */
struct sock {
    /*
     * Now struct tcp_tw_bucket also uses sock_common, so please just
     * don't add nothing before this first member (__sk_common) --acme
     */
    struct sock_common    __sk_common;
/* Shorthand names for the members embedded in __sk_common. */
#define sk_family        __sk_common.skc_family
#define sk_state        __sk_common.skc_state
#define sk_reuse        __sk_common.skc_reuse
#define sk_bound_dev_if        __sk_common.skc_bound_dev_if
#define sk_node            __sk_common.skc_node
#define sk_bind_node        __sk_common.skc_bind_node
#define sk_refcnt        __sk_common.skc_refcnt
    volatile unsigned char    sk_zapped;
    unsigned char        sk_shutdown;
    unsigned char        sk_use_write_queue;
    unsigned char        sk_userlocks;
    socket_lock_t        sk_lock;
    int            sk_rcvbuf;
    wait_queue_head_t    *sk_sleep;
    struct dst_entry    *sk_dst_cache;
    rwlock_t        sk_dst_lock;
    struct xfrm_policy    *sk_policy[2];
    atomic_t        sk_rmem_alloc;
    struct sk_buff_head    sk_receive_queue;
    atomic_t        sk_wmem_alloc;
    struct sk_buff_head    sk_write_queue;
    atomic_t        sk_omem_alloc;
    int            sk_wmem_queued;
    int            sk_forward_alloc;
    unsigned int        sk_allocation;
    int            sk_sndbuf;
    unsigned long         sk_flags;
    char             sk_no_check;
    unsigned char        sk_debug;
    unsigned char        sk_rcvtstamp;
    unsigned char        sk_no_largesend;
    int            sk_route_caps;
    unsigned long            sk_lingertime;
    int            sk_hashent;
    /*
     * The backlog queue is special, it is always used with
     * the per-socket spinlock held and requires low latency
     * access. Therefore we special case it's implementation.
     */
    struct {
        struct sk_buff *head;
        struct sk_buff *tail;
    } sk_backlog;
    rwlock_t        sk_callback_lock;
    struct sk_buff_head    sk_error_queue;
    struct proto        *sk_prot;
    int            sk_err,
                sk_err_soft;
    unsigned short        sk_ack_backlog;
    unsigned short        sk_max_ack_backlog;
    __u32            sk_priority;
    unsigned short        sk_type;
    unsigned char        sk_localroute;
    unsigned char        sk_protocol;
    struct ucred        sk_peercred;
    int            sk_rcvlowat;
    long            sk_rcvtimeo;
    long            sk_sndtimeo;
    struct sk_filter          *sk_filter;
    void            *sk_protinfo;
    kmem_cache_t        *sk_slab;
    struct timer_list    sk_timer;
    struct timeval        sk_stamp;
    struct socket        *sk_socket;
    void            *sk_user_data;
    struct module        *sk_owner;
    struct page        *sk_sndmsg_page;
    __u32            sk_sndmsg_off;
    struct sk_buff        *sk_send_head;
    int            sk_write_pending;
    void            *sk_security;
    __u8            sk_queue_shrunk;
    /* three bytes hole, try to pack */
    void            (*sk_state_change)(struct sock *sk);
    void            (*sk_data_ready)(struct sock *sk, int bytes);
    void            (*sk_write_space)(struct sock *sk);
    void            (*sk_error_report)(struct sock *sk);
    int            (*sk_backlog_rcv)(struct sock *sk,
                          struct sk_buff *skb);
    void                    (*sk_destruct)(struct sock *sk);
};

/* Operations provided by the underlying protocol. */
struct proto {
    void            (*close)(struct sock *sk,
                     long timeout);
    int            (*connect)(struct sock *sk,
                        struct sockaddr *uaddr,
                     int addr_len);
    int            (*disconnect)(struct sock *sk, int flags);
    struct sock *        (*accept) (struct sock *sk, int flags, int *err);
    int            (*ioctl)(struct sock *sk, int cmd,
                     unsigned long arg);
    int            (*init)(struct sock *sk);
    int            (*destroy)(struct sock *sk);
    void            (*shutdown)(struct sock *sk, int how);
    int            (*setsockopt)(struct sock *sk, int level,
                     int optname, char __user *optval,
                    int optlen);
    int            (*getsockopt)(struct sock *sk, int level,
                     int optname, char __user *optval,
                     int __user *option);
    int            (*sendmsg)(struct kiocb *iocb, struct sock *sk,
                       struct msghdr *msg, size_t len);
    int            (*recvmsg)(struct kiocb *iocb, struct sock *sk,
                       struct msghdr *msg,
                    size_t len, int noblock, int flags,
                     int *addr_len);
    int            (*sendpage)(struct sock *sk, struct page *page,
                    int offset, size_t size, int flags);
    int            (*bind)(struct sock *sk,
                     struct sockaddr *uaddr, int addr_len);
    int            (*backlog_rcv) (struct sock *sk,
                         struct sk_buff *skb);
    /* Keeping track of sk's, looking them up, and port selection methods. */
    void            (*hash)(struct sock *sk);
    void            (*unhash)(struct sock *sk);
    int            (*get_port)(struct sock *sk, unsigned short snum);
    /* Memory pressure */
    void            (*enter_memory_pressure)(void);
    atomic_t        *memory_allocated;    /* Current allocated memory. */
    atomic_t        *sockets_allocated;    /* Current number of sockets. */
    /*
     * Pressure flag: try to collapse.
     * Technical note: it is used by multiple contexts non atomically.
     * All the sk_stream_mem_schedule() is of this nature: accounting
     * is strict, actions are advisory and have some latency.
     */
    int            *memory_pressure;
    int            *sysctl_mem;
    int            *sysctl_wmem;
    int            *sysctl_rmem;
    int            max_header;
    kmem_cache_t        *slab;
    int            slab_obj_size;
    struct module        *owner;
    char            name[32];
    struct {
        int inuse;
        u8  __pad[SMP_CACHE_BYTES - sizeof(int)];
    } stats[NR_CPUS];
};

inet_init()函数:

//net/ipv4/af_inet.c/*系统初始化时被调用**调用路径:start_kernel() -->init() -->do_basic_setup() -->do_initcalls()-->inet_init()*/static int __init inet_init(void){    struct sk_buff *dummy_skb;    struct inet_protosw *q;    struct list_head *r;    int rc = -EINVAL;    if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) {        printk(KERN_CRIT "%s: panic\n", __FUNCTION__);        goto out;    }    rc = sk_alloc_slab(&tcp_prot, "tcp_sock");    if (rc) {        sk_alloc_slab_error(&tcp_prot);        goto out;    }    rc = sk_alloc_slab(&udp_prot, "udp_sock");    if (rc) {        sk_alloc_slab_error(&udp_prot);        goto out_tcp_free_slab;    }    rc = sk_alloc_slab(&raw_prot, "raw_sock");    if (rc) {        sk_alloc_slab_error(&raw_prot);        goto out_udp_free_slab;    }    /*     *    Tell SOCKET that we are alive      */    //注册Internet协议簇的相关信息      (void)sock_register(&inet_family_ops);    /*     *    Add all the base protocols.     */    //添加基本的协议    if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)        printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");    if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)        printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");    if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)        printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");#ifdef CONFIG_IP_MULTICAST    if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)        printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");#endif    /* Register the socket-side information for inet_create. */    for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)        INIT_LIST_HEAD(r);  //将inetsw_array中元素加入到inetsw链表中    for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)        inet_register_protosw(q);    /*     *    Set the ARP module up     */    arp_init(); //ARP协议初始化      /*       *    Set the IP module up       */    ip_init(); //IP协议初始化    tcp_v4_init(&inet_family_ops);    /* Setup TCP slab cache for open requests. 
*/    tcp_init();    /*     *    Set the ICMP layer up     */    icmp_init(&inet_family_ops);    /*     *    Initialise the multicast router     */#if defined(CONFIG_IP_MROUTE)    ip_mr_init();#endif    /*     *    Initialise per-cpu ipv4 mibs     */     if(init_ipv4_mibs())        printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ;    ipv4_proc_init();    ipfrag_init();    rc = 0;out:    return rc;out_tcp_free_slab:    sk_free_slab(&tcp_prot);out_udp_free_slab:    sk_free_slab(&udp_prot);    goto out;}//net/ipv4/af_inet.c//INET协议簇信息static struct net_proto_family inet_family_ops = {    .family = PF_INET,    .create = inet_create,    .owner    = THIS_MODULE,};static struct list_head inetsw[SOCK_MAX];//该数组中的所有元素都会插入到inetsw的链表中static struct inet_protosw inetsw_array[] ={        {                .type =       SOCK_STREAM,                .protocol =   IPPROTO_TCP,                .prot =       &tcp_prot,                .ops =        &inet_stream_ops,                .capability = -1,                .no_check =   0,                .flags =      INET_PROTOSW_PERMANENT,        },        {                .type =       SOCK_DGRAM,                .protocol =   IPPROTO_UDP,                .prot =       &udp_prot,                .ops =        &inet_dgram_ops,                .capability = -1,                .no_check =   UDP_CSUM_DEFAULT,                .flags =      INET_PROTOSW_PERMANENT,       },       {               .type =       SOCK_RAW,               .protocol =   IPPROTO_IP,    /* wild card */               .prot =       &raw_prot,               .ops =        &inet_sockraw_ops,               .capability = CAP_NET_RAW,               .no_check =   UDP_CSUM_DEFAULT,               .flags =      INET_PROTOSW_REUSE,       }};//流套接字操作函数struct proto_ops inet_stream_ops = {    .family =    PF_INET,    .owner =    THIS_MODULE,    .release =    inet_release,    .bind =        inet_bind,    .connect =    inet_stream_connect,    .socketpair =    sock_no_socketpair,    .accept =  
  inet_accept,    .getname =    inet_getname,    .poll =        tcp_poll,    .ioctl =    inet_ioctl,    .listen =    inet_listen,    .shutdown =    inet_shutdown,    .setsockopt =    sock_common_setsockopt,    .getsockopt =    sock_common_getsockopt,    .sendmsg =    inet_sendmsg,    .recvmsg =    sock_common_recvmsg,    .mmap =        sock_no_mmap,    .sendpage =    tcp_sendpage};//tcp协议static struct net_protocol tcp_protocol = {    .handler =    tcp_v4_rcv,    .err_handler =    tcp_v4_err,    .no_policy =    1,};static struct net_protocol udp_protocol = {    .handler =    udp_rcv,    .err_handler =    udp_err,    .no_policy =    1,};static struct net_protocol icmp_protocol = {    .handler =    icmp_rcv,};//net/ipv4/tcp_ipv4.c//tcp协议的操作函数struct proto tcp_prot = {    .name            = "TCP",    .owner            = THIS_MODULE,    .close            = tcp_close,    .connect        = tcp_v4_connect,    .disconnect        = tcp_disconnect,    .accept            = tcp_accept,    .ioctl            = tcp_ioctl,    .init            = tcp_v4_init_sock,    .destroy        = tcp_v4_destroy_sock,    .shutdown        = tcp_shutdown,    .setsockopt        = tcp_setsockopt,    .getsockopt        = tcp_getsockopt,    .sendmsg        = tcp_sendmsg,    .recvmsg        = tcp_recvmsg,    .backlog_rcv        = tcp_v4_do_rcv,    .hash            = tcp_v4_hash,    .unhash            = tcp_unhash,    .get_port        = tcp_v4_get_port,    .enter_memory_pressure    = tcp_enter_memory_pressure,    .sockets_allocated    = &tcp_sockets_allocated,    .memory_allocated    = &tcp_memory_allocated,    .memory_pressure    = &tcp_memory_pressure,    .sysctl_mem        = sysctl_tcp_mem,    .sysctl_wmem        = sysctl_tcp_wmem,    .sysctl_rmem        = sysctl_tcp_rmem,    .max_header        = MAX_TCP_HEADER,    .slab_obj_size        = sizeof(struct tcp_sock),};

sock_register()函数:

//注册协议簇int sock_register(struct net_proto_family *ops){    int err;    if (ops->family >= NPROTO) {        printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);        return -ENOBUFS;    }    net_family_write_lock();    err = -EEXIST;    if (net_families[ops->family] == NULL) {        net_families[ops->family]=ops;        err = 0;    }    net_family_write_unlock();    printk(KERN_INFO "NET: Registered protocol family %d\n",           ops->family);    return err;}

inet_create()函数

/*
 * Create an INET socket.
 *
 * Looks up the inet_protosw entry matching sock->type and protocol in
 * inetsw[], allocates the struct sock from that protocol's slab, links it
 * to the BSD socket and runs the protocol's init hook if it has one.
 *
 * Returns 0 on success, otherwise a negative errno value:
 * -ESOCKTNOSUPPORT (no matching entry), -EPERM (capability check failed),
 * -EPROTONOSUPPORT (protocol still 0 after the lookup), -ENOBUFS
 * (sk_alloc failed) or the error returned by the protocol's init hook.
 */
static int inet_create(struct socket *sock, int protocol)
{
    struct sock *sk;
    struct list_head *p;
    struct inet_protosw *answer;
    struct inet_opt *inet;
    struct proto *answer_prot;
    unsigned char answer_flags;
    char answer_no_check;
    int err;

    sock->state = SS_UNCONNECTED;

    /* Look for the requested type/protocol pair. */
    answer = NULL;
    rcu_read_lock();
    list_for_each_rcu(p, &inetsw[sock->type]) {
        answer = list_entry(p, struct inet_protosw, list);

        /* Check the non-wild match. */
        if (protocol == answer->protocol) {
            if (protocol != IPPROTO_IP)
                break;
        } else {
            /* Check for the two wild cases. */
            if (IPPROTO_IP == protocol) {
                protocol = answer->protocol;
                break;
            }
            if (IPPROTO_IP == answer->protocol)
                break;
        }
        /* no match on this entry: reset so a finished loop leaves NULL */
        answer = NULL;
    }

    /* err is set before each check so the goto carries the right code */
    err = -ESOCKTNOSUPPORT;
    if (!answer)
        goto out_rcu_unlock;
    err = -EPERM;
    if (answer->capability > 0 && !capable(answer->capability))
        goto out_rcu_unlock;
    err = -EPROTONOSUPPORT;
    if (!protocol)
        goto out_rcu_unlock;

    /* operations table of the BSD socket */
    sock->ops = answer->ops;
    /* copy what is needed from answer before leaving the RCU section */
    answer_prot = answer->prot;
    answer_no_check = answer->no_check;
    answer_flags = answer->flags;
    rcu_read_unlock();

    BUG_TRAP(answer_prot->slab != NULL);

    err = -ENOBUFS;
    sk = sk_alloc(PF_INET, GFP_KERNEL,
              answer_prot->slab_obj_size,
              answer_prot->slab);
    if (sk == NULL)
        goto out;

    err = 0;
    /* operations of the protocol-specific socket */
    sk->sk_prot = answer_prot;
    sk->sk_no_check = answer_no_check;
    if (INET_PROTOSW_REUSE & answer_flags)
        sk->sk_reuse = 1;

    inet = inet_sk(sk);

    if (SOCK_RAW == sock->type) {
        inet->num = protocol;
        if (IPPROTO_RAW == protocol)
            inet->hdrincl = 1;
    }

    if (ipv4_config.no_pmtu_disc)
        inet->pmtudisc = IP_PMTUDISC_DONT;
    else
        inet->pmtudisc = IP_PMTUDISC_WANT;

    inet->id = 0;

    /* associate sock with sk */
    sock_init_data(sock, sk);
    sk_set_owner(sk, sk->sk_prot->owner);

    sk->sk_destruct       = inet_sock_destruct;
    sk->sk_family       = PF_INET;
    sk->sk_protocol       = protocol;
    sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;

    inet->uc_ttl    = -1;
    inet->mc_loop    = 1;
    inet->mc_ttl    = 1;
    inet->mc_index    = 0;
    inet->mc_list    = NULL;

#ifdef INET_REFCNT_DEBUG
    atomic_inc(&inet_sock_nr);
#endif

    if (inet->num) {
        /* It assumes that any protocol which allows
         * the user to assign a number at socket
         * creation time automatically
         * shares.
         */
        inet->sport = htons(inet->num);
        /* Add to protocol hash chains. */
        sk->sk_prot->hash(sk);
    }

    /* call the protocol's init function, if it provides one */
    if (sk->sk_prot->init) {
        err = sk->sk_prot->init(sk);
        if (err)
            sk_common_release(sk);
    }
out:
    return err;
out_rcu_unlock:
    rcu_read_unlock();
    goto out;
}
1 0
原创粉丝点击