sendmsg在传输层的实现(一)

来源:互联网 发布:知世超级超级超级犀利 编辑:程序博客网 时间:2024/06/04 19:08

         sendmsg系统调用在TCP中的实现分为两层:套接口层和传输接口层,其中主要的实现位于传输接口层。

系统调用首先通过sys_socketcall和系统调用号SYS_SENDMSG调用sys_sendmsg,sys_sendmsg通过proto_ops结构跳转到inet_sendmsg,而inet_sendmsg又通过proto结构跳转表调用tcp_sendmsg,从而实现在TCP层的数据发送。

一、基本的数据结构

       //include/linux/net.h

  

struct socket {        socket_statestate;//套接口所处的状态标志unsigned longflags;//标志位const struct proto_ops*ops;//用以将套接口层系统调用映射到传输层相应的实现struct fasync_struct*fasync_list;//存储异步通知队列struct file*file;//相关联的文件指针struct sock*sk;//传输层相关联的传输控制块wait_queue_head_twait;//等待该套接口的进程队列shorttype;//套接口的类型};<pre name="code" class="cpp">struct proto_ops {intfamily;//协议族struct module*owner;//所属模块        //以下函数为是与系统调用对应的传输层实现        int(*release)   (struct socket *sock);int(*bind)     (struct socket *sock,      struct sockaddr *myaddr,      int sockaddr_len);int(*connect)   (struct socket *sock,      struct sockaddr *vaddr,      int sockaddr_len, int flags);int(*socketpair)(struct socket *sock1,      struct socket *sock2);int(*accept)    (struct socket *sock,      struct socket *newsock, int flags);int(*getname)   (struct socket *sock,      struct sockaddr *addr,      int *sockaddr_len, int peer);unsigned int(*poll)     (struct file *file, struct socket *sock,      struct poll_table_struct *wait);int(*ioctl)     (struct socket *sock, unsigned int cmd,      unsigned long arg);int (*compat_ioctl) (struct socket *sock, unsigned int cmd,      unsigned long arg);int(*listen)    (struct socket *sock, int len);int(*shutdown)  (struct socket *sock, int flags);int(*setsockopt)(struct socket *sock, int level,      int optname, char __user *optval, int optlen);int(*getsockopt)(struct socket *sock, int level,      int optname, char __user *optval, int __user *optlen);int(*compat_setsockopt)(struct socket *sock, int level,      int optname, char __user *optval, int optlen);int(*compat_getsockopt)(struct socket *sock, int level,      int optname, char __user *optval, int __user *optlen);int(*sendmsg)   (struct kiocb *iocb, struct socket *sock,      struct msghdr *m, size_t total_len);//sendmsg系统调用对应int(*recvmsg)   (struct kiocb *iocb, struct socket *sock,      struct msghdr *m, size_t total_len,      int flags);int(*mmap)     (struct file *file, struct socket *sock,      struct vm_area_struct * 
vma);ssize_t(*sendpage)  (struct socket *sock, struct page *page,      int offset, size_t size, int flags);};//include/net/sock.h//proto为网络接口层,实现传输层到网络层的函数映射;<pre name="code" class="cpp">struct proto {void(*close)(struct sock *sk, long timeout);int(*connect)(struct sock *sk,        struct sockaddr *uaddr, int addr_len);int(*disconnect)(struct sock *sk, int flags);struct sock *(*accept) (struct sock *sk, int flags, int *err);int(*ioctl)(struct sock *sk, int cmd, unsigned long arg);int(*init)(struct sock *sk);int(*destroy)(struct sock *sk);void(*shutdown)(struct sock *sk, int how);int(*setsockopt)(struct sock *sk, int level, int optname, char __user *optval,int optlen);int(*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *option);   int(*compat_setsockopt)(struct sock *sk,int level,int optname, char __user *optval,int optlen);int(*compat_getsockopt)(struct sock *sk,int level,int optname, char __user *optval,int __user *option);int(*sendmsg)(struct kiocb *iocb, struct sock *sk,   struct msghdr *msg, size_t len);int(*recvmsg)(struct kiocb *iocb, struct sock *sk,   struct msghdr *msg,size_t len, int noblock, int flags, int *addr_len);int(*sendpage)(struct sock *sk, struct page *page,int offset, size_t size, int flags);int(*bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len);int(*backlog_rcv) (struct sock *sk, struct sk_buff *skb);/* Keeping track of sk's, looking them up, and port selection methods. */void(*hash)(struct sock *sk);void(*unhash)(struct sock *sk);int(*get_port)(struct sock *sk, unsigned short snum);/* Memory pressure */void(*enter_memory_pressure)(void);atomic_t*memory_allocated;/* Current allocated memory. */atomic_t*sockets_allocated;/* Current number of sockets. *//* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. * All the sk_stream_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. 
*/int*memory_pressure;int*sysctl_mem;int*sysctl_wmem;int*sysctl_rmem;intmax_header;kmem_cache_t*slab;unsigned intobj_size;atomic_t*orphan_count;struct request_sock_ops*rsk_prot;struct timewait_sock_ops *twsk_prot;struct module*owner;charname[32];struct list_headnode;#ifdef SOCK_REFCNT_DEBUGatomic_tsocks;#endifstruct {int inuse;u8  __pad[SMP_CACHE_BYTES - sizeof(int)];} stats[NR_CPUS];};
proto_ops和proto需要成对出现,inet_protosw结构封装了一对proto_ops和proto实例。各个inet_protosw实例定义在静态数组inetsw_array中,协议栈初始化(inet_init)时通过inet_register_protosw按套接口类型把它们注册到全局的list_head结构数组inetsw中;创建socket时再按类型在inetsw中查找对应的项。
例如:SOCK_STREAM对应的proto实例为tcp_prot,proto_ops实例为inet_stream_ops。
二、传输接口层实现
/*
 * tcp_sendmsg - TCP implementation of the proto sendmsg operation.
 * @iocb: not referenced in this function body
 * @sk:   transport control block to send on
 * @msg:  message header; msg_iov/msg_iovlen describe the user data and
 *        msg_flags supplies the send flags
 * @size: not referenced in this function body
 *
 * Walks every iovec segment of @msg, copying user data into skbs on
 * sk->sk_write_queue (either into the skb's linear tail room or into
 * page fragments) and pushing segments out as they fill up.
 *
 * The socket is locked with lock_sock() for the whole call and released
 * on every return path.
 *
 * Returns the number of bytes copied if any data was queued; otherwise
 * the value produced by sk_stream_error() for the failure.
 */
int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		size_t size)
{
	struct iovec *iov;
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int iovlen, flags;
	int mss_now, size_goal;
	int err, copied;
	long timeo;

	/* Lock the transport control block. */
	lock_sock(sk);
	TCP_CHECK_TIMER(sk);

	/* Fetch the send timeout; MSG_DONTWAIT is passed as the no-block
	 * argument. */
	flags = msg->msg_flags;
	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_err;

	/* Clear the asynchronous "send queue full" flag. */
	/* This should be in poll */
	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	/* Current effective MSS, and the target size of data handed to
	 * the network device in one skb. */
	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
	size_goal = tp->xmit_size_goal;

	/* Ok commence sending: pick up the segment count and the address
	 * of the first segment. */
	iovlen = msg->msg_iovlen;
	iov = msg->msg_iov;
	copied = 0;

	/* Refuse to send if the socket has an error pending or the send
	 * side has been shut down. */
	err = -EPIPE;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto do_error;

	while (--iovlen >= 0) {
		int seglen = iov->iov_len;
		unsigned char __user *from = iov->iov_base;

		iov++;

		while (seglen > 0) {
			int copy;

			skb = sk->sk_write_queue.prev;

			/* If nothing is pending to send, or the tail skb
			 * is already full, start a new skb. */
			if (!sk->sk_send_head ||
			    (copy = size_goal - skb->len) <= 0) {

new_segment:
				/* Allocate new segment. If the interface is SG,
				 * allocate skb fitting to single page.
				 */
				/* Has queued data reached the send buffer
				 * limit? */
				if (!sk_stream_memory_free(sk))
					goto wait_for_sndbuf;

				/* Allocate the skb. */
				skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
							   0, sk->sk_allocation);
				if (!skb)
					goto wait_for_memory;

				/*
				 * Check whether we can use HW checksum.
				 */
				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
					skb->ip_summed = CHECKSUM_HW;

				/* Append to the tail of the send queue. */
				skb_entail(sk, tp, skb);
				copy = size_goal;
			}

			/* Try to append data to the end of skb. */
			if (copy > seglen)
				copy = seglen;

			/* Where to copy to? */
			/* Copy `copy` bytes from user space into the skb. */
			if (skb_tailroom(skb) > 0) {
				/* We have some space in skb head. Superb! */
				if (copy > skb_tailroom(skb))
					copy = skb_tailroom(skb);
				if ((err = skb_add_data(skb, from, copy)) != 0)
					goto do_fault;
			} else {
				int merge = 0;
				int i = skb_shinfo(skb)->nr_frags;
				struct page *page = TCP_PAGE(sk);
				int off = TCP_OFF(sk);

				if (skb_can_coalesce(skb, i, page, off) &&
				    off != PAGE_SIZE) {
					/* We can extend the last page
					 * fragment. */
					merge = 1;
				} else if (i == MAX_SKB_FRAGS ||
					   (!i &&
					   !(sk->sk_route_caps & NETIF_F_SG))) {
					/* Need to add new fragment and cannot
					 * do this because interface is non-SG,
					 * or because all the page slots are
					 * busy. */
					tcp_mark_push(tp, skb);
					goto new_segment;
				} else if (page) {
					if (off == PAGE_SIZE) {
						put_page(page);
						TCP_PAGE(sk) = page = NULL;
						off = 0;
					}
				} else
					off = 0;

				if (copy > PAGE_SIZE - off)
					copy = PAGE_SIZE - off;

				if (!sk_stream_wmem_schedule(sk, copy))
					goto wait_for_memory;

				if (!page) {
					/* Allocate new cache page. */
					if (!(page = sk_stream_alloc_page(sk)))
						goto wait_for_memory;
				}

				/* Time to copy data. We are close to
				 * the end! */
				err = skb_copy_to_page(sk, from, skb, page,
						       off, copy);
				if (err) {
					/* If this page was new, give it to the
					 * socket so it does not get leaked.
					 */
					if (!TCP_PAGE(sk)) {
						TCP_PAGE(sk) = page;
						TCP_OFF(sk) = 0;
					}
					goto do_error;
				}

				/* Update the skb. */
				if (merge) {
					skb_shinfo(skb)->frags[i - 1].size +=
									copy;
				} else {
					skb_fill_page_desc(skb, i, page, off, copy);
					if (TCP_PAGE(sk)) {
						get_page(page);
					} else if (off + copy < PAGE_SIZE) {
						get_page(page);
						TCP_PAGE(sk) = page;
					}
				}

				TCP_OFF(sk) = off + copy;
			}

			if (!copied)
				TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;

			tp->write_seq += copy;
			TCP_SKB_CB(skb)->end_seq += copy;
			skb_shinfo(skb)->gso_segs = 0;

			from += copy;
			copied += copy;
			/* Last byte of the last segment: finish up. */
			if ((seglen -= copy) == 0 && iovlen == 0)
				goto out;

			if (skb->len < mss_now || (flags & MSG_OOB))
				continue;

			/* Check whether the data must go out right away. */
			if (forced_push(tp)) {
				tcp_mark_push(tp, skb);
				__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
			} else if (skb == sk->sk_send_head)
				tcp_push_one(sk, mss_now); /* send only the current segment */
			continue;

wait_for_sndbuf:
			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
			if (copied)
				tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);

			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
				goto do_error;

			/* Re-read both values after waiting. */
			mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
			size_goal = tp->xmit_size_goal;
		}
	}

out:
	if (copied)
		tcp_push(sk, tp, flags, mss_now, tp->nonagle);
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return copied;

do_fault:
	/* The user copy failed; if the skb holds no data, take it back off
	 * the write queue and free it. */
	if (!skb->len) {
		if (sk->sk_send_head == skb)
			sk->sk_send_head = NULL;
		__skb_unlink(skb, &sk->sk_write_queue);
		sk_stream_free_skb(sk, skb);
	}

do_error:
	/* Partial progress is reported as success with the byte count. */
	if (copied)
		goto out;
out_err:
	err = sk_stream_error(sk, flags, err);
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return err;
}





0 0
原创粉丝点击