源码剖析之poll
来源:互联网 发布:软件实施工资怎么样 编辑:程序博客网 时间:2024/06/15 19:19
1. poll
从内核的角度看来,借助于VFS,一切皆file。
// 文件表示 include/linux/fs.h
struct file {
	const struct file_operations *f_op;
	spinlock_t f_lock;
	// 文件内部实现细节
	void *private_data;
#ifdef CONFIG_EPOLL
	/* Used by fs/eventpoll.c to link all the hooks to this file */
	struct list_head f_ep_links;
	struct list_head f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
	// 其他细节....
};

// 文件操作 include/linux/fs.h
struct file_operations {
	// 文件提供给poll/select/epoll
	// 调用poll_table_struct中指定的函数并获取文件的当前状态
	unsigned int (*poll) (struct file *, struct poll_table_struct *);
	// 其他方法read/write 等... ...
};

/**
 * 通常文件poll方法的实现
 * 调用poll_table_struct中指定的函数,并获得文件当前就绪事件的掩码
 * @param filp 文件的指针
 * @param wait 指向poll_table_struct的指针
 * @return 返回文件当前就绪事件掩码
 */
unsigned int XXX_poll (struct file *filp, struct poll_table_struct *wait)
{
	unsigned int mask = 0;
	wait_queue_head_t * wait_queue;
	// 1. 根据事件掩码wait->_key和文件实现filp->private_data 取得事件掩码对应的一个或多个wait queue head
	some_code();
	// 2. 调用poll_wait,目的是向获得的等待队列中添加等待队列项
	poll_wait(filp, wait_queue, wait);
	// 3. 取得文件当前就绪事件的掩码并保存到mask
	some_code();
	return mask;
}

// select/poll/epoll 向文件注册就绪后回调节点的接口结构
typedef struct poll_table_struct {
	// 向指定等待队列(wait_queue_head)添加等待队列项(wait_queue_t)的接口函数
	poll_queue_proc _qproc;
	// 关注的事件掩码, 文件的实现利用此掩码将对应的等待队列传递给_qproc
	unsigned long _key;
} poll_table;

// poll_queue_proc 的类型定义
typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *);

// 通用的poll_wait 函数, 文件的f_op->poll 通常会调用此函数
static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
	if (p && p->_qproc && wait_address) {
		// 调用poll_table_struct 中指定的函数_qproc
		// qproc一般的作用是向指定事件等待队列中添加等待队列项
		// 如果是select或poll 则是 __pollwait, 如果是 epoll 则是 ep_ptable_queue_proc
		p->_qproc(filp, wait_address, p);
	}
}
2. UDP的poll
2.1 poll
net/ipv4/af_inet.c
const struct proto_ops inet_dgram_ops = { // 其它细节 .poll = udp_poll, // 其它细节};
2.2 udp_poll
net/ipv4/udp.c
/**
 *	udp_poll - wait for a UDP event.
 *	@file - file struct
 *	@sock - socket
 *	@wait - poll table
 *
 *	This is same as datagram poll, except for the special case of
 *	blocking sockets. If application is using a blocking fd
 *	and a packet with checksum error is in the queue;
 *	then it could get return from select indicating data available
 *	but then block when reading it. Add special case code
 *	to work around these arguably broken applications.
 *
 *	Returns the event mask produced by datagram_poll(), with POLLIN and
 *	POLLRDNORM cleared when the false-positive check below matches.
 */
unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	/* Start from the generic datagram mask; it is only ever narrowed below. */
	unsigned int mask = datagram_poll(file, sock, wait);
	struct sock *sk = sock->sk;

	sock_rps_record_flow(sk);

	/* Check for false positives due to checksum errors */
	/*
	 * Applies only when the mask reports POLLRDNORM, the file is not
	 * O_NONBLOCK, the receive side is not shut down, and
	 * first_packet_length() returns -1.
	 */
	if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
	    !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
		mask &= ~(POLLIN | POLLRDNORM);

	return mask;
}
2.3 datagram_poll
net/core/datagram.c
/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you _don't_ use this routine for this protocol,
 *	and you use a different write policy from sock_writeable()
 *	then please supply your own write_space callback.
 *
 *	Returns the accumulated POLL* event mask for the socket.
 */
unsigned int datagram_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	/* Hand the socket's wait queue to the poll table before sampling state. */
	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		/* Early return: the writability check below is skipped in this state. */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
2.4 sock_poll_wait
include/net/sock.h
/**
 * sock_poll_wait - place memory barrier behind the poll_wait call.
 * @filp:           file
 * @wait_address:   socket wait queue
 * @p:              poll_table
 *
 * See the comments in the wq_has_sleeper function.
 *
 * Does nothing when poll_does_not_wait(p) is true or @wait_address is NULL.
 */
static inline void sock_poll_wait(struct file *filp,
		wait_queue_head_t *wait_address, poll_table *p)
{
	if (!poll_does_not_wait(p) && wait_address) {
		poll_wait(filp, wait_address, p);
		/* We need to be sure we are in sync with the
		 * socket flags modification.
		 *
		 * This memory barrier is paired in the wq_has_sleeper.
		 */
		smp_mb();
	}
}
2.5 poll_wait
include/linux/poll.h
/*
 * poll_wait - invoke the poll table's queueing callback, if there is one.
 * @filp:         file being polled, passed through to the callback
 * @wait_address: wait queue head, passed through to the callback
 * @p:            poll table carrying the _qproc callback
 *
 * Calls p->_qproc(filp, wait_address, p) only when p, p->_qproc and
 * wait_address are all non-NULL; otherwise it does nothing.
 */
static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
	if (p && p->_qproc && wait_address)
		p->_qproc(filp, wait_address, p);
}
2.6 小结
poll
-> udp_poll
-> datagram_poll
-> sock_poll_wait
-> poll_wait
3. TCP的poll
3.1 poll
net/ipv4/af_inet.c
const struct proto_ops inet_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, .bind = inet_bind, .connect = inet_stream_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, .poll = tcp_poll, .ioctl = inet_ioctl, .listen = inet_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, .peek_len = tcp_peek_len,#ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, .compat_ioctl = inet_compat_ioctl,#endif};
3.2 tcp_poll
net/ipv4/tcp.c
/*
 *	Wait for a TCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 *
 *	Returns the accumulated POLL* event mask. For a socket whose loaded
 *	state is TCP_LISTEN, returns the result of inet_csk_listen_poll(sk)
 *	instead.
 */
unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;
	const struct tcp_sock *tp = tcp_sk(sk);
	int state;

	sock_rps_record_flow(sk);

	/* Hand the socket's wait queue to the poll table before sampling state. */
	sock_poll_wait(file, sk_sleep(sk), wait);

	state = sk_state_load(sk);
	if (state == TCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	 * by poll logic and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;

	/*
	 * POLLHUP is certainly not done right. But poll() doesn't
	 * have a notion of HUP in just one direction, and for a
	 * socket the read side is more interesting.
	 *
	 * Some poll() documentation says that POLLHUP is incompatible
	 * with the POLLOUT/POLLWR flags, so somebody should check this
	 * all. But careful, it tends to be safer to return too many
	 * bits than too few, and you can easily break real applications
	 * if you don't tell them that something has hung up!
	 *
	 * Check-me.
	 *
	 * Check number 1. POLLHUP is _UNMASKABLE_ event (see UNIX98 and
	 * our fs/select.c). It means that after we received EOF,
	 * poll always returns immediately, making impossible poll() on write()
	 * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP
	 * if and only if shutdown has been made in both directions.
	 * Actually, it is interesting to look how Solaris and DUX
	 * solve this dilemma. I would prefer, if POLLHUP were maskable,
	 * then we could set it on SND_SHUTDOWN. BTW examples given
	 * in Stevens' books assume exactly this behaviour, it explains
	 * why POLLHUP is incompatible with POLLOUT.	--ANK
	 *
	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
	 * blocking on fresh not-connected or disconnected socket. --ANK
	 */
	if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected or passive Fast Open socket? */
	if (state != TCP_SYN_SENT &&
	    (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
		/* Readability threshold obtained from sock_rcvlowat(). */
		int target = sock_rcvlowat(sk, 0, INT_MAX);

		/*
		 * Raise the threshold by one when urg_seq equals copied_seq,
		 * SOCK_URGINLINE is not set and urg_data is non-zero.
		 */
		if (tp->urg_seq == tp->copied_seq &&
		    !sock_flag(sk, SOCK_URGINLINE) &&
		    tp->urg_data)
			target++;

		if (tp->rcv_nxt - tp->copied_seq >= target)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_is_writeable(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost. Memory barrier
				 * pairs with the input side.
				 */
				smp_mb__after_atomic();
				if (sk_stream_is_writeable(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		} else
			mask |= POLLOUT | POLLWRNORM;

		if (tp->urg_data & TCP_URG_VALID)
			mask |= POLLPRI;
	}
	/* This barrier is coupled with smp_wmb() in tcp_reset() */
	smp_rmb();
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;

	return mask;
}
3.3 sock_poll_wait
include/net/sock.h
/**
 * sock_poll_wait - place memory barrier behind the poll_wait call.
 * @filp:           file
 * @wait_address:   socket wait queue
 * @p:              poll_table
 *
 * See the comments in the wq_has_sleeper function.
 *
 * Does nothing when poll_does_not_wait(p) is true or @wait_address is NULL.
 */
static inline void sock_poll_wait(struct file *filp,
		wait_queue_head_t *wait_address, poll_table *p)
{
	if (!poll_does_not_wait(p) && wait_address) {
		poll_wait(filp, wait_address, p);
		/* We need to be sure we are in sync with the
		 * socket flags modification.
		 *
		 * This memory barrier is paired in the wq_has_sleeper.
		 */
		smp_mb();
	}
}
3.4 poll_wait
include/linux/poll.h
/*
 * poll_wait - invoke the poll table's queueing callback, if there is one.
 * @filp:         file being polled, passed through to the callback
 * @wait_address: wait queue head, passed through to the callback
 * @p:            poll table carrying the _qproc callback
 *
 * Calls p->_qproc(filp, wait_address, p) only when p, p->_qproc and
 * wait_address are all non-NULL; otherwise it does nothing.
 */
static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
	if (p && p->_qproc && wait_address)
		p->_qproc(filp, wait_address, p);
}
3.5 小结
poll
-> tcp_poll
-> sock_poll_wait
-> poll_wait
阅读全文
0 0
- 源码剖析之poll
- poll源码剖析
- poll源码剖析
- poll与epoll源码剖析
- fork之源码剖析
- ThreadPoolExecutor 源码剖析之
- LDD3源码分析之poll分析
- LDD3源码分析之poll分析
- LDD3源码分析之poll分析
- LDD3源码分析之poll分析
- LDD3源码分析之poll分析
- LDD3源码分析之poll分析
- LDD3源码分析之poll分析
- LDD3源码分析之poll分析
- epoll和poll剖析
- poll内核源代码剖析
- Orchid poll 剖析
- dlmalloc源码剖析之:mALLOc
- AndroidStudio导入其他来源项目容易出现的问题
- spring 静态注入
- git pull出现There is no tracking information for the current branch
- Eclipse开发的相关问题总结
- phpstudy安装mongodb扩展
- 源码剖析之poll
- Python装饰器最经典教程,以最易理解的方式教会你
- C/C++中const关键字详解
- 用java操作hdfs
- 如何使用logisim的真值表自动生成电路?【木森小教程】
- highchart部分按钮和头部信息的汉化
- 字符串固定个数分页
- 新手认识json
- 简单sql练习