网络异步IO操作-select模型使用

来源：互联网发布：搞笑段子配音软件编辑：程序博客网时间：2024/05/02 08:11

文章目的：

在写这篇文章之前，我一直有个疑问：select模型里面为什么每次调用前都要给描述符集重新赋值呢？当时我学习的时候听信了网络上一些文章的说法——说是FD_CLR会把描述符集里其他的位也清空；以至于只是稍微地看了看，没有深究里面的道理。现在反过来再次看时，发现不是那么回事；于是我抱着怀疑自己、怀疑一切的精神，深入地研究了一下select的工作机制。

基础知识

1) 描述符集fd_set:

fd_set是select机制提供的一种数据结构，实际上就是一个只包含unsigned long类型数组的结构体，这个数组被当作位图来使用。

定义头文件目录：

./include/linux/types.h
./include/linux/posix_types.h

可能有人会问为啥是两个头文件呢？首先说明的是fd_set是__kernel_fd_set的typedef定义，这个包含在types.h中；具体的定义在posix_types.h里面。

结构体定义：

types.h定义：

typedef __kernel_fd_setfd_set;

Posix_types.h定义：

/* Number of descriptor bits stored in one array element (one unsigned long). */
#undef __NFDBITS
#define __NFDBITS	(8 * sizeof(unsigned long))

/* Total number of descriptor bits an fd_set can hold. */
#undef __FD_SETSIZE
#define __FD_SETSIZE	1024

/* Number of unsigned longs needed to hold __FD_SETSIZE bits. */
#undef __FDSET_LONGS
#define __FDSET_LONGS	(__FD_SETSIZE/__NFDBITS)

/* Index of the array element that contains the bit for descriptor d. */
#undef __FDELT
#define	__FDELT(d)	((d) / __NFDBITS)

/* Mask selecting the bit for descriptor d inside its array element. */
#undef __FDMASK
#define	__FDMASK(d)	(1UL << ((d) % __NFDBITS))

/* The descriptor set itself: a bitmap with one bit per descriptor. */
typedef struct {
	unsigned long fds_bits [__FDSET_LONGS];
} __kernel_fd_set;

定义分析：

这个linux版本是3.0的。内部定义了一个__NFDBITS宏，表示数组中每个元素的位数，值为8*sizeof(unsigned long)，在32位平台上即是32；宏__FD_SETSIZE为1024；数组大小的宏__FDSET_LONGS为1024/（8*4）即是32。（在64位平台上，这两个值则分别是64和16，总位数仍然是1024。）

其实经过这一系列的转化后我们发现其实这个数组就是为了1024bit，为什么这么定义呢？因为select模型里面操作的就是这些描述符集的位，select通过置位描述符集的相应位表示此描述符有变化（可读、可写、异常）

上面讲述了fd_set的数据结构，现在看看对这个fd_set的操作。

分析：对于一个数据结构的操作主要包括哪些部分呢？根据我们以前学习的数据结构来看，主要有创建、清空（特殊的删除操作）、添加、删除、读取、插入等操作；那么针对fd_set这个数据结构，操作包括添加（FD_SET）、清空（FD_ZERO）、删除（FD_CLR）、检测（FD_ISSET）。与之前定义fd_set一样，这些操作也进行了一层封装，分别定义在两个文件中。这些操作一般都定义在相应的arch目录下面，但是我在查ARM的时候发现它们是定义在./include/linux/time.h和./include/asm-generic/posix_types.h中。

time.h:

/*
 * Public fd_set names. Each macro forwards, with unchanged arguments, to the
 * double-underscore implementation of the same name.
 */
#define NFDBITS			__NFDBITS

#define FD_SETSIZE		__FD_SETSIZE
#define FD_SET(fd,fdsetp)	__FD_SET(fd,fdsetp)
#define FD_CLR(fd,fdsetp)	__FD_CLR(fd,fdsetp)
#define FD_ISSET(fd,fdsetp)	__FD_ISSET(fd,fdsetp)
#define FD_ZERO(fdsetp)		__FD_ZERO(fdsetp)

posix_types.h:

#ifdef __KERNEL__

/*
 * Mark descriptor __fd as a member of *__fdsetp by setting its bit.
 * Only that single bit is modified.
 */
#undef __FD_SET
static inline void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp)
{
	unsigned long __tmp = __fd / __NFDBITS;	/* array element holding the bit */
	unsigned long __rem = __fd % __NFDBITS;	/* bit position inside that element */
	__fdsetp->fds_bits[__tmp] |= (1UL<<__rem);
}

/*
 * Remove descriptor __fd from *__fdsetp by clearing its bit.
 * Only that single bit is modified; the rest of the set is left alone.
 */
#undef __FD_CLR
static inline void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp)
{
	unsigned long __tmp = __fd / __NFDBITS;
	unsigned long __rem = __fd % __NFDBITS;
	__fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem);
}

/*
 * Test whether the bit for descriptor __fd is set in *__p.
 * Returns 1 if it is set and 0 otherwise; the set is not modified.
 */
#undef __FD_ISSET
static inline int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p)
{
	unsigned long __tmp = __fd / __NFDBITS;
	unsigned long __rem = __fd % __NFDBITS;
	return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0;
}

/*
 * Clear every bit of *__p.
 *
 * This will unroll the loop for the normal constant case (8 ints,
 * for a 256-bit fd_set)
 *
 * When __FDSET_LONGS is a compile-time constant equal to 16, 8 or 4, the
 * matching case zeroes each element explicitly and returns. Any other length
 * falls out of the switch and is handled by the generic loop at the end.
 */
#undef __FD_ZERO
static inline void __FD_ZERO(__kernel_fd_set *__p)
{
	unsigned long *__tmp = __p->fds_bits;
	int __i;

	if (__builtin_constant_p(__FDSET_LONGS)) {
		switch (__FDSET_LONGS) {
		case 16:
			__tmp[ 0] = 0; __tmp[ 1] = 0;
			__tmp[ 2] = 0; __tmp[ 3] = 0;
			__tmp[ 4] = 0; __tmp[ 5] = 0;
			__tmp[ 6] = 0; __tmp[ 7] = 0;
			__tmp[ 8] = 0; __tmp[ 9] = 0;
			__tmp[10] = 0; __tmp[11] = 0;
			__tmp[12] = 0; __tmp[13] = 0;
			__tmp[14] = 0; __tmp[15] = 0;
			return;

		case 8:
			__tmp[ 0] = 0; __tmp[ 1] = 0;
			__tmp[ 2] = 0; __tmp[ 3] = 0;
			__tmp[ 4] = 0; __tmp[ 5] = 0;
			__tmp[ 6] = 0; __tmp[ 7] = 0;
			return;

		case 4:
			__tmp[ 0] = 0; __tmp[ 1] = 0;
			__tmp[ 2] = 0; __tmp[ 3] = 0;
			return;
		}
	}
	/* Generic path: zero the array one element at a time. */
	__i = __FDSET_LONGS;
	while (__i) {
		__i--;
		*__tmp = 0;
		__tmp++;
	}
}

#endif /* __KERNEL__ */

从上面的代码我们分析：

__FD_SET：这个函数的目的是把fd对应的位置1。

__FD_CLR：这个函数的目的是把fd对应的位清零。

__FD_ISSET：这个函数的目的是检测fd对应的位是否为1。

我们暂且不看__FD_ZERO，从以上三个函数我们是不是有疑问呢？我们在使用这些函数时先是添加（__FD_SET），然后是检测（__FD_ISSET）；如果中间的select没有对添加的描述符做相应的处理，那么现象是什么呢？那就是我们每添加一个描述符，检测时都会认为这个描述符发生了变化。我类个去，大家还记得我上面写的这篇文章的目的吧？网上说描述符集经过select后是被FD_CLR清空的，现在看来这个理论是扯淡啊！首先我们从逻辑上看，FD_CLR是在FD_ISSET之后才调用的；再从代码上看，FD_CLR只是清除其中一位的操作，打死它，它也不会操作到整个描述符集！

至此我的疑问已经没有了，我已经理解了肯定是select函数改变的描述符集，就需要看看select的具体源码了。下面先给fd_set基础知识扫个尾，简单介绍下对于fd_set的清零操作（__FD_ZERO）:

从上面的代码看：

__FD_ZERO：就是把定义的描述符数组中所有位清零。

代码分析：

从上面的代码看，首先获取数组指针，然后是数组大小，然后逐个清零。乍一看这段代码时，我的第一印象是为啥不用memset呢？接着我就明白了，因为它赋值的是long型的；但是为啥不用循环逐个赋值呢？（其实函数末尾的while循环就是逐个清零的通用路径，前面的switch只是在数组长度为编译期常量16、8或4时把循环手工展开。）没有完全理解这样写的原因，可能是因为内核代码这样写执行得快，又或者这只是个人编写代码的习惯而已，我想多了，哈哈，我就是这么较真啊！

Select函数：

Select是系统调用，是内核实现的函数。

函数原型：

/*
 * select() prototype: n bounds the descriptors examined, the three fd_set
 * pointers carry the read, write and exception sets, and timeout may be
 * NULL when no timeout is wanted.
 */
int select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	   struct timeval *timeout);

函数源码：包含在./fs/select.c

/*
 * select system call entry point.
 *
 * n    - passed through unchanged to core_sys_select()
 * inp  - user-space read set
 * outp - user-space write set
 * exp  - user-space exception set
 * tvp  - user-space timeout, or NULL for no timeout
 *
 * Returns -EFAULT if the timeout cannot be copied from user space, -EINVAL if
 * poll_select_set_timeout() rejects it, and otherwise whatever
 * poll_select_copy_remaining() returns for the core_sys_select() result.
 */
SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
		fd_set __user *, exp, struct timeval __user *, tvp)
{
	struct timespec end_time, *to = NULL;
	struct timeval tv;
	int ret;

	if (tvp) {
		/* Copy the caller's timeout into kernel memory before using it. */
		if (copy_from_user(&tv, tvp, sizeof(tv)))
			return -EFAULT;

		to = &end_time;
		/*
		 * Whole seconds contained in tv_usec are added to tv_sec; the
		 * remaining microseconds are converted to nanoseconds.
		 */
		if (poll_select_set_timeout(to,
				tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
				(tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
			return -EINVAL;
	}

	/* to stays NULL when the caller supplied no timeout. */
	ret = core_sys_select(n, inp, outp, exp, to);
	/* NOTE(review): presumably writes the remaining time back to tvp - confirm in fs/select.c. */
	ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);

	return ret;
}

源码分析：

从源码看，首先是把超时等待的数据结构拷贝到内核里面；然后就是我们看到的core_sys_select函数，我们定义的描述符集就是传到这个函数里处理的，我们的疑问将在这个函数中解开，下面看看代码：

/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND which restart only when you want to.
 *
 * Core of select: loads the three user sets into kernel bitmaps, runs
 * do_select() on them, and then hands the three *result* bitmaps to
 * set_fd_set() for the same user pointers (inp/outp/exp).
 *
 * Returns the do_select() result, or -EINVAL for negative n, -ENOMEM when
 * the bitmap allocation fails, the get_fd_set() error if loading a set
 * fails, -ERESTARTNOHAND when do_select() returned 0 with a signal pending,
 * or -EFAULT when any set_fd_set() call fails.
 */
int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
			   fd_set __user *exp, struct timespec *end_time)
{
	fd_set_bits fds;
	void *bits;
	int ret, max_fds;
	unsigned int size;
	struct fdtable *fdt;
	/* Allocate small arguments on the stack to save memory and be faster */
	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];

	ret = -EINVAL;
	if (n < 0)
		goto out_nofds;

	/* max_fds can increase, so grab it once to avoid race */
	rcu_read_lock();
	fdt = files_fdtable(current->files);
	max_fds = fdt->max_fds;
	rcu_read_unlock();
	/* Clamp n to the size of the current file table. */
	if (n > max_fds)
		n = max_fds;

	/*
	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
	 * since we used fdset we need to allocate memory in units of
	 * long-words.
	 */
	size = FDS_BYTES(n);
	bits = stack_fds;
	if (size > sizeof(stack_fds) / 6) {
		/* Not enough space in on-stack array; must use kmalloc */
		ret = -ENOMEM;
		bits = kmalloc(6 * size, GFP_KERNEL);
		if (!bits)
			goto out_nofds;
	}
	/* Carve the buffer into six consecutive bitmaps of size bytes each. */
	fds.in      = bits;
	fds.out     = bits +   size;
	fds.ex      = bits + 2*size;
	fds.res_in  = bits + 3*size;
	fds.res_out = bits + 4*size;
	fds.res_ex  = bits + 5*size;

	/* Load the caller's three sets; the first failure aborts with its error code. */
	if ((ret = get_fd_set(n, inp, fds.in)) ||
	    (ret = get_fd_set(n, outp, fds.out)) ||
	    (ret = get_fd_set(n, exp, fds.ex)))
		goto out;
	/* The result bitmaps start out empty. */
	zero_fd_set(n, fds.res_in);
	zero_fd_set(n, fds.res_out);
	zero_fd_set(n, fds.res_ex);

	ret = do_select(n, &fds, end_time);

	if (ret < 0)
		goto out;
	if (!ret) {
		/* A zero result becomes -ERESTARTNOHAND only when a signal is pending. */
		ret = -ERESTARTNOHAND;
		if (signal_pending(current))
			goto out;
		ret = 0;
	}

	/*
	 * The result bitmaps, not the input ones, are passed back for the
	 * caller's set pointers.
	 * NOTE(review): set_fd_set() presumably copies them to user space - confirm.
	 */
	if (set_fd_set(n, inp, fds.res_in) ||
	    set_fd_set(n, outp, fds.res_out) ||
	    set_fd_set(n, exp, fds.res_ex))
		ret = -EFAULT;

out:
	/* Free the buffer only if it came from kmalloc rather than the stack array. */
	if (bits != stack_fds)
		kfree(bits);
out_nofds:
	return ret;
}

前面是把最大的描述符加1赋值到n，由此换算出来一个位图需要多少字节，即size的大小；接着准备6个这样大小的空间（这里面又做了部分处理：首先如果size不大于我们预先定义的栈上数组stack_fds（SELECT_STACK_ALLOC字节，即2048位）的六分之一，就不需要分配空间，直接用bits指向我们定义的stack_fds，否则就用kmalloc申请分配）；把用户空间的数据拷贝到内核态，保存在fds结构体中，并清空fds结构体的res_xx；然后进行处理（内部就是内核具体实现把res_xx成员中的相应位置位），经过这个过程后res_xx保存的就是对应描述符的置位情况；最后把这个res_xx又赋给了用户空间。也就是说，select返回后用户传入的描述符集已经被结果覆盖，只保留了有变化的描述符对应的位，所以每次调用select之前都必须重新给描述符集赋值。至此我的猜测全部验证通过。

欢迎各位大神批评指正！

0 0