Orchid select 剖析

来源:互联网 发布:php上传文件带进度条 编辑:程序博客网 时间:2024/06/13 07:38
/* select 系统调用用于同时监视多个文件描述符:当其中有描述符可读、可写或出现异常,或者等待超时时,调用返回,并在传入的 fd_set 中标出就绪的描述符。
 * 原型:int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout);
 */
/*
 * Drawbacks of select:
 *   1. Every call has to copy the fd sets from user space into the kernel.
 *   2. Every call makes the kernel walk all of the fds that were passed in;
 *      both of these costs become large when there are many fds.
 *   3. The number of file descriptors select supports is small (1024).
 *
 * poll is implemented much like select; the only difference is how the set
 * of fds is described: poll uses struct pollfd while select uses fd_set.
 *
 * fd_set layout quoted by the article, for illustration only. The real type
 * comes from <sys/select.h>; this array-of-handles form looks like the
 * Winsock definition rather than the bitmap used further below -- TODO confirm.
 *
 *   typedef struct fd_set {
 *       u_int  fd_count;
 *       socket fd_array[FD_SETSIZE];
 *   } fd_set;
 */

/*
 * isready - report whether a descriptor is readable right now.
 *
 * fd: descriptor to test; must lie in [0, FD_SETSIZE).
 *
 * Calls select() on a read set containing only fd, with a zeroed timeout so
 * the call polls instead of blocking.
 *
 * Returns 1 if fd is flagged readable, 0 if it is not, and -1 if select()
 * fails or fd is outside the range an fd_set can represent.
 */
int isready(int fd)
{
    int rc;
    fd_set fds;
    struct timeval tv;

    /* FD_SET/FD_ISSET on an out-of-range descriptor is undefined behaviour. */
    if (fd < 0 || fd >= FD_SETSIZE)
        return -1;

    FD_ZERO(&fds);
    FD_SET(fd, &fds);
    tv.tv_sec = 0;
    tv.tv_usec = 0;

    rc = select(fd + 1, &fds, NULL, NULL, &tv);
    if (rc < 0) /* error */
        return -1;

    return FD_ISSET(fd, &fds) ? 1 : 0;
}


typedef struct {unsigned long *in, *out, *ex;unsigned long *res_in, *res_out, *res_ex;} fd_set_bits;typedef void(*poll_queue_proc)(struct file *, wait_queue_head_t *, structpoll_table_struct *);typedef struct poll_table_struct {poll_queue_proc qproc;} poll_table;struct poll_table_entry {struct file * filp;// select 要监视的 struct file 结构体wait_queue_t wait;//等待队列的节点wait_queue_head_t * wait_address;//文件操作的等待队列的队首};struct poll_table_page {//保存的方式是单向链表,每个节点以页为单位,分配多个 poll_table_entry 项struct poll_table_page * next;struct poll_table_entry * entry;struct poll_table_entry entries[0];};struct poll_wqueues {//这是最主要的结构体,它保存了 select 过程中的重要信息poll_table pt;//用来保存回调函数(通常负责把进程放入等待队列等关键操作)struct poll_table_page * table;//记录了在 select 过程中生成的所有等待队列的结点int error;};//select的调用path如下:sys_select->do_selectasmlinkage longsys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp){fd_set_bits fds;char *bits;long timeout;int ret, size, max_fdset;timeout = MAX_SCHEDULE_TIMEOUT;if (tvp) {time_t sec, usec;if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp)))|| (ret = __get_user(sec, &tvp->tv_sec))|| (ret = __get_user(usec, &tvp->tv_usec)))goto out_nofds;ret = -EINVAL;if (sec < 0 || usec < 0)goto out_nofds;if ((unsigned long)sec < MAX_SELECT_SECONDS) {timeout = ROUND_UP(usec, 1000000 / HZ);timeout += sec * (unsigned long)HZ;}}ret = -EINVAL;if (n < 0)goto out_nofds;/* max_fdset can increase, so grab it once to avoid race */max_fdset = current->files->max_fdset;if (n > max_fdset)n = max_fdset;/** We need 6 bitmaps (in/out/ex for both incoming and outgoing),* since we used fdset we need to allocate memory in units of* long-words.*/ret = -ENOMEM;size = FDS_BYTES(n);bits = select_bits_alloc(size);if (!bits)goto out_nofds;fds.in = (unsigned long *)bits;fds.out = (unsigned long *)(bits + size);fds.ex = (unsigned long *)(bits + 2 * size);fds.res_in = (unsigned long *)(bits + 3 * size);fds.res_out = (unsigned long *)(bits + 4 * size);fds.res_ex = (unsigned 
long *)(bits + 5 * size);if ((ret = get_fd_set(n, inp, fds.in)) ||(ret = get_fd_set(n, outp, fds.out)) ||(ret = get_fd_set(n, exp, fds.ex)))goto out;zero_fd_set(n, fds.res_in);zero_fd_set(n, fds.res_out);zero_fd_set(n, fds.res_ex);ret = do_select(n, &fds, &timeout);if (tvp && !(current->personality & STICKY_TIMEOUTS)) {time_t sec = 0, usec = 0;if (timeout) {sec = timeout / HZ;usec = timeout % HZ;usec *= (1000000 / HZ);}put_user(sec, &tvp->tv_sec);put_user(usec, &tvp->tv_usec);}if (ret < 0)goto out;if (!ret) {ret = -ERESTARTNOHAND;if (signal_pending(current))goto out;ret = 0;}if (set_fd_set(n, inp, fds.res_in) ||set_fd_set(n, outp, fds.res_out) ||set_fd_set(n, exp, fds.res_ex))ret = -EFAULT;out:select_bits_free(bits, size);out_nofds:return ret;}int do_select(int n, fd_set_bits *fds, long *timeout){struct poll_wqueues table;poll_table *wait;int retval, i;long __timeout = *timeout;spin_lock(¤t->files->file_lock);retval = max_select_fd(n, fds);spin_unlock(¤t->files->file_lock);if (retval < 0)return retval;n = retval;poll_initwait(&table);   //作用就是把 poll_table 中的回调函数设置为__pollwait。wait = &table.pt;if (!__timeout)wait = NULL;retval = 0;//retval用于保存已经准备好的描述符数,初始为0for (;;) {unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;set_current_state(TASK_INTERRUPTIBLE);//将当前进程状态改为TASK_INTERRUPTIBLEinp = fds->in; outp = fds->out; exp = fds->ex;rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;for (i = 0; i < n; ++rinp, ++routp, ++rexp) {//遍历每个描述符unsigned long in, out, ex, all_bits, bit = 1, mask, j;unsigned long res_in = 0, res_out = 0, res_ex = 0;struct file_operations *f_op = NULL;struct file *file = NULL;in = *inp++; out = *outp++; ex = *exp++;all_bits = in | out | ex;if (all_bits == 0) {i += __NFDBITS;//如果这个字没有待查找的描述符, 跳过这个长字(32位)continue;}for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {//遍历每个长字里的每个位if (i >= n)break;if (!(bit & all_bits))continue;file = fget(i);if (file) {f_op = file->f_op;mask = DEFAULT_POLLMASK;if (f_op && f_op->poll)/* 
在这里循环调用所监测的fd_set内的所有文件描述符对应的驱动程序的poll函数 */mask = (*f_op->poll)(file, retval ? NULL : wait);fput(file);if ((mask & POLLIN_SET) && (in & bit)) {res_in |= bit;retval++;}if ((mask & POLLOUT_SET) && (out & bit)) {res_out |= bit;retval++;}if ((mask & POLLEX_SET) && (ex & bit)) {res_ex |= bit;retval++;}}}if (res_in)*rinp = res_in;if (res_out)*routp = res_out;if (res_ex)*rexp = res_ex;}wait = NULL;if (retval || !__timeout || signal_pending(current))break;if (table.error) {retval = table.error;break;}__timeout = schedule_timeout(__timeout);}__set_current_state(TASK_RUNNING);poll_freewait(&table);   /** Up-to-date the caller timeout.*/*timeout = __timeout;return retval;}

                                             
0 0
原创粉丝点击