poll源码剖析

来源：互联网 | 发布：如何网络共享文件夹 | 编辑：程序博客网 | 时间：2024/06/16 07:55

下雨天最适合剖源码了→_→ 老套路，在看源码之前，先来看看poll源码中的重要数据结构……

/*
 * Per-call bookkeeping for one poll() invocation: the poll_table handed to
 * each file's poll method plus the pages of wait-queue entries created for
 * this call.
 */
struct poll_wqueues {
    poll_table pt;                  /* in effect just a single function pointer (the __pollwait callback) */
    struct poll_table_page *table;  /* head of the poll_table_page list; the newest page is pushed in front */
    int error;                      /* 0 after poll_initwait(); -ENOMEM if __pollwait() could not get a page */
};

struct poll_table_page {    struct poll_table_page * next;    struct poll_table_entry * entry;//用于将current挂到设备等待队列上    struct poll_table_entry entries[0];};

struct poll_table_entry {    struct file * filp;    wait_queue_t wait;//通过wait将当前进程挂到设备等待队列上    wait_queue_head_t * wait_address;//等待队列};

//指向链表结点，每个链表的结点通常是（4k）struct poll_list {    struct poll_list *next;    int len;    struct pollfd entries[0];//记录fd的信息};

/* One descriptor to watch, as passed between user space and the kernel. */
struct pollfd {
    int fd;         /* file descriptor */
    short events;   /* events the caller asks to be checked */
    short revents;  /* events reported back after the check; non-zero when the
                       descriptor's state matches something of interest */
};

poll系统调用底层实现是通过sys_poll函数实现的，现在就看看sys_poll函数吧~

/*
 * sys_poll - kernel entry point of the poll() system call.
 * @ufds:    user-space array of struct pollfd
 * @nfds:    number of elements in @ufds
 * @timeout: timeout in milliseconds (converted to jiffies below); 0 means
 *           "do not block", a negative value means "wait indefinitely"
 *
 * Copies the user's pollfd array into a kernel-side list of poll_list nodes,
 * lets do_poll() scan/wait on them, then copies each revents field back.
 *
 * Returns the count produced by do_poll() on success, or -EINVAL, -ENOMEM,
 * -EFAULT, or -EINTR (nothing ready and a signal is pending).
 */
asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout)
{
    struct poll_wqueues table;
    int fdcount, err;
    unsigned int i;
    struct poll_list *head;
    struct poll_list *walk;

    /* Sanity check: reject nfds only when it exceeds both the current
     * fd-set size of this process and OPEN_MAX. */
    if (nfds > current->files->max_fdset && nfds > OPEN_MAX)
        return -EINVAL;

    if (timeout) {
        /* Careful about overflow in the intermediate values */
        if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
            timeout = (unsigned long)(timeout*HZ+999)/1000+1;
        else /* Negative or overflow */
            timeout = MAX_SCHEDULE_TIMEOUT;
    }

    /* Initialise the table: installs __pollwait as the callback. */
    poll_initwait(&table);

    head = NULL;
    walk = NULL;
    i = nfds;           /* descriptors still to be copied in */
    err = -ENOMEM;
    while (i != 0) {    /* build the poll_list chain */
        struct poll_list *pp;

        /* One poll_list header followed by up to POLLFD_PER_PAGE pollfd records. */
        pp = kmalloc(sizeof(struct poll_list)+
                sizeof(struct pollfd)*
                (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),
                    GFP_KERNEL);
        if (pp == NULL)
            goto out_fds;
        pp->next = NULL;
        pp->len = (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i);
        if (head == NULL)
            head = pp;
        else
            walk->next = pp;

        walk = pp;
        /* Copy this chunk of descriptors from user space into the kernel. */
        if (copy_from_user(pp->entries, ufds + nfds-i,
                sizeof(struct pollfd)*pp->len)) {
            err = -EFAULT;
            goto out_fds;
        }
        i -= pp->len;
    }

    /* Check whether any fd is ready, sleeping if necessary. */
    fdcount = do_poll(nfds, head, &table, timeout);

    /* Copy the revents fields back from kernel space to user space. */
    walk = head;
    err = -EFAULT;
    while (walk != NULL) {
        struct pollfd *fds = walk->entries;
        int j;

        for (j = 0; j < walk->len; j++, ufds++) {
            if (__put_user(fds[j].revents, &ufds->revents))
                goto out_fds;
        }
        walk = walk->next;
    }
    err = fdcount;
    if (!fdcount && signal_pending(current))
        err = -EINTR;

out_fds:
    /* Common exit: free every poll_list node, then release the wait-queue
     * entries registered during this call. */
    walk = head;
    while (walk != NULL) {
        struct poll_list *pp = walk->next;
        kfree(walk);
        walk = pp;
    }
    poll_freewait(&table);
    return err;
}

void poll_initwait(struct poll_wqueues *pwq){//把table变量的poll_tabel变量对应的回调函数置为_pollwait    init_poll_funcptr(&pwq->pt, __pollwait);    pwq->error = 0;    pwq->table = NULL;}//回调函数/*一次__pollwait即一次设备poll调用只创建一个poll_table_entry结构，并通过poll_table_entry的wait成员，将current挂到设备等待队列上*/void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *_p){//table记录当前进程由poll_table_page结构组成的单链的第一个可用结点    struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);    struct poll_table_page *table = p->table;//如果单链不存在或所有poll_table_page结构的页面都被poll_table_entry结构使用，即没有空闲空间时    if (!table || POLL_TABLE_FULL(table)) {        struct poll_table_page *new_table;  //为其分配一个新的页面，扩充其容量        new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);        if (!new_table) {            p->error = -ENOMEM;            __set_current_state(TASK_RUNNING);            return;        }          //设置新poll_table_page结构页面的第一个可用poll_table_entry结构为poll_table_entry结构数组的第一个元素        new_table->entry = new_table->entries;        //poll_table_page结构页面的单链表进行更新        new_table->next = table;          //当前进程的poll_table结构成员进行更新        p->table = new_table;        table = new_table;    }    /* Add a new entry 文件描述符节点*/    {  //获取当前进程的第一个空闲poll_table_entry结构        struct poll_table_entry * entry = table->entry;          //更新第一个空闲poll_table_entry结构        table->entry = entry+1;        //对该文件的引用计数加1        get_file(filp);          //将此poll_table_entry结构的filp成员设置为该文件        entry->filp = filp;        //将此poll_table_entry结构的wait_address成员，即等待队列的队头设置为该文件的等待队列的队头        entry->wait_address = wait_address;     //将此poll_table_entry结构的wait成员，即每个进程对应的wait_queue_t结构，将其中的task_struck结构设置为当前进程的task_struck        init_waitqueue_entry(&entry->wait, current);        //将该进程对应的wait_queue_t结构链入该文件的等待队列中        add_wait_queue(wait_address,&entry->wait);    }}

//实际上__pollwait就创建了下面所示的数据结构
这里写图片描述

static int do_poll(unsigned int nfds,  struct poll_list *list,            struct poll_wqueues *wait, long timeout){    int count = 0;    poll_table* pt = &wait->pt;    if (!timeout)        pt = NULL;    for (;;) {        struct poll_list *walk;        set_current_state(TASK_INTERRUPTIBLE);        walk = list;        while(walk != NULL) {            do_pollfd( walk->len, walk->entries, &pt, &count);            walk = walk->next;        }        pt = NULL;        if (count || !timeout || signal_pending(current))            break;        count = wait->error;        if (count)//count>0            break;        timeout = schedule_timeout(timeout);    }    __set_current_state(TASK_RUNNING);    return count;}//可以看出，do_poll函数，主要是在循环内等待，知道count>0才跳出，而count主要是靠do_pollfd来处理的。static void do_pollfd(unsigned int num, struct pollfd * fdpage,    poll_table ** pwait, int *count){    int i;    for (i = 0; i < num; i++) {        int fd;        unsigned int mask;        struct pollfd *fdp;        mask = 0;        fdp = fdpage+i;        fd = fdp->fd;        if (fd >= 0) {            struct file * file = fget(fd);            mask = POLLNVAL;//描述符不是一个打开的文件            if (file != NULL) {                mask = DEFAULT_POLLMASK; //此时需要判断该文件是否支持操作和poll操作                if (file->f_op && file->f_op->poll)                    mask = file->f_op->poll(file, *pwait);                    //如果支持，就调用该类型文件的poll操作所对应的回调函数，并传入该文件结构体和该进程所对应的wait_queue_t结构，mask记录返回值                    //所以真正的查询由poll回调函数完成                mask &= fdp->events | POLLERR | POLLHUP;                fput(file);            }            if (mask) {                *pwait = NULL;                (*count)++;            }        }        fdp->revents = mask;//修改revents的值    }}

让图片带你飞~~

这里写图片描述
没看懂的话，跟着图片再来一遍哦~~

阅读全文

0 0