【OVS2.5.0源码分析】ovsd进程运行机制分析(1)

来源:互联网 发布:php布尔类型 编辑:程序博客网 时间:2024/06/05 06:38

ovsd作为ovs的用户态管理主进程,负责处理ovs-vsctl命令接口、ovs-ofctl命令接口、与controller交互等等。 本篇分析它的整个处理机制。

1、main函数

    bridge_init(remote);
    free(remote);

    exiting = false;
    while (!exiting) {                   /* Main loop: iterate until shutdown is requested. */
        memory_run();
        if (memory_should_report()) {
            struct simap usage;

            simap_init(&usage);
            bridge_get_memory_usage(&usage);
            memory_report(&usage);
            simap_destroy(&usage);
        }
        bridge_run();                    /* Handles controller interaction and ovs-ofctl commands. */
        unixctl_server_run(unixctl);     /* Handles ovs-vsctl (unixctl) commands. */
        netdev_run();

        /* Register everything we need to be woken up for ... */
        memory_wait();
        bridge_wait();
        unixctl_server_wait(unixctl);
        netdev_wait();
        if (exiting) {
            poll_immediate_wake();
        }
        poll_block();                    /* ... then block here while there is no work to do. */
        if (should_service_stop()) {
            exiting = true;
        }
    }
    bridge_exit();
    unixctl_server_destroy(unixctl);
    service_stop();
2、poll_block函数

voidpoll_block(void){    struct poll_loop *loop = poll_loop();     //ovsd进程携带的poll信息,其中最重要的就是fd信息    struct poll_node *node;    struct pollfd *pollfds;    HANDLE *wevents = NULL;    int elapsed;    int retval;    int i;    /* Register fatal signal events before actually doing any real work for     * poll_block. */    fatal_signal_wait();    if (loop->timeout_when == LLONG_MIN) {        COVERAGE_INC(poll_zero_timeout);    }    timewarp_run();    pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);#ifdef _WIN32    wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);#endif    /* Populate with all the fds and events. */    i = 0;    HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {        pollfds[i] = node->pollfd;#ifdef _WIN32        wevents[i] = node->wevent;        if (node->pollfd.fd && node->wevent) {            short int wsa_events = 0;            if (node->pollfd.events & POLLIN) {                wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;            }            if (node->pollfd.events & POLLOUT) {                wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;            }            WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);        }#endif        i++;    }    retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,<span style="white-space:pre"></span>//time_poll会最终调用linux的poll函数                       loop->timeout_when, &elapsed);    if (retval < 0) {        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);        VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval));    } else if (!retval) {        log_wakeup(loop->timeout_where, NULL, elapsed);    } else if (get_cpu_usage() > 50 || VLOG_IS_DBG_ENABLED()) {        i = 0;        HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {            if (pollfds[i].revents) {                log_wakeup(node->where, &pollfds[i], 0);            }            i++;        }    }    free_poll_nodes(loop);    loop->timeout_when = LLONG_MAX;    
loop->timeout_where = NULL;    free(pollfds);    free(wevents);    /* Handle any pending signals before doing anything else. */    fatal_signal_run();    seq_woke();}
3、time_poll函数

/* Like poll(), but:
 *   - Wraps the OS call in an OVS-RCU quiescent period when blocking.
 *   - Clamps the wait to the watchdog 'deadline' and raises a fatal signal
 *     if the deadline has passed.
 *   - Retries automatically on EINTR.
 * Returns poll()'s result (or a negative errno value) and stores the
 * elapsed wall-clock milliseconds in '*elapsed'. */
int
time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE *handles OVS_UNUSED,
          long long int timeout_when, int *elapsed)
{
    long long int *last_wakeup = last_wakeup_get();
    long long int start;
    bool quiescent;
    int retval = 0;

    time_init();
    coverage_clear();
    coverage_run();
    if (*last_wakeup && !thread_is_pmd()) {
        log_poll_interval(*last_wakeup);
    }
    start = time_msec();

    timeout_when = MIN(timeout_when, deadline);
    quiescent = ovsrcu_is_quiescent();

    for (;;) {
        long long int now = time_msec();
        int time_left;

        /* Convert the absolute wakeup time into a bounded relative wait
         * (poll() takes an int timeout). */
        if (now >= timeout_when) {
            time_left = 0;
        } else if ((unsigned long long int) timeout_when - now > INT_MAX) {
            time_left = INT_MAX;
        } else {
            time_left = timeout_when - now;
        }

        if (!quiescent) {
            if (!time_left) {
                ovsrcu_quiesce();
            } else {
                ovsrcu_quiesce_start();
            }
        }

#ifndef _WIN32
        retval = poll(pollfds, n_pollfds, time_left);  /* The actual Linux poll() call. */
        if (retval < 0) {
            retval = -errno;
        }
#else
        if (n_pollfds > MAXIMUM_WAIT_OBJECTS) {
            VLOG_ERR("Cannot handle more than maximum wait objects\n");
        } else if (n_pollfds != 0) {
            retval = WaitForMultipleObjects(n_pollfds, handles, FALSE,
                                            time_left);
        }
        if (retval < 0) {
            /* XXX This will be replaced by a win error to errno
               conversion function */
            retval = -WSAGetLastError();
            retval = -EINVAL;
        }
#endif

        if (!quiescent && time_left) {
            ovsrcu_quiesce_end();
        }

        /* Watchdog: if the overall deadline has passed, raise the fatal
         * signal handler instead of looping again. */
        if (deadline <= time_msec()) {
#ifndef _WIN32
            fatal_signal_handler(SIGALRM);
#else
            VLOG_ERR("wake up from WaitForMultipleObjects after deadline");
            fatal_signal_handler(SIGTERM);
#endif
            if (retval < 0) {
                retval = 0;
            }
            break;
        }

        /* Only an interrupted call is retried; any other result returns. */
        if (retval != -EINTR) {
            break;
        }
    }
    *last_wakeup = time_msec();
    refresh_rusage();
    *elapsed = *last_wakeup - start;
    return retval;
}

由此可以看到,poll_block会最终调用linux poll函数,那么会监听哪些fd呢,这些fd又是何时被放到进程信息中的呢?以下以ofservice为例作为说明,其他的句柄也是类似,只是添加的函数略有不同而已。

bridge_wait函数

/* Registers with the poll loop everything bridge_run() needs to be woken up
 * for: the OVSDB IDL, interface notifications, every registered ofproto
 * type, and each bridge's ofproto (which leads down to connmgr_wait()). */
void
bridge_wait(void)
{
    struct sset types;
    const char *type;

    ovsdb_idl_wait(idl);
    if (daemonize_txn) {
        ovsdb_idl_txn_wait(daemonize_txn);
    }

    if_notifier_wait();
    if (ifaces_changed) {
        /* Pending interface changes: make the next poll_block() return
         * immediately. */
        poll_immediate_wake();
    }

    sset_init(&types);
    ofproto_enumerate_types(&types);
    SSET_FOR_EACH (type, &types) {
        ofproto_type_wait(type);
    }
    sset_destroy(&types);

    if (!hmap_is_empty(&all_bridges)) {
        struct bridge *br;

        HMAP_FOR_EACH (br, node, &all_bridges) {
            ofproto_wait(br->ofproto);          /* Entry point into the ofproto wait chain. */
        }
        stats_update_wait();
        status_update_wait();
    }
    system_stats_wait();
}
ofproto_wait函数

/* Registers wakeup events for ofproto 'p': its provider class's own waits,
 * port polling (if supported), connectivity sequence changes, and the
 * connection manager (controllers and ofservice listeners). */
void
ofproto_wait(struct ofproto *p)
{
    p->ofproto_class->wait(p);
    if (p->ofproto_class->port_poll_wait) {
        p->ofproto_class->port_poll_wait(p);
    }
    seq_wait(connectivity_seq_get(), p->change_seq);
    connmgr_wait(p->connmgr);    /* Entry point into the connmgr wait chain. */
}
connmgr_wait函数

/* Registers wakeup events for connection manager 'mgr': every active
 * controller connection, flow monitors, in-band/fail-open processing,
 * each passive ofservice listener, and each snoop listener. */
void
connmgr_wait(struct connmgr *mgr)
{
    struct ofservice *ofservice;
    struct ofconn *ofconn;
    size_t i;

    LIST_FOR_EACH (ofconn, node, &mgr->all_conns) {
        ofconn_wait(ofconn);
    }
    ofmonitor_wait(mgr);
    if (mgr->in_band) {
        in_band_wait(mgr->in_band);
    }
    if (mgr->fail_open) {
        fail_open_wait(mgr->fail_open);
    }
    HMAP_FOR_EACH (ofservice, node, &mgr->services) {
        pvconn_wait(ofservice->pvconn);     /* Registers the ofservice listening handle. */
    }
    for (i = 0; i < mgr->n_snoops; i++) {
        pvconn_wait(mgr->snoops[i]);
    }
}
pvconn_wait函数

/* Arranges for poll_block() to wake up when 'pvconn' may have a new
 * connection to accept.  Dispatches through the pvconn class vtable; for
 * pstream-backed pvconns the hook is pvconn_pstream_wait(). */
void
pvconn_wait(struct pvconn *pvconn)
{
    pvconn->pvclass->wait(pvconn);
}
pvconn_pstream_wait函数

static voidpvconn_pstream_wait(struct pvconn *pvconn){    struct pvconn_pstream *ps = pvconn_pstream_cast(pvconn);    pstream_wait(ps->pstream);}
pstream_wait函数

/* Arranges for poll_block() to wake up when pstream_accept() on 'pstream'
 * may make progress.  Dispatches through the pstream class vtable; for
 * fd_pstream_class the hook is pfd_wait(). */
void
pstream_wait(struct pstream *pstream)
{
    pstream->class->wait(pstream);
}
pfd_wait函数

/* pstream class 'wait' hook for fd-based pstreams: registers the listening
 * fd (the handle opened when the ofservice was created) for POLLIN. */
static void
pfd_wait(struct pstream *pstream)
{
    struct fd_pstream *fd_ps = fd_pstream_cast(pstream);

    poll_fd_wait(fd_ps->fd, POLLIN);
}
poll_fd_wait函数

/* Convenience wrapper around poll_fd_wait_at() that automatically supplies
 * the caller's source file and line number for the 'where' argument. */
#define poll_fd_wait(fd, events) poll_fd_wait_at(fd, events, OVS_SOURCE_LOCATOR)
poll_fd_wait_at函数

/* Registers 'fd' as waiting for the specified 'events' (which should be POLLIN * or POLLOUT or POLLIN | POLLOUT).  The following call to poll_block() will * wake up when 'fd' becomes ready for one or more of the requested events. * * On Windows, 'fd' must be a socket. * * The event registration is one-shot: only the following call to poll_block() * is affected.  The event will need to be re-registered after poll_block() is * called if it is to persist. * * ('where' is used in debug logging.  Commonly one would use poll_fd_wait() to * automatically provide the caller's source file and line number for * 'where'.) */voidpoll_fd_wait_at(int fd, short int events, const char *where){    poll_create_node(fd, 0, events, where);}
poll_create_node函数

/* Adds (or merges) a registration for 'fd' (POSIX) or 'wevent' (Windows)
 * with the requested 'events' into the current thread's poll loop, so the
 * next poll_block() will monitor it.  'where' records the caller's source
 * location for debug logging. */
static void
poll_create_node(int fd, HANDLE wevent, short int events, const char *where)
{
    struct poll_loop *loop = poll_loop();   /* Per-thread poll_loop state. */
    struct poll_node *node;

    COVERAGE_INC(poll_create_node);

    /* Both 'fd' and 'wevent' cannot be set. */
    ovs_assert(!fd != !wevent);

    /* Check for duplicate.  If found, "or" the events. */
    node = find_poll_node(loop, fd, wevent);
    if (node) {
        node->pollfd.events |= events;  /* fd already monitored: just widen the event mask. */
    } else {
        node = xzalloc(sizeof *node);   /* New poll_node, inserted into the poll_loop. */
        hmap_insert(&loop->poll_nodes, &node->hmap_node,
                    hash_2words(fd, (uint32_t)wevent));
        node->pollfd.fd = fd;
        node->pollfd.events = events;
#ifdef _WIN32
        if (!wevent) {
            wevent = CreateEvent(NULL, FALSE, FALSE, NULL);
        }
#endif
        node->wevent = wevent;
        node->where = where;
    }
}
至此,了解了句柄是如何添加到poll_loop中,并通过poll_block进行监听,其他句柄的监听也是大同小异,不再赘述。









0 0