An analysis of ovs-vswitchd startup


ovs-vswitchd startup

The vswitchd startup code lives in the main function of ovs-vswitchd.c; the two most important functions it calls are bridge_run and netdev_run.
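For orientation, here is a condensed sketch of that main loop (from vswitchd/ovs-vswitchd.c; unixctl setup, signal handling and memory accounting are omitted):

while (!exiting) {
    bridge_run();                   /* Reconfigure and run all bridges. */
    unixctl_server_run(unixctl);    /* Service ovs-appctl requests. */
    netdev_run();                   /* Periodic work for all netdevs. */

    bridge_wait();                  /* Register wakeup conditions... */
    unixctl_server_wait(unixctl);
    netdev_wait();
    poll_block();                   /* ...and sleep until one fires. */
}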

bridge_run

void
bridge_run(void)
{
    ...

    /* Initialize the ofproto library.  This only needs to run once, but
     * it must be done after the configuration is set.  If the
     * initialization has already occurred, bridge_init_ofproto()
     * returns immediately. */
    bridge_init_ofproto(cfg);

    bridge_run__();

    ...
}

bridge_init_ofproto initializes the ofproto-dpif component. It first gathers the bridge, port and interface information from the ovsdb records, and then calls ofproto_init with these as interface hints.
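For illustration, a purely hypothetical sketch of building one such hint and handing it to ofproto_init (the bridge and interface names are made up; the fields follow struct iface_hint as used in ofproto_init below):

struct shash iface_hints = SHASH_INITIALIZER(&iface_hints);
struct iface_hint *hint = xmalloc(sizeof *hint);

hint->br_name = xstrdup("br0");         /* Hypothetical bridge name. */
hint->br_type = xstrdup("system");      /* Datapath type of that bridge. */
hint->ofp_port = u16_to_ofp(1);         /* OpenFlow port number. */
shash_add(&iface_hints, "eth0", hint);  /* Indexed by interface name. */

ofproto_init(&iface_hints);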

/* Must be called to initialize the ofproto library.
 *
 * The caller may pass in 'iface_hints', which contains an shash of
 * "iface_hint" elements indexed by the interface's name.  The provider
 * may use these hints to describe the startup configuration in order to
 * reinitialize its state.  The caller owns the provided data, so a
 * provider will make copies of anything required.  An ofproto provider
 * will remove any existing state that is not described by the hint, and
 * may choose to remove it all. */
void
ofproto_init(const struct shash *iface_hints)
{
    struct shash_node *node;
    size_t i;

    ofproto_class_register(&ofproto_dpif_class);

    /* Make a local copy, since we don't own 'iface_hints' elements. */
    SHASH_FOR_EACH(node, iface_hints) {
        const struct iface_hint *orig_hint = node->data;
        struct iface_hint *new_hint = xmalloc(sizeof *new_hint);
        const char *br_type = ofproto_normalize_type(orig_hint->br_type);

        new_hint->br_name = xstrdup(orig_hint->br_name);
        new_hint->br_type = xstrdup(br_type);
        new_hint->ofp_port = orig_hint->ofp_port;

        shash_add(&init_ofp_ports, node->name, new_hint);
    }

    for (i = 0; i < n_ofproto_classes; i++) {
        ofproto_classes[i]->init(&init_ofp_ports);
    }

    ofproto_unixctl_init();
}

ofproto_init first registers ofproto_dpif_class, which is ovs's default ofproto-dpif implementation, and then calls the ofproto_class->init function of every registered ofproto_class.
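For reference, ofproto_class_register simply appends the class to the global ofproto_classes array (condensed from ofproto/ofproto.c):

int
ofproto_class_register(const struct ofproto_class *new_class)
{
    size_t i;

    for (i = 0; i < n_ofproto_classes; i++) {
        if (ofproto_classes[i] == new_class) {
            return EEXIST;              /* Already registered. */
        }
    }

    if (n_ofproto_classes >= allocated_ofproto_classes) {
        ofproto_classes = x2nrealloc(ofproto_classes,
                                     &allocated_ofproto_classes,
                                     sizeof *ofproto_classes);
    }
    ofproto_classes[n_ofproto_classes++] = new_class;
    return 0;
}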
ofproto_dpif_class is ovs's ofproto implementation, defined in ofproto/ofproto-dpif.c; let us first look at its init function:

static void
init(const struct shash *iface_hints)
{
    struct shash_node *node;

    /* Make a local copy, since we don't own 'iface_hints' elements. */
    SHASH_FOR_EACH(node, iface_hints) {
        const struct iface_hint *orig_hint = node->data;
        struct iface_hint *new_hint = xmalloc(sizeof *new_hint);

        new_hint->br_name = xstrdup(orig_hint->br_name);
        new_hint->br_type = xstrdup(orig_hint->br_type);
        new_hint->ofp_port = orig_hint->ofp_port;

        shash_add(&init_ofp_ports, node->name, new_hint);
    }

    ofproto_unixctl_init();
    udpif_init();
}

Both ofproto_unixctl_init and udpif_init register a number of ovs-appctl commands via unixctl_command_register.
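For example, udpif_init registers the commands behind 'ovs-appctl upcall/show' and friends, roughly like this (sketch condensed from ofproto/ofproto-dpif-upcall.c; the exact command set varies by version):

unixctl_command_register("upcall/show", "", 0, 0,
                         upcall_unixctl_show, NULL);
unixctl_command_register("upcall/disable-megaflows", "", 0, 0,
                         upcall_unixctl_disable_megaflows, NULL);
unixctl_command_register("upcall/enable-megaflows", "", 0, 0,
                         upcall_unixctl_enable_megaflows, NULL);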

bridge_run__ ultimately ends up calling the functions registered in ofproto_dpif_class:

static void
bridge_run__(void)
{
    struct bridge *br;
    struct sset types;
    const char *type;

    /* Let each datapath type do the work that it needs to do. */
    sset_init(&types);
    ofproto_enumerate_types(&types);
    SSET_FOR_EACH (type, &types) {
        ofproto_type_run(type);
    }
    sset_destroy(&types);

    /* Let each bridge do the work that it needs to do. */
    HMAP_FOR_EACH (br, node, &all_bridges) {
        ofproto_run(br->ofproto);
    }
}

ofproto_enumerate_types actually invokes the enumerate_types function registered in ofproto_dpif_class, which in turn calls dp_enumerate_types.
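The registered function itself is a one-line wrapper (from ofproto/ofproto-dpif.c):

static void
enumerate_types(struct sset *types)
{
    dp_enumerate_types(types);
}

dp_enumerate_types then does the real work: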

void
dp_enumerate_types(struct sset *types)
{
    struct shash_node *node;

    dp_initialize();

    ovs_mutex_lock(&dpif_mutex);
    SHASH_FOR_EACH(node, &dpif_classes) {
        const struct registered_dpif_class *registered_class = node->data;

        sset_add(types, registered_class->dpif_class->type);
    }
    ovs_mutex_unlock(&dpif_mutex);
}

dp_enumerate_types lists all the supported dpif_classes; currently there are mainly two, dpif_netdev_class and dpif_netlink_class.
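Both are registered exactly once by dp_initialize, condensed here from lib/dpif.c (tunnel and route-table setup calls omitted):

static const struct dpif_class *base_dpif_classes[] = {
#if defined(__linux__) || defined(_WIN32)
    &dpif_netlink_class,    /* Kernel datapath, driven over netlink. */
#endif
    &dpif_netdev_class,     /* Userspace datapath (also used with DPDK). */
};

static void
dp_initialize(void)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

    if (ovsthread_once_start(&once)) {
        int i;

        for (i = 0; i < ARRAY_SIZE(base_dpif_classes); i++) {
            dp_register_provider(base_dpif_classes[i]);
        }
        ovsthread_once_done(&once);
    }
}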

ofproto_type_run actually invokes the type_run function of ofproto_dpif_class, which in turn calls dpif_class->run as well as udpif_run.

struct udpif represents the upcall handler of ofproto_dpif. It consists of two parts: an array of struct handler, which services upcall requests and can be regarded as a thread pool, and an array of revalidators. I have not studied the latter closely, but it appears to be a GC-like pool of threads that reclaims expired flows.

/* An upcall handler for ofproto_dpif.
 *
 * udpif keeps records of two kind of logically separate units:
 *
 * upcall handling
 * ---------------
 *
 *    - An array of 'struct handler's for upcall handling and flow
 *      installation.
 *
 * flow revalidation
 * -----------------
 *
 *    - Revalidation threads which read the datapath flow table and maintains
 *      them.
 */
struct udpif {
    struct ovs_list list_node;         /* In all_udpifs list. */

    struct dpif *dpif;                 /* Datapath handle. */
    struct dpif_backer *backer;        /* Opaque dpif_backer pointer. */

    struct handler *handlers;          /* Upcall handlers. */
    size_t n_handlers;

    struct revalidator *revalidators;  /* Flow revalidators. */
    size_t n_revalidators;

    struct latch exit_latch;           /* Tells child threads to exit. */

    /* Revalidation. */
    struct seq *reval_seq;             /* Incremented to force revalidation. */
    bool reval_exit;                   /* Set by leader on 'exit_latch. */
    struct ovs_barrier reval_barrier;  /* Barrier used by revalidators. */
    struct dpif_flow_dump *dump;       /* DPIF flow dump state. */
    long long int dump_duration;       /* Duration of the last flow dump. */
    struct seq *dump_seq;              /* Increments each dump iteration. */
    atomic_bool enable_ufid;           /* If true, skip dumping flow attrs. */

    /* These variables provide a mechanism for the main thread to pause
     * all revalidation without having to completely shut the threads down.
     * 'pause_latch' is shared between the main thread and the lead
     * revalidator thread, so when it is desirable to halt revalidation, the
     * main thread will set the latch. 'pause' and 'pause_barrier' are shared
     * by revalidator threads. The lead revalidator will set 'pause' when it
     * observes the latch has been set, and this will cause all revalidator
     * threads to wait on 'pause_barrier' at the beginning of the next
     * revalidation round. */
    bool pause;                        /* Set by leader on 'pause_latch. */
    struct latch pause_latch;          /* Set to force revalidators pause. */
    struct ovs_barrier pause_barrier;  /* Barrier used to pause all */
                                       /* revalidators by main thread. */

    /* Following fields are accessed and modified by different threads. */
    atomic_uint flow_limit;            /* Datapath flow hard limit. */

    /* n_flows_mutex prevents multiple threads updating these concurrently. */
    atomic_uint n_flows;               /* Number of flows in the datapath. */
    atomic_llong n_flows_timestamp;    /* Last time n_flows was updated. */
    struct ovs_mutex n_flows_mutex;

    /* Following fields are accessed and modified only from the main thread. */
    struct unixctl_conn **conns;       /* Connections waiting on dump_seq. */
    uint64_t conn_seq;                 /* Corresponds to 'dump_seq' when
                                          conns[n_conns-1] was stored. */
    size_t n_conns;                    /* Number of connections waiting. */
};

struct handler and struct revalidator:

/* A thread that reads upcalls from dpif, forwards each upcall's packet,
 * and possibly sets up a kernel flow as a cache. */
struct handler {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    uint32_t handler_id;               /* Handler id. */
};

/* A thread that processes datapath flows, updates OpenFlow statistics, and
 * updates or removes them if necessary.
 *
 * Revalidator threads operate in two phases: "dump" and "sweep". In between
 * each phase, all revalidators sync up so that all revalidator threads are
 * either in one phase or the other, but not a combination.
 *
 *     During the dump phase, revalidators fetch flows from the datapath and
 *     attribute the statistics to OpenFlow rules. Each datapath flow has a
 *     corresponding ukey which caches the most recently seen statistics. If
 *     a flow needs to be deleted (for example, because it is unused over a
 *     period of time), revalidator threads may delete the flow during the
 *     dump phase. The datapath is not guaranteed to reliably dump all flows
 *     from the datapath, and there is no mapping between datapath flows to
 *     revalidators, so a particular flow may be handled by zero or more
 *     revalidators during a single dump phase. To avoid duplicate attribution
 *     of statistics, ukeys are never deleted during this phase.
 *
 *     During the sweep phase, each revalidator takes ownership of a different
 *     slice of umaps and sweeps through all ukeys in those umaps to figure out
 *     whether they need to be deleted. During this phase, revalidators may
 *     fetch individual flows which were not dumped during the dump phase to
 *     validate them and attribute statistics.
 */
struct revalidator {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    unsigned int id;                   /* ovsthread_id_self(). */
};
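These threads are spawned when the thread counts are (re)configured. A simplified sketch of how the handler pool is created, condensed from udpif_start_threads() in ofproto/ofproto-dpif-upcall.c (details vary by version):

/* Each handler is a pthread running udpif_upcall_handler(), which loops
 * receiving upcalls from the dpif and installing datapath flows. */
udpif->handlers = xzalloc(udpif->n_handlers * sizeof *udpif->handlers);
for (size_t i = 0; i < udpif->n_handlers; i++) {
    struct handler *handler = &udpif->handlers[i];

    handler->udpif = udpif;
    handler->handler_id = i;
    handler->thread = ovs_thread_create("handler",
                                        udpif_upcall_handler, handler);
}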

After that, ofproto_run is called for each bridge.
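A condensed sketch of ofproto_run() from ofproto/ofproto.c (port-change handling, flow expiration and error logging omitted):

int
ofproto_run(struct ofproto *p)
{
    int error;

    /* Dispatch to the provider, i.e. the "run" hook of ofproto_dpif_class. */
    error = p->ofproto_class->run(p);

    /* ... port change detection, flow table expiration, etc. ... */

    /* Service OpenFlow controller connections and incoming messages. */
    connmgr_run(p->connmgr, handle_openflow);

    return error;
}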


netdev_run

/* Performs periodic work needed by all the various kinds of netdevs.
 *
 * If your program opens any netdevs, it must call this function within its
 * main poll loop. */
void
netdev_run(void)
    OVS_EXCLUDED(netdev_mutex)
{
    netdev_initialize();

    struct netdev_registered_class *rc;
    CMAP_FOR_EACH (rc, cmap_node, &netdev_classes) {
        if (rc->class->run) {
            rc->class->run(rc->class);
        }
    }
}

netdev_run first calls netdev_initialize. That function is guarded by a struct ovsthread_once block, which guarantees it runs only once. Its main job is to register the various netdev_class and tunnel class providers:

static void
netdev_initialize(void)
    OVS_EXCLUDED(netdev_mutex)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

    if (ovsthread_once_start(&once)) {
        fatal_signal_add_hook(restore_all_flags, NULL, NULL, true);

        netdev_vport_patch_register();

        netdev_register_provider(&netdev_linux_class);
        netdev_register_provider(&netdev_internal_class);
        netdev_register_provider(&netdev_tap_class);
        netdev_vport_tunnel_register();

        ovsthread_once_done(&once);
    }
}

netdev_vport_patch_register registers the patch-port class of vport (a patch port connects two bridges and has none of the dpif attributes itself), while netdev_vport_tunnel_register registers a variety of tunnel types, e.g.

static const struct vport_class vport_classes[] = {
    TUNNEL_CLASS("geneve", "genev_sys", netdev_geneve_build_header,
                                        netdev_tnl_push_udp_header,
                                        netdev_geneve_pop_header),
    TUNNEL_CLASS("gre", "gre_sys", netdev_gre_build_header,
                                   netdev_gre_push_header,
                                   netdev_gre_pop_header),
    TUNNEL_CLASS("ipsec_gre", "gre_sys", NULL, NULL, NULL),
    TUNNEL_CLASS("vxlan", "vxlan_sys", netdev_vxlan_build_header,
                                       netdev_tnl_push_udp_header,
                                       netdev_vxlan_pop_header),
    TUNNEL_CLASS("lisp", "lisp_sys", NULL, NULL, NULL),
    TUNNEL_CLASS("stt", "stt_sys", NULL, NULL, NULL),
};

Besides these, several other kinds of netdev_class are registered as well, including the various DPDK netdev_classes:

const struct netdev_class netdev_linux_class =
    NETDEV_LINUX_CLASS(
        "system",
        netdev_linux_construct,
        netdev_linux_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);

const struct netdev_class netdev_tap_class =
    NETDEV_LINUX_CLASS(
        "tap",
        netdev_linux_construct_tap,
        netdev_tap_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);

const struct netdev_class netdev_internal_class =
    NETDEV_LINUX_CLASS(
        "internal",
        netdev_linux_construct,
        netdev_internal_get_stats,
        NULL,                  /* get_features */
        netdev_internal_get_status);

static const struct netdev_class dpdk_class =
    NETDEV_DPDK_CLASS(
        "dpdk",
        netdev_dpdk_construct,
        netdev_dpdk_destruct,
        netdev_dpdk_set_config,
        netdev_dpdk_set_tx_multiq,
        netdev_dpdk_eth_send,
        netdev_dpdk_get_carrier,
        netdev_dpdk_get_stats,
        netdev_dpdk_get_features,
        netdev_dpdk_get_status,
        netdev_dpdk_reconfigure,
        netdev_dpdk_rxq_recv);

static const struct netdev_class dpdk_ring_class =
    NETDEV_DPDK_CLASS(
        "dpdkr",
        netdev_dpdk_ring_construct,
        netdev_dpdk_destruct,
        netdev_dpdk_ring_set_config,
        netdev_dpdk_set_tx_multiq,
        netdev_dpdk_ring_send,
        netdev_dpdk_get_carrier,
        netdev_dpdk_get_stats,
        netdev_dpdk_get_features,
        netdev_dpdk_get_status,
        netdev_dpdk_reconfigure,
        netdev_dpdk_rxq_recv);

static const struct netdev_class dpdk_vhost_class =
    NETDEV_DPDK_CLASS(
        "dpdkvhostuser",
        netdev_dpdk_vhost_construct,
        netdev_dpdk_vhost_destruct,
        NULL,
        NULL,
        netdev_dpdk_vhost_send,
        netdev_dpdk_vhost_get_carrier,
        netdev_dpdk_vhost_get_stats,
        NULL,
        NULL,
        netdev_dpdk_vhost_reconfigure,
        netdev_dpdk_vhost_rxq_recv);

Finally, the run function of every registered netdev_class is invoked. Taking netdev_linux_class as an example (dpdk_class's run function is empty), it mainly uses a netlink socket to perform periodic update work; I won't go into the details.
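For completeness, a condensed sketch of netdev_linux_run(), based on lib/netdev-linux.c (helper names and signatures differ slightly between OVS versions): it drains the shared rtnetlink notification socket and folds each link-state change back into the corresponding netdev's cached state.

static void
netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
{
    struct nl_sock *sock;
    int error;

    sock = netdev_linux_notify_sock();   /* Shared RTNLGRP_LINK socket. */
    if (!sock) {
        return;
    }

    do {
        uint64_t buf_stub[4096 / 8];
        struct ofpbuf buf;

        ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
        error = nl_sock_recv(sock, &buf, false);    /* Non-blocking read. */
        if (!error) {
            struct rtnetlink_change change;

            if (rtnetlink_parse(&buf, &change)) {
                /* Look up the affected netdev by change.ifname (elided)
                 * and refresh its cached flags/ifindex via
                 * netdev_linux_update(). */
            }
        } else if (error == ENOBUFS) {
            /* The kernel dropped notifications: re-query every netdev. */
        }
        ofpbuf_uninit(&buf);
    } while (!error);
}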
