【OVS2.5.0源码分析】normal action精确流表生成和刷新过程分析

来源:互联网 发布:书生阅读器windows版 编辑:程序博客网 时间:2024/06/15 15:00

一、首包精确流表生成

首包到达 OVS 交换机时,尚未建立基于目的 MAC 的流表规则,需要 upcall 到用户态进行处理和学习;此时生成的规则是把报文 flood 到同一 VLAN 的其他端口。

1、upcall_xlate函数

    upcall->dump_seq = seq_read(udpif->dump_seq);    upcall->reval_seq = seq_read(udpif->reval_seq);    xlate_actions(&xin, &upcall->xout);     upcall->xout_initialized = true;

2、xlate_actions函数

<pre name="code" class="cpp">   /* The bridge is now known so obtain its table version. */    ctx.tables_version = ofproto_dpif_get_tables_version(ctx.xbridge->ofproto);    if (!xin->ofpacts && !ctx.rule) {        ctx.rule = rule_dpif_lookup_from_table(       //检索流表            ctx.xbridge->ofproto, ctx.tables_version, flow, xin->wc,            ctx.xin->resubmit_stats, &ctx.table_id,            flow->in_port.ofp_port, true, true);        if (ctx.xin->resubmit_stats) {            rule_dpif_credit_stats(ctx.rule, ctx.xin->resubmit_stats);        }        if (ctx.xin->xcache) {            struct xc_entry *entry;            entry = xlate_cache_add_entry(ctx.xin->xcache, XC_RULE);            entry->u.rule = ctx.rule;            rule_dpif_ref(ctx.rule);        }        if (OVS_UNLIKELY(ctx.xin->resubmit_hook)) {            ctx.xin->resubmit_hook(ctx.xin, ctx.rule, 0);        }    }    xout->fail_open = ctx.rule && rule_dpif_is_fail_open(ctx.rule);    /* Get the proximate input port of the packet.  (If xin->recirc,     * flow->in_port is the ultimate input port of the packet.) */    struct xport *in_port = get_ofp_port(xbridge,                                         ctx.base_flow.in_port.ofp_port);    /* Tunnel stats only for non-recirculated packets. */    if (!xin->recirc && in_port && in_port->is_tunnel) {        if (ctx.xin->resubmit_stats) {            netdev_vport_inc_rx(in_port->netdev, ctx.xin->resubmit_stats);            if (in_port->bfd) {                bfd_account_rx(in_port->bfd, ctx.xin->resubmit_stats);            }        }        if (ctx.xin->xcache) {            struct xc_entry *entry;            entry = xlate_cache_add_entry(ctx.xin->xcache, XC_NETDEV);            entry->u.dev.rx = netdev_ref(in_port->netdev);            entry->u.dev.bfd = bfd_ref(in_port->bfd);        }    }    if (!xin->recirc && process_special(&ctx, in_port)) {        /* process_special() did all the processing for this packet.         
*         * We do not perform special processing on recirculated packets, as         * recirculated packets are not really received by the bridge.*/    } else if (in_port && in_port->xbundle               && xbundle_mirror_out(xbridge, in_port->xbundle)) {        if (ctx.xin->packet != NULL) {            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);            VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "                         "%s, which is reserved exclusively for mirroring",                         ctx.xbridge->name, in_port->xbundle->name);        }    } else {        /* Sampling is done only for packets really received by the bridge. */        unsigned int user_cookie_offset = 0;        if (!xin->recirc) {            user_cookie_offset = compose_sflow_action(&ctx);            compose_ipfix_action(&ctx, ODPP_NONE);        }        size_t sample_actions_len = ctx.odp_actions->size;        if (tnl_process_ecn(flow)            && (!in_port || may_receive(in_port, &ctx))) {            const struct ofpact *ofpacts;            size_t ofpacts_len;            if (xin->ofpacts) {                ofpacts = xin->ofpacts;                ofpacts_len = xin->ofpacts_len;            } else if (ctx.rule) {                const struct rule_actions *actions                    = rule_dpif_get_actions(ctx.rule);                ofpacts = actions->ofpacts;                ofpacts_len = actions->ofpacts_len;                ctx.rule_cookie = rule_dpif_get_flow_cookie(ctx.rule);            } else {                OVS_NOT_REACHED();            }            mirror_ingress_packet(&ctx);            do_xlate_actions(ofpacts, ofpacts_len, &ctx);      //把openflow流表转化为精确流表            if (ctx.error) {                goto exit;            }            /* We've let OFPP_NORMAL and the learning action look at the             * packet, so drop it now if forwarding is disabled. 
*/            if (in_port && (!xport_stp_forward_state(in_port) ||                            !xport_rstp_forward_state(in_port))) {                /* Drop all actions added by do_xlate_actions() above. */                ctx.odp_actions->size = sample_actions_len;                /* Undo changes that may have been done for recirculation. */                if (exit_recirculates(&ctx)) {                    ctx.action_set.size = ctx.recirc_action_offset;                    ctx.recirc_action_offset = -1;                    ctx.last_unroll_offset = -1;                }            } else if (ctx.action_set.size) {                /* Translate action set only if not dropping the packet and                 * not recirculating. */                if (!exit_recirculates(&ctx)) {                    xlate_action_set(&ctx);                }            }            /* Check if need to recirculate. */            if (exit_recirculates(&ctx)) {                compose_recirculate_action(&ctx);            }        }

3、do_xlate_actions函数

        switch (a->type) {        case OFPACT_OUTPUT:            xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,                                ofpact_get_OUTPUT(a)->max_len, true);            break;

4、xlate_output_action函数

    switch (port) {    case OFPP_IN_PORT:        compose_output_action(ctx, ctx->xin->flow.in_port.ofp_port, NULL);        break;    case OFPP_TABLE:        xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,                           0, may_packet_in, true);        break;    case OFPP_NORMAL:        xlate_normal(ctx);      //normal规则        break;    case OFPP_FLOOD:        flood_packets(ctx,  false);        break;

5、xlate_normal函数

        ovs_rwlock_unlock(&ms->rwlock);    } else {        ovs_rwlock_rdlock(&ctx->xbridge->ml->rwlock);        mac = mac_learning_lookup(ctx->xbridge->ml, flow->dl_dst, vlan);        mac_port = mac ? mac_entry_get_port(ctx->xbridge->ml, mac) : NULL;        ovs_rwlock_unlock(&ctx->xbridge->ml->rwlock);        if (mac_port) {            struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);            struct xbundle *mac_xbundle = xbundle_lookup(xcfg, mac_port);            if (mac_xbundle && mac_xbundle != in_xbundle) {                xlate_report(ctx, "forwarding to learned port");                output_normal(ctx, mac_xbundle, vlan);                 //能够匹配到mac,确定出端口            } else if (!mac_xbundle) {                xlate_report(ctx, "learned port is unknown, dropping");            } else {                xlate_report(ctx, "learned port is input port, dropping");            }        } else {            xlate_report(ctx, "no learned MAC for destination, flooding");            xlate_normal_flood(ctx, in_xbundle, vlan);      //初始,会flood到其他端口;        }

二、后续报文规则刷新

当首包的响应报文到达交换机后,源方向报文对应的精确流表就可以刷新了(因为此时 MAC 学习表中已经记录了目的 MAC 所在的端口)。OVS 是通过 revalidator 线程周期性地重新翻译(revalidate)datapath 流表来完成这一刷新的。

1、revalidate函数

/* Revalidator thread main work loop: dumps flows from the datapath in
 * batches, and for each dumped flow decides whether to keep, modify, or
 * delete it (via revalidate_ukey()), then pushes the resulting operations
 * back down to the datapath.
 *
 * NOTE(review): this is an excerpt from OVS 2.5 ofproto-dpif-upcall.c; the
 * ukey locking protocol (ukey_acquire() leaves ukey->mutex held on success)
 * is assumed from that context — confirm against the full source. */
static void
revalidate(struct revalidator *revalidator)
{
    uint64_t odp_actions_stub[1024 / 8];
    struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);
    struct udpif *udpif = revalidator->udpif;
    struct dpif_flow_dump_thread *dump_thread;
    uint64_t dump_seq, reval_seq;
    unsigned int flow_limit;

    dump_seq = seq_read(udpif->dump_seq);
    reval_seq = seq_read(udpif->reval_seq);
    atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
    dump_thread = dpif_flow_dump_thread_create(udpif->dump);
    for (;;) {
        struct ukey_op ops[REVALIDATE_MAX_BATCH];
        int n_ops = 0;

        struct dpif_flow flows[REVALIDATE_MAX_BATCH];
        const struct dpif_flow *f;
        int n_dumped;

        long long int max_idle;
        long long int now;
        size_t n_dp_flows;
        bool kill_them_all;

        /* Fetch the next batch of flows from the datapath; an empty batch
         * means the dump is finished. */
        n_dumped = dpif_flow_dump_next(dump_thread, flows, ARRAY_SIZE(flows));
        if (!n_dumped) {
            break;
        }

        now = time_msec();

        /* In normal operation we want to keep flows around until they have
         * been idle for 'ofproto_max_idle' milliseconds.  However:
         *
         *     - If the number of datapath flows climbs above 'flow_limit',
         *       drop that down to 100 ms to try to bring the flows down to
         *       the limit.
         *
         *     - If the number of datapath flows climbs above twice
         *       'flow_limit', delete all the datapath flows as an emergency
         *       measure.  (We reassess this condition for the next batch of
         *       datapath flows, so we will recover before all the flows are
         *       gone.) */
        n_dp_flows = udpif_get_n_flows(udpif);
        kill_them_all = n_dp_flows > flow_limit * 2;
        max_idle = n_dp_flows > flow_limit ? 100 : ofproto_max_idle;

        for (f = flows; f < &flows[n_dumped]; f++) {
            long long int used = f->stats.used;
            struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
            enum reval_result result;
            struct udpif_key *ukey;
            bool already_dumped;
            int error;

            if (ukey_acquire(udpif, f, &ukey, &error)) {
                if (error == EBUSY) {
                    /* Another thread is processing this flow, so don't bother
                     * processing it.*/
                    COVERAGE_INC(upcall_ukey_contention);
                } else {
                    log_unexpected_flow(f, error);
                    if (error != ENOENT) {
                        /* Unknown flow with no ukey: queue it for deletion
                         * from the datapath. */
                        delete_op_init__(udpif, &ops[n_ops++], f);
                    }
                }
                continue;
            }

            already_dumped = ukey->dump_seq == dump_seq;
            if (already_dumped) {
                /* The flow has already been handled during this flow dump
                 * operation. Skip it. */
                if (ukey->xcache) {
                    COVERAGE_INC(dumped_duplicate_flow);
                } else {
                    COVERAGE_INC(dumped_new_flow);
                }
                ovs_mutex_unlock(&ukey->mutex);
                continue;
            }

            /* A flow never reported as used falls back to its creation time
             * for idle-age purposes. */
            if (!used) {
                used = ukey->created;
            }
            if (kill_them_all || (used && used < now - max_idle)) {
                result = UKEY_DELETE;
            } else {
                /* Re-translate the flow to see whether the installed
                 * datapath actions are still correct. */
                result = revalidate_ukey(udpif, ukey, &f->stats, &odp_actions,
                                         reval_seq, &recircs);
            }
            ukey->dump_seq = dump_seq;
            ukey->flow_exists = result != UKEY_DELETE;

            if (result != UKEY_KEEP) {
                /* Takes ownership of 'recircs'. */
                reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
                              &odp_actions);
            }
            ovs_mutex_unlock(&ukey->mutex);
        }

        if (n_ops) {
            /* Apply the accumulated modify/delete operations to the
             * datapath in one batch. */
            push_ukey_ops__(udpif, ops, n_ops);
        }
        /* Let RCU reclaim deferred memory between batches. */
        ovsrcu_quiesce();
    }
    dpif_flow_dump_thread_destroy(dump_thread);
    ofpbuf_uninit(&odp_actions);
}

2、revalidate_ukey函数

/* Decides the fate of one datapath flow tracked by 'ukey'.
 *
 * Pushes the delta of 'stats' since the last dump into the xlate layer,
 * and — when the global revalidation sequence number has changed — runs
 * xlate_actions() again on the flow to recompute its datapath actions.
 * If the newly translated actions differ from the installed ones, returns
 * UKEY_MODIFY (transferring recirculation-ID references into 'recircs');
 * returns UKEY_KEEP when the flow is still correct, and UKEY_DELETE on any
 * translation/lookup failure or when the installed flow has become too
 * generic.
 *
 * Caller must hold ukey->mutex (enforced by OVS_REQUIRES). */
static enum reval_result
revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
                const struct dpif_flow_stats *stats,
                struct ofpbuf *odp_actions, uint64_t reval_seq,
                struct recirc_refs *recircs)
    OVS_REQUIRES(ukey->mutex)
{
    struct xlate_out xout, *xoutp;
    struct netflow *netflow;
    struct ofproto_dpif *ofproto;
    struct dpif_flow_stats push;
    struct flow flow;
    struct flow_wildcards dp_mask, wc;
    enum reval_result result;
    ofp_port_t ofp_in_port;
    struct xlate_in xin;
    long long int last_used;
    int error;
    bool need_revalidate;

    /* Default to deletion: any early-exit failure path below removes the
     * flow rather than leaving a possibly-stale one installed. */
    result = UKEY_DELETE;
    xoutp = NULL;
    netflow = NULL;

    ofpbuf_clear(odp_actions);
    /* A bumped reval_seq means the OpenFlow tables (or config) may have
     * changed since this flow was last translated. */
    need_revalidate = (ukey->reval_seq != reval_seq);
    last_used = ukey->stats.used;
    push.used = stats->used;
    push.tcp_flags = stats->tcp_flags;
    /* Compute deltas since the previous dump; guard against datapath
     * counters appearing to go backwards. */
    push.n_packets = (stats->n_packets > ukey->stats.n_packets
                      ? stats->n_packets - ukey->stats.n_packets
                      : 0);
    push.n_bytes = (stats->n_bytes > ukey->stats.n_bytes
                    ? stats->n_bytes - ukey->stats.n_bytes
                    : 0);

    /* Rate heuristic: a low-traffic flow may not be worth re-translating
     * now; falling through to 'exit' with UKEY_DELETE drops it instead. */
    if (need_revalidate && last_used
        && !should_revalidate(udpif, push.n_packets, last_used)) {
        goto exit;
    }

    /* We will push the stats, so update the ukey stats cache. */
    ukey->stats = *stats;
    if (!push.n_packets && !need_revalidate) {
        result = UKEY_KEEP;
        goto exit;
    }

    /* Nothing changed upstream: just account the new stats via the cached
     * translation and keep the flow. */
    if (ukey->xcache && !need_revalidate) {
        xlate_push_stats(ukey->xcache, &push);
        result = UKEY_KEEP;
        goto exit;
    }

    /* Reconstruct the struct flow from the stored datapath key. */
    if (odp_flow_key_to_flow(ukey->key, ukey->key_len, &flow)
        == ODP_FIT_ERROR) {
        goto exit;
    }

    error = xlate_lookup(udpif->backer, &flow, &ofproto, NULL, NULL, &netflow,
                         &ofp_in_port);
    if (error) {
        goto exit;
    }

    if (need_revalidate) {
        xlate_cache_clear(ukey->xcache);
    }
    if (!ukey->xcache) {
        ukey->xcache = xlate_cache_new();
    }

    xlate_in_init(&xin, ofproto, &flow, ofp_in_port, NULL, push.tcp_flags,
                  NULL, need_revalidate ? &wc : NULL, odp_actions);
    if (push.n_packets) {
        /* Traffic was seen, so allow MAC learning during translation. */
        xin.resubmit_stats = &push;
        xin.may_learn = true;
    }
    xin.xcache = ukey->xcache;
    /* Re-run translation.  If the destination MAC has been learned since
     * the flow was installed, the resulting actions differ from the
     * original flood actions. */
    xlate_actions(&xin, &xout);
    xoutp = &xout;

    if (!need_revalidate) {
        result = UKEY_KEEP;
        goto exit;
    }

    /* Slow-path flows get a userspace action instead of the translated
     * actions. */
    if (xout.slow) {
        ofpbuf_clear(odp_actions);
        compose_slow_path(udpif, &xout, &flow, flow.in_port.odp_port,
                          odp_actions);
    }

    if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, ukey->key,
                             ukey->key_len, &dp_mask, &flow)
        == ODP_FIT_ERROR) {
        goto exit;
    }

    /* Do not modify if any bit is wildcarded by the installed datapath flow,
     * but not the newly revalidated wildcard mask (wc), i.e., if revalidation
     * tells that the datapath flow is now too generic and must be narrowed
     * down.  Note that we do not know if the datapath has ignored any of the
     * wildcarded bits, so we may be overtly conservative here. */
    if (flow_wildcards_has_extra(&dp_mask, &wc)) {
        goto exit;
    }

    /* Compare the freshly translated actions against the installed ones;
     * a difference means the datapath flow must be updated in place. */
    if (!ofpbuf_equal(odp_actions,
                      ovsrcu_get(struct ofpbuf *, &ukey->actions))) {
        /* The datapath mask was OK, but the actions seem to have changed.
         * Let's modify it in place. */
        result = UKEY_MODIFY;
        /* Transfer recirc action ID references to the caller. */
        recirc_refs_swap(recircs, &xoutp->recircs);
        goto exit;
    }

    result = UKEY_KEEP;

exit:
    if (result != UKEY_DELETE) {
        /* Record that this flow is valid for the current sequence so it is
         * not re-translated again until reval_seq changes. */
        ukey->reval_seq = reval_seq;
    }
    if (netflow && result == UKEY_DELETE) {
        netflow_flow_clear(netflow, &flow);
    }
    xlate_out_uninit(xoutp);
    return result;
}

0 0
原创粉丝点击