One of the major tasks of ofproto-dpif is to translate OpenFlow actions into "ODP" datapath actions. These translations are essentially a cache that requires revalidation when certain state changes occur. For best performance it's important to revalidate flows only when necessary, so from time to time Open vSwitch has gotten this wrong, which meant that stale flows could persist in the kernel and cause surprising behavior.
This commit implements a simple "self check": every trip through the Open vSwitch main loop randomly chooses one flow entry and checks that its actions have been correctly translated. If not, Open vSwitch logs the details of the problem. This should help find problems more quickly in the future. Signed-off-by: Ben Pfaff <b...@nicira.com> --- ofproto/ofproto-dpif.c | 163 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 163 insertions(+), 0 deletions(-) diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index f131ebd..924cd69 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -313,6 +313,7 @@ static struct facet *facet_find(struct ofproto_dpif *, const struct flow *); static struct facet *facet_lookup_valid(struct ofproto_dpif *, const struct flow *); static bool facet_revalidate(struct facet *); +static bool facet_check_consistency(struct facet *); static bool execute_controller_action(struct ofproto_dpif *, const struct flow *, const struct nlattr *odp_actions, @@ -373,6 +374,8 @@ static struct subfacet *subfacet_find(struct ofproto_dpif *, const struct nlattr *key, size_t key_len); static void subfacet_destroy(struct subfacet *); static void subfacet_destroy__(struct subfacet *); +static void subfacet_get_key(struct subfacet *, struct odputil_keybuf *, + struct ofpbuf *key); static void subfacet_reset_dp_stats(struct subfacet *, struct dpif_flow_stats *); static void subfacet_update_time(struct subfacet *, long long int used); @@ -814,6 +817,19 @@ run(struct ofproto *ofproto_) } } + /* Check the consistency of a random facet, to aid debugging. */ + if (!hmap_is_empty(&ofproto->facets) && !ofproto->need_revalidate) { + struct facet *facet; + + facet = CONTAINER_OF(hmap_random_node(&ofproto->facets), + struct facet, hmap_node); + if (!tag_set_intersects(&ofproto->revalidate_set, facet->tags)) { + if (!facet_check_consistency(facet)) { + ofproto->need_revalidate = true; + } + } + } + return 0; } @@ -3340,6 +3356,116 @@ facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow) return facet; } +static bool +facet_check_consistency(struct facet *facet) +{ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15); + + struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto); + + struct rule_dpif *rule; + struct subfacet *subfacet; + bool ok; + + COVERAGE_INC(facet_revalidate); + + /* Check the rule for consistency. */ + rule = rule_dpif_lookup(ofproto, &facet->flow, 0); + if (!rule) { + if (!VLOG_DROP_WARN(&rl)) { + char *s = flow_to_string(&facet->flow); + VLOG_WARN("%s: facet should not exist", s); + free(s); + } + return false; + } else if (rule != facet->rule) { + struct ds s; + + flow_format(&s, &facet->flow); + ds_put_format(&s, ": facet associated with wrong rule (was " + "table=%"PRIu8",", facet->rule->up.table_id); + cls_rule_format(&facet->rule->up.cr, &s); + ds_put_format(&s, ") (should have been table=%"PRIu8",", + rule->up.table_id); + cls_rule_format(&rule->up.cr, &s); + ds_put_char(&s, ')'); + + VLOG_WARN("%s", ds_cstr(&s)); + ds_destroy(&s); + + ok = false; + } else { + ok = true; + } + + /* Check the datapath actions for consistency. */ + LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) { + struct action_xlate_ctx ctx; + struct ofpbuf *odp_actions; + bool actions_changed; + bool should_install; + + action_xlate_ctx_init(&ctx, ofproto, &facet->flow, + subfacet->initial_tci, NULL); + odp_actions = xlate_actions(&ctx, rule->up.actions, + rule->up.n_actions); + + should_install = (ctx.may_set_up_flow + && subfacet->key_fitness != ODP_FIT_TOO_LITTLE); + if (!should_install && !subfacet->installed) { + /* The actions for uninstallable flows may vary from one packet to + * the next, so don't compare the actions. */ + goto next; + } + + actions_changed = (subfacet->actions_len != odp_actions->size + || memcmp(subfacet->actions, odp_actions->data, + subfacet->actions_len)); + if (should_install != subfacet->installed || actions_changed) { + struct odputil_keybuf keybuf; + struct ofpbuf key; + struct ds s; + + ok = false; + + ds_init(&s); + subfacet_get_key(subfacet, &keybuf, &key); + odp_flow_key_format(key.data, key.size, &s); + + ds_put_cstr(&s, ": inconsistency in subfacet"); + if (should_install != subfacet->installed) { + ds_put_format(&s, " (should%s have been installed)", + should_install ? "" : " not"); + ds_put_format(&s, " (may_set_up_flow=%s, fitness=%s)", + ctx.may_set_up_flow ? "true" : "false", + subfacet->key_fitness == ODP_FIT_TOO_LITTLE + ? "too_little" : "OK"); + } + if (actions_changed) { + ds_put_cstr(&s, " (actions were: "); + format_odp_actions(&s, subfacet->actions, + subfacet->actions_len); + ds_put_cstr(&s, ") (correct actions: "); + format_odp_actions(&s, odp_actions->data, + odp_actions->size); + ds_put_char(&s, ')'); + } else { + ds_put_cstr(&s, " (actions: "); + format_odp_actions(&s, subfacet->actions, + subfacet->actions_len); + ds_put_char(&s, ')'); + } + VLOG_WARN("%s", ds_cstr(&s)); + ds_destroy(&s); + } + + next: + ofpbuf_delete(odp_actions); + } + + return ok; +} + /* Re-searches the classifier for 'facet': * * - If the rule found is different from 'facet''s current rule, moves @@ -5812,6 +5938,41 @@ ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, } static void +ofproto_dpif_self_check(struct unixctl_conn *conn, + int argc OVS_UNUSED, const char *argv[], + void *aux OVS_UNUSED) +{ + const char *dpname = argv[1]; + struct ofproto_dpif *ofproto; + struct facet *facet; + char *reply; + int errors; + + ofproto = ofproto_dpif_lookup(dpname); + if (!ofproto) { + unixctl_command_reply(conn, 501, "Unknown ofproto (use ofproto/list " + "for help)"); + return; + } + + errors = 0; + HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) { + if (!facet_check_consistency(facet)) { + errors++; + } + } + if (errors) { + ofproto->need_revalidate = true; + } + + reply = (errors + ? xasprintf("%s: self-check failed (%d errors)", dpname, errors) + : xasprintf("%s: self-check passed", dpname)); + unixctl_command_reply(conn, 200, reply); + free(reply); +} + +static void ofproto_dpif_unixctl_init(void) { static bool registered; @@ -5832,6 +5993,8 @@ ofproto_dpif_unixctl_init(void) ofproto_dpif_clog, NULL); unixctl_command_register("ofproto/unclog", "", 0, 0, ofproto_dpif_unclog, NULL); + unixctl_command_register("ofproto/self-check", "bridge", 1, 1, + ofproto_dpif_self_check, NULL); } /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) -- 1.7.2.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev