On Fri, Jan 13, 2012 at 04:01:49PM -0800, Ethan Jackson wrote: > Looks like this will be pretty useful. > > In facet_check_consistency() > Is facet_revalidate the appropriate coverage counter to increment?
No. Removed. > When rule != facet->rule we don't initialize the dynamic string. Ouch. Thank you, fixed. > I think the code only warns when installable facets aren't installed, > not when un-installable facets are installed. The code looks OK to me. Can you point out the problem more specifically? > It may be worth printing the other possible values of > subfacet->key_fitness in case it's not OK but not TOO_LITTLE. OK, I added a function odp_key_fitness_to_string(). > In ofproto_dpif_self_check(): > I've found it useful to do appctl commands for all instances when one > isn't specified in the lacp, bond, and cfm module. It may be worth > doing it here as well, though I don't feel strongly about it. Good idea. Done. Here's an incremental followed by a full respin. diff --git a/lib/odp-util.c b/lib/odp-util.c index ffb486e..12e8daf 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011 Nicira Networks. + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1687,6 +1687,24 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, expected_attrs, flow, key, key_len); } +/* Returns 'fitness' as a string, for use in debug messages. */ +const char * +odp_key_fitness_to_string(enum odp_key_fitness fitness) +{ + switch (fitness) { + case ODP_FIT_PERFECT: + return "OK"; + case ODP_FIT_TOO_MUCH: + return "too_much"; + case ODP_FIT_TOO_LITTLE: + return "too_little"; + case ODP_FIT_ERROR: + return "error"; + default: + return "<unknown>"; + } +} + /* Appends an OVS_ACTION_ATTR_USERSPACE action to 'odp_actions' that specifies * Netlink PID 'pid'. If 'cookie' is nonnull, adds a userdata attribute whose * contents contains 'cookie' and returns the offset within 'odp_actions' of diff --git a/lib/odp-util.h b/lib/odp-util.h index a6f8a30..0028499 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011 Nicira Networks. + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -114,6 +114,7 @@ enum odp_key_fitness { }; enum odp_key_fitness odp_flow_key_to_flow(const struct nlattr *, size_t, struct flow *); +const char *odp_key_fitness_to_string(enum odp_key_fitness); enum user_action_cookie_type { USER_ACTION_COOKIE_UNSPEC, diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 6e73e0d..64e9073 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -3366,8 +3366,6 @@ facet_check_consistency(struct facet *facet) struct subfacet *subfacet; bool ok; - COVERAGE_INC(facet_revalidate); - /* Check the rule for consistency. */ rule = rule_dpif_lookup(ofproto, &facet->flow, 0); if (!rule) { @@ -3380,6 +3378,7 @@ facet_check_consistency(struct facet *facet) } else if (rule != facet->rule) { struct ds s; + ds_init(&s); flow_format(&s, &facet->flow); ds_put_format(&s, ": facet associated with wrong rule (was " "table=%"PRIu8",", facet->rule->up.table_id); @@ -3434,12 +3433,13 @@ facet_check_consistency(struct facet *facet) ds_put_cstr(&s, ": inconsistency in subfacet"); if (should_install != subfacet->installed) { + enum odp_key_fitness fitness = subfacet->key_fitness; + ds_put_format(&s, " (should%s have been installed)", should_install ? "" : " not"); ds_put_format(&s, " (may_set_up_flow=%s, fitness=%s)", ctx.may_set_up_flow ? "true" : "false", - subfacet->key_fitness == ODP_FIT_TOO_LITTLE - ? "too_little" : "OK"); + odp_key_fitness_to_string(fitness)); } if (actions_changed) { ds_put_cstr(&s, " (actions were: "); @@ -6048,24 +6048,14 @@ ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, unixctl_command_reply(conn, 200, NULL); } +/* Runs a self-check of flow translations in 'ofproto'. Appends a message to + * 'reply' describing the results. */ static void -ofproto_dpif_self_check(struct unixctl_conn *conn, - int argc OVS_UNUSED, const char *argv[], - void *aux OVS_UNUSED) +ofproto_dpif_self_check__(struct ofproto_dpif *ofproto, struct ds *reply) { - const char *dpname = argv[1]; - struct ofproto_dpif *ofproto; struct facet *facet; - char *reply; int errors; - ofproto = ofproto_dpif_lookup(dpname); - if (!ofproto) { - unixctl_command_reply(conn, 501, "Unknown ofproto (use ofproto/list " - "for help)"); - return; - } - errors = 0; HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) { if (!facet_check_consistency(facet)) { @@ -6076,11 +6066,38 @@ ofproto_dpif_self_check(struct unixctl_conn *conn, ofproto->need_revalidate = true; } - reply = (errors - ? xasprintf("%s: self-check failed (%d errors)", dpname, errors) - : xasprintf("%s: self-check passed", dpname)); - unixctl_command_reply(conn, 200, reply); - free(reply); + if (errors) { + ds_put_format(reply, "%s: self-check failed (%d errors)\n", + ofproto->up.name, errors); + } else { + ds_put_format(reply, "%s: self-check passed\n", ofproto->up.name); + } +} + +static void +ofproto_dpif_self_check(struct unixctl_conn *conn, + int argc OVS_UNUSED, const char *argv[], + void *aux OVS_UNUSED) +{ + struct ds reply = DS_EMPTY_INITIALIZER; + struct ofproto_dpif *ofproto; + + if (argc > 1) { + ofproto = ofproto_dpif_lookup(argv[1]); + if (!ofproto) { + unixctl_command_reply(conn, 501, "Unknown ofproto (use " + "ofproto/list for help)"); + return; + } + ofproto_dpif_self_check__(ofproto, &reply); + } else { + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { + ofproto_dpif_self_check__(ofproto, &reply); + } + } + + unixctl_command_reply(conn, 200, ds_cstr(&reply)); + ds_destroy(&reply); } static void @@ -6104,7 +6121,7 @@ ofproto_dpif_unixctl_init(void) ofproto_dpif_clog, NULL); unixctl_command_register("ofproto/unclog", "", 0, 0, ofproto_dpif_unclog, NULL); - unixctl_command_register("ofproto/self-check", "bridge", 1, 1, + unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1, ofproto_dpif_self_check, NULL); } diff --git a/ofproto/ofproto-unixctl.man b/ofproto/ofproto-unixctl.man index 5ae90f2..ad9e02d 100644 --- a/ofproto/ofproto-unixctl.man +++ b/ofproto/ofproto-unixctl.man @@ -64,3 +64,10 @@ This form of \fBofproto/trace\fR cannot determine the complete set of datapath actions in some corner cases. If the results say that this is the case, rerun \fBofproto/trace\fR supplying a packet in the flow to get complete results. +.IP "\fBofproto/self\-check\fR [\fIswitch\fR]" +Runs an internal consistency check on \fIswitch\fR, if specified, +otherwise on all ofproto instances, and responds with a brief summary +of the results. If the summary reports any errors, then the Open +vSwitch logs should contain more detailed information. Please pass +along errors reported by this command to the Open vSwitch developers +as bugs. --8<--------------------------cut here-------------------------->8-- From: Ben Pfaff <b...@nicira.com> Date: Mon, 16 Jan 2012 11:15:25 -0800 Subject: [PATCH] ofproto-dpif: Implement self-check of flow translations. One of the major tasks of ofproto-dpif is to translate OpenFlow actions into "ODP" datapath actions. These translations are essentially a cache that requires revalidation when certain state changes occur. For best performance it's important to revalidate flows only when necessary, so from time to time Open vSwitch has gotten this wrong, which meant that stale flows could persist in the kernel and cause surprising behavior. This commit implements a simple "self check": every trip through the Open vSwitch main loop randomly chooses one flow entry and checks that its actions have been correctly translated. If not, Open vSwitch logs the details of the problem. This should help find problems more quickly in the future. Signed-off-by: Ben Pfaff <b...@nicira.com> --- lib/odp-util.c | 20 +++++- lib/odp-util.h | 3 +- ofproto/ofproto-dpif.c | 181 +++++++++++++++++++++++++++++++++++++++++++ ofproto/ofproto-unixctl.man | 7 ++ 4 files changed, 209 insertions(+), 2 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index ffb486e..12e8daf 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011 Nicira Networks. + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1687,6 +1687,24 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, expected_attrs, flow, key, key_len); } +/* Returns 'fitness' as a string, for use in debug messages. */ +const char * +odp_key_fitness_to_string(enum odp_key_fitness fitness) +{ + switch (fitness) { + case ODP_FIT_PERFECT: + return "OK"; + case ODP_FIT_TOO_MUCH: + return "too_much"; + case ODP_FIT_TOO_LITTLE: + return "too_little"; + case ODP_FIT_ERROR: + return "error"; + default: + return "<unknown>"; + } +} + /* Appends an OVS_ACTION_ATTR_USERSPACE action to 'odp_actions' that specifies * Netlink PID 'pid'. If 'cookie' is nonnull, adds a userdata attribute whose * contents contains 'cookie' and returns the offset within 'odp_actions' of diff --git a/lib/odp-util.h b/lib/odp-util.h index a6f8a30..0028499 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011 Nicira Networks. + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -114,6 +114,7 @@ enum odp_key_fitness { }; enum odp_key_fitness odp_flow_key_to_flow(const struct nlattr *, size_t, struct flow *); +const char *odp_key_fitness_to_string(enum odp_key_fitness); enum user_action_cookie_type { USER_ACTION_COOKIE_UNSPEC, diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index e3cb51b..64e9073 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -327,6 +327,7 @@ static struct facet *facet_find(struct ofproto_dpif *, const struct flow *); static struct facet *facet_lookup_valid(struct ofproto_dpif *, const struct flow *); static bool facet_revalidate(struct facet *); +static bool facet_check_consistency(struct facet *); static void facet_flush_stats(struct facet *); @@ -382,6 +383,8 @@ static struct subfacet *subfacet_find(struct ofproto_dpif *, const struct nlattr *key, size_t key_len); static void subfacet_destroy(struct subfacet *); static void subfacet_destroy__(struct subfacet *); +static void subfacet_get_key(struct subfacet *, struct odputil_keybuf *, + struct ofpbuf *key); static void subfacet_reset_dp_stats(struct subfacet *, struct dpif_flow_stats *); static void subfacet_update_time(struct subfacet *, long long int used); @@ -821,6 +824,19 @@ run(struct ofproto *ofproto_) } } + /* Check the consistency of a random facet, to aid debugging. */ + if (!hmap_is_empty(&ofproto->facets) && !ofproto->need_revalidate) { + struct facet *facet; + + facet = CONTAINER_OF(hmap_random_node(&ofproto->facets), + struct facet, hmap_node); + if (!tag_set_intersects(&ofproto->revalidate_set, facet->tags)) { + if (!facet_check_consistency(facet)) { + ofproto->need_revalidate = true; + } + } + } + return 0; } @@ -3339,6 +3355,117 @@ facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow) return facet; } +static bool +facet_check_consistency(struct facet *facet) +{ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15); + + struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto); + + struct rule_dpif *rule; + struct subfacet *subfacet; + bool ok; + + /* Check the rule for consistency. */ + rule = rule_dpif_lookup(ofproto, &facet->flow, 0); + if (!rule) { + if (!VLOG_DROP_WARN(&rl)) { + char *s = flow_to_string(&facet->flow); + VLOG_WARN("%s: facet should not exist", s); + free(s); + } + return false; + } else if (rule != facet->rule) { + struct ds s; + + ds_init(&s); + flow_format(&s, &facet->flow); + ds_put_format(&s, ": facet associated with wrong rule (was " + "table=%"PRIu8",", facet->rule->up.table_id); + cls_rule_format(&facet->rule->up.cr, &s); + ds_put_format(&s, ") (should have been table=%"PRIu8",", + rule->up.table_id); + cls_rule_format(&rule->up.cr, &s); + ds_put_char(&s, ')'); + + VLOG_WARN("%s", ds_cstr(&s)); + ds_destroy(&s); + + ok = false; + } else { + ok = true; + } + + /* Check the datapath actions for consistency. */ + LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) { + struct action_xlate_ctx ctx; + struct ofpbuf *odp_actions; + bool actions_changed; + bool should_install; + + action_xlate_ctx_init(&ctx, ofproto, &facet->flow, + subfacet->initial_tci, rule->up.flow_cookie, + NULL); + odp_actions = xlate_actions(&ctx, rule->up.actions, + rule->up.n_actions); + + should_install = (ctx.may_set_up_flow + && subfacet->key_fitness != ODP_FIT_TOO_LITTLE); + if (!should_install && !subfacet->installed) { + /* The actions for uninstallable flows may vary from one packet to + * the next, so don't compare the actions. */ + goto next; + } + + actions_changed = (subfacet->actions_len != odp_actions->size + || memcmp(subfacet->actions, odp_actions->data, + subfacet->actions_len)); + if (should_install != subfacet->installed || actions_changed) { + struct odputil_keybuf keybuf; + struct ofpbuf key; + struct ds s; + + ok = false; + + ds_init(&s); + subfacet_get_key(subfacet, &keybuf, &key); + odp_flow_key_format(key.data, key.size, &s); + + ds_put_cstr(&s, ": inconsistency in subfacet"); + if (should_install != subfacet->installed) { + enum odp_key_fitness fitness = subfacet->key_fitness; + + ds_put_format(&s, " (should%s have been installed)", + should_install ? "" : " not"); + ds_put_format(&s, " (may_set_up_flow=%s, fitness=%s)", + ctx.may_set_up_flow ? "true" : "false", + odp_key_fitness_to_string(fitness)); + } + if (actions_changed) { + ds_put_cstr(&s, " (actions were: "); + format_odp_actions(&s, subfacet->actions, + subfacet->actions_len); + ds_put_cstr(&s, ") (correct actions: "); + format_odp_actions(&s, odp_actions->data, + odp_actions->size); + ds_put_char(&s, ')'); + } else { + ds_put_cstr(&s, " (actions: "); + format_odp_actions(&s, subfacet->actions, + subfacet->actions_len); + ds_put_char(&s, ')'); + } + VLOG_WARN("%s", ds_cstr(&s)); + ds_destroy(&s); + } + + next: + ofpbuf_delete(odp_actions); + } + + return ok; +} + /* Re-searches the classifier for 'facet': * * - If the rule found is different from 'facet''s current rule, moves @@ -5921,6 +6048,58 @@ ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, unixctl_command_reply(conn, 200, NULL); } +/* Runs a self-check of flow translations in 'ofproto'. Appends a message to + * 'reply' describing the results. */ +static void +ofproto_dpif_self_check__(struct ofproto_dpif *ofproto, struct ds *reply) +{ + struct facet *facet; + int errors; + + errors = 0; + HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) { + if (!facet_check_consistency(facet)) { + errors++; + } + } + if (errors) { + ofproto->need_revalidate = true; + } + + if (errors) { + ds_put_format(reply, "%s: self-check failed (%d errors)\n", + ofproto->up.name, errors); + } else { + ds_put_format(reply, "%s: self-check passed\n", ofproto->up.name); + } +} + +static void +ofproto_dpif_self_check(struct unixctl_conn *conn, + int argc OVS_UNUSED, const char *argv[], + void *aux OVS_UNUSED) +{ + struct ds reply = DS_EMPTY_INITIALIZER; + struct ofproto_dpif *ofproto; + + if (argc > 1) { + ofproto = ofproto_dpif_lookup(argv[1]); + if (!ofproto) { + unixctl_command_reply(conn, 501, "Unknown ofproto (use " + "ofproto/list for help)"); + return; + } + ofproto_dpif_self_check__(ofproto, &reply); + } else { + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { + ofproto_dpif_self_check__(ofproto, &reply); + } + } + + unixctl_command_reply(conn, 200, ds_cstr(&reply)); + ds_destroy(&reply); +} + static void ofproto_dpif_unixctl_init(void) { @@ -5942,6 +6121,8 @@ ofproto_dpif_unixctl_init(void) ofproto_dpif_clog, NULL); unixctl_command_register("ofproto/unclog", "", 0, 0, ofproto_dpif_unclog, NULL); + unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1, + ofproto_dpif_self_check, NULL); } /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) diff --git a/ofproto/ofproto-unixctl.man b/ofproto/ofproto-unixctl.man index 5ae90f2..ad9e02d 100644 --- a/ofproto/ofproto-unixctl.man +++ b/ofproto/ofproto-unixctl.man @@ -64,3 +64,10 @@ This form of \fBofproto/trace\fR cannot determine the complete set of datapath actions in some corner cases. If the results say that this is the case, rerun \fBofproto/trace\fR supplying a packet in the flow to get complete results. +.IP "\fBofproto/self\-check\fR [\fIswitch\fR]" +Runs an internal consistency check on \fIswitch\fR, if specified, +otherwise on all ofproto instances, and responds with a brief summary +of the results. If the summary reports any errors, then the Open +vSwitch logs should contain more detailed information. Please pass +along errors reported by this command to the Open vSwitch developers +as bugs. -- 1.7.2.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev