> The code looks OK to me. Can you point out the problem more > specifically?
Looking over it again, I misread the code. The existing code is correct. In ofproto_dpif_self_check() you say 'argc' is OVS_UNUSED even though you use it. Otherwise looks good, Ethan > >> It may be worth printing the other possible values of >> subfacet->key_fitness in case it's not OK but not TOO_LITTLE. > > OK, I added a function odp_key_fitness_to_string(). > >> In ofproto_dpif_self_check(): >> I've found it useful to do appctl commands for all instances when one >> isn't specified in the lacp, bond, and cfm module. It may be worth >> doing it here as well, though I don't feel strongly about it. > > Good idea. Done. > > Here's an incremental followed by a full respin. > > diff --git a/lib/odp-util.c b/lib/odp-util.c > index ffb486e..12e8daf 100644 > --- a/lib/odp-util.c > +++ b/lib/odp-util.c > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2009, 2010, 2011 Nicira Networks. > + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -1687,6 +1687,24 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t > key_len, > expected_attrs, flow, key, key_len); > } > > +/* Returns 'fitness' as a string, for use in debug messages. */ > +const char * > +odp_key_fitness_to_string(enum odp_key_fitness fitness) > +{ > + switch (fitness) { > + case ODP_FIT_PERFECT: > + return "OK"; > + case ODP_FIT_TOO_MUCH: > + return "too_much"; > + case ODP_FIT_TOO_LITTLE: > + return "too_little"; > + case ODP_FIT_ERROR: > + return "error"; > + default: > + return "<unknown>"; > + } > +} > + > /* Appends an OVS_ACTION_ATTR_USERSPACE action to 'odp_actions' that > specifies > * Netlink PID 'pid'. If 'cookie' is nonnull, adds a userdata attribute whose > * contents contains 'cookie' and returns the offset within 'odp_actions' of > diff --git a/lib/odp-util.h b/lib/odp-util.h > index a6f8a30..0028499 100644 > --- a/lib/odp-util.h > +++ b/lib/odp-util.h > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2009, 2010, 2011 Nicira Networks. > + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -114,6 +114,7 @@ enum odp_key_fitness { > }; > enum odp_key_fitness odp_flow_key_to_flow(const struct nlattr *, size_t, > struct flow *); > +const char *odp_key_fitness_to_string(enum odp_key_fitness); > > enum user_action_cookie_type { > USER_ACTION_COOKIE_UNSPEC, > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c > index 6e73e0d..64e9073 100644 > --- a/ofproto/ofproto-dpif.c > +++ b/ofproto/ofproto-dpif.c > @@ -3366,8 +3366,6 @@ facet_check_consistency(struct facet *facet) > struct subfacet *subfacet; > bool ok; > > - COVERAGE_INC(facet_revalidate); > - > /* Check the rule for consistency. */ > rule = rule_dpif_lookup(ofproto, &facet->flow, 0); > if (!rule) { > @@ -3380,6 +3378,7 @@ facet_check_consistency(struct facet *facet) > } else if (rule != facet->rule) { > struct ds s; > > + ds_init(&s); > flow_format(&s, &facet->flow); > ds_put_format(&s, ": facet associated with wrong rule (was " > "table=%"PRIu8",", facet->rule->up.table_id); > @@ -3434,12 +3433,13 @@ facet_check_consistency(struct facet *facet) > > ds_put_cstr(&s, ": inconsistency in subfacet"); > if (should_install != subfacet->installed) { > + enum odp_key_fitness fitness = subfacet->key_fitness; > + > ds_put_format(&s, " (should%s have been installed)", > should_install ? "" : " not"); > ds_put_format(&s, " (may_set_up_flow=%s, fitness=%s)", > ctx.may_set_up_flow ? "true" : "false", > - subfacet->key_fitness == ODP_FIT_TOO_LITTLE > - ? "too_little" : "OK"); > + odp_key_fitness_to_string(fitness)); > } > if (actions_changed) { > ds_put_cstr(&s, " (actions were: "); > @@ -6048,24 +6048,14 @@ ofproto_dpif_unclog(struct unixctl_conn *conn > OVS_UNUSED, int argc OVS_UNUSED, > unixctl_command_reply(conn, 200, NULL); > } > > +/* Runs a self-check of flow translations in 'ofproto'. Appends a message to > + * 'reply' describing the results. */ > static void > -ofproto_dpif_self_check(struct unixctl_conn *conn, > - int argc OVS_UNUSED, const char *argv[], > - void *aux OVS_UNUSED) > +ofproto_dpif_self_check__(struct ofproto_dpif *ofproto, struct ds *reply) > { > - const char *dpname = argv[1]; > - struct ofproto_dpif *ofproto; > struct facet *facet; > - char *reply; > int errors; > > - ofproto = ofproto_dpif_lookup(dpname); > - if (!ofproto) { > - unixctl_command_reply(conn, 501, "Unknown ofproto (use ofproto/list " > - "for help)"); > - return; > - } > - > errors = 0; > HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) { > if (!facet_check_consistency(facet)) { > @@ -6076,11 +6066,38 @@ ofproto_dpif_self_check(struct unixctl_conn *conn, > ofproto->need_revalidate = true; > } > > - reply = (errors > - ? xasprintf("%s: self-check failed (%d errors)", dpname, errors) > - : xasprintf("%s: self-check passed", dpname)); > - unixctl_command_reply(conn, 200, reply); > - free(reply); > + if (errors) { > + ds_put_format(reply, "%s: self-check failed (%d errors)\n", > + ofproto->up.name, errors); > + } else { > + ds_put_format(reply, "%s: self-check passed\n", ofproto->up.name); > + } > +} > + > +static void > +ofproto_dpif_self_check(struct unixctl_conn *conn, > + int argc OVS_UNUSED, const char *argv[], > + void *aux OVS_UNUSED) > +{ > + struct ds reply = DS_EMPTY_INITIALIZER; > + struct ofproto_dpif *ofproto; > + > + if (argc > 1) { > + ofproto = ofproto_dpif_lookup(argv[1]); > + if (!ofproto) { > + unixctl_command_reply(conn, 501, "Unknown ofproto (use " > + "ofproto/list for help)"); > + return; > + } > + ofproto_dpif_self_check__(ofproto, &reply); > + } else { > + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { > + ofproto_dpif_self_check__(ofproto, &reply); > + } > + } > + > + unixctl_command_reply(conn, 200, ds_cstr(&reply)); > + ds_destroy(&reply); > } > > static void > @@ -6104,7 +6121,7 @@ ofproto_dpif_unixctl_init(void) > ofproto_dpif_clog, NULL); > unixctl_command_register("ofproto/unclog", "", 0, 0, > ofproto_dpif_unclog, NULL); > - unixctl_command_register("ofproto/self-check", "bridge", 1, 1, > + unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1, > ofproto_dpif_self_check, NULL); > } > > diff --git a/ofproto/ofproto-unixctl.man b/ofproto/ofproto-unixctl.man > index 5ae90f2..ad9e02d 100644 > --- a/ofproto/ofproto-unixctl.man > +++ b/ofproto/ofproto-unixctl.man > @@ -64,3 +64,10 @@ This form of \fBofproto/trace\fR cannot determine the > complete set of > datapath actions in some corner cases. If the results say that this > is the case, rerun \fBofproto/trace\fR supplying a packet in the flow > to get complete results. > +.IP "\fBofproto/self\-check\fR [\fIswitch\fR]" > +Runs an internal consistency check on \fIswitch\fR, if specified, > +otherwise on all ofproto instances, and responds with a brief summary > +of the results. If the summary reports any errors, then the Open > +vSwitch logs should contain more detailed information. Please pass > +along errors reported by this command to the Open vSwitch developers > +as bugs. > > --8<--------------------------cut here-------------------------->8-- > > From: Ben Pfaff <b...@nicira.com> > Date: Mon, 16 Jan 2012 11:15:25 -0800 > Subject: [PATCH] ofproto-dpif: Implement self-check of flow translations. > > One of the major tasks of ofproto-dpif is to translate OpenFlow > actions into "ODP" datapath actions. These translations are essentially > a cache that requires revalidation when certain state changes occur. For > best performance it's important to revalidate flows only when necessary, > so from time to time Open vSwitch has gotten this wrong, which meant that > stale flows could persist in the kernel and cause surprising behavior. > > This commit implements a simple "self check": every trip through the > Open vSwitch main loop randomly chooses one flow entry and checks that > its actions have been correctly translated. If not, Open vSwitch logs > the details of the problem. This should help find problems more > quickly in the future. > > Signed-off-by: Ben Pfaff <b...@nicira.com> > --- > lib/odp-util.c | 20 +++++- > lib/odp-util.h | 3 +- > ofproto/ofproto-dpif.c | 181 > +++++++++++++++++++++++++++++++++++++++++++ > ofproto/ofproto-unixctl.man | 7 ++ > 4 files changed, 209 insertions(+), 2 deletions(-) > > diff --git a/lib/odp-util.c b/lib/odp-util.c > index ffb486e..12e8daf 100644 > --- a/lib/odp-util.c > +++ b/lib/odp-util.c > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2009, 2010, 2011 Nicira Networks. > + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -1687,6 +1687,24 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t > key_len, > expected_attrs, flow, key, key_len); > } > > +/* Returns 'fitness' as a string, for use in debug messages. */ > +const char * > +odp_key_fitness_to_string(enum odp_key_fitness fitness) > +{ > + switch (fitness) { > + case ODP_FIT_PERFECT: > + return "OK"; > + case ODP_FIT_TOO_MUCH: > + return "too_much"; > + case ODP_FIT_TOO_LITTLE: > + return "too_little"; > + case ODP_FIT_ERROR: > + return "error"; > + default: > + return "<unknown>"; > + } > +} > + > /* Appends an OVS_ACTION_ATTR_USERSPACE action to 'odp_actions' that > specifies > * Netlink PID 'pid'. If 'cookie' is nonnull, adds a userdata attribute whose > * contents contains 'cookie' and returns the offset within 'odp_actions' of > diff --git a/lib/odp-util.h b/lib/odp-util.h > index a6f8a30..0028499 100644 > --- a/lib/odp-util.h > +++ b/lib/odp-util.h > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2009, 2010, 2011 Nicira Networks. > + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -114,6 +114,7 @@ enum odp_key_fitness { > }; > enum odp_key_fitness odp_flow_key_to_flow(const struct nlattr *, size_t, > struct flow *); > +const char *odp_key_fitness_to_string(enum odp_key_fitness); > > enum user_action_cookie_type { > USER_ACTION_COOKIE_UNSPEC, > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c > index e3cb51b..64e9073 100644 > --- a/ofproto/ofproto-dpif.c > +++ b/ofproto/ofproto-dpif.c > @@ -327,6 +327,7 @@ static struct facet *facet_find(struct ofproto_dpif *, > const struct flow *); > static struct facet *facet_lookup_valid(struct ofproto_dpif *, > const struct flow *); > static bool facet_revalidate(struct facet *); > +static bool facet_check_consistency(struct facet *); > > static void facet_flush_stats(struct facet *); > > @@ -382,6 +383,8 @@ static struct subfacet *subfacet_find(struct ofproto_dpif > *, > const struct nlattr *key, size_t > key_len); > static void subfacet_destroy(struct subfacet *); > static void subfacet_destroy__(struct subfacet *); > +static void subfacet_get_key(struct subfacet *, struct odputil_keybuf *, > + struct ofpbuf *key); > static void subfacet_reset_dp_stats(struct subfacet *, > struct dpif_flow_stats *); > static void subfacet_update_time(struct subfacet *, long long int used); > @@ -821,6 +824,19 @@ run(struct ofproto *ofproto_) > } > } > > + /* Check the consistency of a random facet, to aid debugging. */ > + if (!hmap_is_empty(&ofproto->facets) && !ofproto->need_revalidate) { > + struct facet *facet; > + > + facet = CONTAINER_OF(hmap_random_node(&ofproto->facets), > + struct facet, hmap_node); > + if (!tag_set_intersects(&ofproto->revalidate_set, facet->tags)) { > + if (!facet_check_consistency(facet)) { > + ofproto->need_revalidate = true; > + } > + } > + } > + > return 0; > } > > @@ -3339,6 +3355,117 @@ facet_lookup_valid(struct ofproto_dpif *ofproto, > const struct flow *flow) > return facet; > } > > +static bool > +facet_check_consistency(struct facet *facet) > +{ > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15); > + > + struct ofproto_dpif *ofproto = > ofproto_dpif_cast(facet->rule->up.ofproto); > + > + struct rule_dpif *rule; > + struct subfacet *subfacet; > + bool ok; > + > + /* Check the rule for consistency. */ > + rule = rule_dpif_lookup(ofproto, &facet->flow, 0); > + if (!rule) { > + if (!VLOG_DROP_WARN(&rl)) { > + char *s = flow_to_string(&facet->flow); > + VLOG_WARN("%s: facet should not exist", s); > + free(s); > + } > + return false; > + } else if (rule != facet->rule) { > + struct ds s; > + > + ds_init(&s); > + flow_format(&s, &facet->flow); > + ds_put_format(&s, ": facet associated with wrong rule (was " > + "table=%"PRIu8",", facet->rule->up.table_id); > + cls_rule_format(&facet->rule->up.cr, &s); > + ds_put_format(&s, ") (should have been table=%"PRIu8",", > + rule->up.table_id); > + cls_rule_format(&rule->up.cr, &s); > + ds_put_char(&s, ')'); > + > + VLOG_WARN("%s", ds_cstr(&s)); > + ds_destroy(&s); > + > + ok = false; > + } else { > + ok = true; > + } > + > + /* Check the datapath actions for consistency. */ > + LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) { > + struct action_xlate_ctx ctx; > + struct ofpbuf *odp_actions; > + bool actions_changed; > + bool should_install; > + > + action_xlate_ctx_init(&ctx, ofproto, &facet->flow, > + subfacet->initial_tci, rule->up.flow_cookie, > + NULL); > + odp_actions = xlate_actions(&ctx, rule->up.actions, > + rule->up.n_actions); > + > + should_install = (ctx.may_set_up_flow > + && subfacet->key_fitness != ODP_FIT_TOO_LITTLE); > + if (!should_install && !subfacet->installed) { > + /* The actions for uninstallable flows may vary from one packet > to > + * the next, so don't compare the actions. */ > + goto next; > + } > + > + actions_changed = (subfacet->actions_len != odp_actions->size > + || memcmp(subfacet->actions, odp_actions->data, > + subfacet->actions_len)); > + if (should_install != subfacet->installed || actions_changed) { > + struct odputil_keybuf keybuf; > + struct ofpbuf key; > + struct ds s; > + > + ok = false; > + > + ds_init(&s); > + subfacet_get_key(subfacet, &keybuf, &key); > + odp_flow_key_format(key.data, key.size, &s); > + > + ds_put_cstr(&s, ": inconsistency in subfacet"); > + if (should_install != subfacet->installed) { > + enum odp_key_fitness fitness = subfacet->key_fitness; > + > + ds_put_format(&s, " (should%s have been installed)", > + should_install ? "" : " not"); > + ds_put_format(&s, " (may_set_up_flow=%s, fitness=%s)", > + ctx.may_set_up_flow ? "true" : "false", > + odp_key_fitness_to_string(fitness)); > + } > + if (actions_changed) { > + ds_put_cstr(&s, " (actions were: "); > + format_odp_actions(&s, subfacet->actions, > + subfacet->actions_len); > + ds_put_cstr(&s, ") (correct actions: "); > + format_odp_actions(&s, odp_actions->data, > + odp_actions->size); > + ds_put_char(&s, ')'); > + } else { > + ds_put_cstr(&s, " (actions: "); > + format_odp_actions(&s, subfacet->actions, > + subfacet->actions_len); > + ds_put_char(&s, ')'); > + } > + VLOG_WARN("%s", ds_cstr(&s)); > + ds_destroy(&s); > + } > + > + next: > + ofpbuf_delete(odp_actions); > + } > + > + return ok; > +} > + > /* Re-searches the classifier for 'facet': > * > * - If the rule found is different from 'facet''s current rule, moves > @@ -5921,6 +6048,58 @@ ofproto_dpif_unclog(struct unixctl_conn *conn > OVS_UNUSED, int argc OVS_UNUSED, > unixctl_command_reply(conn, 200, NULL); > } > > +/* Runs a self-check of flow translations in 'ofproto'. Appends a message to > + * 'reply' describing the results. */ > +static void > +ofproto_dpif_self_check__(struct ofproto_dpif *ofproto, struct ds *reply) > +{ > + struct facet *facet; > + int errors; > + > + errors = 0; > + HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) { > + if (!facet_check_consistency(facet)) { > + errors++; > + } > + } > + if (errors) { > + ofproto->need_revalidate = true; > + } > + > + if (errors) { > + ds_put_format(reply, "%s: self-check failed (%d errors)\n", > + ofproto->up.name, errors); > + } else { > + ds_put_format(reply, "%s: self-check passed\n", ofproto->up.name); > + } > +} > + > +static void > +ofproto_dpif_self_check(struct unixctl_conn *conn, > + int argc OVS_UNUSED, const char *argv[], > + void *aux OVS_UNUSED) > +{ > + struct ds reply = DS_EMPTY_INITIALIZER; > + struct ofproto_dpif *ofproto; > + > + if (argc > 1) { > + ofproto = ofproto_dpif_lookup(argv[1]); > + if (!ofproto) { > + unixctl_command_reply(conn, 501, "Unknown ofproto (use " > + "ofproto/list for help)"); > + return; > + } > + ofproto_dpif_self_check__(ofproto, &reply); > + } else { > + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { > + ofproto_dpif_self_check__(ofproto, &reply); > + } > + } > + > + unixctl_command_reply(conn, 200, ds_cstr(&reply)); > + ds_destroy(&reply); > +} > + > static void > ofproto_dpif_unixctl_init(void) > { > @@ -5942,6 +6121,8 @@ ofproto_dpif_unixctl_init(void) > ofproto_dpif_clog, NULL); > unixctl_command_register("ofproto/unclog", "", 0, 0, > ofproto_dpif_unclog, NULL); > + unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1, > + ofproto_dpif_self_check, NULL); > } > > /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) > diff --git a/ofproto/ofproto-unixctl.man b/ofproto/ofproto-unixctl.man > index 5ae90f2..ad9e02d 100644 > --- a/ofproto/ofproto-unixctl.man > +++ b/ofproto/ofproto-unixctl.man > @@ -64,3 +64,10 @@ This form of \fBofproto/trace\fR cannot determine the > complete set of > datapath actions in some corner cases. If the results say that this > is the case, rerun \fBofproto/trace\fR supplying a packet in the flow > to get complete results. > +.IP "\fBofproto/self\-check\fR [\fIswitch\fR]" > +Runs an internal consistency check on \fIswitch\fR, if specified, > +otherwise on all ofproto instances, and responds with a brief summary > +of the results. If the summary reports any errors, then the Open > +vSwitch logs should contain more detailed information. Please pass > +along errors reported by this command to the Open vSwitch developers > +as bugs. > -- > 1.7.2.5 > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev