This change displays the cfm_health of an interface. The cfm_health is an exponentially weighted moving average of the health of all remote_mpids. The value ranges from 0 to 100, where 100 means very healthy and 0 means unhealthy.
Feature #10363 Requested-by: Ethan Jackson <et...@nicira.com> Signed-off-by: Mehak Mahajan <mmaha...@nicira.com> --- NEWS | 2 + lib/cfm.c | 79 +++++++++++++++++++++++++++++++++++++++++++- lib/cfm.h | 1 + ofproto/ofproto-dpif.c | 9 +++++ ofproto/ofproto-provider.h | 11 ++++++ ofproto/ofproto.c | 13 +++++++ ofproto/ofproto.h | 3 +- vswitchd/bridge.c | 11 ++++++ vswitchd/vswitch.ovsschema | 9 ++++- vswitchd/vswitch.xml | 20 +++++++++++ 10 files changed, 154 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index a466f92..ed3fc88 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,8 @@ post-v1.6.0 - Added ability to configure dscp setting for manager and controller connections. By default, these connections have a DSCP value of Internetwork Control (0xc0). + - Added the granular link health statistics, 'cfm_health', to an + interface. v1.6.0 - xx xxx xxxx diff --git a/lib/cfm.c b/lib/cfm.c index 8b9e5bc..622c1bd 100644 --- a/lib/cfm.c +++ b/lib/cfm.c @@ -60,6 +60,7 @@ static const uint8_t eth_addr_ccm_x[6] = { #define CCM_MAID_LEN 48 #define CCM_OPCODE 1 /* CFM message opcode meaning CCM. */ #define CCM_RDI_MASK 0x80 +#define CFM_HEALTH_INTERVAL 6 struct ccm { uint8_t mdlevel_version; /* MD Level and Version */ uint8_t opcode; @@ -111,6 +112,10 @@ struct cfm { * avoid flapping. */ uint64_t *rmps_array; /* Cache of remote_mps. */ size_t rmps_array_len; /* Number of rmps in 'rmps_array'. */ + + int health; /* Average health over all remote_mps */ + int health_interval; /* Num of fault_intervals used to compute the + * health. */ }; /* Remote MPs represent foreign network entities that are configured to have @@ -124,6 +129,10 @@ struct remote_mp { receiving CCMs that it's expecting to. */ bool opup; /* Operational State. */ uint32_t seq; /* Most recently received sequence number. */ + uint8_t num_health_ccm; /* Number of received ccm frames per + fault_interval. 
*/ + int health; /* Exponentially weighted moving average of link + health */ }; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 30); @@ -290,6 +299,7 @@ cfm_create(const char *name) hmap_insert(&all_cfms, &cfm->hmap_node, hash_string(cfm->name, 0)); cfm->remote_opup = true; cfm->fault_override = -1; + cfm->health = 100; return cfm; } @@ -314,6 +324,13 @@ cfm_destroy(struct cfm *cfm) free(cfm); } +/* Returns the health as a percentage. */ +static int +cfm_get_rmp_health_stats(struct remote_mp *rmp) +{ + return rmp->health; +} + /* Should be run periodically to update fault statistics messages. */ void cfm_run(struct cfm *cfm) @@ -332,8 +349,44 @@ cfm_run(struct cfm *cfm) sizeof *cfm->rmps_array); cfm->remote_opup = true; + if (cfm->health_interval == CFM_HEALTH_INTERVAL) { + int cfm_health = 0; + + /* Calculate the cfm health of the interface. If the number of + * remote_mpids of a cfm interface is > 1, the cfm health is + * undefined. If the number of remote_mpids is 1, the cfm Health is + * simply the remote_mpid's health, else it is 0. */ + if (hmap_count(&cfm->remote_mps) > 1) { + cfm->health = -1; + } else { + HMAP_FOR_EACH (rmp, node, &cfm->remote_mps) { + int lost, exp_ccm_recvd; + + exp_ccm_recvd = (CFM_HEALTH_INTERVAL << 2) - + (CFM_HEALTH_INTERVAL >> 1); + + /* Calculate the percentage of healthy ccm frames received. + * Since the 'fault_interval' is (3.5 * cfm_interval), and + * 1 CCM packet must be received every cfm_interval, + * the 'remote_mpid' health reports the percentage of + * healthy CCM frames received every + * 'CFM_HEALTH_INTERVAL'th 'fault_interval'. */ + rmp->health = (rmp->num_health_ccm * 100) / exp_ccm_recvd; + rmp->health = MIN(rmp->health, 100); + assert(rmp->health >= 0 && rmp->health <= 100); + cfm_health += rmp->health; + rmp->num_health_ccm = 0; + } + /* Calculate the cfm health. */ + cfm->health = hmap_is_empty(&cfm->remote_mps) + ? 
0 : + cfm_health; + assert(cfm->health >= 0 && cfm->health <= 100); + } + cfm->health_interval = 0; + } + cfm->health_interval++; HMAP_FOR_EACH_SAFE (rmp, rmp_next, node, &cfm->remote_mps) { - if (!rmp->recv) { VLOG_DBG("%s: no CCM from RMP %"PRIu64" in the last %lldms", cfm->name, rmp->mpid, interval); @@ -535,6 +588,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) uint64_t ccm_mpid; uint32_t ccm_seq; bool ccm_opdown; + bool fault = false; if (cfm->extended) { ccm_mpid = ntohll(ccm->mpid64); @@ -549,6 +603,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) VLOG_WARN_RL(&rl, "%s: received a CCM with an invalid interval" " (%"PRIu8") from RMP %"PRIu64, cfm->name, ccm_interval, ccm_mpid); + fault = true; } if (cfm->extended && ccm_interval == 0 @@ -556,6 +611,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) VLOG_WARN_RL(&rl, "%s: received a CCM with an invalid extended" " interval (%"PRIu16"ms) from RMP %"PRIu64, cfm->name, ccm_interval_ms_x, ccm_mpid); + fault = true; } rmp = lookup_remote_mp(cfm, ccm_mpid); @@ -563,12 +619,15 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) if (hmap_count(&cfm->remote_mps) < CFM_MAX_RMPS) { rmp = xzalloc(sizeof *rmp); hmap_insert(&cfm->remote_mps, &rmp->node, hash_mpid(ccm_mpid)); + rmp->num_health_ccm = 0; + rmp->health = 100; } else { cfm->recv_fault |= CFM_FAULT_OVERFLOW; VLOG_WARN_RL(&rl, "%s: dropped CCM with MPID %"PRIu64" from MAC " ETH_ADDR_FMT, cfm->name, ccm_mpid, ETH_ADDR_ARGS(eth->eth_src)); + fault = true; } } @@ -576,16 +635,23 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) " (interval %"PRIu8") (RDI %s)", cfm->name, ccm_seq, ccm_mpid, ccm_interval, ccm_rdi ? 
"true" : "false"); + if (ccm_rdi) { + fault = true; + } if (rmp) { if (rmp->seq && ccm_seq != (rmp->seq + 1)) { VLOG_WARN_RL(&rl, "%s: (mpid %"PRIu64") detected sequence" " numbers which indicate possible connectivity" " problems (previous %"PRIu32") (current %"PRIu32 ")", cfm->name, ccm_mpid, rmp->seq, ccm_seq); + fault = true; } rmp->mpid = ccm_mpid; rmp->recv = true; + if (!fault) { + rmp->num_health_ccm++; + } rmp->seq = ccm_seq; rmp->rdi = ccm_rdi; rmp->opup = !ccm_opdown; @@ -605,6 +671,15 @@ cfm_get_fault(const struct cfm *cfm) return cfm->fault; } +/* Gets the health of 'cfm'. Returns an integer between 0 and 100 indicating + * the health of the link as a percentage which is calculated as an average of + * the health of all remote_mps. */ +int +cfm_get_health(const struct cfm *cfm) +{ + return cfm->health; +} + /* Gets the operational state of 'cfm'. 'cfm' is considered operationally down * if it has received a CCM with the operationally down bit set from any of its * remote maintenance points. Returns true if 'cfm' is operationally up. False @@ -656,6 +731,7 @@ cfm_print_details(struct ds *ds, const struct cfm *cfm) ds_put_cstr(ds, "\n"); } + ds_put_format(ds, "\taverage health: %d\n", cfm->health); ds_put_format(ds, "\topstate: %s\n", cfm->opup ? "up" : "down"); ds_put_format(ds, "\tremote_opstate: %s\n", cfm->remote_opup ? "up" : "down"); @@ -672,6 +748,7 @@ cfm_print_details(struct ds *ds, const struct cfm *cfm) ds_put_format(ds, "\trecv since check: %s\n", rmp->recv ? "true" : "false"); ds_put_format(ds, "\topstate: %s\n", rmp->opup? 
"up" : "down"); + ds_put_format(ds, "\tlink health: %d\n", cfm_get_rmp_health_stats(rmp)); } } diff --git a/lib/cfm.h b/lib/cfm.h index 2556a32..2b4f888 100644 --- a/lib/cfm.h +++ b/lib/cfm.h @@ -69,6 +69,7 @@ bool cfm_configure(struct cfm *, const struct cfm_settings *); bool cfm_should_process_flow(const struct cfm *cfm, const struct flow *); void cfm_process_heartbeat(struct cfm *, const struct ofpbuf *packet); int cfm_get_fault(const struct cfm *); +int cfm_get_health(const struct cfm *); bool cfm_get_opup(const struct cfm *); void cfm_get_remote_mpids(const struct cfm *, const uint64_t **rmps, size_t *n_rmps); diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 51b847f..a42d09e 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -1119,6 +1119,14 @@ get_cfm_remote_mpids(const struct ofport *ofport_, const uint64_t **rmps, return -1; } } + +static int +get_cfm_health(const struct ofport *ofport_) +{ + struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + + return ofport->cfm ? cfm_get_health(ofport->cfm) : -1; +} /* Spanning Tree. */ @@ -6491,6 +6499,7 @@ const struct ofproto_class ofproto_dpif_class = { set_cfm, get_cfm_fault, get_cfm_remote_mpids, + get_cfm_health, set_stp, get_stp_status, set_stp_port, diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 26904ef..3f01cc8 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -980,6 +980,17 @@ struct ofproto_class { int (*get_cfm_remote_mpids)(const struct ofport *ofport, const uint64_t **rmps, size_t *n_rmps); + /* Checks the health of CFM configured on 'ofport'. Returns an integer + * to indicate the health percentage of the 'ofport' which is an average of + * the health of all the remote_mps. Returns an integer between 0 and 100 + * where 0 means that the 'ofport' is very unhealthy and 100 means the + * 'ofport' is perfectly healthy. 
Returns -1 if CFM is not enabled on + * 'ofport'. + * + * This function may be a null pointer if the ofproto implementation does + * not support CFM. */ + int (*get_cfm_health)(const struct ofport *ofport); + /* Configures spanning tree protocol (STP) on 'ofproto' using the * settings defined in 's'. * diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index e7e0401..f934306 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -2481,6 +2481,19 @@ ofproto_port_get_cfm_remote_mpids(const struct ofproto *ofproto, : -1); } +/* Checks the health of the CFM for 'ofp_port' within 'ofproto'. Returns an + * integer value between 0 and 100 to indicate the health of the port as a + * percentage which is the average of cfm health of all the remote_mpids or + * returns -1 if CFM is not enabled on 'ofport'. */ +int +ofproto_port_get_cfm_health(const struct ofproto *ofproto, uint16_t ofp_port) +{ + struct ofport *ofport = ofproto_get_port(ofproto, ofp_port); + return (ofport && ofproto->ofproto_class->get_cfm_health + ? 
ofproto->ofproto_class->get_cfm_health(ofport) + : -1); +} + static enum ofperr handle_aggregate_stats_request(struct ofconn *ofconn, const struct ofp_stats_msg *osm) diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index 6172f29..c40f5d3 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -348,7 +348,8 @@ int ofproto_port_get_cfm_fault(const struct ofproto *, uint16_t ofp_port); int ofproto_port_get_cfm_remote_mpids(const struct ofproto *, uint16_t ofp_port, const uint64_t **rmps, size_t *n_rmps); - +int ofproto_port_get_cfm_health(const struct ofproto *ofproto, + uint16_t ofp_port); void ofproto_get_ofproto_controller_info(const struct ofproto *, struct shash *); void ofproto_free_ofproto_controller_info(struct shash *); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index adc3b47..37093d8 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -1547,6 +1547,8 @@ iface_refresh_cfm_stats(struct iface *iface) int fault, error; const uint64_t *rmps; size_t n_rmps; + int health; + int64_t cfm_health = 0; if (iface_is_synthetic(iface)) { return; @@ -1582,6 +1584,15 @@ iface_refresh_cfm_stats(struct iface *iface) } else { ovsrec_interface_set_cfm_remote_mpids(cfg, NULL, 0); } + + health = ofproto_port_get_cfm_health(iface->port->bridge->ofproto, + iface->ofp_port); + if (health >= 0) { + cfm_health = (int64_t) health; + ovsrec_interface_set_cfm_health(cfg, &cfm_health, 1); + } else { + ovsrec_interface_set_cfm_health(cfg, NULL, 0); + } } static void diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema index a3847e7..c7e1ac9 100644 --- a/vswitchd/vswitch.ovsschema +++ b/vswitchd/vswitch.ovsschema @@ -1,6 +1,6 @@ {"name": "Open_vSwitch", - "version": "6.8.0", - "cksum": "4106006492 16485", + "version": "6.9.0", + "cksum": "617116616 16682", "tables": { "Open_vSwitch": { "columns": { @@ -197,6 +197,11 @@ "ephemeral": true}, "cfm_fault_status": { "type": {"key": "string", "min": 0, "max": "unlimited"}}, + "cfm_health": { + "type": {"key": 
{"type": "integer", + "minInteger": 0, + "maxInteger": 100}, + "min": 0, "max": 1}}, "lacp_current": { "type": {"key": {"type": "boolean"}, "min": 0, "max": 1}, diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index f3ea338..bad9135 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -1726,6 +1726,26 @@ an <code>ovs-appctl</code> command. </column> + <column name="cfm_health"> + <p> + Indicates the health of the interface as a percentage of CCM frames + received over 21 <ref column="other_config" key="cfm_interval"/>s. The + health of an interface is undefined if it is communicating with more + than one <ref column="cfm_remote_mpids"/>. It reduces if healthy + heartbeats are not received at the expected rate, and gradually + improves as healthy heartbeats are received at the desired rate. It is + refreshed every 21 <ref column="other_config" key="cfm_interval"/>s. + </p> + <p> + As mentioned above, the faults can be triggered for several reasons. + The link health will deteriorate even if heartbeats are received but + they are reported to be unhealthy. An unhealthy heartbeat in this + context is a heartbeat for which some fault is set or which is out + of sequence. The interface health can be 100 only on receiving + healthy heartbeats at the desired rate. + </p> + </column> + <column name="cfm_remote_mpids"> When CFM is properly configured, Open vSwitch will occasionally receive CCM broadcasts. These broadcasts contain the MPID of the -- 1.7.2.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev