Hey Ben, Joe, Could you review this V3 series?
I think it is closer, and better,~ besides, no backlogged transactions when flapping all 5K tunnels for the entire morning ;D Alex Wang, On Fri, Apr 11, 2014 at 11:20 AM, Alex Wang <al...@nicira.com> wrote: > This commit removes the 'Instant' stats related logic in bridge.c. > Instead, the corresponding status is updated immediately after the > global connectivity sequence number changes. > > This change brings the following effects: > > 1. The status database update is slightly faster, since the 'Instant' > stats update period is 100 millisecond. > > 2. The main thread will no longer be waken up every 100 ms for 'Instant' > stats check. Some logic (e.g. ofproto_dpif_send_packet_in()) relies > on this fact for function checking or I/O. So, after this commit, > they should explicitly notify the main thread via either registering > a timeout in poll loop or using the global sequence number. > > 3. The update is more efficient, since the netdev's sequence number is > used to avoid updating unchanged netdev status. > > 4. When status is changing super fast, to prevent frequent database > transaction and backlogged jasonrpc message, logic is added to > always wait for unfinished status database transaction. This also > helps batch multiple future status changes in one transaction. > > In the experiment with 5K internal ports, when one port is flapping > at rate of every 0.3 second, there is no additional cpu consumption > observed. When all ports are flapping at the rate of every 0.3 second, > there is no backlogged transactions observed. > > Signed-off-by: Alex Wang <al...@nicira.com> > > --- > V2 -> V3: > - refine the commit log. > - when the current status database transaction returns "TXN_INCOMPLETE", > do not allow future status updates until the hanging transaction > finishes. > This is extremely helpful in that it can batch multiple future status > changes into one transaction. And that is main reason for the > elimination > of backlogged (jasonrpc) database transactions. > > PATCH -> V2: > - refine the commit message. > - refine the comments. > --- > ofproto/ofproto-dpif.c | 2 + > vswitchd/bridge.c | 264 > +++++++++++++++++++++--------------------------- > 2 files changed, 116 insertions(+), 150 deletions(-) > > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c > index 04d5454..7f327b9 100644 > --- a/ofproto/ofproto-dpif.c > +++ b/ofproto/ofproto-dpif.c > @@ -375,6 +375,8 @@ ofproto_dpif_send_packet_in(struct ofproto_dpif > *ofproto, > COVERAGE_INC(packet_in_overflow); > free(CONST_CAST(void *, pin->up.packet)); > free(pin); > + } else { > + seq_change(connectivity_seq_get()); > } > } > > diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c > index 44fdb4e..78ec94d 100644 > --- a/vswitchd/bridge.c > +++ b/vswitchd/bridge.c > @@ -77,6 +77,7 @@ struct iface { > char *name; /* Host network device name. */ > struct netdev *netdev; /* Network device. */ > ofp_port_t ofp_port; /* OpenFlow port number. */ > + uint64_t change_seq; > > /* These members are valid only within bridge_reconfigure(). */ > const char *type; /* Usually same as cfg->type. */ > @@ -263,7 +264,8 @@ static void iface_configure_qos(struct iface *, const > struct ovsrec_qos *); > static void iface_configure_cfm(struct iface *); > static void iface_refresh_cfm_stats(struct iface *); > static void iface_refresh_stats(struct iface *); > -static void iface_refresh_status(struct iface *); > +static void iface_refresh_netdev_status(struct iface *); > +static void iface_refresh_ofproto_status(struct iface *); > static bool iface_is_synthetic(const struct iface *); > static ofp_port_t iface_get_requested_ofp_port( > const struct ovsrec_interface *); > @@ -1506,7 +1508,7 @@ iface_create(struct bridge *br, const struct > ovsrec_interface *iface_cfg, > > /* Populate initial status in database. */ > iface_refresh_stats(iface); > - iface_refresh_status(iface); > + iface_refresh_netdev_status(iface); > > /* Add bond fake iface if necessary. */ > if (port_is_bond_fake_iface(port)) { > @@ -1764,22 +1766,27 @@ dpid_from_hash(const void *data, size_t n) > } > > static void > -iface_refresh_status(struct iface *iface) > +iface_refresh_netdev_status(struct iface *iface) > { > struct smap smap; > > enum netdev_features current; > - int64_t bps; > - int mtu; > - int64_t mtu_64; > + enum netdev_flags flags; > + const char *link_state; > uint8_t mac[ETH_ADDR_LEN]; > - int64_t ifindex64; > - int error; > + int64_t bps, mtu_64, ifindex64, link_resets; > + int mtu, error; > > if (iface_is_synthetic(iface)) { > return; > } > > + if (iface->change_seq == netdev_get_change_seq(iface->netdev)) { > + return; > + } > + > + iface->change_seq = netdev_get_change_seq(iface->netdev); > + > smap_init(&smap); > > if (!netdev_get_status(iface->netdev, &smap)) { > @@ -1790,6 +1797,21 @@ iface_refresh_status(struct iface *iface) > > smap_destroy(&smap); > > + error = netdev_get_flags(iface->netdev, &flags); > + if (!error) { > + const char *state = flags & NETDEV_UP ? "up" : "down"; > + > + ovsrec_interface_set_admin_state(iface->cfg, state); > + } else { > + ovsrec_interface_set_admin_state(iface->cfg, NULL); > + } > + > + link_state = netdev_get_carrier(iface->netdev) ? "up" : "down"; > + ovsrec_interface_set_link_state(iface->cfg, link_state); > + > + link_resets = netdev_get_carrier_resets(iface->netdev); > + ovsrec_interface_set_link_resets(iface->cfg, &link_resets, 1); > + > error = netdev_get_features(iface->netdev, ¤t, NULL, NULL, > NULL); > bps = !error ? netdev_features_to_bps(current, 0) : 0; > if (bps) { > @@ -1829,6 +1851,38 @@ iface_refresh_status(struct iface *iface) > ovsrec_interface_set_ifindex(iface->cfg, &ifindex64, 1); > } > > +static void > +iface_refresh_ofproto_status(struct iface *iface) > +{ > + struct smap smap; > + int current, error; > + > + if (iface_is_synthetic(iface)) { > + return; > + } > + > + current = ofproto_port_is_lacp_current(iface->port->bridge->ofproto, > + iface->ofp_port); > + if (current >= 0) { > + bool bl = current; > + ovsrec_interface_set_lacp_current(iface->cfg, &bl, 1); > + } else { > + ovsrec_interface_set_lacp_current(iface->cfg, NULL, 0); > + } > + > + iface_refresh_cfm_stats(iface); > + > + smap_init(&smap); > + error = ofproto_port_get_bfd_status(iface->port->bridge->ofproto, > + iface->ofp_port, &smap); > + /* No need to do the following work if there is no status change. */ > + if (error < 0) { > + return; > + } > + ovsrec_interface_set_bfd_status(iface->cfg, &smap); > + smap_destroy(&smap); > +} > + > /* Writes 'iface''s CFM statistics to the database. 'iface' must not be > * synthetic. */ > static void > @@ -2143,145 +2197,6 @@ refresh_controller_status(void) > ofproto_free_ofproto_controller_info(&info); > } > > -/* "Instant" stats. > - * > - * Some information in the database must be kept as up-to-date as > possible to > - * allow controllers to respond rapidly to network outages. We call these > - * statistics "instant" stats. > - * > - * We wish to update these statistics every INSTANT_INTERVAL_MSEC > milliseconds, > - * assuming that they've changed. The only means we have to determine > whether > - * they have changed are: > - * > - * - Try to commit changes to the database. If nothing changed, then > - * ovsdb_idl_txn_commit() returns TXN_UNCHANGED, otherwise some other > - * value. > - * > - * - instant_stats_run() is called late in the run loop, after anything > that > - * might change any of the instant stats. > - * > - * We use these two facts together to avoid waking the process up every > - * INSTANT_INTERVAL_MSEC whether there is any change or not. > - */ > - > -/* Minimum interval between writing updates to the instant stats to the > - * database. */ > -#define INSTANT_INTERVAL_MSEC 100 > - > -/* Current instant stats database transaction, NULL if there is no ongoing > - * transaction. */ > -static struct ovsdb_idl_txn *instant_txn; > - > -/* Next time (in msec on monotonic clock) at which we will update the > instant > - * stats. */ > -static long long int instant_next_txn = LLONG_MIN; > - > -/* True if the run loop has run since we last saw that the instant stats > were > - * unchanged, that is, this is true if we need to wake up at > 'instant_next_txn' > - * to refresh the instant stats. */ > -static bool instant_stats_could_have_changed; > - > -static void > -instant_stats_run(void) > -{ > - enum ovsdb_idl_txn_status status; > - > - instant_stats_could_have_changed = true; > - > - if (!instant_txn) { > - struct bridge *br; > - uint64_t seq; > - > - if (time_msec() < instant_next_txn) { > - return; > - } > - instant_next_txn = time_msec() + INSTANT_INTERVAL_MSEC; > - > - seq = seq_read(connectivity_seq_get()); > - if (seq == connectivity_seqno) { > - return; > - } > - connectivity_seqno = seq; > - > - instant_txn = ovsdb_idl_txn_create(idl); > - HMAP_FOR_EACH (br, node, &all_bridges) { > - struct iface *iface; > - struct port *port; > - > - br_refresh_stp_status(br); > - > - HMAP_FOR_EACH (port, hmap_node, &br->ports) { > - port_refresh_stp_status(port); > - } > - > - HMAP_FOR_EACH (iface, name_node, &br->iface_by_name) { > - enum netdev_flags flags; > - struct smap smap; > - const char *link_state; > - int64_t link_resets; > - int current, error; > - > - if (iface_is_synthetic(iface)) { > - continue; > - } > - > - current = ofproto_port_is_lacp_current(br->ofproto, > - iface->ofp_port); > - if (current >= 0) { > - bool bl = current; > - ovsrec_interface_set_lacp_current(iface->cfg, &bl, 1); > - } else { > - ovsrec_interface_set_lacp_current(iface->cfg, NULL, > 0); > - } > - > - error = netdev_get_flags(iface->netdev, &flags); > - if (!error) { > - const char *state = flags & NETDEV_UP ? "up" : "down"; > - ovsrec_interface_set_admin_state(iface->cfg, state); > - } else { > - ovsrec_interface_set_admin_state(iface->cfg, NULL); > - } > - > - link_state = netdev_get_carrier(iface->netdev) ? "up" : > "down"; > - ovsrec_interface_set_link_state(iface->cfg, link_state); > - > - link_resets = netdev_get_carrier_resets(iface->netdev); > - ovsrec_interface_set_link_resets(iface->cfg, > &link_resets, 1); > - > - iface_refresh_cfm_stats(iface); > - > - smap_init(&smap); > - error = ofproto_port_get_bfd_status(br->ofproto, > iface->ofp_port, > - &smap); > - if (error < 0) { > - continue; > - } > - ovsrec_interface_set_bfd_status(iface->cfg, &smap); > - smap_destroy(&smap); > - } > - } > - } > - > - status = ovsdb_idl_txn_commit(instant_txn); > - if (status != TXN_INCOMPLETE) { > - ovsdb_idl_txn_destroy(instant_txn); > - instant_txn = NULL; > - } > - if (status == TXN_UNCHANGED) { > - instant_stats_could_have_changed = false; > - } > -} > - > -static void > -instant_stats_wait(void) > -{ > - if (instant_txn) { > - ovsdb_idl_txn_wait(instant_txn); > - } else if (instant_stats_could_have_changed) { > - poll_timer_wait_until(instant_next_txn); > - } > -} > - > static void > bridge_run__(void) > { > @@ -2303,6 +2218,14 @@ bridge_run__(void) > } > } > > +/* When the status update commit returns "TXN_INCOMPLETE", should > register a > + * timeout in "STATUS_CHECK_AGAIN_MSEC" to check again. */ > +#define STATUS_CHECK_AGAIN_MSEC 500 > + > +/* Current status database transaction, NULL if there is no ongoing > + * transaction. */ > +static struct ovsdb_idl_txn *status_txn; > + > void > bridge_run(void) > { > @@ -2311,6 +2234,7 @@ bridge_run(void) > > bool vlan_splinters_changed; > struct bridge *br; > + uint64_t seq; > > ovsrec_open_vswitch_init(&null_cfg); > > @@ -2431,7 +2355,6 @@ bridge_run(void) > > LIST_FOR_EACH (iface, port_elem, &port->ifaces) { > iface_refresh_stats(iface); > - iface_refresh_status(iface); > } > > port_refresh_stp_stats(port); > @@ -2450,8 +2373,40 @@ bridge_run(void) > iface_stats_timer = time_msec() + IFACE_STATS_INTERVAL; > } > > + /* Check the need to update status. */ > + seq = seq_read(connectivity_seq_get()); > + if (seq != connectivity_seqno) { > + enum ovsdb_idl_txn_status status; > + > + if (!status_txn) { > + connectivity_seqno = seq; > + status_txn = ovsdb_idl_txn_create(idl); > + HMAP_FOR_EACH (br, node, &all_bridges) { > + struct port *port; > + > + br_refresh_stp_status(br); > + HMAP_FOR_EACH (port, hmap_node, &br->ports) { > + struct iface *iface; > + > + port_refresh_stp_status(port); > + LIST_FOR_EACH (iface, port_elem, &port->ifaces) { > + iface_refresh_netdev_status(iface); > + iface_refresh_ofproto_status(iface); > + } > + } > + } > + } > + > + status = ovsdb_idl_txn_commit(status_txn); > + /* Do not destroy "status_txn" if the transaction is > + * "TXN_INCOMPLETE". */ > + if (status != TXN_INCOMPLETE) { > + ovsdb_idl_txn_destroy(status_txn); > + status_txn = NULL; > + } > + } > + > run_system_stats(); > - instant_stats_run(); > } > > void > @@ -2481,8 +2436,17 @@ bridge_wait(void) > poll_timer_wait_until(iface_stats_timer); > } > > + /* If the status database transaction is "TXN_INCOMPLETE" in this run, > + * register a timeout in "STATUS_CHECK_AGAIN_MSEC". Else, wait on the > + * global connectivity sequence number. Note, this also helps batch > + * multiple status changes into one transaction. */ > + if (status_txn) { > + poll_timer_wait_until(time_msec() + STATUS_CHECK_AGAIN_MSEC); > + } else { > + seq_wait(connectivity_seq_get(), connectivity_seqno); > + } > + > system_stats_wait(); > - instant_stats_wait(); > } > > /* Adds some memory usage statistics for bridges into 'usage', for use > with > -- > 1.7.9.5 > >
_______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev