This commit removes the 'Instant' stats related logic in bridge.c.
Instead, the corresponding status is updated immediately after the
global connectivity sequence number changes.

This change brings the following effects:

1. The status database update is slightly faster, since the 'Instant'
   stats update period is 100 millisecond.

2. The main thread will no longer be waken up every 100 ms for 'Instant'
   stats check.  Some logic (e.g. ofproto_dpif_send_packet_in()) relies
   on this fact for function checking or I/O.  So, after this commit,
   they should explicitly notify the main thread via either registering
   a timeout in poll loop or using the global sequence number.

3. The update is more efficient, since the netdev's sequence number is
   used to avoid updating unchanged netdev status.

4. When status is changing super fast, to prevent frequent database
   transaction and backlogged jasonrpc message, logic is added to
   always wait for unfinished status database transaction.  This also
   helps batch multiple future status changes in one transaction.

In the experiment with 5K internal ports, when one port is flapping
at rate of every 0.3 second, there is no additional cpu consumption
observed.  When all ports are flapping at the rate of every 0.3 second,
there is no backlogged transactions observed.

Signed-off-by: Alex Wang <al...@nicira.com>

---
V2 -> V3:
- refine the commit log.
- when the current status database transaction returns "TXN_INCOMPLETE",
  do not allow future status updates until the hanging transaction finishes.
  This is extremely helpful in that it can batch multiple future status
  changes into one transaction.  And that is main reason for the elimination
  of backlogged (jasonrpc) database transactions.

PATCH -> V2:
- refine the commit message.
- refine the comments.
---
 ofproto/ofproto-dpif.c |    2 +
 vswitchd/bridge.c      |  264 +++++++++++++++++++++---------------------------
 2 files changed, 116 insertions(+), 150 deletions(-)

diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 04d5454..7f327b9 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -375,6 +375,8 @@ ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto,
         COVERAGE_INC(packet_in_overflow);
         free(CONST_CAST(void *, pin->up.packet));
         free(pin);
+    } else {
+        seq_change(connectivity_seq_get());
     }
 }
 
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 44fdb4e..78ec94d 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -77,6 +77,7 @@ struct iface {
     char *name;                 /* Host network device name. */
     struct netdev *netdev;      /* Network device. */
     ofp_port_t ofp_port;        /* OpenFlow port number. */
+    uint64_t change_seq;
 
     /* These members are valid only within bridge_reconfigure(). */
     const char *type;           /* Usually same as cfg->type. */
@@ -263,7 +264,8 @@ static void iface_configure_qos(struct iface *, const 
struct ovsrec_qos *);
 static void iface_configure_cfm(struct iface *);
 static void iface_refresh_cfm_stats(struct iface *);
 static void iface_refresh_stats(struct iface *);
-static void iface_refresh_status(struct iface *);
+static void iface_refresh_netdev_status(struct iface *);
+static void iface_refresh_ofproto_status(struct iface *);
 static bool iface_is_synthetic(const struct iface *);
 static ofp_port_t iface_get_requested_ofp_port(
     const struct ovsrec_interface *);
@@ -1506,7 +1508,7 @@ iface_create(struct bridge *br, const struct 
ovsrec_interface *iface_cfg,
 
     /* Populate initial status in database. */
     iface_refresh_stats(iface);
-    iface_refresh_status(iface);
+    iface_refresh_netdev_status(iface);
 
     /* Add bond fake iface if necessary. */
     if (port_is_bond_fake_iface(port)) {
@@ -1764,22 +1766,27 @@ dpid_from_hash(const void *data, size_t n)
 }
 
 static void
-iface_refresh_status(struct iface *iface)
+iface_refresh_netdev_status(struct iface *iface)
 {
     struct smap smap;
 
     enum netdev_features current;
-    int64_t bps;
-    int mtu;
-    int64_t mtu_64;
+    enum netdev_flags flags;
+    const char *link_state;
     uint8_t mac[ETH_ADDR_LEN];
-    int64_t ifindex64;
-    int error;
+    int64_t bps, mtu_64, ifindex64, link_resets;
+    int mtu, error;
 
     if (iface_is_synthetic(iface)) {
         return;
     }
 
+    if (iface->change_seq == netdev_get_change_seq(iface->netdev)) {
+        return;
+    }
+
+    iface->change_seq = netdev_get_change_seq(iface->netdev);
+
     smap_init(&smap);
 
     if (!netdev_get_status(iface->netdev, &smap)) {
@@ -1790,6 +1797,21 @@ iface_refresh_status(struct iface *iface)
 
     smap_destroy(&smap);
 
+    error = netdev_get_flags(iface->netdev, &flags);
+    if (!error) {
+        const char *state = flags & NETDEV_UP ? "up" : "down";
+
+        ovsrec_interface_set_admin_state(iface->cfg, state);
+    } else {
+        ovsrec_interface_set_admin_state(iface->cfg, NULL);
+    }
+
+    link_state = netdev_get_carrier(iface->netdev) ? "up" : "down";
+    ovsrec_interface_set_link_state(iface->cfg, link_state);
+
+    link_resets = netdev_get_carrier_resets(iface->netdev);
+    ovsrec_interface_set_link_resets(iface->cfg, &link_resets, 1);
+
     error = netdev_get_features(iface->netdev, &current, NULL, NULL, NULL);
     bps = !error ? netdev_features_to_bps(current, 0) : 0;
     if (bps) {
@@ -1829,6 +1851,38 @@ iface_refresh_status(struct iface *iface)
     ovsrec_interface_set_ifindex(iface->cfg, &ifindex64, 1);
 }
 
+static void
+iface_refresh_ofproto_status(struct iface *iface)
+{
+    struct smap smap;
+    int current, error;
+
+    if (iface_is_synthetic(iface)) {
+        return;
+    }
+
+    current = ofproto_port_is_lacp_current(iface->port->bridge->ofproto,
+                                           iface->ofp_port);
+    if (current >= 0) {
+        bool bl = current;
+        ovsrec_interface_set_lacp_current(iface->cfg, &bl, 1);
+    } else {
+        ovsrec_interface_set_lacp_current(iface->cfg, NULL, 0);
+    }
+
+    iface_refresh_cfm_stats(iface);
+
+    smap_init(&smap);
+    error = ofproto_port_get_bfd_status(iface->port->bridge->ofproto,
+                                        iface->ofp_port, &smap);
+    /* No need to do the following work if there is no status change. */
+    if (error < 0) {
+        return;
+    }
+    ovsrec_interface_set_bfd_status(iface->cfg, &smap);
+    smap_destroy(&smap);
+}
+
 /* Writes 'iface''s CFM statistics to the database. 'iface' must not be
  * synthetic. */
 static void
@@ -2143,145 +2197,6 @@ refresh_controller_status(void)
     ofproto_free_ofproto_controller_info(&info);
 }
 
-/* "Instant" stats.
- *
- * Some information in the database must be kept as up-to-date as possible to
- * allow controllers to respond rapidly to network outages.  We call these
- * statistics "instant" stats.
- *
- * We wish to update these statistics every INSTANT_INTERVAL_MSEC milliseconds,
- * assuming that they've changed.  The only means we have to determine whether
- * they have changed are:
- *
- *   - Try to commit changes to the database.  If nothing changed, then
- *     ovsdb_idl_txn_commit() returns TXN_UNCHANGED, otherwise some other
- *     value.
- *
- *   - instant_stats_run() is called late in the run loop, after anything that
- *     might change any of the instant stats.
- *
- * We use these two facts together to avoid waking the process up every
- * INSTANT_INTERVAL_MSEC whether there is any change or not.
- */
-
-/* Minimum interval between writing updates to the instant stats to the
- * database. */
-#define INSTANT_INTERVAL_MSEC 100
-
-/* Current instant stats database transaction, NULL if there is no ongoing
- * transaction. */
-static struct ovsdb_idl_txn *instant_txn;
-
-/* Next time (in msec on monotonic clock) at which we will update the instant
- * stats.  */
-static long long int instant_next_txn = LLONG_MIN;
-
-/* True if the run loop has run since we last saw that the instant stats were
- * unchanged, that is, this is true if we need to wake up at 'instant_next_txn'
- * to refresh the instant stats. */
-static bool instant_stats_could_have_changed;
-
-static void
-instant_stats_run(void)
-{
-    enum ovsdb_idl_txn_status status;
-
-    instant_stats_could_have_changed = true;
-
-    if (!instant_txn) {
-        struct bridge *br;
-        uint64_t seq;
-
-        if (time_msec() < instant_next_txn) {
-            return;
-        }
-        instant_next_txn = time_msec() + INSTANT_INTERVAL_MSEC;
-
-        seq = seq_read(connectivity_seq_get());
-        if (seq == connectivity_seqno) {
-            return;
-        }
-        connectivity_seqno = seq;
-
-        instant_txn = ovsdb_idl_txn_create(idl);
-        HMAP_FOR_EACH (br, node, &all_bridges) {
-            struct iface *iface;
-            struct port *port;
-
-            br_refresh_stp_status(br);
-
-            HMAP_FOR_EACH (port, hmap_node, &br->ports) {
-                port_refresh_stp_status(port);
-            }
-
-            HMAP_FOR_EACH (iface, name_node, &br->iface_by_name) {
-                enum netdev_flags flags;
-                struct smap smap;
-                const char *link_state;
-                int64_t link_resets;
-                int current, error;
-
-                if (iface_is_synthetic(iface)) {
-                    continue;
-                }
-
-                current = ofproto_port_is_lacp_current(br->ofproto,
-                                                       iface->ofp_port);
-                if (current >= 0) {
-                    bool bl = current;
-                    ovsrec_interface_set_lacp_current(iface->cfg, &bl, 1);
-                } else {
-                    ovsrec_interface_set_lacp_current(iface->cfg, NULL, 0);
-                }
-
-                error = netdev_get_flags(iface->netdev, &flags);
-                if (!error) {
-                    const char *state = flags & NETDEV_UP ? "up" : "down";
-                    ovsrec_interface_set_admin_state(iface->cfg, state);
-                } else {
-                    ovsrec_interface_set_admin_state(iface->cfg, NULL);
-                }
-
-                link_state = netdev_get_carrier(iface->netdev) ? "up" : "down";
-                ovsrec_interface_set_link_state(iface->cfg, link_state);
-
-                link_resets = netdev_get_carrier_resets(iface->netdev);
-                ovsrec_interface_set_link_resets(iface->cfg, &link_resets, 1);
-
-                iface_refresh_cfm_stats(iface);
-
-                smap_init(&smap);
-                error = ofproto_port_get_bfd_status(br->ofproto, 
iface->ofp_port,
-                                                    &smap);
-                if (error < 0) {
-                    continue;
-                }
-                ovsrec_interface_set_bfd_status(iface->cfg, &smap);
-                smap_destroy(&smap);
-            }
-        }
-    }
-
-    status = ovsdb_idl_txn_commit(instant_txn);
-    if (status != TXN_INCOMPLETE) {
-        ovsdb_idl_txn_destroy(instant_txn);
-        instant_txn = NULL;
-    }
-    if (status == TXN_UNCHANGED) {
-        instant_stats_could_have_changed = false;
-    }
-}
-
-static void
-instant_stats_wait(void)
-{
-    if (instant_txn) {
-        ovsdb_idl_txn_wait(instant_txn);
-    } else if (instant_stats_could_have_changed) {
-        poll_timer_wait_until(instant_next_txn);
-    }
-}
-
 static void
 bridge_run__(void)
 {
@@ -2303,6 +2218,14 @@ bridge_run__(void)
     }
 }
 
+/* When the status update commit returns "TXN_INCOMPLETE", should register a
+ * timeout in "STATUS_CHECK_AGAIN_MSEC" to check again. */
+#define STATUS_CHECK_AGAIN_MSEC 500
+
+/* Current status database transaction, NULL if there is no ongoing
+ * transaction. */
+static struct ovsdb_idl_txn *status_txn;
+
 void
 bridge_run(void)
 {
@@ -2311,6 +2234,7 @@ bridge_run(void)
 
     bool vlan_splinters_changed;
     struct bridge *br;
+    uint64_t seq;
 
     ovsrec_open_vswitch_init(&null_cfg);
 
@@ -2431,7 +2355,6 @@ bridge_run(void)
 
                     LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
                         iface_refresh_stats(iface);
-                        iface_refresh_status(iface);
                     }
 
                     port_refresh_stp_stats(port);
@@ -2450,8 +2373,40 @@ bridge_run(void)
         iface_stats_timer = time_msec() + IFACE_STATS_INTERVAL;
     }
 
+    /* Check the need to update status. */
+    seq = seq_read(connectivity_seq_get());
+    if (seq != connectivity_seqno) {
+        enum ovsdb_idl_txn_status status;
+
+        if (!status_txn) {
+            connectivity_seqno = seq;
+            status_txn = ovsdb_idl_txn_create(idl);
+            HMAP_FOR_EACH (br, node, &all_bridges) {
+                struct port *port;
+
+                br_refresh_stp_status(br);
+                HMAP_FOR_EACH (port, hmap_node, &br->ports) {
+                    struct iface *iface;
+
+                    port_refresh_stp_status(port);
+                    LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
+                        iface_refresh_netdev_status(iface);
+                        iface_refresh_ofproto_status(iface);
+                    }
+                }
+            }
+        }
+
+        status = ovsdb_idl_txn_commit(status_txn);
+        /* Do not destroy "status_txn" if the transaction is
+         * "TXN_INCOMPLETE". */
+        if (status != TXN_INCOMPLETE) {
+            ovsdb_idl_txn_destroy(status_txn);
+            status_txn = NULL;
+        }
+    }
+
     run_system_stats();
-    instant_stats_run();
 }
 
 void
@@ -2481,8 +2436,17 @@ bridge_wait(void)
         poll_timer_wait_until(iface_stats_timer);
     }
 
+    /* If the status database transaction is "TXN_INCOMPLETE" in this run,
+     * register a timeout in "STATUS_CHECK_AGAIN_MSEC".  Else, wait on the
+     * global connectivity sequence number.  Note, this also helps batch
+     * multiple status changes into one transaction. */
+    if (status_txn) {
+        poll_timer_wait_until(time_msec() + STATUS_CHECK_AGAIN_MSEC);
+    } else {
+        seq_wait(connectivity_seq_get(), connectivity_seqno);
+    }
+
     system_stats_wait();
-    instant_stats_wait();
 }
 
 /* Adds some memory usage statistics for bridges into 'usage', for use with
-- 
1.7.9.5

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to