These commands can be used to get packets and cycles counters on a pmd
thread basis.  They're useful to get a clearer picture about the
performance of the userspace datapath.

They export these pieces of information:

- A (per-thread) view of the caches hit rate. Hits in the exact match
  cache are reported separately from hits in the masked classifier
- A rough cycles count. This makes it possible to estimate the load of OVS
  and the polling overhead.

Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com>
---
 INSTALL.DPDK.md            |   8 +++
 lib/dpif-netdev.c          | 150 ++++++++++++++++++++++++++++++++++++++++++++-
 vswitchd/ovs-vswitchd.8.in |  18 ++++++
 3 files changed, 175 insertions(+), 1 deletion(-)

diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
index 60889d0..c2486ee 100644
--- a/INSTALL.DPDK.md
+++ b/INSTALL.DPDK.md
@@ -250,6 +250,14 @@ Using the DPDK with ovs-vswitchd:
    Note, core 0 is always reserved from non-pmd threads and should never be set
    in the cpu mask.
 
+   To understand where most of the time is spent and whether the caches are
+   effective, these commands can be used:
+
+   ```
+   ovs-appctl dpif-netdev/pmd-stats-clear #To reset statistics
+   ovs-appctl dpif-netdev/pmd-stats-show
+   ```
+
 DPDK Rings :
 ------------
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 4db7901..8eaa251 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -416,6 +416,13 @@ struct dp_netdev_pmd_thread {
                                     /* threads on same numa node. */
     int core_id;                    /* CPU core id of this pmd thread. */
     int numa_id;                    /* numa node id of this pmd thread. */
+
+    /* Only the thread can write on its own 'cycles' and 'stats'.
+     * The main thread keeps 'stats_zero' and 'cycles_zero' as base
+     * values and subtracts them from 'stats' and 'cycles' before
+     * reporting to the user */
+    struct dp_netdev_pmd_cycles cycles_zero;
+    struct dp_netdev_pmd_stats stats_zero;
 };
 
 static inline uint64_t pmd_cycles_counter_diff(struct dp_netdev_pmd_thread *);
@@ -528,6 +535,147 @@ get_dp_netdev(const struct dpif *dpif)
     return dpif_netdev_cast(dpif)->dp;
 }
 
+enum pmd_info_type {
+    PMD_INFO_SHOW_STATS,  /* show how cpu cycles are spent */
+    PMD_INFO_CLEAR_STATS  /* set the cycles count to 0 */
+};
+
+static void
+dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
+                     void *aux)
+{
+    struct ds reply = DS_EMPTY_INITIALIZER;
+    struct dp_netdev_pmd_thread *pmd;
+    struct dp_netdev *dp = NULL;
+    enum pmd_info_type type = (enum pmd_info_type) aux;
+
+    ovs_mutex_lock(&dp_netdev_mutex);
+
+    if (argc == 2) {
+        dp = shash_find_data(&dp_netdevs, argv[1]);
+    } else if (shash_count(&dp_netdevs) == 1) {
+        /* There's only one datapath */
+        dp = shash_random_node(&dp_netdevs)->data;
+    }
+
+    if (!dp) {
+        ovs_mutex_unlock(&dp_netdev_mutex);
+        unixctl_command_reply_error(conn,
+                                    "please specify an existing datapath");
+        return;
+    }
+
+    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+        struct dp_netdev_pmd_stats s;
+        struct dp_netdev_pmd_cycles t;
+        unsigned long long total_packets = 0;
+        uint64_t total_cycles = 0;
+        uint32_t c;
+        int i;
+
+        if (type == PMD_INFO_CLEAR_STATS) {
+            /* We cannot write 'stats' and 'cycles' (because they're written
+             * by other threads) and we shouldn't change 'stats' (because
+             * they're used to count datapath stats, which must not be
+             * cleared here).
+             * Instead, we save the current values and subtract them from the
+             * values to be displayed in the future */
+            do {
+                c = u64_stats_read_begin(&pmd->stats_lock);
+                pmd->cycles_zero = pmd->cycles;
+                pmd->stats_zero = pmd->stats;
+            } while (!u64_stats_read_correct(&pmd->stats_lock, c));
+
+            continue;
+        }
+
+        do {
+            c = u64_stats_read_begin(&pmd->stats_lock);
+            t = pmd->cycles;
+            s = pmd->stats;
+        } while (!u64_stats_read_correct(&pmd->stats_lock, c));
+
+        for (i = 0; i < ARRAY_SIZE(t.n); i++) {
+            t.n[i] -= pmd->cycles_zero.n[i];
+            total_cycles += t.n[i];
+        }
+        for (i = 0; i < ARRAY_SIZE(s.n); i++) {
+            s.n[i] -= pmd->stats_zero.n[i];
+            if (i != DP_STAT_LOST) {
+                total_packets += s.n[i];
+            }
+        }
+
+        ds_put_cstr(&reply, (pmd->core_id == NON_PMD_CORE_ID)
+                            ? "main thread" : "pmd thread");
+
+        if (pmd->numa_id != OVS_NUMA_UNSPEC) {
+            ds_put_format(&reply, " numa_id %d", pmd->numa_id);
+        }
+        if (pmd->core_id != OVS_CORE_UNSPEC) {
+            ds_put_format(&reply, " core_id %d", pmd->core_id);
+        }
+        ds_put_cstr(&reply, ":\n");
+
+        ds_put_format(&reply,
+                      "\temc hits:%llu\n"
+                      "\tmasked hits:%llu\n"
+                      "\tmiss:%llu\n"
+                      "\tlost:%llu\n",
+                      s.n[DP_STAT_EXACT_HIT],
+                      s.n[DP_STAT_MASKED_HIT],
+                      s.n[DP_STAT_MISS],
+                      s.n[DP_STAT_LOST]);
+
+        if (total_cycles == 0) {
+            ds_put_cstr(&reply, "\tcycles counters not supported\n");
+            continue;
+        }
+
+        ds_put_format(&reply,
+                      "\tpolling cycles:%"PRIu64" (%.02f%%)\n"
+                      "\tprocessing cycles:%"PRIu64" (%.02f%%)\n"
+                      "\tother cycles:%"PRIu64" (%.02f%%)\n",
+                      t.n[PMD_CYCLES_POLLING],
+                      t.n[PMD_CYCLES_POLLING]/(double)total_cycles*100,
+                      t.n[PMD_CYCLES_PROCESSING],
+                      t.n[PMD_CYCLES_PROCESSING]/(double)total_cycles*100,
+                      t.n[PMD_CYCLES_OTHER],
+                      t.n[PMD_CYCLES_OTHER]/(double)total_cycles*100);
+
+        if (total_packets == 0) {
+            ds_put_cstr(&reply, "\tno packets processed yet\n");
+            continue;
+        }
+        ds_put_format(&reply,
+                      "\tavg cycles per packet: %.02f (%"PRIu64"/%llu)\n",
+                      total_cycles/(double)total_packets,
+                      total_cycles, total_packets);
+
+        ds_put_format(&reply,
+                      "\tavg processing cycles per packet: %.02f (%"PRIu64"/%llu)\n",
+                      t.n[PMD_CYCLES_PROCESSING]/(double)total_packets,
+                      t.n[PMD_CYCLES_PROCESSING], total_packets);
+    }
+
+    ovs_mutex_unlock(&dp_netdev_mutex);
+
+    unixctl_command_reply(conn, ds_cstr(&reply));
+    ds_destroy(&reply);
+}
+
+static int
+dpif_netdev_init(void)
+{
+    unixctl_command_register("dpif-netdev/pmd-stats-show", "[dp]",
+                             0, 1, dpif_netdev_pmd_info,
+                             (void *)PMD_INFO_SHOW_STATS);
+    unixctl_command_register("dpif-netdev/pmd-stats-clear", "[dp]",
+                             0, 1, dpif_netdev_pmd_info,
+                             (void *)PMD_INFO_CLEAR_STATS);
+    return 0;
+}
+
 static int
 dpif_netdev_enumerate(struct sset *all_dps,
                       const struct dpif_class *dpif_class)
@@ -3354,7 +3502,7 @@ dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
 
 const struct dpif_class dpif_netdev_class = {
     "netdev",
-    NULL,                       /* init */
+    dpif_netdev_init,
     dpif_netdev_enumerate,
     dpif_netdev_port_open_type,
     dpif_netdev_open,
diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in
index 7f165ea..26b257b 100644
--- a/vswitchd/ovs-vswitchd.8.in
+++ b/vswitchd/ovs-vswitchd.8.in
@@ -239,6 +239,24 @@ type).
 ..
 .so lib/dpctl.man
 .
+.SS "DPIF-NETDEV COMMANDS"
+These commands are used to expose internal information (mostly statistics)
+about the ``dpif-netdev'' userspace datapath. If there is only one datapath
+(as is often the case, unless \fBdpctl/\fR commands are used), the \fIdp\fR
+argument can be omitted.
+.IP "\fBdpif-netdev/pmd-stats-show\fR [\fIdp\fR]"
+Shows performance statistics for each pmd thread of the datapath \fIdp\fR.
+The special thread ``main'' sums up the statistics of every other thread.
+The sum of ``emc hits'', ``masked hits'' and ``miss'' is the number of
+packets received by the datapath. Cycles are counted using the TSC or similar
+facilities (when available on the platform). To reset these counters use
+\fBdpif-netdev/pmd-stats-clear\fR. The duration of one cycle depends on the
+measuring infrastructure.
+.IP "\fBdpif-netdev/pmd-stats-clear\fR [\fIdp\fR]"
+Resets to zero the per pmd thread performance numbers shown by the
+\fBdpif-netdev/pmd-stats-show\fR command. It will NOT reset datapath or
+bridge statistics, only the values shown by the above command.
+.
 .so ofproto/ofproto-dpif-unixctl.man
 .so ofproto/ofproto-unixctl.man
 .so lib/vlog-unixctl.man
-- 
2.1.4

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to