These commands can be used to get packet and cycle counters on a per pmd
thread basis.  They are useful for getting a clearer picture of the
performance of the userspace datapath.

They export these pieces of information:

- A (per-thread) view of the cache hit rates.  Hits in the exact match
  cache are reported separately from hits in the masked classifier.
- A rough cycle count.  This allows estimating the load of OVS and the
  polling overhead.
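With the patch applied, pmd-stats-show output looks along these lines
(the numbers below are made up for illustration, not from a real run;
the field names and layout follow the format strings introduced by the
patch):

    pmd thread numa_id 0 core_id 1:
            emc hits:13889954
            masked hits:200000
            miss:100
            lost:0
            polling cycles:240000000 (20.00%)
            processing cycles:900000000 (75.00%)
            other cycles:60000000 (5.00%)
            avg cycles per packet: 85.17 (1200000000/14090054)
            avg processing cycles per packet: 63.87 (900000000/14090054)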
Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com>
---
 INSTALL.DPDK.md            |   8 +++
 lib/dpif-netdev.c          | 150 ++++++++++++++++++++++++++++++++++++++++++++-
 vswitchd/ovs-vswitchd.8.in |  18 ++++++
 3 files changed, 175 insertions(+), 1 deletion(-)

diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
index 60889d0..c2486ee 100644
--- a/INSTALL.DPDK.md
+++ b/INSTALL.DPDK.md
@@ -250,6 +250,14 @@ Using the DPDK with ovs-vswitchd:
    Note, core 0 is always reserved from non-pmd threads and should never be
    set in the cpu mask.
 
+   To understand where most of the time is spent and whether the caches are
+   effective, these commands can be used:
+
+   ```
+   ovs-appctl dpif-netdev/pmd-stats-clear #To reset statistics
+   ovs-appctl dpif-netdev/pmd-stats-show
+   ```
+
 DPDK Rings :
 ------------
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 4db7901..8eaa251 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -416,6 +416,13 @@ struct dp_netdev_pmd_thread {
                                     /* threads on same numa node. */
     int core_id;                    /* CPU core id of this pmd thread. */
     int numa_id;                    /* numa node id of this pmd thread. */
+
+    /* Only the thread can write on its own 'cycles' and 'stats'.
+     * The main thread keeps 'stats_zero' and 'cycles_zero' as base
+     * values and subtracts them from 'stats' and 'cycles' before
+     * reporting to the user. */
+    struct dp_netdev_pmd_cycles cycles_zero;
+    struct dp_netdev_pmd_stats stats_zero;
 };
 
 static inline uint64_t pmd_cycles_counter_diff(struct dp_netdev_pmd_thread *);
@@ -528,6 +535,147 @@ get_dp_netdev(const struct dpif *dpif)
     return dpif_netdev_cast(dpif)->dp;
 }
 
+enum pmd_info_type {
+    PMD_INFO_SHOW_STATS,  /* show how cpu cycles are spent */
+    PMD_INFO_CLEAR_STATS  /* set the cycles count to 0 */
+};
+
+static void
+dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
+                     void *aux)
+{
+    struct ds reply = DS_EMPTY_INITIALIZER;
+    struct dp_netdev_pmd_thread *pmd;
+    struct dp_netdev *dp = NULL;
+    enum pmd_info_type type = (enum pmd_info_type) aux;
+
+    ovs_mutex_lock(&dp_netdev_mutex);
+
+    if (argc == 2) {
+        dp = shash_find_data(&dp_netdevs, argv[1]);
+    } else if (shash_count(&dp_netdevs) == 1) {
+        /* There's only one datapath. */
+        dp = shash_random_node(&dp_netdevs)->data;
+    }
+
+    if (!dp) {
+        ovs_mutex_unlock(&dp_netdev_mutex);
+        unixctl_command_reply_error(conn,
+                                    "please specify an existing datapath");
+        return;
+    }
+
+    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+        struct dp_netdev_pmd_stats s;
+        struct dp_netdev_pmd_cycles t;
+        unsigned long long total_packets = 0;
+        uint64_t total_cycles = 0;
+        uint32_t c;
+        int i;
+
+        if (type == PMD_INFO_CLEAR_STATS) {
+            /* We cannot write 'stats' and 'cycles' (because they're written
+             * by other threads) and we shouldn't change 'stats' (because
+             * they're used to count datapath stats, which must not be
+             * cleared here).
+             * Instead, we save the current values and subtract them from
+             * the values to be displayed in the future. */
+            do {
+                c = u64_stats_read_begin(&pmd->stats_lock);
+                pmd->cycles_zero = pmd->cycles;
+                pmd->stats_zero = pmd->stats;
+            } while (!u64_stats_read_correct(&pmd->stats_lock, c));
+
+            continue;
+        }
+
+        do {
+            c = u64_stats_read_begin(&pmd->stats_lock);
+            t = pmd->cycles;
+            s = pmd->stats;
+        } while (!u64_stats_read_correct(&pmd->stats_lock, c));
+
+        for (i = 0; i < ARRAY_SIZE(t.n); i++) {
+            t.n[i] -= pmd->cycles_zero.n[i];
+            total_cycles += t.n[i];
+        }
+        for (i = 0; i < ARRAY_SIZE(s.n); i++) {
+            s.n[i] -= pmd->stats_zero.n[i];
+            if (i != DP_STAT_LOST) {
+                total_packets += s.n[i];
+            }
+        }
+
+        ds_put_cstr(&reply, (pmd->core_id == NON_PMD_CORE_ID)
+                            ? "main thread" : "pmd thread");
+
+        if (pmd->numa_id != OVS_NUMA_UNSPEC) {
+            ds_put_format(&reply, " numa_id %d", pmd->numa_id);
+        }
+        if (pmd->core_id != OVS_CORE_UNSPEC) {
+            ds_put_format(&reply, " core_id %d", pmd->core_id);
+        }
+        ds_put_cstr(&reply, ":\n");
+
+        ds_put_format(&reply,
+                      "\temc hits:%llu\n"
+                      "\tmasked hits:%llu\n"
+                      "\tmiss:%llu\n"
+                      "\tlost:%llu\n",
+                      s.n[DP_STAT_EXACT_HIT],
+                      s.n[DP_STAT_MASKED_HIT],
+                      s.n[DP_STAT_MISS],
+                      s.n[DP_STAT_LOST]);
+
+        if (total_cycles == 0) {
+            ds_put_cstr(&reply, "\tcycles counters not supported\n");
+            continue;
+        }
+
+        ds_put_format(&reply,
+                      "\tpolling cycles:%"PRIu64" (%.02f%%)\n"
+                      "\tprocessing cycles:%"PRIu64" (%.02f%%)\n"
+                      "\tother cycles:%"PRIu64" (%.02f%%)\n",
+                      t.n[PMD_CYCLES_POLLING],
+                      t.n[PMD_CYCLES_POLLING] / (double) total_cycles * 100,
+                      t.n[PMD_CYCLES_PROCESSING],
+                      t.n[PMD_CYCLES_PROCESSING] / (double) total_cycles * 100,
+                      t.n[PMD_CYCLES_OTHER],
+                      t.n[PMD_CYCLES_OTHER] / (double) total_cycles * 100);
+
+        if (total_packets == 0) {
+            ds_put_cstr(&reply, "\tno packets processed yet\n");
+            continue;
+        }
+
+        ds_put_format(&reply,
+                      "\tavg cycles per packet: %.02f (%"PRIu64"/%llu)\n",
+                      total_cycles / (double) total_packets,
+                      total_cycles, total_packets);
+
+        ds_put_format(&reply,
+                      "\tavg processing cycles per packet: "
+                      "%.02f (%"PRIu64"/%llu)\n",
+                      t.n[PMD_CYCLES_PROCESSING] / (double) total_packets,
+                      t.n[PMD_CYCLES_PROCESSING], total_packets);
+    }
+
+    ovs_mutex_unlock(&dp_netdev_mutex);
+
+    unixctl_command_reply(conn, ds_cstr(&reply));
+    ds_destroy(&reply);
+}
+
+static int
+dpif_netdev_init(void)
+{
+    unixctl_command_register("dpif-netdev/pmd-stats-show", "[dp]",
+                             0, 1, dpif_netdev_pmd_info,
+                             (void *)PMD_INFO_SHOW_STATS);
+    unixctl_command_register("dpif-netdev/pmd-stats-clear", "[dp]",
+                             0, 1, dpif_netdev_pmd_info,
+                             (void *)PMD_INFO_CLEAR_STATS);
+    return 0;
+}
+
 static int
 dpif_netdev_enumerate(struct sset *all_dps,
                       const struct dpif_class *dpif_class)
@@ -3354,7 +3502,7 @@ dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
 
 const struct dpif_class dpif_netdev_class = {
     "netdev",
-    NULL,                       /* init */
+    dpif_netdev_init,
     dpif_netdev_enumerate,
     dpif_netdev_port_open_type,
     dpif_netdev_open,
diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in
index 7f165ea..26b257b 100644
--- a/vswitchd/ovs-vswitchd.8.in
+++ b/vswitchd/ovs-vswitchd.8.in
@@ -239,6 +239,24 @@ type).
 ..
 .so lib/dpctl.man
 .
+.SS "DPIF-NETDEV COMMANDS"
+These commands are used to expose internal information (mostly statistics)
+about the ``dpif-netdev'' userspace datapath.  If there is only one datapath
+(as is often the case, unless \fBdpctl/\fR commands are used), the \fIdp\fR
+argument can be omitted.
+.IP "\fBdpif-netdev/pmd-stats-show\fR [\fIdp\fR]"
+Shows performance statistics for each pmd thread of the datapath \fIdp\fR.
+The special thread ``main'' sums up the statistics of every non pmd thread.
+The sum of ``emc hits'', ``masked hits'' and ``miss'' is the number of
+packets received by the datapath.  Cycles are counted using the TSC or
+similar facilities (when available on the platform).  To reset these
+counters use \fBdpif-netdev/pmd-stats-clear\fR.  The duration of one cycle
+depends on the measuring infrastructure.
+.IP "\fBdpif-netdev/pmd-stats-clear\fR [\fIdp\fR]"
+Resets to zero the per pmd thread performance numbers shown by the
+\fBdpif-netdev/pmd-stats-show\fR command.  It will NOT reset datapath or
+bridge statistics, only the values shown by the above command.
+.
 .so ofproto/ofproto-dpif-unixctl.man
 .so ofproto/ofproto-unixctl.man
 .so lib/vlog-unixctl.man
-- 
2.1.4
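A note for reviewers: the read loops above use u64_stats_read_begin() and
u64_stats_read_correct(), which this patch does not define.  The following
is a minimal sketch of the seqcount-style protocol those helpers are
assumed to implement.  The writer-side helpers and 'struct u64_stats_sync'
are hypothetical (the actual type of 'stats_lock' is not shown in the
patch), and a real implementation would need memory barriers or C11
atomics rather than 'volatile':

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical sequence counter assumed to back 'stats_lock'.  The
     * writer bumps 'seq' before and after each update, so 'seq' is odd
     * exactly while an update is in progress. */
    struct u64_stats_sync {
        volatile uint32_t seq;
    };

    /* Writer side (the pmd thread), assumed counterpart of the read
     * helpers used in dpif_netdev_pmd_info(). */
    static inline void
    u64_stats_write_begin(struct u64_stats_sync *sync)
    {
        sync->seq++;    /* Odd: update in progress. */
    }

    static inline void
    u64_stats_write_end(struct u64_stats_sync *sync)
    {
        sync->seq++;    /* Even again: update complete. */
    }

    /* Reader side: snapshot the sequence number before copying the
     * counters. */
    static inline uint32_t
    u64_stats_read_begin(const struct u64_stats_sync *sync)
    {
        return sync->seq;
    }

    /* Returns true if the copy made since u64_stats_read_begin() is
     * consistent: no writer was active when the read started ('begin'
     * even) and none completed an update meanwhile ('seq' unchanged).
     * The callers above retry until this returns true. */
    static inline bool
    u64_stats_read_correct(const struct u64_stats_sync *sync, uint32_t begin)
    {
        return !(begin & 1) && sync->seq == begin;
    }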