Hi Robin,

On Thu, Feb 2, 2023 at 2:44 PM Robin Jarry <rja...@redhat.com> wrote:
>
> Reuse the --record-core-cycles option to account for busy cycles. One
> turn of packet_fwd_t is considered "busy" if there was at least one
> received or transmitted packet.
>
> Add a new busy_cycles field in struct fwd_stream. Update get_end_cycles
> to accept an additional argument for the number of processed packets.
> Update fwd_stream.busy_cycles when the number of packets is greater than
> zero.
>
> When --record-core-cycles is specified, register a callback with
> rte_lcore_register_usage_cb(). In the callback, use the new lcore_id
> field in struct fwd_lcore to identify the correct index in fwd_lcores
> and return the sum of busy/total cycles of all fwd_streams.
>
> This makes the cycles counters available in rte_lcore_dump() and the
> lcore telemetry API:
>
>  testpmd> dump_lcores
>  lcore 3, socket 0, role RTE, cpuset 3
>  lcore 4, socket 0, role RTE, cpuset 4, busy cycles 1228584096/9239923140
>  lcore 5, socket 0, role RTE, cpuset 5, busy cycles 1255661768/9218141538
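
For reference, the per-stream accounting described above boils down to
something like the following sketch (reconstructed from the commit
message, so the exact hunk in the patch may differ):

    static inline void
    get_end_cycles(struct fwd_stream *fs, uint64_t start_tsc,
            uint64_t nb_packets)
    {
        if (record_core_cycles) {
            uint64_t cycles = rte_rdtsc() - start_tsc;

            /* always account the time spent in this turn */
            fs->core_cycles += cycles;
            /* the turn only counts as busy if packets were processed */
            if (nb_packets > 0)
                fs->busy_cycles += cycles;
        }
    }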

I have been playing a bit with this series with two lcores, each
polling a net/null port.
At first it looked good, but then I made one lcore idle by asking
net/null not to receive anything.

$ build-clang/app/dpdk-testpmd -c 7 --no-huge -m 40 -a 0:0.0 \
    --vdev net_null1,no-rx=1 --vdev net_null2 -- \
    --no-mlockall --total-num-mbufs=2048 -ia --record-core-cycles --nb-cores=2

One thing that struck me is that an idle lcore always showed a smaller
"total_cycles" value than a busy one.
The longer testpmd ran, the bigger the divergence between lcores became.

Re-reading the API, its expected semantics are unclear to me (which is
the reason for my comments on patch 2).
Let's sort out my patch 2 comments first, and we may revisit this patch
4 implementation afterwards (as I think the current implementation does
not account for some main loop cycles).


For now, I have some comments on the existing data structures, see below.

[snip]

> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index e366f81a0f46..105f75ad5f35 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -2053,7 +2053,7 @@ fwd_stats_display(void)
>                                 fs->rx_bad_outer_ip_csum;
>
>                 if (record_core_cycles)
> -                       fwd_cycles += fs->core_cycles;
> +                       fwd_cycles += fs->busy_cycles;
>         }
>         for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
>                 pt_id = fwd_ports_ids[i];
> @@ -2184,6 +2184,7 @@ fwd_stats_reset(void)
>
>                 memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
>                 memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
> +               fs->busy_cycles = 0;
>                 fs->core_cycles = 0;
>         }
>  }
> @@ -2260,6 +2261,7 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
>         tics_datum = rte_rdtsc();
>         tics_per_1sec = rte_get_timer_hz();
>  #endif
> +       fc->lcore_id = rte_lcore_id();

A fwd_lcore object is bound to a single lcore, so this lcore_id is
unneeded: the lcore to fwd_lcore mapping is already available via
fwd_lcores_cpuids[] (see below).


>         fsm = &fwd_streams[fc->stream_idx];
>         nb_fs = fc->stream_nb;
>         do {
> @@ -2288,6 +2290,38 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
>         } while (! fc->stopped);
>  }
>
> +static int
> +lcore_usage_callback(unsigned int lcore_id, struct rte_lcore_usage *usage)
> +{
> +       struct fwd_stream **fsm;
> +       struct fwd_lcore *fc;
> +       streamid_t nb_fs;
> +       streamid_t sm_id;
> +       int c;
> +
> +       for (c = 0; c < nb_lcores; c++) {
> +               fc = fwd_lcores[c];
> +               if (fc->lcore_id != lcore_id)
> +                       continue;

You can find which fwd_lcore is mapped to a given lcore using existing
structures.
This requires updating a helper, something like:

diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 7d24d25970..e5297ee7fb 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -785,25 +785,31 @@ is_proc_primary(void)
        return rte_eal_process_type() == RTE_PROC_PRIMARY;
 }

-static inline unsigned int
-lcore_num(void)
+void
+parse_fwd_portlist(const char *port);
+
+static inline struct fwd_lcore *
+lcore_to_fwd_lcore(uint16_t lcore_id)
 {
        unsigned int i;

-       for (i = 0; i < RTE_MAX_LCORE; ++i)
-               if (fwd_lcores_cpuids[i] == rte_lcore_id())
-                       return i;
+       for (i = 0; i < cur_fwd_config.nb_fwd_lcores; ++i) {
+               if (fwd_lcores_cpuids[i] == lcore_id)
+                       return fwd_lcores[i];
+       }

-       rte_panic("lcore_id of current thread not found in fwd_lcores_cpuids\n");
+       return NULL;
 }

-void
-parse_fwd_portlist(const char *port);
-
 static inline struct fwd_lcore *
 current_fwd_lcore(void)
 {
-       return fwd_lcores[lcore_num()];
+       struct fwd_lcore *fc = lcore_to_fwd_lcore(rte_lcore_id());
+
+       if (fc == NULL)
+               rte_panic("lcore_id of current thread not found in fwd_lcores_cpuids\n");
+
+       return fc;
 }

 /* Mbuf Pools */


Then, by using this new helper, lcore_usage_callback becomes simpler:

+static int
+lcore_usage_callback(unsigned int lcore_id, struct rte_lcore_usage *usage)
+{
+    struct fwd_stream **fsm;
+    struct fwd_lcore *fc;
+    streamid_t nb_fs;
+    streamid_t sm_id;
+
+    fc = lcore_to_fwd_lcore(lcore_id);
+    if (fc == NULL)
+        return -1;
+
+    fsm = &fwd_streams[fc->stream_idx];
+    nb_fs = fc->stream_nb;
+    usage->busy_cycles = 0;
+    usage->total_cycles = 0;
+
+    for (sm_id = 0; sm_id < nb_fs; sm_id++) {
+        if (fsm[sm_id]->disabled)
+            continue;
+
+        usage->busy_cycles += fsm[sm_id]->busy_cycles;
+        usage->total_cycles += fsm[sm_id]->core_cycles;
+    }
+
+    return 0;
+}
+
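
For completeness, the registration itself can stay as described in the
commit log, i.e. something like this sketch (assuming it is done once
at init time, only when --record-core-cycles is set):

    if (record_core_cycles)
        rte_lcore_register_usage_cb(lcore_usage_callback);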


-- 
David Marchand
