Salut Robin,

On Thu, Feb 2, 2023 at 2:44 PM Robin Jarry <rja...@redhat.com> wrote:
>
> Reuse the --record-core-cycles option to account for busy cycles. One
> turn of packet_fwd_t is considered "busy" if there was at least one
> received or transmitted packet.
>
> Add a new busy_cycles field in struct fwd_stream. Update get_end_cycles
> to accept an additional argument for the number of processed packets.
> Update fwd_stream.busy_cycles when the number of packets is greater
> than zero.
>
> When --record-core-cycles is specified, register a callback with
> rte_lcore_register_usage_cb(). In the callback, use the new lcore_id
> field in struct fwd_lcore to identify the correct index in fwd_lcores
> and return the sum of busy/total cycles of all fwd_streams.
>
> This makes the cycles counters available in rte_lcore_dump() and the
> lcore telemetry API:
>
>   testpmd> dump_lcores
>   lcore 3, socket 0, role RTE, cpuset 3
>   lcore 4, socket 0, role RTE, cpuset 4, busy cycles 1228584096/9239923140
>   lcore 5, socket 0, role RTE, cpuset 5, busy cycles 1255661768/9218141538
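Side note, to make the accounting concrete before I comment: the
get_end_cycles() change itself is snipped from this mail. From the
commit message, I understand it to be something like the following
sketch (my reconstruction, not a quote from the patch):

/* Sketch reconstructed from the commit message above, not the actual
 * patch: record elapsed TSC cycles for this turn of packet_fwd_t. */
static inline void
get_end_cycles(struct fwd_stream *fs, uint64_t start_tsc, uint64_t nb_packets)
{
	if (record_core_cycles) {
		uint64_t end_tsc = rte_rdtsc();
		uint64_t cycles = end_tsc - start_tsc;

		/* Every turn contributes to the total... */
		fs->core_cycles += cycles;
		/* ...but only turns that moved packets count as busy. */
		if (nb_packets > 0)
			fs->busy_cycles += cycles;
	}
}

With this, core_cycles keeps accumulating on every iteration, while
busy_cycles only grows on iterations that actually moved packets.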
I have been playing a bit with this series using two lcores, each
polling a net/null port. At first it looked good, but then I made one
lcore idle by asking net/null not to receive anything:

  $ build-clang/app/dpdk-testpmd -c 7 --no-huge -m 40 -a 0:0.0 \
      --vdev net_null1,no-rx=1 --vdev net_null2 -- \
      --no-mlockall --total-num-mbufs=2048 -ia \
      --record-core-cycles --nb-cores=2

One thing that struck me is that the idle lcore always showed a smaller
"total_cycles" than the busy one, and the longer testpmd ran, the
bigger the divergence between lcores became.

Re-reading the API, its semantics are unclear to me (which is the
reason for my comments on patch 2). Let's first sort out my patch 2
comments, and we may revisit this patch 4 implementation afterwards (I
think the current implementation is not accounting for some main-loop
cycles).

For now, I have some comments on the existing data structures; see
below.

[snip]

> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index e366f81a0f46..105f75ad5f35 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -2053,7 +2053,7 @@ fwd_stats_display(void)
>  				fs->rx_bad_outer_ip_csum;
>
>  		if (record_core_cycles)
> -			fwd_cycles += fs->core_cycles;
> +			fwd_cycles += fs->busy_cycles;
>  	}
>  	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
>  		pt_id = fwd_ports_ids[i];
> @@ -2184,6 +2184,7 @@ fwd_stats_reset(void)
>
>  		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
>  		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
> +		fs->busy_cycles = 0;
>  		fs->core_cycles = 0;
>  	}
>  }
> @@ -2260,6 +2261,7 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
>  	tics_datum = rte_rdtsc();
>  	tics_per_1sec = rte_get_timer_hz();
>  #endif
> +	fc->lcore_id = rte_lcore_id();

A fwd_lcore object is bound to a single lcore, so this lcore_id field
is unneeded.

>  	fsm = &fwd_streams[fc->stream_idx];
>  	nb_fs = fc->stream_nb;
>  	do {
> @@ -2288,6 +2290,38 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
>  	} while (! fc->stopped);
>  }
>
> +static int
> +lcore_usage_callback(unsigned int lcore_id, struct rte_lcore_usage *usage)
> +{
> +	struct fwd_stream **fsm;
> +	struct fwd_lcore *fc;
> +	streamid_t nb_fs;
> +	streamid_t sm_id;
> +	int c;
> +
> +	for (c = 0; c < nb_lcores; c++) {
> +		fc = fwd_lcores[c];
> +		if (fc->lcore_id != lcore_id)
> +			continue;

You can find which fwd_lcore is mapped to a lcore using existing
structures.
This requires updating some helper, something like:

diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 7d24d25970..e5297ee7fb 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -785,25 +785,31 @@ is_proc_primary(void)
 	return rte_eal_process_type() == RTE_PROC_PRIMARY;
 }

-static inline unsigned int
-lcore_num(void)
+void
+parse_fwd_portlist(const char *port);
+
+static inline struct fwd_lcore *
+lcore_to_fwd_lcore(uint16_t lcore_id)
 {
 	unsigned int i;

-	for (i = 0; i < RTE_MAX_LCORE; ++i)
-		if (fwd_lcores_cpuids[i] == rte_lcore_id())
-			return i;
+	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; ++i) {
+		if (fwd_lcores_cpuids[i] == lcore_id)
+			return fwd_lcores[i];
+	}

-	rte_panic("lcore_id of current thread not found in fwd_lcores_cpuids\n");
+	return NULL;
 }

-void
-parse_fwd_portlist(const char *port);
-
 static inline struct fwd_lcore *
 current_fwd_lcore(void)
 {
-	return fwd_lcores[lcore_num()];
+	struct fwd_lcore *fc = lcore_to_fwd_lcore(rte_lcore_id());
+
+	if (fc == NULL)
+		rte_panic("lcore_id of current thread not found in fwd_lcores_cpuids\n");
+
+	return fc;
 }

 /* Mbuf Pools */

And then, by using this new helper, lcore_usage_callback becomes
simpler:

+static int
+lcore_usage_callback(unsigned int lcore_id, struct rte_lcore_usage *usage)
+{
+	struct fwd_stream **fsm;
+	struct fwd_lcore *fc;
+	streamid_t nb_fs;
+	streamid_t sm_id;
+
+	fc = lcore_to_fwd_lcore(lcore_id);
+	if (fc == NULL)
+		return -1;
+
+	fsm = &fwd_streams[fc->stream_idx];
+	nb_fs = fc->stream_nb;
+	usage->busy_cycles = 0;
+	usage->total_cycles = 0;
+
+	for (sm_id = 0; sm_id < nb_fs; sm_id++) {
+		if (fsm[sm_id]->disabled)
+			continue;
+
+		usage->busy_cycles += fsm[sm_id]->busy_cycles;
+		usage->total_cycles += fsm[sm_id]->core_cycles;
+	}
+
+	return 0;
+}

--
David Marchand