On Thu, May 11, 2023 at 1:55 PM Feifei Wang <feifei.wa...@arm.com> wrote: > > This patch is to collect empty poll of 'rte_eth_rx_burst' functions in > dpdk l3fwd application. Empty poll means Rx burst function receives no > pkts in one loop. > > Furthermore, we also add 'nic_xstats_display' API to show NIC counters. > > Usage: > With this patch, no special settings, just run l3fwd, and when you > stoping l3fwd, thread will print the info above. > > Note: > This patch has just a slight impact on performance and can be ignored.
IMO, We should not introduce regression as l3fwd kind of uses as reference application. I think, l3fwd should limit to stats exposed by ethdev(i.e directly from NIC, without performance regression). > > dpdk version:23.03 > > Suggested-by: Lijian Zhang <lijian.zh...@arm.com> > Signed-off-by: Feifei Wang <feifei.wa...@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> > --- > examples/l3fwd/l3fwd.h | 68 ++++++++++++++++++++++++++++++++++++++ > examples/l3fwd/l3fwd_lpm.c | 26 +++++++++++++-- > examples/l3fwd/main.c | 22 ++++++++++++ > 3 files changed, 114 insertions(+), 2 deletions(-) > > diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h > index b55855c932..2b3fca62f3 100644 > --- a/examples/l3fwd/l3fwd.h > +++ b/examples/l3fwd/l3fwd.h > @@ -56,6 +56,17 @@ > #define L3FWD_HASH_ENTRIES (1024*1024*1) > #endif > > +struct lcore_stats { > + uint32_t nb_rx_pkts[16]; > + uint32_t num_loop[16]; > + uint32_t none_loop[16]; > + uint32_t no_full_loop[16]; > + float none_loop_per[16]; > + float no_full_loop_per[16]; > +} __rte_cache_aligned; > + > +extern struct lcore_stats stats[RTE_MAX_LCORE]; > + > struct parm_cfg { > const char *rule_ipv4_name; > const char *rule_ipv6_name; > @@ -115,6 +126,63 @@ extern struct acl_algorithms acl_alg[]; > > extern uint32_t max_pkt_len; > > +static inline void > +nic_xstats_display(uint32_t port_id) > +{ > + struct rte_eth_xstat *xstats; > + int cnt_xstats, idx_xstat; > + struct rte_eth_xstat_name *xstats_names; > + > + printf("###### NIC extended statistics for port %-2d\n", port_id); > + if (!rte_eth_dev_is_valid_port(port_id)) { > + fprintf(stderr, "Error: Invalid port number %i\n", port_id); > + return; > + } > + > + /* Get count */ > + cnt_xstats = rte_eth_xstats_get_names(port_id, NULL, 0); > + if (cnt_xstats < 0) { > + fprintf(stderr, "Error: Cannot get count of xstats\n"); > + return; > + } > + > + /* Get id-name lookup table */ > + xstats_names = malloc(sizeof(struct rte_eth_xstat_name) * > cnt_xstats); > + if (xstats_names == NULL) { > + fprintf(stderr, "Cannot allocate memory for xstats > lookup\n"); > + return; > + } > + if (cnt_xstats != rte_eth_xstats_get_names( > + port_id, xstats_names, cnt_xstats)) { > + fprintf(stderr, "Error: Cannot get xstats lookup\n"); > + free(xstats_names); > + return; > + } > + > + /* Get stats themselves */ > + xstats = malloc(sizeof(struct rte_eth_xstat) * cnt_xstats); > + if (xstats == NULL) { > + fprintf(stderr, "Cannot allocate memory for xstats\n"); > + free(xstats_names); > + return; > + } > + if (cnt_xstats != rte_eth_xstats_get(port_id, xstats, cnt_xstats)) { > + fprintf(stderr, "Error: Unable to get xstats\n"); > + free(xstats_names); > + free(xstats); > + return; > + } > + > + /* Display xstats */ > + for (idx_xstat = 0; idx_xstat < cnt_xstats; idx_xstat++) { > + printf("%s: %"PRIu64"\n", > + xstats_names[idx_xstat].name, > + xstats[idx_xstat].value); > + } > + free(xstats_names); > + free(xstats); > +} > + > /* Send burst of packets on an output interface */ > static inline int > send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t port) > diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c > index 4ac1925c84..9e27e954b9 100644 > --- a/examples/l3fwd/l3fwd_lpm.c > +++ b/examples/l3fwd/l3fwd_lpm.c > @@ -41,6 +41,8 @@ > static struct rte_lpm *ipv4_l3fwd_lpm_lookup_struct[NB_SOCKETS]; > static struct rte_lpm6 *ipv6_l3fwd_lpm_lookup_struct[NB_SOCKETS]; > > +extern struct lcore_stats stats[RTE_MAX_LCORE]; > + > /* Performing LPM-based lookups. 8< */ > static inline uint16_t > lpm_get_ipv4_dst_port(const struct rte_ipv4_hdr *ipv4_hdr, > @@ -153,6 +155,7 @@ lpm_main_loop(__rte_unused void *dummy) > struct lcore_conf *qconf; > const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / > US_PER_S * BURST_TX_DRAIN_US; > + bool start_count = 0; > > lcore_id = rte_lcore_id(); > qconf = &lcore_conf[lcore_id]; > @@ -207,8 +210,22 @@ lpm_main_loop(__rte_unused void *dummy) > queueid = qconf->rx_queue_list[i].queue_id; > nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, > MAX_PKT_BURST); > - if (nb_rx == 0) > - continue; > + if (start_count == 0) { > + if (nb_rx != 0) > + start_count = 1; > + } > + > + if (start_count == 1) { > + stats[lcore_id].nb_rx_pkts[i] += nb_rx; > + stats[lcore_id].num_loop[i]++; > + if (nb_rx < MAX_PKT_BURST && nb_rx > 0) > + stats[lcore_id].no_full_loop[i]++; > + > + if (nb_rx == 0) { > + stats[lcore_id].none_loop[i]++; > + continue; > + } > + } > > #if defined RTE_ARCH_X86 || defined __ARM_NEON \ > || defined RTE_ARCH_PPC_64 > @@ -223,6 +240,11 @@ lpm_main_loop(__rte_unused void *dummy) > cur_tsc = rte_rdtsc(); > } > > + for (i = 0; i < n_rx_q; ++i) { > + stats[lcore_id].none_loop_per[i] = > (float)stats[lcore_id].none_loop[i]/stats[lcore_id].num_loop[i]*100; > + stats[lcore_id].no_full_loop_per[i] = > (float)stats[lcore_id].no_full_loop[i]/stats[lcore_id].num_loop[i]*100; > + } > + > return 0; > } > > diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c > index a4f061537e..4727215eae 100644 > --- a/examples/l3fwd/main.c > +++ b/examples/l3fwd/main.c > @@ -53,6 +53,8 @@ > > #define MAX_LCORE_PARAMS 1024 > > +struct lcore_stats stats[RTE_MAX_LCORE]; > + > uint16_t nb_rxd = RX_DESC_DEFAULT; > uint16_t nb_txd = TX_DESC_DEFAULT; > > @@ -1592,6 +1594,26 @@ main(int argc, char **argv) > } else { > rte_eal_mp_wait_lcore(); > > + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { > + if (rte_lcore_is_enabled(lcore_id) == 0) > + continue; > + qconf = &lcore_conf[lcore_id]; > + for (queue = 0; queue < qconf->n_rx_queue; ++queue) { > + printf("\nlcore id:%d\n", lcore_id); > + printf("queue_id:%d\n",queue); > + printf("Rx pkt %d\n", > stats[lcore_id].nb_rx_pkts[queue]); > + printf("loop number: %d, 0 pkts loop:%d, <32 > pkts loop:%d\n", > + stats[lcore_id].num_loop[queue], > stats[lcore_id].none_loop[queue], stats[lcore_id].no_full_loop[queue]); > + printf("0 pkts loop percentage:%.2f%%, <32 > pkts loop percentage:%.2f%%\n", > + stats[lcore_id].none_loop_per[queue], > stats[lcore_id].no_full_loop_per[queue]); > + > printf("------------------------------------\n\n"); > + > + } > + } > + > + nic_xstats_display(0); > + nic_xstats_display(1); > + > RTE_ETH_FOREACH_DEV(portid) { > if ((enabled_port_mask & (1 << portid)) == 0) > continue; > -- > 2.25.1 >