<snip> > > > > > > > > On Thu, May 11, 2023 at 1:55 PM Feifei Wang <feifei.wa...@arm.com> > > > wrote: > > > > > > > > This patch is to collect empty poll of 'rte_eth_rx_burst' > > > > functions in dpdk l3fwd application. Empty poll means Rx burst > > > > function receives no pkts in one loop. > > > > > > > > Furthermore, we also add 'nic_xstats_display' API to show NIC counters. > > > > > > > > Usage: > > > > With this patch, no special settings, just run l3fwd, and when you > > > > stoping l3fwd, thread will print the info above. > > > > > > > > Note: > > > > This patch has just a slight impact on performance and can be ignored. > > How much is the regression? > > > > > > > > IMO, We should not introduce regression as l3fwd kind of uses as > > > reference application. > > > I think, l3fwd should limit to stats exposed by ethdev(i.e directly > > > from NIC, without performance regression). > > Agree L3fwd is the reference app. Unfortunately, it is not in a state to > > debug > any problems. May be many are just believing the numbers without > understanding that there are problems. > > Can we place these stats under a run time flag and reduce the impact > further? > > I think, example applications, we can have compile time option for new > feature addtions in fastpath or add new forwarding mode in testpmd. New forwarding (L3fwd in this case) mode for testpmd was rejected overwhelmingly. So, that's not an option.
I thought compile-time options were discouraged as well. But I am fine with the compile-time approach to get some debugging capabilities in this application. Maybe we could understand the performance difference with a run-time flag? > > > > > > > > > > > > > > > > > > > > dpdk version:23.03 > > > > > > > > Suggested-by: Lijian Zhang <lijian.zh...@arm.com> > > > > Signed-off-by: Feifei Wang <feifei.wa...@arm.com> > > > > Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com> > > > > Reviewed-by: Honnappa Nagarahalli > <honnappa.nagaraha...@arm.com> > > > > --- > > > > examples/l3fwd/l3fwd.h | 68 > > > ++++++++++++++++++++++++++++++++++++++ > > > > examples/l3fwd/l3fwd_lpm.c | 26 +++++++++++++-- > > > > examples/l3fwd/main.c | 22 ++++++++++++ > > > > 3 files changed, 114 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index > > > > b55855c932..2b3fca62f3 100644 > > > > --- a/examples/l3fwd/l3fwd.h > > > > +++ b/examples/l3fwd/l3fwd.h > > > > @@ -56,6 +56,17 @@ > > > > #define L3FWD_HASH_ENTRIES (1024*1024*1) > > > > #endif > > > > > > > > +struct lcore_stats { > > > > + uint32_t nb_rx_pkts[16]; > > > > + uint32_t num_loop[16]; > > > > + uint32_t none_loop[16]; > > > > + uint32_t no_full_loop[16]; > > > > + float none_loop_per[16]; > > > > + float no_full_loop_per[16]; > > > > +} __rte_cache_aligned; > > > > + > > > > +extern struct lcore_stats stats[RTE_MAX_LCORE]; > > > > + > > > > struct parm_cfg { > > > > const char *rule_ipv4_name; > > > > const char *rule_ipv6_name; @@ -115,6 +126,63 @@ extern > > > > struct acl_algorithms acl_alg[]; > > > > > > > > extern uint32_t max_pkt_len; > > > > > > > > +static inline void > > > > +nic_xstats_display(uint32_t port_id) { > > > > + struct rte_eth_xstat *xstats; > > > > + int cnt_xstats, idx_xstat; > > > > + struct rte_eth_xstat_name *xstats_names; > > > > + > > > > + printf("###### NIC extended statistics for port %-2d\n", > > > > port_id); > > > > + if 
(!rte_eth_dev_is_valid_port(port_id)) { > > > > + fprintf(stderr, "Error: Invalid port number %i\n", > > > > port_id); > > > > + return; > > > > + } > > > > + > > > > + /* Get count */ > > > > + cnt_xstats = rte_eth_xstats_get_names(port_id, NULL, 0); > > > > + if (cnt_xstats < 0) { > > > > + fprintf(stderr, "Error: Cannot get count of xstats\n"); > > > > + return; > > > > + } > > > > + > > > > + /* Get id-name lookup table */ > > > > + xstats_names = malloc(sizeof(struct rte_eth_xstat_name) * > cnt_xstats); > > > > + if (xstats_names == NULL) { > > > > + fprintf(stderr, "Cannot allocate memory for xstats > > > > lookup\n"); > > > > + return; > > > > + } > > > > + if (cnt_xstats != rte_eth_xstats_get_names( > > > > + port_id, xstats_names, cnt_xstats)) { > > > > + fprintf(stderr, "Error: Cannot get xstats lookup\n"); > > > > + free(xstats_names); > > > > + return; > > > > + } > > > > + > > > > + /* Get stats themselves */ > > > > + xstats = malloc(sizeof(struct rte_eth_xstat) * cnt_xstats); > > > > + if (xstats == NULL) { > > > > + fprintf(stderr, "Cannot allocate memory for xstats\n"); > > > > + free(xstats_names); > > > > + return; > > > > + } > > > > + if (cnt_xstats != rte_eth_xstats_get(port_id, xstats, > > > > cnt_xstats)) { > > > > + fprintf(stderr, "Error: Unable to get xstats\n"); > > > > + free(xstats_names); > > > > + free(xstats); > > > > + return; > > > > + } > > > > + > > > > + /* Display xstats */ > > > > + for (idx_xstat = 0; idx_xstat < cnt_xstats; idx_xstat++) { > > > > + printf("%s: %"PRIu64"\n", > > > > + xstats_names[idx_xstat].name, > > > > + xstats[idx_xstat].value); > > > > + } > > > > + free(xstats_names); > > > > + free(xstats); > > > > +} > > > > + > > > > /* Send burst of packets on an output interface */ static inline > > > > int send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t > > > > port) diff --git a/examples/l3fwd/l3fwd_lpm.c > > > > b/examples/l3fwd/l3fwd_lpm.c index > > > > 4ac1925c84..9e27e954b9 100644 > > > > --- 
a/examples/l3fwd/l3fwd_lpm.c > > > > +++ b/examples/l3fwd/l3fwd_lpm.c > > > > @@ -41,6 +41,8 @@ > > > > static struct rte_lpm *ipv4_l3fwd_lpm_lookup_struct[NB_SOCKETS]; > > > > static struct rte_lpm6 *ipv6_l3fwd_lpm_lookup_struct[NB_SOCKETS]; > > > > > > > > +extern struct lcore_stats stats[RTE_MAX_LCORE]; > > > > + > > > > /* Performing LPM-based lookups. 8< */ static inline uint16_t > > > > lpm_get_ipv4_dst_port(const struct rte_ipv4_hdr *ipv4_hdr, @@ > > > > -153,6 > > > > +155,7 @@ lpm_main_loop(__rte_unused void *dummy) > > > > struct lcore_conf *qconf; > > > > const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / > > > > US_PER_S * BURST_TX_DRAIN_US; > > > > + bool start_count = 0; > > > > > > > > lcore_id = rte_lcore_id(); > > > > qconf = &lcore_conf[lcore_id]; @@ -207,8 +210,22 @@ > > > > lpm_main_loop(__rte_unused void *dummy) > > > > queueid = qconf->rx_queue_list[i].queue_id; > > > > nb_rx = rte_eth_rx_burst(portid, queueid, > > > > pkts_burst, > > > > MAX_PKT_BURST); > > > > - if (nb_rx == 0) > > > > - continue; > > > > + if (start_count == 0) { > > > > + if (nb_rx != 0) > > > > + start_count = 1; > > > > + } > > > > + > > > > + if (start_count == 1) { > > > > + stats[lcore_id].nb_rx_pkts[i] += nb_rx; > > > > + stats[lcore_id].num_loop[i]++; > > > > + if (nb_rx < MAX_PKT_BURST && nb_rx > > > > + > 0) > > > > + > > > > + stats[lcore_id].no_full_loop[i]++; > > > > + > > > > + if (nb_rx == 0) { > > > > + stats[lcore_id].none_loop[i]++; > > > > + continue; > > > > + } > > > > + } > > > > > > > > #if defined RTE_ARCH_X86 || defined __ARM_NEON \ > > > > || defined RTE_ARCH_PPC_64 @@ -223,6 > > > > +240,11 @@ lpm_main_loop(__rte_unused void *dummy) > > > > cur_tsc = rte_rdtsc(); > > > > } > > > > > > > > + for (i = 0; i < n_rx_q; ++i) { > > > > + stats[lcore_id].none_loop_per[i] = > > > (float)stats[lcore_id].none_loop[i]/stats[lcore_id].num_loop[i]*100; > > > > + stats[lcore_id].no_full_loop_per[i] = > > > 
(float)stats[lcore_id].no_full_loop[i]/stats[lcore_id].num_loop[i]*1 > > > 00; > > > > + } > > > > + > > > > return 0; > > > > } > > > > > > > > diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c index > > > > a4f061537e..4727215eae 100644 > > > > --- a/examples/l3fwd/main.c > > > > +++ b/examples/l3fwd/main.c > > > > @@ -53,6 +53,8 @@ > > > > > > > > #define MAX_LCORE_PARAMS 1024 > > > > > > > > +struct lcore_stats stats[RTE_MAX_LCORE]; > > > > + > > > > uint16_t nb_rxd = RX_DESC_DEFAULT; uint16_t nb_txd = > > > > TX_DESC_DEFAULT; > > > > > > > > @@ -1592,6 +1594,26 @@ main(int argc, char **argv) > > > > } else { > > > > rte_eal_mp_wait_lcore(); > > > > > > > > + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; > > > > lcore_id++) { > > > > + if (rte_lcore_is_enabled(lcore_id) == 0) > > > > + continue; > > > > + qconf = &lcore_conf[lcore_id]; > > > > + for (queue = 0; queue < qconf->n_rx_queue; > > > > ++queue) { > > > > + printf("\nlcore id:%d\n", lcore_id); > > > > + printf("queue_id:%d\n",queue); > > > > + printf("Rx pkt %d\n", > stats[lcore_id].nb_rx_pkts[queue]); > > > > + printf("loop number: %d, 0 pkts > > > > + loop:%d, <32 pkts > > > loop:%d\n", > > > > + > > > > + stats[lcore_id].num_loop[queue], > > > stats[lcore_id].none_loop[queue], > > > stats[lcore_id].no_full_loop[queue]); > > > > + printf("0 pkts loop > > > > + percentage:%.2f%%, <32 pkts loop > > > percentage:%.2f%%\n", > > > > + > > > > + stats[lcore_id].none_loop_per[queue], > > > stats[lcore_id].no_full_loop_per[queue]); > > > > + > > > > + printf("------------------------------------\n\n"); > > > > + > > > > + } > > > > + } > > > > + > > > > + nic_xstats_display(0); > > > > + nic_xstats_display(1); > > > > + > > > > RTE_ETH_FOREACH_DEV(portid) { > > > > if ((enabled_port_mask & (1 << portid)) == 0) > > > > continue; > > > > -- > > > > 2.25.1 > > > >