<snip>

> 
> On Thu, May 11, 2023 at 1:55 PM Feifei Wang <feifei.wa...@arm.com>
> wrote:
> >
> > This patch is to collect empty poll of 'rte_eth_rx_burst' functions in
> > dpdk l3fwd application. Empty poll means Rx burst function receives no
> > pkts in one loop.
> >
> > Furthermore, we also add 'nic_xstats_display' API to show NIC counters.
> >
> > Usage:
> > With this patch, no special settings, just run l3fwd, and when you
> > stoping l3fwd, thread will print the info above.
> >
> > Note:
> > This patch has just a slight impact on performance and can be ignored.
How much is the regression?

> 
> IMO, We should not introduce regression as l3fwd kind of uses as reference
> application.
> I think, l3fwd should limit to stats exposed by ethdev(i.e directly from NIC,
> without performance regression).
Agree L3fwd is the reference app. Unfortunately, it is not in a state to debug 
any problems. May be many are just believing the numbers without understanding 
that there are problems.
Can we place these stats under a run time flag and reduce the impact further?

> 
> 
> 
> >
> > dpdk version:23.03
> >
> > Suggested-by: Lijian Zhang <lijian.zh...@arm.com>
> > Signed-off-by: Feifei Wang <feifei.wa...@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> > ---
> >  examples/l3fwd/l3fwd.h     | 68
> ++++++++++++++++++++++++++++++++++++++
> >  examples/l3fwd/l3fwd_lpm.c | 26 +++++++++++++--
> >  examples/l3fwd/main.c      | 22 ++++++++++++
> >  3 files changed, 114 insertions(+), 2 deletions(-)
> >
> > diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index
> > b55855c932..2b3fca62f3 100644
> > --- a/examples/l3fwd/l3fwd.h
> > +++ b/examples/l3fwd/l3fwd.h
> > @@ -56,6 +56,17 @@
> >  #define L3FWD_HASH_ENTRIES             (1024*1024*1)
> >  #endif
> >
> > +struct lcore_stats {
> > +    uint32_t nb_rx_pkts[16];
> > +    uint32_t num_loop[16];
> > +    uint32_t none_loop[16];
> > +    uint32_t no_full_loop[16];
> > +    float  none_loop_per[16];
> > +    float no_full_loop_per[16];
> > +} __rte_cache_aligned;
> > +
> > +extern struct lcore_stats stats[RTE_MAX_LCORE];
> > +
> >  struct parm_cfg {
> >         const char *rule_ipv4_name;
> >         const char *rule_ipv6_name;
> > @@ -115,6 +126,63 @@ extern struct acl_algorithms acl_alg[];
> >
> >  extern uint32_t max_pkt_len;
> >
> > +static inline void
> > +nic_xstats_display(uint32_t port_id)
> > +{
> > +        struct rte_eth_xstat *xstats;
> > +        int cnt_xstats, idx_xstat;
> > +        struct rte_eth_xstat_name *xstats_names;
> > +
> > +        printf("###### NIC extended statistics for port %-2d\n", port_id);
> > +        if (!rte_eth_dev_is_valid_port(port_id)) {
> > +                fprintf(stderr, "Error: Invalid port number %i\n", 
> > port_id);
> > +                return;
> > +        }
> > +
> > +        /* Get count */
> > +        cnt_xstats = rte_eth_xstats_get_names(port_id, NULL, 0);
> > +        if (cnt_xstats  < 0) {
> > +                fprintf(stderr, "Error: Cannot get count of xstats\n");
> > +                return;
> > +        }
> > +
> > +        /* Get id-name lookup table */
> > +        xstats_names = malloc(sizeof(struct rte_eth_xstat_name) * 
> > cnt_xstats);
> > +        if (xstats_names == NULL) {
> > +                fprintf(stderr, "Cannot allocate memory for xstats 
> > lookup\n");
> > +                return;
> > +        }
> > +        if (cnt_xstats != rte_eth_xstats_get_names(
> > +                        port_id, xstats_names, cnt_xstats)) {
> > +                fprintf(stderr, "Error: Cannot get xstats lookup\n");
> > +                free(xstats_names);
> > +                return;
> > +        }
> > +
> > +        /* Get stats themselves */
> > +        xstats = malloc(sizeof(struct rte_eth_xstat) * cnt_xstats);
> > +        if (xstats == NULL) {
> > +                fprintf(stderr, "Cannot allocate memory for xstats\n");
> > +                free(xstats_names);
> > +                return;
> > +        }
> > +        if (cnt_xstats != rte_eth_xstats_get(port_id, xstats, cnt_xstats)) 
> > {
> > +                fprintf(stderr, "Error: Unable to get xstats\n");
> > +                free(xstats_names);
> > +                free(xstats);
> > +                return;
> > +        }
> > +
> > +        /* Display xstats */
> > +        for (idx_xstat = 0; idx_xstat < cnt_xstats; idx_xstat++) {
> > +                printf("%s: %"PRIu64"\n",
> > +                        xstats_names[idx_xstat].name,
> > +                        xstats[idx_xstat].value);
> > +        }
> > +        free(xstats_names);
> > +        free(xstats);
> > +}
> > +
> >  /* Send burst of packets on an output interface */  static inline int
> > send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t port) diff
> > --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c index
> > 4ac1925c84..9e27e954b9 100644
> > --- a/examples/l3fwd/l3fwd_lpm.c
> > +++ b/examples/l3fwd/l3fwd_lpm.c
> > @@ -41,6 +41,8 @@
> >  static struct rte_lpm *ipv4_l3fwd_lpm_lookup_struct[NB_SOCKETS];
> >  static struct rte_lpm6 *ipv6_l3fwd_lpm_lookup_struct[NB_SOCKETS];
> >
> > +extern struct lcore_stats stats[RTE_MAX_LCORE];
> > +
> >  /* Performing LPM-based lookups. 8< */  static inline uint16_t
> > lpm_get_ipv4_dst_port(const struct rte_ipv4_hdr *ipv4_hdr, @@ -153,6
> > +155,7 @@ lpm_main_loop(__rte_unused void *dummy)
> >         struct lcore_conf *qconf;
> >         const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
> >                 US_PER_S * BURST_TX_DRAIN_US;
> > +       bool start_count = 0;
> >
> >         lcore_id = rte_lcore_id();
> >         qconf = &lcore_conf[lcore_id]; @@ -207,8 +210,22 @@
> > lpm_main_loop(__rte_unused void *dummy)
> >                         queueid = qconf->rx_queue_list[i].queue_id;
> >                         nb_rx = rte_eth_rx_burst(portid, queueid, 
> > pkts_burst,
> >                                 MAX_PKT_BURST);
> > -                       if (nb_rx == 0)
> > -                               continue;
> > +                       if (start_count == 0) {
> > +                               if (nb_rx != 0)
> > +                                       start_count = 1;
> > +                       }
> > +
> > +                       if (start_count == 1) {
> > +                               stats[lcore_id].nb_rx_pkts[i] += nb_rx;
> > +                               stats[lcore_id].num_loop[i]++;
> > +                               if (nb_rx < MAX_PKT_BURST && nb_rx > 0)
> > +
> > + stats[lcore_id].no_full_loop[i]++;
> > +
> > +                               if (nb_rx == 0) {
> > +                                       stats[lcore_id].none_loop[i]++;
> > +                                       continue;
> > +                               }
> > +                       }
> >
> >  #if defined RTE_ARCH_X86 || defined __ARM_NEON \
> >                          || defined RTE_ARCH_PPC_64 @@ -223,6 +240,11
> > @@ lpm_main_loop(__rte_unused void *dummy)
> >                 cur_tsc = rte_rdtsc();
> >         }
> >
> > +       for (i = 0; i < n_rx_q; ++i) {
> > +               stats[lcore_id].none_loop_per[i] =
> (float)stats[lcore_id].none_loop[i]/stats[lcore_id].num_loop[i]*100;
> > +               stats[lcore_id].no_full_loop_per[i] =
> (float)stats[lcore_id].no_full_loop[i]/stats[lcore_id].num_loop[i]*100;
> > +       }
> > +
> >         return 0;
> >  }
> >
> > diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c index
> > a4f061537e..4727215eae 100644
> > --- a/examples/l3fwd/main.c
> > +++ b/examples/l3fwd/main.c
> > @@ -53,6 +53,8 @@
> >
> >  #define MAX_LCORE_PARAMS 1024
> >
> > +struct lcore_stats stats[RTE_MAX_LCORE];
> > +
> >  uint16_t nb_rxd = RX_DESC_DEFAULT;
> >  uint16_t nb_txd = TX_DESC_DEFAULT;
> >
> > @@ -1592,6 +1594,26 @@ main(int argc, char **argv)
> >         } else {
> >                 rte_eal_mp_wait_lcore();
> >
> > +               for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> > +                       if (rte_lcore_is_enabled(lcore_id) == 0)
> > +                               continue;
> > +                       qconf = &lcore_conf[lcore_id];
> > +                       for (queue = 0; queue < qconf->n_rx_queue; ++queue) 
> > {
> > +                               printf("\nlcore id:%d\n", lcore_id);
> > +                               printf("queue_id:%d\n",queue);
> > +                               printf("Rx pkt %d\n", 
> > stats[lcore_id].nb_rx_pkts[queue]);
> > +                               printf("loop number: %d, 0 pkts loop:%d, 
> > <32 pkts
> loop:%d\n",
> > +                                       stats[lcore_id].num_loop[queue],
> stats[lcore_id].none_loop[queue], stats[lcore_id].no_full_loop[queue]);
> > +                               printf("0 pkts loop percentage:%.2f%%, <32 
> > pkts loop
> percentage:%.2f%%\n",
> > +                                       
> > stats[lcore_id].none_loop_per[queue],
> stats[lcore_id].no_full_loop_per[queue]);
> > +
> > + printf("------------------------------------\n\n");
> > +
> > +                       }
> > +               }
> > +
> > +               nic_xstats_display(0);
> > +               nic_xstats_display(1);
> > +
> >                 RTE_ETH_FOREACH_DEV(portid) {
> >                         if ((enabled_port_mask & (1 << portid)) == 0)
> >                                 continue;
> > --
> > 2.25.1
> >

Reply via email to