Hi Viacheslav,
> -----Original Message----- > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Viacheslav Ovsiienko > Sent: Monday, May 27, 2019 6:47 AM > To: dev@dpdk.org > Cc: Yigit, Ferruh <ferruh.yi...@intel.com> > Subject: [dpdk-dev] [RFC] app/testpmd: add profiling for Rx/Tx burst routines > > There is the testpmd configuration option called > RTE_TEST_PMD_RECORD_CORE_CYCLES, if this one is turned on the testpmd > application measures the CPU clocks spent within forwarding loop. This time is > the sum of execution times of rte_eth_rx_burst(), rte_eth_tx_burst(), > rte_delay_us(), > rte_pktmbuf_free() and so on, depending on fwd mode set. > > While debugging and performance optimization of datapath burst routines it > would be useful to see the pure execution times of these ones. It is proposed > to > add separated profiling > options: > > CONFIG_RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > enables gathering profiling data for transmit datapath, > ticks spent within rte_eth_tx_burst() > > CONFIG_RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > enables gathering profiling data for receive datapath, > ticks spent within rte_eth_rx_burst() > > Signed-off-by: Viacheslav Ovsiienko <viachesl...@mellanox.com> > --- > app/test-pmd/csumonly.c | 25 ++++++++++++------------- > app/test-pmd/flowgen.c | 25 +++++++++++++------------ > app/test-pmd/icmpecho.c | 26 +++++++++++++------------- > app/test-pmd/iofwd.c | 24 ++++++++++++------------ > app/test-pmd/macfwd.c | 24 +++++++++++++----------- > app/test-pmd/macswap.c | 26 ++++++++++++++------------ > app/test-pmd/rxonly.c | 17 ++++++----------- > app/test-pmd/softnicfwd.c | 24 ++++++++++++------------ > app/test-pmd/testpmd.c | 32 ++++++++++++++++++++++++++++++++ > app/test-pmd/testpmd.h | 40 > ++++++++++++++++++++++++++++++++++++++++ > app/test-pmd/txonly.c | 23 +++++++++++------------ > config/common_base | 2 ++ > 12 files changed, 180 insertions(+), 108 deletions(-) > > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index > 
f4f2a7b..251e179 100644 > --- a/app/test-pmd/csumonly.c > +++ b/app/test-pmd/csumonly.c > @@ -710,19 +710,19 @@ struct simple_gre_hdr { > uint16_t nb_segments = 0; > int ret; > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - uint64_t start_tsc; > - uint64_t end_tsc; > - uint64_t core_cycles; > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > + uint64_t start_tx_tsc; Should the RTE_TEST_PMD_RECORD_CORE_CYCLES macro be checked here too? > #endif > - > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - start_tsc = rte_rdtsc(); > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > + uint64_t start_rx_tsc; > #endif > > /* receive a burst of packet */ > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, > nb_pkt_per_burst); > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > if (unlikely(nb_rx == 0)) > return; > #ifdef RTE_TEST_PMD_RECORD_BURST_STATS > @@ -982,8 +982,10 @@ struct simple_gre_hdr { > printf("Preparing packet burst to transmit failed: %s\n", > rte_strerror(rte_errno)); > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, tx_pkts_burst, > nb_prep); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > /* > * Retry if necessary > @@ -992,8 +994,10 @@ struct simple_gre_hdr { > retry = 0; > while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { > rte_delay_us(burst_tx_delay_time); > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > &tx_pkts_burst[nb_tx], nb_rx - nb_tx); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > } > } > fs->tx_packets += nb_tx; > @@ -1010,12 +1014,7 @@ struct simple_gre_hdr { > rte_pktmbuf_free(tx_pkts_burst[nb_tx]); > } while (++nb_tx < nb_rx); > } > - > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - end_tsc = rte_rdtsc(); > - core_cycles = (end_tsc - start_tsc); > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > -#endif > + 
TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > } > > struct fwd_engine csum_fwd_engine = { > diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c index > 3214e3c..b128e68 100644 > --- a/app/test-pmd/flowgen.c > +++ b/app/test-pmd/flowgen.c > @@ -130,20 +130,21 @@ > uint16_t i; > uint32_t retry; > uint64_t tx_offloads; > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - uint64_t start_tsc; > - uint64_t end_tsc; > - uint64_t core_cycles; > -#endif > static int next_flow = 0; > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - start_tsc = rte_rdtsc(); > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) Should the RTE_TEST_PMD_RECORD_CORE_CYCLES macro be checked here too? > + uint64_t start_tx_tsc; > +#endif > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > + uint64_t start_rx_tsc; > #endif > > /* Receive a burst of packets and discard them. */ > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, > nb_pkt_per_burst); > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > fs->rx_packets += nb_rx; > > for (i = 0; i < nb_rx; i++) > @@ -212,7 +213,9 @@ > next_flow = (next_flow + 1) % cfg_n_flows; > } > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > /* > * Retry if necessary > */ > @@ -220,8 +223,10 @@ > retry = 0; > while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { > rte_delay_us(burst_tx_delay_time); > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > &pkts_burst[nb_tx], nb_rx - nb_tx); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > } > } > fs->tx_packets += nb_tx; > @@ -239,11 +244,7 @@ > rte_pktmbuf_free(pkts_burst[nb_tx]); > } while (++nb_tx < nb_pkt); > } > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - end_tsc = rte_rdtsc(); > - core_cycles = (end_tsc - start_tsc); > - fs->core_cycles = 
(uint64_t) (fs->core_cycles + core_cycles); > -#endif > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > } > > struct fwd_engine flow_gen_engine = { > diff --git a/app/test-pmd/icmpecho.c b/app/test-pmd/icmpecho.c index > 55d266d..a539fe8 100644 > --- a/app/test-pmd/icmpecho.c > +++ b/app/test-pmd/icmpecho.c > @@ -293,21 +293,22 @@ > uint32_t cksum; > uint8_t i; > int l2_len; > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - uint64_t start_tsc; > - uint64_t end_tsc; > - uint64_t core_cycles; > -#endif > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - start_tsc = rte_rdtsc(); > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > + uint64_t start_tx_tsc; > +#endif > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > + uint64_t start_rx_tsc; > #endif > > /* > * First, receive a burst of packets. > */ > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, > nb_pkt_per_burst); > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > if (unlikely(nb_rx == 0)) > return; > > @@ -487,8 +488,10 @@ > > /* Send back ICMP echo replies, if any. 
*/ > if (nb_replies > 0) { > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, > nb_replies); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > /* > * Retry if necessary > */ > @@ -497,10 +500,12 @@ > while (nb_tx < nb_replies && > retry++ < burst_tx_retry_num) { > rte_delay_us(burst_tx_delay_time); > + > TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx += rte_eth_tx_burst(fs->tx_port, > fs->tx_queue, > &pkts_burst[nb_tx], > nb_replies - nb_tx); > + TEST_PMD_CORE_CYC_TX_ADD(fs, > start_tx_tsc); > } > } > fs->tx_packets += nb_tx; > @@ -514,12 +519,7 @@ > } while (++nb_tx < nb_replies); > } > } > - > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - end_tsc = rte_rdtsc(); > - core_cycles = (end_tsc - start_tsc); > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > -#endif > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > } > > struct fwd_engine icmp_echo_engine = { > diff --git a/app/test-pmd/iofwd.c b/app/test-pmd/iofwd.c index > 9dce76e..dc66a88 100644 > --- a/app/test-pmd/iofwd.c > +++ b/app/test-pmd/iofwd.c > @@ -51,21 +51,21 @@ > uint16_t nb_tx; > uint32_t retry; > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - uint64_t start_tsc; > - uint64_t end_tsc; > - uint64_t core_cycles; > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) Should the RTE_TEST_PMD_RECORD_CORE_CYCLES macro be checked here too? > + uint64_t start_tx_tsc; > #endif > - > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - start_tsc = rte_rdtsc(); > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > + uint64_t start_rx_tsc; > #endif > > /* > * Receive a burst of packets and forward them. 
> */ > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, > pkts_burst, nb_pkt_per_burst); > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > if (unlikely(nb_rx == 0)) > return; > fs->rx_packets += nb_rx; > @@ -73,8 +73,10 @@ > #ifdef RTE_TEST_PMD_RECORD_BURST_STATS > fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; > #endif > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > pkts_burst, nb_rx); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > /* > * Retry if necessary > */ > @@ -82,8 +84,10 @@ > retry = 0; > while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { > rte_delay_us(burst_tx_delay_time); > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > &pkts_burst[nb_tx], nb_rx - nb_tx); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > } > } > fs->tx_packets += nb_tx; > @@ -96,11 +100,7 @@ > rte_pktmbuf_free(pkts_burst[nb_tx]); > } while (++nb_tx < nb_rx); > } > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - end_tsc = rte_rdtsc(); > - core_cycles = (end_tsc - start_tsc); > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > -#endif > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > } > > struct fwd_engine io_fwd_engine = { > diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c index > 7cac757..2fd38ea 100644 > --- a/app/test-pmd/macfwd.c > +++ b/app/test-pmd/macfwd.c > @@ -56,21 +56,23 @@ > uint16_t i; > uint64_t ol_flags = 0; > uint64_t tx_offloads; > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - uint64_t start_tsc; > - uint64_t end_tsc; > - uint64_t core_cycles; > + > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) Should the RTE_TEST_PMD_RECORD_CORE_CYCLES macro be checked here too? 
> + uint64_t start_tx_tsc; > #endif > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > + uint64_t start_rx_tsc; > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - start_tsc = rte_rdtsc(); > #endif > > /* > * Receive a burst of packets and forward them. > */ > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, > nb_pkt_per_burst); > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > if (unlikely(nb_rx == 0)) > return; > > @@ -103,7 +105,9 @@ > mb->vlan_tci = txp->tx_vlan_id; > mb->vlan_tci_outer = txp->tx_vlan_id_outer; > } > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > /* > * Retry if necessary > */ > @@ -111,8 +115,10 @@ > retry = 0; > while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { > rte_delay_us(burst_tx_delay_time); > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > &pkts_burst[nb_tx], nb_rx - nb_tx); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > } > } > > @@ -126,11 +132,7 @@ > rte_pktmbuf_free(pkts_burst[nb_tx]); > } while (++nb_tx < nb_rx); > } > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - end_tsc = rte_rdtsc(); > - core_cycles = (end_tsc - start_tsc); > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > -#endif > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > } > > struct fwd_engine mac_fwd_engine = { > diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c index > 71af916..b22acdb 100644 > --- a/app/test-pmd/macswap.c > +++ b/app/test-pmd/macswap.c > @@ -86,21 +86,22 @@ > uint16_t nb_rx; > uint16_t nb_tx; > uint32_t retry; > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - uint64_t start_tsc; > - uint64_t end_tsc; > - uint64_t core_cycles; > -#endif > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - start_tsc = rte_rdtsc(); > +#if 
defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) Should the RTE_TEST_PMD_RECORD_CORE_CYCLES macro be checked here too? > + uint64_t start_tx_tsc; > +#endif > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > + uint64_t start_rx_tsc; > #endif > > /* > * Receive a burst of packets and forward them. > */ > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, > nb_pkt_per_burst); > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > if (unlikely(nb_rx == 0)) > return; > > @@ -112,7 +113,10 @@ > > do_macswap(pkts_burst, nb_rx, txp); > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > + > /* > * Retry if necessary > */ > @@ -120,8 +124,10 @@ > retry = 0; > while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { > rte_delay_us(burst_tx_delay_time); > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > &pkts_burst[nb_tx], nb_rx - nb_tx); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > } > } > fs->tx_packets += nb_tx; > @@ -134,11 +140,7 @@ > rte_pktmbuf_free(pkts_burst[nb_tx]); > } while (++nb_tx < nb_rx); > } > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - end_tsc = rte_rdtsc(); > - core_cycles = (end_tsc - start_tsc); > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > -#endif > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > } > > struct fwd_engine mac_swap_engine = { > diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c index > 5c65fc4..d1da357 100644 > --- a/app/test-pmd/rxonly.c > +++ b/app/test-pmd/rxonly.c > @@ -50,19 +50,18 @@ > uint16_t nb_rx; > uint16_t i; > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - uint64_t start_tsc; > - uint64_t end_tsc; > - uint64_t core_cycles; > - > - start_tsc = rte_rdtsc(); > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > + 
defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > + uint64_t start_rx_tsc; > #endif > > /* > * Receive a burst of packets. > */ > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, > nb_pkt_per_burst); > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > if (unlikely(nb_rx == 0)) > return; > > @@ -73,11 +72,7 @@ > for (i = 0; i < nb_rx; i++) > rte_pktmbuf_free(pkts_burst[i]); > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - end_tsc = rte_rdtsc(); > - core_cycles = (end_tsc - start_tsc); > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > -#endif > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > } > > struct fwd_engine rx_only_engine = { > diff --git a/app/test-pmd/softnicfwd.c b/app/test-pmd/softnicfwd.c index > 94e6669..9b2b0e6 100644 > --- a/app/test-pmd/softnicfwd.c > +++ b/app/test-pmd/softnicfwd.c > @@ -87,35 +87,39 @@ struct tm_hierarchy { > uint16_t nb_tx; > uint32_t retry; > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - uint64_t start_tsc; > - uint64_t end_tsc; > - uint64_t core_cycles; > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) Should the RTE_TEST_PMD_RECORD_CORE_CYCLES macro be checked here too? 
> + uint64_t start_tx_tsc; > #endif > - > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - start_tsc = rte_rdtsc(); > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > + uint64_t start_rx_tsc; > #endif > > /* Packets Receive */ > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, > pkts_burst, nb_pkt_per_burst); > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > fs->rx_packets += nb_rx; > > #ifdef RTE_TEST_PMD_RECORD_BURST_STATS > fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; > #endif > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > pkts_burst, nb_rx); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > /* Retry if necessary */ > if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) { > retry = 0; > while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { > rte_delay_us(burst_tx_delay_time); > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > &pkts_burst[nb_tx], nb_rx - nb_tx); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > } > } > fs->tx_packets += nb_tx; > @@ -130,11 +134,7 @@ struct tm_hierarchy { > rte_pktmbuf_free(pkts_burst[nb_tx]); > } while (++nb_tx < nb_rx); > } > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - end_tsc = rte_rdtsc(); > - core_cycles = (end_tsc - start_tsc); > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > -#endif > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > } > > static void > diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index > f0061d9..de8478f 100644 > --- a/app/test-pmd/testpmd.c > +++ b/app/test-pmd/testpmd.c > @@ -1483,6 +1483,12 @@ struct extmem_param { #ifdef > RTE_TEST_PMD_RECORD_CORE_CYCLES > uint64_t fwd_cycles = 0; > #endif > +#ifdef RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > + uint64_t rx_cycles = 0; > +#endif > +#ifdef RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > + uint64_t tx_cycles = 0; > +#endif > uint64_t 
total_recv = 0; > uint64_t total_xmit = 0; > struct rte_port *port; > @@ -1513,6 +1519,12 @@ struct extmem_param { #ifdef > RTE_TEST_PMD_RECORD_CORE_CYCLES > fwd_cycles += fs->core_cycles; > #endif > +#ifdef RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > + rx_cycles += fs->core_rx_cycles; > +#endif > +#ifdef RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > + tx_cycles += fs->core_tx_cycles; > +#endif > } > for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) { > uint8_t j; > @@ -1648,6 +1660,20 @@ struct extmem_param { > (unsigned int)(fwd_cycles / total_recv), > fwd_cycles, total_recv); > #endif > +#ifdef RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > + if (total_recv > 0) > + printf("\n rx CPU cycles/packet=%u (total cycles=" > + "%"PRIu64" / total RX packets=%"PRIu64")\n", > + (unsigned int)(rx_cycles / total_recv), > + rx_cycles, total_recv); > +#endif > +#ifdef RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > + if (total_xmit > 0) > + printf("\n tx CPU cycles/packet=%u (total cycles=" > + "%"PRIu64" / total TX packets=%"PRIu64")\n", > + (unsigned int)(tx_cycles / total_xmit), > + tx_cycles, total_xmit); > +#endif > } > > void > @@ -1678,6 +1704,12 @@ struct extmem_param { #ifdef > RTE_TEST_PMD_RECORD_CORE_CYCLES > fs->core_cycles = 0; > #endif > +#ifdef RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > + fs->core_rx_cycles = 0; > +#endif > +#ifdef RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > + fs->core_tx_cycles = 0; > +#endif > } > } > > diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index > 1d9b7a2..4e8af8a 100644 > --- a/app/test-pmd/testpmd.h > +++ b/app/test-pmd/testpmd.h > @@ -130,12 +130,52 @@ struct fwd_stream { #ifdef > RTE_TEST_PMD_RECORD_CORE_CYCLES > uint64_t core_cycles; /**< used for RX and TX processing */ > #endif > +#ifdef RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > + uint64_t core_tx_cycles; /**< used for tx_burst processing */ > +#endif > +#ifdef RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > + uint64_t core_rx_cycles; /**< used for rx_burst processing */ > +#endif > #ifdef RTE_TEST_PMD_RECORD_BURST_STATS > 
struct pkt_burst_stats rx_burst_stats; > struct pkt_burst_stats tx_burst_stats; #endif }; > > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > +#define TEST_PMD_CORE_CYC_TX_START(a) {a = rte_rdtsc(); } #else #define > +TEST_PMD_CORE_CYC_TX_START(a) #endif > + > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > +#define TEST_PMD_CORE_CYC_RX_START(a) {a = rte_rdtsc(); } #else #define > +TEST_PMD_CORE_CYC_RX_START(a) #endif > + > +#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > +#define TEST_PMD_CORE_CYC_FWD_ADD(fs, s) \ {uint64_t end_tsc = > +rte_rdtsc(); fs->core_cycles += end_tsc - (s); } #else #define > +TEST_PMD_CORE_CYC_FWD_ADD(fs, s) #endif > + > +#ifdef RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > +#define TEST_PMD_CORE_CYC_TX_ADD(fs, s) \ {uint64_t end_tsc = > +rte_rdtsc(); fs->core_tx_cycles += end_tsc - (s); } #else #define > +TEST_PMD_CORE_CYC_TX_ADD(fs, s) #endif > + > +#ifdef RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > +#define TEST_PMD_CORE_CYC_RX_ADD(fs, s) \ {uint64_t end_tsc = > +rte_rdtsc(); fs->core_rx_cycles += end_tsc - (s); } #else #define > +TEST_PMD_CORE_CYC_RX_ADD(fs, s) #endif > + > /** Descriptor for a single flow. */ > struct port_flow { > struct port_flow *next; /**< Next flow in list. 
*/ diff --git > a/app/test- > pmd/txonly.c b/app/test-pmd/txonly.c index fdfca14..fe3045a 100644 > --- a/app/test-pmd/txonly.c > +++ b/app/test-pmd/txonly.c > @@ -241,16 +241,16 @@ > uint32_t retry; > uint64_t ol_flags = 0; > uint64_t tx_offloads; > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - uint64_t start_tsc; > - uint64_t end_tsc; > - uint64_t core_cycles; > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > + uint64_t start_tx_tsc; > +#endif > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) > + uint64_t start_rx_tsc; > #endif > > #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - start_tsc = rte_rdtsc(); > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > #endif > - > mbp = current_fwd_lcore()->mbp; > txp = &ports[fs->tx_port]; > tx_offloads = txp->dev_conf.txmode.offloads; @@ -302,7 +302,9 @@ > if (nb_pkt == 0) > return; > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > /* > * Retry if necessary > */ > @@ -310,8 +312,10 @@ > retry = 0; > while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) { > rte_delay_us(burst_tx_delay_time); > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > &pkts_burst[nb_tx], nb_pkt - nb_tx); > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > } > } > fs->tx_packets += nb_tx; > @@ -334,12 +338,7 @@ > rte_pktmbuf_free(pkts_burst[nb_tx]); > } while (++nb_tx < nb_pkt); > } > - > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > - end_tsc = rte_rdtsc(); > - core_cycles = (end_tsc - start_tsc); > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > -#endif > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > } > > static void > diff --git a/config/common_base b/config/common_base index > 6b96e0e..6e84af4 100644 > --- a/config/common_base > +++ b/config/common_base > @@ -998,6 +998,8 @@ CONFIG_RTE_PROC_INFO=n # > CONFIG_RTE_TEST_PMD=y > CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n > 
+CONFIG_RTE_TEST_PMD_RECORD_CORE_RX_CYCLES=n > +CONFIG_RTE_TEST_PMD_RECORD_CORE_TX_CYCLES=n > CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n Should the RECORD macros be documented in the run_app.rst file? > # > -- > 1.8.3.1 Regards, Bernard