Hi Bernard, thanks for the comment.
> > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - uint64_t start_tsc; > > - uint64_t end_tsc; > > - uint64_t core_cycles; > > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > > + uint64_t start_tx_tsc; > > Should the RTE_TEST_PMD_RECORD_CORE_CYCLES macro be checked here > too? > I think it should not. All of the options: RTE_TEST_PMD_RECORD_CORE_CYCLES RTE_TEST_PMD_RECORD_CORE_TX_CYCLES RTE_TEST_PMD_RECORD_CORE_RX_CYCLES are supposed to be defined independently. I've compiled for all 8 possible CORE_xx_CYCLES combinations. RTE_TEST_PMD_RECORD_CORE_TX_CYCLES uses the dedicated TSC start point "start_tx_tsc". RTE_TEST_PMD_RECORD_CORE_CYCLES and RTE_TEST_PMD_RECORD_CORE_RX_CYCLES share the "start_rx_tsc". With best regards, Slava (Viacheslav) > -----Original Message----- > From: Iremonger, Bernard <bernard.iremon...@intel.com> > Sent: Friday, June 7, 2019 19:08 > To: Slava Ovsiienko <viachesl...@mellanox.com>; dev@dpdk.org > Cc: Yigit, Ferruh <ferruh.yi...@intel.com> > Subject: RE: [dpdk-dev] [RFC] app/testpmd: add profiling for Rx/Tx burst > routines > > Hi Viacheslav, > > > > -----Original Message----- > > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Viacheslav > > Ovsiienko > > Sent: Monday, May 27, 2019 6:47 AM > > To: dev@dpdk.org > > Cc: Yigit, Ferruh <ferruh.yi...@intel.com> > > Subject: [dpdk-dev] [RFC] app/testpmd: add profiling for Rx/Tx burst > > routines > > > > There is the testpmd configuration option called > > RTE_TEST_PMD_RECORD_CORE_CYCLES, if this one is turned on the > testpmd > > application measures the CPU clocks spent within forwarding loop. This > > time is the sum of execution times of rte_eth_rx_burst(), > > rte_eth_tx_burst(), rte_delay_us(), > > rte_pktmbuf_free() and so on, depending on fwd mode set. > > > > While debugging and performance optimization of datapath burst > > routines it would be useful to see the pure execution times of these > > ones. 
It is proposed to add separate profiling > > options: > > > > CONFIG_RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > > enables gathering profiling data for transmit datapath, > > ticks spent within rte_eth_tx_burst() > > > > CONFIG_RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > > enables gathering profiling data for receive datapath, > > ticks spent within rte_eth_rx_burst() > > > > Signed-off-by: Viacheslav Ovsiienko <viachesl...@mellanox.com> > > --- > > app/test-pmd/csumonly.c | 25 ++++++++++++------------- > > app/test-pmd/flowgen.c | 25 +++++++++++++------------ > > app/test-pmd/icmpecho.c | 26 +++++++++++++------------- > > app/test-pmd/iofwd.c | 24 ++++++++++++------------ > > app/test-pmd/macfwd.c | 24 +++++++++++++----------- > > app/test-pmd/macswap.c | 26 ++++++++++++++------------ > > app/test-pmd/rxonly.c | 17 ++++++----------- > > app/test-pmd/softnicfwd.c | 24 ++++++++++++------------ > > app/test-pmd/testpmd.c | 32 ++++++++++++++++++++++++++++++++ > > app/test-pmd/testpmd.h | 40 > > ++++++++++++++++++++++++++++++++++++++++ > > app/test-pmd/txonly.c | 23 +++++++++++------------ > > config/common_base | 2 ++ > > 12 files changed, 180 insertions(+), 108 deletions(-) > > > > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index > > f4f2a7b..251e179 100644 > > --- a/app/test-pmd/csumonly.c > > +++ b/app/test-pmd/csumonly.c > > @@ -710,19 +710,19 @@ struct simple_gre_hdr { > > uint16_t nb_segments = 0; > > int ret; > > > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - uint64_t start_tsc; > > - uint64_t end_tsc; > > - uint64_t core_cycles; > > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > > + uint64_t start_tx_tsc; > > Should the RTE_TEST_PMD_RECORD_CORE_CYCLES macro be checked here > too? 
> > > #endif > > - > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - start_tsc = rte_rdtsc(); > > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > > + uint64_t start_rx_tsc; > > #endif > > > > /* receive a burst of packet */ > > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, > > nb_pkt_per_burst); > > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > > if (unlikely(nb_rx == 0)) > > return; > > #ifdef RTE_TEST_PMD_RECORD_BURST_STATS @@ -982,8 +982,10 @@ > struct > > simple_gre_hdr { > > printf("Preparing packet burst to transmit failed: %s\n", > > rte_strerror(rte_errno)); > > > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, tx_pkts_burst, > > nb_prep); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > > > /* > > * Retry if necessary > > @@ -992,8 +994,10 @@ struct simple_gre_hdr { > > retry = 0; > > while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { > > rte_delay_us(burst_tx_delay_time); > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > > &tx_pkts_burst[nb_tx], nb_rx - > nb_tx); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > } > > } > > fs->tx_packets += nb_tx; > > @@ -1010,12 +1014,7 @@ struct simple_gre_hdr { > > rte_pktmbuf_free(tx_pkts_burst[nb_tx]); > > } while (++nb_tx < nb_rx); > > } > > - > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - end_tsc = rte_rdtsc(); > > - core_cycles = (end_tsc - start_tsc); > > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > > -#endif > > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > > } > > > > struct fwd_engine csum_fwd_engine = { diff --git > > a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c index > > 3214e3c..b128e68 100644 > > --- a/app/test-pmd/flowgen.c > > +++ b/app/test-pmd/flowgen.c > > @@ -130,20 +130,21 @@ > > uint16_t i; > > uint32_t retry; > > uint64_t 
tx_offloads; > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - uint64_t start_tsc; > > - uint64_t end_tsc; > > - uint64_t core_cycles; > > -#endif > > static int next_flow = 0; > > > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - start_tsc = rte_rdtsc(); > > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > > Should the RTE_TEST_PMD_RECORD_CORE_CYCLES macro be checked here > too? > > > + uint64_t start_tx_tsc; > > +#endif > > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > > + uint64_t start_rx_tsc; > > #endif > > > > /* Receive a burst of packets and discard them. */ > > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, > > nb_pkt_per_burst); > > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > > fs->rx_packets += nb_rx; > > > > for (i = 0; i < nb_rx; i++) > > @@ -212,7 +213,9 @@ > > next_flow = (next_flow + 1) % cfg_n_flows; > > } > > > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, > > nb_pkt); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > /* > > * Retry if necessary > > */ > > @@ -220,8 +223,10 @@ > > retry = 0; > > while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { > > rte_delay_us(burst_tx_delay_time); > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > > &pkts_burst[nb_tx], nb_rx - nb_tx); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > } > > } > > fs->tx_packets += nb_tx; > > @@ -239,11 +244,7 @@ > > rte_pktmbuf_free(pkts_burst[nb_tx]); > > } while (++nb_tx < nb_pkt); > > } > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - end_tsc = rte_rdtsc(); > > - core_cycles = (end_tsc - start_tsc); > > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > > -#endif > > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > > } > > > > struct fwd_engine flow_gen_engine = { diff --git > > 
a/app/test-pmd/icmpecho.c b/app/test-pmd/icmpecho.c index > > 55d266d..a539fe8 100644 > > --- a/app/test-pmd/icmpecho.c > > +++ b/app/test-pmd/icmpecho.c > > @@ -293,21 +293,22 @@ > > uint32_t cksum; > > uint8_t i; > > int l2_len; > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - uint64_t start_tsc; > > - uint64_t end_tsc; > > - uint64_t core_cycles; > > -#endif > > > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - start_tsc = rte_rdtsc(); > > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > > + uint64_t start_tx_tsc; > > +#endif > > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > > + uint64_t start_rx_tsc; > > #endif > > > > /* > > * First, receive a burst of packets. > > */ > > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, > > nb_pkt_per_burst); > > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > > if (unlikely(nb_rx == 0)) > > return; > > > > @@ -487,8 +488,10 @@ > > > > /* Send back ICMP echo replies, if any. 
*/ > > if (nb_replies > 0) { > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > pkts_burst, > > nb_replies); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > /* > > * Retry if necessary > > */ > > @@ -497,10 +500,12 @@ > > while (nb_tx < nb_replies && > > retry++ < burst_tx_retry_num) { > > rte_delay_us(burst_tx_delay_time); > > + > > TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx += rte_eth_tx_burst(fs->tx_port, > > fs->tx_queue, > > &pkts_burst[nb_tx], > > nb_replies - nb_tx); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, > > start_tx_tsc); > > } > > } > > fs->tx_packets += nb_tx; > > @@ -514,12 +519,7 @@ > > } while (++nb_tx < nb_replies); > > } > > } > > - > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - end_tsc = rte_rdtsc(); > > - core_cycles = (end_tsc - start_tsc); > > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > > -#endif > > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > > } > > > > struct fwd_engine icmp_echo_engine = { diff --git > > a/app/test-pmd/iofwd.c b/app/test-pmd/iofwd.c index > > 9dce76e..dc66a88 100644 > > --- a/app/test-pmd/iofwd.c > > +++ b/app/test-pmd/iofwd.c > > @@ -51,21 +51,21 @@ > > uint16_t nb_tx; > > uint32_t retry; > > > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - uint64_t start_tsc; > > - uint64_t end_tsc; > > - uint64_t core_cycles; > > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > > Should the RTE_TEST_PMD_RECORD_CORE_CYCLES macro be checked here > too? > > > + uint64_t start_tx_tsc; > > #endif > > - > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - start_tsc = rte_rdtsc(); > > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > > + uint64_t start_rx_tsc; > > #endif > > > > /* > > * Receive a burst of packets and forward them. 
> > */ > > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, > > pkts_burst, nb_pkt_per_burst); > > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > > if (unlikely(nb_rx == 0)) > > return; > > fs->rx_packets += nb_rx; > > @@ -73,8 +73,10 @@ > > #ifdef RTE_TEST_PMD_RECORD_BURST_STATS > > fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; > > #endif > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > > pkts_burst, nb_rx); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > /* > > * Retry if necessary > > */ > > @@ -82,8 +84,10 @@ > > retry = 0; > > while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { > > rte_delay_us(burst_tx_delay_time); > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > > &pkts_burst[nb_tx], nb_rx - nb_tx); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > } > > } > > fs->tx_packets += nb_tx; > > @@ -96,11 +100,7 @@ > > rte_pktmbuf_free(pkts_burst[nb_tx]); > > } while (++nb_tx < nb_rx); > > } > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - end_tsc = rte_rdtsc(); > > - core_cycles = (end_tsc - start_tsc); > > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > > -#endif > > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > > } > > > > struct fwd_engine io_fwd_engine = { > > diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c index > > 7cac757..2fd38ea 100644 > > --- a/app/test-pmd/macfwd.c > > +++ b/app/test-pmd/macfwd.c > > @@ -56,21 +56,23 @@ > > uint16_t i; > > uint64_t ol_flags = 0; > > uint64_t tx_offloads; > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - uint64_t start_tsc; > > - uint64_t end_tsc; > > - uint64_t core_cycles; > > + > > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > > Should the RTE_TEST_PMD_RECORD_CORE_CYCLES macro be checked here > too? 
> > > + uint64_t start_tx_tsc; > > #endif > > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > > + uint64_t start_rx_tsc; > > > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - start_tsc = rte_rdtsc(); > > #endif > > > > /* > > * Receive a burst of packets and forward them. > > */ > > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, > > nb_pkt_per_burst); > > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > > if (unlikely(nb_rx == 0)) > > return; > > > > @@ -103,7 +105,9 @@ > > mb->vlan_tci = txp->tx_vlan_id; > > mb->vlan_tci_outer = txp->tx_vlan_id_outer; > > } > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, > > nb_rx); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > /* > > * Retry if necessary > > */ > > @@ -111,8 +115,10 @@ > > retry = 0; > > while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { > > rte_delay_us(burst_tx_delay_time); > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > > &pkts_burst[nb_tx], nb_rx - nb_tx); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > } > > } > > > > @@ -126,11 +132,7 @@ > > rte_pktmbuf_free(pkts_burst[nb_tx]); > > } while (++nb_tx < nb_rx); > > } > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - end_tsc = rte_rdtsc(); > > - core_cycles = (end_tsc - start_tsc); > > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > > -#endif > > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > > } > > > > struct fwd_engine mac_fwd_engine = { > > diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c index > > 71af916..b22acdb 100644 > > --- a/app/test-pmd/macswap.c > > +++ b/app/test-pmd/macswap.c > > @@ -86,21 +86,22 @@ > > uint16_t nb_rx; > > uint16_t nb_tx; > > uint32_t retry; > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - uint64_t start_tsc; > > - uint64_t end_tsc; > > 
- uint64_t core_cycles; > > -#endif > > > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - start_tsc = rte_rdtsc(); > > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > > Should the RTE_TEST_PMD_RECORD_CORE_CYCLES macro be checked here > too? > > > + uint64_t start_tx_tsc; > > +#endif > > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > > + uint64_t start_rx_tsc; > > #endif > > > > /* > > * Receive a burst of packets and forward them. > > */ > > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, > > nb_pkt_per_burst); > > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > > if (unlikely(nb_rx == 0)) > > return; > > > > @@ -112,7 +113,10 @@ > > > > do_macswap(pkts_burst, nb_rx, txp); > > > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, > > nb_rx); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > + > > /* > > * Retry if necessary > > */ > > @@ -120,8 +124,10 @@ > > retry = 0; > > while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { > > rte_delay_us(burst_tx_delay_time); > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > > &pkts_burst[nb_tx], nb_rx - nb_tx); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > } > > } > > fs->tx_packets += nb_tx; > > @@ -134,11 +140,7 @@ > > rte_pktmbuf_free(pkts_burst[nb_tx]); > > } while (++nb_tx < nb_rx); > > } > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - end_tsc = rte_rdtsc(); > > - core_cycles = (end_tsc - start_tsc); > > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > > -#endif > > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > > } > > > > struct fwd_engine mac_swap_engine = { diff --git > > a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c index > > 5c65fc4..d1da357 100644 > > --- a/app/test-pmd/rxonly.c > > +++ b/app/test-pmd/rxonly.c > > @@ -50,19 +50,18 @@ > > 
uint16_t nb_rx; > > uint16_t i; > > > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - uint64_t start_tsc; > > - uint64_t end_tsc; > > - uint64_t core_cycles; > > - > > - start_tsc = rte_rdtsc(); > > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > > + uint64_t start_rx_tsc; > > #endif > > > > /* > > * Receive a burst of packets. > > */ > > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, > > nb_pkt_per_burst); > > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > > if (unlikely(nb_rx == 0)) > > return; > > > > @@ -73,11 +72,7 @@ > > for (i = 0; i < nb_rx; i++) > > rte_pktmbuf_free(pkts_burst[i]); > > > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - end_tsc = rte_rdtsc(); > > - core_cycles = (end_tsc - start_tsc); > > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > > -#endif > > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > > } > > > > struct fwd_engine rx_only_engine = { > > diff --git a/app/test-pmd/softnicfwd.c b/app/test-pmd/softnicfwd.c > > index > > 94e6669..9b2b0e6 100644 > > --- a/app/test-pmd/softnicfwd.c > > +++ b/app/test-pmd/softnicfwd.c > > @@ -87,35 +87,39 @@ struct tm_hierarchy { > > uint16_t nb_tx; > > uint32_t retry; > > > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - uint64_t start_tsc; > > - uint64_t end_tsc; > > - uint64_t core_cycles; > > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > > Should the RTE_TEST_PMD_RECORD_CORE_CYCLES macro be checked here > too? 
> > > + uint64_t start_tx_tsc; > > #endif > > - > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - start_tsc = rte_rdtsc(); > > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > > + uint64_t start_rx_tsc; > > #endif > > > > /* Packets Receive */ > > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, > > pkts_burst, nb_pkt_per_burst); > > + TEST_PMD_CORE_CYC_RX_ADD(fs, start_rx_tsc); > > fs->rx_packets += nb_rx; > > > > #ifdef RTE_TEST_PMD_RECORD_BURST_STATS > > fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; > > #endif > > > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > > pkts_burst, nb_rx); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > > > /* Retry if necessary */ > > if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) { > > retry = 0; > > while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { > > rte_delay_us(burst_tx_delay_time); > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > > &pkts_burst[nb_tx], nb_rx - nb_tx); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > } > > } > > fs->tx_packets += nb_tx; > > @@ -130,11 +134,7 @@ struct tm_hierarchy { > > rte_pktmbuf_free(pkts_burst[nb_tx]); > > } while (++nb_tx < nb_rx); > > } > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - end_tsc = rte_rdtsc(); > > - core_cycles = (end_tsc - start_tsc); > > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > > -#endif > > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > > } > > > > static void > > diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index > > f0061d9..de8478f 100644 > > --- a/app/test-pmd/testpmd.c > > +++ b/app/test-pmd/testpmd.c > > @@ -1483,6 +1483,12 @@ struct extmem_param { #ifdef > > RTE_TEST_PMD_RECORD_CORE_CYCLES > > uint64_t fwd_cycles = 0; > > #endif > > +#ifdef RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > > + 
uint64_t rx_cycles = 0; > > +#endif > > +#ifdef RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > > + uint64_t tx_cycles = 0; > > +#endif > > uint64_t total_recv = 0; > > uint64_t total_xmit = 0; > > struct rte_port *port; > > @@ -1513,6 +1519,12 @@ struct extmem_param { #ifdef > > RTE_TEST_PMD_RECORD_CORE_CYCLES > > fwd_cycles += fs->core_cycles; > > #endif > > +#ifdef RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > > + rx_cycles += fs->core_rx_cycles; > > +#endif > > +#ifdef RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > > + tx_cycles += fs->core_tx_cycles; > > +#endif > > } > > for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) { > > uint8_t j; > > @@ -1648,6 +1660,20 @@ struct extmem_param { > > (unsigned int)(fwd_cycles / total_recv), > > fwd_cycles, total_recv); > > #endif > > +#ifdef RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > > + if (total_recv > 0) > > + printf("\n rx CPU cycles/packet=%u (total cycles=" > > + "%"PRIu64" / total RX packets=%"PRIu64")\n", > > + (unsigned int)(rx_cycles / total_recv), > > + rx_cycles, total_recv); > > +#endif > > +#ifdef RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > > + if (total_xmit > 0) > > + printf("\n tx CPU cycles/packet=%u (total cycles=" > > + "%"PRIu64" / total TX packets=%"PRIu64")\n", > > + (unsigned int)(tx_cycles / total_xmit), > > + tx_cycles, total_xmit); > > +#endif > > } > > > > void > > @@ -1678,6 +1704,12 @@ struct extmem_param { #ifdef > > RTE_TEST_PMD_RECORD_CORE_CYCLES > > fs->core_cycles = 0; > > #endif > > +#ifdef RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > > + fs->core_rx_cycles = 0; > > +#endif > > +#ifdef RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > > + fs->core_tx_cycles = 0; > > +#endif > > } > > } > > > > diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index > > 1d9b7a2..4e8af8a 100644 > > --- a/app/test-pmd/testpmd.h > > +++ b/app/test-pmd/testpmd.h > > @@ -130,12 +130,52 @@ struct fwd_stream { #ifdef > > RTE_TEST_PMD_RECORD_CORE_CYCLES > > uint64_t core_cycles; /**< used for RX and TX processing */ > > #endif > > +#ifdef 
RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > > + uint64_t core_tx_cycles; /**< used for tx_burst processing */ > > +#endif > > +#ifdef RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > > + uint64_t core_rx_cycles; /**< used for rx_burst processing */ > > +#endif > > #ifdef RTE_TEST_PMD_RECORD_BURST_STATS > > struct pkt_burst_stats rx_burst_stats; > > struct pkt_burst_stats tx_burst_stats; #endif }; > > > > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > > +#define TEST_PMD_CORE_CYC_TX_START(a) {a = rte_rdtsc(); } #else > > +#define > > +TEST_PMD_CORE_CYC_TX_START(a) #endif > > + > > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) || \ > > + defined(RTE_TEST_PMD_RECORD_CORE_RX_CYCLES) > > +#define TEST_PMD_CORE_CYC_RX_START(a) {a = rte_rdtsc(); } #else > > +#define > > +TEST_PMD_CORE_CYC_RX_START(a) #endif > > + > > +#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES #define > > +TEST_PMD_CORE_CYC_FWD_ADD(fs, s) \ {uint64_t end_tsc = rte_rdtsc(); > > +fs->core_cycles += end_tsc - (s); } #else #define > > +TEST_PMD_CORE_CYC_FWD_ADD(fs, s) #endif > > + > > +#ifdef RTE_TEST_PMD_RECORD_CORE_TX_CYCLES > > +#define TEST_PMD_CORE_CYC_TX_ADD(fs, s) \ {uint64_t end_tsc = > > +rte_rdtsc(); fs->core_tx_cycles += end_tsc - (s); } #else #define > > +TEST_PMD_CORE_CYC_TX_ADD(fs, s) #endif > > + > > +#ifdef RTE_TEST_PMD_RECORD_CORE_RX_CYCLES > > +#define TEST_PMD_CORE_CYC_RX_ADD(fs, s) \ {uint64_t end_tsc = > > +rte_rdtsc(); fs->core_rx_cycles += end_tsc - (s); } #else #define > > +TEST_PMD_CORE_CYC_RX_ADD(fs, s) #endif > > + > > /** Descriptor for a single flow. */ > > struct port_flow { > > struct port_flow *next; /**< Next flow in list. 
*/ diff --git > > a/app/test- pmd/txonly.c b/app/test-pmd/txonly.c index > > fdfca14..fe3045a 100644 > > --- a/app/test-pmd/txonly.c > > +++ b/app/test-pmd/txonly.c > > @@ -241,16 +241,16 @@ > > uint32_t retry; > > uint64_t ol_flags = 0; > > uint64_t tx_offloads; > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - uint64_t start_tsc; > > - uint64_t end_tsc; > > - uint64_t core_cycles; > > +#if defined(RTE_TEST_PMD_RECORD_CORE_TX_CYCLES) > > + uint64_t start_tx_tsc; > > +#endif > > +#if defined(RTE_TEST_PMD_RECORD_CORE_CYCLES) > > + uint64_t start_rx_tsc; > > #endif > > > > #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - start_tsc = rte_rdtsc(); > > + TEST_PMD_CORE_CYC_RX_START(start_rx_tsc); > > #endif > > - > > mbp = current_fwd_lcore()->mbp; > > txp = &ports[fs->tx_port]; > > tx_offloads = txp->dev_conf.txmode.offloads; @@ -302,7 +302,9 > @@ > > if (nb_pkt == 0) > > return; > > > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, > > nb_pkt); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > /* > > * Retry if necessary > > */ > > @@ -310,8 +312,10 @@ > > retry = 0; > > while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) { > > rte_delay_us(burst_tx_delay_time); > > + TEST_PMD_CORE_CYC_TX_START(start_tx_tsc); > > nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > > &pkts_burst[nb_tx], nb_pkt - nb_tx); > > + TEST_PMD_CORE_CYC_TX_ADD(fs, start_tx_tsc); > > } > > } > > fs->tx_packets += nb_tx; > > @@ -334,12 +338,7 @@ > > rte_pktmbuf_free(pkts_burst[nb_tx]); > > } while (++nb_tx < nb_pkt); > > } > > - > > -#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > > - end_tsc = rte_rdtsc(); > > - core_cycles = (end_tsc - start_tsc); > > - fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); > > -#endif > > + TEST_PMD_CORE_CYC_FWD_ADD(fs, start_rx_tsc); > > } > > > > static void > > diff --git a/config/common_base b/config/common_base index > > 6b96e0e..6e84af4 100644 > > --- a/config/common_base > > +++ 
b/config/common_base > > @@ -998,6 +998,8 @@ CONFIG_RTE_PROC_INFO=n # > CONFIG_RTE_TEST_PMD=y > > CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n > > +CONFIG_RTE_TEST_PMD_RECORD_CORE_RX_CYCLES=n > > +CONFIG_RTE_TEST_PMD_RECORD_CORE_TX_CYCLES=n > > CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n > > Should the RECORD macros be documented in the run_app.rst file? > > > # > > -- > > 1.8.3.1 > > Regards, > > Bernard