To minimize cache misses, copy the flags and burst size used in forwarding
into the stream structure, and turn the condition tests in the forwarding
path into checks on per-stream flags.

Signed-off-by: Xueming Li <xuemi...@nvidia.com>
---
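Notes (editor's illustration, not part of the patch): below is a minimal
standalone sketch of the pattern this patch applies, using hypothetical
names (struct stream, stream_set_common, forward_burst) rather than
testpmd's. Hot-path configuration is copied from globals into the
per-stream structure once at setup time, so the forwarding loop reads a
single cache line instead of several scattered globals, and the 1-bit
fields pack the flags next to the burst size.

#include <stdint.h>
#include <stdio.h>

/* Global configuration, normally scattered across different cache lines. */
static uint16_t nb_pkt_per_burst = 32;
static uint8_t record_burst_stats = 1;
static uint8_t record_core_cycles;

/* Per-stream state: config is copied in so the hot loop touches one line. */
struct stream {
	uint16_t nb_pkt_per_burst;          /* burst size, copied from global */
	unsigned int record_burst_stats:1;  /* 1-bit flags pack tightly */
	unsigned int record_core_cycles:1;
	uint64_t pkts;                      /* hot counter lives alongside */
};

/* Setup-time copy, mirroring fwd_stream_set_common() in the patch. */
static void stream_set_common(struct stream *s)
{
	s->nb_pkt_per_burst = nb_pkt_per_burst;
	s->record_burst_stats = !!record_burst_stats;
	s->record_core_cycles = !!record_core_cycles;
}

/* Hot path: reads only fields of *s, never the globals. */
static void forward_burst(struct stream *s)
{
	for (uint16_t i = 0; i < s->nb_pkt_per_burst; i++)
		s->pkts++;
	if (s->record_burst_stats)
		printf("burst of %u packets\n", s->nb_pkt_per_burst);
}

int main(void)
{
	struct stream s = {0};

	stream_set_common(&s);
	forward_burst(&s);
	return 0;
}

Copying the values (rather than keeping pointers to the globals) means each
stream's hot data stays local to the lcore running it, at the cost of
re-running setup when the configuration changes.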
 app/test-pmd/config.c    | 18 ++++++++++++++----
 app/test-pmd/flowgen.c   |  6 +++---
 app/test-pmd/noisy_vnf.c |  2 +-
 app/test-pmd/testpmd.h   | 21 ++++++++++++---------
 app/test-pmd/txonly.c    |  8 ++++----
 5 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 035247c33f..5cdf8fa082 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -3050,6 +3050,16 @@ fwd_topology_tx_port_get(portid_t rxp)
        }
 }
 
+static void
+fwd_stream_set_common(struct fwd_stream *fs)
+{
+       fs->nb_pkt_per_burst = nb_pkt_per_burst;
+       fs->record_burst_stats = !!record_burst_stats;
+       fs->record_core_cycles = !!record_core_cycles;
+       fs->retry_enabled = !!retry_enabled;
+       fs->rxq_share = !!rxq_share;
+}
+
 static void
 simple_fwd_config_setup(void)
 {
@@ -3079,7 +3089,7 @@ simple_fwd_config_setup(void)
                                fwd_ports_ids[fwd_topology_tx_port_get(i)];
                fwd_streams[i]->tx_queue  = 0;
                fwd_streams[i]->peer_addr = fwd_streams[i]->tx_port;
-               fwd_streams[i]->retry_enabled = retry_enabled;
+               fwd_stream_set_common(fwd_streams[i]);
        }
 }
 
@@ -3140,7 +3150,7 @@ rss_fwd_config_setup(void)
                fs->tx_port = fwd_ports_ids[txp];
                fs->tx_queue = rxq;
                fs->peer_addr = fs->tx_port;
-               fs->retry_enabled = retry_enabled;
+               fwd_stream_set_common(fs);
                rxp++;
                if (rxp < nb_fwd_ports)
                        continue;
@@ -3255,7 +3265,7 @@ dcb_fwd_config_setup(void)
                                fs->tx_port = fwd_ports_ids[txp];
                                fs->tx_queue = txq + j % nb_tx_queue;
                                fs->peer_addr = fs->tx_port;
-                               fs->retry_enabled = retry_enabled;
+                               fwd_stream_set_common(fs);
                        }
                        fwd_lcores[lc_id]->stream_nb +=
                                rxp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue;
@@ -3326,7 +3336,7 @@ icmp_echo_config_setup(void)
                        fs->tx_port = fs->rx_port;
                        fs->tx_queue = rxq;
                        fs->peer_addr = fs->tx_port;
-                       fs->retry_enabled = retry_enabled;
+                       fwd_stream_set_common(fs);
                        if (verbose_level > 0)
                                printf("  stream=%d port=%d rxq=%d txq=%d\n",
                                       sm_id, fs->rx_port, fs->rx_queue,
diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index aa45948b4c..c282f3bcb1 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -97,12 +97,12 @@ flow_gen_stream(struct fwd_stream *fs, uint16_t nb_rx,
        if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
                ol_flags |= PKT_TX_MACSEC;
 
-       for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+       for (nb_pkt = 0; nb_pkt < fs->nb_pkt_per_burst; nb_pkt++) {
                if (!nb_pkt || !nb_clones) {
                        nb_clones = nb_pkt_flowgen_clones;
                        /* Logic limitation */
-                       if (nb_clones > nb_pkt_per_burst)
-                               nb_clones = nb_pkt_per_burst;
+                       if (nb_clones > fs->nb_pkt_per_burst)
+                               nb_clones = fs->nb_pkt_per_burst;
 
                        pkt = rte_mbuf_raw_alloc(mbp);
                        if (!pkt)
diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c
index 382a4c2aae..56bf6a4e70 100644
--- a/app/test-pmd/noisy_vnf.c
+++ b/app/test-pmd/noisy_vnf.c
@@ -153,7 +153,7 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs)
        uint64_t now;
 
        nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue,
-                       pkts_burst, nb_pkt_per_burst);
+                       pkts_burst, fs->nb_pkt_per_burst);
        inc_rx_burst_stats(fs, nb_rx);
        if (unlikely(nb_rx == 0))
                goto flush;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 4792bef03b..3b8796a7a5 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -128,12 +128,17 @@ struct fwd_stream {
        queueid_t  tx_queue;  /**< TX queue to send forwarded packets */
        streamid_t peer_addr; /**< index of peer ethernet address of packets */
 
-       unsigned int retry_enabled;
+       uint16_t nb_pkt_per_burst;
+       unsigned int record_burst_stats:1;
+       unsigned int record_core_cycles:1;
+       unsigned int retry_enabled:1;
+       unsigned int rxq_share:1;
 
        /* "read-write" results */
        uint64_t rx_packets;  /**< received packets */
        uint64_t tx_packets;  /**< received packets transmitted */
        uint64_t fwd_dropped; /**< received packets not forwarded */
+       uint64_t core_cycles; /**< used for RX and TX processing */
        uint64_t rx_bad_ip_csum ; /**< received packets has bad ip checksum */
        uint64_t rx_bad_l4_csum ; /**< received packets has bad l4 checksum */
        uint64_t rx_bad_outer_l4_csum;
@@ -141,7 +146,6 @@ struct fwd_stream {
        uint64_t rx_bad_outer_ip_csum;
        /**< received packets having bad outer ip checksum */
        unsigned int gro_times; /**< GRO operation times */
-       uint64_t     core_cycles; /**< used for RX and TX processing */
        struct pkt_burst_stats rx_burst_stats;
        struct pkt_burst_stats tx_burst_stats;
        struct fwd_lcore *lcore; /**< Lcore being scheduled. */
@@ -750,28 +754,27 @@ port_pci_reg_write(struct rte_port *port, uint32_t reg_off, uint32_t reg_v)
 static inline void
 get_start_cycles(uint64_t *start_tsc)
 {
-       if (record_core_cycles)
-               *start_tsc = rte_rdtsc();
+       *start_tsc = rte_rdtsc();
 }
 
 static inline void
 get_end_cycles(struct fwd_stream *fs, uint64_t start_tsc)
 {
-       if (record_core_cycles)
+       if (unlikely(fs->record_core_cycles))
                fs->core_cycles += rte_rdtsc() - start_tsc;
 }
 
 static inline void
 inc_rx_burst_stats(struct fwd_stream *fs, uint16_t nb_rx)
 {
-       if (record_burst_stats)
+       if (unlikely(fs->record_burst_stats))
                fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
 }
 
 static inline void
 inc_tx_burst_stats(struct fwd_stream *fs, uint16_t nb_tx)
 {
-       if (record_burst_stats)
+       if (unlikely(fs->record_burst_stats))
                fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
 }
 
@@ -1032,13 +1035,13 @@ int update_jumbo_frame_offload(portid_t portid);
 static void                                                     \
 pkt_burst_fwd(struct fwd_stream *fs)                            \
 {                                                               \
-       struct rte_mbuf *pkts_burst[nb_pkt_per_burst];          \
+       struct rte_mbuf *pkts_burst[fs->nb_pkt_per_burst];      \
        uint16_t nb_rx;                                         \
        uint64_t start_tsc = 0;                                 \
                                                                \
        get_start_cycles(&start_tsc);                           \
        nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue,     \
-                       pkts_burst, nb_pkt_per_burst);          \
+                       pkts_burst, fs->nb_pkt_per_burst);      \
        inc_rx_burst_stats(fs, nb_rx);                          \
        if (unlikely(nb_rx == 0))                               \
                return;                                         \
diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index aed820f5d3..db6130421c 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -367,8 +367,8 @@ pkt_burst_transmit(struct fwd_stream *fs)
        eth_hdr.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
 
        if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
-                               nb_pkt_per_burst) == 0) {
-               for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+                               fs->nb_pkt_per_burst) == 0) {
+               for (nb_pkt = 0; nb_pkt < fs->nb_pkt_per_burst; nb_pkt++) {
                        if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
                                                        &eth_hdr, vlan_tci,
                                                        vlan_tci_outer,
@@ -376,12 +376,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
                                                        nb_pkt, fs))) {
                                rte_mempool_put_bulk(mbp,
                                                (void **)&pkts_burst[nb_pkt],
-                                               nb_pkt_per_burst - nb_pkt);
+                                               fs->nb_pkt_per_burst - nb_pkt);
                                break;
                        }
                }
        } else {
-               for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+               for (nb_pkt = 0; nb_pkt < fs->nb_pkt_per_burst; nb_pkt++) {
                        pkt = rte_mbuf_raw_alloc(mbp);
                        if (pkt == NULL)
                                break;
-- 
2.33.0
