To minimize cache misses, add the flags and burst size used in forwarding to struct fwd_stream, and move the per-burst condition tests in the forwarding path from globals to those per-stream flags. The stream structure is already hot in cache on every burst, so the tests no longer touch extra cache lines.
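The pattern, shown as a simplified before/after excerpt of one of the inlined stats helpers in testpmd.h (see the full diff below):

Before, each burst reads a global that lives on a cache line the forwarding loop does not otherwise use:

        static inline void
        inc_rx_burst_stats(struct fwd_stream *fs, uint16_t nb_rx)
        {
                if (record_burst_stats)
                        fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
        }

After, the flag is a one-bit field of struct fwd_stream, already resident in cache, and the test is marked unlikely():

        static inline void
        inc_rx_burst_stats(struct fwd_stream *fs, uint16_t nb_rx)
        {
                if (unlikely(fs->record_burst_stats))
                        fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
        }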
Signed-off-by: Xueming Li <xuemi...@nvidia.com>
---
 app/test-pmd/config.c    | 18 ++++++++++++++----
 app/test-pmd/flowgen.c   |  6 +++---
 app/test-pmd/noisy_vnf.c |  2 +-
 app/test-pmd/testpmd.h   | 21 ++++++++++++---------
 app/test-pmd/txonly.c    |  8 ++++----
 5 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 035247c33f..5cdf8fa082 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -3050,6 +3050,16 @@ fwd_topology_tx_port_get(portid_t rxp)
         }
 }
 
+static void
+fwd_stream_set_common(struct fwd_stream *fs)
+{
+        fs->nb_pkt_per_burst = nb_pkt_per_burst;
+        fs->record_burst_stats = !!record_burst_stats;
+        fs->record_core_cycles = !!record_core_cycles;
+        fs->retry_enabled = !!retry_enabled;
+        fs->rxq_share = !!rxq_share;
+}
+
 static void
 simple_fwd_config_setup(void)
 {
@@ -3079,7 +3089,7 @@ simple_fwd_config_setup(void)
                         fwd_ports_ids[fwd_topology_tx_port_get(i)];
                 fwd_streams[i]->tx_queue = 0;
                 fwd_streams[i]->peer_addr = fwd_streams[i]->tx_port;
-                fwd_streams[i]->retry_enabled = retry_enabled;
+                fwd_stream_set_common(fwd_streams[i]);
         }
 }
 
@@ -3140,7 +3150,7 @@ rss_fwd_config_setup(void)
                 fs->tx_port = fwd_ports_ids[txp];
                 fs->tx_queue = rxq;
                 fs->peer_addr = fs->tx_port;
-                fs->retry_enabled = retry_enabled;
+                fwd_stream_set_common(fs);
                 rxp++;
                 if (rxp < nb_fwd_ports)
                         continue;
@@ -3255,7 +3265,7 @@ dcb_fwd_config_setup(void)
                         fs->tx_port = fwd_ports_ids[txp];
                         fs->tx_queue = txq + j % nb_tx_queue;
                         fs->peer_addr = fs->tx_port;
-                        fs->retry_enabled = retry_enabled;
+                        fwd_stream_set_common(fs);
                 }
                 fwd_lcores[lc_id]->stream_nb +=
                         rxp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue;
@@ -3326,7 +3336,7 @@ icmp_echo_config_setup(void)
                 fs->tx_port = fs->rx_port;
                 fs->tx_queue = rxq;
                 fs->peer_addr = fs->tx_port;
-                fs->retry_enabled = retry_enabled;
+                fwd_stream_set_common(fs);
                 if (verbose_level > 0)
                         printf(" stream=%d port=%d rxq=%d txq=%d\n",
                                sm_id, fs->rx_port, fs->rx_queue,
diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index aa45948b4c..c282f3bcb1 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -97,12 +97,12 @@ flow_gen_stream(struct fwd_stream *fs, uint16_t nb_rx,
         if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
                 ol_flags |= PKT_TX_MACSEC;
 
-        for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+        for (nb_pkt = 0; nb_pkt < fs->nb_pkt_per_burst; nb_pkt++) {
                 if (!nb_pkt || !nb_clones) {
                         nb_clones = nb_pkt_flowgen_clones;
                         /* Logic limitation */
-                        if (nb_clones > nb_pkt_per_burst)
-                                nb_clones = nb_pkt_per_burst;
+                        if (nb_clones > fs->nb_pkt_per_burst)
+                                nb_clones = fs->nb_pkt_per_burst;
 
                         pkt = rte_mbuf_raw_alloc(mbp);
                         if (!pkt)
diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c
index 382a4c2aae..56bf6a4e70 100644
--- a/app/test-pmd/noisy_vnf.c
+++ b/app/test-pmd/noisy_vnf.c
@@ -153,7 +153,7 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs)
         uint64_t now;
 
         nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue,
-                        pkts_burst, nb_pkt_per_burst);
+                        pkts_burst, fs->nb_pkt_per_burst);
         inc_rx_burst_stats(fs, nb_rx);
         if (unlikely(nb_rx == 0))
                 goto flush;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 4792bef03b..3b8796a7a5 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -128,12 +128,17 @@ struct fwd_stream {
         queueid_t  tx_queue;  /**< TX queue to send forwarded packets */
         streamid_t peer_addr; /**< index of peer ethernet address of packets */
-        unsigned int retry_enabled;
+        uint16_t nb_pkt_per_burst;
+        unsigned int record_burst_stats:1;
+        unsigned int record_core_cycles:1;
+        unsigned int retry_enabled:1;
+        unsigned int rxq_share:1;
 
         /* "read-write" results */
         uint64_t rx_packets;  /**< received packets */
         uint64_t tx_packets;  /**< received packets transmitted */
         uint64_t fwd_dropped; /**< received packets not forwarded */
+        uint64_t core_cycles; /**< used for RX and TX processing */
         uint64_t rx_bad_ip_csum ; /**< received packets has bad ip checksum */
         uint64_t rx_bad_l4_csum ; /**< received packets has bad l4 checksum */
         uint64_t rx_bad_outer_l4_csum;
@@ -141,7 +146,6 @@ struct fwd_stream {
         uint64_t rx_bad_outer_ip_csum;
         /**< received packets having bad outer ip checksum */
         unsigned int gro_times; /**< GRO operation times */
-        uint64_t core_cycles; /**< used for RX and TX processing */
         struct pkt_burst_stats rx_burst_stats;
         struct pkt_burst_stats tx_burst_stats;
         struct fwd_lcore *lcore; /**< Lcore being scheduled. */
@@ -750,28 +754,27 @@ port_pci_reg_write(struct rte_port *port, uint32_t reg_off, uint32_t reg_v)
 static inline void
 get_start_cycles(uint64_t *start_tsc)
 {
-        if (record_core_cycles)
-                *start_tsc = rte_rdtsc();
+        *start_tsc = rte_rdtsc();
 }
 
 static inline void
 get_end_cycles(struct fwd_stream *fs, uint64_t start_tsc)
 {
-        if (record_core_cycles)
+        if (unlikely(fs->record_core_cycles))
                 fs->core_cycles += rte_rdtsc() - start_tsc;
 }
 
 static inline void
 inc_rx_burst_stats(struct fwd_stream *fs, uint16_t nb_rx)
 {
-        if (record_burst_stats)
+        if (unlikely(fs->record_burst_stats))
                 fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
 }
 
 static inline void
 inc_tx_burst_stats(struct fwd_stream *fs, uint16_t nb_tx)
 {
-        if (record_burst_stats)
+        if (unlikely(fs->record_burst_stats))
                 fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
 }
 
@@ -1032,13 +1035,13 @@ int update_jumbo_frame_offload(portid_t portid);
 static void \
 pkt_burst_fwd(struct fwd_stream *fs) \
 { \
-        struct rte_mbuf *pkts_burst[nb_pkt_per_burst]; \
+        struct rte_mbuf *pkts_burst[fs->nb_pkt_per_burst]; \
         uint16_t nb_rx; \
         uint64_t start_tsc = 0; \
 \
         get_start_cycles(&start_tsc); \
         nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, \
-                        pkts_burst, nb_pkt_per_burst); \
+                        pkts_burst, fs->nb_pkt_per_burst); \
         inc_rx_burst_stats(fs, nb_rx); \
         if (unlikely(nb_rx == 0)) \
                 return; \
diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index aed820f5d3..db6130421c 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -367,8 +367,8 @@ pkt_burst_transmit(struct fwd_stream *fs)
         eth_hdr.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
 
         if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
-                                nb_pkt_per_burst) == 0) {
-                for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+                                fs->nb_pkt_per_burst) == 0) {
+                for (nb_pkt = 0; nb_pkt < fs->nb_pkt_per_burst; nb_pkt++) {
                         if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
                                                         &eth_hdr, vlan_tci,
                                                         vlan_tci_outer,
@@ -376,12 +376,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
                                                         nb_pkt, fs))) {
                                 rte_mempool_put_bulk(mbp,
                                                 (void **)&pkts_burst[nb_pkt],
-                                                nb_pkt_per_burst - nb_pkt);
+                                                fs->nb_pkt_per_burst - nb_pkt);
                                 break;
                         }
                 }
         } else {
-                for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+                for (nb_pkt = 0; nb_pkt < fs->nb_pkt_per_burst; nb_pkt++) {
                         pkt = rte_mbuf_raw_alloc(mbp);
                         if (pkt == NULL)
                                 break;
-- 
2.33.0