From: Pavan Nikhilesh <pbhagavat...@marvell.com>

Optimize testpmd txonly mode by
1. Moving the per-packet Ethernet header copy above the loop.
2. Using bulk ops to allocate segments instead of an inner loop
   for every segment.
Also, move the packet prepare logic into a separate function so that
it can be reused later.

Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com>
---
 v3 Changes:
 - Split the patches for easier review. (Thomas)
 - Remove unnecessary assignments to 0. (Bernard)

 v2 Changes:
 - Use bulk ops for fetching segments. (Andrew Rybchenko)
 - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
 - Fix mbufs not being freed when there are no more mbufs available
   for segments. (Andrew Rybchenko)

 app/test-pmd/txonly.c | 140 +++++++++++++++++++++++-------------------
 1 file changed, 76 insertions(+), 64 deletions(-)

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 1f08b6ed3..5610e0e42 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -147,6 +147,62 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
 	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
 }
 
+static inline bool
+pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
+		struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
+		const uint16_t vlan_tci_outer, const uint64_t ol_flags)
+{
+	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
+	struct rte_mbuf *pkt_seg;
+	uint32_t nb_segs, pkt_len;
+	uint8_t i;
+
+	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
+		nb_segs = random() % tx_pkt_nb_segs + 1;
+	else
+		nb_segs = tx_pkt_nb_segs;
+
+	if (nb_segs > 1) {
+		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs,
+					nb_segs - 1))
+			return false;
+	}
+
+	rte_pktmbuf_reset_headroom(pkt);
+	pkt->data_len = tx_pkt_seg_lengths[0];
+	pkt->ol_flags = ol_flags;
+	pkt->vlan_tci = vlan_tci;
+	pkt->vlan_tci_outer = vlan_tci_outer;
+	pkt->l2_len = sizeof(struct ether_hdr);
+	pkt->l3_len = sizeof(struct ipv4_hdr);
+
+	pkt_len = pkt->data_len;
+	pkt_seg = pkt;
+	for (i = 1; i < nb_segs; i++) {
+		pkt_seg->next = pkt_segs[i - 1];
+		pkt_seg = pkt_seg->next;
+		pkt_seg->data_len = tx_pkt_seg_lengths[i];
+		pkt_len += pkt_seg->data_len;
+	}
+	pkt_seg->next = NULL; /* Last segment of packet. */
+	/*
+	 * Copy headers in first packet segment(s).
+	 */
+	copy_buf_to_pkt(eth_hdr, sizeof(*eth_hdr), pkt, 0);
+	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
+			sizeof(struct ether_hdr));
+	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
+			sizeof(struct ether_hdr) +
+			sizeof(struct ipv4_hdr));
+
+	/*
+	 * Complete first mbuf of packet and append it to the
+	 * burst of packets to be transmitted.
+	 */
+	pkt->nb_segs = nb_segs;
+	pkt->pkt_len = pkt_len;
+
+	return true;
+}
+
 /*
  * Transmit a burst of multi-segments packets.
  */
@@ -154,9 +210,8 @@ static void
 pkt_burst_transmit(struct fwd_stream *fs)
 {
 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
-	struct rte_port *txp;
 	struct rte_mbuf *pkt;
-	struct rte_mbuf *pkt_seg;
+	struct rte_port *txp;
 	struct rte_mempool *mbp;
 	struct ether_hdr eth_hdr;
 	uint16_t nb_tx;
@@ -164,14 +219,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	uint16_t vlan_tci, vlan_tci_outer;
 	uint32_t retry;
 	uint64_t ol_flags = 0;
-	uint8_t i;
 	uint64_t tx_offloads;
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	uint64_t start_tsc;
 	uint64_t end_tsc;
 	uint64_t core_cycles;
 #endif
-	uint32_t nb_segs, pkt_len;
 
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
 	start_tsc = rte_rdtsc();
@@ -188,72 +241,31 @@ pkt_burst_transmit(struct fwd_stream *fs)
 		ol_flags |= PKT_TX_QINQ_PKT;
 	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
 		ol_flags |= PKT_TX_MACSEC;
+
+	/*
+	 * Initialize Ethernet header.
+	 */
+	ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.d_addr);
+	ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
+	eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+
 	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
 		pkt = rte_mbuf_raw_alloc(mbp);
-		if (pkt == NULL) {
-		nomore_mbuf:
-			if (nb_pkt == 0)
-				return;
+		if (pkt == NULL)
+			break;
+		if (unlikely(!pkt_burst_prepare(pkt, mbp,
+						&eth_hdr, vlan_tci,
+						vlan_tci_outer,
+						ol_flags))) {
+			rte_mempool_put(mbp, pkt);
 			break;
 		}
-
-		/*
-		 * Using raw alloc is good to improve performance,
-		 * but some consumers may use the headroom and so
-		 * decrement data_off. We need to make sure it is
-		 * reset to default value.
-		 */
-		rte_pktmbuf_reset_headroom(pkt);
-		pkt->data_len = tx_pkt_seg_lengths[0];
-		pkt_seg = pkt;
-		if (tx_pkt_split == TX_PKT_SPLIT_RND)
-			nb_segs = random() % tx_pkt_nb_segs + 1;
-		else
-			nb_segs = tx_pkt_nb_segs;
-		pkt_len = pkt->data_len;
-		for (i = 1; i < nb_segs; i++) {
-			pkt_seg->next = rte_mbuf_raw_alloc(mbp);
-			if (pkt_seg->next == NULL) {
-				pkt->nb_segs = i;
-				rte_pktmbuf_free(pkt);
-				goto nomore_mbuf;
-			}
-			pkt_seg = pkt_seg->next;
-			pkt_seg->data_len = tx_pkt_seg_lengths[i];
-			pkt_len += pkt_seg->data_len;
-		}
-		pkt_seg->next = NULL; /* Last segment of packet. */
-
-		/*
-		 * Initialize Ethernet header.
-		 */
-		ether_addr_copy(&peer_eth_addrs[fs->peer_addr],&eth_hdr.d_addr);
-		ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
-		eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
-
-		/*
-		 * Copy headers in first packet segment(s).
-		 */
-		copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
-		copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
-				sizeof(struct ether_hdr));
-		copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
-				sizeof(struct ether_hdr) +
-				sizeof(struct ipv4_hdr));
-
-		/*
-		 * Complete first mbuf of packet and append it to the
-		 * burst of packets to be transmitted.
-		 */
-		pkt->nb_segs = nb_segs;
-		pkt->pkt_len = pkt_len;
-		pkt->ol_flags = ol_flags;
-		pkt->vlan_tci = vlan_tci;
-		pkt->vlan_tci_outer = vlan_tci_outer;
-		pkt->l2_len = sizeof(struct ether_hdr);
-		pkt->l3_len = sizeof(struct ipv4_hdr);
 		pkts_burst[nb_pkt] = pkt;
 	}
+
+	if (nb_pkt == 0)
+		return;
+
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
 	/*
 	 * Retry if necessary
--
2.20.1
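
A note on the allocation strategy: the v2 changelog mentions a fallback
to rte_mbuf_raw_alloc() when the bulk get fails, while the hunk above
simply returns false and lets the caller drop the head mbuf. A minimal
sketch of such a fallback is below. It assumes an initialized EAL and
pktmbuf pool; the helper name alloc_pkt_segs() is hypothetical and not
part of this patch.

#include <stdbool.h>
#include <stdint.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Sketch only: fetch the nb_segs - 1 trailing segment mbufs for a
 * packet in one mempool operation, falling back to per-mbuf raw
 * allocation if the bulk get fails, and unwinding on partial failure
 * so no mbuf leaks.
 */
static inline bool
alloc_pkt_segs(struct rte_mempool *mbp, struct rte_mbuf **pkt_segs,
	       uint32_t nb_segs)
{
	uint32_t i;

	if (nb_segs <= 1)
		return true;

	/* One dequeue for all trailing segments; returns 0 on success. */
	if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs - 1) == 0)
		return true;

	/* Fallback: per-mbuf allocation, as in the pre-patch inner loop. */
	for (i = 0; i < nb_segs - 1; i++) {
		pkt_segs[i] = rte_mbuf_raw_alloc(mbp);
		if (pkt_segs[i] == NULL) {
			/* Return partially allocated mbufs to the pool. */
			rte_mempool_put_bulk(mbp, (void **)pkt_segs, i);
			return false;
		}
	}
	return true;
}

The win from the bulk path is that a single rte_mempool_get_bulk() call
touches the per-lcore mempool cache once for all trailing segments,
where the old inner loop paid that cost once per segment; the fallback
would preserve the original code's behaviour when the pool is nearly
empty.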