Since all current drivers support the Tx preparation API, it is used
in the csum forwarding engine by default for all drivers.

Adding this additional step to the csum engine causes a performance drop
of about 3-4% on my setup with the ixgbe driver. The drop is caused mostly
by the need to re-access and modify the packet data.

Signed-off-by: Tomasz Kulasek <tomaszx.kula...@intel.com>
Acked-by: Konstantin Ananyev <konstantin.anan...@intel.com>
---
 app/test-pmd/csumonly.c |   37 ++++++++++++++++---------------------
 app/test-pmd/testpmd.c  |    5 +++++
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..806f957 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -112,15 +112,6 @@ struct simple_gre_hdr {
 } __attribute__((__packed__));
 
 static uint16_t
-get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
-{
-       if (ethertype == _htons(ETHER_TYPE_IPv4))
-               return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-       else /* assume ethertype == ETHER_TYPE_IPv6 */
-               return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
-}
-
-static uint16_t
 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
 {
        if (ethertype == _htons(ETHER_TYPE_IPv4))
@@ -370,11 +361,9 @@ struct simple_gre_hdr {
                /* do not recalculate udp cksum if it was 0 */
                if (udp_hdr->dgram_cksum != 0) {
                        udp_hdr->dgram_cksum = 0;
-                       if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
+                       if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM)
                                ol_flags |= PKT_TX_UDP_CKSUM;
-                               udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
-                                       info->ethertype, ol_flags);
-                       } else {
+                       else {
                                udp_hdr->dgram_cksum =
                                        get_udptcp_checksum(l3_hdr, udp_hdr,
                                                info->ethertype);
@@ -383,15 +372,11 @@ struct simple_gre_hdr {
        } else if (info->l4_proto == IPPROTO_TCP) {
                tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len);
                tcp_hdr->cksum = 0;
-               if (tso_segsz) {
+               if (tso_segsz)
                        ol_flags |= PKT_TX_TCP_SEG;
-                       tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-                               ol_flags);
-               } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) {
+               else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM)
                        ol_flags |= PKT_TX_TCP_CKSUM;
-                       tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-                               ol_flags);
-               } else {
+               else {
                        tcp_hdr->cksum =
                                get_udptcp_checksum(l3_hdr, tcp_hdr,
                                        info->ethertype);
@@ -648,6 +633,7 @@ struct simple_gre_hdr {
        void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
        uint16_t nb_rx;
        uint16_t nb_tx;
+       uint16_t nb_prep;
        uint16_t i;
        uint64_t rx_ol_flags, tx_ol_flags;
        uint16_t testpmd_ol_flags;
@@ -857,7 +843,16 @@ struct simple_gre_hdr {
                        printf("\n");
                }
        }
-       nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
+
+       nb_prep = rte_eth_tx_prepare(fs->tx_port, fs->tx_queue,
+                       pkts_burst, nb_rx);
+       if (nb_prep != nb_rx)
+               printf("Preparing packet burst to transmit failed: %s\n",
+                               rte_strerror(rte_errno));
+
+       nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
+                       nb_prep);
+
        /*
         * Retry if necessary
         */
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index a0332c2..634f10b 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -180,6 +180,11 @@ struct fwd_engine * fwd_engines[] = {
 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
 /**< Split policy for packets to TX. */
 
+/*
+ * Enable Tx preparation path in the "csum" engine.
+ */
+uint8_t tx_prepare;
+
 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
 
-- 
1.7.9.5

Reply via email to