Implement TSO (TCP segmentation offload) in ixgbe driver. To delegate
the TCP segmentation to the hardware, the user has to:

- set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
  PKT_TX_IP_CKSUM and PKT_TX_TCP_CKSUM)
- fill the mbuf->hw_offload information: l2_len, l3_len, l4_len, mss
- calculate the pseudo header checksum and set it in the TCP header,
  as required when doing hardware TCP checksum offload
- set the IP checksum to 0

This approach seems generic enough to be used for other hw/drivers
in the future.

In the patch, the tx_desc_cksum_flags_to_olinfo() and
tx_desc_ol_flags_to_cmdtype() functions have been reworked to make them
clearer. This does not impact performance as gcc (version 4.8 in my
case) is smart enough to convert the tests into a code that does not
contain any branch instruction.

validation
==========

platform:

  Tester (linux)   <---->   DUT (DPDK)

Run testpmd on DUT:

  cd dpdk.org/
  make install T=x86_64-default-linuxapp-gcc
  cd x86_64-default-linuxapp-gcc/
  modprobe uio
  insmod kmod/igb_uio.ko
  python ../tools/igb_uio_bind.py -b igb_uio 0000:02:00.0
  echo 0 > /proc/sys/kernel/randomize_va_space
  echo 1000 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
  echo 1000 > /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages
  mount -t hugetlbfs none /mnt/huge
  ./app/testpmd -c 0x55 -n 4 -m 800 -- -i --port-topology=chained

Disable all offload feature on Tester, and start capture:

  ethtool -K ixgbe0 rx off tx off tso off gso off gro off lro off
  ip l set ixgbe0 up
  tcpdump -n -e -i ixgbe0 -s 0 -w /tmp/cap

We use the following scapy script for testing:

  def test():
    ############### IPv4
    # checksum TCP
    p=Ether()/IP(src=RandIP(), dst=RandIP())/TCP(flags=0x10)/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # checksum UDP
    p=Ether()/IP(src=RandIP(), dst=RandIP())/UDP()/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # bad IP checksum
    p=Ether()/IP(src=RandIP(), dst=RandIP(), chksum=0x1234)/TCP(flags=0x10)/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # bad TCP checksum
    p=Ether()/IP(src=RandIP(), dst=RandIP())/TCP(flags=0x10, chksum=0x1234)/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # large packet
    p=Ether()/IP(src=RandIP(), dst=RandIP())/TCP(flags=0x10)/Raw(RandString(1400))
    sendp(p, iface="ixgbe0", count=5)
    ############### IPv6
    # checksum TCP
    p=Ether()/IPv6(src=RandIP6(), dst=RandIP6())/TCP(flags=0x10)/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # checksum UDP
    p=Ether()/IPv6(src=RandIP6(), dst=RandIP6())/UDP()/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # bad TCP checksum
    p=Ether()/IPv6(src=RandIP6(), dst=RandIP6())/TCP(flags=0x10, chksum=0x1234)/Raw(RandString(50))
    sendp(p, iface="ixgbe0", count=5)
    # large packet
    p=Ether()/IPv6(src=RandIP6(), dst=RandIP6())/TCP(flags=0x10)/Raw(RandString(1400))
    sendp(p, iface="ixgbe0", count=5)

Without hw cksum
----------------

On DUT:

  # disable hw cksum (use sw) in csumonly test, disable tso
  stop
  set fwd csum
  tx_checksum set 0x0 0
  tso set 0 0
  start

On tester:

  >>> test()

Then check the capture file.

With hw cksum
-------------

On DUT:

  # enable hw cksum in csumonly test, disable tso
  stop
  set fwd csum
  tx_checksum set 0xf 0
  tso set 0 0
  start

On tester:

  >>> test()

Then check the capture file.

With TSO
--------

On DUT:

  set fwd csum
  tx_checksum set 0xf 0
  tso set 800 0
  start

On tester:

  >>> test()

Then check the capture file.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---
 app/test-pmd/cmdline.c            |  45 +++++++++++
 app/test-pmd/config.c             |   8 ++
 app/test-pmd/csumonly.c           |  16 ++++
 app/test-pmd/testpmd.h            |   2 +
 lib/librte_mbuf/rte_mbuf.h        |   7 +-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 165 ++++++++++++++++++++++++++++----------
 6 files changed, 200 insertions(+), 43 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index a95b279..c628773 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -2305,6 +2305,50 @@ cmdline_parse_inst_t cmd_tx_cksum_set = {
        },
 };

+/* *** ENABLE HARDWARE SEGMENTATION IN TX PACKETS *** */
+struct cmd_tso_set_result {
+       cmdline_fixed_string_t tso;
+       cmdline_fixed_string_t set;
+       uint16_t mss;
+       uint8_t port_id;
+};
+
+static void
+cmd_tso_set_parsed(void *parsed_result,
+                      __attribute__((unused)) struct cmdline *cl,
+                      __attribute__((unused)) void *data)
+{
+       struct cmd_tso_set_result *res = parsed_result;
+       tso_set(res->port_id, res->mss);
+}
+
+cmdline_parse_token_string_t cmd_tso_set_tso =
+       TOKEN_STRING_INITIALIZER(struct cmd_tso_set_result,
+                               tso, "tso");
+cmdline_parse_token_string_t cmd_tso_set_set =
+       TOKEN_STRING_INITIALIZER(struct cmd_tso_set_result,
+                               set, "set");
+cmdline_parse_token_num_t cmd_tso_set_mss =
+       TOKEN_NUM_INITIALIZER(struct cmd_tso_set_result,
+                               mss, UINT16);
+cmdline_parse_token_num_t cmd_tso_set_portid =
+       TOKEN_NUM_INITIALIZER(struct cmd_tso_set_result,
+                               port_id, UINT8);
+
+cmdline_parse_inst_t cmd_tso_set = {
+       .f = cmd_tso_set_parsed,
+       .data = NULL,
+       .help_str = "Enable hardware segmentation (set MSS to 0 to disable): "
+       "tso set <MSS> <PORT>",
+       .tokens = {
+               (void *)&cmd_tso_set_tso,
+               (void *)&cmd_tso_set_set,
+               (void *)&cmd_tso_set_mss,
+               (void *)&cmd_tso_set_portid,
+               NULL,
+       },
+};
+
 /* *** ENABLE/DISABLE FLUSH ON RX STREAMS *** */
 struct cmd_set_flush_rx {
        cmdline_fixed_string_t set;
@@ -5151,6 +5195,7 @@ cmdline_parse_ctx_t main_ctx[] = {
        (cmdline_parse_inst_t *)&cmd_tx_vlan_set,
        (cmdline_parse_inst_t *)&cmd_tx_vlan_reset,
        (cmdline_parse_inst_t *)&cmd_tx_cksum_set,
+       (cmdline_parse_inst_t *)&cmd_tso_set,
        (cmdline_parse_inst_t *)&cmd_link_flow_control_set,
        (cmdline_parse_inst_t *)&cmd_priority_flow_control_set,
        (cmdline_parse_inst_t *)&cmd_config_dcb,
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index cd82f60..a6d749d 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1455,6 +1455,14 @@ tx_cksum_set(portid_t port_id, uint32_t ol_flags)
 }

 void
+tso_set(portid_t port_id, uint16_t mss)
+{
+       if (port_id_is_invalid(port_id))
+               return;
+       ports[port_id].tx_mss = mss;
+}
+
+void
 fdir_add_signature_filter(portid_t port_id, uint8_t queue_id,
                          struct rte_fdir_filter *fdir_filter)
 {
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index e93d75f..9983618 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -220,10 +220,12 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
        uint32_t ol_flags;
        uint32_t pkt_ol_flags;
        uint32_t tx_ol_flags;
+       uint16_t tx_mss;
        uint16_t l4_proto;
        uint16_t eth_type;
        uint8_t  l2_len;
        uint8_t  l3_len;
+       uint8_t  l4_len;

        uint32_t rx_bad_ip_csum;
        uint32_t rx_bad_l4_csum;
@@ -255,6 +257,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)

        txp = &ports[fs->tx_port];
        tx_ol_flags = txp->tx_ol_flags;
+       tx_mss = txp->tx_mss;

        for (i = 0; i < nb_rx; i++) {

@@ -272,6 +275,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
                                ((uintptr_t)&eth_hdr->ether_type +
                                sizeof(struct vlan_hdr)));
                }
+               l4_len  = 0;

                /* Update the L3/L4 checksum error packet count  */
                rx_bad_ip_csum += ((pkt_ol_flags & PKT_RX_IP_CKSUM_BAD) != 0);
@@ -347,6 +351,11 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
                                        tcp_hdr->cksum = 
get_ipv4_udptcp_checksum(ipv4_hdr,
                                                        (uint16_t*)tcp_hdr);
                                }
+
+                               if (tx_mss != 0) {
+                                       ol_flags |= PKT_TX_TCP_SEG;
+                                       l4_len = (tcp_hdr->data_off & 0xf0) >> 
2;
+                               }
                        }
                        else if (l4_proto == IPPROTO_SCTP) {
                                sctp_hdr = (struct sctp_hdr*) 
(rte_pktmbuf_mtod(mb,
@@ -404,6 +413,11 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
                                        tcp_hdr->cksum = 
get_ipv6_udptcp_checksum(ipv6_hdr,
                                                        (uint16_t*)tcp_hdr);
                                }
+
+                               if (tx_mss != 0) {
+                                       ol_flags |= PKT_TX_TCP_SEG;
+                                       l4_len = (tcp_hdr->data_off & 0xf0) >> 
2;
+                               }
                        }
                        else if (l4_proto == IPPROTO_SCTP) {
                                sctp_hdr = (struct sctp_hdr*) 
(rte_pktmbuf_mtod(mb,
@@ -434,6 +448,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
                /* Combine the packet header write. VLAN is not consider here */
                mb->hw_offload.l2_len = l2_len;
                mb->hw_offload.l3_len = l3_len;
+               mb->hw_offload.l4_len = l4_len;
+               mb->hw_offload.mss = tx_mss;
                mb->ol_flags = ol_flags;
        }
        nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 77dcc30..6f567e7 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -134,6 +134,7 @@ struct rte_port {
        struct fwd_stream       *tx_stream; /**< Port TX stream, if unique */
        unsigned int            socket_id;  /**< For NUMA support */
        uint32_t                tx_ol_flags;/**< Offload Flags of TX packets. */
+       uint16_t                tx_mss;     /**< MSS for segmentation offload. 
*/
        uint16_t                tx_vlan_id; /**< Tag Id. in TX VLAN packets. */
        void                    *fwd_ctx;   /**< Forwarding mode context */
        uint64_t                rx_bad_ip_csum; /**< rx pkts with bad ip 
checksum  */
@@ -480,6 +481,7 @@ void tx_vlan_reset(portid_t port_id);
 void set_qmap(portid_t port_id, uint8_t is_rx, uint16_t queue_id, uint8_t 
map_value);

 void tx_cksum_set(portid_t port_id, uint32_t ol_flags);
+void tso_set(portid_t port_id, uint16_t mss);

 void set_verbose_level(uint16_t vb_level);
 void set_tx_pkt_segments(unsigned *seg_lengths, unsigned nb_segs);
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index d71c86c..75298bd 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -96,6 +96,7 @@ extern "C" {
 #define PKT_TX_SCTP_CKSUM    0x00080000 /**< SCTP cksum of TX pkt. computed by 
NIC. */
 #define PKT_TX_UDP_CKSUM     0x000C0000 /**< UDP cksum of TX pkt. computed by 
NIC. */
 #define PKT_TX_IEEE1588_TMST 0x00100000 /**< TX IEEE1588 packet to timestamp. 
*/
+#define PKT_TX_TCP_SEG       0x00200000 /**< TCP segmentation offload. */

 /**
  * Get the name of a RX offload flag
@@ -140,6 +141,7 @@ static inline const char *rte_get_tx_ol_flag_name(uint32_t 
mask)
        case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM";
        case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
        case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
+       case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
        default: return NULL;
        }
 }
@@ -153,11 +155,12 @@ union rte_hw_offload {
 #define HW_OFFLOAD_L4_LEN_MASK 0xff
                uint32_t l2_len:7; /**< L2 (MAC) Header Length. */
                uint32_t l3_len:9; /**< L3 (IP) Header Length. */
-               uint32_t reserved:16;
+               uint32_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+               uint32_t reserved:8;

                uint16_t vlan_tci;
                /**< VLAN Tag Control Identifier (CPU order). */
-               uint16_t reserved2;
+               uint16_t mss; /**< Maximum segment size. */
        };
 };

diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c 
b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index d52482e..75ff16e 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -347,13 +347,59 @@ ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf 
**tx_pkts,
        return nb_tx;
 }

+/* When doing TSO, the IP length must not be included in the pseudo
+ * header checksum of the packet given to the hardware */
+static inline void
+ixgbe_fix_tcp_phdr_cksum(struct rte_mbuf *m)
+{
+       char *data;
+       uint16_t *cksum_ptr;
+       uint16_t prev_cksum;
+       uint16_t new_cksum;
+       uint16_t ip_len, ip_paylen;
+       uint32_t tmp;
+       uint8_t ip_version;
+
+       /* get phdr cksum at offset 16 of TCP header */
+       data = rte_pktmbuf_mtod(m, char *);
+       cksum_ptr = (uint16_t *)(data + m->hw_offload.l2_len +
+               m->hw_offload.l3_len + 16);
+       prev_cksum = *cksum_ptr;
+
+       /* get ip_version */
+       ip_version = (*(uint8_t *)(data + m->hw_offload.l2_len)) >> 4;
+
+       /* get ip_len at offset 2 of IP header or offset 4 of IPv6 header */
+       if (ip_version == 4) {
+               /* override ip cksum to 0 */
+               data[m->hw_offload.l2_len + 10] = 0;
+               data[m->hw_offload.l2_len + 11] = 0;
+
+               ip_len = *(uint16_t *)(data + m->hw_offload.l2_len + 2);
+               ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
+                       m->hw_offload.l3_len);
+       } else {
+               ip_paylen = *(uint16_t *)(data + m->hw_offload.l2_len + 4);
+       }
+
+       /* calculate the new phdr checksum that doesn't include ip_paylen */
+       tmp = prev_cksum ^ 0xffff;
+       if (tmp < ip_paylen)
+               tmp += 0xffff;
+       tmp -= ip_paylen;
+       new_cksum = tmp;
+
+       /* replace it in the packet */
+       *cksum_ptr = new_cksum;
+}
+
 static inline void
 ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
                volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
                uint32_t ol_flags, union rte_hw_offload hw_offload)
 {
        uint32_t type_tucmd_mlhl;
-       uint32_t mss_l4len_idx;
+       uint32_t mss_l4len_idx = 0;
        uint32_t ctx_idx;
        uint32_t vlan_macip_lens;
        union rte_hw_offload offload_mask;
@@ -362,44 +408,61 @@ ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
        offload_mask.u64 = 0;
        type_tucmd_mlhl = 0;

+       /* Specify which HW CTX to upload. */
+       mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
+
        if (ol_flags & PKT_TX_VLAN_PKT) {
                offload_mask.vlan_tci = 0xffff;
        }

-       if (ol_flags & PKT_TX_IP_CKSUM) {
-               type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
+       /* check if TCP segmentation required for this packet */
+       if (ol_flags & PKT_TX_TCP_SEG) {
+               /* implies IP cksum and TCP cksum */
+               type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
+                       IXGBE_ADVTXD_TUCMD_L4T_TCP |
+                       IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
+
                offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
                offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
-       }
+               offload_mask.l4_len = HW_OFFLOAD_L4_LEN_MASK;
+               offload_mask.mss = 0xffff;
+               mss_l4len_idx |= hw_offload.mss << IXGBE_ADVTXD_MSS_SHIFT;
+               mss_l4len_idx |= hw_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
+       } else { /* no TSO, check if hardware checksum is needed */
+               if (ol_flags & PKT_TX_IP_CKSUM) {
+                       type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
+                       offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
+                       offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
+               }

-       /* Specify which HW CTX to upload. */
-       mss_l4len_idx = (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
-       switch (ol_flags & PKT_TX_L4_MASK) {
-       case PKT_TX_UDP_CKSUM:
-               type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
+               switch (ol_flags & PKT_TX_L4_MASK) {
+               case PKT_TX_UDP_CKSUM:
+                       type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
-               mss_l4len_idx |= sizeof(struct udp_hdr) << 
IXGBE_ADVTXD_L4LEN_SHIFT;
-               offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
-               offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
-               break;
-       case PKT_TX_TCP_CKSUM:
-               type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
+                       mss_l4len_idx |= sizeof(struct udp_hdr) << 
IXGBE_ADVTXD_L4LEN_SHIFT;
+                       offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
+                       offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
+                       break;
+               case PKT_TX_TCP_CKSUM:
+                       type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
-               mss_l4len_idx |= sizeof(struct tcp_hdr) << 
IXGBE_ADVTXD_L4LEN_SHIFT;
-               offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
-               offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
-               break;
-       case PKT_TX_SCTP_CKSUM:
-               type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
+                       mss_l4len_idx |= sizeof(struct tcp_hdr) << 
IXGBE_ADVTXD_L4LEN_SHIFT;
+                       offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
+                       offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
+                       offload_mask.l4_len = HW_OFFLOAD_L4_LEN_MASK;
+                       break;
+               case PKT_TX_SCTP_CKSUM:
+                       type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
-               mss_l4len_idx |= sizeof(struct sctp_hdr) << 
IXGBE_ADVTXD_L4LEN_SHIFT;
-               offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
-               offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
-               break;
-       default:
-               type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
+                       mss_l4len_idx |= sizeof(struct sctp_hdr) << 
IXGBE_ADVTXD_L4LEN_SHIFT;
+                       offload_mask.l2_len = HW_OFFLOAD_L2_LEN_MASK;
+                       offload_mask.l3_len = HW_OFFLOAD_L3_LEN_MASK;
+                       break;
+               default:
+                       type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
-               break;
+                       break;
+               }
        }

        txq->ctx_cache[ctx_idx].flags = ol_flags;
@@ -446,20 +509,25 @@ what_advctx_update(struct igb_tx_queue *txq, uint32_t 
flags,
 static inline uint32_t
 tx_desc_cksum_flags_to_olinfo(uint32_t ol_flags)
 {
-       static const uint32_t l4_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_TXSM};
-       static const uint32_t l3_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_IXSM};
-       uint32_t tmp;
-
-       tmp  = l4_olinfo[(ol_flags & PKT_TX_L4_MASK)  != PKT_TX_L4_NO_CKSUM];
-       tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
+       uint32_t tmp = 0;
+       if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
+               tmp |= IXGBE_ADVTXD_POPTS_TXSM;
+       if (ol_flags & PKT_TX_IP_CKSUM)
+               tmp |= IXGBE_ADVTXD_POPTS_IXSM;
+       if (ol_flags & PKT_TX_TCP_SEG)
+               tmp |= IXGBE_ADVTXD_POPTS_TXSM | IXGBE_ADVTXD_POPTS_IXSM;
        return tmp;
 }

 static inline uint32_t
-tx_desc_vlan_flags_to_cmdtype(uint32_t ol_flags)
+tx_desc_ol_flags_to_cmdtype(uint32_t ol_flags)
 {
-       static const uint32_t vlan_cmd[2] = {0, IXGBE_ADVTXD_DCMD_VLE};
-       return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
+       uint32_t cmdtype = 0;
+       if (ol_flags & PKT_TX_VLAN_PKT)
+               cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
+       if (ol_flags & PKT_TX_TCP_SEG)
+               cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
+       return cmdtype;
 }

 /* Default RS bit threshold values */
@@ -583,7 +651,8 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

                /* If hardware offload required */
                tx_ol_req = ol_flags &
-                       (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK);
+                       (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK |
+                       PKT_TX_TCP_SEG);
                if (tx_ol_req) {
                        /* If new context need be built or reuse the exist ctx. 
*/
                        ctx = what_advctx_update(txq, tx_ol_req,
@@ -702,7 +771,20 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                 */
                cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
                        IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
+
+               if (ol_flags & PKT_TX_TCP_SEG) {
+                       /* paylen in descriptor is not the packet
+                        * len but the tcp payload len if TSO is on */
+                       pkt_len -= (hw_offload.l2_len + hw_offload.l3_len +
+                               hw_offload.l4_len);
+
+                       /* the pseudo header checksum must be modified:
+                        * it should not include the ip_len */
+                       ixgbe_fix_tcp_phdr_cksum(tx_pkt);
+               }
+
                olinfo_status = (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+
 #ifdef RTE_LIBRTE_IEEE1588
                if (ol_flags & PKT_TX_IEEE1588_TMST)
                        cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
@@ -741,7 +823,7 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                         * This path will go through
                         * whatever new/reuse the context descriptor
                         */
-                       cmd_type_len  |= 
tx_desc_vlan_flags_to_cmdtype(ol_flags);
+                       cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
                        olinfo_status |= 
tx_desc_cksum_flags_to_olinfo(ol_flags);
                        olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
                }
@@ -3420,9 +3502,10 @@ ixgbe_dev_tx_init(struct rte_eth_dev *dev)
        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

-       /* Enable TX CRC (checksum offload requirement) */
+       /* Enable TX CRC (checksum offload requirement) and hw padding
+        * (TSO requirement) */
        hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
-       hlreg0 |= IXGBE_HLREG0_TXCRCEN;
+       hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
        IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);

        /* Setup the Base and Length of the Tx Descriptor Rings */
-- 
1.9.2

Reply via email to