[dpdk-dev] [PATCH] ixgbe: fix a problem with NIC TSO offload
From: Miroslaw Walukiewicz The patch fixes a minor issue with setting up the TSO feature for ixgbe NICs. The values for l4_len and tso_segsz were changed first by the tx offload mask and only then set up in the NIC descriptor. Signed-off-by: Mirek Walukiewicz --- lib/librte_pmd_ixgbe/ixgbe_rxtx.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c index 8559ef6..c9c3104 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c @@ -390,13 +390,13 @@ ixgbe_set_xmit_ctx(struct igb_tx_queue* txq, type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 | IXGBE_ADVTXD_TUCMD_L4T_TCP | IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT; + mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT; + mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT; tx_offload_mask.l2_len = ~0; tx_offload_mask.l3_len = ~0; tx_offload_mask.l4_len = ~0; tx_offload_mask.tso_segsz = ~0; - mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT; - mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT; } else { /* no TSO, check if hardware checksum is needed */ if (ol_flags & PKT_TX_IP_CKSUM) { type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
[dpdk-dev] [PATCH] dpdk: fix a crash during rte_table_hash_key16_ext overload
From: Miroslaw Walukiewicz The hash_key16_ext table allocates cache entries to support table overload cases. A crash can occur when a cache entry is freed after use. The problem is in computing the index of the freed cache entry. The patch fixes this problem. Signed-off-by: Mirek Walukiewicz --- lib/librte_table/rte_table_hash_key16.c |5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/librte_table/rte_table_hash_key16.c b/lib/librte_table/rte_table_hash_key16.c index ee5f639..e0c99bd 100644 --- a/lib/librte_table/rte_table_hash_key16.c +++ b/lib/librte_table/rte_table_hash_key16.c @@ -535,9 +535,8 @@ rte_table_hash_entry_delete_key16_ext( memset(bucket, 0, sizeof(struct rte_bucket_4_16)); - bucket_index = (bucket - - ((struct rte_bucket_4_16 *) - f->memory)) - f->n_buckets; + bucket_index = (((uint8_t *)bucket - + (uint8_t *)f->memory)/f->bucket_size) - f->n_buckets; f->stack[f->stack_pos++] = bucket_index; }
[dpdk-dev] [PATCH 1/3] pmd: add new flag to indicate TX TSO operation on the packet
From: Miroslaw Walukiewicz Transmission of TCP packets can be accelerated by HW Transmit Segmentation Offload. With TSO, packets up to 64K can be transmitted. When this flag is set, the PMD driver will enable TCP segmentation. The new field tso_segsz is added to indicate the length of a TCP TSO segment. Signed-off-by: Mirek Walukiewicz --- lib/librte_mbuf/rte_mbuf.h |5 + 1 file changed, 5 insertions(+) diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index ddadc21..63cbc36 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -117,6 +117,9 @@ extern "C" { /* Use final bit of flags to indicate a control mbuf */ #define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */ +/* Bit 50 - TSO (TCP Transmit Segmenation Offload) */ +#define PKT_TX_TCP_TSO (1ULL << 50) /**< Mbuf needs TSO enabling */ + /** * Bit Mask to indicate what bits required for building TX context */ @@ -196,6 +199,8 @@ struct rte_mbuf { uint16_t l2_len:7; /**< L2 (MAC) Header Length. */ }; }; + /* field to support TSO segment size */ + uint32_t tso_segsz; } __rte_cache_aligned; /**
[dpdk-dev] [PATCH 3/3] pmd i40e: Enable Transmit Segmentation Offload for TCP traffic
From: Miroslaw Walukiewicz The patch enables the TSO HW feature for i40e PMD driver. The feature is reported by rte_dev_info_get() if enabled. Signed-off-by: Mirek Walukiewicz --- lib/librte_pmd_i40e/i40e_ethdev.c |1 + lib/librte_pmd_i40e/i40e_rxtx.c | 56 ++--- 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c b/lib/librte_pmd_i40e/i40e_ethdev.c index 46c43a7..01b21eb 100644 --- a/lib/librte_pmd_i40e/i40e_ethdev.c +++ b/lib/librte_pmd_i40e/i40e_ethdev.c @@ -1399,6 +1399,7 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM | DEV_TX_OFFLOAD_TCP_CKSUM | + DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_SCTP_CKSUM; dev_info->default_rxconf = (struct rte_eth_rxconf) { diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c index 2b53677..bc7af2b 100644 --- a/lib/librte_pmd_i40e/i40e_rxtx.c +++ b/lib/librte_pmd_i40e/i40e_rxtx.c @@ -50,6 +50,8 @@ #include #include #include +#include +#include #include "i40e_logs.h" #include "i40e/i40e_prototype.h" @@ -440,6 +442,11 @@ i40e_txd_enable_checksum(uint32_t ol_flags, *td_offset |= (l3_len >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; } + if (ol_flags & PKT_TX_TCP_TSO) { + *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; + /* td offset will be set next */ + return; + } /* Enable L4 checksum offloads */ switch (ol_flags & PKT_TX_L4_MASK) { case PKT_TX_TCP_CKSUM: @@ -1073,12 +1080,46 @@ i40e_calc_context_desc(uint64_t flags) #ifdef RTE_LIBRTE_IEEE1588 mask |= PKT_TX_IEEE1588_TMST; #endif + /* need for context descriptor when TSO enabled */ + mask |= PKT_TX_TCP_TSO; if (flags & mask) return 1; return 0; } +/* set i40e TSO context descriptor */ +static inline uint64_t +i40e_set_tso_ctx(struct rte_mbuf *mbuf, uint8_t l2_len, uint8_t l3_len, uint32_t *td_offset) +{ + uint64_t ctx_desc; + struct ipv4_hdr *ip; + struct tcp_hdr *th; + uint32_t tcp_hlen; + uint32_t hdrlen; + uint32_t paylen; + + /* set mss 
*/ + ip = (struct ipv4_hdr *) (rte_pktmbuf_mtod(mbuf, unsigned char *) + l2_len); + ip->hdr_checksum = 0; + ip->total_length = 0; + th = (struct tcp_hdr *)((caddr_t)ip + l3_len); + th->cksum = rte_in_pseudo(ip->src_addr, ip->dst_addr, I40E_HTONS(IPPROTO_TCP)); + tcp_hlen = (th->data_off >> 4) << 2; + *td_offset |= (tcp_hlen >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + hdrlen = l2_len + l3_len + tcp_hlen; + paylen = mbuf->pkt_len - hdrlen; + + ctx_desc = ((uint64_t)mbuf->tso_segsz << + I40E_TXD_CTX_QW1_MSS_SHIFT) | + ((uint64_t)paylen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | + ((uint64_t)I40E_TX_CTX_DESC_TSO << + I40E_TXD_CTX_QW1_CMD_SHIFT); + + return ctx_desc; +} + uint16_t i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { @@ -1192,12 +1233,19 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) rte_pktmbuf_free_seg(txe->mbuf); txe->mbuf = NULL; } + /* TSO enabled means no timestamp */ + if (ol_flags & PKT_TX_TCP_TSO) { + cd_type_cmd_tso_mss |= + i40e_set_tso_ctx(tx_pkt, l2_len, l3_len, &td_offset); + } + else { #ifdef RTE_LIBRTE_IEEE1588 - if (ol_flags & PKT_TX_IEEE1588_TMST) - cd_type_cmd_tso_mss |= - ((uint64_t)I40E_TX_CTX_DESC_TSYN << - I40E_TXD_CTX_QW1_CMD_SHIFT); + if (ol_flags & PKT_TX_IEEE1588_TMST) + cd_type_cmd_tso_mss |= + ((uint64_t)I40E_TX_CTX_DESC_TSYN << + I40E_TXD_CTX_QW1_CMD_SHIFT); #endif + } ctx_txd->tunneling_params = rte_cpu_to_le_32(cd_tunneling_params); ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2);
[dpdk-dev] [PATCH 2/3] pmd: add new header containing TCP offload specific definitions
From: Miroslaw Walukiewicz The function for computing initial TCP header checksum. The file is common for both i40e and ixgbe PMD drivers Signed-off-by: Mirek Walukiewicz --- lib/librte_net/Makefile |3 + lib/librte_net/rte_tcp_off.h | 122 ++ 2 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 lib/librte_net/rte_tcp_off.h diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile index ad2e482..83e76d1 100644 --- a/lib/librte_net/Makefile +++ b/lib/librte_net/Makefile @@ -34,7 +34,8 @@ include $(RTE_SDK)/mk/rte.vars.mk CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 # install includes -SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h +SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h \ +rte_tcp_off.h include $(RTE_SDK)/mk/rte.install.mk diff --git a/lib/librte_net/rte_tcp_off.h b/lib/librte_net/rte_tcp_off.h new file mode 100644 index 000..6143396 --- /dev/null +++ b/lib/librte_net/rte_tcp_off.h @@ -0,0 +1,122 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *notice, this list of conditions and the following disclaimer in the + *documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + *must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + *may be used to endorse or promote products derived from this software + *without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.h8.3 (Berkeley) 1/3/94 + * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $ + */ + +#ifndef _RTE_TCP_OFF_H_ +#define _RTE_TCP_OFF_H_ + +/** + * @file + * + * TCP offload -related defines + */ + +#include + +#ifdef __
[dpdk-dev] [PATCH] pmd: Add generic support for TCP TSO (Transmit Segmentation Offload)
From: Miroslaw Walukiewicz Add new PKT_TX_TCP_SEG flag Add new fields in the tx offload fields indicating MSS and L4 len Signed-off-by: Mirek Walukiewicz --- lib/librte_mbuf/rte_mbuf.h | 23 ++- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index ddadc21..bcb09b9 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -114,6 +114,9 @@ extern "C" { /* Bit 51 - IEEE1588*/ #define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to timestamp. */ +/* Bit 49 - TCP transmit segmenation offload */ +#define PKT_TX_TCP_SEG (1ULL << 49) /**< TX TSO offload */ + /* Use final bit of flags to indicate a control mbuf */ #define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */ @@ -189,12 +192,22 @@ struct rte_mbuf { struct rte_mbuf *next;/**< Next segment of scattered packet. */ /* fields to support TX offloads */ - union { - uint16_t l2_l3_len; /**< combined l2/l3 lengths as single var */ - struct { - uint16_t l3_len:9; /**< L3 (IP) Header Length. */ - uint16_t l2_len:7; /**< L2 (MAC) Header Length. */ + /* two bytes - l2/l3 len for compatibility (endian issues) +* two bytes - reseved for alignment +* two bytes - l4 len (TCP/UDP) header len +* two bytes - TCP tso segment size +*/ + struct { + union { + uint16_t l2_l3_len; /**< combined l2/l3 len */ + struct { + uint16_t l3_len:9; /**< L3 (IP) Header */ + uint16_t l2_len:7; /**< L2 (MAC) Header */ + }; }; + uint16_t reserved_tx_offload; + uint16_t l4_len;/**< TCP/UDP header len */ + uint16_t tso_segsz; /**< TCP TSO segment size */ }; } __rte_cache_aligned;
[dpdk-dev] [PATCH v2] pmd: Add generic support for TCP TSO (Transmit Segmentation Offload)
From: Miroslaw Walukiewicz The NICs supported by DPDK are able to accelerate TCP traffic by segmentation offload. The application prepares a packet with a valid TCP header and a size of up to 64K, and the NIC performs packet segmentation, generating valid checksums and TCP segments. The patch defines a generic support for TSO offload. - Add new PKT_TX_TCP_SEG flag. Only packets with this flag set in ol_flags will be handled as TSO packets. - Add new fields indicating TCP TSO segment size and TCP header len. TSO requires the application to set the following fields in the mbuf. 1. L2 header len including MAC/VLANs/SNAP if present 2. L3 header len including IP options 3. L4 header len (new field) including TCP options 4. tso_segsz (new field) the size of TCP segment The application is obliged to compute the pseudo header checksum instead of the full TCP checksum and put it in the TCP header csum field. To handle the complexity of creating the combined l2_l3_len field, a new macro RTE_MBUF_TO_L2_L3_LEN() is defined to retrieve this part of rte_mbuf. Signed-off-by: Mirek Walukiewicz --- app/test-pmd/testpmd.c|3 ++- lib/librte_mbuf/rte_mbuf.h| 27 +-- lib/librte_pmd_e1000/igb_rxtx.c |2 +- lib/librte_pmd_ixgbe/ixgbe_rxtx.c |2 +- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index f76406f..d8fd025 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -408,7 +408,8 @@ testpmd_mbuf_ctor(struct rte_mempool *mp, mb->ol_flags = 0; mb->data_off = RTE_PKTMBUF_HEADROOM; mb->nb_segs = 1; - mb->l2_l3_len = 0; + mb->l2_len = 0; + mb->l3_len = 0; mb->vlan_tci = 0; mb->hash.rss = 0; } diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index ddadc21..2e2e315 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -114,6 +114,9 @@ extern "C" { /* Bit 51 - IEEE1588*/ #define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to timestamp. 
*/ +/* Bit 49 - TCP transmit segmenation offload */ +#define PKT_TX_TCP_SEG (1ULL << 49) /**< TX TSO offload */ + /* Use final bit of flags to indicate a control mbuf */ #define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */ @@ -189,16 +192,28 @@ struct rte_mbuf { struct rte_mbuf *next;/**< Next segment of scattered packet. */ /* fields to support TX offloads */ - union { - uint16_t l2_l3_len; /**< combined l2/l3 lengths as single var */ + /* two bytes - l2 len (including MAC/VLANs/SNAP if present) +* two bytes - l3 len (including IP options) +* two bytes - l4 len TCP/UDP header len - including TCP options +* two bytes - TCP tso segment size +*/ + union{ + uint64_t l2_l3_l4_tso_seg; /**< combined for easy fetch */ struct { - uint16_t l3_len:9; /**< L3 (IP) Header Length. */ - uint16_t l2_len:7; /**< L2 (MAC) Header Length. */ + uint16_t l3_len; /**< L3 (IP) Header */ + uint16_t l2_len; /**< L2 (MAC) Header */ + uint16_t l4_len; /**< TCP/UDP header len */ + uint16_t tso_segsz; /**< TCP TSO segment size */ }; }; } __rte_cache_aligned; /** + * Given the rte_mbuf returns the l2_l3_len combined + */ +#define RTE_MBUF_TO_L2_L3_LEN(mb) (uint32_t)(((mb)->l2_len << 16) | (mb)->l3_len) + +/** * Given the buf_addr returns the pointer to corresponding mbuf. 
*/ #define RTE_MBUF_FROM_BADDR(ba) (((struct rte_mbuf *)(ba)) - 1) @@ -545,7 +560,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m) { m->next = NULL; m->pkt_len = 0; - m->l2_l3_len = 0; + m->l2_l3_l4_tso_seg = 0; m->vlan_tci = 0; m->nb_segs = 1; m->port = 0xff; @@ -613,7 +628,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *md) mi->data_len = md->data_len; mi->port = md->port; mi->vlan_tci = md->vlan_tci; - mi->l2_l3_len = md->l2_l3_len; + mi->l2_l3_l4_tso_seg = md->l2_l3_l4_tso_seg; mi->hash = md->hash; mi->next = NULL; diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c index f09c525..0f3248e 100644 --- a/lib/librte_pmd_e1000/igb_rxtx.c +++ b/lib/librte_pmd_e1000/igb_rxtx.c @@ -399,7 +399,7 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, ol_flags = tx_pkt->ol_flags; vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci; - vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len; + vlan_macip_lens.f.l2_l3_len = RTE_MBUF_TO_L2_L3_LEN(tx_pkt); tx_ol_req = ol_flags & PKT_TX_OFFLOAD_MASK;