Hi Wenzhuo,

> -----Original Message-----
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Wenzhuo Lu
> Sent: Thursday, January 14, 2016 1:39 AM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v2 5/6] ixgbe: support VxLAN & NVGRE TX checksum
> off-load
>
> The patch adds VxLAN & NVGRE TX checksum off-load. When the flag of
> outer IP header checksum offload is set, we'll set the context
> descriptor to enable this checksum off-load.
>
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu at intel.com>
> ---
>  drivers/net/ixgbe/ixgbe_rxtx.c | 52 ++++++++++++++++++++++++++++++++++--------
>  drivers/net/ixgbe/ixgbe_rxtx.h |  6 ++++-
>  lib/librte_mbuf/rte_mbuf.h     |  2 ++
>  3 files changed, 49 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 512ac3a..fea2495 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -85,7 +85,8 @@
>  		PKT_TX_VLAN_PKT |	\
>  		PKT_TX_IP_CKSUM |	\
>  		PKT_TX_L4_MASK |	\
> -		PKT_TX_TCP_SEG)
> +		PKT_TX_TCP_SEG |	\
> +		PKT_TX_OUTER_IP_CKSUM)

I think you also need to update dev_info.tx_offload_capa; I couldn't find where you are doing it. See the sketch below for what I have in mind.
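A minimal sketch, to go into ixgbe_dev_info_get() after the existing tx_offload_capa assignment. Note DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM is an assumed capability flag name (it would have to be added to rte_ethdev.h), and gating on the X550 MAC types is likewise my guess at the right condition:

	/* Sketch only: DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM is an assumed flag,
	 * and the MAC-type check below is illustrative. */
	struct ixgbe_hw *hw =
		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* advertise outer IP checksum off-load only on HW that supports it */
	if (hw->mac.type == ixgbe_mac_X550 ||
	    hw->mac.type == ixgbe_mac_X550EM_x)
		dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;

That way an application can discover the capability through rte_eth_dev_info_get() instead of setting the flag blindly.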
>
>  static inline struct rte_mbuf *
>  rte_rxmbuf_alloc(struct rte_mempool *mp)
> @@ -364,9 +365,11 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
>  	uint32_t ctx_idx;
>  	uint32_t vlan_macip_lens;
>  	union ixgbe_tx_offload tx_offload_mask;
> +	uint32_t seqnum_seed = 0;
>
>  	ctx_idx = txq->ctx_curr;
> -	tx_offload_mask.data = 0;
> +	tx_offload_mask.data[0] = 0;
> +	tx_offload_mask.data[1] = 0;
>  	type_tucmd_mlhl = 0;
>
>  	/* Specify which HW CTX to upload. */
> @@ -430,9 +433,20 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
>  		}
>  	}
>
> +	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
> +		tx_offload_mask.outer_l3_len |= ~0;
> +		tx_offload_mask.outer_l2_len |= ~0;
> +		seqnum_seed |= tx_offload.outer_l3_len
> +			       << IXGBE_ADVTXD_OUTER_IPLEN;
> +		seqnum_seed |= tx_offload.outer_l2_len
> +			       << IXGBE_ADVTXD_TUNNEL_LEN;
> +	}

I don't have an X550 card off-hand, but from reading through the datasheet this doesn't seem right.
When OUTER_IP_CKSUM is enabled, MACLEN becomes outer_l2_len, and TUNNEL_LEN should be: outer_l4_len + tunnel_hdr_len + inner_l2_len.
So I think that in our case TUNNEL_LEN should be set to l2_len; see the sketch below.
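To make it concrete, this is roughly the hunk I would expect instead. It is only a sketch, and it assumes the mbuf convention that for tunnel packets l2_len covers outer L4 header + tunnel header + inner L2 header whenever the outer_l2_len/outer_l3_len fields are in use:

	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
		tx_offload_mask.outer_l2_len |= ~0;
		tx_offload_mask.outer_l3_len |= ~0;
		tx_offload_mask.l2_len |= ~0;
		seqnum_seed |= tx_offload.outer_l3_len
			       << IXGBE_ADVTXD_OUTER_IPLEN;
		/* TUNNEL_LEN = outer L4 hdr + tunnel hdr + inner L2 hdr,
		 * which is what l2_len holds under the assumption above. */
		seqnum_seed |= tx_offload.l2_len
			       << IXGBE_ADVTXD_TUNNEL_LEN;
	}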
> +
>  	txq->ctx_cache[ctx_idx].flags = ol_flags;
> -	txq->ctx_cache[ctx_idx].tx_offload.data =
> -		tx_offload_mask.data & tx_offload.data;
> +	txq->ctx_cache[ctx_idx].tx_offload.data[0] =
> +		tx_offload_mask.data[0] & tx_offload.data[0];
> +	txq->ctx_cache[ctx_idx].tx_offload.data[1] =
> +		tx_offload_mask.data[1] & tx_offload.data[1];
>  	txq->ctx_cache[ctx_idx].tx_offload_mask = tx_offload_mask;
>
>  	ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
> @@ -441,7 +455,7 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
>  	vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
>  	ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
>  	ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
> -	ctx_txd->seqnum_seed = 0;
> +	ctx_txd->seqnum_seed = seqnum_seed;
>  }
>
>  /*
> @@ -454,16 +468,24 @@ what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
>  {
>  	/* If match with the current used context */
>  	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
> -		(txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
> -		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
> +		& tx_offload.data[0])) &&
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
> +		& tx_offload.data[1])))) {
>  			return txq->ctx_curr;
>  	}
>
>  	/* What if match with the next context */
>  	txq->ctx_curr ^= 1;
>  	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
> -		(txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
> -		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
> +		& tx_offload.data[0])) &&
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
> +		& tx_offload.data[1])))) {
>  			return txq->ctx_curr;
>  	}
>
> @@ -492,6 +514,12 @@ tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
>  		cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
>  	if (ol_flags & PKT_TX_TCP_SEG)
>  		cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
> +	if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
> +		cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
> +	if (ol_flags & PKT_TX_VXLAN_PKT)
> +		cmdtype &= ~(1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
> +	else
> +		cmdtype |= (1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
>  	return cmdtype;
>  }
>
> @@ -588,8 +616,10 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  	uint64_t tx_ol_req;
>  	uint32_t ctx = 0;
>  	uint32_t new_ctx;
> -	union ixgbe_tx_offload tx_offload = {0};
> +	union ixgbe_tx_offload tx_offload;
>
> +	tx_offload.data[0] = 0;
> +	tx_offload.data[1] = 0;
>  	txq = tx_queue;
>  	sw_ring = txq->sw_ring;
>  	txr = txq->tx_ring;
> @@ -623,6 +653,8 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  			tx_offload.l4_len = tx_pkt->l4_len;
>  			tx_offload.vlan_tci = tx_pkt->vlan_tci;
>  			tx_offload.tso_segsz = tx_pkt->tso_segsz;
> +			tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
> +			tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
>
>  			/* If new context need be built or reuse the exist ctx. */
>  			ctx = what_advctx_update(txq, tx_ol_req,
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 475a800..c15f9fa 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -163,7 +163,7 @@ enum ixgbe_advctx_num {
>
>  /** Offload features */
>  union ixgbe_tx_offload {
> -	uint64_t data;
> +	uint64_t data[2];

I wonder whether there is any performance impact from increasing the size of tx_offload?

>  	struct {
>  		uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
>  		uint64_t l3_len:9; /**< L3 (IP) Header Length. */
> @@ -171,6 +171,10 @@ union ixgbe_tx_offload {
>  		uint64_t tso_segsz:16; /**< TCP TSO segment size */
>  		uint64_t vlan_tci:16;
>  		/**< VLAN Tag Control Identifier (CPU order). */
> +
> +		/* fields for TX offloading of tunnels */
> +		uint64_t outer_l3_len:8; /**< Outer L3 (IP) Hdr Length. */
> +		uint64_t outer_l2_len:8; /**< Outer L2 (MAC) Hdr Length. */
>  	};
>  };
>
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 5ad5e59..1bda00e 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -103,6 +103,8 @@ extern "C" {
>
>  /* add new TX flags here */
>
> +#define PKT_TX_VXLAN_PKT (1ULL << 48) /**< TX packet is a VxLAN packet. */
> +

From reading the X550 spec, I don't really understand why we need to specify whether it is a GRE or a VXLAN packet, so probably we don't need that flag at all for now?
If we really do, it might be worth organising it like PKT_TX_L4_CKSUM (as an enum) and reserving a few values for future expansion (2 or 3 bits?); see the sketch below.
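Purely as an illustration of what I mean (the names and bit positions are placeholders, not a concrete proposal):

	/* Illustrative only: a 2-bit tunnel-type field instead of a
	 * single boolean VXLAN flag, leaving room for future types. */
	#define PKT_TX_TUNNEL_VXLAN (1ULL << 48) /**< TX packet is VxLAN encapsulated. */
	#define PKT_TX_TUNNEL_GRE   (2ULL << 48) /**< TX packet is GRE/NVGRE encapsulated. */
	#define PKT_TX_TUNNEL_MASK  (3ULL << 48) /**< 2 bits reserved for tunnel type. */

The driver would then test (ol_flags & PKT_TX_TUNNEL_MASK) against these values, the same way PKT_TX_L4_MASK works today, rather than inverting a single flag as tx_desc_ol_flags_to_cmdtype() does now.

Konstantin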