Hi Wenzhuo,

> -----Original Message-----
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Wenzhuo Lu
> Sent: Thursday, January 14, 2016 1:39 AM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v2 5/6] ixgbe: support VxLAN & NVGRE TX checksum off-load
> 
> The patch adds VxLAN & NVGRE TX checksum off-load. When the flag for
> outer IP header checksum offload is set, we'll set the context
> descriptor to enable this checksum off-load.
> 
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu at intel.com>
> ---
>  drivers/net/ixgbe/ixgbe_rxtx.c | 52 ++++++++++++++++++++++++++++++++++--------
>  drivers/net/ixgbe/ixgbe_rxtx.h |  6 ++++-
>  lib/librte_mbuf/rte_mbuf.h     |  2 ++
>  3 files changed, 49 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 512ac3a..fea2495 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -85,7 +85,8 @@
>               PKT_TX_VLAN_PKT |                \
>               PKT_TX_IP_CKSUM |                \
>               PKT_TX_L4_MASK |                 \
> -             PKT_TX_TCP_SEG)
> +             PKT_TX_TCP_SEG |                 \
> +             PKT_TX_OUTER_IP_CKSUM)

I think you also need to update dev_info.tx_offload_capa;
I couldn't find where you are doing it.
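
Something like the line below is what I'd expect (a sketch only - I'm assuming
the generic DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM bit from rte_ethdev.h is the right
capability to advertise here; if there is no matching generic bit yet, a new
one would have to be added there first):

	/* in ixgbe_dev_info_get(), next to the other tx_offload_capa bits */
	dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;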

> 
>  static inline struct rte_mbuf *
>  rte_rxmbuf_alloc(struct rte_mempool *mp)
> @@ -364,9 +365,11 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
>       uint32_t ctx_idx;
>       uint32_t vlan_macip_lens;
>       union ixgbe_tx_offload tx_offload_mask;
> +     uint32_t seqnum_seed = 0;
> 
>       ctx_idx = txq->ctx_curr;
> -     tx_offload_mask.data = 0;
> +     tx_offload_mask.data[0] = 0;
> +     tx_offload_mask.data[1] = 0;
>       type_tucmd_mlhl = 0;
> 
>       /* Specify which HW CTX to upload. */
> @@ -430,9 +433,20 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
>               }
>       }
> 
> +     if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
> +             tx_offload_mask.outer_l3_len |= ~0;
> +             tx_offload_mask.outer_l2_len |= ~0;
> +             seqnum_seed |= tx_offload.outer_l3_len
> +                            << IXGBE_ADVTXD_OUTER_IPLEN;
> +             seqnum_seed |= tx_offload.outer_l2_len
> +                            << IXGBE_ADVTXD_TUNNEL_LEN;
> +     }

I don't have an X550 card on hand, but reading through the datasheet, this
doesn't seem right.
When OUTER_IP_CKSUM is enabled, MACLEN becomes outer_l2_len and
TUNNEL_LEN should be: outer_l4_len + tunnel_hdr_len + inner_l2_len.
So I think that in our case TUNNEL_LEN should be set to l2_len.
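
I.e., something along these lines (untested, just to illustrate my reading of
the datasheet; it assumes the app puts outer_l4_len + tunnel_hdr_len +
inner_l2_len into mbuf l2_len, as described in rte_mbuf.h for outer checksum
offloads):

	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
		tx_offload_mask.outer_l2_len |= ~0;
		tx_offload_mask.outer_l3_len |= ~0;
		tx_offload_mask.l2_len |= ~0;
		seqnum_seed |= tx_offload.outer_l3_len
			       << IXGBE_ADVTXD_OUTER_IPLEN;
		/* l2_len here covers outer L4 + tunnel hdr + inner MAC */
		seqnum_seed |= tx_offload.l2_len
			       << IXGBE_ADVTXD_TUNNEL_LEN;
	}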

> +
>       txq->ctx_cache[ctx_idx].flags = ol_flags;
> -     txq->ctx_cache[ctx_idx].tx_offload.data  =
> -             tx_offload_mask.data & tx_offload.data;
> +     txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
> +             tx_offload_mask.data[0] & tx_offload.data[0];
> +     txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
> +             tx_offload_mask.data[1] & tx_offload.data[1];
>       txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
> 
>       ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
> @@ -441,7 +455,7 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
>       vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
>       ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
>       ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
> -     ctx_txd->seqnum_seed     = 0;
> +     ctx_txd->seqnum_seed     = seqnum_seed;
>  }
> 
>  /*
> @@ -454,16 +468,24 @@ what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
>  {
>       /* If match with the current used context */
>       if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
> -             (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
> -             (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
> +             (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
> +             (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
> +              & tx_offload.data[0])) &&
> +             (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
> +             (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
> +              & tx_offload.data[1])))) {
>                       return txq->ctx_curr;
>       }
> 
>       /* What if match with the next context  */
>       txq->ctx_curr ^= 1;
>       if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
> -             (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
> -             (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
> +             (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
> +             (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
> +              & tx_offload.data[0])) &&
> +             (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
> +             (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
> +              & tx_offload.data[1])))) {
>                       return txq->ctx_curr;
>       }
> 
> @@ -492,6 +514,12 @@ tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
>               cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
>       if (ol_flags & PKT_TX_TCP_SEG)
>               cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
> +     if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
> +             cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
> +     if (ol_flags & PKT_TX_VXLAN_PKT)
> +             cmdtype &= ~(1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
> +     else
> +             cmdtype |= (1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
>       return cmdtype;
>  }
> 
> @@ -588,8 +616,10 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>       uint64_t tx_ol_req;
>       uint32_t ctx = 0;
>       uint32_t new_ctx;
> -     union ixgbe_tx_offload tx_offload = {0};
> +     union ixgbe_tx_offload tx_offload;
> 
> +     tx_offload.data[0] = 0;
> +     tx_offload.data[1] = 0;
>       txq = tx_queue;
>       sw_ring = txq->sw_ring;
>       txr     = txq->tx_ring;
> @@ -623,6 +653,8 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>                       tx_offload.l4_len = tx_pkt->l4_len;
>                       tx_offload.vlan_tci = tx_pkt->vlan_tci;
>                       tx_offload.tso_segsz = tx_pkt->tso_segsz;
> +                     tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
> +                     tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
> 
>                       /* If new context need be built or reuse the exist ctx. */
>                       ctx = what_advctx_update(txq, tx_ol_req,
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 475a800..c15f9fa 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -163,7 +163,7 @@ enum ixgbe_advctx_num {
> 
>  /** Offload features */
>  union ixgbe_tx_offload {
> -     uint64_t data;
> +     uint64_t data[2];

I wonder whether there is any performance impact from increasing the size of
tx_offload?
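
The new outer_* fields live entirely in data[1], so for the common non-tunnel
case the second compare is against two zero words. If it still shows up in
benchmarks, a rough alternative might be a single 128-bit compare (illustrative
only; SSE2 is assumed, which the ixgbe PMD already requires on x86):

	#include <emmintrin.h>

	/* hypothetical helper: compare both 64-bit words of
	 * ixgbe_tx_offload in one 128-bit operation */
	static inline int
	tx_offload_match(const union ixgbe_tx_offload *cached,
			 const union ixgbe_tx_offload *mask,
			 const union ixgbe_tx_offload *req)
	{
		__m128i c = _mm_loadu_si128((const __m128i *)cached->data);
		__m128i m = _mm_loadu_si128((const __m128i *)mask->data);
		__m128i r = _mm_loadu_si128((const __m128i *)req->data);
		__m128i eq = _mm_cmpeq_epi32(c, _mm_and_si128(m, r));

		return _mm_movemask_epi8(eq) == 0xFFFF;
	}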

>       struct {
>               uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
>               uint64_t l3_len:9; /**< L3 (IP) Header Length. */
> @@ -171,6 +171,10 @@ union ixgbe_tx_offload {
>               uint64_t tso_segsz:16; /**< TCP TSO segment size */
>               uint64_t vlan_tci:16;
>               /**< VLAN Tag Control Identifier (CPU order). */
> +
> +             /* fields for TX offloading of tunnels */
> +             uint64_t outer_l3_len:8; /**< Outer L3 (IP) Hdr Length. */
> +             uint64_t outer_l2_len:8; /**< Outer L2 (MAC) Hdr Length. */
>       };
>  };
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 5ad5e59..1bda00e 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -103,6 +103,8 @@ extern "C" {
> 
>  /* add new TX flags here */
> 
> +#define PKT_TX_VXLAN_PKT      (1ULL << 48) /**< TX packet is a VxLAN packet. */
> +

From reading the X550 spec, I don't really understand why we need to specify
whether it is a GRE or VXLAN packet, so probably we don't need that flag at
all for now?
If we really do, it might be worth organising it like PKT_TX_L4_CKSUM (as an
enum) and reserving a few values for future expansion (2 or 3 bits?).
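
E.g., something like this (names and bit positions purely illustrative):

	#define PKT_TX_TUNNEL_VXLAN  (1ULL << 48) /**< VxLAN tunnel packet */
	#define PKT_TX_TUNNEL_GRE    (2ULL << 48) /**< GRE/NVGRE tunnel packet */
	/* value (3ULL << 48) left reserved for a future tunnel type */
	#define PKT_TX_TUNNEL_MASK   (3ULL << 48) /**< 2 bits for tunnel type */

That way zero keeps the current "no tunnel" meaning and we don't burn one
flag bit per tunnel type.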

Konstantin
