Hi all,

Could you please review this patch and share any suggestions you may have?

Thanks,
Mingjin

> -----Original Message-----
> From: Ye, MingjinX <mingjinx...@intel.com>
> Sent: 2022年11月8日 21:28
> To: dev@dpdk.org
> Cc: Yang, Qiming <qiming.y...@intel.com>; sta...@dpdk.org; Zhou, YidingX
> <yidingx.z...@intel.com>; Ye, MingjinX <mingjinx...@intel.com>;
> Richardson, Bruce <bruce.richard...@intel.com>; Konstantin Ananyev
> <konstantin.v.anan...@yandex.ru>; Zhang, Qi Z <qi.z.zh...@intel.com>; Lu,
> Wenzhuo <wenzhuo...@intel.com>; Junyu Jiang <junyux.ji...@intel.com>;
> Rong, Leyi <leyi.r...@intel.com>; Ajit Khaparde
> <ajit.khapa...@broadcom.com>; Jerin Jacob <jer...@marvell.com>; Xu,
> Rosen <rosen...@intel.com>; Hemant Agrawal
> <hemant.agra...@nxp.com>; Wisam Jaddo <wis...@nvidia.com>
> Subject: [PATCH v5 1/2] net/ice: fix vlan offload
> 
> The VLAN tag and flag in the Rx descriptor are not processed on the vector
> path, so the upper-layer application cannot fetch the TCI from the mbuf.
> 
> This patch adds handling of VLAN Rx offloading.
> 
> Fixes: c68a52b8b38c ("net/ice: support vector SSE in Rx")
> Fixes: ece1f8a8f1c8 ("net/ice: switch to flexible descriptor in SSE path")
> Fixes: 12443386a0b0 ("net/ice: support flex Rx descriptor RxDID22")
> Fixes: 214f452f7d5f ("net/ice: add AVX2 offload Rx")
> Fixes: 7f85d5ebcfe1 ("net/ice: add AVX512 vector path")
> Fixes: 295968d17407 ("ethdev: add namespace")
> Fixes: 808a17b3c1e6 ("net/ice: add Rx AVX512 offload path")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Mingjin Ye <mingjinx...@intel.com>
> 
> v3:
>       * Fix macros in ice_rxtx_vec_sse.c source file.
> v4:
>       * Fix ice_rx_desc_to_olflags_v definition in ice_rxtx_vec_sse.c source file.
> ---
>  drivers/net/ice/ice_rxtx_vec_avx2.c   | 135 +++++++++++++++++-----
>  drivers/net/ice/ice_rxtx_vec_avx512.c | 154 +++++++++++++++++++++-----
>  drivers/net/ice/ice_rxtx_vec_sse.c    | 132 ++++++++++++++++------
>  3 files changed, 332 insertions(+), 89 deletions(-)
> 
> diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c
> b/drivers/net/ice/ice_rxtx_vec_avx2.c
> index 31d6af42fd..bddfd6cf65 100644
> --- a/drivers/net/ice/ice_rxtx_vec_avx2.c
> +++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
> @@ -474,7 +474,7 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue
> *rxq, struct rte_mbuf **rx_pkts,
>                        * will cause performance drop to get into this
> context.
>                        */
>                       if (rxq->vsi->adapter->pf.dev_data-
> >dev_conf.rxmode.offloads &
> -                                     RTE_ETH_RX_OFFLOAD_RSS_HASH) {
> +                                     (RTE_ETH_RX_OFFLOAD_RSS_HASH |
> RTE_ETH_RX_OFFLOAD_VLAN)) {
>                               /* load bottom half of every 32B desc */
>                               const __m128i raw_desc_bh7 =
>                                       _mm_load_si128
> @@ -529,33 +529,112 @@ _ice_recv_raw_pkts_vec_avx2(struct
> ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>                                * to shift the 32b RSS hash value to the
>                                * highest 32b of each 128b before mask
>                                */
> -                             __m256i rss_hash6_7 =
> -                                     _mm256_slli_epi64(raw_desc_bh6_7,
> 32);
> -                             __m256i rss_hash4_5 =
> -                                     _mm256_slli_epi64(raw_desc_bh4_5,
> 32);
> -                             __m256i rss_hash2_3 =
> -                                     _mm256_slli_epi64(raw_desc_bh2_3,
> 32);
> -                             __m256i rss_hash0_1 =
> -                                     _mm256_slli_epi64(raw_desc_bh0_1,
> 32);
> -
> -                             __m256i rss_hash_msk =
> -                                     _mm256_set_epi32(0xFFFFFFFF, 0, 0,
> 0,
> -                                                      0xFFFFFFFF, 0, 0, 0);
> -
> -                             rss_hash6_7 = _mm256_and_si256
> -                                             (rss_hash6_7, rss_hash_msk);
> -                             rss_hash4_5 = _mm256_and_si256
> -                                             (rss_hash4_5, rss_hash_msk);
> -                             rss_hash2_3 = _mm256_and_si256
> -                                             (rss_hash2_3, rss_hash_msk);
> -                             rss_hash0_1 = _mm256_and_si256
> -                                             (rss_hash0_1, rss_hash_msk);
> -
> -                             mb6_7 = _mm256_or_si256(mb6_7,
> rss_hash6_7);
> -                             mb4_5 = _mm256_or_si256(mb4_5,
> rss_hash4_5);
> -                             mb2_3 = _mm256_or_si256(mb2_3,
> rss_hash2_3);
> -                             mb0_1 = _mm256_or_si256(mb0_1,
> rss_hash0_1);
> -                     } /* if() on RSS hash parsing */
> +                             if (rxq->vsi->adapter->pf.dev_data-
> >dev_conf.rxmode.offloads &
> +
>       RTE_ETH_RX_OFFLOAD_RSS_HASH) {
> +                                     __m256i rss_hash6_7 =
> +
>       _mm256_slli_epi64(raw_desc_bh6_7, 32);
> +                                     __m256i rss_hash4_5 =
> +
>       _mm256_slli_epi64(raw_desc_bh4_5, 32);
> +                                     __m256i rss_hash2_3 =
> +
>       _mm256_slli_epi64(raw_desc_bh2_3, 32);
> +                                     __m256i rss_hash0_1 =
> +
>       _mm256_slli_epi64(raw_desc_bh0_1, 32);
> +
> +                                     __m256i rss_hash_msk =
> +
>       _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
> +                                                             0xFFFFFFFF, 0,
> 0, 0);
> +
> +                                     rss_hash6_7 = _mm256_and_si256
> +                                                     (rss_hash6_7,
> rss_hash_msk);
> +                                     rss_hash4_5 = _mm256_and_si256
> +                                                     (rss_hash4_5,
> rss_hash_msk);
> +                                     rss_hash2_3 = _mm256_and_si256
> +                                                     (rss_hash2_3,
> rss_hash_msk);
> +                                     rss_hash0_1 = _mm256_and_si256
> +                                                     (rss_hash0_1,
> rss_hash_msk);
> +
> +                                     mb6_7 = _mm256_or_si256(mb6_7,
> rss_hash6_7);
> +                                     mb4_5 = _mm256_or_si256(mb4_5,
> rss_hash4_5);
> +                                     mb2_3 = _mm256_or_si256(mb2_3,
> rss_hash2_3);
> +                                     mb0_1 = _mm256_or_si256(mb0_1,
> rss_hash0_1);
> +                             } /* if() on RSS hash parsing */
> +
> +                             if (rxq->vsi->adapter->pf.dev_data-
> >dev_conf.rxmode.offloads &
> +
>       RTE_ETH_RX_OFFLOAD_VLAN) {
> +                                     /* merge the status/error-1 bits into
> one register */
> +                                     const __m256i status1_4_7 =
> +
>       _mm256_unpacklo_epi32(raw_desc_bh6_7,
> +                                                     raw_desc_bh4_5);
> +                                     const __m256i status1_0_3 =
> +
>       _mm256_unpacklo_epi32(raw_desc_bh2_3,
> +                                                     raw_desc_bh0_1);
> +
> +                                     const __m256i status1_0_7 =
> +
>       _mm256_unpacklo_epi64(status1_4_7,
> +                                                     status1_0_3);
> +
> +                                     const __m256i l2tag2p_flag_mask =
> +
>       _mm256_set1_epi32(1 << 11);
> +
> +                                     __m256i l2tag2p_flag_bits =
> +                                                     _mm256_and_si256
> +                                                     (status1_0_7,
> l2tag2p_flag_mask);
> +
> +                                     l2tag2p_flag_bits =
> +
>       _mm256_srli_epi32(l2tag2p_flag_bits,
> +                                                                     11);
> +
> +                                     __m256i vlan_flags =
> _mm256_setzero_si256();
> +                                     const __m256i l2tag2_flags_shuf =
> +                                                     _mm256_set_epi8(0,
> 0, 0, 0,
> +                                                                     0, 0, 0,
> 0,
> +                                                                     0, 0, 0,
> 0,
> +                                                                     0, 0, 0,
> 0,
> +                                                                     /*
> end up 128-bits */
> +                                                                     0, 0, 0,
> 0,
> +                                                                     0, 0, 0,
> 0,
> +                                                                     0, 0, 0,
> 0,
> +                                                                     0, 0,
> +
>       RTE_MBUF_F_RX_VLAN |
> +
>       RTE_MBUF_F_RX_VLAN_STRIPPED,
> +                                                                     0);
> +                                     vlan_flags =
> +
>       _mm256_shuffle_epi8(l2tag2_flags_shuf,
> +                                                     l2tag2p_flag_bits);
> +
> +                                     /* merge with vlan_flags */
> +                                     mbuf_flags = _mm256_or_si256
> +                                                     (mbuf_flags,
> vlan_flags);
> +
> +                                     /* L2TAG2_2 */
> +                                     __m256i vlan_tci6_7 =
> +
>       _mm256_slli_si256(raw_desc_bh6_7, 4);
> +                                     __m256i vlan_tci4_5 =
> +
>       _mm256_slli_si256(raw_desc_bh4_5, 4);
> +                                     __m256i vlan_tci2_3 =
> +
>       _mm256_slli_si256(raw_desc_bh2_3, 4);
> +                                     __m256i vlan_tci0_1 =
> +
>       _mm256_slli_si256(raw_desc_bh0_1, 4);
> +
> +                                     const __m256i vlan_tci_msk =
> +                                                     _mm256_set_epi32(0,
> 0xFFFF0000, 0, 0,
> +                                                     0, 0xFFFF0000, 0, 0);
> +
> +                                     vlan_tci6_7 = _mm256_and_si256
> +
>       (vlan_tci6_7, vlan_tci_msk);
> +                                     vlan_tci4_5 = _mm256_and_si256
> +
>       (vlan_tci4_5, vlan_tci_msk);
> +                                     vlan_tci2_3 = _mm256_and_si256
> +
>       (vlan_tci2_3, vlan_tci_msk);
> +                                     vlan_tci0_1 = _mm256_and_si256
> +
>       (vlan_tci0_1, vlan_tci_msk);
> +
> +                                     mb6_7 = _mm256_or_si256(mb6_7,
> vlan_tci6_7);
> +                                     mb4_5 = _mm256_or_si256(mb4_5,
> vlan_tci4_5);
> +                                     mb2_3 = _mm256_or_si256(mb2_3,
> vlan_tci2_3);
> +                                     mb0_1 = _mm256_or_si256(mb0_1,
> vlan_tci0_1);
> +                             }
> +                     }
>  #endif
>               }
> 
> diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c
> b/drivers/net/ice/ice_rxtx_vec_avx512.c
> index 5bfd5152df..5d5e4bf3cd 100644
> --- a/drivers/net/ice/ice_rxtx_vec_avx512.c
> +++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
> @@ -585,7 +585,7 @@ _ice_recv_raw_pkts_vec_avx512(struct
> ice_rx_queue *rxq,
>                        * will cause performance drop to get into this
> context.
>                        */
>                       if (rxq->vsi->adapter->pf.dev_data-
> >dev_conf.rxmode.offloads &
> -                                     RTE_ETH_RX_OFFLOAD_RSS_HASH) {
> +                                     (RTE_ETH_RX_OFFLOAD_RSS_HASH |
> RTE_ETH_RX_OFFLOAD_VLAN)) {
>                               /* load bottom half of every 32B desc */
>                               const __m128i raw_desc_bh7 =
>                                       _mm_load_si128
> @@ -640,33 +640,131 @@ _ice_recv_raw_pkts_vec_avx512(struct
> ice_rx_queue *rxq,
>                                * to shift the 32b RSS hash value to the
>                                * highest 32b of each 128b before mask
>                                */
> -                             __m256i rss_hash6_7 =
> -                                     _mm256_slli_epi64(raw_desc_bh6_7,
> 32);
> -                             __m256i rss_hash4_5 =
> -                                     _mm256_slli_epi64(raw_desc_bh4_5,
> 32);
> -                             __m256i rss_hash2_3 =
> -                                     _mm256_slli_epi64(raw_desc_bh2_3,
> 32);
> -                             __m256i rss_hash0_1 =
> -                                     _mm256_slli_epi64(raw_desc_bh0_1,
> 32);
> -
> -                             __m256i rss_hash_msk =
> -                                     _mm256_set_epi32(0xFFFFFFFF, 0, 0,
> 0,
> -                                                      0xFFFFFFFF, 0, 0, 0);
> -
> -                             rss_hash6_7 = _mm256_and_si256
> -                                             (rss_hash6_7, rss_hash_msk);
> -                             rss_hash4_5 = _mm256_and_si256
> -                                             (rss_hash4_5, rss_hash_msk);
> -                             rss_hash2_3 = _mm256_and_si256
> -                                             (rss_hash2_3, rss_hash_msk);
> -                             rss_hash0_1 = _mm256_and_si256
> -                                             (rss_hash0_1, rss_hash_msk);
> -
> -                             mb6_7 = _mm256_or_si256(mb6_7,
> rss_hash6_7);
> -                             mb4_5 = _mm256_or_si256(mb4_5,
> rss_hash4_5);
> -                             mb2_3 = _mm256_or_si256(mb2_3,
> rss_hash2_3);
> -                             mb0_1 = _mm256_or_si256(mb0_1,
> rss_hash0_1);
> -                     } /* if() on RSS hash parsing */
> +                             if (rxq->vsi->adapter->pf.dev_data-
> >dev_conf.rxmode.offloads &
> +
>       RTE_ETH_RX_OFFLOAD_RSS_HASH) {
> +                                     __m256i rss_hash6_7 =
> +
>       _mm256_slli_epi64(raw_desc_bh6_7, 32);
> +                                     __m256i rss_hash4_5 =
> +
>       _mm256_slli_epi64(raw_desc_bh4_5, 32);
> +                                     __m256i rss_hash2_3 =
> +
>       _mm256_slli_epi64(raw_desc_bh2_3, 32);
> +                                     __m256i rss_hash0_1 =
> +
>       _mm256_slli_epi64(raw_desc_bh0_1, 32);
> +
> +                                     __m256i rss_hash_msk =
> +
>       _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
> +                                                             0xFFFFFFFF, 0,
> 0, 0);
> +
> +                                     rss_hash6_7 = _mm256_and_si256
> +                                                     (rss_hash6_7,
> rss_hash_msk);
> +                                     rss_hash4_5 = _mm256_and_si256
> +                                                     (rss_hash4_5,
> rss_hash_msk);
> +                                     rss_hash2_3 = _mm256_and_si256
> +                                                     (rss_hash2_3,
> rss_hash_msk);
> +                                     rss_hash0_1 = _mm256_and_si256
> +                                                     (rss_hash0_1,
> rss_hash_msk);
> +
> +                                     mb6_7 = _mm256_or_si256(mb6_7,
> rss_hash6_7);
> +                                     mb4_5 = _mm256_or_si256(mb4_5,
> rss_hash4_5);
> +                                     mb2_3 = _mm256_or_si256(mb2_3,
> rss_hash2_3);
> +                                     mb0_1 = _mm256_or_si256(mb0_1,
> rss_hash0_1);
> +                             } /* if() on RSS hash parsing */
> +
> +                             if (rxq->vsi->adapter->pf.dev_data-
> >dev_conf.rxmode.offloads &
> +
>       RTE_ETH_RX_OFFLOAD_VLAN) {
> +                                     /* merge the status/error-1 bits into
> one register */
> +                                     const __m256i status1_4_7 =
> +                                             _mm256_unpacklo_epi32
> +                                             (raw_desc_bh6_7,
> +                                             raw_desc_bh4_5);
> +                                     const __m256i status1_0_3 =
> +                                             _mm256_unpacklo_epi32
> +                                             (raw_desc_bh2_3,
> +                                             raw_desc_bh0_1);
> +
> +                                     const __m256i status1_0_7 =
> +                                             _mm256_unpacklo_epi64
> +                                             (status1_4_7, status1_0_3);
> +
> +                                     const __m256i l2tag2p_flag_mask =
> +                                             _mm256_set1_epi32
> +                                             (1 << 11);
> +
> +                                     __m256i l2tag2p_flag_bits =
> +                                             _mm256_and_si256
> +                                             (status1_0_7,
> +                                             l2tag2p_flag_mask);
> +
> +                                     l2tag2p_flag_bits =
> +                                             _mm256_srli_epi32
> +                                             (l2tag2p_flag_bits,
> +                                             11);
> +                                     const __m256i l2tag2_flags_shuf =
> +                                             _mm256_set_epi8
> +                                                     (0, 0, 0, 0,
> +                                                     0, 0, 0, 0,
> +                                                     0, 0, 0, 0,
> +                                                     0, 0, 0, 0,
> +                                                     /* end up 128-bits */
> +                                                     0, 0, 0, 0,
> +                                                     0, 0, 0, 0,
> +                                                     0, 0, 0, 0,
> +                                                     0, 0,
> +
>       RTE_MBUF_F_RX_VLAN |
> +
>       RTE_MBUF_F_RX_VLAN_STRIPPED,
> +                                                     0);
> +                                     __m256i vlan_flags =
> +                                             _mm256_shuffle_epi8
> +                                                     (l2tag2_flags_shuf,
> +                                                     l2tag2p_flag_bits);
> +
> +                                     /* merge with vlan_flags */
> +                                     mbuf_flags = _mm256_or_si256
> +                                                     (mbuf_flags,
> +                                                     vlan_flags);
> +
> +                                     /* L2TAG2_2 */
> +                                     __m256i vlan_tci6_7 =
> +                                             _mm256_slli_si256
> +                                                     (raw_desc_bh6_7, 4);
> +                                     __m256i vlan_tci4_5 =
> +                                             _mm256_slli_si256
> +                                                     (raw_desc_bh4_5, 4);
> +                                     __m256i vlan_tci2_3 =
> +                                             _mm256_slli_si256
> +                                                     (raw_desc_bh2_3, 4);
> +                                     __m256i vlan_tci0_1 =
> +                                             _mm256_slli_si256
> +                                                     (raw_desc_bh0_1, 4);
> +
> +                                     const __m256i vlan_tci_msk =
> +                                             _mm256_set_epi32
> +                                             (0, 0xFFFF0000, 0, 0,
> +                                             0, 0xFFFF0000, 0, 0);
> +
> +                                     vlan_tci6_7 = _mm256_and_si256
> +                                                     (vlan_tci6_7,
> +                                                     vlan_tci_msk);
> +                                     vlan_tci4_5 = _mm256_and_si256
> +                                                     (vlan_tci4_5,
> +                                                     vlan_tci_msk);
> +                                     vlan_tci2_3 = _mm256_and_si256
> +                                                     (vlan_tci2_3,
> +                                                     vlan_tci_msk);
> +                                     vlan_tci0_1 = _mm256_and_si256
> +                                                     (vlan_tci0_1,
> +                                                     vlan_tci_msk);
> +
> +                                     mb6_7 = _mm256_or_si256
> +                                                     (mb6_7, vlan_tci6_7);
> +                                     mb4_5 = _mm256_or_si256
> +                                                     (mb4_5, vlan_tci4_5);
> +                                     mb2_3 = _mm256_or_si256
> +                                                     (mb2_3, vlan_tci2_3);
> +                                     mb0_1 = _mm256_or_si256
> +                                                     (mb0_1, vlan_tci0_1);
> +                             }
> +                     }
>  #endif
>               }
> 
> diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c
> b/drivers/net/ice/ice_rxtx_vec_sse.c
> index fd94cedde3..cc5b8510dc 100644
> --- a/drivers/net/ice/ice_rxtx_vec_sse.c
> +++ b/drivers/net/ice/ice_rxtx_vec_sse.c
> @@ -100,9 +100,15 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
>       ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);  }
> 
> +#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
> +static inline void
> +ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
> __m128i descs_bh[4],
> +                      struct rte_mbuf **rx_pkts)
> +#else
>  static inline void
>  ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
>                        struct rte_mbuf **rx_pkts)
> +#endif
>  {
>       const __m128i mbuf_init = _mm_set_epi64x(0, rxq-
> >mbuf_initializer);
>       __m128i rearm0, rearm1, rearm2, rearm3; @@ -214,6 +220,38 @@
> ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
>       /* merge the flags */
>       flags = _mm_or_si128(flags, rss_vlan);
> 
> + #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
> +     if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
> +
>       RTE_ETH_RX_OFFLOAD_VLAN) {
> +             const __m128i l2tag2_mask =
> +                     _mm_set1_epi32(1 << 11);
> +             const __m128i vlan_tci0_1 =
> +                     _mm_unpacklo_epi32(descs_bh[0], descs_bh[1]);
> +             const __m128i vlan_tci2_3 =
> +                     _mm_unpacklo_epi32(descs_bh[2], descs_bh[3]);
> +             const __m128i vlan_tci0_3 =
> +                     _mm_unpacklo_epi64(vlan_tci0_1, vlan_tci2_3);
> +
> +             __m128i vlan_bits = _mm_and_si128(vlan_tci0_3,
> l2tag2_mask);
> +
> +             vlan_bits = _mm_srli_epi32(vlan_bits, 11);
> +
> +             const __m128i vlan_flags_shuf =
> +                     _mm_set_epi8(0, 0, 0, 0,
> +                                     0, 0, 0, 0,
> +                                     0, 0, 0, 0,
> +                                     0, 0,
> +                                     RTE_MBUF_F_RX_VLAN |
> +                                     RTE_MBUF_F_RX_VLAN_STRIPPED,
> +                                     0);
> +
> +             const __m128i vlan_flags =
> _mm_shuffle_epi8(vlan_flags_shuf,
> +vlan_bits);
> +
> +             /* merge with vlan_flags */
> +             flags = _mm_or_si128(flags, vlan_flags);
> +     }
> +#endif
> +
>       if (rxq->fdir_enabled) {
>               const __m128i fdir_id0_1 =
>                       _mm_unpackhi_epi32(descs[0], descs[1]); @@ -
> 405,6 +443,9 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct
> rte_mbuf **rx_pkts,
>            pos += ICE_DESCS_PER_LOOP,
>            rxdp += ICE_DESCS_PER_LOOP) {
>               __m128i descs[ICE_DESCS_PER_LOOP];
> +             #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
> +             __m128i descs_bh[ICE_DESCS_PER_LOOP];
> +             #endif
>               __m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
>               __m128i staterr, sterr_tmp1, sterr_tmp2;
>               /* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */ @@ -
> 463,8 +504,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct
> rte_mbuf **rx_pkts,
>               /* C.1 4=>2 filter staterr info only */
>               sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
> 
> -             ice_rx_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
> -
>               /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
>               pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
>               pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust); @@ -
> 479,21 +518,21 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq,
> struct rte_mbuf **rx_pkts,
>                * will cause performance drop to get into this context.
>                */
>               if (rxq->vsi->adapter->pf.dev_data-
> >dev_conf.rxmode.offloads &
> -                             RTE_ETH_RX_OFFLOAD_RSS_HASH) {
> +                                     (RTE_ETH_RX_OFFLOAD_RSS_HASH |
> RTE_ETH_RX_OFFLOAD_VLAN)) {
>                       /* load bottom half of every 32B desc */
> -                     const __m128i raw_desc_bh3 =
> +                     descs_bh[3] =
>                               _mm_load_si128
>                                       ((void
> *)(&rxdp[3].wb.status_error1));
>                       rte_compiler_barrier();
> -                     const __m128i raw_desc_bh2 =
> +                     descs_bh[2] =
>                               _mm_load_si128
>                                       ((void
> *)(&rxdp[2].wb.status_error1));
>                       rte_compiler_barrier();
> -                     const __m128i raw_desc_bh1 =
> +                     descs_bh[1] =
>                               _mm_load_si128
>                                       ((void
> *)(&rxdp[1].wb.status_error1));
>                       rte_compiler_barrier();
> -                     const __m128i raw_desc_bh0 =
> +                     descs_bh[0] =
>                               _mm_load_si128
>                                       ((void
> *)(&rxdp[0].wb.status_error1));
> 
> @@ -501,32 +540,59 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq,
> struct rte_mbuf **rx_pkts,
>                        * to shift the 32b RSS hash value to the
>                        * highest 32b of each 128b before mask
>                        */
> -                     __m128i rss_hash3 =
> -                             _mm_slli_epi64(raw_desc_bh3, 32);
> -                     __m128i rss_hash2 =
> -                             _mm_slli_epi64(raw_desc_bh2, 32);
> -                     __m128i rss_hash1 =
> -                             _mm_slli_epi64(raw_desc_bh1, 32);
> -                     __m128i rss_hash0 =
> -                             _mm_slli_epi64(raw_desc_bh0, 32);
> -
> -                     __m128i rss_hash_msk =
> -                             _mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
> -
> -                     rss_hash3 = _mm_and_si128
> -                                     (rss_hash3, rss_hash_msk);
> -                     rss_hash2 = _mm_and_si128
> -                                     (rss_hash2, rss_hash_msk);
> -                     rss_hash1 = _mm_and_si128
> -                                     (rss_hash1, rss_hash_msk);
> -                     rss_hash0 = _mm_and_si128
> -                                     (rss_hash0, rss_hash_msk);
> -
> -                     pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
> -                     pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
> -                     pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
> -                     pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
> -             } /* if() on RSS hash parsing */
> +                     if (rxq->vsi->adapter->pf.dev_data-
> >dev_conf.rxmode.offloads &
> +
>       RTE_ETH_RX_OFFLOAD_RSS_HASH) {
> +                             __m128i rss_hash3 =
> +                                     _mm_slli_epi64(descs_bh[3], 32);
> +                             __m128i rss_hash2 =
> +                                     _mm_slli_epi64(descs_bh[2], 32);
> +                             __m128i rss_hash1 =
> +                                     _mm_slli_epi64(descs_bh[1], 32);
> +                             __m128i rss_hash0 =
> +                                     _mm_slli_epi64(descs_bh[0], 32);
> +
> +                             __m128i rss_hash_msk =
> +                                     _mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
> +
> +                             rss_hash3 = _mm_and_si128
> +                                             (rss_hash3, rss_hash_msk);
> +                             rss_hash2 = _mm_and_si128
> +                                             (rss_hash2, rss_hash_msk);
> +                             rss_hash1 = _mm_and_si128
> +                                             (rss_hash1, rss_hash_msk);
> +                             rss_hash0 = _mm_and_si128
> +                                             (rss_hash0, rss_hash_msk);
> +
> +                             pkt_mb3 = _mm_or_si128(pkt_mb3,
> rss_hash3);
> +                             pkt_mb2 = _mm_or_si128(pkt_mb2,
> rss_hash2);
> +                             pkt_mb1 = _mm_or_si128(pkt_mb1,
> rss_hash1);
> +                             pkt_mb0 = _mm_or_si128(pkt_mb0,
> rss_hash0);
> +                     } /* if() on RSS hash parsing */
> +
> +                     if (rxq->vsi->adapter->pf.dev_data-
> >dev_conf.rxmode.offloads &
> +
>       RTE_ETH_RX_OFFLOAD_VLAN) {
> +                                                                     /*
> L2TAG2_2 */
> +                             __m128i vlan_tci3 =
> _mm_slli_si128(descs_bh[3], 4);
> +                             __m128i vlan_tci2 =
> _mm_slli_si128(descs_bh[2], 4);
> +                             __m128i vlan_tci1 =
> _mm_slli_si128(descs_bh[1], 4);
> +                             __m128i vlan_tci0 =
> _mm_slli_si128(descs_bh[0], 4);
> +
> +                             const __m128i vlan_tci_msk =
> _mm_set_epi32(0, 0xFFFF0000, 0, 0);
> +
> +                             vlan_tci3 = _mm_and_si128(vlan_tci3,
> vlan_tci_msk);
> +                             vlan_tci2 = _mm_and_si128(vlan_tci2,
> vlan_tci_msk);
> +                             vlan_tci1 = _mm_and_si128(vlan_tci1,
> vlan_tci_msk);
> +                             vlan_tci0 = _mm_and_si128(vlan_tci0,
> vlan_tci_msk);
> +
> +                             pkt_mb3 = _mm_or_si128(pkt_mb3,
> vlan_tci3);
> +                             pkt_mb2 = _mm_or_si128(pkt_mb2,
> vlan_tci2);
> +                             pkt_mb1 = _mm_or_si128(pkt_mb1,
> vlan_tci1);
> +                             pkt_mb0 = _mm_or_si128(pkt_mb0,
> vlan_tci0);
> +                             }
> +             ice_rx_desc_to_olflags_v(rxq, descs, descs_bh,
> &rx_pkts[pos]);
> +             }
> +#else
> +             ice_rx_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
>  #endif
> 
>               /* C.2 get 4 pkts staterr value  */
> --
> 2.34.1

Reply via email to