When receiving tunneled packets, the testpmd output log always shows the
'ol_flags' value as 'RTE_MBUF_F_RX_OUTER_L4_CKSUM_UNKNOWN', but the expected
value is 'RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD' or
'RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD'.

Add 'RX_OUTER_L4_CKSUM_GOOD' and 'RX_OUTER_L4_CKSUM_BAD' to 'flags' for the
normal path, to 'l3_l4_flags_shuf' for the AVX2 and AVX512 vector paths, and
to 'cksum_flags' for the SSE vector path, so that 'ol_flags' reports the
correct outer L4 checksum status.

Fixes: b8b4c54ef9b0 ("net/iavf: support flexible Rx descriptor in normal path")
Fixes: 1162f5a0ef31 ("net/iavf: support flexible Rx descriptor in SSE path")
Fixes: 5b6e8859081d ("net/iavf: support flexible Rx descriptor in AVX path")
Fixes: 9c9aa0040344 ("net/iavf: add offload path for Rx AVX512 flex descriptor")
Cc: sta...@dpdk.org

Signed-off-by: Zhichao Zeng <zhichaox.z...@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c            |   9 +-
 drivers/net/iavf/iavf_rxtx_vec_avx2.c   | 118 +++++++++++++++------
 drivers/net/iavf/iavf_rxtx_vec_avx512.c | 133 ++++++++++++++++++------
 drivers/net/iavf/iavf_rxtx_vec_sse.c    |  77 ++++++++++----
 4 files changed, 252 insertions(+), 85 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 3deabe1d7e..e1681024a6 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -1277,7 +1277,9 @@ iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
                return 0;
 
        if (likely(!(stat_err0 & IAVF_RX_FLEX_ERR0_BITS))) {
-               flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD);
+               flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD |
+                       RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                       RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD);
                return flags;
        }
 
@@ -1294,6 +1296,11 @@ iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
        if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
                flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;
 
+       if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)))
+               flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD;
+       else
+               flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD;
+
        return flags;
 }
 
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index d6243b96e2..862f6eb0c0 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -622,43 +622,88 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
         * bit13 is for VLAN indication.
         */
        const __m256i flags_mask =
-                _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
+                _mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 << 13));
        /**
         * data to be shuffled by the result of the flags mask shifted by 4
         * bits.  This gives use the l3_l4 flags.
         */
-       const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
-                       /* shift right 1 bit to make sure it not exceed 255 */
-                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-                        RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-                        RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-                        RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-                        RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                       (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                       (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                       (RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                       (RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                       /* second 128-bits */
-                       0, 0, 0, 0, 0, 0, 0, 0,
-                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-                        RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-                        RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-                        RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-                        RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                       (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                       (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                       (RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                       (RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1);
+       const __m256i l3_l4_flags_shuf =
+               _mm256_set_epi8((RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                 RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 
1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_BAD  |
+                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_BAD  |
+                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 
1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               /**
+                * second 128-bits
+                * shift right 20 bits to use the low two bits to indicate
+                * outer checksum status
+                * shift right 1 bit to make sure it does not exceed 255
+                */
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 
1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_BAD  |
+                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_BAD  |
+                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 
1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1);
        const __m256i cksum_mask =
-                _mm256_set1_epi32(RTE_MBUF_F_RX_IP_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD |
-                                  RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-                                  RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD);
+                _mm256_set1_epi32(RTE_MBUF_F_RX_IP_CKSUM_MASK |
+                                  RTE_MBUF_F_RX_L4_CKSUM_MASK |
+                                  RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                                  RTE_MBUF_F_RX_OUTER_L4_CKSUM_MASK);
        /**
         * data to be shuffled by result of flag mask, shifted down 12.
         * If RSS(bit12)/VLAN(bit13) are set,
@@ -836,6 +881,15 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
                __m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,
                                _mm256_srli_epi32(flag_bits, 4));
                l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
+               __m256i l4_outer_mask = _mm256_set1_epi32(0x6);
+               __m256i l4_outer_flags =
+                               _mm256_and_si256(l3_l4_flags, l4_outer_mask);
+               l4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20);
+
+               __m256i l3_l4_mask = _mm256_set1_epi32(~0x6);
+
+               l3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask);
+               l3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags);
                l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
 
                /* set rss and vlan flags */
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
index 3bfec63851..b416a716cf 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
@@ -969,45 +969,105 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
                         * bit13 is for VLAN indication.
                         */
                        const __m256i flags_mask =
-                               _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 
13));
+                                _mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 
<< 13));
 #endif
 #ifdef IAVF_RX_CSUM_OFFLOAD
                        /**
                         * data to be shuffled by the result of the flags mask 
shifted by 4
                         * bits.  This gives use the l3_l4 flags.
                         */
-                       const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 
0, 0, 0, 0, 0, 0,
-                                       /* shift right 1 bit to make sure it 
not exceed 255 */
-                                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-                                        RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-                                        RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-                                        RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-                                        RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                                       (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                                       (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                                       (RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                                       (RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                                       /* second 128-bits */
-                                       0, 0, 0, 0, 0, 0, 0, 0,
-                                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-                                        RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-                                        RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-                                        RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-                                        RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                                       (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                                       (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                                       (RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                                       (RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1);
+                       const __m256i l3_l4_flags_shuf =
+                               
_mm256_set_epi8((RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               /**
+                                * second 128-bits
+                                * shift right 20 bits to use the low two bits 
to indicate
+                                * outer checksum status
+                                * shift right 1 bit to make sure it not exceed 
255
+                                */
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+                               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+                                RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1);
                        const __m256i cksum_mask =
-                               _mm256_set1_epi32(RTE_MBUF_F_RX_IP_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD |
-                                                 RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-                                                 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD);
+                                _mm256_set1_epi32(RTE_MBUF_F_RX_IP_CKSUM_MASK |
+                                                  RTE_MBUF_F_RX_L4_CKSUM_MASK |
+                                                  
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                                                  
RTE_MBUF_F_RX_OUTER_L4_CKSUM_MASK);
 #endif
 #if defined(IAVF_RX_VLAN_OFFLOAD) || defined(IAVF_RX_RSS_OFFLOAD)
                        /**
@@ -1057,6 +1117,15 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
                        __m256i l3_l4_flags = 
_mm256_shuffle_epi8(l3_l4_flags_shuf,
                                        _mm256_srli_epi32(flag_bits, 4));
                        l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
+                       __m256i l4_outer_mask = _mm256_set1_epi32(0x6);
+                       __m256i l4_outer_flags =
+                                       _mm256_and_si256(l3_l4_flags, 
l4_outer_mask);
+                       l4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20);
+
+                       __m256i l3_l4_mask = _mm256_set1_epi32(~0x6);
+
+                       l3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask);
+                       l3_l4_flags = _mm256_or_si256(l3_l4_flags, 
l4_outer_flags);
                        l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
 #endif
 #if defined(IAVF_RX_VLAN_OFFLOAD) || defined(IAVF_RX_RSS_OFFLOAD)
diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 4a5232c1d2..1f477eada5 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -222,39 +222,69 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
         * bit12 for RSS indication.
         * bit13 for VLAN indication.
         */
-       const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070,
-                                               0x3070, 0x3070);
+       const __m128i desc_mask = _mm_set_epi32(0x30f0, 0x30f0,
+                                               0x30f0, 0x30f0);
 
        const __m128i cksum_mask = _mm_set_epi32(RTE_MBUF_F_RX_IP_CKSUM_MASK |
                                                 RTE_MBUF_F_RX_L4_CKSUM_MASK |
+                                                
RTE_MBUF_F_RX_OUTER_L4_CKSUM_MASK |
                                                 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD,
                                                 RTE_MBUF_F_RX_IP_CKSUM_MASK |
                                                 RTE_MBUF_F_RX_L4_CKSUM_MASK |
+                                                
RTE_MBUF_F_RX_OUTER_L4_CKSUM_MASK |
                                                 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD,
                                                 RTE_MBUF_F_RX_IP_CKSUM_MASK |
                                                 RTE_MBUF_F_RX_L4_CKSUM_MASK |
+                                                
RTE_MBUF_F_RX_OUTER_L4_CKSUM_MASK |
                                                 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD,
                                                 RTE_MBUF_F_RX_IP_CKSUM_MASK |
                                                 RTE_MBUF_F_RX_L4_CKSUM_MASK |
+                                                
RTE_MBUF_F_RX_OUTER_L4_CKSUM_MASK |
                                                 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD);
 
        /* map the checksum, rss and vlan fields to the checksum, rss
         * and vlan flag
         */
-       const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
-                       /* shift right 1 bit to make sure it not exceed 255 */
-                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-                        RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-                        RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-                        RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                       (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-                        RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                       (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                       (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-                       (RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-                       (RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1);
+       const __m128i cksum_flags =
+               _mm_set_epi8((RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 |
+                RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                 RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 
1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               /**
+                * shift right 20 bits to use the low two bits to indicate
+                * outer checksum status
+                * shift right 1 bit to make sure it does not exceed 255
+                */
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+                RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 
1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
+                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
+               (RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD >> 20 | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+                RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1);
+
 
        const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0,
                        0, 0, 0, 0,
@@ -274,6 +304,13 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
        flags = _mm_shuffle_epi8(cksum_flags, tmp_desc);
        /* then we shift left 1 bit */
        flags = _mm_slli_epi32(flags, 1);
+       __m128i l4_outer_mask = _mm_set_epi32(0x6, 0x6, 0x6, 0x6);
+       __m128i l4_outer_flags = _mm_and_si128(flags, l4_outer_mask);
+       l4_outer_flags = _mm_slli_epi32(l4_outer_flags, 20);
+
+       __m128i l3_l4_mask = _mm_set_epi32(~0x6, ~0x6, ~0x6, ~0x6);
+       __m128i l3_l4_flags = _mm_and_si128(flags, l3_l4_mask);
+       flags = _mm_or_si128(l3_l4_flags, l4_outer_flags);
        /* we need to mask out the redundant bits introduced by RSS or
         * VLAN fields.
         */
@@ -325,10 +362,10 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
         * appropriate flags means that we have to do a shift and blend for
         * each mbuf before we do the write.
         */
-       rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10);
-       rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10);
-       rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10);
-       rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10);
+       rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x30);
+       rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x30);
+       rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x30);
+       rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x30);
 
        /* write the rearm data and the olflags in one write */
        RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
-- 
2.25.1

Reply via email to