Currently, metadata is stored in big-endian (BE) byte order and must be
converted to CPU byte order when it is attached to the mbuf.

For dv_xmeta_en = 2, no bit shift is applied.
For dv_xmeta_en = 1, a 16-bit shift is applied.

In non-vector mode, the endianness conversion must be performed first,
followed by the bit shift.

In vector mode, the behavior is already correct. This update uses
__builtin_clz instead of __builtin_popcount there as well, to
avoid confusion.

Fixes: 743ac28ddf0b ("net/mlx5: fix metadata calculation in legacy mode")
Cc: sta...@dpdk.org

Signed-off-by: rongwei liu <rongw...@nvidia.com>
---
 drivers/net/mlx5/mlx5_rx.c               | 4 ++--
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 2 +-
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h    | 2 +-
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h     | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index 777a1d6e45..012745dc26 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -740,8 +740,8 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf 
*pkt,
                }
        }
        if (rxq->dynf_meta) {
-               uint32_t meta = rte_be_to_cpu_32(cqe->flow_table_metadata >>
-                       __builtin_popcount(rxq->flow_meta_port_mask)) &
+               uint32_t meta = (rte_be_to_cpu_32(cqe->flow_table_metadata) >>
+                       __builtin_clz(rxq->flow_meta_port_mask)) &
                        rxq->flow_meta_port_mask;
 
                if (meta) {
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h 
b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
index 648c59e2c2..4a0da1bd3b 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
@@ -1223,7 +1223,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile 
struct mlx5_cqe *cq,
                        int32_t offs = rxq->flow_meta_offset;
                        uint32_t mask = rxq->flow_meta_port_mask;
                        uint32_t shift =
-                               __builtin_popcount(rxq->flow_meta_port_mask);
+                               __builtin_clz(rxq->flow_meta_port_mask);
                        uint32_t metadata;
 
                        /* This code is subject for futher optimization. */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h 
b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index 5c569ee199..38cbcb6fdc 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -834,7 +834,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct 
mlx5_cqe *cq,
                        int32_t offs = rxq->flow_meta_offset;
                        uint32_t mask = rxq->flow_meta_port_mask;
                        uint32_t shift =
-                               __builtin_popcount(rxq->flow_meta_port_mask);
+                               __builtin_clz(rxq->flow_meta_port_mask);
 
                        *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
                                (rte_be_to_cpu_32(container_of
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h 
b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 661fa7273c..480583acbb 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -770,7 +770,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct 
mlx5_cqe *cq,
                        int32_t offs = rxq->flow_meta_offset;
                        uint32_t mask = rxq->flow_meta_port_mask;
                        uint32_t shift =
-                               __builtin_popcount(rxq->flow_meta_port_mask);
+                               __builtin_clz(rxq->flow_meta_port_mask);
 
                        *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
                                (rte_be_to_cpu_32
-- 
2.27.0

Reply via email to