Using a global mbuf dynamic field for metadata incurs some
performance penalty on a datapath. Store this information in
the Rx queue descriptor for a better cache locality.

Fixes: a18ac6113331 ("net/mlx5: add metadata support to Rx datapath")
Cc: sta...@dpdk.org

Signed-off-by: Alexander Kozyrev <akozy...@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viachesl...@mellanox.com>
---
 drivers/net/mlx5/mlx5.h                  |  1 +
 drivers/net/mlx5/mlx5_flow.c             | 29 +++++++++++++++++++
 drivers/net/mlx5/mlx5_rxtx.c             |  7 +++--
 drivers/net/mlx5/mlx5_rxtx.h             |  4 ++-
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 27 +++++++++++-------
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h    | 49 ++++++++++++++++++--------------
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h     | 49 ++++++++++++++++++--------------
 drivers/net/mlx5/mlx5_trigger.c          |  2 ++
 lib/librte_ethdev/rte_flow.c             |  2 +-
 lib/librte_ethdev/rte_flow.h             |  2 +-
 10 files changed, 113 insertions(+), 59 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 318618e..16c60c8 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -753,6 +753,7 @@ int mlx5_counter_query(struct rte_eth_dev *dev, uint32_t 
cnt,
                       bool clear, uint64_t *pkts, uint64_t *bytes);
 int mlx5_flow_dev_dump(struct rte_eth_dev *dev, FILE *file,
                       struct rte_flow_error *error);
+void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
 
 /* mlx5_mp.c */
 int mlx5_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer);
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index c529aa3..70e42a6 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -894,6 +894,35 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev 
*dev, int32_t priority,
        }
 }
 
+/**
+ * Set the Rx queue dynamic metadata (mask and offset) for a flow
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ */
+void
+mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_rxq_data *data;
+       unsigned int i;
+
+       for (i = 0; i != priv->rxqs_n; ++i) {
+               if (!(*priv->rxqs)[i])
+                       continue;
+               data = (*priv->rxqs)[i];
+               if (!rte_flow_dynf_metadata_avail()) {
+                       data->dynf_meta = 0;
+                       data->flow_meta_mask = 0;
+                       data->flow_meta_offset = -1;
+               } else {
+                       data->dynf_meta = 1;
+                       data->flow_meta_mask = rte_flow_dynf_metadata_mask;
+                       data->flow_meta_offset = rte_flow_dynf_metadata_offs;
+               }
+       }
+}
+
 /*
  * return a pointer to the desired action in the list of actions.
  *
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 3e583d4..0010b42 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1339,9 +1339,10 @@ enum mlx5_txcmp_code {
                        pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
                }
        }
-       if (rte_flow_dynf_metadata_avail() && cqe->flow_table_metadata) {
-               pkt->ol_flags |= PKT_RX_DYNF_METADATA;
-               *RTE_FLOW_DYNF_METADATA(pkt) = cqe->flow_table_metadata;
+       if (rxq->dynf_meta && cqe->flow_table_metadata) {
+               pkt->ol_flags |= rxq->flow_meta_mask;
+               *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, uint32_t *) =
+                       cqe->flow_table_metadata;
        }
        if (rxq->csum)
                pkt->ol_flags |= rxq_cq_to_ol_flags(cqe);
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 8695218..48f2b79 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -121,7 +121,7 @@ struct mlx5_rxq_data {
        unsigned int err_state:2; /* enum mlx5_rxq_err_state. */
        unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */
        unsigned int lro:1; /* Enable LRO. */
-       unsigned int :1; /* Remaining bits. */
+       unsigned int dynf_meta:1; /* Dynamic metadata is configured. */
        volatile uint32_t *rq_db;
        volatile uint32_t *cq_db;
        uint16_t port_id;
@@ -159,6 +159,8 @@ struct mlx5_rxq_data {
        /* CQ (UAR) access lock required for 32bit implementations */
 #endif
        uint32_t tunnel; /* Tunnel information. */
+       uint64_t flow_meta_mask;
+       int32_t flow_meta_offset;
 } __rte_cache_aligned;
 
 enum mlx5_rxq_obj_type {
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h 
b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
index 0bb8b7a..03c0be2 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
@@ -264,17 +264,22 @@
                        elts[pos + 2]->hash.fdir.hi = flow_tag;
                        elts[pos + 3]->hash.fdir.hi = flow_tag;
                }
-               if (rte_flow_dynf_metadata_avail()) {
-                       const uint32_t meta = *RTE_FLOW_DYNF_METADATA(t_pkt);
+               if (!!rxq->flow_meta_mask) {
+                       int32_t offs = rxq->flow_meta_offset;
+                       const uint32_t meta =
+                               *RTE_MBUF_DYNFIELD(t_pkt, offs, uint32_t *);
 
                        /* Check if title packet has valid metadata. */
                        if (meta) {
-                               MLX5_ASSERT(t_pkt->ol_flags &
-                                           PKT_RX_DYNF_METADATA);
-                               *RTE_FLOW_DYNF_METADATA(elts[pos]) = meta;
-                               *RTE_FLOW_DYNF_METADATA(elts[pos + 1]) = meta;
-                               *RTE_FLOW_DYNF_METADATA(elts[pos + 2]) = meta;
-                               *RTE_FLOW_DYNF_METADATA(elts[pos + 3]) = meta;
+                               MLX5_ASSERT(t_pkt->ol_flags & offs);
+                               *RTE_MBUF_DYNFIELD(elts[pos], offs,
+                                                       uint32_t *) = meta;
+                               *RTE_MBUF_DYNFIELD(elts[pos + 1], offs,
+                                                       uint32_t *) = meta;
+                               *RTE_MBUF_DYNFIELD(elts[pos + 2], offs,
+                                                       uint32_t *) = meta;
+                               *RTE_MBUF_DYNFIELD(elts[pos + 3], offs,
+                                                       uint32_t *) = meta;
                        }
                }
 
@@ -1023,9 +1028,9 @@
                        pkts[pos + 3]->timestamp =
                                rte_be_to_cpu_64(cq[pos + p3].timestamp);
                }
-               if (rte_flow_dynf_metadata_avail()) {
-                       uint64_t flag = rte_flow_dynf_metadata_mask;
-                       int offs = rte_flow_dynf_metadata_offs;
+               if (!!rxq->flow_meta_mask) {
+                       uint64_t flag = rxq->flow_meta_mask;
+                       int32_t offs = rxq->flow_meta_offset;
                        uint32_t metadata;
 
                        /* This code is subject for futher optimization. */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h 
b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index 7b6c5db..d39e726 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -205,17 +205,22 @@
                        elts[pos + 2]->hash.fdir.hi = flow_tag;
                        elts[pos + 3]->hash.fdir.hi = flow_tag;
                }
-               if (rte_flow_dynf_metadata_avail()) {
-                       const uint32_t meta = *RTE_FLOW_DYNF_METADATA(t_pkt);
+               if (!!rxq->flow_meta_mask) {
+                       int32_t offs = rxq->flow_meta_offset;
+                       const uint32_t meta =
+                               *RTE_MBUF_DYNFIELD(t_pkt, offs, uint32_t *);
 
                        /* Check if title packet has valid metadata. */
                        if (meta) {
-                               MLX5_ASSERT(t_pkt->ol_flags &
-                                           PKT_RX_DYNF_METADATA);
-                               *RTE_FLOW_DYNF_METADATA(elts[pos]) = meta;
-                               *RTE_FLOW_DYNF_METADATA(elts[pos + 1]) = meta;
-                               *RTE_FLOW_DYNF_METADATA(elts[pos + 2]) = meta;
-                               *RTE_FLOW_DYNF_METADATA(elts[pos + 3]) = meta;
+                               MLX5_ASSERT(t_pkt->ol_flags & offs);
+                               *RTE_MBUF_DYNFIELD(elts[pos], offs,
+                                                       uint32_t *) = meta;
+                               *RTE_MBUF_DYNFIELD(elts[pos + 1], offs,
+                                                       uint32_t *) = meta;
+                               *RTE_MBUF_DYNFIELD(elts[pos + 2], offs,
+                                                       uint32_t *) = meta;
+                               *RTE_MBUF_DYNFIELD(elts[pos + 3], offs,
+                                                       uint32_t *) = meta;
                        }
                }
                pos += MLX5_VPMD_DESCS_PER_LOOP;
@@ -701,28 +706,30 @@
                                        container_of(p3, struct mlx5_cqe,
                                                     pkt_info)->timestamp);
                }
-               if (rte_flow_dynf_metadata_avail()) {
+               if (!!rxq->flow_meta_mask) {
                        /* This code is subject for futher optimization. */
-                       *RTE_FLOW_DYNF_METADATA(elts[pos]) =
+                       int32_t offs = rxq->flow_meta_offset;
+
+                       *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
                                container_of(p0, struct mlx5_cqe,
                                             pkt_info)->flow_table_metadata;
-                       *RTE_FLOW_DYNF_METADATA(elts[pos + 1]) =
+                       *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
                                container_of(p1, struct mlx5_cqe,
                                             pkt_info)->flow_table_metadata;
-                       *RTE_FLOW_DYNF_METADATA(elts[pos + 2]) =
+                       *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
                                container_of(p2, struct mlx5_cqe,
                                             pkt_info)->flow_table_metadata;
-                       *RTE_FLOW_DYNF_METADATA(elts[pos + 3]) =
+                       *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
                                container_of(p3, struct mlx5_cqe,
                                             pkt_info)->flow_table_metadata;
-                       if (*RTE_FLOW_DYNF_METADATA(elts[pos]))
-                               elts[pos]->ol_flags |= PKT_RX_DYNF_METADATA;
-                       if (*RTE_FLOW_DYNF_METADATA(elts[pos + 1]))
-                               elts[pos + 1]->ol_flags |= PKT_RX_DYNF_METADATA;
-                       if (*RTE_FLOW_DYNF_METADATA(elts[pos + 2]))
-                               elts[pos + 2]->ol_flags |= PKT_RX_DYNF_METADATA;
-                       if (*RTE_FLOW_DYNF_METADATA(elts[pos + 3]))
-                               elts[pos + 3]->ol_flags |= PKT_RX_DYNF_METADATA;
+                       if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *))
+                               elts[pos]->ol_flags |= rxq->flow_meta_mask;
+                       if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *))
+                               elts[pos + 1]->ol_flags |= rxq->flow_meta_mask;
+                       if (*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *))
+                               elts[pos + 2]->ol_flags |= rxq->flow_meta_mask;
+                       if (*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *))
+                               elts[pos + 3]->ol_flags |= rxq->flow_meta_mask;
                }
 #ifdef MLX5_PMD_SOFT_COUNTERS
                /* Add up received bytes count. */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h 
b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 4b711f0..f110f73 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -192,17 +192,22 @@
                        elts[pos + 2]->hash.fdir.hi = flow_tag;
                        elts[pos + 3]->hash.fdir.hi = flow_tag;
                }
-               if (rte_flow_dynf_metadata_avail()) {
-                       const uint32_t meta = *RTE_FLOW_DYNF_METADATA(t_pkt);
+               if (rxq->dynf_meta) {
+                       int32_t offs = rxq->flow_meta_offset;
+                       const uint32_t meta =
+                               *RTE_MBUF_DYNFIELD(t_pkt, offs, uint32_t *);
 
                        /* Check if title packet has valid metadata. */
                        if (meta) {
-                               MLX5_ASSERT(t_pkt->ol_flags &
-                                           PKT_RX_DYNF_METADATA);
-                               *RTE_FLOW_DYNF_METADATA(elts[pos]) = meta;
-                               *RTE_FLOW_DYNF_METADATA(elts[pos + 1]) = meta;
-                               *RTE_FLOW_DYNF_METADATA(elts[pos + 2]) = meta;
-                               *RTE_FLOW_DYNF_METADATA(elts[pos + 3]) = meta;
+                               MLX5_ASSERT(t_pkt->ol_flags & offs);
+                               *RTE_MBUF_DYNFIELD(elts[pos], offs,
+                                                       uint32_t *) = meta;
+                               *RTE_MBUF_DYNFIELD(elts[pos + 1], offs,
+                                                       uint32_t *) = meta;
+                               *RTE_MBUF_DYNFIELD(elts[pos + 2], offs,
+                                                       uint32_t *) = meta;
+                               *RTE_MBUF_DYNFIELD(elts[pos + 3], offs,
+                                                       uint32_t *) = meta;
                        }
                }
                pos += MLX5_VPMD_DESCS_PER_LOOP;
@@ -654,24 +659,26 @@
                        pkts[pos + 3]->timestamp =
                                rte_be_to_cpu_64(cq[pos + p3].timestamp);
                }
-               if (rte_flow_dynf_metadata_avail()) {
+               if (rxq->dynf_meta) {
                        /* This code is subject for futher optimization. */
-                       *RTE_FLOW_DYNF_METADATA(pkts[pos]) =
+                       int32_t offs = rxq->flow_meta_offset;
+
+                       *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
                                cq[pos].flow_table_metadata;
-                       *RTE_FLOW_DYNF_METADATA(pkts[pos + 1]) =
+                       *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
                                cq[pos + p1].flow_table_metadata;
-                       *RTE_FLOW_DYNF_METADATA(pkts[pos + 2]) =
+                       *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
                                cq[pos + p2].flow_table_metadata;
-                       *RTE_FLOW_DYNF_METADATA(pkts[pos + 3]) =
+                       *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
                                cq[pos + p3].flow_table_metadata;
-                       if (*RTE_FLOW_DYNF_METADATA(pkts[pos]))
-                               pkts[pos]->ol_flags |= PKT_RX_DYNF_METADATA;
-                       if (*RTE_FLOW_DYNF_METADATA(pkts[pos + 1]))
-                               pkts[pos + 1]->ol_flags |= PKT_RX_DYNF_METADATA;
-                       if (*RTE_FLOW_DYNF_METADATA(pkts[pos + 2]))
-                               pkts[pos + 2]->ol_flags |= PKT_RX_DYNF_METADATA;
-                       if (*RTE_FLOW_DYNF_METADATA(pkts[pos + 3]))
-                               pkts[pos + 3]->ol_flags |= PKT_RX_DYNF_METADATA;
+                       if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *))
+                               pkts[pos]->ol_flags |= rxq->flow_meta_mask;
+                       if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *))
+                               pkts[pos + 1]->ol_flags |= rxq->flow_meta_mask;
+                       if (*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *))
+                               pkts[pos + 2]->ol_flags |= rxq->flow_meta_mask;
+                       if (*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *))
+                               pkts[pos + 3]->ol_flags |= rxq->flow_meta_mask;
                }
 #ifdef MLX5_PMD_SOFT_COUNTERS
                /* Add up received bytes count. */
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7596704..feb9154 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -323,6 +323,8 @@
                        dev->data->port_id);
                goto error;
        }
+       /* Set a mask and offset of dynamic metadata flows into Rx queues*/
+       mlx5_flow_rxq_dynf_metadata_set(dev);
        /*
         * In non-cached mode, it only needs to start the default mreg copy
         * action and no flow created by application exists anymore.
diff --git a/lib/librte_ethdev/rte_flow.c b/lib/librte_ethdev/rte_flow.c
index 3699edc..1685be5 100644
--- a/lib/librte_ethdev/rte_flow.c
+++ b/lib/librte_ethdev/rte_flow.c
@@ -19,7 +19,7 @@
 #include "rte_flow.h"
 
 /* Mbuf dynamic field name for metadata. */
-int rte_flow_dynf_metadata_offs = -1;
+int32_t rte_flow_dynf_metadata_offs = -1;
 
 /* Mbuf dynamic field flag bit number for metadata. */
 uint64_t rte_flow_dynf_metadata_mask;
diff --git a/lib/librte_ethdev/rte_flow.h b/lib/librte_ethdev/rte_flow.h
index fab44f6..132b44e 100644
--- a/lib/librte_ethdev/rte_flow.h
+++ b/lib/librte_ethdev/rte_flow.h
@@ -2653,7 +2653,7 @@ struct rte_flow_action_set_dscp {
 };
 
 /* Mbuf dynamic field offset for metadata. */
-extern int rte_flow_dynf_metadata_offs;
+extern int32_t rte_flow_dynf_metadata_offs;
 
 /* Mbuf dynamic field flag mask for metadata. */
 extern uint64_t rte_flow_dynf_metadata_mask;
-- 
1.8.3.1

Reply via email to