Calculate the number of scatter segments (SGEs) on the fly
according to the maximum expected packet size.

Signed-off-by: Vasily Philipov <vasi...@mellanox.com>
Signed-off-by: Ophir Munk <ophi...@mellanox.com>
---
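Note (not part of the commit): the sketch below is only an illustration of
the SGE sizing logic introduced in mlx4_rx_queue_setup(), worked through with
assumed example values (9000-byte max_rx_pkt_len, 2048-byte mbuf data room,
128-byte headroom). The local log2_above() stands in for DPDK's internal
log2above(); names and numbers here are illustrative, not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Return ceil(log2(v)), mimicking DPDK's internal log2above(). */
static uint32_t
log2_above(uint32_t v)
{
	uint32_t l;
	uint32_t r;

	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
		r |= (v & 1);
	return l + r;
}

int
main(void)
{
	const uint32_t headroom = 128;        /* stand-in for RTE_PKTMBUF_HEADROOM */
	const uint32_t mb_len = 2048;         /* assumed mbuf data room size */
	const uint32_t max_rx_pkt_len = 9000; /* assumed jumbo frame size */
	/* Bytes a full packet occupies, counting the first segment's headroom. */
	uint32_t size = headroom + max_rx_pkt_len;
	/* SGEs needed for one packet, rounded up to the next power of two. */
	uint32_t sges_n = log2_above(size / mb_len + !!(size % mb_len));

	/* Overflow check: the rounded SGE count must still cover the packet. */
	if (mb_len * (1u << sges_n) - headroom < max_rx_pkt_len)
		return 1;
	printf("%u SGEs per packet (2^%u)\n", 1u << sges_n, sges_n);
	return 0;
}

With these numbers, 9128 bytes span five 2048-byte segments, which rounds up
to eight SGEs (sges_n = 3). Rounding to a power of two is what lets the data
path replace divisions with shifts (desc >> sges_n, rq_ci << sges_n) and use
a simple modulo to strip headroom from all but the first segment.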
 drivers/net/mlx4/mlx4_rxq.c  | 64 +++++++++++++++++++++++++++++++++++++-------
 drivers/net/mlx4/mlx4_rxtx.c | 11 +++++---
 drivers/net/mlx4/mlx4_rxtx.h |  1 +
 3 files changed, 62 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 9b98d86..44d095d 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -78,6 +78,7 @@
 mlx4_rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n)
 {
        unsigned int i;
+       const uint32_t sges_n = 1 << rxq->sges_n;
        struct rte_mbuf *(*elts)[elts_n] =
                rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, rxq->socket);
 
@@ -101,6 +102,9 @@
                /* Buffer is supposed to be empty. */
                assert(rte_pktmbuf_data_len(buf) == 0);
                assert(rte_pktmbuf_pkt_len(buf) == 0);
+               /* Only the first segment keeps headroom. */
+               if (i % sges_n)
+                       buf->data_off = 0;
                buf->port = rxq->port_id;
                buf->data_len = rte_pktmbuf_tailroom(buf);
                buf->pkt_len = rte_pktmbuf_tailroom(buf);
@@ -113,8 +117,8 @@
                };
                (*elts)[i] = buf;
        }
-       DEBUG("%p: allocated and configured %u single-segment WRs",
-             (void *)rxq, elts_n);
+       DEBUG("%p: allocated and configured %u segments (max %u packets)",
+             (void *)rxq, elts_n, elts_n >> rxq->sges_n);
        rxq->elts_n = log2above(elts_n);
        rxq->elts = elts;
        return 0;
@@ -185,12 +189,15 @@
  *   Completion queue to associate with QP.
  * @param desc
  *   Number of descriptors in QP (hint only).
+ * @param sges_n
+ *   Maximum number of segments per packet.
  *
  * @return
  *   QP pointer or NULL in case of error and rte_errno is set.
  */
 static struct ibv_qp *
-mlx4_rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc)
+mlx4_rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
+                 uint32_t sges_n)
 {
        struct ibv_qp *qp;
        struct ibv_qp_init_attr attr = {
@@ -204,7 +211,7 @@
                                        priv->device_attr.max_qp_wr :
                                        desc),
                        /* Maximum number of segments per packet. */
-                       .max_recv_sge = 1,
+                       .max_recv_sge = sges_n,
                },
                .qp_type = IBV_QPT_RAW_PACKET,
        };
@@ -263,11 +270,31 @@
        assert(mb_len >= RTE_PKTMBUF_HEADROOM);
        if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
            (mb_len - RTE_PKTMBUF_HEADROOM)) {
-               ;
+               tmpl.sges_n = 0;
        } else if (dev->data->dev_conf.rxmode.enable_scatter) {
-               WARN("%p: scattered mode has been requested but is"
-                    " not supported, this may lead to packet loss",
-                    (void *)dev);
+               uint32_t size =
+                       RTE_PKTMBUF_HEADROOM +
+                       dev->data->dev_conf.rxmode.max_rx_pkt_len;
+               uint32_t sges_n;
+
+               /*
+                * Determine the number of SGEs needed for a full packet
+                * and round it to the next power of two.
+                */
+               sges_n = log2above((size / mb_len) + !!(size % mb_len));
+               tmpl.sges_n = sges_n;
+               /* Make sure sges_n did not overflow. */
+               size = mb_len * (1 << tmpl.sges_n);
+               size -= RTE_PKTMBUF_HEADROOM;
+               if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
+                       rte_errno = EOVERFLOW;
+                       ERROR("%p: too many SGEs (%u) needed to handle"
+                             " requested maximum packet size %u",
+                             (void *)dev,
+                             1 << sges_n,
+                             dev->data->dev_conf.rxmode.max_rx_pkt_len);
+                       goto error;
+               }
        } else {
                WARN("%p: the requested maximum Rx packet size (%u) is"
                     " larger than a single mbuf (%u) and scattered"
@@ -276,6 +303,17 @@
                     dev->data->dev_conf.rxmode.max_rx_pkt_len,
                     mb_len - RTE_PKTMBUF_HEADROOM);
        }
+       DEBUG("%p: maximum number of segments per packet: %u",
+             (void *)dev, 1 << tmpl.sges_n);
+       if (desc % (1 << tmpl.sges_n)) {
+               rte_errno = EINVAL;
+               ERROR("%p: number of RX queue descriptors (%u) is not a"
+                     " multiple of maximum segments per packet (%u)",
+                     (void *)dev,
+                     desc,
+                     1 << tmpl.sges_n);
+               goto error;
+       }
        /* Use the entire Rx mempool as the memory region. */
        tmpl.mr = mlx4_mp2mr(priv->pd, mp);
        if (tmpl.mr == NULL) {
@@ -300,7 +338,8 @@
                        goto error;
                }
        }
-       tmpl.cq = ibv_create_cq(priv->ctx, desc, NULL, tmpl.channel, 0);
+       tmpl.cq = ibv_create_cq(priv->ctx, desc >> tmpl.sges_n, NULL,
+                               tmpl.channel, 0);
        if (tmpl.cq == NULL) {
                rte_errno = ENOMEM;
                ERROR("%p: CQ creation failure: %s",
@@ -311,7 +350,8 @@
              priv->device_attr.max_qp_wr);
        DEBUG("priv->device_attr.max_sge is %d",
              priv->device_attr.max_sge);
-       tmpl.qp = mlx4_rxq_setup_qp(priv, tmpl.cq, desc);
+       tmpl.qp = mlx4_rxq_setup_qp(priv, tmpl.cq, desc >> tmpl.sges_n,
+                                   1 << tmpl.sges_n);
        if (tmpl.qp == NULL) {
                ERROR("%p: QP creation failure: %s",
                      (void *)dev, strerror(rte_errno));
@@ -373,6 +413,10 @@
        mlx4_rxq_cleanup(rxq);
        *rxq = tmpl;
        DEBUG("%p: rxq updated with %p", (void *)rxq, (void *)&tmpl);
+       /* Update doorbell counter. */
+       rxq->rq_ci = desc >> rxq->sges_n;
+       rte_wmb();
+       *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
        return 0;
 error:
        ret = rte_errno;
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 5c1b8ef..fd8ef7b 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -563,10 +563,11 @@
 {
        struct rxq *rxq = dpdk_rxq;
        const uint32_t wr_cnt = (1 << rxq->elts_n) - 1;
+       const uint16_t sges_n = rxq->sges_n;
        struct rte_mbuf *pkt = NULL;
        struct rte_mbuf *seg = NULL;
        unsigned int i = 0;
-       uint32_t rq_ci = rxq->rq_ci;
+       uint32_t rq_ci = rxq->rq_ci << sges_n;
        int len = 0;
 
        while (pkts_n) {
@@ -646,13 +647,15 @@
                --pkts_n;
                ++i;
 skip:
-               /* Update consumer index */
+               /* Align consumer index to the next stride. */
+               rq_ci >>= sges_n;
                ++rq_ci;
+               rq_ci <<= sges_n;
        }
-       if (unlikely(i == 0 && rq_ci == rxq->rq_ci))
+       if (unlikely(i == 0 && (rq_ci >> sges_n) == rxq->rq_ci))
                return 0;
        /* Update the consumer index. */
-       rxq->rq_ci = rq_ci;
+       rxq->rq_ci = rq_ci >> sges_n;
        rte_wmb();
        *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
        *rxq->mcq.set_ci_db = rte_cpu_to_be_32(rxq->mcq.cons_index & 0xffffff);
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 939ae75..ac84177 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -72,6 +72,7 @@ struct rxq {
        struct ibv_comp_channel *channel; /**< Rx completion channel. */
        uint16_t rq_ci; /**< Saved RQ consumer index. */
        uint16_t port_id; /**< Port ID for incoming packets. */
+       uint16_t sges_n; /**< Number of segments per packet (log2 value). */
        uint16_t elts_n; /**< Mbuf queue size (log2 value). */
        struct rte_mbuf *(*elts)[]; /**< Rx elements. */
        volatile struct mlx4_wqe_data_seg (*wqes)[]; /**< HW queue entries. */
-- 
1.8.3.1
