> -----Original Message-----
> From: Alexander Kozyrev <akozy...@mellanox.com>
> Sent: Wednesday, April 1, 2020 0:53
> To: dev@dpdk.org
> Cc: Raslan Darawsheh <rasl...@mellanox.com>; Matan Azrad
> <ma...@mellanox.com>; Slava Ovsiienko <viachesl...@mellanox.com>;
> ferruh.yi...@intel.com; Thomas Monjalon <tho...@monjalon.net>
> Subject: [PATCH 2/4] net/mlx5: enable MPRQ multi-stride operations
> 
> MPRQ feature should be updated to allow a packet to be received into
> multiple strides in order to support the MTU exceeding 8KB.
> Special care is needed to prevent the headroom corruption in the multi-stride
> mode since the headroom space is borrowed by the PMD from the tail of the
> preceding stride. Copy the whole packet into a separate mbuf in this case or
> just the overlapping data if the Rx scattering is supported by an application.
> 
> Signed-off-by: Alexander Kozyrev <akozy...@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viachesl...@mellanox.com>

> ---
>  drivers/net/mlx5/mlx5_rxq.c  | 25 ++++------------
> drivers/net/mlx5/mlx5_rxtx.c | 68 +++++++++++++++++++-------------------------
>  drivers/net/mlx5/mlx5_rxtx.h |  2 +-
>  3 files changed, 35 insertions(+), 60 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index
> 85fcfe6..a64f536 100644
> --- a/drivers/net/mlx5/mlx5_rxq.c
> +++ b/drivers/net/mlx5/mlx5_rxq.c
> @@ -1793,9 +1793,7 @@ struct mlx5_rxq_ctrl *
>       struct mlx5_priv *priv = dev->data->dev_private;
>       struct mlx5_rxq_ctrl *tmpl;
>       unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
> -     unsigned int mprq_stride_size;
>       struct mlx5_dev_config *config = &priv->config;
> -     unsigned int strd_headroom_en;
>       /*
>        * Always allocate extra slots, even if eventually
>        * the vector Rx will not be used.
> @@ -1841,27 +1839,13 @@ struct mlx5_rxq_ctrl *
>       tmpl->socket = socket;
>       if (dev->data->dev_conf.intr_conf.rxq)
>               tmpl->irq = 1;
> -     /*
> -      * LRO packet may consume all the stride memory, hence we cannot
> -      * guaranty head-room near the packet memory in the stride.
> -      * In this case scatter is, for sure, enabled and an empty mbuf may be
> -      * added in the start for the head-room.
> -      */
> -     if (lro_on_queue && RTE_PKTMBUF_HEADROOM > 0 &&
> -         non_scatter_min_mbuf_size > mb_len) {
> -             strd_headroom_en = 0;
> -             mprq_stride_size = RTE_MIN(max_rx_pkt_len,
> -                                     1u << config-
> >mprq.max_stride_size_n);
> -     } else {
> -             strd_headroom_en = 1;
> -             mprq_stride_size = non_scatter_min_mbuf_size;
> -     }
>       if (!config->mprq.stride_num_n)
>               config->mprq.stride_num_n = MLX5_MPRQ_STRIDE_NUM_N;
>       if (!config->mprq.stride_size_n)
> -             config->mprq.stride_size_n = (mprq_stride_size <=
> +             config->mprq.stride_size_n = (non_scatter_min_mbuf_size <=
>                               (1U << config->mprq.max_stride_size_n)) ?
> -                     log2above(mprq_stride_size) :
> MLX5_MPRQ_STRIDE_SIZE_N;
> +                     log2above(non_scatter_min_mbuf_size) :
> +                     MLX5_MPRQ_STRIDE_SIZE_N;
>       /*
>        * This Rx queue can be configured as a Multi-Packet RQ if all of the
>        * following conditions are met:
> @@ -1877,7 +1861,8 @@ struct mlx5_rxq_ctrl *
>               tmpl->rxq.strd_num_n = config->mprq.stride_num_n;
>               tmpl->rxq.strd_sz_n = config->mprq.stride_size_n;
>               tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
> -             tmpl->rxq.strd_headroom_en = strd_headroom_en;
> +             tmpl->rxq.strd_scatter_en =
> +                             !!(offloads & DEV_RX_OFFLOAD_SCATTER);
>               tmpl->rxq.mprq_max_memcpy_len =
> RTE_MIN(first_mb_free_size,
>                               config->mprq.max_memcpy_len);
>               max_lro_size = RTE_MIN(max_rx_pkt_len, diff --git
> a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index
> f3bf763..4c27952 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -1658,21 +1658,20 @@ enum mlx5_txcmp_code {
>       unsigned int i = 0;
>       uint32_t rq_ci = rxq->rq_ci;
>       uint16_t consumed_strd = rxq->consumed_strd;
> -     uint16_t headroom_sz = rxq->strd_headroom_en *
> RTE_PKTMBUF_HEADROOM;
>       struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
> 
>       while (i < pkts_n) {
>               struct rte_mbuf *pkt;
>               void *addr;
>               int ret;
> -             unsigned int len;
> +             uint32_t len;
>               uint16_t strd_cnt;
>               uint16_t strd_idx;
>               uint32_t offset;
>               uint32_t byte_cnt;
> +             int32_t hdrm_overlap;
>               volatile struct mlx5_mini_cqe8 *mcqe = NULL;
>               uint32_t rss_hash_res = 0;
> -             uint8_t lro_num_seg;
> 
>               if (consumed_strd == strd_n) {
>                       /* Replace WQE only if the buffer is still in use. */
> @@ -1719,18 +1718,6 @@ enum mlx5_txcmp_code {
>               MLX5_ASSERT(strd_idx < strd_n);
>               MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
>                           wq_mask));
> -             lro_num_seg = cqe->lro_num_seg;
> -             /*
> -              * Currently configured to receive a packet per a stride. But if
> -              * MTU is adjusted through kernel interface, device could
> -              * consume multiple strides without raising an error. In this
> -              * case, the packet should be dropped because it is bigger
> than
> -              * the max_rx_pkt_len.
> -              */
> -             if (unlikely(!lro_num_seg && strd_cnt > 1)) {
> -                     ++rxq->stats.idropped;
> -                     continue;
> -             }
>               pkt = rte_pktmbuf_alloc(rxq->mp);
>               if (unlikely(pkt == NULL)) {
>                       ++rxq->stats.rx_nombuf;
> @@ -1742,12 +1729,16 @@ enum mlx5_txcmp_code {
>                       len -= RTE_ETHER_CRC_LEN;
>               offset = strd_idx * strd_sz + strd_shift;
>               addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n),
> offset);
> +             hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt *
> strd_sz;
>               /*
>                * Memcpy packets to the target mbuf if:
>                * - The size of packet is smaller than
> mprq_max_memcpy_len.
>                * - Out of buffer in the Mempool for Multi-Packet RQ.
> +              * - There is no space for a headroom and scatter is disabled.
>                */
> -             if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl ==
> NULL) {
> +             if (len <= rxq->mprq_max_memcpy_len ||
> +                 rxq->mprq_repl == NULL ||
> +                 (hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
>                       /*
>                        * When memcpy'ing packet due to out-of-buffer, the
>                        * packet must be smaller than the target mbuf.
> @@ -1769,7 +1760,7 @@ enum mlx5_txcmp_code {
>                       rte_atomic16_add_return(&buf->refcnt, 1);
>                       MLX5_ASSERT((uint16_t)rte_atomic16_read(&buf-
> >refcnt) <=
>                                   strd_n + 1);
> -                     buf_addr = RTE_PTR_SUB(addr, headroom_sz);
> +                     buf_addr = RTE_PTR_SUB(addr,
> RTE_PKTMBUF_HEADROOM);
>                       /*
>                        * MLX5 device doesn't use iova but it is necessary in a
>                        * case where the Rx packet is transmitted via a @@ -
> 1788,43 +1779,42 @@ enum mlx5_txcmp_code {
>                       rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
>                                                 buf_len, shinfo);
>                       /* Set mbuf head-room. */
> -                     pkt->data_off = headroom_sz;
> +                     SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
>                       MLX5_ASSERT(pkt->ol_flags ==
> EXT_ATTACHED_MBUF);
> -                     /*
> -                      * Prevent potential overflow due to MTU change
> through
> -                      * kernel interface.
> -                      */
> -                     if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
> -                             rte_pktmbuf_free_seg(pkt);
> -                             ++rxq->stats.idropped;
> -                             continue;
> -                     }
> +                     MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) <
> +                             len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
>                       DATA_LEN(pkt) = len;
>                       /*
> -                      * LRO packet may consume all the stride memory, in
> this
> -                      * case packet head-room space is not guaranteed so
> must
> -                      * to add an empty mbuf for the head-room.
> +                      * Copy the last fragment of a packet (up to
> headroom
> +                      * size bytes) in case there is a stride overlap with
> +                      * a next packet's headroom. Allocate a separate
> mbuf
> +                      * to store this fragment and link it. Scatter is on.
>                        */
> -                     if (!rxq->strd_headroom_en) {
> -                             struct rte_mbuf *headroom_mbuf =
> -                                             rte_pktmbuf_alloc(rxq->mp);
> +                     if (hdrm_overlap > 0) {
> +                             MLX5_ASSERT(rxq->strd_scatter_en);
> +                             struct rte_mbuf *seg =
> +                                     rte_pktmbuf_alloc(rxq->mp);
> 
> -                             if (unlikely(headroom_mbuf == NULL)) {
> +                             if (unlikely(seg == NULL)) {
>                                       rte_pktmbuf_free_seg(pkt);
>                                       ++rxq->stats.rx_nombuf;
>                                       break;
>                               }
> -                             PORT(pkt) = rxq->port_id;
> -                             NEXT(headroom_mbuf) = pkt;
> -                             pkt = headroom_mbuf;
> +                             SET_DATA_OFF(seg, 0);
> +                             rte_memcpy(rte_pktmbuf_mtod(seg, void *),
> +                                     RTE_PTR_ADD(addr, len -
> hdrm_overlap),
> +                                     hdrm_overlap);
> +                             DATA_LEN(seg) = hdrm_overlap;
> +                             DATA_LEN(pkt) = len - hdrm_overlap;
> +                             NEXT(pkt) = seg;
>                               NB_SEGS(pkt) = 2;
>                       }
>               }
>               rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
> -             if (lro_num_seg > 1) {
> +             if (cqe->lro_num_seg > 1) {
>                       mlx5_lro_update_hdr(addr, cqe, len);
>                       pkt->ol_flags |= PKT_RX_LRO;
> -                     pkt->tso_segsz = strd_sz;
> +                     pkt->tso_segsz = len / cqe->lro_num_seg;
>               }
>               PKT_LEN(pkt) = len;
>               PORT(pkt) = rxq->port_id;
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index 939778a..d155c24 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -119,7 +119,7 @@ struct mlx5_rxq_data {
>       unsigned int strd_sz_n:4; /* Log 2 of stride size. */
>       unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */
>       unsigned int err_state:2; /* enum mlx5_rxq_err_state. */
> -     unsigned int strd_headroom_en:1; /* Enable mbuf headroom in
> MPRQ. */
> +     unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */
>       unsigned int lro:1; /* Enable LRO. */
>       unsigned int :1; /* Remaining bits. */
>       volatile uint32_t *rq_db;
> --
> 1.8.3.1

Reply via email to