Introduce bulking capability in the XDP tx return path (XDP_REDIRECT).
xdp_return_frame is usually run inside the driver NAPI tx completion
loop, so it is possible to batch it.
The current implementation considers only the page_pool memory model.
Convert the mvneta driver to the xdp_return_frame_bulk APIs.

Suggested-by: Jesper Dangaard Brouer <bro...@redhat.com>
Signed-off-by: Lorenzo Bianconi <lore...@kernel.org>
---
 drivers/net/ethernet/marvell/mvneta.c |  5 ++-
 include/net/xdp.h                     |  9 +++++
 net/core/xdp.c                        | 51 +++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 54b0bf574c05..43ab8a73900e 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1834,8 +1834,10 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
                                 struct netdev_queue *nq, bool napi)
 {
        unsigned int bytes_compl = 0, pkts_compl = 0;
+       struct xdp_frame_bulk bq;
        int i;
 
+       bq.xa = NULL;
        for (i = 0; i < num; i++) {
                struct mvneta_tx_buf *buf = &txq->buf[txq->txq_get_index];
                struct mvneta_tx_desc *tx_desc = txq->descs +
@@ -1857,9 +1859,10 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
                        if (napi && buf->type == MVNETA_TYPE_XDP_TX)
                                xdp_return_frame_rx_napi(buf->xdpf);
                        else
-                               xdp_return_frame(buf->xdpf);
+                               xdp_return_frame_bulk(buf->xdpf, &bq);
                }
        }
+       xdp_flush_frame_bulk(&bq);
 
        netdev_tx_completed_queue(nq, pkts_compl, bytes_compl);
 }
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 3814fb631d52..9567110845ef 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -104,6 +104,12 @@ struct xdp_frame {
        struct net_device *dev_rx; /* used by cpumap */
 };
 
+#define XDP_BULK_QUEUE_SIZE    16 /* capacity of xdp_frame_bulk.q[] */
+struct xdp_frame_bulk {
+       void *q[XDP_BULK_QUEUE_SIZE]; /* queued xdp_frame data pointers awaiting release */
+       int count; /* number of valid entries in q[] */
+       void *xa; /* used as struct xdp_mem_allocator * for the queued entries; NULL means not yet set up */
+};
 
 static inline struct skb_shared_info *
 xdp_get_shared_info_from_frame(struct xdp_frame *frame)
@@ -194,6 +200,9 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
 void xdp_return_frame(struct xdp_frame *xdpf);
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
+void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
+void xdp_return_frame_bulk(struct xdp_frame *xdpf,
+                          struct xdp_frame_bulk *bq);
 
 /* When sending xdp_frame into the network stack, then there is no
  * return point callback, which is needed to release e.g. DMA-mapping
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 48aba933a5a8..93eabd789246 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -380,6 +380,57 @@ void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
 
+void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq)
+{ /* Hand every entry queued in @bq back to the page_pool of bq->xa, then empty the queue. */
+       struct xdp_mem_allocator *xa = bq->xa; /* allocator used for every queued entry */
+       int i;
+
+       if (unlikely(!xa)) /* no allocator recorded: return without touching bq->count */
+               return;
+
+       for (i = 0; i < bq->count; i++) {
+               struct page *page = virt_to_head_page(bq->q[i]); /* q[] holds data addresses; map back to the page */
+
+               page_pool_put_full_page(xa->page_pool, page, false);
+       }
+       bq->count = 0; /* queue is empty again; bq->xa is left as is */
+}
+EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk);
+
+void xdp_return_frame_bulk(struct xdp_frame *xdpf,
+                          struct xdp_frame_bulk *bq)
+{ /* Queue @xdpf's data in @bq for a later bulk release; non-page_pool frames are returned immediately. */
+       struct xdp_mem_info *mem = &xdpf->mem;
+       struct xdp_mem_allocator *xa, *nxa; /* xa: allocator currently held by bq; nxa: allocator looked up for this frame */
+
+       if (mem->type != MEM_TYPE_PAGE_POOL) { /* only page_pool memory is batched */
+               __xdp_return(xdpf->data, &xdpf->mem, false);
+               return;
+       }
+
+       rcu_read_lock();
+
+       xa = bq->xa;
+       if (unlikely(!xa || mem->id != xa->mem.id)) { /* bq not set up yet, or frame has a different mem id */
+               nxa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); /* NOTE(review): result is dereferenced below without a NULL check - confirm the lookup cannot fail here */
+               if (unlikely(!xa)) { /* first use of bq: reset count and adopt the looked-up allocator */
+                       bq->count = 0;
+                       bq->xa = nxa;
+                       xa = nxa;
+               }
+       }
+
+       if (mem->id != xa->mem.id || bq->count == XDP_BULK_QUEUE_SIZE) /* allocator change or full queue: drain before enqueueing */
+               xdp_flush_frame_bulk(bq);
+
+       bq->q[bq->count++] = xdpf->data;
+       if (mem->id != xa->mem.id) /* xa is still the old allocator here; switch bq to nxa only after the flush above */
+               bq->xa = nxa;
+
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
+
 void xdp_return_buff(struct xdp_buff *xdp)
 {
        __xdp_return(xdp->data, &xdp->rxq->mem, true);
-- 
2.26.2

Reply via email to