Use the paged attachment mappings support to create NET_RX pages.
NET_RX pages are pages that can be used in the networking receive path.

Bind the pages to the driver's rx queues specified by the create_flags
param, and create a gen_pool to hold the free pages available for the
driver to allocate.

Signed-off-by: Mina Almasry <almasrym...@google.com>
---
 drivers/dma-buf/dma-buf.c    | 174 +++++++++++++++++++++++++++++++++++
 include/linux/dma-buf.h      |  20 ++++
 include/linux/netdevice.h    |   1 +
 include/uapi/linux/dma-buf.h |   2 +
 4 files changed, 197 insertions(+)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 50b1d813cf5c..acb86bf406f4 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -27,6 +27,7 @@
 #include <linux/dma-resv.h>
 #include <linux/mm.h>
 #include <linux/mount.h>
+#include <linux/netdevice.h>
 #include <linux/pseudo_fs.h>
 
 #include <uapi/linux/dma-buf.h>
@@ -1681,6 +1682,8 @@ static void dma_buf_pages_destroy(struct percpu_ref *ref)
        pci_dev_put(priv->pci_dev);
 }
 
+const struct dma_buf_pages_type_ops net_rx_ops;
+
 static long dma_buf_create_pages(struct file *file,
                                 struct dma_buf_create_pages_info *create_info)
 {
@@ -1793,6 +1796,9 @@ static long dma_buf_create_pages(struct file *file,
        priv->create_flags = create_info->create_flags;
 
        switch (priv->type) {
+       case DMA_BUF_PAGES_NET_RX:
+               priv->type_ops = &net_rx_ops;
+               break;
        default:
                err = -EINVAL;
                goto out_put_new_file;
@@ -1966,3 +1972,171 @@ static void __exit dma_buf_deinit(void)
        dma_buf_uninit_sysfs_statistics();
 }
 __exitcall(dma_buf_deinit);
+
+/********************************
+ *     dma_buf_pages_net_rx    *
+ ********************************/
+
+void dma_buf_pages_net_rx_release(struct dma_buf_pages *priv, struct file 
*file)
+{
+       struct netdev_rx_queue *rxq;
+       unsigned long xa_idx;
+
+       xa_for_each(&priv->net_rx.bound_rxq_list, xa_idx, rxq)
+               if (rxq->dmabuf_pages == file)
+                       rxq->dmabuf_pages = NULL;
+}
+
+static int dev_is_class(struct device *dev, void *class)
+{
+       if (dev->class != NULL && !strcmp(dev->class->name, class))
+               return 1;
+
+       return 0;
+}
+
+int dma_buf_pages_net_rx_init(struct dma_buf_pages *priv, struct file *file)
+{
+       struct netdev_rx_queue *rxq;
+       struct net_device *netdev;
+       int xa_id, err, rxq_idx;
+       struct device *device;
+
+       priv->net_rx.page_pool =
+               gen_pool_create(PAGE_SHIFT, dev_to_node(&priv->pci_dev->dev));
+
+       if (!priv->net_rx.page_pool)
+               return -ENOMEM;
+
+       /*
+        * We start with PAGE_SIZE instead of 0 since gen_pool_alloc_*() returns
+        * NULL on error
+        */
+       err = gen_pool_add_virt(priv->net_rx.page_pool, PAGE_SIZE, 0,
+                               PAGE_SIZE * priv->num_pages,
+                               dev_to_node(&priv->pci_dev->dev));
+       if (err)
+               goto out_destroy_pool;
+
+       xa_init_flags(&priv->net_rx.bound_rxq_list, XA_FLAGS_ALLOC);
+
+       device = device_find_child(&priv->pci_dev->dev, "net", dev_is_class);
+       if (!device) {
+               err = -ENODEV;
+               goto out_destroy_xarray;
+       }
+
+       netdev = to_net_dev(device);
+       if (!netdev) {
+               err = -ENODEV;
+               goto out_put_dev;
+       }
+
+       for (rxq_idx = 0; rxq_idx < (sizeof(priv->create_flags) * 8);
+            rxq_idx++) {
+               if (!(priv->create_flags & (1ULL << rxq_idx)))
+                       continue;
+
+               if (rxq_idx >= netdev->num_rx_queues) {
+                       err = -ERANGE;
+                       goto out_release_rx;
+               }
+
+               rxq = __netif_get_rx_queue(netdev, rxq_idx);
+
+               err = xa_alloc(&priv->net_rx.bound_rxq_list, &xa_id, rxq,
+                              xa_limit_32b, GFP_KERNEL);
+               if (err)
+                       goto out_release_rx;
+
+               /* We previously have done a dma_buf_attach(), which validates
+                * that the net_device we're trying to attach to can reach the
+                * dmabuf, so we don't need to check here as well.
+                */
+               rxq->dmabuf_pages = file;
+       }
+       put_device(device);
+       return 0;
+
+out_release_rx:
+       dma_buf_pages_net_rx_release(priv, file);
+out_put_dev:
+       put_device(device);
+out_destroy_xarray:
+       xa_destroy(&priv->net_rx.bound_rxq_list);
+out_destroy_pool:
+       gen_pool_destroy(priv->net_rx.page_pool);
+       return err;
+}
+
/* Final teardown of the NET_RX state: drop the rxq binding list and the
 * free-page gen_pool. NOTE(review): gen_pool_destroy() BUGs if allocations
 * are still outstanding — presumably all pages have been freed back by the
 * time this runs; confirm against the caller.
 */
void dma_buf_pages_net_rx_free(struct dma_buf_pages *priv)
{
	xa_destroy(&priv->net_rx.bound_rxq_list);
	gen_pool_destroy(priv->net_rx.page_pool);
}
+
+static unsigned long dma_buf_page_to_gen_pool_addr(struct page *page)
+{
+       struct dma_buf_pages *priv;
+       struct dev_pagemap *pgmap;
+       unsigned long offset;
+
+       pgmap = page->pgmap;
+       priv = container_of(pgmap, struct dma_buf_pages, pgmap);
+       offset = page - priv->pages;
+       /* Offset + 1 is due to the fact that we want to avoid 0 virt address
+        * returned from the gen_pool. The gen_pool returns 0 on error, and virt
+        * address 0 is indistinguishable from an error.
+        */
+       return (offset + 1) << PAGE_SHIFT;
+}
+
/* Inverse of dma_buf_page_to_gen_pool_addr(): translate a gen_pool
 * "virtual address" back into the dmabuf page it represents.
 */
static struct page *
dma_buf_gen_pool_addr_to_page(unsigned long addr, struct dma_buf_pages *priv)
{
	/* - 1 is due to the fact that we want to avoid 0 virt address
	 * returned from the gen_pool. See comment in
	 * dma_buf_page_to_gen_pool_addr() for details.
	 */
	unsigned long offset = (addr >> PAGE_SHIFT) - 1;
	return &priv->pages[offset];
}
+
/* Return a page to the free-page gen_pool when its refcount drops.
 *
 * NOTE(review): gen_pool_has_addr() checks that the address lies within the
 * pool's chunk range, not whether it is currently allocated — it guards
 * against freeing an address outside the pool, not against a double free;
 * confirm callers cannot free the same page twice.
 */
void dma_buf_page_free_net_rx(struct dma_buf_pages *priv, struct page *page)
{
	unsigned long addr = dma_buf_page_to_gen_pool_addr(page);

	if (gen_pool_has_addr(priv->net_rx.page_pool, addr, PAGE_SIZE))
		gen_pool_free(priv->net_rx.page_pool, addr, PAGE_SIZE);
}
+
/* Type ops for DMA_BUF_PAGES_NET_RX pages, wired up in
 * dma_buf_create_pages().
 */
const struct dma_buf_pages_type_ops net_rx_ops = {
	.dma_buf_pages_init		= dma_buf_pages_net_rx_init,
	.dma_buf_pages_release		= dma_buf_pages_net_rx_release,
	.dma_buf_pages_destroy		= dma_buf_pages_net_rx_free,
	.dma_buf_page_free		= dma_buf_page_free_net_rx,
};
+
+struct page *dma_buf_pages_net_rx_alloc(struct dma_buf_pages *priv)
+{
+       unsigned long gen_pool_addr;
+       struct page *pg;
+
+       if (!(priv->type & DMA_BUF_PAGES_NET_RX))
+               return NULL;
+
+       gen_pool_addr = gen_pool_alloc(priv->net_rx.page_pool, PAGE_SIZE);
+       if (!gen_pool_addr)
+               return NULL;
+
+       if (!PAGE_ALIGNED(gen_pool_addr)) {
+               net_err_ratelimited("dmabuf page pool allocation not aligned");
+               gen_pool_free(priv->net_rx.page_pool, gen_pool_addr, PAGE_SIZE);
+               return NULL;
+       }
+
+       pg = dma_buf_gen_pool_addr_to_page(gen_pool_addr, priv);
+
+       percpu_ref_get(&priv->pgmap.ref);
+       return pg;
+}
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 5789006180ea..e8e66d6407d0 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -22,6 +22,9 @@
 #include <linux/fs.h>
 #include <linux/dma-fence.h>
 #include <linux/wait.h>
+#include <linux/genalloc.h>
+#include <linux/xarray.h>
+#include <net/page_pool.h>
 
 struct device;
 struct dma_buf;
@@ -552,6 +555,11 @@ struct dma_buf_pages_type_ops {
                                  struct page *page);
 };
 
+struct dma_buf_pages_net_rx {
+       struct gen_pool *page_pool;
+       struct xarray bound_rxq_list;
+};
+
 struct dma_buf_pages {
        /* fields for dmabuf */
        struct dma_buf *dmabuf;
@@ -568,6 +576,10 @@ struct dma_buf_pages {
        unsigned int type;
        const struct dma_buf_pages_type_ops *type_ops;
        __u64 create_flags;
+
+       union {
+               struct dma_buf_pages_net_rx net_rx;
+       };
 };
 
 /**
@@ -671,6 +683,8 @@ static inline bool is_dma_buf_pages_file(struct file *file)
        return file->f_op == &dma_buf_pages_fops;
 }
 
+struct page *dma_buf_pages_net_rx_alloc(struct dma_buf_pages *priv);
+
 static inline bool is_dma_buf_page(struct page *page)
 {
        return (is_zone_device_page(page) && page->pgmap &&
@@ -718,6 +732,12 @@ static inline int dma_buf_map_sg(struct device *dev, struct scatterlist *sg,
 {
        return 0;
 }
+
+static inline struct page *dma_buf_pages_net_rx_alloc(struct dma_buf_pages *priv)
+{
+       return NULL;
+}
+
 #endif
 
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c2f0c6002a84..7a087ffa9baa 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -796,6 +796,7 @@ struct netdev_rx_queue {
 #ifdef CONFIG_XDP_SOCKETS
        struct xsk_buff_pool            *pool;
 #endif
+       struct file __rcu               *dmabuf_pages;
 } ____cacheline_aligned_in_smp;
 
 /*
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
index d0f63a2ab7e4..b392cef9d3c6 100644
--- a/include/uapi/linux/dma-buf.h
+++ b/include/uapi/linux/dma-buf.h
@@ -186,6 +186,8 @@ struct dma_buf_create_pages_info {
        __u64 create_flags;
 };
 
+#define DMA_BUF_PAGES_NET_RX           (1 << 0)
+
 #define DMA_BUF_CREATE_PAGES   _IOW(DMA_BUF_BASE, 4, struct dma_buf_create_pages_info)
 
 #endif
-- 
2.41.0.390.g38632f3daf-goog

Reply via email to