Like cpumap, create a queue for xdp frames that will be bulked.  For
now, this patch simply invokes ndo_xdp_xmit for each frame.  This
happens either when the map flush operation is invoked, or when the
limit DEV_MAP_BULK_SIZE is reached.

Signed-off-by: Jesper Dangaard Brouer <bro...@redhat.com>
---
 kernel/bpf/devmap.c |   78 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 74 insertions(+), 4 deletions(-)
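
For readers new to the pattern, here is a minimal, self-contained userspace
sketch of the bulk-queue technique this patch applies.  BULK_SIZE, bq_enqueue()
and bq_flush() mirror the patch below, but the transmit step is a printf
stand-in for ndo_xdp_xmit, and main() is only a demo driver:

	#include <stdio.h>

	#define BULK_SIZE 16

	struct bulk_queue {
		void *q[BULK_SIZE];
		unsigned int count;
	};

	/* Drain the queue; stands in for bq_xmit_all() */
	static void bq_flush(struct bulk_queue *bq)
	{
		unsigned int i;

		for (i = 0; i < bq->count; i++)
			printf("xmit frame %p\n", bq->q[i]);
		bq->count = 0;
	}

	/* Queue one frame; drain first if full, like bq_enqueue() */
	static void bq_enqueue(struct bulk_queue *bq, void *frame)
	{
		if (bq->count == BULK_SIZE)
			bq_flush(bq);
		bq->q[bq->count++] = frame;
	}

	int main(void)
	{
		struct bulk_queue bq = { .count = 0 };
		int frames[40];
		int i;

		for (i = 0; i < 40; i++)
			bq_enqueue(&bq, &frames[i]);
		bq_flush(&bq);	/* final flush, like __dev_map_flush() */
		return 0;
	}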

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 808808bf2bf2..cab72c100bb5 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -54,11 +54,18 @@
 #define DEV_CREATE_FLAG_MASK \
        (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
 
+#define DEV_MAP_BULK_SIZE 16
+struct xdp_bulk_queue {
+       struct xdp_frame *q[DEV_MAP_BULK_SIZE];
+       unsigned int count;
+};
+
 /* objects in the map */
 struct bpf_dtab_netdev {
        struct net_device *dev; /* must be first member, due to tracepoint */
        struct bpf_dtab *dtab;
        unsigned int bit;
+       struct xdp_bulk_queue __percpu *bulkq;
        struct rcu_head rcu;
 };
 
@@ -209,6 +216,38 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
        __set_bit(bit, bitmap);
 }
 
+static int bq_xmit_all(struct bpf_dtab_netdev *obj,
+                        struct xdp_bulk_queue *bq)
+{
+       unsigned int processed = 0, drops = 0;
+       struct net_device *dev = obj->dev;
+       int i;
+
+       if (unlikely(!bq->count))
+               return 0;
+
+       for (i = 0; i < bq->count; i++) {
+               struct xdp_frame *xdpf = bq->q[i];
+
+               prefetch(xdpf);
+       }
+
+       for (i = 0; i < bq->count; i++) {
+               struct xdp_frame *xdpf = bq->q[i];
+               int err;
+
+               err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
+               if (err) {
+                       drops++;
+                       xdp_return_frame(xdpf);
+               }
+               processed++;
+       }
+       bq->count = 0;
+
+       return 0;
+}
+
 /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
  * from the driver before returning from its napi->poll() routine. The poll()
  * routine is called either from busy_poll context or net_rx_action signaled
@@ -224,6 +263,7 @@ void __dev_map_flush(struct bpf_map *map)
 
        for_each_set_bit(bit, bitmap, map->max_entries) {
                struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
+               struct xdp_bulk_queue *bq;
                struct net_device *netdev;
 
                /* This is possible if the dev entry is removed by user space
@@ -233,6 +273,9 @@ void __dev_map_flush(struct bpf_map *map)
                        continue;
 
                __clear_bit(bit, bitmap);
+
+               bq = this_cpu_ptr(dev->bulkq);
+               bq_xmit_all(dev, bq);
                netdev = dev->dev;
                if (likely(netdev->netdev_ops->ndo_xdp_flush))
                        netdev->netdev_ops->ndo_xdp_flush(netdev);
@@ -255,6 +298,20 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
        return obj;
 }
 
+/* Runs under RCU-read-side, plus in softirq under NAPI protection.
+ * Thus, safe percpu variable access.
+ */
+static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
+{
+       struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
+
+       if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
+               bq_xmit_all(obj, bq);
+
+       bq->q[bq->count++] = xdpf;
+       return 0;
+}
+
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
 {
        struct net_device *dev = dst->dev;
@@ -268,8 +325,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
        if (unlikely(!xdpf))
                return -EOVERFLOW;
 
-       /* TODO: implement a bulking/enqueue step later */
-       err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
+       err = bq_enqueue(dst, xdpf);
        if (err)
                return err;
 
@@ -288,13 +344,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
 {
        if (dev->dev->netdev_ops->ndo_xdp_flush) {
                struct net_device *fl = dev->dev;
+               struct xdp_bulk_queue *bq;
                unsigned long *bitmap;
+
                int cpu;
 
                for_each_online_cpu(cpu) {
                        bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
                        __clear_bit(dev->bit, bitmap);
 
+                       bq = per_cpu_ptr(dev->bulkq, cpu);
+                       bq_xmit_all(dev, bq);
+
                        fl->netdev_ops->ndo_xdp_flush(dev->dev);
                }
        }
@@ -306,6 +367,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
 
        dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
        dev_map_flush_old(dev);
+       free_percpu(dev->bulkq);
        dev_put(dev->dev);
        kfree(dev);
 }
@@ -338,6 +400,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
        struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
        struct net *net = current->nsproxy->net_ns;
+       gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
        struct bpf_dtab_netdev *dev, *old_dev;
        u32 i = *(u32 *)key;
        u32 ifindex = *(u32 *)value;
@@ -352,11 +415,18 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
        if (!ifindex) {
                dev = NULL;
        } else {
-               dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
-                                  map->numa_node);
+               dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
                if (!dev)
                        return -ENOMEM;
 
+               dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
+                                               sizeof(void *), gfp);
+               if (!dev->bulkq) {
+                       kfree(dev);
+                       return -ENOMEM;
+               }
+
                dev->dev = dev_get_by_index(net, ifindex);
                if (!dev->dev) {
+                       free_percpu(dev->bulkq);
                        kfree(dev);

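For context on the flush requirement described in the __dev_map_flush()
comment above: the driver must call xdp_do_flush_map() before returning from
its napi->poll() routine.  A hypothetical driver sketch (the mydrv_* names
are illustrative and not part of this patch):

	static int mydrv_napi_poll(struct napi_struct *napi, int budget)
	{
		struct mydrv_ring *ring = container_of(napi, struct mydrv_ring, napi);
		int work_done;

		/* RX processing; an XDP_REDIRECT verdict may end up in
		 * dev_map_enqueue() -> bq_enqueue(), filling the per-CPU
		 * bulk queues.  (mydrv_clean_rx_irq is hypothetical.)
		 */
		work_done = mydrv_clean_rx_irq(ring, budget);

		/* Must run before leaving poll: this invokes __dev_map_flush(),
		 * which drains the bulk queues via bq_xmit_all() and then calls
		 * ndo_xdp_flush() on each target device.
		 */
		xdp_do_flush_map();

		if (work_done < budget)
			napi_complete_done(napi, work_done);

		return work_done;
	}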