The subsequent commits introducing default maps and a hash-based ifindex
devmap require some refactoring of the devmap code. Perform this
refactoring first, so the later commits become easier to read.

Signed-off-by: Toke Høiland-Jørgensen <t...@redhat.com>
---
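Note for reviewers (illustration only, not part of the patch): the point of
splitting out dev_map_init_map() is that a follow-up map type can reuse the
shared attribute checks and allocations instead of duplicating
dev_map_alloc(). A rough sketch of such a caller; the dev_map_hash_alloc()
name is hypothetical:

static struct bpf_map *dev_map_hash_alloc(union bpf_attr *attr)
{
	struct bpf_dtab *dtab;
	int err;

	/* capability and attribute sanity checks as in dev_map_alloc()
	 * elided for brevity
	 */
	dtab = kzalloc(sizeof(*dtab), GFP_USER);
	if (!dtab)
		return ERR_PTR(-ENOMEM);

	/* shared setup; a hash-based variant would allocate its own
	 * hash table after this call
	 */
	err = dev_map_init_map(dtab, attr, true);
	if (err) {
		kfree(dtab);
		return ERR_PTR(err);
	}

	/* registration on dev_map_list as in dev_map_alloc() elided */
	return &dtab->map;
}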
 kernel/bpf/devmap.c |  177 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 109 insertions(+), 68 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 191b79948424..1037fc08c504 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -75,6 +75,7 @@ struct bpf_dtab {
        struct bpf_dtab_netdev **netdev_map;
        unsigned long __percpu *flush_needed;
        struct list_head list;
+       struct rcu_head rcu;
 };
 
 static DEFINE_SPINLOCK(dev_map_lock);
@@ -85,23 +86,11 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr)
        return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long);
 }
 
-static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
+static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr,
+                           bool check_memlock)
 {
-       struct bpf_dtab *dtab;
-       int err = -EINVAL;
        u64 cost;
-
-       if (!capable(CAP_NET_ADMIN))
-               return ERR_PTR(-EPERM);
-
-       /* check sanity of attributes */
-       if (attr->max_entries == 0 || attr->key_size != 4 ||
-           attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
-               return ERR_PTR(-EINVAL);
-
-       dtab = kzalloc(sizeof(*dtab), GFP_USER);
-       if (!dtab)
-               return ERR_PTR(-ENOMEM);
+       int err;
 
        bpf_map_init_from_attr(&dtab->map, attr);
 
@@ -109,60 +98,72 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
        cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
        cost += dev_map_bitmap_size(attr) * num_possible_cpus();
        if (cost >= U32_MAX - PAGE_SIZE)
-               goto free_dtab;
+               return -EINVAL;
 
        dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
-       /* if map size is larger than memlock limit, reject it early */
-       err = bpf_map_precharge_memlock(dtab->map.pages);
-       if (err)
-               goto free_dtab;
-
-       err = -ENOMEM;
+       if (check_memlock) {
+               /* if map size is larger than memlock limit, reject it early */
+               err = bpf_map_precharge_memlock(dtab->map.pages);
+               if (err)
+                       return err;
+       }
 
        /* A per cpu bitfield with a bit per possible net device */
        dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
                                                __alignof__(unsigned long),
                                                GFP_KERNEL | __GFP_NOWARN);
        if (!dtab->flush_needed)
-               goto free_dtab;
+               goto err_alloc;
 
        dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
                                              sizeof(struct bpf_dtab_netdev *),
                                              dtab->map.numa_node);
        if (!dtab->netdev_map)
-               goto free_dtab;
+               goto err_map;
 
-       spin_lock(&dev_map_lock);
-       list_add_tail_rcu(&dtab->list, &dev_map_list);
-       spin_unlock(&dev_map_lock);
+       return 0;
 
-       return &dtab->map;
-free_dtab:
+ err_map:
        free_percpu(dtab->flush_needed);
-       kfree(dtab);
-       return ERR_PTR(err);
+ err_alloc:
+       return -ENOMEM;
 }
 
-static void dev_map_free(struct bpf_map *map)
+static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 {
-       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-       int i, cpu;
+       struct bpf_dtab *dtab;
+       int err;
 
-       /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
-        * so the programs (can be more than one that used this map) were
-        * disconnected from events. Wait for outstanding critical sections in
-        * these programs to complete. The rcu critical section only guarantees
-        * no further reads against netdev_map. It does __not__ ensure pending
-        * flush operations (if any) are complete.
-        */
+       if (!capable(CAP_NET_ADMIN))
+               return ERR_PTR(-EPERM);
+
+       /* check sanity of attributes */
+       if (attr->max_entries == 0 || attr->key_size != 4 ||
+           attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
+               return ERR_PTR(-EINVAL);
+
+       dtab = kzalloc(sizeof(*dtab), GFP_USER);
+       if (!dtab)
+               return ERR_PTR(-ENOMEM);
+
+       err = dev_map_init_map(dtab, attr, true);
+       if (err) {
+               kfree(dtab);
+               return ERR_PTR(err);
+       }
 
        spin_lock(&dev_map_lock);
-       list_del_rcu(&dtab->list);
+       list_add_tail_rcu(&dtab->list, &dev_map_list);
        spin_unlock(&dev_map_lock);
 
-       bpf_clear_redirect_map(map);
-       synchronize_rcu();
+       return &dtab->map;
+}
+
+static void __dev_map_free(struct rcu_head *rcu)
+{
+       struct bpf_dtab *dtab = container_of(rcu, struct bpf_dtab, rcu);
+       int i, cpu;
 
        /* To ensure all pending flush operations have completed wait for flush
         * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
@@ -192,6 +193,26 @@ static void dev_map_free(struct bpf_map *map)
        kfree(dtab);
 }
 
+static void dev_map_free(struct bpf_map *map)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+
+       /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+        * so the programs (can be more than one that used this map) were
+        * disconnected from events. Wait for outstanding critical sections in
+        * these programs to complete. The rcu critical section only guarantees
+        * no further reads against netdev_map. It does __not__ ensure pending
+        * flush operations (if any) are complete.
+        */
+
+       spin_lock(&dev_map_lock);
+       list_del_rcu(&dtab->list);
+       spin_unlock(&dev_map_lock);
+
+       bpf_clear_redirect_map(map);
+       call_rcu(&dtab->rcu, __dev_map_free);
+}
+
 static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
        struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
@@ -429,12 +450,42 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
        return 0;
 }
 
-static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
-                               u64 map_flags)
+static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
+                                                   struct bpf_dtab *dtab,
+                                                   u32 ifindex,
+                                                   unsigned int bit)
 {
-       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-       struct net *net = current->nsproxy->net_ns;
        gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+       struct bpf_dtab_netdev *dev;
+
+       dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node);
+       if (!dev)
+               return ERR_PTR(-ENOMEM);
+
+       dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
+                                       sizeof(void *), gfp);
+       if (!dev->bulkq) {
+               kfree(dev);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       dev->dev = dev_get_by_index(net, ifindex);
+       if (!dev->dev) {
+               free_percpu(dev->bulkq);
+               kfree(dev);
+               return ERR_PTR(-EINVAL);
+       }
+
+       dev->bit = bit;
+       dev->dtab = dtab;
+
+       return dev;
+}
+
+static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
+                                void *key, void *value, u64 map_flags)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
        struct bpf_dtab_netdev *dev, *old_dev;
        u32 i = *(u32 *)key;
        u32 ifindex = *(u32 *)value;
@@ -449,26 +500,9 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
        if (!ifindex) {
                dev = NULL;
        } else {
-               dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
-               if (!dev)
-                       return -ENOMEM;
-
-               dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
-                                               sizeof(void *), gfp);
-               if (!dev->bulkq) {
-                       kfree(dev);
-                       return -ENOMEM;
-               }
-
-               dev->dev = dev_get_by_index(net, ifindex);
-               if (!dev->dev) {
-                       free_percpu(dev->bulkq);
-                       kfree(dev);
-                       return -EINVAL;
-               }
-
-               dev->bit = i;
-               dev->dtab = dtab;
+               dev = __dev_map_alloc_node(net, dtab, ifindex, i);
+               if (IS_ERR(dev))
+                       return PTR_ERR(dev);
        }
 
        /* Use call_rcu() here to ensure rcu critical sections have completed
@@ -482,6 +516,13 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
        return 0;
 }
 
+static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
+                              u64 map_flags)
+{
+       return __dev_map_update_elem(current->nsproxy->net_ns,
+                                    map, key, value, map_flags);
+}
+
 const struct bpf_map_ops dev_map_ops = {
        .map_alloc = dev_map_alloc,
        .map_free = dev_map_free,

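For reference (not part of the patch): the deferred free introduced in
dev_map_free() above follows the standard call_rcu() idiom, i.e. embed a
struct rcu_head in the object, unlink the object from every RCU-visible
structure, and let the callback recover the object via container_of(). The
generic shape, with hypothetical names:

struct foo {
	/* ... payload ... */
	struct rcu_head rcu;
};

static void __foo_free(struct rcu_head *rcu)
{
	struct foo *f = container_of(rcu, struct foo, rcu);

	/* runs only after all pre-existing RCU read-side critical
	 * sections have completed
	 */
	kfree(f);
}

/* after removing f from all RCU-protected structures: */
call_rcu(&f->rcu, __foo_free);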