'maps' are generic storage of different types for sharing data between the
kernel and userspace.

The maps are accessed from user space via BPF syscall, which has commands:

- create a map with given id, type and attributes
  map_id = bpf_map_create(int map_id, map_type, struct nlattr *attr, int len)
  returns positive map id or negative error

- delete map with given map id
  err = bpf_map_delete(int map_id)
  returns zero or negative error

- lookup key in a given map referenced by map_id
  err = bpf_map_lookup_elem(int map_id, void *key, void *value)
  returns zero and stores found elem into value or negative error

- create or update key/value pair in a given map
  err = bpf_map_update_elem(int map_id, void *key, void *value)
  returns zero or negative error

- find and delete element by key in a given map
  err = bpf_map_delete_elem(int map_id, void *key)
  returns zero or negative error

- iterate map elements (based on input key return next_key)
  err = bpf_map_get_next_key(int map_id, void *key, void *next_key)
  returns zero and stores next key into next_key or negative error

Signed-off-by: Alexei Starovoitov <a...@plumgrid.com>
---
 include/linux/bpf.h      |    6 ++
 include/uapi/linux/bpf.h |   25 +++++++
 kernel/bpf/syscall.c     |  180 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 211 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6448b9beea89..19cd394bdbcc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -18,6 +18,12 @@ struct bpf_map_ops {
        /* funcs callable from userspace (via syscall) */
        struct bpf_map *(*map_alloc)(struct nlattr *attrs[BPF_MAP_ATTR_MAX + 1]);
        void (*map_free)(struct bpf_map *);
+       int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
+
+       /* funcs callable from userspace and from eBPF programs */
+       void *(*map_lookup_elem)(struct bpf_map *map, void *key);
+       int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
+       int (*map_delete_elem)(struct bpf_map *map, void *key);
 };
 
 struct bpf_map {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 04374e57c290..faed2ce2d25a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -315,6 +315,31 @@ enum bpf_cmd {
         * returns zero or negative error
         */
        BPF_MAP_DELETE,
+
+       /* lookup key in a given map referenced by map_id
+        * err = bpf_map_lookup_elem(int map_id, void *key, void *value)
+        * returns zero and stores found elem into value
+        * or negative error
+        */
+       BPF_MAP_LOOKUP_ELEM,
+
+       /* create or update key/value pair in a given map
+        * err = bpf_map_update_elem(int map_id, void *key, void *value)
+        * returns zero or negative error
+        */
+       BPF_MAP_UPDATE_ELEM,
+
+       /* find and delete elem by key in a given map
+        * err = bpf_map_delete_elem(int map_id, void *key)
+        * returns zero or negative error
+        */
+       BPF_MAP_DELETE_ELEM,
+
+       /* lookup key in a given map and return next key
+        * err = bpf_map_get_next_key(int map_id, void *key, void *next_key)
+        * returns zero and stores next key or negative error
+        */
+       BPF_MAP_GET_NEXT_KEY,
 };
 
 enum bpf_map_attributes {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b9509923b16f..1a48da23a939 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -219,6 +219,174 @@ static int map_delete(int map_id)
        return 0;
 }
 
+static int map_lookup_elem(int map_id, void __user *ukey, void __user *uvalue)
+{ /* look up the user-supplied key in map 'map_id' and copy the found value to 'uvalue'; returns 0 or a negative errno */
+       struct bpf_map *map;
+       void *key, *value;
+       int err;
+
+       if (map_id < 0)
+               return -EINVAL;
+
+       rcu_read_lock(); /* held until err_unlock, i.e. across both user copies below */
+       map = idr_find(&bpf_map_id_idr, map_id);
+       err = -EINVAL; /* returned when no map is found under map_id */
+       if (!map)
+               goto err_unlock;
+
+       err = -ENOMEM;
+       key = kmalloc(map->key_size, GFP_ATOMIC); /* kernel buffer for the user key; presumably GFP_ATOMIC because rcu_read_lock() is held */
+       if (!key)
+               goto err_unlock;
+
+       err = -EFAULT;
+       if (copy_from_user(key, ukey, map->key_size) != 0) /* NOTE(review): may fault and sleep while rcu_read_lock() is held - confirm this is allowed */
+               goto free_key;
+
+       err = -ESRCH; /* returned when the map implementation finds no element for key */
+       value = map->ops->map_lookup_elem(map, key); /* pointer supplied by the map implementation; not freed in this function */
+       if (!value)
+               goto free_key;
+
+       err = -EFAULT;
+       if (copy_to_user(uvalue, value, map->value_size) != 0) /* NOTE(review): same sleep-under-RCU concern as the key copy above */
+               goto free_key;
+
+       err = 0;
+
+free_key:
+       kfree(key);
+err_unlock:
+       rcu_read_unlock();
+       return err;
+}
+
+static int map_update_elem(int map_id, void __user *ukey, void __user *uvalue)
+{ /* copy key and value from user space and pass them to the map's map_update_elem op; returns its result or a negative errno */
+       struct bpf_map *map;
+       void *key, *value;
+       int err;
+
+       if (map_id < 0)
+               return -EINVAL;
+
+       rcu_read_lock(); /* held until err_unlock, i.e. across both user copies below */
+       map = idr_find(&bpf_map_id_idr, map_id);
+       err = -EINVAL; /* returned when no map is found under map_id */
+       if (!map)
+               goto err_unlock;
+
+       err = -ENOMEM;
+       key = kmalloc(map->key_size, GFP_ATOMIC); /* kernel buffer for the user key; presumably GFP_ATOMIC because rcu_read_lock() is held */
+       if (!key)
+               goto err_unlock;
+
+       err = -EFAULT;
+       if (copy_from_user(key, ukey, map->key_size) != 0) /* NOTE(review): may fault and sleep while rcu_read_lock() is held - confirm this is allowed */
+               goto free_key;
+
+       err = -ENOMEM;
+       value = kmalloc(map->value_size, GFP_ATOMIC); /* kernel buffer for the user value */
+       if (!value)
+               goto free_key;
+
+       err = -EFAULT;
+       if (copy_from_user(value, uvalue, map->value_size) != 0) /* NOTE(review): same sleep-under-RCU concern as the key copy above */
+               goto free_value;
+
+       err = map->ops->map_update_elem(map, key, value); /* the op's return value is handed back to the caller unchanged */
+
+free_value:
+       kfree(value); /* both temporary buffers are freed on success and on failure */
+free_key:
+       kfree(key);
+err_unlock:
+       rcu_read_unlock();
+       return err;
+}
+
+static int map_delete_elem(int map_id, void __user *ukey)
+{ /* copy the key from user space and pass it to the map's map_delete_elem op; returns its result or a negative errno */
+       struct bpf_map *map;
+       void *key;
+       int err;
+
+       if (map_id < 0)
+               return -EINVAL;
+
+       rcu_read_lock(); /* held until err_unlock, i.e. across the user copy below */
+       map = idr_find(&bpf_map_id_idr, map_id);
+       err = -EINVAL; /* returned when no map is found under map_id */
+       if (!map)
+               goto err_unlock;
+
+       err = -ENOMEM;
+       key = kmalloc(map->key_size, GFP_ATOMIC); /* kernel buffer for the user key; presumably GFP_ATOMIC because rcu_read_lock() is held */
+       if (!key)
+               goto err_unlock;
+
+       err = -EFAULT;
+       if (copy_from_user(key, ukey, map->key_size) != 0) /* NOTE(review): may fault and sleep while rcu_read_lock() is held - confirm this is allowed */
+               goto free_key;
+
+       err = map->ops->map_delete_elem(map, key); /* the op's return value is handed back to the caller unchanged */
+
+free_key:
+       kfree(key);
+err_unlock:
+       rcu_read_unlock();
+       return err;
+}
+
+static int map_get_next_key(int map_id, void __user *ukey,
+                           void __user *unext_key)
+{ /* pass the user key to the map's map_get_next_key op and copy the resulting key to 'unext_key'; returns 0 or a negative errno */
+       struct bpf_map *map;
+       void *key, *next_key;
+       int err;
+
+       if (map_id < 0)
+               return -EINVAL;
+
+       rcu_read_lock(); /* held until err_unlock, i.e. across both user copies below */
+       map = idr_find(&bpf_map_id_idr, map_id);
+       err = -EINVAL; /* returned when no map is found under map_id */
+       if (!map)
+               goto err_unlock;
+
+       err = -ENOMEM;
+       key = kmalloc(map->key_size, GFP_ATOMIC); /* kernel buffer for the user key; presumably GFP_ATOMIC because rcu_read_lock() is held */
+       if (!key)
+               goto err_unlock;
+
+       err = -EFAULT;
+       if (copy_from_user(key, ukey, map->key_size) != 0) /* NOTE(review): may fault and sleep while rcu_read_lock() is held - confirm this is allowed */
+               goto free_key;
+
+       err = -ENOMEM;
+       next_key = kmalloc(map->key_size, GFP_ATOMIC); /* output buffer; same size as the input key */
+       if (!next_key)
+               goto free_key;
+
+       err = map->ops->map_get_next_key(map, key, next_key);
+       if (err) /* any non-zero result from the op is returned to the caller as is */
+               goto free_next_key;
+
+       err = -EFAULT;
+       if (copy_to_user(unext_key, next_key, map->key_size) != 0) /* NOTE(review): same sleep-under-RCU concern as the key copy above */
+               goto free_next_key;
+
+       err = 0;
+
+free_next_key:
+       kfree(next_key);
+free_key:
+       kfree(key);
+err_unlock:
+       rcu_read_unlock();
+       return err;
+}
+
 SYSCALL_DEFINE5(bpf, int, cmd, unsigned long, arg2, unsigned long, arg3,
                unsigned long, arg4, unsigned long, arg5)
 {
@@ -232,6 +400,18 @@ SYSCALL_DEFINE5(bpf, int, cmd, unsigned long, arg2, unsigned long, arg3,
        case BPF_MAP_DELETE:
                return map_delete((int) arg2);
 
+       case BPF_MAP_LOOKUP_ELEM:
+               return map_lookup_elem((int) arg2, (void __user *) arg3,
+                                      (void __user *) arg4);
+       case BPF_MAP_UPDATE_ELEM:
+               return map_update_elem((int) arg2, (void __user *) arg3,
+                                      (void __user *) arg4);
+       case BPF_MAP_DELETE_ELEM:
+               return map_delete_elem((int) arg2, (void __user *) arg3);
+
+       case BPF_MAP_GET_NEXT_KEY:
+               return map_get_next_key((int) arg2, (void __user *) arg3,
+                                       (void __user *) arg4);
        default:
                return -EINVAL;
        }
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to