The following patch implements a bpf queue/stack maps that provides the peek/pop/push functions. There is not a direct relationship between those functions and the current operations supported by a map, hence a new lookup_and_delete map operation is added, this operation would be used by the pop helper.
A pop operation is not added because it will too specific to stack/queue maps, instead this new operation could be useful for other maps as well. Signed-off-by: Mauricio Vasquez B <mauricio.vasq...@polito.it> --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 523481a3471b..c63a44381d3f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -39,6 +39,7 @@ struct bpf_map_ops { void *(*map_lookup_elem)(struct bpf_map *map, void *key); int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); int (*map_delete_elem)(struct bpf_map *map, void *key); + void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 66917a4eba27..4cda584c6640 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -103,6 +103,7 @@ enum bpf_cmd { BPF_BTF_LOAD, BPF_BTF_GET_FD_BY_ID, BPF_TASK_FD_QUERY, + BPF_MAP_LOOKUP_AND_DELETE_ELEM, }; enum bpf_map_type { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f2d4e4f280dc..7d429123a298 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -968,6 +968,85 @@ static int map_get_next_key(union bpf_attr *attr) return err; } +#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value + +static int map_lookup_and_delete_elem(union bpf_attr *attr) +{ + void __user *ukey = u64_to_user_ptr(attr->key); + void __user *uvalue = u64_to_user_ptr(attr->value); + int ufd = attr->map_fd; + struct bpf_map *map; + void *key, *value, *ptr; + u32 value_size; + struct fd f; + int err; + + if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + err = -EPERM; + goto err_put; + } + + if (!map->ops->map_lookup_and_delete_elem) { + err = -ENOTSUPP; + goto err_put; + } + + key = __bpf_copy_key(ukey, map->key_size); + if (IS_ERR(key)) { + err = PTR_ERR(key); + goto err_put; + } + + value_size = map->value_size; + + err = -ENOMEM; + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); + if (!value) + goto free_key; + + err = -EFAULT; + if (copy_from_user(value, uvalue, value_size) != 0) + goto free_value; + + /* must increment bpf_prog_active to avoid kprobe+bpf triggering from + * inside bpf map update or delete otherwise deadlocks are possible + */ + preempt_disable(); + __this_cpu_inc(bpf_prog_active); + rcu_read_lock(); + ptr = map->ops->map_lookup_and_delete_elem(map, key); + if (ptr) + memcpy(value, ptr, value_size); + rcu_read_unlock(); + err = ptr ? 0 : -ENOENT; + __this_cpu_dec(bpf_prog_active); + preempt_enable(); + + if (err) + goto free_value; + + if (copy_to_user(uvalue, value, value_size) != 0) + goto free_value; + + err = 0; + +free_value: + kfree(value); +free_key: + kfree(key); +err_put: + fdput(f); + return err; +} + static const struct bpf_prog_ops * const bpf_prog_types[] = { #define BPF_PROG_TYPE(_id, _name) \ [_id] = & _name ## _prog_ops, @@ -2428,6 +2507,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_TASK_FD_QUERY: err = bpf_task_fd_query(&attr, uattr); break; + case BPF_MAP_LOOKUP_AND_DELETE_ELEM: + err = map_lookup_and_delete_elem(&attr); + break; default: err = -EINVAL; break;