On Sun, Apr 01, 2018 at 08:01:10AM -0700, John Fastabend wrote:
> Sockmap is currently backed by an array and enforces keys to be
> four bytes. This works well for many use cases and was originally
> modeled after devmap which also uses four bytes keys. However,
> this has become limiting in larger use cases where a hash would
> be more appropriate. For example users may want to use the 5-tuple
> of the socket as the lookup key.
> 
> To support this add hash support.
> 
> Signed-off-by: John Fastabend <john.fastab...@gmail.com>

api looks good, but I think it came a bit too late for this release.
_nulls part you don't need for this hash. Few other nits:

> +static void htab_elem_free_rcu(struct rcu_head *head)
> +{
> +     struct htab_elem *l = container_of(head, struct htab_elem, rcu);
> +
> +     /* must increment bpf_prog_active to avoid kprobe+bpf triggering while
> +      * we're calling kfree, otherwise deadlock is possible if kprobes
> +      * are placed somewhere inside of slub
> +      */
> +     preempt_disable();
> +     __this_cpu_inc(bpf_prog_active);
> +     kfree(l);
> +     __this_cpu_dec(bpf_prog_active);
> +     preempt_enable();

I don't think it's necessary.

> +static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
> +{
> +     struct bpf_htab *htab;
> +     int i, err;
> +     u64 cost;
> +
> +     if (!capable(CAP_NET_ADMIN))
> +             return ERR_PTR(-EPERM);
> +
> +     /* check sanity of attributes */
> +     if (attr->max_entries == 0 ||
> +         attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
> +             return ERR_PTR(-EINVAL);
> +
> +     if (attr->value_size > KMALLOC_MAX_SIZE)
> +             return ERR_PTR(-E2BIG);

doesn't seem to match
+       u32 fd = *(u32 *)value;
that is done later.

> +static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head,
> +                                      u32 hash, void *key, u32 key_size)
> +{
> +     struct hlist_nulls_node *n;
> +     struct htab_elem *l;
> +
> +     hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
> +             if (l->hash == hash && !memcmp(&l->key, key, key_size))
> +                     return l;

if nulls is needed, there gotta be a comment explaining it.

please add tests for all methods.

> diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
> index f95fa67..2fa4cbb 100644
> --- a/tools/bpf/bpftool/map.c
> +++ b/tools/bpf/bpftool/map.c
> @@ -67,6 +67,7 @@
>       [BPF_MAP_TYPE_DEVMAP]           = "devmap",
>       [BPF_MAP_TYPE_SOCKMAP]          = "sockmap",
>       [BPF_MAP_TYPE_CPUMAP]           = "cpumap",
> +     [BPF_MAP_TYPE_SOCKHASH]         = "sockhash",
>  };
>  
>  static unsigned int get_possible_cpus(void)
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 9d07465..1a19450 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -115,6 +115,7 @@ enum bpf_map_type {
>       BPF_MAP_TYPE_DEVMAP,
>       BPF_MAP_TYPE_SOCKMAP,
>       BPF_MAP_TYPE_CPUMAP,
> +     BPF_MAP_TYPE_SOCKHASH,

tools/* updates should be in separate commit.

Reply via email to