On 06/19/2018 08:00 PM, Tushar Dave wrote: > Add new eBPF prog type BPF_PROG_TYPE_SOCKET_SG_FILTER which uses the > existing socket filter infrastructure for bpf program attach and load. > SOCKET_SG_FILTER eBPF program receives struct scatterlist as bpf context > contrast to SOCKET_FILTER which deals with struct skb. This is useful > for kernel entities that don't have skb to represent packet data but > want to run eBPF socket filter on packet data that is in form of struct > scatterlist e.g. IB/RDMA > > Signed-off-by: Tushar Dave <tushar.n.d...@oracle.com> > Acked-by: Sowmini Varadhan <sowmini.varad...@oracle.com> > --- > include/linux/bpf_types.h | 1 + > include/linux/filter.h | 8 +++++ > include/uapi/linux/bpf.h | 7 ++++ > kernel/bpf/syscall.c | 1 + > kernel/bpf/verifier.c | 1 + > net/core/filter.c | 77 > ++++++++++++++++++++++++++++++++++++++++-- > samples/bpf/bpf_load.c | 11 ++++-- > tools/bpf/bpftool/prog.c | 1 + > tools/include/uapi/linux/bpf.h | 7 ++++ > tools/lib/bpf/libbpf.c | 3 ++ > tools/lib/bpf/libbpf.h | 2 ++ > 11 files changed, 114 insertions(+), 5 deletions(-) > > diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h > index c5700c2..f8b4b56 100644 > --- a/include/linux/bpf_types.h > +++ b/include/linux/bpf_types.h > @@ -16,6 +16,7 @@ > BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops) > BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb) > BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg) > +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_SG_FILTER, socksg_filter) > #endif > #ifdef CONFIG_BPF_EVENTS > BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) > diff --git a/include/linux/filter.h b/include/linux/filter.h > index 45fc0f5..71618b1 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -517,6 +517,14 @@ struct bpf_skb_data_end { > void *data_end; > }; > > +struct bpf_scatterlist { > + struct scatterlist *sg; > + void *start; > + void *end; > + int cur_sg; > + int num_sg; > +}; > + > struct sk_msg_buff { > void *data; > void *data_end; > diff 
--git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 59b19b6..ef0a7b6 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -144,6 +144,7 @@ enum bpf_prog_type { > BPF_PROG_TYPE_CGROUP_SOCK_ADDR, > BPF_PROG_TYPE_LWT_SEG6LOCAL, > BPF_PROG_TYPE_LIRC_MODE2, > + BPF_PROG_TYPE_SOCKET_SG_FILTER, > }; > > enum bpf_attach_type { > @@ -2358,6 +2359,12 @@ enum sk_action { > SK_PASS, > }; > > +/* use accessible scatterlist */ > +struct sg_filter_md { > + void *data; /* sg_virt(sg) */ > + void *data_end; /* sg_virt(sg) + sg->length */ > +}; > + > /* user accessible metadata for SK_MSG packet hook, new fields must > * be added to the end of this structure > */ > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 0fa2062..74193a8 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -1300,6 +1300,7 @@ static int bpf_prog_load(union bpf_attr *attr) > > if (type != BPF_PROG_TYPE_SOCKET_FILTER && > type != BPF_PROG_TYPE_CGROUP_SKB && > + type != BPF_PROG_TYPE_SOCKET_SG_FILTER && > !capable(CAP_SYS_ADMIN)) > return -EPERM; > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index d6403b5..a00d3eb 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -1320,6 +1320,7 @@ static bool may_access_direct_pkt_data(struct > bpf_verifier_env *env, > case BPF_PROG_TYPE_LWT_XMIT: > case BPF_PROG_TYPE_SK_SKB: > case BPF_PROG_TYPE_SK_MSG: > + case BPF_PROG_TYPE_SOCKET_SG_FILTER: > if (meta) > return meta->pkt_access; > > diff --git a/net/core/filter.c b/net/core/filter.c > index 3d9ba7e..8f67942 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -1130,7 +1130,8 @@ static void bpf_release_orig_filter(struct bpf_prog *fp) > > static void __bpf_prog_release(struct bpf_prog *prog) > { > - if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) { > + if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER || > + prog->type == BPF_PROG_TYPE_SOCKET_SG_FILTER) { > bpf_prog_put(prog); > } else { > 
bpf_release_orig_filter(prog); > @@ -1551,10 +1552,16 @@ int sk_reuseport_attach_filter(struct sock_fprog > *fprog, struct sock *sk) > > static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk) > { > + struct bpf_prog *prog; > + > if (sock_flag(sk, SOCK_FILTER_LOCKED)) > return ERR_PTR(-EPERM); > > - return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER); > + prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER); > + if (IS_ERR(prog)) > + prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_SG_FILTER); > + > + return prog; > }
Hmm, I don't think this works: this now means that as an unprivileged user I can attach a new BPF_PROG_TYPE_SOCKET_SG_FILTER to a non-rds socket, e.g. normal tcp/udp, through the SO_ATTACH_BPF sockopt, where the input context is an skb instead of an sg list, and thus crash my box?