On 06/19/2018 08:00 PM, Tushar Dave wrote: > When sg_filter_run() is invoked it runs the attached eBPF > SOCKET_SG_FILTER program which deals with struct scatterlist. > > In addition, this patch also adds bpf_sg_next helper function that > allows users to retrieve the next sg element from sg list. > > Signed-off-by: Tushar Dave <tushar.n.d...@oracle.com> > Acked-by: Sowmini Varadhan <sowmini.varad...@oracle.com> > --- > include/linux/filter.h | 2 + > include/uapi/linux/bpf.h | 10 ++++- > net/core/filter.c | 72 > +++++++++++++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 10 ++++- > tools/testing/selftests/bpf/bpf_helpers.h | 3 ++ > 5 files changed, 95 insertions(+), 2 deletions(-) > > diff --git a/include/linux/filter.h b/include/linux/filter.h > index 71618b1..d176402 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -1072,4 +1072,6 @@ struct bpf_sock_ops_kern { > */ > }; > > +int sg_filter_run(struct sock *sk, struct scatterlist *sg); > + > #endif /* __LINUX_FILTER_H__ */ > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index ef0a7b6..036432b 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -2076,6 +2076,13 @@ struct bpf_stack_build_id { > * Return > * A 64-bit integer containing the current cgroup id based > * on the cgroup within which the current task is running. > + * > + * int bpf_sg_next(struct bpf_scatterlist *sg) > + * Description > + * This helper allows user to retrieve next sg element from > + * sg list. > + * Return > + * Returns 0 on success, or a negative error in case of failure. > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -2158,7 +2165,8 @@ struct bpf_stack_build_id { > FN(rc_repeat), \ > FN(rc_keydown), \ > FN(skb_cgroup_id), \ > - FN(get_current_cgroup_id), > + FN(get_current_cgroup_id), \ > + FN(sg_next), > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > * function eBPF program intends to call > diff --git a/net/core/filter.c b/net/core/filter.c > index 8f67942..702ff5b 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -121,6 +121,53 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff > *skb, unsigned int cap) > } > EXPORT_SYMBOL(sk_filter_trim_cap); > > +int sg_filter_run(struct sock *sk, struct scatterlist *sg) > +{ > + struct sk_filter *filter; > + int err; > + > + rcu_read_lock(); > + filter = rcu_dereference(sk->sk_filter); > + if (filter) { > + struct bpf_scatterlist bpfsg; > + int num_sg; > + > + if (!sg) { > + err = -EINVAL; > + goto out; > + } > + > + num_sg = sg_nents(sg); > + if (num_sg <= 0) { > + err = -EINVAL; > + goto out; > + } > + > + /* We store a reference to the sg list so it can later used by > + * eBPF helpers to retrieve the next sg element. > + */ > + bpfsg.num_sg = num_sg; > + bpfsg.cur_sg = 0; > + bpfsg.sg = sg; > + > + /* For the first sg element, we store the pkt access pointers > + * into start and end so eBPF program can have pkt access using > + * data and data_end. The pkt access for subsequent element of > + * sg list is possible when eBPF program invokes bpf_sg_next > + * which takes care of setting start and end to the correct sg > + * element. > + */ > + bpfsg.start = sg_virt(sg); > + bpfsg.end = bpfsg.start + sg->length; > + BPF_PROG_RUN(filter->prog, &bpfsg); > + } > +out: > + rcu_read_unlock(); > + > + return err; > +} > +EXPORT_SYMBOL(sg_filter_run); > + > BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb) > { > return skb_get_poff(skb); > @@ -3753,6 +3800,29 @@ static unsigned long bpf_xdp_copy(void *dst_buff, > const void *src_buff, > .arg1_type = ARG_PTR_TO_CTX, > }; > > +BPF_CALL_1(bpf_sg_next, struct bpf_scatterlist *, bpfsg) > +{ > + struct scatterlist *sg = bpfsg->sg; > + int cur_sg = bpfsg->cur_sg; > + > + cur_sg++; > + if (cur_sg >= bpfsg->num_sg) > + return -ENODATA; > + > + bpfsg->cur_sg = cur_sg; > + bpfsg->start = sg_virt(&sg[cur_sg]); > + bpfsg->end = bpfsg->start + sg[cur_sg].length; > + > + return 0; > +} > + > +static const struct bpf_func_proto bpf_sg_next_proto = { > + .func = bpf_sg_next, > + .gpl_only = false, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_CTX, > +};
Should be added to bpf_helper_changes_pkt_data() in order to enforce a reload of all pkt pointers. Otherwise this is buggy in the sense that someone could only reload pkt_end pointer in the prog while old pkt_start still points to previous sg entry, so you would be able to access out of bounds. Thanks, Daniel