On 06/19/2018 08:00 PM, Tushar Dave wrote:
> When sg_filter_run() is invoked it runs the attached eBPF
> SOCKET_SG_FILTER program which deals with struct scatterlist.
> 
> In addition, this patch also adds bpf_sg_next helper function that
> allows users to retrieve the next sg element from sg list.
> 
> Signed-off-by: Tushar Dave <tushar.n.d...@oracle.com>
> Acked-by: Sowmini Varadhan <sowmini.varad...@oracle.com>
> ---
>  include/linux/filter.h                    |  2 +
>  include/uapi/linux/bpf.h                  | 10 ++++-
>  net/core/filter.c                         | 72 +++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h            | 10 ++++-
>  tools/testing/selftests/bpf/bpf_helpers.h |  3 ++
>  5 files changed, 95 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 71618b1..d176402 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -1072,4 +1072,6 @@ struct bpf_sock_ops_kern {
>                                        */
>  };
>  
> +int sg_filter_run(struct sock *sk, struct scatterlist *sg);
> +
>  #endif /* __LINUX_FILTER_H__ */
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index ef0a7b6..036432b 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2076,6 +2076,13 @@ struct bpf_stack_build_id {
>   *   Return
>   *           A 64-bit integer containing the current cgroup id based
>   *           on the cgroup within which the current task is running.
> + *
> + * int bpf_sg_next(struct bpf_scatterlist *sg)
> + *   Description
> + *           This helper allows the user to retrieve the next sg
> + *           element from the sg list.
> + *   Return
> + *           0 on success, or a negative error in case of failure.
>   */
>  #define __BPF_FUNC_MAPPER(FN)                \
>       FN(unspec),                     \
> @@ -2158,7 +2165,8 @@ struct bpf_stack_build_id {
>       FN(rc_repeat),                  \
>       FN(rc_keydown),                 \
>       FN(skb_cgroup_id),              \
> -     FN(get_current_cgroup_id),
> +     FN(get_current_cgroup_id),      \
> +     FN(sg_next),
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 8f67942..702ff5b 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -121,6 +121,53 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
>  }
>  EXPORT_SYMBOL(sk_filter_trim_cap);
>  
> +int sg_filter_run(struct sock *sk, struct scatterlist *sg)
> +{
> +     struct sk_filter *filter;
> +     int err = 0;
> +
> +     rcu_read_lock();
> +     filter = rcu_dereference(sk->sk_filter);
> +     if (filter) {
> +             struct bpf_scatterlist bpfsg;
> +             int num_sg;
> +
> +             if (!sg) {
> +                     err = -EINVAL;
> +                     goto out;
> +             }
> +
> +             num_sg = sg_nents(sg);
> +             if (num_sg <= 0) {
> +                     err = -EINVAL;
> +                     goto out;
> +             }
> +
> +             /* We store a reference to the sg list so it can later be
> +              * used by eBPF helpers to retrieve the next sg element.
> +              */
> +             bpfsg.num_sg = num_sg;
> +             bpfsg.cur_sg = 0;
> +             bpfsg.sg = sg;
> +
> +             /* For the first sg element, we store the pkt access pointers
> +              * into start and end so the eBPF program can have pkt access
> +              * using data and data_end. Pkt access for subsequent elements
> +              * of the sg list is possible when the eBPF program invokes
> +              * bpf_sg_next, which takes care of setting start and end to
> +              * the correct sg element.
> +              */
> +             bpfsg.start = sg_virt(sg);
> +             bpfsg.end = bpfsg.start + sg->length;
> +             BPF_PROG_RUN(filter->prog, &bpfsg);
> +     }
> +out:
> +     rcu_read_unlock();
> +
> +     return err;
> +}
> +EXPORT_SYMBOL(sg_filter_run);
> +
>  BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
>  {
>       return skb_get_poff(skb);
> @@ -3753,6 +3800,29 @@ static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
>       .arg1_type      = ARG_PTR_TO_CTX,
>  };
>  
> +BPF_CALL_1(bpf_sg_next, struct bpf_scatterlist *, bpfsg)
> +{
> +     struct scatterlist *sg = bpfsg->sg;
> +     int cur_sg = bpfsg->cur_sg;
> +
> +     cur_sg++;
> +     if (cur_sg >= bpfsg->num_sg)
> +             return -ENODATA;
> +
> +     bpfsg->cur_sg = cur_sg;
> +     bpfsg->start = sg_virt(&sg[cur_sg]);
> +     bpfsg->end = bpfsg->start + sg[cur_sg].length;
> +
> +     return 0;
> +}
> +
> +static const struct bpf_func_proto bpf_sg_next_proto = {
> +     .func           = bpf_sg_next,
> +     .gpl_only       = false,
> +     .ret_type       = RET_INTEGER,
> +     .arg1_type      = ARG_PTR_TO_CTX,
> +};
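
For reference, a SOCKET_SG_FILTER program consuming this helper would walk
the sg list roughly as in the sketch below. This is illustrative only: it
assumes the UAPI struct bpf_scatterlist added earlier in this series exposes
the current element through pkt-pointer fields named start and end, and the
section name, iteration bound and return codes are made up for the example.

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    /* Arbitrary iteration bound; the loop must be fully unrolled since
     * the verifier does not accept loops.
     */
    #define MAX_SG_ENTRIES 16

    SEC("socksg") /* hypothetical section name for SOCKET_SG_FILTER progs */
    int sg_walk(struct bpf_scatterlist *sg)
    {
            int i;

    #pragma unroll
            for (i = 0; i < MAX_SG_ENTRIES; i++) {
                    /* Reload both pointers on every iteration; they are
                     * only valid for the current sg element.
                     */
                    void *data = (void *)(long)sg->start;
                    void *data_end = (void *)(long)sg->end;

                    if (data + 1 > data_end)
                            return 0;       /* malformed element, drop */

                    /* ... inspect this element's payload here ... */

                    if (bpf_sg_next(sg))
                            break;          /* no further elements */
            }

            return 1;                       /* accept */
    }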

bpf_sg_next() should be added to bpf_helper_changes_pkt_data() in order to
enforce a reload of all pkt pointers. Otherwise this is buggy in the sense
that someone could reload only the pkt_end pointer in the prog while the old
pkt_start still points to the previous sg entry, so you would be able to
access out of bounds.
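
I.e. something along these lines (untested sketch; the helpers already
listed in bpf_helper_changes_pkt_data() in net/core/filter.c are elided):

    static bool bpf_helper_changes_pkt_data(void *func)
    {
            if (func == bpf_skb_vlan_push ||
                ...
                func == bpf_sg_next)
                    return true;

            return false;
    }

This way the verifier invalidates all pkt pointers after a bpf_sg_next()
call, so the program has to reload both start and end before the next access.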

Thanks,
Daniel
