On 10/18/2018 06:06 PM, Song Liu wrote: > BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the > skb. This patch enables direct access of skb for these programs. > > Two helper functions bpf_compute_and_save_data_pointers() and > bpf_restore_data_pointers() are introduced. They are used in > __cgroup_bpf_run_filter_skb(), to compute proper data_end for the > BPF program, and restore original data afterwards. > > Signed-off-by: Song Liu <songliubrav...@fb.com> > --- > include/linux/filter.h | 24 ++++++++++++++++++++++++ > kernel/bpf/cgroup.c | 6 ++++++ > net/core/filter.c | 36 +++++++++++++++++++++++++++++++++++- > 3 files changed, 65 insertions(+), 1 deletion(-) > > diff --git a/include/linux/filter.h b/include/linux/filter.h > index 5771874bc01e..96b3ee7f14c9 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -548,6 +548,30 @@ static inline void bpf_compute_data_pointers(struct > sk_buff *skb) > cb->data_end = skb->data + skb_headlen(skb); > } > > +/* Similar to bpf_compute_data_pointers(), except that it saves the original > + * cb->data_meta and cb->data_end for later restore. > + */ > +static inline void bpf_compute_and_save_data_pointers( > + struct sk_buff *skb, void *saved_pointers[2]) > +{ > + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; > + > + saved_pointers[0] = cb->data_meta; > + saved_pointers[1] = cb->data_end; > + cb->data_meta = skb->data - skb_metadata_len(skb); > + cb->data_end = skb->data + skb_headlen(skb);
Hmm, can you elaborate why populating data_meta here ... > +} > + > +/* Restore data saved by bpf_compute_and_save_data_pointers(). */ > +static inline void bpf_restore_data_pointers( > + struct sk_buff *skb, void *saved_pointers[2]) > +{ > + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; > + > + cb->data_meta = saved_pointers[0]; > + cb->data_end = saved_pointers[1];; > +} > + > static inline u8 *bpf_skb_cb(struct sk_buff *skb) > { > /* eBPF programs may read/write skb->cb[] area to transfer meta > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c > index 00f6ed2e4f9a..5f5180104ddc 100644 > --- a/kernel/bpf/cgroup.c > +++ b/kernel/bpf/cgroup.c > @@ -554,6 +554,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, > unsigned int offset = skb->data - skb_network_header(skb); > struct sock *save_sk; > struct cgroup *cgrp; > + void *saved_pointers[2]; > int ret; > > if (!sk || !sk_fullsock(sk)) > @@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, > save_sk = skb->sk; > skb->sk = sk; > __skb_push(skb, offset); > + > + /* compute pointers for the bpf prog */ > + bpf_compute_and_save_data_pointers(skb, saved_pointers); > + > ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, > bpf_prog_run_save_cb); > + bpf_restore_data_pointers(skb, saved_pointers); > __skb_pull(skb, offset); > skb->sk = save_sk; > return ret == 1 ? 
0 : -EPERM; > diff --git a/net/core/filter.c b/net/core/filter.c > index 1a3ac6c46873..e3ca30bd6840 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -5346,6 +5346,40 @@ static bool sk_filter_is_valid_access(int off, int > size, > return bpf_skb_is_valid_access(off, size, type, prog, info); > } > > +static bool cg_skb_is_valid_access(int off, int size, > + enum bpf_access_type type, > + const struct bpf_prog *prog, > + struct bpf_insn_access_aux *info) > +{ > + switch (off) { > + case bpf_ctx_range(struct __sk_buff, tc_classid): > + case bpf_ctx_range(struct __sk_buff, data_meta): > + case bpf_ctx_range(struct __sk_buff, flow_keys): > + return false; ... if it's disallowed anyway (disallowing it is the right thing to do, but no need to save/restore then..)? > + } > + if (type == BPF_WRITE) { > + switch (off) { > + case bpf_ctx_range(struct __sk_buff, mark): > + case bpf_ctx_range(struct __sk_buff, priority): > + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): > + break; > + default: > + return false; > + } > + } > + > + switch (off) { > + case bpf_ctx_range(struct __sk_buff, data): > + info->reg_type = PTR_TO_PACKET; > + break; > + case bpf_ctx_range(struct __sk_buff, data_end): > + info->reg_type = PTR_TO_PACKET_END; > + break; > + } > + > + return bpf_skb_is_valid_access(off, size, type, prog, info); > +} > + > static bool lwt_is_valid_access(int off, int size, > enum bpf_access_type type, > const struct bpf_prog *prog, > @@ -7038,7 +7072,7 @@ const struct bpf_prog_ops xdp_prog_ops = { > > const struct bpf_verifier_ops cg_skb_verifier_ops = { > .get_func_proto = cg_skb_func_proto, > - .is_valid_access = sk_filter_is_valid_access, > + .is_valid_access = cg_skb_is_valid_access, > .convert_ctx_access = bpf_convert_ctx_access, > }; > >