On 04/17/2018 06:48 AM, Eyal Birger wrote: > This commit introduces a helper which allows fetching xfrm state > parameters by eBPF programs attached to TC. > > Prototype: > bpf_skb_get_xfrm_state(skb, index, xfrm_state, size, flags) > > skb: pointer to skb > index: the index in the skb xfrm_state secpath array > xfrm_state: pointer to 'struct bpf_xfrm_state' > size: size of 'struct bpf_xfrm_state' > flags: reserved for future extensions > > The helper returns 0 on success. Non-zero if no xfrm state at the index > is found - or none exists at all. > > struct bpf_xfrm_state currently includes the SPI, peer IPv4/IPv6 > address and the reqid; it can be further extended by adding elements to > its end - indicating the populated fields by the 'size' argument - > keeping backwards compatibility. > > Typical usage: > > struct bpf_xfrm_state x = {}; > bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0); > ... > > Signed-off-by: Eyal Birger <eyal.bir...@gmail.com>
Patch looks good to me, two comments below: > --- > include/uapi/linux/bpf.h | 25 ++++++++++++++++++++++++- > net/core/filter.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 70 insertions(+), 1 deletion(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index c5ec897..132e172 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -755,6 +755,15 @@ union bpf_attr { > * @addr: pointer to struct sockaddr to bind socket to > * @addr_len: length of sockaddr structure > * Return: 0 on success or negative error code > + * > + * int bpf_skb_get_xfrm_state(skb, index, xfrm_state, size, flags) > + * retrieve XFRM state > + * @skb: pointer to skb > + * @index: index of the xfrm state in the secpath > + * @key: pointer to 'struct bpf_xfrm_state' > + * @size: size of 'struct bpf_xfrm_state' > + * @flags: room for future extensions > + * Return: 0 on success or negative error > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -821,7 +830,8 @@ union bpf_attr { > FN(msg_apply_bytes), \ > FN(msg_cork_bytes), \ > FN(msg_pull_data), \ > - FN(bind), > + FN(bind), \ > + FN(skb_get_xfrm_state), > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > * function eBPF program intends to call > @@ -927,6 +937,19 @@ struct bpf_tunnel_key { > __u32 tunnel_label; > }; > > +/* user accessible mirror of in-kernel xfrm_state. > + * new fields can only be added to the end of this structure > + */ > +struct bpf_xfrm_state { > + __u32 reqid; > + __u32 spi; > + __u16 family; > + union { > + __u32 remote_ipv4; > + __u32 remote_ipv6[4]; > + }; > +}; > + > /* Generic BPF return codes which all BPF program types may support. 
> * The values are binary compatible with their TC_ACT_* counter-part to > * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT > diff --git a/net/core/filter.c b/net/core/filter.c > index d31aff9..c06600a 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -57,6 +57,7 @@ > #include <net/sock_reuseport.h> > #include <net/busy_poll.h> > #include <net/tcp.h> > +#include <net/xfrm.h> > #include <linux/bpf_trace.h> > > /** > @@ -3703,6 +3704,49 @@ static const struct bpf_func_proto bpf_bind_proto = { > .arg3_type = ARG_CONST_SIZE, > }; > > +BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index, > + struct bpf_xfrm_state *, to, u32, size, u64, flags) > +{ > +#ifdef CONFIG_XFRM > + const struct sec_path *sp = skb_sec_path(skb); > + const struct xfrm_state *x; > + > + if (!sp || index >= sp->len) This should be something like: if (!sp || unlikely(index >= sp->len || flags)) That way we currently bail out unconditionally on any non-zero flags: the argument is reserved for future use, so anything non-zero is invalid and should be rejected until we start extending it. 
> + goto err_clear; > + > + x = sp->xvec[index]; > + > + if (unlikely(size != sizeof(struct bpf_xfrm_state))) > + goto err_clear; > + > + to->reqid = x->props.reqid; > + to->spi = be32_to_cpu(x->id.spi); > + to->family = x->props.family; > + if (to->family == AF_INET6) { > + memcpy(to->remote_ipv6, x->props.saddr.a6, > + sizeof(to->remote_ipv6)); > + } else { > + to->remote_ipv4 = be32_to_cpu(x->props.saddr.a4); > + } > + > + return 0; > +err_clear: > +#endif > + memset(to, 0, size); > + return -EINVAL; > +} > + > +static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { > + .func = bpf_skb_get_xfrm_state, > + .gpl_only = false, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_CTX, > + .arg2_type = ARG_ANYTHING, > + .arg3_type = ARG_PTR_TO_UNINIT_MEM, > + .arg4_type = ARG_CONST_SIZE, > + .arg5_type = ARG_ANYTHING, > +}; > + > static const struct bpf_func_proto * > bpf_base_func_proto(enum bpf_func_id func_id) > { > @@ -3844,6 +3888,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const > struct bpf_prog *prog) > return &bpf_get_socket_cookie_proto; > case BPF_FUNC_get_socket_uid: > return &bpf_get_socket_uid_proto; > + case BPF_FUNC_skb_get_xfrm_state: > + return &bpf_skb_get_xfrm_state_proto; Potentially, on kernels with !CONFIG_XFRM, you might want the program to be rejected already at the program verification phase? It would then become ... #ifdef CONFIG_XFRM case BPF_FUNC_skb_get_xfrm_state: return &bpf_skb_get_xfrm_state_proto; #endif ... where you'd also wrap the helper and its state_proto in CONFIG_XFRM. > default: > return bpf_base_func_proto(func_id); > } >