On 05/17/2018 04:28 PM, Mathieu Xhonneux wrote: > This patch adds the End.BPF action to the LWT seg6local infrastructure. > This action works like any other seg6local End action, meaning that an IPv6 > header with SRH is needed, whose DA has to be equal to the SID of the > action. It will also advance the SRH to the next segment, the BPF program > does not have to take care of this. > > Since the BPF program may not be a source of instability in the kernel, it > is important to ensure that the integrity of the packet is maintained > before yielding it back to the IPv6 layer. The hook hence keeps track if > the SRH has been altered through the helpers, and re-validates its > content if needed with seg6_validate_srh. The state kept for validation is > stored in a per-CPU buffer. The BPF program is not allowed to directly > write into the packet, and only some fields of the SRH can be altered > through the helper bpf_lwt_seg6_store_bytes. > > Performances profiling has shown that the SRH re-validation does not induce > a significant overhead. If the altered SRH is deemed as invalid, the packet > is dropped. > > This validation is also done before executing any action through > bpf_lwt_seg6_action, and will not be performed again if the SRH is not > modified after calling the action. > > The BPF program may return 3 types of return codes: > - BPF_OK: the End.BPF action will look up the next destination through > seg6_lookup_nexthop. > - BPF_REDIRECT: if an action has been executed through the > bpf_lwt_seg6_action helper, the BPF program should return this > value, as the skb's destination is already set and the default > lookup should not be performed. > - BPF_DROP : the packet will be dropped. > > Signed-off-by: Mathieu Xhonneux <m.xhonn...@gmail.com> > Acked-by: David Lebrun <dleb...@google.com> [...] > static struct seg6_action_desc seg6_action_table[] = { > { > .action = SEG6_LOCAL_ACTION_END, > @@ -497,7 +568,13 @@ static struct seg6_action_desc seg6_action_table[] = { > .attrs = (1 << SEG6_LOCAL_SRH), > .input = input_action_end_b6_encap, > .static_headroom = sizeof(struct ipv6hdr), > - } > + }, > + { > + .action = SEG6_LOCAL_ACTION_END_BPF, > + .attrs = (1 << SEG6_LOCAL_BPF), > + .input = input_action_end_bpf, > + }, > + > }; > > static struct seg6_action_desc *__get_action_desc(int action) > @@ -542,6 +619,7 @@ static const struct nla_policy > seg6_local_policy[SEG6_LOCAL_MAX + 1] = { > .len = sizeof(struct in6_addr) }, > [SEG6_LOCAL_IIF] = { .type = NLA_U32 }, > [SEG6_LOCAL_OIF] = { .type = NLA_U32 }, > + [SEG6_LOCAL_BPF] = { .type = NLA_NESTED }, > }; > > static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt) > @@ -719,6 +797,71 @@ static int cmp_nla_oif(struct seg6_local_lwt *a, struct > seg6_local_lwt *b) > return 0; > } > > +#define MAX_PROG_NAME 256 > +static const struct nla_policy bpf_prog_policy[LWT_BPF_PROG_MAX + 1] = { > + [LWT_BPF_PROG_FD] = { .type = NLA_U32, },
>From UAPI point of view, I wouldn't name it LWT_BPF_PROG_FD but rather >something like LWT_BPF_PROG for example. That way, the setup can contain the fd number, but on the dump you can put the prog->aux->id in there so that prog lookup can be done again. > + [LWT_BPF_PROG_NAME] = { .type = NLA_NUL_STRING, > + .len = MAX_PROG_NAME }, > +}; > + > +static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt) > +{ > + struct nlattr *tb[LWT_BPF_PROG_MAX + 1]; > + struct bpf_prog *p; > + int ret; > + u32 fd; > + > + ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attrs[SEG6_LOCAL_BPF], > + bpf_prog_policy, NULL); > + if (ret < 0) > + return ret; > + > + if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME]) > + return -EINVAL; > + > + slwt->bpf.name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_KERNEL); > + if (!slwt->bpf.name) > + return -ENOMEM; > + > + fd = nla_get_u32(tb[LWT_BPF_PROG_FD]); > + p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL); > + if (IS_ERR(p)) > + return PTR_ERR(p); Here in the above error path is definitely a bug in that you don't free the prior allocated slwt->bpf.name from nla_memdup(). Also when you destroy the struct seg6_local_lwt object, what I'm not getting is where you drop the prog reference again and free slwt->bpf.name there? > + > + slwt->bpf.prog = p; > + > + return 0; > +} > + > +static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt) > +{ > + struct nlattr *nest; > + > + if (!slwt->bpf.prog) > + return 0; > + > + nest = nla_nest_start(skb, SEG6_LOCAL_BPF); > + if (!nest) > + return -EMSGSIZE; > + > + if (slwt->bpf.name && > + nla_put_string(skb, LWT_BPF_PROG_NAME, slwt->bpf.name)) > + return -EMSGSIZE; > + > + return nla_nest_end(skb, nest); > +} > + > +static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b) > +{ > + if (!a->bpf.name && !b->bpf.name) > + return 0; > + > + if (!a->bpf.name || !b->bpf.name) > + return 1; > + > + return strcmp(a->bpf.name, b->bpf.name); > +} > + > struct seg6_action_param { > int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt); > int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt); > @@ -749,6 +892,11 @@ static struct seg6_action_param > seg6_action_params[SEG6_LOCAL_MAX + 1] = { > [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif, > .put = put_nla_oif, > .cmp = cmp_nla_oif }, > + > + [SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf, > + .put = put_nla_bpf, > + .cmp = cmp_nla_bpf }, > + > }; > > static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt > *slwt) > @@ -797,7 +945,6 @@ static int seg6_local_build_state(struct nlattr *nla, > unsigned int family, > > err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy, > extack); > - > if (err < 0) > return err; > > @@ -886,6 +1033,11 @@ static int seg6_local_get_encap_size(struct > lwtunnel_state *lwt) > if (attrs & (1 << SEG6_LOCAL_OIF)) > nlsize += nla_total_size(4); > > + if (attrs & (1 << SEG6_LOCAL_BPF)) > + nlsize += nla_total_size(sizeof(struct nlattr)) + > + nla_total_size(MAX_PROG_NAME) + > + nla_total_size(4); > + > return nlsize; > } > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c > index 3dbe217bf23e..a29fed1dfce2 100644 > --- a/tools/lib/bpf/libbpf.c > +++ b/tools/lib/bpf/libbpf.c > @@ -1456,6 +1456,7 @@ static bool bpf_prog_type__needs_kver(enum > bpf_prog_type type) > case BPF_PROG_TYPE_LWT_IN: > case BPF_PROG_TYPE_LWT_OUT: > case BPF_PROG_TYPE_LWT_XMIT: > + case BPF_PROG_TYPE_LWT_SEG6LOCAL: > case BPF_PROG_TYPE_SOCK_OPS: > case BPF_PROG_TYPE_SK_SKB: > case BPF_PROG_TYPE_CGROUP_DEVICE: >