On Fri, 2018-07-13 at 16:13 +0200, Daniel Borkmann wrote:
> On 07/13/2018 11:55 AM, Paolo Abeni wrote:
> > This patch changes the TC_ACT_REDIRECT code path to allow
> > providing the redirect parameters via the tcf_result argument.
> > 
> > Such union is expanded to host the redirect device, the redirect
> > direction (ingress/egress) and the stats to be updated on error
> > conditions.
> > 
> > Actions/classifiers using TC_ACT_REDIRECT can either:
> > * fill the tcf_result redirect related fields
> > * clear such fields and use the bpf per cpu redirect info
> > 
> > skb_do_redirect now tries to fetch the relevant data from tcf_result
> > and fall back to access redirect info. It also updates the stats
> > accordingly to the redirect result, if provided by the caller.
> > 
> > This will allow using the TC_ACT_REDIRECT action in more places in
> > the next patch.
> > 
> > Signed-off-by: Paolo Abeni <pab...@redhat.com>
> > ---
> >  include/net/sch_generic.h | 15 ++++++++++++++-
> >  net/core/dev.c            |  4 ++--
> >  net/core/filter.c         | 29 +++++++++++++++++++++++------
> >  net/core/lwt_bpf.c        |  5 ++++-
> >  net/sched/act_bpf.c       |  4 +++-
> >  net/sched/cls_bpf.c       |  8 +++++---
> >  6 files changed, 51 insertions(+), 14 deletions(-)
> > 
> > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> > index 056dc1083aa3..dd9e00d017b3 100644
> > --- a/include/net/sch_generic.h
> > +++ b/include/net/sch_generic.h
> > @@ -235,9 +235,22 @@ struct tcf_result {
> >  			u32 classid;
> >  		};
> >  		const struct tcf_proto *goto_tp;
> > +
> > +		/* used by the TC_ACT_REDIRECT action */
> > +		struct {
> > +			/* device and direction, or 0 bpf redirect */
> > +			long dev_ingress;
> > +			struct gnet_stats_queue *qstats;
> > +		};
> >  	};
> >  };
> > 
> > +#define TCF_RESULT_REDIR_DEV(res) \
> > +	((struct net_device *)((res)->dev_ingress & ~1))
> > +#define TCF_RESULT_REDIR_INGRESS(res) ((res)->dev_ingress & 1)
> > +#define TCF_RESULT_SET_REDIRECT(res, dev, ingress) \
> > +	((res)->dev_ingress = (long)(dev) | (!!(ingress)))
> > +
> >  struct tcf_proto_ops {
> >  	struct list_head head;
> >  	char kind[IFNAMSIZ];
> > @@ -543,7 +556,7 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
> >  				struct netlink_ext_ack *extack);
> >  void __qdisc_calculate_pkt_len(struct sk_buff *skb,
> >  			       const struct qdisc_size_table *stab);
> > -int skb_do_redirect(struct sk_buff *);
> > +int skb_do_redirect(struct sk_buff *skb, struct tcf_result *res);
> > 
> >  static inline void skb_reset_tc(struct sk_buff *skb)
> >  {
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index 14a748ee8cc9..a283dbfde30c 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -3538,7 +3538,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
> >  		return NULL;
> >  	case TC_ACT_REDIRECT:
> >  		/* No need to push/pop skb's mac_header here on egress! */
> > -		skb_do_redirect(skb);
> > +		skb_do_redirect(skb, &cl_res);
> >  		*ret = NET_XMIT_SUCCESS;
> >  		return NULL;
> >  	default:
> > @@ -4600,7 +4600,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
> >  		 * redirecting to another netdev
> >  		 */
> >  		__skb_push(skb, skb->mac_len);
> > -		skb_do_redirect(skb);
> > +		skb_do_redirect(skb, &cl_res);
> >  		return NULL;
> >  	default:
> >  		break;
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index b9ec916f4e3a..4f64cf5189e6 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -2062,19 +2062,36 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
> >  	return TC_ACT_REDIRECT;
> >  }
> > 
> > -int skb_do_redirect(struct sk_buff *skb)
> > +int skb_do_redirect(struct sk_buff *skb, struct tcf_result *res)
> >  {
> > -	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
> > +	struct gnet_stats_queue *stats;
> >  	struct net_device *dev;
> > +	int ret, flags;
> > 
> > -	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
> > -	ri->ifindex = 0;
> > +	if (!res->dev_ingress) {
> > +		struct redirect_info *ri = this_cpu_ptr(&redirect_info);
> > +
> > +		dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
> > +		flags = ri->flags;
> > +		ri->ifindex = 0;
> > +		stats = NULL;
> > +	} else {
> > +		dev = TCF_RESULT_REDIR_DEV(res);
> > +		flags = TCF_RESULT_REDIR_INGRESS(res) ? BPF_F_INGRESS : 0;
> > +		stats = res->qstats;
> > +	}
> >  	if (unlikely(!dev)) {
> >  		kfree_skb(skb);
> > -		return -EINVAL;
> > +		ret = -EINVAL;
> > +		goto out;
> >  	}
> > 
> > -	return __bpf_redirect(skb, dev, ri->flags);
> > +	ret = __bpf_redirect(skb, dev, flags);
> > +
> > +out:
> > +	if (ret && stats)
> > +		qstats_overlimit_inc(res->qstats);
> > +	return ret;
> >  }
> > 
> >  static const struct bpf_func_proto bpf_redirect_proto = {
> > diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
> > index e7e626fb87bb..8dde1093994a 100644
> > --- a/net/core/lwt_bpf.c
> > +++ b/net/core/lwt_bpf.c
> > @@ -65,7 +65,10 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
> >  			lwt->name ? : "<unknown>");
> >  		ret = BPF_OK;
> >  	} else {
> > -		ret = skb_do_redirect(skb);
> > +		struct tcf_result res;
> > +
> > +		res.dev_ingress = 0;
> > +		ret = skb_do_redirect(skb, &res);
> >  		if (ret == 0)
> >  			ret = BPF_REDIRECT;
> >  	}
> > diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
> > index ac20266460c0..6fd46b691181 100644
> > --- a/net/sched/act_bpf.c
> > +++ b/net/sched/act_bpf.c
> > @@ -67,10 +67,12 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
> >  	 * returned.
> >  	 */
> >  	switch (filter_res) {
> > +	case TC_ACT_REDIRECT:
> > +		res->dev_ingress = 0;
> > +		/* fall-through */
> >  	case TC_ACT_PIPE:
> >  	case TC_ACT_RECLASSIFY:
> >  	case TC_ACT_OK:
> > -	case TC_ACT_REDIRECT:
> >  		action = filter_res;
> >  		break;
> >  	case TC_ACT_SHOT:
> > diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
> > index 66e0ac9811f9..f0fb7ded8fe2 100644
> > --- a/net/sched/cls_bpf.c
> > +++ b/net/sched/cls_bpf.c
> > @@ -65,14 +65,16 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
> >  				.len = sizeof(struct sock_filter) * BPF_MAXINSNS },
> >  };
> > 
> > -static int cls_bpf_exec_opcode(int code)
> > +static int cls_bpf_exec_opcode(int code, struct tcf_result *res)
> >  {
> >  	switch (code) {
> > +	case TC_ACT_REDIRECT:
> > +		res->dev_ingress = 0;
> > +		/* fall-through */
> >  	case TC_ACT_OK:
> >  	case TC_ACT_SHOT:
> >  	case TC_ACT_STOLEN:
> >  	case TC_ACT_TRAP:
> > -	case TC_ACT_REDIRECT:
> >  	case TC_ACT_UNSPEC:
> >  		return code;
> >  	default:
> > @@ -113,7 +115,7 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
> >  			res->classid = TC_H_MAJ(prog->res.classid) |
> >  				       qdisc_skb_cb(skb)->tc_classid;
> > 
> > -		ret = cls_bpf_exec_opcode(filter_res);
> > +		ret = cls_bpf_exec_opcode(filter_res, res);
> >  		if (ret == TC_ACT_UNSPEC)
> >  			continue;
> >  		break;
> > 
> 
> Can't we just export the struct redirect_info and let others like
> act_mirred use it, then we wouldn't need all these extra changes in
> fast path?
Thank you for the feedback.

Using tcf_result lets us pass the stats to be updated to the redirect
helper, and avoids an additional dev lookup for the mirred datapath.
Some changes are needed to make skb_do_redirect() more generic, and
the code duplication could be reduced with an additional helper.

Do you have a strong objection to that?

Thanks,

Paolo
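P.S.: to make the last point concrete, below is an untested sketch of the
kind of helper mentioned above; the name skb_get_redirect_params() is
made up here, only for illustration:

/* Untested sketch: resolve the redirect parameters either from the
 * tcf_result filled by the caller, or from the per-CPU redirect_info
 * set by the bpf_redirect() helper.
 */
static struct net_device *
skb_get_redirect_params(struct sk_buff *skb, struct tcf_result *res,
			int *flags, struct gnet_stats_queue **stats)
{
	struct net_device *dev;

	if (!res->dev_ingress) {
		struct redirect_info *ri = this_cpu_ptr(&redirect_info);

		dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
		*flags = ri->flags;
		*stats = NULL;
		ri->ifindex = 0;
		return dev;
	}

	*flags = TCF_RESULT_REDIR_INGRESS(res) ? BPF_F_INGRESS : 0;
	*stats = res->qstats;
	return TCF_RESULT_REDIR_DEV(res);
}

/* skb_do_redirect() would then shrink to: */
int skb_do_redirect(struct sk_buff *skb, struct tcf_result *res)
{
	struct gnet_stats_queue *stats;
	struct net_device *dev;
	int ret, flags;

	dev = skb_get_redirect_params(skb, res, &flags, &stats);
	if (unlikely(!dev)) {
		kfree_skb(skb);
		ret = -EINVAL;
	} else {
		ret = __bpf_redirect(skb, dev, flags);
	}

	/* only callers passing redirect data via tcf_result provide stats */
	if (ret && stats)
		qstats_overlimit_inc(stats);
	return ret;
}

The fast path stays the same as in the patch above; only the parameter
lookup is factored out, so additional callers (e.g. an act_mirred
conversion in a later patch) would not need to duplicate it.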