> -----Original Message----- > From: netdev-ow...@vger.kernel.org [mailto:netdev-ow...@vger.kernel.org] > On Behalf Of Jiri Pirko > Sent: Wednesday, October 11, 2017 1:11 PM > To: netdev@vger.kernel.org > Cc: da...@davemloft.net; j...@mojatatu.com; xiyou.wangc...@gmail.com; > sae...@mellanox.com; mat...@mellanox.com; leo...@mellanox.com; > ml...@mellanox.com; david.lai...@aculab.com; gerlitz...@gmail.com > Subject: [patch net-next v2 2/4] net: sched: introduce per-egress action > device > callbacks > > From: Jiri Pirko <j...@mellanox.com> > > Introduce infrastructure that allows drivers to register callbacks that are > called > whenever tc would offload inserted rule and specified device acts as tc action > egress device. > > Signed-off-by: Jiri Pirko <j...@mellanox.com> > --- > v1->v2: > - take rtnl for register/unregister > --- > include/net/act_api.h | 34 ++++++++ > include/net/pkt_cls.h | 2 + > net/sched/act_api.c | 220 > ++++++++++++++++++++++++++++++++++++++++++++++++++ > net/sched/cls_api.c | 30 +++++++ > 4 files changed, 286 insertions(+) > > diff --git a/include/net/act_api.h b/include/net/act_api.h index > 900168a..f5e8c90 100644 > --- a/include/net/act_api.h > +++ b/include/net/act_api.h > @@ -174,4 +174,38 @@ static inline void tcf_action_stats_update(struct > tc_action *a, u64 bytes, #endif } > > +typedef int tc_setup_cb_t(enum tc_setup_type type, > + void *type_data, void *cb_priv); > + > +#ifdef CONFIG_NET_CLS_ACT > +int tc_setup_cb_egdev_register(const struct net_device *dev, > + tc_setup_cb_t *cb, void *cb_priv); void > +tc_setup_cb_egdev_unregister(const struct net_device *dev, > + tc_setup_cb_t *cb, void *cb_priv); int > +tc_setup_cb_egdev_call(const struct net_device *dev, > + enum tc_setup_type type, void *type_data, > + bool err_stop); > +#else > +static inline > +int tc_setup_cb_egdev_register(const struct net_device *dev, > + tc_setup_cb_t *cb, void *cb_priv) { > + return 0; > +} > + > +static inline > +void tc_setup_cb_egdev_unregister(const struct net_device *dev, > + tc_setup_cb_t *cb, void *cb_priv) { } > + > +static inline > +int tc_setup_cb_egdev_call(const struct net_device *dev, > + enum tc_setup_type type, void *type_data, > + bool err_stop) > +{ > + return 0; > +} > +#endif > + > #endif > diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index > e80edd8..6f8149c 100644 > --- a/include/net/pkt_cls.h > +++ b/include/net/pkt_cls.h > @@ -206,6 +206,8 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts > *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); > int > tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, > struct net_device **hw_dev); > +int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type, > + void *type_data, bool err_stop); > > /** > * struct tcf_pkt_info - packet information diff --git a/net/sched/act_api.c > b/net/sched/act_api.c index da6fa82..ac97db9 100644 > --- a/net/sched/act_api.c > +++ b/net/sched/act_api.c > @@ -21,6 +21,8 @@ > #include <linux/kmod.h> > #include <linux/err.h> > #include <linux/module.h> > +#include <linux/rhashtable.h> > +#include <linux/list.h> > #include <net/net_namespace.h> > #include <net/sock.h> > #include <net/sch_generic.h> > @@ -1249,8 +1251,226 @@ static int tc_dump_action(struct sk_buff *skb, > struct netlink_callback *cb) > return skb->len; > } > > +struct tcf_action_net { > + struct rhashtable egdev_ht; > +}; > + > +static unsigned int tcf_action_net_id; > + > +struct tcf_action_egdev_cb { > + struct list_head list; > + tc_setup_cb_t *cb; > + void *cb_priv; > +}; > + > +struct tcf_action_egdev { > + struct rhash_head ht_node; > + const struct net_device *dev; > + unsigned int refcnt; > + struct list_head cb_list; > +}; > + > +static const struct rhashtable_params tcf_action_egdev_ht_params = { > + .key_offset = offsetof(struct tcf_action_egdev, dev), > + .head_offset = offsetof(struct tcf_action_egdev, ht_node), > + .key_len = sizeof(const struct net_device *), }; > + > +static struct tcf_action_egdev * > +tcf_action_egdev_lookup(const struct net_device *dev) { > + struct net *net = dev_net(dev); > + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); > + > + return rhashtable_lookup_fast(&tan->egdev_ht, &dev, > + tcf_action_egdev_ht_params); > +} > + > +static struct tcf_action_egdev * > +tcf_action_egdev_get(const struct net_device *dev) { > + struct tcf_action_egdev *egdev; > + struct tcf_action_net *tan; > + > + egdev = tcf_action_egdev_lookup(dev); > + if (egdev) > + goto inc_ref; > + > + egdev = kzalloc(sizeof(*egdev), GFP_KERNEL); > + if (!egdev) > + return NULL; > + INIT_LIST_HEAD(&egdev->cb_list); > + tan = net_generic(dev_net(dev), tcf_action_net_id); > + rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node, > + tcf_action_egdev_ht_params); > + > +inc_ref: > + egdev->refcnt++; > + return egdev; > +} > + > +static void tcf_action_egdev_put(struct tcf_action_egdev *egdev) { > + struct tcf_action_net *tan; > + > + if (--egdev->refcnt) > + return; > + tan = net_generic(dev_net(egdev->dev), tcf_action_net_id); > + rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node, > + tcf_action_egdev_ht_params); > + kfree(egdev); > +} > + > +static struct tcf_action_egdev_cb * > +tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev, > + tc_setup_cb_t *cb, void *cb_priv) { > + struct tcf_action_egdev_cb *egdev_cb; > + > + list_for_each_entry(egdev_cb, &egdev->cb_list, list) > + if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv) > + return egdev_cb; > + return NULL; > +} > + > +static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev, > + enum tc_setup_type type, > + void *type_data, bool err_stop) { > + struct tcf_action_egdev_cb *egdev_cb; > + int ok_count = 0; > + int err; > + > + list_for_each_entry(egdev_cb, &egdev->cb_list, list) { > + err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv); > + if (err) { > + if (err_stop) > + return err; > + } else { > + ok_count++; > + } > + } > + return ok_count; > +} > + > +static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev, > + tc_setup_cb_t *cb, void *cb_priv) { > + struct tcf_action_egdev_cb *egdev_cb; > + > + egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv); > + if (WARN_ON(egdev_cb)) > + return -EEXIST; > + egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL); > + if (!egdev_cb) > + return -ENOMEM; > + egdev_cb->cb = cb; > + egdev_cb->cb_priv = cb_priv; > + list_add(&egdev_cb->list, &egdev->cb_list); > + return 0; > +} > + > +static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev, > + tc_setup_cb_t *cb, void *cb_priv) { > + struct tcf_action_egdev_cb *egdev_cb; > + > + egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv); > + if (WARN_ON(!egdev_cb)) > + return; > + list_del(&egdev_cb->list); > + kfree(egdev_cb); > +} > + > +static int __tc_setup_cb_egdev_register(const struct net_device *dev, > + tc_setup_cb_t *cb, void *cb_priv) > +{ > + struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev); > + int err; > + > + if (!egdev) > + return -ENOMEM; > + err = tcf_action_egdev_cb_add(egdev, cb, cb_priv); > + if (err) > + goto err_cb_add; > + return 0; > + > +err_cb_add: > + tcf_action_egdev_put(egdev); > + return err; > +} > +int tc_setup_cb_egdev_register(const struct net_device *dev, > + tc_setup_cb_t *cb, void *cb_priv) { > + int err; > + > + rtnl_lock(); > + err = __tc_setup_cb_egdev_register(dev, cb, cb_priv); > + rtnl_unlock(); > + return err; > +} > +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register); > + > +static void __tc_setup_cb_egdev_unregister(const struct net_device *dev, > + tc_setup_cb_t *cb, void *cb_priv) { > + struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev); > + > + if (WARN_ON(!egdev)) > + return; > + tcf_action_egdev_cb_del(egdev, cb, cb_priv); > + tcf_action_egdev_put(egdev); > +} > +void tc_setup_cb_egdev_unregister(const struct net_device *dev, > + tc_setup_cb_t *cb, void *cb_priv) { > + rtnl_lock(); > + __tc_setup_cb_egdev_unregister(dev, cb, cb_priv); > + rtnl_unlock(); > +} > +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister); > + > +int tc_setup_cb_egdev_call(const struct net_device *dev, > + enum tc_setup_type type, void *type_data, > + bool err_stop) > +{ > + struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev); > + > + if (!egdev) > + return 0; > + return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop); } > +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call); > + > +static __net_init int tcf_action_net_init(struct net *net) { > + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); > + > + return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params); > } > + > +static void __net_exit tcf_action_net_exit(struct net *net) { > + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); > + > + rhashtable_destroy(&tan->egdev_ht); > +} > + > +static struct pernet_operations tcf_action_net_ops = { > + .init = tcf_action_net_init, > + .exit = tcf_action_net_exit, > + .id = &tcf_action_net_id, > + .size = sizeof(struct tcf_action_net), }; > + > static int __init tc_action_init(void) > { > + int err; > + > + err = register_pernet_subsys(&tcf_action_net_ops); > + if (err) > + return err; > + > rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0); > rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0); > rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, > tc_dump_action, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index > 450873b..99f9432 100644 > --- a/net/sched/cls_api.c > +++ b/net/sched/cls_api.c > @@ -1026,6 +1026,36 @@ int tcf_exts_get_dev(struct net_device *dev, struct > tcf_exts *exts, } EXPORT_SYMBOL(tcf_exts_get_dev); > > +int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type, > + void *type_data, bool err_stop) > +{ > + int ok_count = 0; > +#ifdef CONFIG_NET_CLS_ACT > + const struct tc_action *a; > + struct net_device *dev; > + LIST_HEAD(actions); > + int ret; > + > + if (!tcf_exts_has_actions(exts)) > + return 0; > + > + tcf_exts_to_list(exts, &actions); > + list_for_each_entry(a, &actions, list) { > + if (!a->ops->get_dev) > + continue; > + dev = a->ops->get_dev(a); > + if (!dev || !tc_can_offload(dev)) > + continue; > + ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop); > + if (ret < 0) > + return ret; > + ok_count += ret; > + } > +#endif > + return ok_count; > +} > +EXPORT_SYMBOL(tcf_exts_egdev_cb_call); > + > static int __init tc_filter_init(void) > { > rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0); > -- > 2.9.5
Hi Jiri, I am just trying to understand this a bit specially in context of offloaded macvlan devices. I am assuming that any action device [fox example - offloaded macvlan device] can register for tc callback function using " tc_setup_cb_egdev_register()" It was hard for me to call this above API from offloaded macvlan flow [.ndo_dfwd_add_station] in PF driver, since it already grabs rtnl_lock(), so I tried to register it from different context in PF driver somehow for the offloaded macvlan device. Now, I didn't understand actually the usage of egdev callback registration here - Consider this example - tc qdisc add dev <pf_device> ingress tc filter add dev <pf_device> prio 1 protocol ip parent ffff: flower skip_sw dst_ip 192.168.55.55/24 action mirred egress redirect dev mvlan_0 The above filter add command is received by the PF registered callback [which was supplied by tcf_block_cb_register()], the same command is also received by the callback function which was supplied in tc_setup_cb_egdev_register(). What is the significance of getting two callbacks here ? Since, I can add the above said filter in first callback function even. What am I supposed to do in second callback ? is there any special motivation for egdev callback registration ? Thanks, Manish