The net failover should be a simple library, not a virtual object with function callbacks (see callback hell). The code is simpler and smaller for both the netvsc and virtio use cases.
The code is restructured in many ways. I should have given these as review comments to net_failover during review but did not want to overwhelm the original submitter. Therefore it was merged prematurely. Some of the many items changed are: * The support routines should just be selected as needed in kernel config; there is no need for them to be visible config items. * Both netvsc and net_failover should each keep a list of their own devices, not a common list. * The policy for matching a secondary device to a primary device is up to the network device. Both net_failover and netvsc will use MAC for now but can change separately. * The match policy is only used during initial discovery; after that the secondary device knows what the upper device is because of the parent/child relationship; no searching is required. * Now, netvsc and net_failover use the same delayed work type mechanism for setup. Previously, net_failover code was triggering off name change but a similar policy was rejected for netvsc. "What is good for the goose is good for the gander." * The net_failover private device info 'struct net_failover_info' should have been private to the driver file, not a visible API. * The net_failover device should not use SET_NETDEV_DEV, which is intended only for physical devices, not virtual devices. * No point in having DocBook style comments on a driver file. They only make sense on an externally exposed API. * net_failover only supports Ethernet, so use ether_addr_copy. * Set permanent and current address of net_failover device to match the primary. * Carrier should be marked off before registering the net_failover device. * Use netdev_XXX for log messages in net_failover (not dev_xxx). * Since the failover infrastructure is about linking devices, just use RTNL; there is no need for other locking in init and teardown. * Don't bother with ERR_PTR() style return if the only possible returns are success or no memory. 
* As much as possible, the terms master and slave should be avoided because of their cultural connotations. Note; this code has been tested on Hyper-V but is compile tested only on virtio. Fixes: 30c8bd5aa8b2 ("net: Introduce generic failover module") Signed-off-by: Stephen Hemminger <sthem...@microsoft.com> --- Although this patch needs to go into 4.18 (linux-net), this version is based against net-next because net-next hasn't been merged into linux-net yet. drivers/net/hyperv/hyperv_net.h | 3 +- drivers/net/hyperv/netvsc_drv.c | 173 +++++++++++------ drivers/net/net_failover.c | 312 ++++++++++++++++++++----------- drivers/net/virtio_net.c | 9 +- include/net/failover.h | 31 +--- include/net/net_failover.h | 32 +--- net/Kconfig | 13 +- net/core/failover.c | 316 ++++---------------------------- 8 files changed, 373 insertions(+), 516 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 99d8e7398a5b..c7d25d10765e 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -902,6 +902,8 @@ struct net_device_context { struct hv_device *device_ctx; /* netvsc_device */ struct netvsc_device __rcu *nvdev; + /* list of netvsc net_devices */ + struct list_head list; /* reconfigure work */ struct delayed_work dwork; /* last reconfig time */ @@ -933,7 +935,6 @@ struct net_device_context { /* Serial number of the VF to team with */ u32 vf_serial; - struct failover *failover; }; /* Per channel data */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index bef4d55a108c..074e6b8578df 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -70,6 +70,8 @@ static int debug = -1; module_param(debug, int, 0444); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); +static LIST_HEAD(netvsc_dev_list); + static void netvsc_change_rx_flags(struct net_device *net, int change) { struct net_device_context *ndev_ctx = netdev_priv(net); @@ -1846,101 +1848,120 
@@ static void netvsc_vf_setup(struct work_struct *w) } vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); - if (vf_netdev) + if (vf_netdev) { __netvsc_vf_setup(ndev, vf_netdev); - + dev_put(vf_netdev); + } rtnl_unlock(); } -static int netvsc_pre_register_vf(struct net_device *vf_netdev, - struct net_device *ndev) +static struct net_device *get_netvsc_bymac(const u8 *mac) { - struct net_device_context *net_device_ctx; - struct netvsc_device *netvsc_dev; + struct net_device_context *ndev_ctx; - net_device_ctx = netdev_priv(ndev); - netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); - if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev)) - return -ENODEV; + ASSERT_RTNL(); - return 0; + list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) { + struct net_device *dev = hv_get_drvdata(ndev_ctx->device_ctx); + + if (ether_addr_equal(mac, dev->perm_addr)) + return dev; + } + + return NULL; } -static int netvsc_register_vf(struct net_device *vf_netdev, - struct net_device *ndev) +static int netvsc_register_vf(struct net_device *vf_netdev) { - struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct net_device *ndev; + struct net_device_context *ndev_ctx; + + /* Must use Ethernet addresses */ + if (vf_netdev->addr_len != ETH_ALEN) + return NOTIFY_DONE; + + /* VF must be a physical device not VLAN, etc */ + if (!vf_netdev->dev.parent) + return NOTIFY_DONE; + + /* Use the MAC address to locate the synthetic interface to + * associate with the VF interface. 
+ */ + ndev = get_netvsc_bymac(vf_netdev->perm_addr); + if (!ndev) + return NOTIFY_DONE; + + /* If network device is being removed, don't do anything */ + ndev_ctx = netdev_priv(ndev); + if (!rtnl_dereference(ndev_ctx->nvdev)) + return NOTIFY_DONE; + + if (netdev_failover_join(vf_netdev, ndev, netvsc_vf_handle_frame)) { + netdev_err(vf_netdev, "could not join: %s", ndev->name); + return NOTIFY_DONE; + } /* set slave flag before open to prevent IPv6 addrconf */ vf_netdev->flags |= IFF_SLAVE; + dev_hold(vf_netdev); + schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); netdev_info(vf_netdev, "joined to %s\n", ndev->name); - dev_hold(vf_netdev); rcu_assign_pointer(ndev_ctx->vf_netdev, vf_netdev); - return 0; + return NOTIFY_OK; } /* VF up/down change detected, schedule to change data path */ -static int netvsc_vf_changed(struct net_device *vf_netdev, - struct net_device *ndev) +static int netvsc_vf_changed(struct net_device *vf_netdev) { struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; + struct net_device *ndev; bool vf_is_up = netif_running(vf_netdev); + ndev = netdev_failover_upper_get(vf_netdev); + if (!ndev) + return NOTIFY_DONE; + net_device_ctx = netdev_priv(ndev); netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); if (!netvsc_dev) - return -ENODEV; + return NOTIFY_DONE; netvsc_switch_datapath(ndev, vf_is_up); netdev_info(ndev, "Data path switched %s VF: %s\n", vf_is_up ? 
"to" : "from", vf_netdev->name); - return 0; + return NOTIFY_OK; } -static int netvsc_pre_unregister_vf(struct net_device *vf_netdev, - struct net_device *ndev) +static int netvsc_unregister_vf(struct net_device *vf_netdev) { struct net_device_context *net_device_ctx; + struct net_device *ndev; - net_device_ctx = netdev_priv(ndev); - cancel_delayed_work_sync(&net_device_ctx->vf_takeover); - - return 0; -} - -static int netvsc_unregister_vf(struct net_device *vf_netdev, - struct net_device *ndev) -{ - struct net_device_context *net_device_ctx; + ndev = netdev_failover_upper_get(vf_netdev); + if (!ndev) + return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); + if (cancel_delayed_work_sync(&net_device_ctx->vf_takeover)) + dev_put(vf_netdev); netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); + netdev_failover_unjoin(vf_netdev, ndev); RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); - dev_put(vf_netdev); - return 0; + return NOTIFY_OK; } -static struct failover_ops netvsc_failover_ops = { - .slave_pre_register = netvsc_pre_register_vf, - .slave_register = netvsc_register_vf, - .slave_pre_unregister = netvsc_pre_unregister_vf, - .slave_unregister = netvsc_unregister_vf, - .slave_link_change = netvsc_vf_changed, - .slave_handle_frame = netvsc_vf_handle_frame, -}; - static int netvsc_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { @@ -2009,6 +2030,8 @@ static int netvsc_probe(struct hv_device *dev, memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + net->priv_flags |= IFF_FAILOVER; + /* hw_features computed in rndis_netdev_set_hwcaps() */ net->features = net->hw_features | NETIF_F_HIGHDMA | NETIF_F_SG | @@ -2024,23 +2047,19 @@ static int netvsc_probe(struct hv_device *dev, else net->max_mtu = ETH_DATA_LEN; - ret = register_netdev(net); + rtnl_lock(); + ret = register_netdevice(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); goto register_failed; } - net_device_ctx->failover = failover_register(net, 
&netvsc_failover_ops); - if (IS_ERR(net_device_ctx->failover)) { - ret = PTR_ERR(net_device_ctx->failover); - goto err_failover; - } - - return ret; + list_add(&net_device_ctx->list, &netvsc_dev_list); + rtnl_unlock(); + return 0; -err_failover: - unregister_netdev(net); register_failed: + rtnl_unlock(); rndis_filter_device_remove(dev, nvdev); rndis_failed: free_percpu(net_device_ctx->vf_stats); @@ -2079,15 +2098,17 @@ static int netvsc_remove(struct hv_device *dev) */ rtnl_lock(); vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); - if (vf_netdev) - failover_slave_unregister(vf_netdev); + if (vf_netdev) { + netdev_failover_unjoin(vf_netdev, net); + dev_put(vf_netdev); + } if (nvdev) rndis_filter_device_remove(dev, nvdev); unregister_netdevice(net); - failover_unregister(ndev_ctx->failover); + list_del(&ndev_ctx->list); rtnl_unlock(); rcu_read_unlock(); @@ -2115,8 +2136,47 @@ static struct hv_driver netvsc_drv = { .remove = netvsc_remove, }; +/* On Hyper-V, every VF interface is matched with a corresponding + * synthetic interface. The synthetic interface is presented first + * to the guest. When the corresponding VF instance is registered, + * we will take care of switching the data path. 
+ */ +static int netvsc_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + + /* Skip parent events */ + if (netif_is_failover(event_dev)) + return NOTIFY_DONE; + + /* Avoid non-Ethernet type devices */ + if (event_dev->type != ARPHRD_ETHER) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_REGISTER: + return netvsc_register_vf(event_dev); + + case NETDEV_UNREGISTER: + return netvsc_unregister_vf(event_dev); + + case NETDEV_UP: + case NETDEV_DOWN: + return netvsc_vf_changed(event_dev); + + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block netvsc_netdev_notifier = { + .notifier_call = netvsc_netdev_event, +}; + static void __exit netvsc_drv_exit(void) { + unregister_netdevice_notifier(&netvsc_netdev_notifier); vmbus_driver_unregister(&netvsc_drv); } @@ -2136,6 +2196,7 @@ static int __init netvsc_drv_init(void) if (ret) return ret; + register_netdevice_notifier(&netvsc_netdev_notifier); return 0; } diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c index 83f7420ddea5..e0d30527f748 100644 --- a/drivers/net/net_failover.c +++ b/drivers/net/net_failover.c @@ -28,6 +28,46 @@ #include <uapi/linux/if_arp.h> #include <net/net_failover.h> +static LIST_HEAD(net_failover_list); + +/* failover state */ +struct net_failover_info { + struct net_device *failover_dev; + + /* list of failover virtual devices */ + struct list_head list; + + /* primary netdev with same MAC */ + struct net_device __rcu *primary_dev; + + /* standby netdev */ + struct net_device __rcu *standby_dev; + + /* primary netdev stats */ + struct rtnl_link_stats64 primary_stats; + + /* standby netdev stats */ + struct rtnl_link_stats64 standby_stats; + + /* aggregated stats */ + struct rtnl_link_stats64 failover_stats; + + /* spinlock while updating stats */ + spinlock_t stats_lock; + + /* delayed setup of slave */ + struct delayed_work standby_init; +}; + +#define 
FAILOVER_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ + NETIF_F_HIGHDMA | NETIF_F_LRO) + +#define FAILOVER_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_RXCSUM | NETIF_F_ALL_TSO) + +#define FAILOVER_SETUP_INTERVAL (HZ / 10) + static bool net_failover_xmit_ready(struct net_device *dev) { return netif_running(dev) && netif_carrier_ok(dev); @@ -460,22 +500,42 @@ static void net_failover_lower_state_changed(struct net_device *slave_dev, netdev_lower_state_changed(slave_dev, &info); } -static int net_failover_slave_pre_register(struct net_device *slave_dev, - struct net_device *failover_dev) +static struct net_device *get_net_failover_bymac(const u8 *mac) { - struct net_device *standby_dev, *primary_dev; + struct net_failover_info *nfo_info; + + ASSERT_RTNL(); + + list_for_each_entry(nfo_info, &net_failover_list, list) { + struct net_device *failover_dev = nfo_info->failover_dev; + + if (ether_addr_equal(mac, failover_dev->perm_addr)) + return failover_dev; + } + + return NULL; +} + +static int net_failover_register_event(struct net_device *slave_dev) +{ + struct net_device *failover_dev, *standby_dev, *primary_dev; struct net_failover_info *nfo_info; bool slave_is_standby; + failover_dev = get_net_failover_bymac(slave_dev->perm_addr); + if (!failover_dev) + return NOTIFY_DONE; + nfo_info = netdev_priv(failover_dev); standby_dev = rtnl_dereference(nfo_info->standby_dev); primary_dev = rtnl_dereference(nfo_info->primary_dev); slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; if (slave_is_standby ? standby_dev : primary_dev) { - netdev_err(failover_dev, "%s attempting to register as slave dev when %s already present\n", + netdev_err(failover_dev, + "%s attempting to register as slave dev when %s already present\n", slave_dev->name, slave_is_standby ? 
"standby" : "primary"); - return -EINVAL; + return NOTIFY_DONE; } /* We want to allow only a direct attached VF device as a primary @@ -484,23 +544,33 @@ static int net_failover_slave_pre_register(struct net_device *slave_dev, */ if (!slave_is_standby && (!slave_dev->dev.parent || !dev_is_pci(slave_dev->dev.parent))) - return -EINVAL; + return NOTIFY_DONE; if (failover_dev->features & NETIF_F_VLAN_CHALLENGED && vlan_uses_dev(failover_dev)) { - netdev_err(failover_dev, "Device %s is VLAN challenged and failover device has VLAN set up\n", + netdev_err(failover_dev, + "Device %s is VLAN challenged and failover device has VLAN set up\n", failover_dev->name); - return -EINVAL; + return NOTIFY_DONE; } - return 0; + if (netdev_failover_join(slave_dev, failover_dev, + net_failover_handle_frame)) { + netdev_err(failover_dev, "could not join: %s", slave_dev->name); + return NOTIFY_DONE; + } + + /* Trigger rest of setup in process context */ + schedule_delayed_work(&nfo_info->standby_init, FAILOVER_SETUP_INTERVAL); + + return NOTIFY_OK; } -static int net_failover_slave_register(struct net_device *slave_dev, - struct net_device *failover_dev) +static void __net_failover_setup(struct net_device *failover_dev) { + struct net_failover_info *nfo_info = netdev_priv(failover_dev); + struct net_device *slave_dev = rtnl_dereference(nfo_info->standby_dev); struct net_device *standby_dev, *primary_dev; - struct net_failover_info *nfo_info; bool slave_is_standby; u32 orig_mtu; int err; @@ -509,13 +579,12 @@ static int net_failover_slave_register(struct net_device *slave_dev, orig_mtu = slave_dev->mtu; err = dev_set_mtu(slave_dev, failover_dev->mtu); if (err) { - netdev_err(failover_dev, "unable to change mtu of %s to %u register failed\n", + netdev_err(failover_dev, + "unable to change mtu of %s to %u register failed\n", slave_dev->name, failover_dev->mtu); goto done; } - dev_hold(slave_dev); - if (netif_running(failover_dev)) { err = dev_open(slave_dev); if (err && (err != -EBUSY)) { @@ 
-537,7 +606,6 @@ static int net_failover_slave_register(struct net_device *slave_dev, goto err_vlan_add; } - nfo_info = netdev_priv(failover_dev); standby_dev = rtnl_dereference(nfo_info->standby_dev); primary_dev = rtnl_dereference(nfo_info->primary_dev); slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; @@ -562,52 +630,56 @@ static int net_failover_slave_register(struct net_device *slave_dev, netdev_info(failover_dev, "failover %s slave:%s registered\n", slave_is_standby ? "standby" : "primary", slave_dev->name); - return 0; + return; err_vlan_add: dev_uc_unsync(slave_dev, failover_dev); dev_mc_unsync(slave_dev, failover_dev); dev_close(slave_dev); err_dev_open: - dev_put(slave_dev); dev_set_mtu(slave_dev, orig_mtu); done: - return err; + return; } -static int net_failover_slave_pre_unregister(struct net_device *slave_dev, - struct net_device *failover_dev) +static void net_failover_setup(struct work_struct *w) { - struct net_device *standby_dev, *primary_dev; - struct net_failover_info *nfo_info; + struct net_failover_info *nfo_info + = container_of(w, struct net_failover_info, standby_init.work); + struct net_device *failover_dev = nfo_info->failover_dev; - nfo_info = netdev_priv(failover_dev); - primary_dev = rtnl_dereference(nfo_info->primary_dev); - standby_dev = rtnl_dereference(nfo_info->standby_dev); - - if (slave_dev != primary_dev && slave_dev != standby_dev) - return -ENODEV; + /* handle race with cancel delayed work on removal */ + if (!rtnl_trylock()) { + schedule_delayed_work(&nfo_info->standby_init, 0); + return; + } - return 0; + __net_failover_setup(failover_dev); + rtnl_unlock(); } -static int net_failover_slave_unregister(struct net_device *slave_dev, - struct net_device *failover_dev) +static int net_failover_unregister_event(struct net_device *slave_dev) { - struct net_device *standby_dev, *primary_dev; + struct net_device *failover_dev, *primary_dev, *standby_dev; struct net_failover_info *nfo_info; bool slave_is_standby; 
+ failover_dev = netdev_failover_upper_get(slave_dev); + if (!failover_dev) + return NOTIFY_DONE; + nfo_info = netdev_priv(failover_dev); primary_dev = rtnl_dereference(nfo_info->primary_dev); standby_dev = rtnl_dereference(nfo_info->standby_dev); + if (slave_dev != primary_dev && slave_dev != standby_dev) + return NOTIFY_DONE; + vlan_vids_del_by_dev(slave_dev, failover_dev); dev_uc_unsync(slave_dev, failover_dev); dev_mc_unsync(slave_dev, failover_dev); dev_close(slave_dev); - nfo_info = netdev_priv(failover_dev); dev_get_stats(failover_dev, &nfo_info->failover_stats); slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; @@ -628,22 +700,25 @@ static int net_failover_slave_unregister(struct net_device *slave_dev, netdev_info(failover_dev, "failover %s slave:%s unregistered\n", slave_is_standby ? "standby" : "primary", slave_dev->name); - return 0; + return NOTIFY_OK; } -static int net_failover_slave_link_change(struct net_device *slave_dev, - struct net_device *failover_dev) +static int net_failover_link_event(struct net_device *slave_dev) + { - struct net_device *primary_dev, *standby_dev; + struct net_device *failover_dev, *primary_dev, *standby_dev; struct net_failover_info *nfo_info; - nfo_info = netdev_priv(failover_dev); + failover_dev = netdev_failover_upper_get(slave_dev); + if (!failover_dev) + return NOTIFY_DONE; + nfo_info = netdev_priv(failover_dev); primary_dev = rtnl_dereference(nfo_info->primary_dev); standby_dev = rtnl_dereference(nfo_info->standby_dev); if (slave_dev != primary_dev && slave_dev != standby_dev) - return -ENODEV; + return NOTIFY_DONE; if ((primary_dev && net_failover_xmit_ready(primary_dev)) || (standby_dev && net_failover_xmit_ready(standby_dev))) { @@ -657,43 +732,11 @@ static int net_failover_slave_link_change(struct net_device *slave_dev, net_failover_lower_state_changed(slave_dev, primary_dev, standby_dev); - return 0; + return NOTIFY_DONE; } -static int net_failover_slave_name_change(struct net_device 
*slave_dev, - struct net_device *failover_dev) -{ - struct net_device *primary_dev, *standby_dev; - struct net_failover_info *nfo_info; - - nfo_info = netdev_priv(failover_dev); - - primary_dev = rtnl_dereference(nfo_info->primary_dev); - standby_dev = rtnl_dereference(nfo_info->standby_dev); - - if (slave_dev != primary_dev && slave_dev != standby_dev) - return -ENODEV; - - /* We need to bring up the slave after the rename by udev in case - * open failed with EBUSY when it was registered. - */ - dev_open(slave_dev); - - return 0; -} - -static struct failover_ops net_failover_ops = { - .slave_pre_register = net_failover_slave_pre_register, - .slave_register = net_failover_slave_register, - .slave_pre_unregister = net_failover_slave_pre_unregister, - .slave_unregister = net_failover_slave_unregister, - .slave_link_change = net_failover_slave_link_change, - .slave_name_change = net_failover_slave_name_change, - .slave_handle_frame = net_failover_handle_frame, -}; - /** - * net_failover_create - Create and register a failover instance + * net_failover_create - Create and register a failover device * * @dev: standby netdev * @@ -703,13 +746,12 @@ static struct failover_ops net_failover_ops = { * the original standby netdev and a VF netdev with the same MAC gets * registered as primary netdev. 
* - * Return: pointer to failover instance + * Return: pointer to failover network device */ -struct failover *net_failover_create(struct net_device *standby_dev) +struct net_device *net_failover_create(struct net_device *standby_dev) { - struct device *dev = standby_dev->dev.parent; + struct net_failover_info *nfo_info; struct net_device *failover_dev; - struct failover *failover; int err; /* Alloc at least 2 queues, for now we are going with 16 assuming @@ -717,18 +759,22 @@ struct failover *net_failover_create(struct net_device *standby_dev) */ failover_dev = alloc_etherdev_mq(sizeof(struct net_failover_info), 16); if (!failover_dev) { - dev_err(dev, "Unable to allocate failover_netdev!\n"); - return ERR_PTR(-ENOMEM); + netdev_err(standby_dev, "Unable to allocate failover_netdev!\n"); + return NULL; } + nfo_info = netdev_priv(failover_dev); dev_net_set(failover_dev, dev_net(standby_dev)); - SET_NETDEV_DEV(failover_dev, dev); + nfo_info->failover_dev = failover_dev; + INIT_DELAYED_WORK(&nfo_info->standby_init, net_failover_setup); failover_dev->netdev_ops = &failover_dev_ops; failover_dev->ethtool_ops = &failover_ethtool_ops; /* Initialize the device options */ - failover_dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE; + failover_dev->priv_flags |= IFF_UNICAST_FLT | + IFF_NO_QUEUE | + IFF_FAILOVER; failover_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); @@ -746,29 +792,38 @@ struct failover *net_failover_create(struct net_device *standby_dev) failover_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; failover_dev->features |= failover_dev->hw_features; - memcpy(failover_dev->dev_addr, standby_dev->dev_addr, - failover_dev->addr_len); + ether_addr_copy(failover_dev->dev_addr, standby_dev->dev_addr); + ether_addr_copy(failover_dev->perm_addr, standby_dev->perm_addr); failover_dev->min_mtu = standby_dev->min_mtu; failover_dev->max_mtu = standby_dev->max_mtu; - err = register_netdev(failover_dev); + netif_carrier_off(failover_dev); + + rtnl_lock(); + 
err = register_netdevice(failover_dev); if (err) { - dev_err(dev, "Unable to register failover_dev!\n"); + netdev_err(standby_dev, "Unable to register failover_dev!\n"); goto err_register_netdev; } - netif_carrier_off(failover_dev); + err = netdev_failover_join(standby_dev, failover_dev, + net_failover_handle_frame); + if (err) { + netdev_err(failover_dev, "Unable to join with %s\n", + standby_dev->name); + goto err_failover_join; + } - failover = failover_register(failover_dev, &net_failover_ops); - if (IS_ERR(failover)) - goto err_failover_register; + list_add(&nfo_info->list, &net_failover_list); + rtnl_unlock(); - return failover; + return failover_dev; -err_failover_register: - unregister_netdev(failover_dev); +err_failover_join: + unregister_netdevice(failover_dev); err_register_netdev: + rtnl_unlock(); free_netdev(failover_dev); return ERR_PTR(err); @@ -786,31 +841,27 @@ EXPORT_SYMBOL_GPL(net_failover_create); * netdev. Used by paravirtual drivers that use 3-netdev model. * */ -void net_failover_destroy(struct failover *failover) +void net_failover_destroy(struct net_device *failover_dev) { - struct net_failover_info *nfo_info; - struct net_device *failover_dev; + struct net_failover_info *nfo_info = netdev_priv(failover_dev); struct net_device *slave_dev; - if (!failover) - return; - - failover_dev = rcu_dereference(failover->failover_dev); - nfo_info = netdev_priv(failover_dev); - netif_device_detach(failover_dev); rtnl_lock(); - slave_dev = rtnl_dereference(nfo_info->primary_dev); - if (slave_dev) - failover_slave_unregister(slave_dev); + if (slave_dev) { + netdev_failover_unjoin(slave_dev, failover_dev); + dev_put(slave_dev); + } slave_dev = rtnl_dereference(nfo_info->standby_dev); - if (slave_dev) - failover_slave_unregister(slave_dev); + if (slave_dev) { + netdev_failover_unjoin(slave_dev, failover_dev); + dev_put(slave_dev); + } - failover_unregister(failover); + list_del(&nfo_info->list); unregister_netdevice(failover_dev); @@ -820,9 +871,53 @@ void 
net_failover_destroy(struct failover *failover) } EXPORT_SYMBOL_GPL(net_failover_destroy); +static int net_failover_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + + /* Skip parent events */ + if (netif_is_failover(event_dev)) + return NOTIFY_DONE; + + /* Avoid non-Ethernet type devices */ + if (event_dev->type != ARPHRD_ETHER) + return NOTIFY_DONE; + + /* Avoid Vlan dev with same MAC registering as VF */ + if (is_vlan_dev(event_dev)) + return NOTIFY_DONE; + + /* Avoid Bonding master dev with same MAC registering as VF */ + if ((event_dev->priv_flags & IFF_BONDING) && + (event_dev->flags & IFF_MASTER)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_REGISTER: + return net_failover_register_event(event_dev); + + case NETDEV_UNREGISTER: + return net_failover_unregister_event(event_dev); + + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_CHANGE: + return net_failover_link_event(event_dev); + + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block net_failover_notifier = { + .notifier_call = net_failover_event, +}; + static __init int net_failover_init(void) { + register_netdevice_notifier(&net_failover_notifier); return 0; } module_init(net_failover_init); @@ -830,6 +925,7 @@ module_init(net_failover_init); static __exit void net_failover_exit(void) { + unregister_netdevice_notifier(&net_failover_notifier); } module_exit(net_failover_exit); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 6d710b8b41c5..b40ae28dac93 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -215,7 +215,7 @@ struct virtnet_info { unsigned long guest_offloads; /* failover when STANDBY feature enabled */ - struct failover *failover; + struct net_device *failover; }; struct padded_vnet_hdr { @@ -2930,11 +2930,10 @@ static int virtnet_probe(struct virtio_device *vdev) virtnet_init_settings(dev); if (virtio_has_feature(vdev, 
VIRTIO_NET_F_STANDBY)) { - vi->failover = net_failover_create(vi->dev); - if (IS_ERR(vi->failover)) { - err = PTR_ERR(vi->failover); + err = -ENOMEM; + vi->failover = net_failover_create(dev); + if (!vi->failover) goto free_vqs; - } } err = register_netdev(dev); diff --git a/include/net/failover.h b/include/net/failover.h index bb15438f39c7..22d6c1369101 100644 --- a/include/net/failover.h +++ b/include/net/failover.h @@ -6,31 +6,10 @@ #include <linux/netdevice.h> -struct failover_ops { - int (*slave_pre_register)(struct net_device *slave_dev, - struct net_device *failover_dev); - int (*slave_register)(struct net_device *slave_dev, - struct net_device *failover_dev); - int (*slave_pre_unregister)(struct net_device *slave_dev, - struct net_device *failover_dev); - int (*slave_unregister)(struct net_device *slave_dev, - struct net_device *failover_dev); - int (*slave_link_change)(struct net_device *slave_dev, - struct net_device *failover_dev); - int (*slave_name_change)(struct net_device *slave_dev, - struct net_device *failover_dev); - rx_handler_result_t (*slave_handle_frame)(struct sk_buff **pskb); -}; - -struct failover { - struct list_head list; - struct net_device __rcu *failover_dev; - struct failover_ops __rcu *ops; -}; - -struct failover *failover_register(struct net_device *dev, - struct failover_ops *ops); -void failover_unregister(struct failover *failover); -int failover_slave_unregister(struct net_device *slave_dev); +int netdev_failover_join(struct net_device *lower, struct net_device *upper, + rx_handler_func_t *rx_handler); +struct net_device *netdev_failover_upper_get(struct net_device *lower); +void netdev_failover_unjoin(struct net_device *lower, + struct net_device *upper); #endif /* _FAILOVER_H */ diff --git a/include/net/net_failover.h b/include/net/net_failover.h index b12a1c469d1c..a99b3b00b4e3 100644 --- a/include/net/net_failover.h +++ b/include/net/net_failover.h @@ -6,35 +6,7 @@ #include <net/failover.h> -/* failover state */ -struct 
net_failover_info { - /* primary netdev with same MAC */ - struct net_device __rcu *primary_dev; - - /* standby netdev */ - struct net_device __rcu *standby_dev; - - /* primary netdev stats */ - struct rtnl_link_stats64 primary_stats; - - /* standby netdev stats */ - struct rtnl_link_stats64 standby_stats; - - /* aggregated stats */ - struct rtnl_link_stats64 failover_stats; - - /* spinlock while updating stats */ - spinlock_t stats_lock; -}; - -struct failover *net_failover_create(struct net_device *standby_dev); -void net_failover_destroy(struct failover *failover); - -#define FAILOVER_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ - NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ - NETIF_F_HIGHDMA | NETIF_F_LRO) - -#define FAILOVER_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ - NETIF_F_RXCSUM | NETIF_F_ALL_TSO) +struct net_device *net_failover_create(struct net_device *standby_dev); +void net_failover_destroy(struct net_device *failover_dev); #endif /* _NET_FAILOVER_H */ diff --git a/net/Kconfig b/net/Kconfig index f738a6f27665..697d84202695 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -433,17 +433,8 @@ config PAGE_POOL bool config FAILOVER - tristate "Generic failover module" - help - The failover module provides a generic interface for paravirtual - drivers to register a netdev and a set of ops with a failover - instance. The ops are used as event handlers that get called to - handle netdev register/unregister/link change/name change events - on slave pci ethernet devices with the same mac address as the - failover netdev. This enables paravirtual drivers to use a - VF as an accelerated low latency datapath. It also allows live - migration of VMs with direct attached VFs by failing over to the - paravirtual datapath when the VF is unplugged. 
+ bool + default n endif # if NET diff --git a/net/core/failover.c b/net/core/failover.c index 4a92a98ccce9..499f0fd7e4d3 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -1,10 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018, Intel Corporation. */ -/* A common module to handle registrations and notifications for paravirtual +/* A library for managing chained upper/oower devices such as * drivers to enable accelerated datapath and support VF live migration. - * - * The notifier and event handling code is based on netvsc driver. */ #include <linux/module.h> @@ -14,302 +12,62 @@ #include <linux/if_vlan.h> #include <net/failover.h> -static LIST_HEAD(failover_list); -static DEFINE_SPINLOCK(failover_lock); - -static struct net_device *failover_get_bymac(u8 *mac, struct failover_ops **ops) -{ - struct net_device *failover_dev; - struct failover *failover; - - spin_lock(&failover_lock); - list_for_each_entry(failover, &failover_list, list) { - failover_dev = rtnl_dereference(failover->failover_dev); - if (ether_addr_equal(failover_dev->perm_addr, mac)) { - *ops = rtnl_dereference(failover->ops); - spin_unlock(&failover_lock); - return failover_dev; - } - } - spin_unlock(&failover_lock); - return NULL; -} - -/** - * failover_slave_register - Register a slave netdev - * - * @slave_dev: slave netdev that is being registered - * - * Registers a slave device to a failover instance. Only ethernet devices - * are supported. - */ -static int failover_slave_register(struct net_device *slave_dev) +/* failover_join - Join an lower netdev with an upper device. 
*/ +int netdev_failover_join(struct net_device *lower_dev, + struct net_device *upper_dev, + rx_handler_func_t *rx_handler) { - struct netdev_lag_upper_info lag_upper_info; - struct net_device *failover_dev; - struct failover_ops *fops; int err; - if (slave_dev->type != ARPHRD_ETHER) - goto done; - ASSERT_RTNL(); - failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); - if (!failover_dev) - goto done; + /* Don't allow joining devices of different protocols */ + if (upper_dev->type != lower_dev->type) + return -EINVAL; - if (fops && fops->slave_pre_register && - fops->slave_pre_register(slave_dev, failover_dev)) - goto done; - - err = netdev_rx_handler_register(slave_dev, fops->slave_handle_frame, - failover_dev); + err = netdev_rx_handler_register(lower_dev, rx_handler, upper_dev); if (err) { - netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n", + netdev_err(lower_dev, + "can not register failover rx handler (err = %d)\n", err); - goto done; + return err; } - lag_upper_info.tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP; - err = netdev_master_upper_dev_link(slave_dev, failover_dev, NULL, - &lag_upper_info, NULL); + err = netdev_master_upper_dev_link(lower_dev, upper_dev, NULL, + NULL, NULL); if (err) { - netdev_err(slave_dev, "can not set failover device %s (err = %d)\n", - failover_dev->name, err); - goto err_upper_link; + netdev_err(lower_dev, + "can not set failover device %s (err = %d)\n", + upper_dev->name, err); + netdev_rx_handler_unregister(lower_dev); + return err; } - slave_dev->priv_flags |= IFF_FAILOVER_SLAVE; - - if (fops && fops->slave_register && - !fops->slave_register(slave_dev, failover_dev)) - return NOTIFY_OK; - - netdev_upper_dev_unlink(slave_dev, failover_dev); - slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; -err_upper_link: - netdev_rx_handler_unregister(slave_dev); -done: - return NOTIFY_DONE; -} - -/** - * failover_slave_unregister - Unregister a slave netdev - * - * @slave_dev: slave netdev that is being 
unregistered - * - * Unregisters a slave device from a failover instance. - */ -int failover_slave_unregister(struct net_device *slave_dev) -{ - struct net_device *failover_dev; - struct failover_ops *fops; - - if (!netif_is_failover_slave(slave_dev)) - goto done; - - ASSERT_RTNL(); - - failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); - if (!failover_dev) - goto done; - - if (fops && fops->slave_pre_unregister && - fops->slave_pre_unregister(slave_dev, failover_dev)) - goto done; - - netdev_rx_handler_unregister(slave_dev); - netdev_upper_dev_unlink(slave_dev, failover_dev); - slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; - - if (fops && fops->slave_unregister && - !fops->slave_unregister(slave_dev, failover_dev)) - return NOTIFY_OK; - -done: - return NOTIFY_DONE; + dev_hold(lower_dev); + lower_dev->priv_flags |= IFF_FAILOVER_SLAVE; + return 0; } -EXPORT_SYMBOL_GPL(failover_slave_unregister); +EXPORT_SYMBOL_GPL(netdev_failover_join); -static int failover_slave_link_change(struct net_device *slave_dev) +/* Find upper network device for failover slave device */ +struct net_device *netdev_failover_upper_get(struct net_device *lower_dev) { - struct net_device *failover_dev; - struct failover_ops *fops; - - if (!netif_is_failover_slave(slave_dev)) - goto done; - - ASSERT_RTNL(); - - failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); - if (!failover_dev) - goto done; - - if (!netif_running(failover_dev)) - goto done; + if (!netif_is_failover_slave(lower_dev)) + return NULL; - if (fops && fops->slave_link_change && - !fops->slave_link_change(slave_dev, failover_dev)) - return NOTIFY_OK; - -done: - return NOTIFY_DONE; + return netdev_master_upper_dev_get(lower_dev); } +EXPORT_SYMBOL_GPL(netdev_failover_upper_get); -static int failover_slave_name_change(struct net_device *slave_dev) +/* netdev_failover_unjoin - Break connection between lower and upper device.
*/ +void netdev_failover_unjoin(struct net_device *lower_dev, + struct net_device *upper_dev) { - struct net_device *failover_dev; - struct failover_ops *fops; - - if (!netif_is_failover_slave(slave_dev)) - goto done; - ASSERT_RTNL(); - failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); - if (!failover_dev) - goto done; - - if (!netif_running(failover_dev)) - goto done; - - if (fops && fops->slave_name_change && - !fops->slave_name_change(slave_dev, failover_dev)) - return NOTIFY_OK; - -done: - return NOTIFY_DONE; -} - -static int -failover_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); - - /* Skip parent events */ - if (netif_is_failover(event_dev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_REGISTER: - return failover_slave_register(event_dev); - case NETDEV_UNREGISTER: - return failover_slave_unregister(event_dev); - case NETDEV_UP: - case NETDEV_DOWN: - case NETDEV_CHANGE: - return failover_slave_link_change(event_dev); - case NETDEV_CHANGENAME: - return failover_slave_name_change(event_dev); - default: - return NOTIFY_DONE; - } -} - -static struct notifier_block failover_notifier = { - .notifier_call = failover_event, -}; - -static void -failover_existing_slave_register(struct net_device *failover_dev) -{ - struct net *net = dev_net(failover_dev); - struct net_device *dev; - - rtnl_lock(); - for_each_netdev(net, dev) { - if (netif_is_failover(dev)) - continue; - if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr)) - failover_slave_register(dev); - } - rtnl_unlock(); -} - -/** - * failover_register - Register a failover instance - * - * @dev: failover netdev - * @ops: failover ops - * - * Allocate and register a failover instance for a failover netdev. ops - * provides handlers for slave device register/unregister/link change/ - * name change events. 
- * - * Return: pointer to failover instance - */ -struct failover *failover_register(struct net_device *dev, - struct failover_ops *ops) -{ - struct failover *failover; - - if (dev->type != ARPHRD_ETHER) - return ERR_PTR(-EINVAL); - - failover = kzalloc(sizeof(*failover), GFP_KERNEL); - if (!failover) - return ERR_PTR(-ENOMEM); - - rcu_assign_pointer(failover->ops, ops); - dev_hold(dev); - dev->priv_flags |= IFF_FAILOVER; - rcu_assign_pointer(failover->failover_dev, dev); - - spin_lock(&failover_lock); - list_add_tail(&failover->list, &failover_list); - spin_unlock(&failover_lock); - - netdev_info(dev, "failover master:%s registered\n", dev->name); - - failover_existing_slave_register(dev); - - return failover; -} -EXPORT_SYMBOL_GPL(failover_register); - -/** - * failover_unregister - Unregister a failover instance - * - * @failover: pointer to failover instance - * - * Unregisters and frees a failover instance. - */ -void failover_unregister(struct failover *failover) -{ - struct net_device *failover_dev; - - failover_dev = rcu_dereference(failover->failover_dev); - - netdev_info(failover_dev, "failover master:%s unregistered\n", - failover_dev->name); - - failover_dev->priv_flags &= ~IFF_FAILOVER; - dev_put(failover_dev); - - spin_lock(&failover_lock); - list_del(&failover->list); - spin_unlock(&failover_lock); - - kfree(failover); + netdev_rx_handler_unregister(lower_dev); + netdev_upper_dev_unlink(lower_dev, upper_dev); + dev_put(lower_dev); + lower_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; } -EXPORT_SYMBOL_GPL(failover_unregister); - -static __init int -failover_init(void) -{ - register_netdevice_notifier(&failover_notifier); - - return 0; -} -module_init(failover_init); - -static __exit -void failover_exit(void) -{ - unregister_netdevice_notifier(&failover_notifier); -} -module_exit(failover_exit); - -MODULE_DESCRIPTION("Generic failover infrastructure/interface"); -MODULE_LICENSE("GPL v2"); +EXPORT_SYMBOL_GPL(netdev_failover_unjoin); -- 2.17.1