Devlink device reload is a special operation which brings the device down
and back up. Such an operation will unregister the devlink device of a
sub-function port.
Doing this from devlink_reload() while devlink_mutex is held leads to a
cyclic locking dependency. For example,

devlink_reload()
  mutex_lock(&devlink_mutex); <- First lock acquire
  mlx5_reload_down(PCI PF device)
    disable_sf_devices();
      sf_state_set(inactive);
        ancillary_dev->remove();
           mlx5_adev_remove(adev);
             devlink_unregister(adev->devlink_instance);
               mutex_lock(&devlink_mutex); <- Second lock acquire

Hence the devlink_reload() operation cannot be done under the global
devlink_mutex.

In a second such instance, the reload_down() callback is likely to disable
reload on the child devlink device. This also prevents devlink_reload() from
using the overloaded global devlink_mutex.

devlink_reload()
  mutex_lock(&devlink_mutex); <- First lock acquire
    mlx5_reload_down(PCI PF device)
      disable_sf_devices();
        ancillary_dev->remove();
           mlx5_adev_remove(adev);
             devlink_reload_disable(adev->devlink_instance);
               mutex_lock(&devlink_mutex); <- Second lock acquire

Therefore, introduce a reload_lock per devlink instance which is held
when performing devlink device reload.

Signed-off-by: Parav Pandit <pa...@nvidia.com>
Reviewed-by: Jiri Pirko <j...@nvidia.com>
---
 include/net/devlink.h |  1 +
 net/core/devlink.c    | 25 +++++++++++++++----------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 124bac130c22..ef487b8ed17b 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -52,6 +52,7 @@ struct devlink {
        struct mutex lock; /* Serializes access to devlink instance specific objects such as
                            * port, sb, dpipe, resource, params, region, traps and more.
                            */
+       struct mutex reload_lock; /* Protects reload operation */
        u8 reload_failed:1,
           reload_enabled:1,
           registered:1;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 3e59ba73d5c4..c7c6f274d392 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3307,29 +3307,32 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
        u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
        int err;
 
-       if (!devlink->reload_enabled)
-               return -EOPNOTSUPP;
+       mutex_lock(&devlink->reload_lock);
+       if (!devlink->reload_enabled) {
+               err = -EOPNOTSUPP;
+               goto done;
+       }
 
        memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
               sizeof(remote_reload_stats));
        err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
        if (err)
-               return err;
+               goto done;
 
        if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
                devlink_reload_netns_change(devlink, dest_net);
 
        err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
        devlink_reload_failed_set(devlink, !!err);
-       if (err)
-               return err;
 
        WARN_ON(!(*actions_performed & BIT(action)));
        /* Catch driver on updating the remote action within devlink reload */
        WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
                       sizeof(remote_reload_stats)));
        devlink_reload_stats_update(devlink, limit, *actions_performed);
-       return 0;
+done:
+       mutex_unlock(&devlink->reload_lock);
+       return err;
 }
 
 static int
@@ -8118,6 +8121,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
        INIT_LIST_HEAD(&devlink->trap_policer_list);
        mutex_init(&devlink->lock);
        mutex_init(&devlink->reporters_lock);
+       mutex_init(&devlink->reload_lock);
        return devlink;
 }
 EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -8166,9 +8170,9 @@ EXPORT_SYMBOL_GPL(devlink_unregister);
  */
 void devlink_reload_enable(struct devlink *devlink)
 {
-       mutex_lock(&devlink_mutex);
+       mutex_lock(&devlink->reload_lock);
        devlink->reload_enabled = true;
-       mutex_unlock(&devlink_mutex);
+       mutex_unlock(&devlink->reload_lock);
 }
 EXPORT_SYMBOL_GPL(devlink_reload_enable);
 
@@ -8182,12 +8186,12 @@ EXPORT_SYMBOL_GPL(devlink_reload_enable);
  */
 void devlink_reload_disable(struct devlink *devlink)
 {
-       mutex_lock(&devlink_mutex);
+       mutex_lock(&devlink->reload_lock);
        /* Mutex is taken which ensures that no reload operation is in
         * progress while setting up forbidded flag.
         */
        devlink->reload_enabled = false;
-       mutex_unlock(&devlink_mutex);
+       mutex_unlock(&devlink->reload_lock);
 }
 EXPORT_SYMBOL_GPL(devlink_reload_disable);
 
@@ -8198,6 +8202,7 @@ EXPORT_SYMBOL_GPL(devlink_reload_disable);
  */
 void devlink_free(struct devlink *devlink)
 {
+       mutex_destroy(&devlink->reload_lock);
        mutex_destroy(&devlink->reporters_lock);
        mutex_destroy(&devlink->lock);
        WARN_ON(!list_empty(&devlink->trap_policer_list));
-- 
2.26.2

Reply via email to