On Mon, 2018-11-19 at 15:59 -0600, Juliet Kim wrote:
> This patch changes the driver to use only rtnl_lock during a reset, to
> avoid a deadlock that could occur when a thread performing a close is
> holding rtnl_lock and waiting for reset_lock, which has been acquired by
> another thread that is itself waiting for rtnl_lock in order to set the
> number of tx/rx queues during a reset.
> 
> Also, we now set the number of tx/rx queues during a soft reset
> for failover or LPM events.
> 
> Signed-off-by: Juliet Kim <juli...@linux.vnet.ibm.com>
> ---
>  drivers/net/ethernet/ibm/ibmvnic.c |   59 
> +++++++++++++-----------------------
>  drivers/net/ethernet/ibm/ibmvnic.h |    2 +
>  2 files changed, 22 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
> b/drivers/net/ethernet/ibm/ibmvnic.c
> index 7893bef..4a5de59 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.c
> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> @@ -1103,20 +1103,15 @@ static int ibmvnic_open(struct net_device *netdev)
>               return 0;
>       }
> 
> -     mutex_lock(&adapter->reset_lock);
> -
>       if (adapter->state != VNIC_CLOSED) {
>               rc = ibmvnic_login(netdev);
> -             if (rc) {
> -                     mutex_unlock(&adapter->reset_lock);
> +             if (rc)
>                       return rc;
> -             }
> 
>               rc = init_resources(adapter);
>               if (rc) {
>                       netdev_err(netdev, "failed to initialize resources\n");
>                       release_resources(adapter);
> -                     mutex_unlock(&adapter->reset_lock);
>                       return rc;
>               }
>       }
> @@ -1124,8 +1119,6 @@ static int ibmvnic_open(struct net_device *netdev)
>       rc = __ibmvnic_open(netdev);
>       netif_carrier_on(netdev);
> 
> -     mutex_unlock(&adapter->reset_lock);
> -
>       return rc;
>  }
> 
> @@ -1269,10 +1262,8 @@ static int ibmvnic_close(struct net_device *netdev)
>               return 0;
>       }
> 
> -     mutex_lock(&adapter->reset_lock);
>       rc = __ibmvnic_close(netdev);
>       ibmvnic_cleanup(netdev);
> -     mutex_unlock(&adapter->reset_lock);
> 
>       return rc;
>  }
> @@ -1820,20 +1811,15 @@ static int do_reset(struct ibmvnic_adapter *adapter,
>                               return rc;
>               } else if (adapter->req_rx_queues != old_num_rx_queues ||
>                          adapter->req_tx_queues != old_num_tx_queues) {
> -                     adapter->map_id = 1;
>                       release_rx_pools(adapter);
>                       release_tx_pools(adapter);
> -                     rc = init_rx_pools(netdev);
> -                     if (rc)
> -                             return rc;
> -                     rc = init_tx_pools(netdev);
> -                     if (rc)
> -                             return rc;
> -
>                       release_napi(adapter);
> -                     rc = init_napi(adapter);
> +                     release_vpd_data(adapter);
> +
> +                     rc = init_resources(adapter);
>                       if (rc)
>                               return rc;
> +
>               } else {
>                       rc = reset_tx_pools(adapter);
>                       if (rc)
> @@ -1917,17 +1903,8 @@ static int do_hard_reset(struct ibmvnic_adapter 
> *adapter,
>               adapter->state = VNIC_PROBED;
>               return 0;
>       }
> -     /* netif_set_real_num_xx_queues needs to take rtnl lock here
> -      * unless wait_for_reset is set, in which case the rtnl lock
> -      * has already been taken before initializing the reset
> -      */
> -     if (!adapter->wait_for_reset) {
> -             rtnl_lock();
> -             rc = init_resources(adapter);
> -             rtnl_unlock();
> -     } else {
> -             rc = init_resources(adapter);
> -     }
> +
> +     rc = init_resources(adapter);
>       if (rc)
>               return rc;
> 
> @@ -1986,13 +1963,21 @@ static void __ibmvnic_reset(struct work_struct *work)
>       struct ibmvnic_rwi *rwi;
>       struct ibmvnic_adapter *adapter;
>       struct net_device *netdev;
> +     bool we_lock_rtnl = false;
>       u32 reset_state;
>       int rc = 0;
> 
>       adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
>       netdev = adapter->netdev;
> 
> -     mutex_lock(&adapter->reset_lock);
> +     /* netif_set_real_num_xx_queues needs to take rtnl lock here
> +      * unless wait_for_reset is set, in which case the rtnl lock
> +      * has already been taken before initializing the reset
> +      */
> +     if (!adapter->wait_for_reset) {
> +             rtnl_lock();
> +             we_lock_rtnl = true;
> +     }
>       reset_state = adapter->state;
> 
>       rwi = get_next_rwi(adapter);
> @@ -2020,12 +2005,11 @@ static void __ibmvnic_reset(struct work_struct *work)
>       if (rc) {
>               netdev_dbg(adapter->netdev, "Reset failed\n");
>               free_all_rwi(adapter);
> -             mutex_unlock(&adapter->reset_lock);
> -             return;
>       }
> 
>       adapter->resetting = false;
> -     mutex_unlock(&adapter->reset_lock);
> +     if (we_lock_rtnl)
> +             rtnl_unlock();
>  }
> 
>  static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
> @@ -4768,7 +4752,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
> struct vio_device_id *id)
> 
>       INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
>       INIT_LIST_HEAD(&adapter->rwi_list);
> -     mutex_init(&adapter->reset_lock);
>       mutex_init(&adapter->rwi_lock);
>       adapter->resetting = false;
> 
> @@ -4840,8 +4823,8 @@ static int ibmvnic_remove(struct vio_dev *dev)
>       struct ibmvnic_adapter *adapter = netdev_priv(netdev);
> 
>       adapter->state = VNIC_REMOVING;
> -     unregister_netdev(netdev);
> -     mutex_lock(&adapter->reset_lock);
> +     rtnl_lock();
> +     unregister_netdevice(netdev);
> 
>       release_resources(adapter);
>       release_sub_crqs(adapter, 1);
> @@ -4852,7 +4835,7 @@ static int ibmvnic_remove(struct vio_dev *dev)
> 
>       adapter->state = VNIC_REMOVED;
> 
> -     mutex_unlock(&adapter->reset_lock);
> +     rtnl_unlock();
>       device_remove_file(&dev->dev, &dev_attr_failover);
>       free_netdev(netdev);
>       dev_set_drvdata(&dev->dev, NULL);
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
> b/drivers/net/ethernet/ibm/ibmvnic.h
> index 18103b8..99c4f8d 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.h
> +++ b/drivers/net/ethernet/ibm/ibmvnic.h
> @@ -1075,7 +1075,7 @@ struct ibmvnic_adapter {
>       struct tasklet_struct tasklet;
>       enum vnic_state state;
>       enum ibmvnic_reset_reason reset_reason;
> -     struct mutex reset_lock, rwi_lock;
> +     struct mutex rwi_lock;
>       struct list_head rwi_list;
>       struct work_struct ibmvnic_reset;
>       bool resetting;
> 

Thanks for the fix. Please add Reported-and-tested-by: Abdul Haleem 
<abdha...@linux.vnet.ibm.com>

-- 
Regards

Abdul Haleem
IBM Linux Technology Centre



Reply via email to