On 4/2/19 12:25 AM, Saeed Mahameed wrote:
> On Mon, 2019-04-01 at 20:24 +0800, Firo Yang wrote:
>> This crash is triggered by a user-after-free since lake of
>> the synchronization of a race condition between 
>> be_update_queues() modifying multi-purpose channels of
>> network device and be_tx_timeout().
>>
>> BUG: unable to handle kernel NULL pointer dereference at (null)
>> Call Trace:
>> be_tx_timeout+0xa5/0x360 [be2net]
>> dev_watchdog+0x1d8/0x210
>> call_timer_fn+0x32/0x140
>>
>> To fix it, detach the interface before modifying
>> multi-purpose channels of network device.
>>
>> Signed-off-by: Firo Yang <fy...@suse.com>
>> ---
>>  drivers/net/ethernet/emulex/benet/be_main.c | 12 ++++++++----
>>  1 file changed, 8 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/emulex/benet/be_main.c
>> b/drivers/net/ethernet/emulex/benet/be_main.c
>> index d5026909dec5..25d0128bf684 100644
>> --- a/drivers/net/ethernet/emulex/benet/be_main.c
>> +++ b/drivers/net/ethernet/emulex/benet/be_main.c
>> @@ -4705,6 +4705,8 @@ int be_update_queues(struct be_adapter
>> *adapter)
>>      struct net_device *netdev = adapter->netdev;
>>      int status;
>>  
>> +    netif_device_detach(netdev);
>> +
> 
> This will reduce the probability, but will not do the trick.
> since this will not guarantee that the dev_watchdog is disabled.
Hi Saeed,

What about using dev_watchdog_down/up() to temporarily disable the
dev_watchdog?

+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4697,6 +4697,8 @@ int be_update_queues(struct be_adapter *adapter)
        struct net_device *netdev = adapter->netdev;
        int status;

+       dev_watchdog_down();
+
        if (netif_running(netdev))
                be_close(netdev);

@@ -4711,21 +4713,21 @@ int be_update_queues(struct be_adapter *adapter)
        be_clear_queues(adapter);
        status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
        if (status)
-               return status;
+               goto out;

        if (!msix_enabled(adapter)) {
                status = be_msix_enable(adapter);
                if (status)
-                       return status;
+                       goto out;
        }

        status = be_if_create(adapter);
        if (status)
-               return status;
+               goto out;

        status = be_setup_queues(adapter);
        if (status)
-               return status;
+               goto out;

        be_schedule_worker(adapter);

@@ -4741,6 +4743,9 @@ int be_update_queues(struct be_adapter *adapter)
        if (netif_running(netdev))
                status = be_open(netdev);

+out:
+       dev_watchdog_up();

Thanks,
Firo

> 
> what you need is proper locking mechanism and/or scheduling the
> tx_timeout handling out of atomic context if a mutex will be required.
> 
> netif_device_detach is a too heavy hammer for such synchronizations
> tasks.
> 
>>      if (netif_running(netdev))
>>              be_close(netdev);
>>  
>> @@ -4719,21 +4721,21 @@ int be_update_queues(struct be_adapter
>> *adapter)
>>      be_clear_queues(adapter);
>>      status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
>>      if (status)
>> -            return status;
>> +            goto out;
>>  
>>      if (!msix_enabled(adapter)) {
>>              status = be_msix_enable(adapter);
>>              if (status)
>> -                    return status;
>> +                    goto out;
>>      }
>>  
>>      status = be_if_create(adapter);
>>      if (status)
>> -            return status;
>> +            goto out;
>>  
>>      status = be_setup_queues(adapter);
>>      if (status)
>> -            return status;
>> +            goto out;
>>  
>>      be_schedule_worker(adapter);
>>  
>> @@ -4748,6 +4750,8 @@ int be_update_queues(struct be_adapter
>> *adapter)
>>      if (netif_running(netdev))
>>              status = be_open(netdev);
>>  
>> +out:
>> +    netif_device_attach(netdev);
>>      return status;
>>  }
>>  

Reply via email to