On 2/27/25 10:50, Nikolay Aleksandrov wrote:
> On 2/27/25 10:37, Hangbin Liu wrote:
>> The fixed commit placed mutex_lock() inside spin_lock_bh(), which triggers
>> a warning:
>>
>>   BUG: sleeping function called from invalid context at...
>>
>> Fix this by moving the IPsec deletion operation to bond_ipsec_free_sa(),
>> which is not called with spin_lock_bh() held.
>>
>> Additionally, delete the IPsec list in bond_ipsec_del_sa_all() when the
>> XFRM state is DEAD to prevent xdo_dev_state_free() from being triggered
>> again in bond_ipsec_free_sa().
>>
>> For bond_ipsec_free_sa(), there are now three conditions:
>>
>>   1. if (!slave): When no active device exists.
>>   2. if (!xs->xso.real_dev): When xdo_dev_state_add() fails.
>>   3. if (xs->xso.real_dev != real_dev): When an xs has already been freed
>>      by bond_ipsec_del_sa_all() due to migration, and the active slave has
>>      changed to a new device. At the same time, the xs is marked as DEAD
>>      because the XFRM entry is removed, triggering xfrm_state_gc_task() and
>>      bond_ipsec_free_sa().
>>
>> In all three cases, xdo_dev_state_free() should not be called, only xs
>> should be removed from bond->ipsec list.
>>
>> Fixes: 2aeeef906d5a ("bonding: change ipsec_lock from spin lock to mutex")
>> Reported-by: Jakub Kicinski <k...@kernel.org>
>> Closes: https://lore.kernel.org/netdev/20241212062734.182a0...@kernel.org
>> Suggested-by: Cosmin Ratiu <cra...@nvidia.com>
>> Signed-off-by: Hangbin Liu <liuhang...@gmail.com>
>> ---
>>  drivers/net/bonding/bond_main.c | 34 ++++++++++++++++++++++-----------
>>  1 file changed, 23 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/net/bonding/bond_main.c 
>> b/drivers/net/bonding/bond_main.c
>> index e45bba240cbc..683bf1221caf 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -537,6 +537,10 @@ static void bond_ipsec_add_sa_all(struct bonding *bond)
>>      }
>>  
>>      list_for_each_entry(ipsec, &bond->ipsec_list, list) {
>> +            /* Skip dead xfrm states, they'll be freed later. */
>> +            if (ipsec->xs->km.state == XFRM_STATE_DEAD)
>> +                    continue;
>> +
>>              /* If new state is added before ipsec_lock acquired */
>>              if (ipsec->xs->xso.real_dev == real_dev)
>>                      continue;
>> @@ -560,7 +564,6 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
>>      struct net_device *bond_dev = xs->xso.dev;
>>      struct net_device *real_dev;
>>      netdevice_tracker tracker;
>> -    struct bond_ipsec *ipsec;
>>      struct bonding *bond;
>>      struct slave *slave;
>>  
>> @@ -592,15 +595,6 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
>>      real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
>>  out:
>>      netdev_put(real_dev, &tracker);
>> -    mutex_lock(&bond->ipsec_lock);
>> -    list_for_each_entry(ipsec, &bond->ipsec_list, list) {
>> -            if (ipsec->xs == xs) {
>> -                    list_del(&ipsec->list);
>> -                    kfree(ipsec);
>> -                    break;
>> -            }
>> -    }
>> -    mutex_unlock(&bond->ipsec_lock);
>>  }
>>  
>>  static void bond_ipsec_del_sa_all(struct bonding *bond)
>> @@ -617,6 +611,12 @@ static void bond_ipsec_del_sa_all(struct bonding *bond)
>>  
>>      mutex_lock(&bond->ipsec_lock);
>>      list_for_each_entry(ipsec, &bond->ipsec_list, list) {
>> +            if (ipsec->xs->km.state == XFRM_STATE_DEAD) {
>> +                    list_del(&ipsec->list);
> 
> To be able to do this here, you'll have to use list_for_each_entry_safe().
> 

One more thing - note that I'm far from an xfrm expert, but it seems to me that
here you also have to call xdo_dev_state_free() with the old active slave dev.
Otherwise it will never get called with the original real_dev after the switch
to a new active slave (or, more accurately, it might if the GC runs between the
switching, but that is a race).

Care must be taken wrt the sequence of events, because the XFRM GC may be
running in parallel. That probably means that in bond_ipsec_free_sa() you'll
have to take the mutex before calling xdo_dev_state_free() and check whether
the entry is still linked in the bond's ipsec list before calling the free_sa
callback. If it isn't, then del_sa_all got to it before the GC, and there's
nothing to do if it also called the dev's free_sa callback. The check for
real_dev doesn't seem to be enough to protect against this race.

Cheers,
 Nik

>> +                    kfree(ipsec);
>> +                    continue;
>> +            }
>> +
>>              if (!ipsec->xs->xso.real_dev)
>>                      continue;
>>  
>> @@ -640,6 +640,7 @@ static void bond_ipsec_free_sa(struct xfrm_state *xs)
>>      struct net_device *bond_dev = xs->xso.dev;
>>      struct net_device *real_dev;
>>      netdevice_tracker tracker;
>> +    struct bond_ipsec *ipsec;
>>      struct bonding *bond;
>>      struct slave *slave;
>>  
>> @@ -659,13 +660,24 @@ static void bond_ipsec_free_sa(struct xfrm_state *xs)
>>      if (!xs->xso.real_dev)
>>              goto out;
>>  
>> -    WARN_ON(xs->xso.real_dev != real_dev);
>> +    if (xs->xso.real_dev != real_dev)
>> +            goto out;
>>  
>>      if (real_dev && real_dev->xfrmdev_ops &&
>>          real_dev->xfrmdev_ops->xdo_dev_state_free)
>>              real_dev->xfrmdev_ops->xdo_dev_state_free(xs);
>>  out:
>>      netdev_put(real_dev, &tracker);
>> +
>> +    mutex_lock(&bond->ipsec_lock);
>> +    list_for_each_entry(ipsec, &bond->ipsec_list, list) {
>> +            if (ipsec->xs == xs) {
>> +                    list_del(&ipsec->list);
>> +                    kfree(ipsec);
>> +                    break;
>> +            }
>> +    }
>> +    mutex_unlock(&bond->ipsec_lock);
>>  }
>>  
>>  /**
> 


Reply via email to