The fixed commit placed mutex_lock() inside spin_lock_bh(), which triggers
a warning:

  BUG: sleeping function called from invalid context at...

Fix this by moving the removal of the bond->ipsec_list entry from
bond_ipsec_del_sa() to bond_ipsec_free_sa(), which is not called with
spin_lock_bh() held.

Additionally, delete the IPsec list entry in bond_ipsec_del_sa_all() when
the XFRM state is DEAD, to prevent xdo_dev_state_free() from being
triggered again in bond_ipsec_free_sa().

For bond_ipsec_free_sa(), there are now three conditions:

  1. if (!slave): When no active device exists.
  2. if (!xs->xso.real_dev): When xdo_dev_state_add() fails.
  3. if (xs->xso.real_dev != real_dev): When an xs has already been freed
     by bond_ipsec_del_sa_all() due to migration, and the active slave has
     changed to a new device. Meanwhile, the xs is marked as DEAD because
     the XFRM entry was removed, which triggers xfrm_state_gc_task() and
     bond_ipsec_free_sa().

In all three cases, xdo_dev_state_free() should not be called; the xs
should only be removed from bond->ipsec_list.

At the same time, protect bond_ipsec_del_sa_all() and
bond_ipsec_add_sa_all() by taking the state's lock (x->lock) around each
xs being processed. This prevents XFRM from concurrently initiating
add/delete operations on the managed states.

Fixes: 2aeeef906d5a ("bonding: change ipsec_lock from spin lock to mutex")
Reported-by: Jakub Kicinski <k...@kernel.org>
Closes: https://lore.kernel.org/netdev/20241212062734.182a0...@kernel.org
Suggested-by: Cosmin Ratiu <cra...@nvidia.com>
Signed-off-by: Hangbin Liu <liuhang...@gmail.com>
---
 drivers/net/bonding/bond_main.c | 53 +++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e45bba240cbc..06b060d9b031 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -537,15 +537,22 @@ static void bond_ipsec_add_sa_all(struct bonding *bond)
        }
 
        list_for_each_entry(ipsec, &bond->ipsec_list, list) {
+               spin_lock_bh(&ipsec->xs->lock);
+               /* Skip dead xfrm states, they'll be freed later. */
+               if (ipsec->xs->km.state == XFRM_STATE_DEAD)
+                       goto next;
+
                /* If new state is added before ipsec_lock acquired */
                if (ipsec->xs->xso.real_dev == real_dev)
-                       continue;
+                       goto next;
 
                ipsec->xs->xso.real_dev = real_dev;
                if (real_dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) {
                        slave_warn(bond_dev, real_dev, "%s: failed to add 
SA\n", __func__);
                        ipsec->xs->xso.real_dev = NULL;
                }
+next:
+               spin_unlock_bh(&ipsec->xs->lock);
        }
 out:
        mutex_unlock(&bond->ipsec_lock);
@@ -560,7 +567,6 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
        struct net_device *bond_dev = xs->xso.dev;
        struct net_device *real_dev;
        netdevice_tracker tracker;
-       struct bond_ipsec *ipsec;
        struct bonding *bond;
        struct slave *slave;
 
@@ -592,15 +598,6 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
        real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
 out:
        netdev_put(real_dev, &tracker);
-       mutex_lock(&bond->ipsec_lock);
-       list_for_each_entry(ipsec, &bond->ipsec_list, list) {
-               if (ipsec->xs == xs) {
-                       list_del(&ipsec->list);
-                       kfree(ipsec);
-                       break;
-               }
-       }
-       mutex_unlock(&bond->ipsec_lock);
 }
 
 static void bond_ipsec_del_sa_all(struct bonding *bond)
@@ -617,8 +614,18 @@ static void bond_ipsec_del_sa_all(struct bonding *bond)
 
        mutex_lock(&bond->ipsec_lock);
        list_for_each_entry(ipsec, &bond->ipsec_list, list) {
+               spin_lock_bh(&ipsec->xs->lock);
                if (!ipsec->xs->xso.real_dev)
-                       continue;
+                       goto next;
+
+               if (ipsec->xs->km.state == XFRM_STATE_DEAD) {
+                       /* already dead no need to delete again */
+                       if (real_dev->xfrmdev_ops->xdo_dev_state_free)
+                               
real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs);
+                       list_del(&ipsec->list);
+                       kfree(ipsec);
+                       goto next;
+               }
 
                if (!real_dev->xfrmdev_ops ||
                    !real_dev->xfrmdev_ops->xdo_dev_state_delete ||
@@ -631,6 +638,8 @@ static void bond_ipsec_del_sa_all(struct bonding *bond)
                        if (real_dev->xfrmdev_ops->xdo_dev_state_free)
                                
real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs);
                }
+next:
+               spin_unlock_bh(&ipsec->xs->lock);
        }
        mutex_unlock(&bond->ipsec_lock);
 }
@@ -640,6 +649,7 @@ static void bond_ipsec_free_sa(struct xfrm_state *xs)
        struct net_device *bond_dev = xs->xso.dev;
        struct net_device *real_dev;
        netdevice_tracker tracker;
+       struct bond_ipsec *ipsec;
        struct bonding *bond;
        struct slave *slave;
 
@@ -659,11 +669,22 @@ static void bond_ipsec_free_sa(struct xfrm_state *xs)
        if (!xs->xso.real_dev)
                goto out;
 
-       WARN_ON(xs->xso.real_dev != real_dev);
+       mutex_lock(&bond->ipsec_lock);
+       list_for_each_entry(ipsec, &bond->ipsec_list, list) {
+               if (ipsec->xs == xs) {
+                       /* do xdo_dev_state_free if real_dev matches,
+                        * otherwise only remove the list
+                        */
+                       if (real_dev && real_dev->xfrmdev_ops &&
+                           real_dev->xfrmdev_ops->xdo_dev_state_free)
+                               real_dev->xfrmdev_ops->xdo_dev_state_free(xs);
+                       list_del(&ipsec->list);
+                       kfree(ipsec);
+                       break;
+               }
+       }
+       mutex_unlock(&bond->ipsec_lock);
 
-       if (real_dev && real_dev->xfrmdev_ops &&
-           real_dev->xfrmdev_ops->xdo_dev_state_free)
-               real_dev->xfrmdev_ops->xdo_dev_state_free(xs);
 out:
        netdev_put(real_dev, &tracker);
 }
-- 
2.46.0


Reply via email to