The next patch will delegate fanout operations to a background worker,
which adds cancel_work_sync() to rds_tcp_conn_free(). This is called
during a connection cleanup and requires an operations to be blocking.
However, the error path of __rds_conn_create() currently calls
trans->conn_free() while holding rds_conn_lock (spinlock) and
rcu_read_lock, which creates an atomic context where cancel_work_sync()
cannot sleep.

To avoid this, refactor the error/race paths to defer
trans->conn_free() calls until after locks are released at the out:
label. The 'out:' label already runs outside locks because
destroy_workqueue() can sleep. So we extend this pattern to transport
cleanup as well.  This way, connections that "lose" the race are safe
to clean up outside the critical section since they were never added
to the hashtable, and therefore are inaccessible to other threads.

Link: https://ci.syzbot.org/series/1a5ef180-c02c-401d-9df7-670b18570a55
Signed-off-by: Allison Henderson <[email protected]>
---
 net/rds/connection.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/net/rds/connection.c b/net/rds/connection.c
index 185f73b01694..804e928da223 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -170,6 +170,7 @@ static struct rds_connection *__rds_conn_create(struct net 
*net,
        struct hlist_head *head = rds_conn_bucket(laddr, faddr);
        struct rds_transport *loop_trans;
        struct rds_conn_path *free_cp = NULL;
+       struct rds_transport *free_trans = NULL;
        unsigned long flags;
        int ret, i;
        int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
@@ -305,7 +306,7 @@ static struct rds_connection *__rds_conn_create(struct net 
*net,
        if (parent) {
                /* Creating passive conn */
                if (parent->c_passive) {
-                       trans->conn_free(conn->c_path[0].cp_transport_data);
+                       free_trans = trans;
                        free_cp = conn->c_path;
                        kmem_cache_free(rds_conn_slab, conn);
                        conn = parent->c_passive;
@@ -321,18 +322,7 @@ static struct rds_connection *__rds_conn_create(struct net 
*net,
                found = rds_conn_lookup(net, head, laddr, faddr, trans,
                                        tos, dev_if);
                if (found) {
-                       struct rds_conn_path *cp;
-                       int i;
-
-                       for (i = 0; i < npaths; i++) {
-                               cp = &conn->c_path[i];
-                               /* The ->conn_alloc invocation may have
-                                * allocated resource for all paths, so all
-                                * of them may have to be freed here.
-                                */
-                               if (cp->cp_transport_data)
-                                       trans->conn_free(cp->cp_transport_data);
-                       }
+                       free_trans = trans;
                        free_cp = conn->c_path;
                        kmem_cache_free(rds_conn_slab, conn);
                        conn = found;
@@ -349,9 +339,23 @@ static struct rds_connection *__rds_conn_create(struct net 
*net,
 
 out:
        if (free_cp) {
-               for (i = 0; i < npaths; i++)
+               for (i = 0; i < npaths; i++) {
+                       /*
+                        * The trans->conn_alloc call may have allocated
+                        * resources for the cp paths, which will need to
+                        * be freed before freeing cp itself.  We do this here
+                        * after the out: label so it runs outside the
+                        * rds_conn_lock spinlock and rcu_read_lock section,
+                        * since both destroy_workqueue() and conn_free can
+                        * block. The local free_trans pointer is only set in
+                        * the race-loss paths where conn_alloc() succeeded.
+                        */
+                       if (free_trans && free_cp[i].cp_transport_data)
+                               free_trans->conn_free
+                                       (free_cp[i].cp_transport_data);
                        if (free_cp[i].cp_wq != rds_wq)
                                destroy_workqueue(free_cp[i].cp_wq);
+               }
                kfree(free_cp);
        }
 
-- 
2.43.0


Reply via email to