There is a circular locking dependency between the gfx and kfd paths with the HMM change:
lock(dqm) -> bo::reserve -> amdgpu_mn_lock

To avoid this, move init/uninit_mqd() out of lock(dqm), to remove the nested
locking between mmap_sem and bo::reserve. The resulting locking order
is: bo::reserve -> amdgpu_mn_lock(p->mn)

Change-Id: I2ec09a47571f6b4c8eaef93f22c0a600f5f70153
Signed-off-by: Philip Yang <philip.y...@amd.com>
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 67 ++++++++++---------
 1 file changed, 36 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8372556b52eb..fe120cc0930c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1158,6 +1158,33 @@ static int create_queue_cpsch(struct 
device_queue_manager *dqm, struct queue *q,
 
        retval = 0;
 
+       /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
+        * lock(dqm) -> bo::reserve
+        */
+       mqd_mgr = dqm->ops.get_mqd_manager(dqm,
+                       get_mqd_type_from_queue_type(q->properties.type));
+
+       if (!mqd_mgr) {
+               retval = -ENOMEM;
+               goto out;
+       }
+
+       /*
+        * Eviction state logic: we only mark active queues as evicted
+        * to avoid the overhead of restoring inactive queues later
+        */
+       if (qpd->evicted)
+               q->properties.is_evicted = (q->properties.queue_size > 0 &&
+                                           q->properties.queue_percent > 0 &&
+                                           q->properties.queue_address != 0);
+       dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+       q->properties.tba_addr = qpd->tba_addr;
+       q->properties.tma_addr = qpd->tma_addr;
+       retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
+                               &q->gart_mqd_addr, &q->properties);
+       if (retval)
+               goto out;
+
        dqm_lock(dqm);
 
        if (dqm->total_queue_count >= max_num_of_queues_per_device) {
@@ -1181,30 +1208,6 @@ static int create_queue_cpsch(struct 
device_queue_manager *dqm, struct queue *q,
        if (retval)
                goto out_deallocate_sdma_queue;
 
-       mqd_mgr = dqm->ops.get_mqd_manager(dqm,
-                       get_mqd_type_from_queue_type(q->properties.type));
-
-       if (!mqd_mgr) {
-               retval = -ENOMEM;
-               goto out_deallocate_doorbell;
-       }
-       /*
-        * Eviction state logic: we only mark active queues as evicted
-        * to avoid the overhead of restoring inactive queues later
-        */
-       if (qpd->evicted)
-               q->properties.is_evicted = (q->properties.queue_size > 0 &&
-                                           q->properties.queue_percent > 0 &&
-                                           q->properties.queue_address != 0);
-
-       dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
-
-       q->properties.tba_addr = qpd->tba_addr;
-       q->properties.tma_addr = qpd->tma_addr;
-       retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
-                               &q->gart_mqd_addr, &q->properties);
-       if (retval)
-               goto out_deallocate_doorbell;
 
        list_add(&q->list, &qpd->queues_list);
        qpd->queue_count++;
@@ -1228,14 +1231,12 @@ static int create_queue_cpsch(struct 
device_queue_manager *dqm, struct queue *q,
        dqm_unlock(dqm);
        return retval;
 
-out_deallocate_doorbell:
-       deallocate_doorbell(qpd, q);
 out_deallocate_sdma_queue:
        if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
                deallocate_sdma_queue(dqm, q->sdma_id);
 out_unlock:
        dqm_unlock(dqm);
-
+out:
        return retval;
 }
 
@@ -1398,8 +1399,6 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
                        qpd->reset_wavefronts = true;
        }
 
-       mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
-
        /*
         * Unconditionally decrement this counter, regardless of the queue's
         * type
@@ -1410,6 +1409,9 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
 
        dqm_unlock(dqm);
 
+       /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
+       mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
        return retval;
 
 failed:
@@ -1631,7 +1633,11 @@ static int process_termination_cpsch(struct 
device_queue_manager *dqm,
                qpd->reset_wavefronts = false;
        }
 
-       /* lastly, free mqd resources */
+       dqm_unlock(dqm);
+
+       /* Lastly, free mqd resources.
+        * Do uninit_mqd() after dqm_unlock to avoid circular locking.
+        */
        list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
                mqd_mgr = dqm->ops.get_mqd_manager(dqm,
                        get_mqd_type_from_queue_type(q->properties.type));
@@ -1645,7 +1651,6 @@ static int process_termination_cpsch(struct 
device_queue_manager *dqm,
        }
 
 out:
-       dqm_unlock(dqm);
        return retval;
 }
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to