The CP hang occurs in the OCL conformance test only on a Supermicro
platform that has 40 cores, where the test spawns 40 threads.
The root cause is a race condition on flags that are updated outside
the DQM lock.

The fix is to move the updates of the is_evicted and is_active flags
(the latter done in init_mqd()) into the region protected by the
DQM lock.

Signed-off-by: Eric Huang <jinhuieric.hu...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 9ffdda5..535c981 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1157,12 +1157,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
        mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
                        q->properties.type)];
-       /*
-        * Eviction state logic: mark all queues as evicted, even ones
-        * not currently active. Restoring inactive queues later only
-        * updates the is_evicted flag but is a no-op otherwise.
-        */
-       q->properties.is_evicted = !!qpd->evicted;
+
        if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
                q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
                dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
@@ -1173,9 +1168,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
                retval = -ENOMEM;
                goto out_deallocate_doorbell;
        }
+
+       dqm_lock(dqm);
+       /*
+        * Eviction state logic: mark all queues as evicted, even ones
+        * not currently active. Restoring inactive queues later only
+        * updates the is_evicted flag but is a no-op otherwise.
+        */
+       q->properties.is_evicted = !!qpd->evicted;
+       q->properties.is_suspended = false;
        mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
                                &q->gart_mqd_addr, &q->properties);
-       dqm_lock(dqm);
 
        list_add(&q->list, &qpd->queues_list);
        qpd->queue_count++;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to