Add few optimizations to userq fence driver.

v1:(Christian):
  - Remove unnecessary comments.
  - In drm_exec_init call give num_bo_handles as last parameter it would
    making allocation of the array more efficient
  - Handle return value of __xa_store() and improve the error handling of
    amdgpu_userq_fence_driver_alloc().
v2:(Christian):
   - Revert userq_xa xarray init to XA_FLAGS_LOCK_IRQ.
   - move the xa_unlock before the error check of the call xa_err(__xa_store())
     and moved this change to a separate patch as this is adding a missing error
     handling.
   - Removed the unnecessary comments.

Signed-off-by: Arunpravin Paneer Selvam <arunpravin.paneersel...@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 44 ++++++++++++-------
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |  6 +--
 .../gpu/drm/amd/include/amdgpu_userqueue.h    |  2 +-
 4 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 8d2a0331cd96..f3576c31428c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -76,7 +76,8 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device 
*adev,
        fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
        if (!fence_drv) {
                DRM_ERROR("Failed to allocate memory for fence driver\n");
-               return -ENOMEM;
+               r = -ENOMEM;
+               goto free_fence_drv;
        }
 
        /* Acquire seq64 memory */
@@ -84,7 +85,8 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device 
*adev,
                               &fence_drv->cpu_addr);
        if (r) {
                kfree(fence_drv);
-               return -ENOMEM;
+               r = -ENOMEM;
+               goto free_seq64;
        }
 
        memset(fence_drv->cpu_addr, 0, sizeof(u64));
@@ -94,7 +96,7 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device 
*adev,
        spin_lock_init(&fence_drv->fence_list_lock);
 
        fence_drv->adev = adev;
-       fence_drv->uq_fence_drv_xa_ref = &userq->uq_fence_drv_xa;
+       fence_drv->fence_drv_xa_ptr = &userq->fence_drv_xa;
        fence_drv->context = dma_fence_context_alloc(1);
        get_task_comm(fence_drv->timeline_name, current);
 
@@ -106,6 +108,13 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device 
*adev,
        userq->fence_drv = fence_drv;
 
        return 0;
+
+free_seq64:
+       amdgpu_seq64_free(adev, fence_drv->gpu_addr);
+free_fence_drv:
+       kfree(fence_drv);
+
+       return r;
 }
 
 void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver 
*fence_drv)
@@ -147,7 +156,7 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref)
        struct amdgpu_device *adev = fence_drv->adev;
        struct amdgpu_userq_fence *fence, *tmp;
        struct xarray *xa = &adev->userq_xa;
-       unsigned long index;
+       unsigned long index, flags;
        struct dma_fence *f;
 
        spin_lock(&fence_drv->fence_list_lock);
@@ -164,11 +173,11 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref)
        }
        spin_unlock(&fence_drv->fence_list_lock);
 
-       xa_lock(xa);
+       xa_lock_irqsave(xa, flags);
        xa_for_each(xa, index, xa_fence_drv)
                if (xa_fence_drv == fence_drv)
                        __xa_erase(xa, index);
-       xa_unlock(xa);
+       xa_unlock_irqrestore(xa, flags);
 
        /* Free seq64 memory */
        amdgpu_seq64_free(adev, fence_drv->gpu_addr);
@@ -212,12 +221,12 @@ int amdgpu_userq_fence_create(struct 
amdgpu_usermode_queue *userq,
        amdgpu_userq_fence_driver_get(fence_drv);
        dma_fence_get(fence);
 
-       if (!xa_empty(&userq->uq_fence_drv_xa)) {
+       if (!xa_empty(&userq->fence_drv_xa)) {
                struct amdgpu_userq_fence_driver *stored_fence_drv;
                unsigned long index, count = 0;
                int i = 0;
 
-               xa_for_each(&userq->uq_fence_drv_xa, index, stored_fence_drv)
+               xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
                        count++;
 
                userq_fence->fence_drv_array =
@@ -226,9 +235,9 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue 
*userq,
                                       GFP_KERNEL);
 
                if (userq_fence->fence_drv_array) {
-                       xa_for_each(&userq->uq_fence_drv_xa, index, 
stored_fence_drv) {
+                       xa_for_each(&userq->fence_drv_xa, index, 
stored_fence_drv) {
                                userq_fence->fence_drv_array[i] = 
stored_fence_drv;
-                               xa_erase(&userq->uq_fence_drv_xa, index);
+                               xa_erase(&userq->fence_drv_xa, index);
                                i++;
                        }
                }
@@ -378,7 +387,6 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void 
*data,
        struct drm_exec exec;
        u64 wptr;
 
-       /* Array of syncobj handles */
        num_syncobj_handles = args->num_syncobj_handles;
        syncobj_handles = 
memdup_user(u64_to_user_ptr(args->syncobj_handles_array),
                                      sizeof(u32) * num_syncobj_handles);
@@ -400,7 +408,6 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void 
*data,
                }
        }
 
-       /* Array of bo handles */
        num_bo_handles = args->num_bo_handles;
        bo_handles = memdup_user(u64_to_user_ptr(args->bo_handles_array),
                                 sizeof(u32) * num_bo_handles);
@@ -422,7 +429,9 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void 
*data,
                }
        }
 
-       drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+       drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, num_bo_handles);
+
+       /* Lock all BOs with retry handling */
        drm_exec_until_all_locked(&exec) {
                r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 1);
                drm_exec_retry_on_contention(&exec);
@@ -527,7 +536,6 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void 
*data,
                goto free_timeline_handles;
        }
 
-       /* Array of GEM object handles */
        gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
        if (!gobj) {
                r = -ENOMEM;
@@ -542,7 +550,9 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void 
*data,
                }
        }
 
-       drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+       drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, num_bo_handles);
+
+       /* Lock all BOs with retry handling */
        drm_exec_until_all_locked(&exec) {
                r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 0);
                drm_exec_retry_on_contention(&exec);
@@ -702,8 +712,8 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void 
*data,
                         * Otherwise, we would gather those references until we 
don't
                         * have any more space left and crash.
                         */
-                       if (fence_drv->uq_fence_drv_xa_ref) {
-                               r = xa_alloc(fence_drv->uq_fence_drv_xa_ref, 
&index, fence_drv,
+                       if (fence_drv->fence_drv_xa_ptr) {
+                               r = xa_alloc(fence_drv->fence_drv_xa_ptr, 
&index, fence_drv,
                                             xa_limit_32b, GFP_KERNEL);
                                if (r)
                                        goto free_fences;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
index f72424248cc5..89c82ba38b50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -54,7 +54,7 @@ struct amdgpu_userq_fence_driver {
        spinlock_t fence_list_lock;
        struct list_head fences;
        struct amdgpu_device *adev;
-       struct xarray *uq_fence_drv_xa_ref;
+       struct xarray *fence_drv_xa_ptr;
        char timeline_name[TASK_COMM_LEN];
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index e7f7354e0c0e..9b24218f7ad2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -48,8 +48,8 @@ static void amdgpu_userq_walk_and_drop_fence_drv(struct 
xarray *xa)
 static void
 amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
 {
-       amdgpu_userq_walk_and_drop_fence_drv(&userq->uq_fence_drv_xa);
-       xa_destroy(&userq->uq_fence_drv_xa);
+       amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
+       xa_destroy(&userq->fence_drv_xa);
        /* Drop the fence_drv reference held by user queue */
        amdgpu_userq_fence_driver_put(userq->fence_drv);
 }
@@ -348,7 +348,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union 
drm_amdgpu_userq *args)
        }
        queue->doorbell_index = index;
 
-       xa_init_flags(&queue->uq_fence_drv_xa, XA_FLAGS_ALLOC);
+       xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
        r = amdgpu_userq_fence_driver_alloc(adev, queue);
        if (r) {
                DRM_ERROR("Failed to alloc fence driver\n");
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h 
b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
index d035b5c2b14b..6eb094a54f4b 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -52,7 +52,7 @@ struct amdgpu_usermode_queue {
        struct amdgpu_userq_obj db_obj;
        struct amdgpu_userq_obj fw_obj;
        struct amdgpu_userq_obj wptr_obj;
-       struct xarray           uq_fence_drv_xa;
+       struct xarray           fence_drv_xa;
        struct amdgpu_userq_fence_driver *fence_drv;
        struct dma_fence        *last_fence;
 };
-- 
2.34.1

Reply via email to