Add user fence wait IOCTL timeline syncobj support.

v2:(Christian)
  - handle dma_fence_wait() return value.
  - shorten the variable name syncobj_timeline_points a bit.
  - move num_points up to avoid padding issues.

v3:(Christian)
  - Handle timeline drm_syncobj_find_fence() call error
    handling
  - Use dma_fence_unwrap_for_each() in timeline fence as
    there could be more than one fence.

Signed-off-by: Arunpravin Paneer Selvam <arunpravin.paneersel...@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 99 +++++++++++++++++--
 include/uapi/drm/amdgpu_drm.h                 | 16 ++-
 2 files changed, 107 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 8f9d2427d380..76552eed6d86 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -24,6 +24,7 @@
 
 #include <linux/kref.h>
 #include <linux/slab.h>
+#include <linux/dma-fence-unwrap.h>
 
 #include <drm/drm_exec.h>
 #include <drm/drm_syncobj.h>
@@ -474,11 +475,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
                            struct drm_file *filp)
 {
+       u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles;
+       u32 num_syncobj, num_bo_handles, num_points;
        struct drm_amdgpu_userq_fence_info *fence_info = NULL;
        struct drm_amdgpu_userq_wait *wait_info = data;
-       u32 *syncobj_handles, *bo_handles;
        struct dma_fence **fences = NULL;
-       u32 num_syncobj, num_bo_handles;
        struct drm_gem_object **gobj;
        struct drm_exec exec;
        int r, i, entry, cnt;
@@ -498,11 +499,26 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
                goto free_bo_handles;
        }
 
+       num_points = wait_info->num_points;
+       timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
+                                      sizeof(u32) * num_points);
+       if (IS_ERR(timeline_handles)) {
+               r = PTR_ERR(timeline_handles);
+               goto free_syncobj_handles;
+       }
+
+       timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
+                                     sizeof(u32) * num_points);
+       if (IS_ERR(timeline_points)) {
+               r = PTR_ERR(timeline_points);
+               goto free_timeline_handles;
+       }
+
        /* Array of GEM object handles */
        gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
        if (!gobj) {
                r = -ENOMEM;
-               goto free_syncobj_handles;
+               goto free_timeline_points;
        }
 
        for (entry = 0; entry < num_bo_handles; entry++) {
@@ -524,15 +540,40 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
        }
 
        if (!wait_info->num_fences) {
+               if (num_points) {
+                       struct dma_fence_unwrap iter;
+                       struct dma_fence *fence;
+                       struct dma_fence *f;
+
+                       for (i = 0; i < num_points; i++) {
+                               r = drm_syncobj_find_fence(filp, timeline_handles[i],
+                                                          timeline_points[i],
+                                                          DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+                                                          &fence);
+                               if (r)
+                                       goto exec_fini;
+
+                               num_fences++;
+
+                               dma_fence_unwrap_for_each(f, &iter, fence)
+                                       num_fences++;
+
+                               dma_fence_put(fence);
+                       }
+               }
+
                /* Count syncobj's fence */
                for (i = 0; i < num_syncobj; i++) {
                        struct dma_fence *fence;
 
                        r = drm_syncobj_find_fence(filp, syncobj_handles[i],
-                                                  0, 0, &fence);
+                                                  0,
+                                                  DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+                                                  &fence);
                        if (r)
                                goto exec_fini;
 
                        num_fences++;
                        dma_fence_put(fence);
                }
@@ -588,12 +629,46 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
                        }
                }
 
+               if (num_points) {
+                       struct dma_fence_unwrap iter;
+                       struct dma_fence *fence;
+                       struct dma_fence *f;
+
+                       for (i = 0; i < num_points; i++) {
+                               r = drm_syncobj_find_fence(filp, timeline_handles[i],
+                                                          timeline_points[i],
+                                                          DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+                                                          &fence);
+                               if (r)
+                                       goto free_fences;
+
+                               if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+                                       r = -EINVAL;
+                                       goto free_fences;
+                               }
+
+                               fences[num_fences++] = fence;
+
+                               dma_fence_unwrap_for_each(f, &iter, fence) {
+                                       if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+                                               r = -EINVAL;
+                                               goto free_fences;
+                                       }
+
+                                       dma_fence_get(f);
+                                       fences[num_fences++] = f;
+                               }
+                       }
+               }
+
                /* Retrieve syncobj's fence */
                for (i = 0; i < num_syncobj; i++) {
                        struct dma_fence *fence;
 
                        r = drm_syncobj_find_fence(filp, syncobj_handles[i],
-                                                  0, 0, &fence);
+                                                  0,
+                                                  DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+                                                  &fence);
                        if (r)
                                goto free_fences;
 
@@ -616,9 +691,13 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
                                 * Just waiting on other driver fences should
                                 * be good for now
                                 */
-                               dma_fence_wait(fences[i], false);
-                               dma_fence_put(fences[i]);
+                               r = dma_fence_wait(fences[i], true);
+                               if (r) {
+                                       dma_fence_put(fences[i]);
+                                       goto free_fences;
+                               }
 
+                               dma_fence_put(fences[i]);
                                continue;
                        }
 
@@ -664,6 +743,8 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
                drm_gem_object_put(gobj[i]);
        kfree(gobj);
 
+       kfree(timeline_points);
+       kfree(timeline_handles);
        kfree(syncobj_handles);
        kfree(bo_handles);
 
@@ -681,6 +762,10 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
        while (entry-- > 0)
                drm_gem_object_put(gobj[entry]);
        kfree(gobj);
+free_timeline_points:
+       kfree(timeline_points);
+free_timeline_handles:
+       kfree(timeline_handles);
 free_syncobj_handles:
        kfree(syncobj_handles);
 free_bo_handles:
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index af42798e901d..3b24e0cb1b51 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -521,12 +521,26 @@ struct drm_amdgpu_userq_wait {
         * matching fence wait info pair in @userq_fence_info.
         */
        __u32   bo_wait_flags;
-       __u32   pad;
+       /**
+        * @num_points: The number of timeline syncobj handles in the
+        * @syncobj_timeline_handles array and of points in the
+        * @syncobj_timeline_points array.
+        */
+       __u32   num_points;
        /**
         * @syncobj_handles_array: An array of syncobj handles defined to get the
         * fence wait information of every syncobj handles in the array.
         */
        __u64   syncobj_handles_array;
+       /**
+        * @syncobj_timeline_handles: An array of timeline syncobj handles used to
+        * look up the fence wait information for each timeline syncobj handle in
+        * the array.
+        */
+       __u64   syncobj_timeline_handles;
+       /**
+        * @syncobj_timeline_points: An array of timeline syncobj points, one wait
+        * point for each handle in the @syncobj_timeline_handles array.
+        */
+       __u64   syncobj_timeline_points;
        /**
         * @bo_handles_array: An array of GEM BO handles defined to fetch the 
fence
         * wait information of every BO handles in the array.
-- 
2.34.1

Reply via email to