Now that the user can opt out of implicit fencing, we need to give them
back control over the fencing. We employ sync_file to wrap our
drm_i915_gem_request and provide an fd that userspace can merge with
other sync_file fds and pass back to the kernel to wait upon before
future execution.
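
For illustration only (not part of this patch), here is a minimal userspace
sketch of driving the new interface. The helper name, error handling and the
caller-populated execbuffer2 struct are assumptions for the example; the flag
bits, the rsvd2 packing and the EXECBUFFER2_WR ioctl follow the definitions
added below:

/*
 * Illustrative only: submit with an optional in-fence and request an
 * out-fence. Assumes uapi headers that carry the definitions added by
 * this patch, and an execbuf already populated by the caller.
 */
#include <errno.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int submit_with_fences(int drm_fd,
                              struct drm_i915_gem_execbuffer2 *execbuf,
                              int in_fence_fd, int *out_fence_fd)
{
        execbuf->rsvd2 = 0;

        if (in_fence_fd >= 0) {
                /* the in-fence fd is passed in the lower 32 bits of rsvd2 */
                execbuf->flags |= I915_EXEC_FENCE_IN;
                execbuf->rsvd2 |= (__u32)in_fence_fd;
        }

        /* ask for a sync_file fd back for this request */
        execbuf->flags |= I915_EXEC_FENCE_OUT;

        /* the _WR variant is needed so rsvd2 is copied back to userspace */
        if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf))
                return -errno;

        /* the out-fence fd is returned in the upper 32 bits of rsvd2 */
        *out_fence_fd = execbuf->rsvd2 >> 32;
        return 0;
}

A real client would typically call this through libdrm's drmIoctl() to retry
on EINTR, merge the returned fd with other sync_file fds as needed, and close
the fds once consumed.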

Testcase: igt/gem_exec_fence
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig               |  1 +
 drivers/gpu/drm/i915/i915_drv.c            |  5 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 82 ++++++++++++++++++++++++++++--
 include/uapi/drm/i915_drm.h                |  8 ++-
 4 files changed, 89 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 7769e469118f..319ca27ea719 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -18,6 +18,7 @@ config DRM_I915
        select INPUT if ACPI
        select ACPI_VIDEO if ACPI
        select ACPI_BUTTON if ACPI
+       select SYNC_FILE
        help
          Choose this option if you have a system that has "Intel Graphics
          Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3ecd9e26bdfb..acc8805828a2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -358,6 +358,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
        case I915_PARAM_HAS_EXEC_ASYNC:
                value = 1;
                break;
+       case I915_PARAM_HAS_EXEC_FENCE:
+               value = 1;
+               break;
        default:
                DRM_DEBUG("Unknown parameter %d\n", param->param);
                return -EINVAL;
@@ -2552,7 +2555,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
        DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
-       DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
        DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a0f46293e492..99d50c0a710c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -28,6 +28,7 @@
 
 #include <linux/dma_remapping.h>
 #include <linux/reservation.h>
+#include <linux/sync_file.h>
 #include <linux/uaccess.h>
 
 #include <drm/drmP.h>
@@ -1686,6 +1687,35 @@ eb_select_engine(struct drm_i915_private *dev_priv,
        return engine;
 }
 
+static int eb_add_fence(struct drm_i915_gem_request *req, struct fence *fence)
+{
+       struct fence_array *array;
+       int i;
+
+       if (fence_is_i915(fence))
+               return __eb_sync(req, to_request(fence));
+
+       if (!fence_is_array(fence))
+               return i915_sw_fence_await_dma_fence(&req->submit,
+                                                    fence, GFP_KERNEL);
+
+       array = to_fence_array(fence);
+       for (i = 0; i < array->num_fences; i++) {
+               struct fence *child = array->fences[i];
+               int ret;
+
+               if (fence_is_i915(child))
+                       ret = __eb_sync(req, to_request(child));
+               else
+                       ret = i915_sw_fence_await_dma_fence(&req->submit,
+                                                           child, GFP_KERNEL);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                       struct drm_file *file,
@@ -1703,6 +1733,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
        struct i915_execbuffer_params *params = &params_master;
        const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
        u32 dispatch_flags;
+       struct fence *in_fence = NULL;
+       struct sync_file *out_fence = NULL;
+       int out_fence_fd = -1;
        int ret;
        bool need_relocs;
 
@@ -1746,6 +1779,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                dispatch_flags |= I915_DISPATCH_RS;
        }
 
+       if (args->flags & I915_EXEC_FENCE_IN) {
+               in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+               if (!in_fence) {
+                       ret = -EINVAL;
+                       goto pre_mutex_err;
+               }
+       }
+
+       if (args->flags & I915_EXEC_FENCE_OUT) {
+               out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+               if (out_fence_fd < 0) {
+                       ret = out_fence_fd;
+                       out_fence_fd = -1;
+                       goto pre_mutex_err;
+               }
+       }
+
        /* Take a local wakeref for preparing to dispatch the execbuf as
         * we expect to access the hardware fairly frequently in the
         * process. Upon first dispatch, we acquire another prolonged
@@ -1890,6 +1940,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                goto err_batch_unpin;
        }
 
+       if (in_fence) {
+               ret = eb_add_fence(params->request, in_fence);
+               if (ret < 0)
+                       goto err_request;
+       }
+
+       if (out_fence_fd != -1) {
+               out_fence = sync_file_create(fence_get(&params->request->fence));
+               if (!out_fence) {
+                       ret = -ENOMEM;
+                       goto err_request;
+               }
+       }
+
        /* Whilst this request exists, batch_obj will be on the
         * active_list, and so will hold the active reference. Only when this
         * request is retired will the the batch_obj be moved onto the
@@ -1917,6 +1981,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
        ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
        __i915_add_request(params->request, ret == 0);
+       if (out_fence) {
+               if (ret == 0) {
+                       fd_install(out_fence_fd, out_fence->file);
+                       args->rsvd2 &= ~((u64)0xffffffff << 32);
+                       args->rsvd2 |= (u64)out_fence_fd << 32;
+                       out_fence_fd = -1;
+               } else {
+                       fput(out_fence->file);
+               }
+       }
 
 err_batch_unpin:
        /*
@@ -1938,6 +2012,9 @@ pre_mutex_err:
        /* intel_gpu_busy should also get a ref, so it will free when the device
         * is really idle. */
        intel_runtime_pm_put(dev_priv);
+       if (out_fence_fd != -1)
+               put_unused_fd(out_fence_fd);
+       fence_put(in_fence);
        return ret;
 }
 
@@ -2045,11 +2122,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
                return -EINVAL;
        }
 
-       if (args->rsvd2 != 0) {
-               DRM_DEBUG("dirty rvsd2 field\n");
-               return -EINVAL;
-       }
-
        exec2_list = drm_malloc_gfp(args->buffer_count,
                                    sizeof(*exec2_list),
                                    GFP_TEMPORARY);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 75f9cab23249..9fe670cc7bf3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE     0x27
 #define DRM_I915_OVERLAY_ATTRS 0x28
 #define DRM_I915_GEM_EXECBUFFER2       0x29
+#define DRM_I915_GEM_EXECBUFFER2_WR    DRM_I915_GEM_EXECBUFFER2
 #define DRM_I915_GET_SPRITE_COLORKEY   0x2a
 #define DRM_I915_SET_SPRITE_COLORKEY   0x2b
 #define DRM_I915_GEM_WAIT      0x2c
@@ -279,6 +280,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_INIT                DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER  DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR      DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
 #define DRM_IOCTL_I915_GEM_PIN         DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN       DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY                DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -388,6 +390,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_POOLED_EU        38
 #define I915_PARAM_MIN_EU_IN_POOL       39
 #define I915_PARAM_HAS_EXEC_ASYNC       40
+#define I915_PARAM_HAS_EXEC_FENCE       41
 
 typedef struct drm_i915_getparam {
        __s32 param;
@@ -821,7 +824,10 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_RESOURCE_STREAMER     (1<<15)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
+#define I915_EXEC_FENCE_IN             (1<<16)
+#define I915_EXEC_FENCE_OUT            (1<<17)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK      (0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.9.3
