Provide a mechanism for userspace to wait for kernel fences, and teach the
CS ioctl to return enough information to let userspace use this mechanism.

This mechanism isn't safe to include as-is, as there is no way for the CS
ioctl to indicate that it hasn't given you the information needed to let you
wait for a kernel fence. I'm thus sending it out as a starting point in case
someone else wants to continue digging into this.

Signed-off-by: Simon Farnsworth <simon.farnsworth at onelan.co.uk>
---
Note the FIXME - how does the CS ioctl indicate a fence failure?

I'm also a little concerned that my general userspace API isn't flexible
enough to match what we actually want - in particular, it would be nice for
GL_ARB_sync to be able to plonk fences down scattered throughout the IB, and
wait for that fence, not the whole CS. I couldn't easily determine whether
it was safe to add a kernel fence to a userspace IB during parsing, though,
so didn't attempt to do that.

Finally, this mechanism (as compared to the wait mechanism I proposed in
[PATCH] drm/radeon: Add support for userspace fence waits), doesn't do
anything to protect us against malicious userspace causing interrupt storms
- userspace can still fill an IB with EVENT_WRITE_EOP packets requesting an
interrupt, and this mechanism allows userspace to ensure that the interrupt
is enabled.

Someone needs to decide if that's a risk worth worrying about - if so, the
CS checkers need to reject attempts by userspace to request interrupts on
completion.

 drivers/gpu/drm/radeon/radeon.h       |    3 +++
 drivers/gpu/drm/radeon/radeon_cs.c    |   25 +++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_fence.c |   16 ++++++++++++++++
 drivers/gpu/drm/radeon/radeon_kms.c   |    1 +
 include/drm/radeon_drm.h              |   15 +++++++++++++++
 5 files changed, 60 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b4cfb11..f26744a 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -800,6 +800,7 @@ struct radeon_cs_parser {
        int                     chunk_ib_idx;
        int                     chunk_relocs_idx;
        int                     chunk_flags_idx;
+       int                     chunk_fence_idx;
        struct radeon_ib        *ib;
        void                    *track;
        unsigned                family;
@@ -1348,6 +1349,8 @@ int radeon_gem_set_tiling_ioctl(struct drm_device *dev, 
void *data,
                                struct drm_file *filp);
 int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
                                struct drm_file *filp);
+int radeon_fence_wait_value_ioctl(struct drm_device *dev, void *data,
+                                 struct drm_file *filp);

 /* VRAM scratch page for HDP bug, default vram page */
 struct r600_vram_scratch {
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 435a3d9..e0e8165 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -163,6 +163,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
        p->chunk_ib_idx = -1;
        p->chunk_relocs_idx = -1;
        p->chunk_flags_idx = -1;
+       p->chunk_fence_idx = -1;
        p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
        if (p->chunks_array == NULL) {
                return -ENOMEM;
@@ -207,6 +208,12 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
+               if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FENCE) {
+                       p->chunk_fence_idx = i;
+                       /* A fence chunk must be able to store the 
end-of-stream fence */
+                       if (p->chunks[i].length_dw != (sizeof(struct 
drm_radeon_fence_wait) / 4))
+                               return -EINVAL;
+               }

                p->chunks[i].length_dw = user_chunk.length_dw;
                p->chunks[i].user_ptr = (void __user *)(unsigned 
long)user_chunk.chunk_data;
@@ -279,6 +286,24 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
        return 0;
 }

+static void radeon_cs_parser_output_fence(struct radeon_cs_parser *parser)
+{
+       struct drm_radeon_fence_wait output;
+       void __user* chunk;
+
+       memset(&output, 0, sizeof(struct drm_radeon_fence_wait));
+
+       if (parser->chunk_fence_idx != -1) {
+               chunk = (void __user*)(unsigned 
long)parser->chunks_array[parser->chunk_fence_idx];
+               output.ring_token = parser->ib->fence->ring;
+               output.fence_token = parser->ib->fence->seq;
+               /* FIXME: What do I do about an error here? */
+               DRM_COPY_TO_USER(chunk,
+                                &output,
+                                sizeof(struct drm_radeon_fence_wait));
+       }
+}
+
 /**
  * cs_parser_fini() - clean parser states
  * @parser:    parser structure holding parsing context.
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c 
b/drivers/gpu/drm/radeon/radeon_fence.c
index 85893c3..50bb054 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -356,6 +356,22 @@ int radeon_fence_wait_value(struct radeon_device *rdev, 
int ring,
        return r;
 }

+int radeon_fence_wait_value_ioctl(struct drm_device *dev, void *data,
+                                 struct drm_file *filp)
+{
+       struct radeon_device *rdev = dev->dev_private;
+       struct drm_radeon_fence_wait *args = data;
+       int r;
+
+       r = radeon_fence_wait_value(rdev,
+                                   args->ring_token,
+                                   args->fence_token,
+                                   usecs_to_jiffies(args->timeout_usec));
+       if (r > 0)
+               r = 0;
+
+       return r;
+}
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
 {
        kref_get(&fence->kref);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c 
b/drivers/gpu/drm/radeon/radeon_kms.c
index d335288..a52db06 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -496,5 +496,6 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = {
        DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, 
DRM_AUTH|DRM_UNLOCKED),
        DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, 
DRM_AUTH|DRM_UNLOCKED),
        DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, 
DRM_AUTH|DRM_UNLOCKED),
+        DRM_IOCTL_DEF_DRV(RADEON_FENCE_WAIT, radeon_fence_wait_value_ioctl, 
DRM_AUTH|DRM_UNLOCKED),
 };
 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index dd2e9cf..d81ce94 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -510,6 +510,7 @@ typedef struct {
 #define DRM_RADEON_GEM_GET_TILING      0x29
 #define DRM_RADEON_GEM_BUSY            0x2a
 #define DRM_RADEON_GEM_VA              0x2b
+#define DRM_RADEON_FENCE_WAIT          0x2c

 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + 
DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + 
DRM_RADEON_CP_START)
@@ -552,6 +553,7 @@ typedef struct {
 #define DRM_IOCTL_RADEON_GEM_GET_TILING        DRM_IOWR(DRM_COMMAND_BASE + 
DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
 #define DRM_IOCTL_RADEON_GEM_BUSY      DRM_IOWR(DRM_COMMAND_BASE + 
DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
 #define DRM_IOCTL_RADEON_GEM_VA                DRM_IOWR(DRM_COMMAND_BASE + 
DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
+#define DRM_IOCTL_RADEON_FENCE_WAIT    DRM_IOWR(DRM_COMMAND_BASE + 
DRM_RADEON_FENCE_WAIT, struct drm_radeon_fence_wait)

 typedef struct drm_radeon_init {
        enum {
@@ -906,6 +908,12 @@ struct drm_radeon_gem_va {
 #define RADEON_CHUNK_ID_RELOCS 0x01
 #define RADEON_CHUNK_ID_IB     0x02
 #define RADEON_CHUNK_ID_FLAGS  0x03
+#define RADEON_CHUNK_ID_FENCE   0x04
+
+/* A RADEON_CHUNK_ID_FENCE is a struct drm_radeon_fence_wait. If the CS is
+ * accepted, the kernel will fill in the fence_token and ring_token elements
+ * such that userspace just needs to fill in timeout_usec and call the
+ * DRM_RADEON_FENCE_WAIT ioctl to wait for this CS to complete.  */

 /* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
 #define RADEON_CS_KEEP_TILING_FLAGS 0x01
@@ -967,4 +975,11 @@ struct drm_radeon_info {
        uint64_t                value;
 };

+struct drm_radeon_fence_wait {
+       uint64_t                fence_token;
+       uint64_t                timeout_usec;
+       uint32_t                ring_token;
+       uint32_t                padding;
+};
+
 #endif
-- 
1.7.6.4

Reply via email to