From: Christian König <christian.koe...@amd.com>

This patch adds a new 64bit ID as a result to each command submission.

Signed-off-by: Christian König <christian.koenig at amd.com>
---
 drivers/gpu/drm/radeon/Makefile     |   2 +-
 drivers/gpu/drm/radeon/radeon.h     |  13 +-
 drivers/gpu/drm/radeon/radeon_cs.c  |  13 ++
 drivers/gpu/drm/radeon/radeon_kms.c |  41 +++----
 drivers/gpu/drm/radeon/radeon_seq.c | 229 ++++++++++++++++++++++++++++++++++++
 include/uapi/drm/radeon_drm.h       |   1 +
 6 files changed, 277 insertions(+), 22 deletions(-)
 create mode 100644 drivers/gpu/drm/radeon/radeon_seq.c

diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 12bc212..7145f15 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -81,7 +81,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
        rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o 
\
        trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
        ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o 
\
-       radeon_sync.o
+       radeon_sync.o radeon_seq.o

 # add async DMA block
 radeon-y += \
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 3968f91..b9fde1d 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -433,6 +433,15 @@ static inline bool radeon_fence_is_earlier(struct 
radeon_fence *a,
 }

 /*
+ * Userspace command submission identifier generation
+ */
+struct radeon_seq;
+
+uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence);
+struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id);
+void radeon_seq_destroy(struct radeon_seq **seq);
+
+/*
  * Tiling registers
  */
 struct radeon_surface_reg {
@@ -975,7 +984,9 @@ struct radeon_vm_manager {
  * file private structure
  */
 struct radeon_fpriv {
-       struct radeon_vm                vm;
+       struct radeon_vm        vm;
+       struct mutex            seq_lock;
+       struct radeon_seq       *seq;
 };

 /*
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 3c3b7d9..c0fc8d8 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -398,6 +398,19 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser 
*parser, int error, bo
        unsigned i;

        if (!error) {
+               if (parser->chunk_flags &&
+                   parser->chunk_flags->length_dw > 4) {
+                       struct radeon_fpriv *fpriv = parser->filp->driver_priv;
+                       uint32_t __user *to = parser->chunk_flags->user_ptr;
+                       uint64_t id;
+
+                       mutex_lock(&fpriv->seq_lock);
+                       id = radeon_seq_push(&fpriv->seq, parser->ib.fence);
+                       mutex_unlock(&fpriv->seq_lock);
+
+                       copy_to_user(&to[3], &id, sizeof(uint64_t));
+               }
+
                /* Sort the buffer list from the smallest to largest buffer,
                 * which affects the order of buffers in the LRU list.
                 * This assures that the smallest buffers are added first
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c 
b/drivers/gpu/drm/radeon/radeon_kms.c
index f4dd26a..db5c986 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -585,39 +585,34 @@ void radeon_driver_lastclose_kms(struct drm_device *dev)
  */
 int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 {
+       struct radeon_fpriv *fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
        struct radeon_device *rdev = dev->dev_private;
        int r;

-       file_priv->driver_priv = NULL;
+       if (unlikely(!fpriv))
+               return -ENOMEM;
+
+       file_priv->driver_priv = fpriv;

        r = pm_runtime_get_sync(dev->dev);
        if (r < 0)
-               return r;
+               goto error;

        /* new gpu have virtual address space support */
        if (rdev->family >= CHIP_CAYMAN) {
-               struct radeon_fpriv *fpriv;
                struct radeon_vm *vm;
                int r;

-               fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
-               if (unlikely(!fpriv)) {
-                       return -ENOMEM;
-               }
-
                vm = &fpriv->vm;
                r = radeon_vm_init(rdev, vm);
-               if (r) {
-                       kfree(fpriv);
-                       return r;
-               }
+               if (r)
+                       goto error;

                if (rdev->accel_working) {
                        r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
                        if (r) {
                                radeon_vm_fini(rdev, vm);
-                               kfree(fpriv);
-                               return r;
+                               goto error;
                        }

                        /* map the ib pool buffer read only into
@@ -630,16 +625,20 @@ int radeon_driver_open_kms(struct drm_device *dev, struct 
drm_file *file_priv)
                                                  RADEON_VM_PAGE_SNOOPED);
                        if (r) {
                                radeon_vm_fini(rdev, vm);
-                               kfree(fpriv);
-                               return r;
+                               goto error;
                        }
                }
-               file_priv->driver_priv = fpriv;
        }

+       mutex_init(&fpriv->seq_lock);
+
        pm_runtime_mark_last_busy(dev->dev);
        pm_runtime_put_autosuspend(dev->dev);
        return 0;
+
+error:
+       kfree(fpriv);
+       return r;
 }

 /**
@@ -653,11 +652,13 @@ int radeon_driver_open_kms(struct drm_device *dev, struct 
drm_file *file_priv)
 void radeon_driver_postclose_kms(struct drm_device *dev,
                                 struct drm_file *file_priv)
 {
+       struct radeon_fpriv *fpriv = file_priv->driver_priv;
        struct radeon_device *rdev = dev->dev_private;

+       radeon_seq_destroy(&fpriv->seq);
+
        /* new gpu have virtual address space support */
        if (rdev->family >= CHIP_CAYMAN && file_priv->driver_priv) {
-               struct radeon_fpriv *fpriv = file_priv->driver_priv;
                struct radeon_vm *vm = &fpriv->vm;
                int r;

@@ -671,9 +672,9 @@ void radeon_driver_postclose_kms(struct drm_device *dev,
                }

                radeon_vm_fini(rdev, vm);
-               kfree(fpriv);
-               file_priv->driver_priv = NULL;
        }
+       kfree(fpriv);
+       file_priv->driver_priv = NULL;
 }

 /**
diff --git a/drivers/gpu/drm/radeon/radeon_seq.c 
b/drivers/gpu/drm/radeon/radeon_seq.c
new file mode 100644
index 0000000..d8857f1
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_seq.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Christian König <christian.koenig at amd.com>
+ */
+
+#include <drm/drmP.h>
+#include "radeon.h"
+
+/*
+ * ID sequences
+ * This code generates a 64bit identifier for a command submission.
+ * It works by adding the fence of the command submission to an automatically
+ * resizing ring buffer.
+ */
+
+struct radeon_seq {
+       uint64_t                start;          /* ID of the oldest fence still stored */
+       uint64_t                end;            /* next ID to hand out; valid IDs are [start, end) */
+       uint64_t                mask;           /* ring size - 1; size is a power of two */
+       struct radeon_seq       *replacement;   /* larger ring that superseded this one, or NULL */
+};
+
+/**
+ * radeon_seq_create - create a new sequence object
+ *
+ * @start: start value for this sequence
+ * @size: size of the ring buffer, must be power of two
+ *
+ * Allocate and initialize a new ring buffer and header.
+ * Returns NULL if allocation fails, new object otherwise.
+ */
+static struct radeon_seq *radeon_seq_create(uint64_t start, unsigned size)
+{
+       /* header and fence ring are one allocation; the fence pointers
+        * live directly behind the struct, see radeon_seq_ring() */
+       unsigned bytes = sizeof(struct radeon_seq) +
+               size * sizeof(struct radeon_fence *);
+
+       struct radeon_seq *seq;
+
+       seq = kmalloc(bytes, GFP_KERNEL);
+       if (!seq)
+               return NULL;
+
+       /* start == end means the ring is empty */
+       seq->start = start;
+       seq->end = start;
+       seq->mask = size - 1;   /* valid as index mask only because size is a power of two */
+       seq->replacement = NULL;
+
+       return seq;
+}
+
+/**
+ * radeon_seq_ring - get pointer to ring buffer
+ *
+ * @seq: sequence object
+ *
+ * Calculate the address of the ring buffer.
+ */
+static struct radeon_fence **radeon_seq_ring(struct radeon_seq *seq)
+{
+       /* &seq[1] is the first byte past the header, where the fence
+        * pointer array was placed by radeon_seq_create() */
+       return (struct radeon_fence **)&seq[1];
+}
+
+/**
+ * radeon_seq_try_free - try to free fences from the ring buffer
+ *
+ * @seq: sequence object
+ *
+ * Try to free fences from the start of the ring buffer.
+ */
+static void radeon_seq_try_free(struct radeon_seq *seq)
+{
+       struct radeon_fence **ring = radeon_seq_ring(seq);
+
+       /* fences are stored in submission order, so release them front to
+        * back and stop at the first one that hasn't signaled yet */
+       while (seq->start != seq->end) {
+               unsigned idx = seq->start & seq->mask;
+               struct radeon_fence *fence = ring[idx];
+
+               if (!radeon_fence_signaled(fence))
+                       break;
+
+               radeon_fence_unref(&fence);
+               ++seq->start;
+       }
+}
+
+/**
+ * radeon_seq_add - add new fence to the end of the ring buffer
+ *
+ * @seq: sequence object
+ * @f: the fence object
+ *
+ * Add the fence and return the generated ID.
+ * The caller must guarantee there is room in the ring
+ * (see radeon_seq_push()); no overflow check is done here.
+ */
+static uint64_t radeon_seq_add(struct radeon_seq *seq, struct radeon_fence *f)
+{
+       struct radeon_fence **ring = radeon_seq_ring(seq);
+
+       /* take a reference so the fence stays valid until queried/freed;
+        * the ID handed out is the pre-increment value of end */
+       ring[seq->end & seq->mask] = radeon_fence_ref(f);
+       return seq->end++;
+}
+
+/**
+ * radeon_seq_push - check for room and add the fence
+ *
+ * @seq: sequence object
+ * @fence: the fence object
+ *
+ * Check for room on the ring buffer, if there isn't enough
+ * reallocate the sequence object and add the fence.
+ * Returns the generated ID, or 0 on allocation failure (0 is never
+ * a valid ID because sequences start at 1).
+ */
+uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence)
+{
+       unsigned size_for_new_seq = 4;
+       uint64_t start_for_new_seq = 1;
+
+       if (*seq) {
+               /* try to release old replacements */
+               while ((*seq)->replacement) {
+                       radeon_seq_try_free(*seq);
+                       if ((*seq)->start == (*seq)->end) {
+                               /* superseded container drained, drop it */
+                               struct radeon_seq *repl = (*seq)->replacement;
+
+                               kfree(*seq);
+                               *seq = repl;
+                       } else {
+                               /* move on to the current container */
+                               seq = &(*seq)->replacement;
+                       }
+               }
+
+               /* check if we have enough room for one more fence */
+               radeon_seq_try_free(*seq);
+               if (((*seq)->end - (*seq)->start) <= (*seq)->mask)
+                       return radeon_seq_add(*seq, fence);
+
+               /* not enough room, let's allocate a replacement with
+                * double the size; note that the replacement starts at
+                * end + 1, so the ID "end" of the full container is
+                * deliberately never handed out */
+               size_for_new_seq = ((*seq)->mask + 1) * 2;
+               start_for_new_seq = (*seq)->end + 1;
+               seq = &(*seq)->replacement;
+       }
+
+       *seq = radeon_seq_create(start_for_new_seq, size_for_new_seq);
+       if (!*seq) {
+               /* not enough memory for a new sequence object, but failing
+                  here isn't a good idea either cause the commands are already
+                  submitted to the hardware. So just block on the fence. */
+               int r = radeon_fence_wait(fence, false);
+               if (r)
+                       DRM_ERROR("Error waiting for fence (%d)\n", r);
+               return 0;
+       }
+       return radeon_seq_add(*seq, fence);
+}
+
+/**
+ * radeon_seq_query - look up a fence by its ID
+ *
+ * @seq: sequence object
+ * @id: the generated ID
+ *
+ * Look up the fence associated with an ID handed out by radeon_seq_push().
+ * Returns the fence object or NULL if the ID is unknown or already freed.
+ */
+struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id)
+{
+       struct radeon_fence **ring;
+
+       /* valid IDs for a container are [start, end), so an ID equal to
+        * end can only live in a replacement; using ">" here would stop
+        * at this container and hand back a stale ring slot */
+       while (seq && id >= seq->end)
+               seq = seq->replacement;
+
+       if (!seq || id < seq->start)
+               return NULL;
+
+       ring = radeon_seq_ring(seq);
+       return ring[id & seq->mask];
+}
+
+/**
+ * radeon_seq_destroy - destroy the sequence object
+ *
+ * @seq_ptr: pointer to sequence object
+ *
+ * Destroy the sequence objects and release all fence references taken.
+ */
+void radeon_seq_destroy(struct radeon_seq **seq_ptr)
+{
+       struct radeon_seq *seq = *seq_ptr;
+
+       while (seq) {
+               struct radeon_seq *repl = seq->replacement;
+               struct radeon_fence **ring = radeon_seq_ring(seq);
+               uint64_t i;
+
+               /* walk the unmasked [start, end) interval and mask each
+                * index individually; masking start/end before iterating
+                * skips entries when the interval wraps around the ring
+                * and frees nothing at all when the ring is full */
+               for (i = seq->start; i != seq->end; ++i)
+                       radeon_fence_unref(&ring[i & seq->mask]);
+
+               kfree(seq);
+               seq = repl;
+       }
+       *seq_ptr = NULL;
+}
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 50d0fb4..6b2b2e7 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -959,6 +959,7 @@ struct drm_radeon_gem_va {
 #define RADEON_CS_RING_VCE          4
 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority 
*/
 /* 0 = normal, + = higher priority, - = lower priority */
+/* The fourth and fifth dwords are a 64bit fence ID generated for this CS */

 struct drm_radeon_cs_chunk {
        uint32_t                chunk_id;
-- 
1.9.1

Reply via email to