From: Marek Ol??k <marek.ol...@amd.com>

Userspace should set the first 4 bits of drm_radeon_cs_reloc::flags to
a number from 0 to 15. The higher the number, the higher the priority,
which means a buffer with a higher number will be validated sooner.

The old behavior is preserved: Buffers used for write are prioritized over
read-only buffers if the userspace doesn't set the number.

Signed-off-by: Marek Ol??k <marek.olsak at amd.com>
---
 drivers/gpu/drm/radeon/radeon.h        |  2 +-
 drivers/gpu/drm/radeon/radeon_cs.c     | 53 ++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/radeon/radeon_object.c | 10 -------
 drivers/gpu/drm/radeon/radeon_object.h |  2 --
 4 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index d37a57a..f7a3174 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -481,7 +481,7 @@ struct radeon_bo_list {
        struct ttm_validate_buffer tv;
        struct radeon_bo        *bo;
        uint64_t                gpu_offset;
-       bool                    written;
+       unsigned                priority;
        unsigned                domain;
        unsigned                alt_domain;
        u32                     tiling_flags;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index d49a3f7..1ba1a48 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -31,6 +31,41 @@
 #include "radeon.h"
 #include "radeon_trace.h"

+/* This is a variation of the bucket sort with O(n) time complexity.
+ * The relocations are sorted from the highest to the lowest priority. */
+static void sort_relocs_for_validation(struct list_head *list)
+{
+       struct list_head bucket[17], *it, *tmp;
+       unsigned i, priority;
+
+       for (i = 0; i < 17; i++)
+               INIT_LIST_HEAD(&bucket[i]);
+
+       /* Move the elements into buckets. An i-th bucket only contains
+        * elements with priorities i*2 and i*2+1. Odd numbers are added
+        * at the head of a bucket and even numbers are added at the tail,
+        * therefore all buckets are always sorted. */
+       list_for_each_safe(it, tmp, list) {
+               priority = list_entry(it, struct radeon_bo_list,
+                                     tv.head)->priority;
+               i = priority / 2;
+               i = min(i, 16u) ;
+
+               if (priority % 2 == 1) {
+                       list_move(it, &bucket[i]);
+               } else {
+                       list_move_tail(it, &bucket[i]);
+               }
+       }
+
+       INIT_LIST_HEAD(list);
+
+       /* connect the sorted buckets */
+       for (i = 0; i < 17; i++) {
+               list_splice(&bucket[i], list);
+       }
+}
+
 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 {
        struct drm_device *ddev = p->rdev->ddev;
@@ -80,7 +115,15 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser 
*p)
                p->relocs_ptr[i] = &p->relocs[i];
                p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
                p->relocs[i].lobj.bo = p->relocs[i].robj;
-               p->relocs[i].lobj.written = !!r->write_domain;
+
+               /* The userspace buffer priorities are from 0 to 15. A higher
+                * number means the buffer is more important.
+                * Also, the buffers used for write have a higher priority than
+                * the buffers used for read only, which doubles the range
+                * to 0 to 31. Numbers 32 and 33 are reserved for the kernel
+                * driver.
+                */
+               p->relocs[i].lobj.priority = (r->flags & 0xf) * 2 + 
!!r->write_domain;

                /* the first reloc of an UVD job is the msg and that must be in
                   VRAM, also but everything into VRAM on AGP cards to avoid
@@ -94,6 +137,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser 
*p)
                        p->relocs[i].lobj.alt_domain =
                                RADEON_GEM_DOMAIN_VRAM;

+                       /* prioritize this over any other relocation */
+                       p->relocs[i].lobj.priority = 32;
                } else {
                        uint32_t domain = r->write_domain ?
                                r->write_domain : r->read_domains;
@@ -107,9 +152,11 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser 
*p)
                p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
                p->relocs[i].handle = r->handle;

-               radeon_bo_list_add_object(&p->relocs[i].lobj,
-                                         &p->validated);
+               list_add(&p->relocs[i].lobj.tv.head, &p->validated);
        }
+
+       sort_relocs_for_validation(&p->validated);
+
        return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
 }

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index d676ee2..19042ae 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -368,16 +368,6 @@ void radeon_bo_fini(struct radeon_device *rdev)
        arch_phys_wc_del(rdev->mc.vram_mtrr);
 }

-void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
-                               struct list_head *head)
-{
-       if (lobj->written) {
-               list_add(&lobj->tv.head, head);
-       } else {
-               list_add_tail(&lobj->tv.head, head);
-       }
-}
-
 int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
                            struct list_head *head, int ring)
 {
diff --git a/drivers/gpu/drm/radeon/radeon_object.h 
b/drivers/gpu/drm/radeon/radeon_object.h
index a9a8c11..6c3ca9e 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -138,8 +138,6 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev);
 extern void radeon_bo_force_delete(struct radeon_device *rdev);
 extern int radeon_bo_init(struct radeon_device *rdev);
 extern void radeon_bo_fini(struct radeon_device *rdev);
-extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
-                               struct list_head *head);
 extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
                                   struct list_head *head, int ring);
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
-- 
1.8.3.2

Reply via email to