gem: Use a single chained reloc batch for a single execbuf

Chris Wilson Fri, 01 May 2020 01:43:24 -0700

As we can now keep chaining together a relocation batch to process any
number of relocations, we can keep building that relocation batch for
all of the target vmas. This avoids emitting a new request into the
ring for each target, which would consume precious ring space and risk
a stall.


Testcase: igt/gem_exec_reloc/basic-wide-active
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 47b1192a159e..9d68d66555b0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -268,6 +268,7 @@ struct i915_execbuffer {
                bool has_fence : 1;
                bool needs_unfenced : 1;
 
+               struct i915_vma *target;
                struct i915_request *rq;
                u32 *rq_cmd;
                unsigned int rq_size;
@@ -1070,9 +1071,6 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 {
        void *vaddr;
 
-       if (cache->rq)
-               reloc_gpu_flush(cache);
-
        if (!cache->vaddr)
                return;
 
@@ -1265,7 +1263,6 @@ static int reloc_move_to_gpu(struct i915_request *rq, 
struct i915_vma *vma)
 }
 
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
-                            struct i915_vma *vma,
                             unsigned int len)
 {
        struct reloc_cache *cache = &eb->reloc_cache;
@@ -1288,7 +1285,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
                goto out_pool;
        }
 
-       batch = i915_vma_instance(pool->obj, vma->vm, NULL);
+       batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto err_unmap;
@@ -1308,10 +1305,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
        if (err)
                goto err_request;
 
-       err = reloc_move_to_gpu(rq, vma);
-       if (err)
-               goto err_request;
-
        err = eb->engine->emit_bb_start(rq,
                                        batch->node.start, PAGE_SIZE,
                                        cache->gen > 5 ? 0 : 
I915_DISPATCH_SECURE);
@@ -1361,9 +1354,17 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
                if (!intel_engine_can_store_dword(eb->engine))
                        return ERR_PTR(-ENODEV);
 
-               err = __reloc_gpu_alloc(eb, vma, len);
+               err = __reloc_gpu_alloc(eb, len);
+               if (unlikely(err))
+                       return ERR_PTR(err);
+       }
+
+       if (vma != cache->target) {
+               err = reloc_move_to_gpu(cache->rq, vma);
                if (unlikely(err))
                        return ERR_PTR(err);
+
+               cache->target = vma;
        }
 
        if (unlikely(cache->rq_size > PAGE_SIZE / sizeof(u32) - len - 4)) {
@@ -1680,6 +1681,8 @@ static int eb_relocate(struct i915_execbuffer *eb)
                        if (err)
                                return err;
                }
+
+               reloc_gpu_flush(&eb->reloc_cache);
        }
 
        return 0;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 2/4] drm/i915/gem: Use a single chained reloc batch for a single execbuf

Reply via email to