We can simplify our tracking of pending writes in an execbuf to a
single bit in vma->exec_entry->flags, but that requires the
relocation function to know the object's vma. Pass it along.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
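A quick aside for reviewers (not part of the commit): below is a
minimal, self-contained C sketch of the tracking change. The names
here (fake_entry, fake_vma, FAKE_EXEC_OBJECT_WRITE) are invented for
illustration and are not the driver's real structures; the point is
only that the write is recorded once, as a single flag bit at
relocation time, and every later consumer tests that same bit.

    #include <stdbool.h>
    #include <stdio.h>

    #define FAKE_EXEC_OBJECT_WRITE (1u << 2) /* stand-in for EXEC_OBJECT_WRITE */

    struct fake_entry { unsigned int flags; };
    struct fake_vma { struct fake_entry *exec_entry; };

    /* Relocation side: a write relocation sets the single bit. */
    static void fake_relocate(struct fake_vma *vma, unsigned int write_domain)
    {
            if (write_domain)
                    vma->exec_entry->flags |= FAKE_EXEC_OBJECT_WRITE;
    }

    /* Consumer side: sync decisions key off the same bit. */
    static bool fake_needs_write_sync(const struct fake_vma *vma)
    {
            return vma->exec_entry->flags & FAKE_EXEC_OBJECT_WRITE;
    }

    int main(void)
    {
            struct fake_entry entry = { .flags = 0 };
            struct fake_vma vma = { .exec_entry = &entry };

            fake_relocate(&vma, 0x2 /* pretend write domain */);
            printf("write sync needed: %s\n",
                   fake_needs_write_sync(&vma) ? "yes" : "no");
            return 0;
    }

This replaces the old two-step dance of accumulating
obj->base.pending_write_domain during relocation and only later
copying it into the exec flags in eb_move_to_active().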
 drivers/gpu/drm/i915/i915_drv.h            |   3 +-
 drivers/gpu/drm/i915/i915_gem.c            |   5 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 110 ++++++++++++-----------------
 drivers/gpu/drm/i915/intel_display.c       |   2 +-
 4 files changed, 53 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e7e7840d5a68..899fe983e623 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3256,7 +3256,8 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-                        struct drm_i915_gem_request *to);
+                        struct drm_i915_gem_request *to,
+                        bool write);
 void i915_vma_move_to_active(struct i915_vma *vma,
                             struct drm_i915_gem_request *req,
                             unsigned int flags);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0ca3ef547136..78faac2b780c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2798,7 +2798,8 @@ __i915_gem_object_sync(struct drm_i915_gem_request *to,
  */
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
-                    struct drm_i915_gem_request *to)
+                    struct drm_i915_gem_request *to,
+                    bool write)
 {
        struct i915_gem_active *active;
        unsigned long active_mask;
@@ -2810,7 +2811,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
        if (!active_mask)
                return 0;
 
-       if (obj->base.pending_write_domain) {
+       if (write) {
                active = obj->last_read;
        } else {
                active_mask = 1;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 35751e855859..e9ac591f3a79 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -617,42 +617,25 @@ static bool object_is_idle(struct drm_i915_gem_object *obj)
 }
 
 static int
-eb_relocate_entry(struct drm_i915_gem_object *obj,
+eb_relocate_entry(struct i915_vma *vma,
                  struct i915_execbuffer *eb,
                  struct drm_i915_gem_relocation_entry *reloc)
 {
-       struct drm_gem_object *target_obj;
-       struct drm_i915_gem_object *target_i915_obj;
-       struct i915_vma *target_vma;
-       uint64_t target_offset;
+       struct i915_vma *target;
+       u64 target_offset;
        int ret;
 
        /* we've already hold a reference to all valid objects */
-       target_vma = eb_get_vma(eb, reloc->target_handle);
-       if (unlikely(target_vma == NULL))
+       target = eb_get_vma(eb, reloc->target_handle);
+       if (unlikely(!target))
                return -ENOENT;
-       target_i915_obj = target_vma->obj;
-       target_obj = &target_vma->obj->base;
-
-       target_offset = gen8_canonical_addr(target_vma->node.start);
-
-       /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
-        * pipe_control writes because the gpu doesn't properly redirect them
-        * through the ppgtt for non_secure batchbuffers. */
-       if (unlikely(IS_GEN6(eb->i915) &&
-                    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
-               ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
-                                   PIN_GLOBAL);
-               if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
-                       return ret;
-       }
 
        /* Validate that the target is in a valid r/w GPU domain */
        if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
                DRM_DEBUG("reloc with multiple write domains: "
-                         "obj %p target %d offset %d "
+                         "target %d offset %d "
                          "read %08x write %08x",
-                         obj, reloc->target_handle,
+                         reloc->target_handle,
                          (int) reloc->offset,
                          reloc->read_domains,
                          reloc->write_domain);
@@ -661,47 +644,60 @@ eb_relocate_entry(struct drm_i915_gem_object *obj,
        if (unlikely((reloc->write_domain | reloc->read_domains)
                     & ~I915_GEM_GPU_DOMAINS)) {
                DRM_DEBUG("reloc with read/write non-GPU domains: "
-                         "obj %p target %d offset %d "
+                         "target %d offset %d "
                          "read %08x write %08x",
-                         obj, reloc->target_handle,
+                         reloc->target_handle,
                          (int) reloc->offset,
                          reloc->read_domains,
                          reloc->write_domain);
                return -EINVAL;
        }
 
-       target_obj->pending_read_domains |= reloc->read_domains;
-       target_obj->pending_write_domain |= reloc->write_domain;
+       if (reloc->write_domain)
+               target->exec_entry->flags |= EXEC_OBJECT_WRITE;
+
+       /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
+        * pipe_control writes because the gpu doesn't properly redirect them
+        * through the ppgtt for non_secure batchbuffers.
+        */
+       if (unlikely(IS_GEN6(eb->i915) &&
+                    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
+               ret = i915_vma_bind(target, target->obj->cache_level,
+                                   PIN_GLOBAL);
+               if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
+                       return ret;
+       }
 
        /* If the relocation already has the right value in it, no
         * more work needs to be done.
         */
+       target_offset = gen8_canonical_addr(target->node.start);
        if (target_offset == reloc->presumed_offset)
                return 0;
 
        /* Check that the relocation address is valid... */
        if (unlikely(reloc->offset >
-                    obj->base.size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
+                    vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
                DRM_DEBUG("Relocation beyond object bounds: "
-                         "obj %p target %d offset %d size %d.\n",
-                         obj, reloc->target_handle,
-                         (int) reloc->offset,
-                         (int) obj->base.size);
+                         "target %d offset %d size %d.\n",
+                         reloc->target_handle,
+                         (int)reloc->offset,
+                         (int)vma->size);
                return -EINVAL;
        }
        if (unlikely(reloc->offset & 3)) {
                DRM_DEBUG("Relocation not 4-byte aligned: "
-                         "obj %p target %d offset %d.\n",
-                         obj, reloc->target_handle,
-                         (int) reloc->offset);
+                         "target %d offset %d.\n",
+                         reloc->target_handle,
+                         (int)reloc->offset);
                return -EINVAL;
        }
 
        /* We can't wait for rendering with pagefaults disabled */
-       if (pagefault_disabled() && !object_is_idle(obj))
+       if (pagefault_disabled() && !object_is_idle(vma->obj))
                return -EFAULT;
 
-       ret = relocate_entry(obj, reloc, &eb->reloc_cache, target_offset);
+       ret = relocate_entry(vma->obj, reloc, &eb->reloc_cache, target_offset);
        if (ret)
                return ret;
 
@@ -736,7 +732,7 @@ static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb)
                do {
                        u64 offset = r->presumed_offset;
 
-                       ret = eb_relocate_entry(vma->obj, eb, r);
+                       ret = eb_relocate_entry(vma, eb, r);
                        if (ret)
                                goto out;
 
@@ -767,7 +763,7 @@ eb_relocate_vma_slow(struct i915_vma *vma,
        int i, ret = 0;
 
        for (i = 0; i < entry->relocation_count; i++) {
-               ret = eb_relocate_entry(vma->obj, eb, &relocs[i]);
+               ret = eb_relocate_entry(vma, eb, &relocs[i]);
                if (ret)
                        break;
        }
@@ -809,7 +805,6 @@ eb_reserve_vma(struct i915_vma *vma,
               struct intel_engine_cs *engine,
               bool *need_reloc)
 {
-       struct drm_i915_gem_object *obj = vma->obj;
        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
        uint64_t flags;
        int ret;
@@ -863,11 +858,6 @@ eb_reserve_vma(struct i915_vma *vma,
                *need_reloc = true;
        }
 
-       if (entry->flags & EXEC_OBJECT_WRITE) {
-               obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
-               obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
-       }
-
        return 0;
 }
 
@@ -930,7 +920,6 @@ eb_vma_misplaced(struct i915_vma *vma)
 static int eb_reserve(struct i915_execbuffer *eb)
 {
        const bool has_fenced_gpu_access = INTEL_GEN(eb->i915) < 4;
-       struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        struct list_head ordered_vmas;
        struct list_head pinned_vmas;
@@ -943,7 +932,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
                bool need_fence, need_mappable;
 
                vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
-               obj = vma->obj;
                entry = vma->exec_entry;
 
                if (eb->ctx->flags & CONTEXT_NO_ZEROMAP)
@@ -963,9 +951,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
                        list_move(&vma->exec_list, &ordered_vmas);
                } else
                        list_move_tail(&vma->exec_list, &ordered_vmas);
-
-               obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
-               obj->base.pending_write_domain = 0;
        }
        list_splice(&ordered_vmas, &eb->vmas);
        list_splice(&pinned_vmas, &eb->vmas);
@@ -1144,7 +1129,9 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
                struct drm_i915_gem_object *obj = vma->obj;
 
                if (obj->flags & other_rings) {
-                       ret = i915_gem_object_sync(obj, eb->request);
+                       ret = i915_gem_object_sync(obj,
+                                                  eb->request,
+                                                  vma->exec_entry->flags & EXEC_OBJECT_WRITE);
                        if (ret)
                                return ret;
                }
@@ -1349,12 +1336,10 @@ eb_move_to_active(struct i915_execbuffer *eb)
                u32 old_read = obj->base.read_domains;
                u32 old_write = obj->base.write_domain;
 
-               obj->base.write_domain = obj->base.pending_write_domain;
-               if (obj->base.write_domain)
-                       vma->exec_entry->flags |= EXEC_OBJECT_WRITE;
-               else
-                       obj->base.pending_read_domains |= obj->base.read_domains;
-               obj->base.read_domains = obj->base.pending_read_domains;
+               obj->base.write_domain = 0;
+               if (vma->exec_entry->flags & EXEC_OBJECT_WRITE)
+                       obj->base.read_domains = 0;
+               obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
 
                i915_vma_move_to_active(vma, eb->request, vma->exec_entry->flags);
                eb_export_fence(obj, eb->request, vma->exec_entry->flags);
@@ -1704,7 +1689,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        }
 
        /* Set the pending read domains for the batch buffer to COMMAND */
-       if (eb.batch->obj->base.pending_write_domain) {
+       if (eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE) {
                DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
                ret = -EINVAL;
                goto err;
@@ -1742,10 +1727,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                }
        }
 
-       eb.batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
-       if (args->batch_len == 0)
-               args->batch_len = eb.batch->size - eb.batch_start_offset;
-
        /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
         * batch" bit. Hence we need to pin secure batches into the global gtt.
         * hsw should have this fixed, but bdw mucks it up again. */
@@ -1772,6 +1753,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                eb.batch = vma;
        }
 
+       if (args->batch_len == 0)
+               args->batch_len = eb.batch->size - eb.batch_start_offset;
+
        /* Allocate a request for this batch buffer nice and early. */
        eb.request = i915_gem_request_alloc(eb.engine, eb.ctx);
        if (IS_ERR(eb.request)) {
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 54e01631c1a9..4c6cb64ec220 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12137,7 +12137,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
                        goto cleanup_unpin;
                }
 
-               ret = i915_gem_object_sync(obj, request);
+               ret = i915_gem_object_sync(obj, request, false);
                if (ret)
                        goto cleanup_request;
 
-- 
2.9.3
