Remove the old hashtable-based render-cache tracking and switch over to the inline write tracking provided by brw-batch.
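For context, a minimal standalone sketch of the two schemes; the types and
function names below are simplified stand-ins for illustration only, not the
driver's actual struct brw_bo / struct brw_batch definitions:

   #include <stdbool.h>

   /* Old scheme: a side hash set recorded every bo written in the
    * current batch (_mesa_set_add() to mark, _mesa_set_search() to
    * check).  New scheme: one bit stored inline in the bo, plus a
    * summary bit on the batch so clearing can early-out. */

   struct bo    { bool dirty; };   /* written by the GPU this batch? */
   struct batch { bool dirty; };   /* any bo dirtied since last flush? */

   static void mark_dirty(struct batch *b, struct bo *bo)
   {
      bo->dirty = true;            /* O(1): no allocation, no hashing */
      b->dirty = true;
   }

   static bool check_dirty(const struct bo *bo)
   {
      return bo->dirty;            /* replaces the hash-set lookup */
   }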
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 src/mesa/drivers/dri/i965/brw_batch.c        | 71 +++++++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_batch.h        |  9 +---
 src/mesa/drivers/dri/i965/brw_context.c      |  2 +-
 src/mesa/drivers/dri/i965/brw_context.h      | 17 ++++++-
 src/mesa/drivers/dri/i965/brw_misc_state.c   |  4 +-
 src/mesa/drivers/dri/i965/gen8_depth_state.c |  2 +-
 src/mesa/drivers/dri/i965/intel_blit.c       |  3 +-
 src/mesa/drivers/dri/i965/intel_fbo.c        | 38 ---------------
 8 files changed, 93 insertions(+), 53 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c
index d1f5828..100466f 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_batch.c
@@ -415,6 +415,7 @@ static void __brw_batch_clear(struct brw_batch *batch)
    batch->state = BATCH_SIZE / 4;
    batch->aperture = 0;
    batch->batch_flags = batch->batch_base_flags;
+   batch->dirty = false;
 }
 
 /*
@@ -840,12 +841,80 @@ uint64_t __brw_batch_reloc(struct brw_batch *batch,
           batch->needs_pipecontrol_ggtt_wa)
          target_bo->exec->flags |= EXEC_OBJECT_NEEDS_GTT;
       }
+      batch->dirty = true;
    }
 
    return target_bo->offset + target_offset;
 }
 
 /*
+ * Mark a bo as being written to by this batch.
+ *
+ * We frequently dirty a buffer and then emit a global cache flush
+ * cleaning all the dirty buffers within a batch. Afterwards, we may
+ * then write to the same buffer, but may not re-emit a relocation and
+ * so we need to notify that the buffer is now dirty again. Normally
+ * we can rely on the relocation marking the write buffers as dirty.
+ *
+ * All caches are flushed by the kernel between batches, so at the end
+ * of each batch we can mark all buffers as clean again. (Before the
+ * buffer is next accessed, either by the GPU in the next batch or by
+ * the CPU following a set-domain call, that access will occur after
+ * the flush has finished.)
+ */
+void brw_bo_mark_dirty(struct brw_batch *batch, struct brw_bo *bo)
+{
+   if (unlikely(bo->batch != batch)) {
+      bo = __brw_batch_lookup_handle(batch, bo->handle);
+      assert(bo);
+   }
+   assert(bo->batch == batch);
+   assert(bo != bo->batch->bo);
+
+   /* We should only be called on objects already in the batch for writing */
+   if (bo->exec == NULL)
+      return;
+
+   assert(bo->read.rq == batch->next_request);
+   assert(bo->write.rq == batch->next_request);
+   assert(bo->domain == DOMAIN_GPU);
+
+   if (bo->dirty)
+      return;
+
+   list_move(&bo->write.link, &batch->next_request->fences);
+   bo->dirty = true;
+   batch->dirty = true;
+}
+
+/*
+ * At the end of each batch and when explicitly flushing caches within
+ * a batch, we can mark all the buffers within that batch as now clean.
+ */
+void brw_batch_clear_dirty(struct brw_batch *batch)
+{
+   struct list_head * const list = &batch->next_request->fences;
+
+   if (!batch->dirty)
+      return;
+
+   list_for_each_entry(struct brw_fence, fence, list, link) {
+      struct brw_bo *bo;
+
+      if (fence->signal != (void*)WRITE_SIGNAL)
+         break;
+
+      bo = container_of(fence, bo, write);
+      if (!bo->dirty)
+         break;
+
+      bo->dirty = false;
+   }
+
+   batch->dirty = false;
+}
+
+/*
  * Close the batch by writing all the tail commands (to store register
  * values between batches, disable profiling, etc). And then to end it all
  * we set MI_BATCH_BUFFER_END.
@@ -1111,8 +1180,6 @@ skip:
    __brw_batch_throttle(batch, rq);
    __brw_batch_retire(batch);
 
-   brw_batch_clear_dirty(batch);
-
    return __brw_batch_reset(batch);
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index da88bc2..b3c4252 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -125,6 +125,8 @@ typedef struct brw_batch {
    uint64_t max_aperture;
    uint64_t rss, peak_rss, vmsize;
 
+   bool dirty : 1;
+
    bool has_softpin : 1;
    bool has_llc : 1;
    bool has_mmap_wc : 1;
@@ -180,13 +182,6 @@ typedef struct brw_batch {
    struct list_head borrowed[1<<BORROWED_BITS];
 
    struct brw_bo *freed_bo;
-
-   /**
-    * Set of brw_bo* that have been rendered to within this batchbuffer
-    * and would need flushing before being used from another cache domain that
-    * isn't coherent with it (i.e. the sampler).
-    */
-   struct set *render_cache;
 } brw_batch;
 
 int brw_batch_init(struct brw_batch *batch,
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 117cce4..a51fd81 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -186,7 +186,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
          continue;
       intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
       intel_miptree_resolve_color(brw, tex_obj->mt);
-      if (brw_check_dirty(brw, tex_obj->mt->bo))
+      if (brw_check_dirty(tex_obj->mt->bo))
         brw_mi_flush(brw, RENDER_RING);
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 001e70f..547b655 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1970,7 +1970,22 @@ void gen7_emit_cs_stall_flush(struct brw_context *brw);
 
 void brw_mi_flush(struct brw_context *brw, enum brw_gpu_ring ring);
 
-bool brw_check_dirty(struct brw_context *ctx, brw_bo *bo);
+/**
+ * Checks whether a BO has been rendered to within the same batchbuffer as
+ * a read that's about to be emitted, in which case a flush must be emitted.
+ *
+ * The GPU has separate, incoherent caches for the render cache and the
+ * sampler cache, along with other caches. Usually data in the different
+ * caches don't interact (e.g. we don't render to our driver-generated
+ * immediate constant data), but for render-to-texture in FBOs we definitely
+ * do. When a batchbuffer is flushed, the kernel will ensure that everything
+ * necessary is flushed before another use of that BO, but for reuse from
+ * different caches within a batchbuffer, it's all our responsibility.
+ */
+static inline bool brw_check_dirty(brw_bo *bo)
+{
+   return bo->dirty;
+}
 
 #ifdef __cplusplus
 }
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index b1e8503..38c3003 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -546,9 +546,9 @@ brw_emit_depthbuffer(struct brw_context *brw)
       height = stencil_irb->Base.Base.Height;
    }
 
-   if (depth_mt && brw_check_dirty(brw, depth_mt->bo))
+   if (depth_mt && brw_check_dirty(depth_mt->bo))
      brw_emit_mi_flush(brw);
-   if (stencil_mt && brw_check_dirty(brw, stencil_mt->bo))
+   if (stencil_mt && brw_check_dirty(stencil_mt->bo))
      brw_emit_mi_flush(brw);
 
    brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index f8ffbeb..85e15c5 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -510,7 +510,7 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
    ADVANCE_BATCH();
 
    /* Mark this buffer as needing a TC flush, as we've rendered to it. */
-   brw_bo_mark_dirty(&brw->batch, mt->bo);
+   assert(mt->bo->dirty);
 
    if (brw_batch_end(&brw->batch)) {
       struct gl_context *ctx = &brw->ctx;
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index a35c8df..2257845 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -560,7 +560,8 @@ intelEmitCopyBlit(struct brw_context *brw,
     *
     * FIXME: Figure out a way to avoid flushing when not required.
     */
-   brw_mi_flush(brw, BLT_RING);
+   if (brw_check_dirty(dst_buffer))
+      brw_mi_flush(brw, BLT_RING);
 
    assert(cpp <= 16);
    BR13 = br13_for_cpp(cpp);
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 8344791..e2767ac 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -38,7 +38,6 @@
 #include "main/image.h"
 #include "main/condrender.h"
 #include "util/hash_table.h"
-#include "util/set.h"
 #include "swrast/swrast.h"
 #include "drivers/common/meta.h"
 
@@ -1048,40 +1047,6 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
    intel_miptree_release(&new_mt);
 }
 
-void
-brw_batch_clear_dirty(brw_batch *batch)
-{
-   struct set_entry *entry;
-
-   set_foreach(batch->render_cache, entry) {
-      _mesa_set_remove(batch->render_cache, entry);
-   }
-}
-
-void
-brw_bo_mark_dirty(brw_batch *batch, brw_bo *bo)
-{
-   _mesa_set_add(batch->render_cache, bo);
-}
-
-/**
- * Emits an appropriate flush for a BO if it has been rendered to within the
- * same batchbuffer as a read that's about to be emitted.
- *
- * The GPU has separate, incoherent caches for the render cache and the
- * sampler cache, along with other caches. Usually data in the different
- * caches don't interact (e.g. we don't render to our driver-generated
- * immediate constant data), but for render-to-texture in FBOs we definitely
- * do. When a batchbuffer is flushed, the kernel will ensure that everything
- * necessary is flushed before another use of that BO, but for reuse from
- * different caches within a batchbuffer, it's all our responsibility.
- */
-bool
-brw_check_dirty(struct brw_context *brw, brw_bo *bo)
-{
-   return _mesa_set_search(brw->batch.render_cache, bo);
-}
-
 /**
  * Do one-time context initializations related to GL_EXT_framebuffer_object.
  * Hook in device driver functions.
@@ -1102,7 +1067,4 @@ intel_fbo_init(struct brw_context *brw)
    dd->BlitFramebuffer = gen4_blit_framebuffer;
    dd->EGLImageTargetRenderbufferStorage =
       intel_image_target_renderbuffer_storage;
-
-   brw->batch.render_cache = _mesa_set_create(brw, _mesa_hash_pointer,
-                                              _mesa_key_pointer_equal);
 }
-- 
2.5.0
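As an aside, an illustration-only sketch of the intended flow, reusing the
simplified stand-in types from the sketch in the commit message (the real
driver marks the bo implicitly when __brw_batch_reloc() emits a write
relocation, checks with brw_check_dirty(), and cleans with
brw_batch_clear_dirty()):

   static void example_flow(struct batch *b, struct bo *rt)
   {
      /* 1. Rendering into rt dirties it (write relocation). */
      mark_dirty(b, rt);

      /* 2. Before sampling rt in the same batch, a flush is needed. */
      if (check_dirty(rt)) {
         /* ... emit PIPE_CONTROL / MI_FLUSH here ... */

         /* 3. After the flush, every bo in the batch is clean again. */
         rt->dirty = false;
         b->dirty = false;
      }
   }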