Remove the old hashtable-based render-cache tracking and switch over to the inline write tracking provided by brw-batch.
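For context, a minimal standalone sketch of the two schemes; the types and
function names below are simplified stand-ins for illustration only, not the
driver's actual struct brw_bo / struct brw_batch definitions:

   #include <stdbool.h>

   /* Old scheme: a side hash set recorded every bo written in the
    * current batch (_mesa_set_add() to mark, _mesa_set_search() to
    * check).  New scheme: one bit stored inline in the bo, plus a
    * summary bit on the batch so clearing can early-out. */

   struct bo    { bool dirty; };   /* written by the GPU this batch? */
   struct batch { bool dirty; };   /* any bo dirtied since last flush? */

   static void mark_dirty(struct batch *b, struct bo *bo)
   {
      bo->dirty = true;            /* O(1): no allocation, no hashing */
      b->dirty = true;
   }

   static bool check_dirty(const struct bo *bo)
   {
      return bo->dirty;            /* replaces the hash-set lookup */
   }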
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 src/mesa/drivers/dri/i965/brw_batch.c        | 71 +++++++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_batch.h        |  9 +---
 src/mesa/drivers/dri/i965/brw_context.c      |  2 +-
 src/mesa/drivers/dri/i965/brw_context.h      | 17 ++++++-
 src/mesa/drivers/dri/i965/brw_misc_state.c   |  4 +-
 src/mesa/drivers/dri/i965/gen8_depth_state.c |  2 +-
 src/mesa/drivers/dri/i965/intel_blit.c       |  3 +-
 src/mesa/drivers/dri/i965/intel_fbo.c        | 38 ---------------
 8 files changed, 93 insertions(+), 53 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c
index d1f5828..100466f 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_batch.c
@@ -415,6 +415,7 @@ static void __brw_batch_clear(struct brw_batch *batch)
    batch->state = BATCH_SIZE / 4;
    batch->aperture = 0;
    batch->batch_flags = batch->batch_base_flags;
+   batch->dirty = false;
 }
 
 /*
@@ -840,12 +841,80 @@ uint64_t __brw_batch_reloc(struct brw_batch *batch,
           batch->needs_pipecontrol_ggtt_wa)
          target_bo->exec->flags |= EXEC_OBJECT_NEEDS_GTT;
       }
+      batch->dirty = true;
    }
 
    return target_bo->offset + target_offset;
 }
 
 /*
+ * Mark a bo as being written to by this batch.
+ *
+ * We frequently dirty a buffer and then emit a global cache flush
+ * cleaning all the dirty buffers within a batch. Afterwards, we may
+ * then write to the same buffer, but may not re-emit a relocation and
+ * so we need to notify that the buffer is now dirty again. Normally
+ * we can rely on the relocation marking the write buffers as dirty.
+ *
+ * All caches are flushed by the kernel between batches, so at the end
+ * of each batch we can mark all buffers as clean again. (Before the
+ * buffer is next accessed, either by the GPU in the next batch or by
+ * the CPU following a set-domain call, that access will occur after
+ * the flush has finished.)
+ */
+void brw_bo_mark_dirty(struct brw_batch *batch, struct brw_bo *bo)
+{
+   if (unlikely(bo->batch != batch)) {
+      bo = __brw_batch_lookup_handle(batch, bo->handle);
+      assert(bo);
+   }
+   assert(bo->batch == batch);
+   assert(bo != bo->batch->bo);
+
+   /* We should only be called on objects already in the batch for writing */
+   if (bo->exec == NULL)
+      return;
+
+   assert(bo->read.rq == batch->next_request);
+   assert(bo->write.rq == batch->next_request);
+   assert(bo->domain == DOMAIN_GPU);
+
+   if (bo->dirty)
+      return;
+
+   list_move(&bo->write.link, &batch->next_request->fences);
+   bo->dirty = true;
+   batch->dirty = true;
+}
+
+/*
+ * At the end of each batch and when explicitly flushing caches within
+ * a batch, we can mark all the buffers within that batch as now clean.
+ */
+void brw_batch_clear_dirty(struct brw_batch *batch)
+{
+   struct list_head * const list = &batch->next_request->fences;
+
+   if (!batch->dirty)
+      return;
+
+   list_for_each_entry(struct brw_fence, fence, list, link) {
+      struct brw_bo *bo;
+
+      if (fence->signal != (void*)WRITE_SIGNAL)
+         break;
+
+      bo = container_of(fence, bo, write);
+      if (!bo->dirty)
+         break;
+
+      bo->dirty = false;
+   }
+
+   batch->dirty = false;
+}
+
+/*
  * Close the batch by writing all the tail commands (to store register
  * values between batches, disable profiling, etc). And then to end it all
  * we set MI_BATCH_BUFFER_END.
@@ -1111,8 +1180,6 @@ skip:
    __brw_batch_throttle(batch, rq);
    __brw_batch_retire(batch);
 
-   brw_batch_clear_dirty(batch);
-
    return __brw_batch_reset(batch);
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index da88bc2..b3c4252 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -125,6 +125,8 @@ typedef struct brw_batch {
    uint64_t max_aperture;
    uint64_t rss, peak_rss, vmsize;
 
+   bool dirty : 1;
+
    bool has_softpin : 1;
    bool has_llc : 1;
    bool has_mmap_wc : 1;
@@ -180,13 +182,6 @@ typedef struct brw_batch {
    struct list_head borrowed[1<<BORROWED_BITS];
 
    struct brw_bo *freed_bo;
-
-   /**
-    * Set of brw_bo* that have been rendered to within this batchbuffer
-    * and would need flushing before being used from another cache domain that
-    * isn't coherent with it (i.e. the sampler).
-    */
-   struct set *render_cache;
 } brw_batch;
 
 int brw_batch_init(struct brw_batch *batch,
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 117cce4..a51fd81 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -186,7 +186,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
          continue;
       intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
       intel_miptree_resolve_color(brw, tex_obj->mt);
-      if (brw_check_dirty(brw, tex_obj->mt->bo))
+      if (brw_check_dirty(tex_obj->mt->bo))
         brw_mi_flush(brw, RENDER_RING);
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 001e70f..547b655 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1970,7 +1970,22 @@ void gen7_emit_cs_stall_flush(struct brw_context *brw);
 
 void brw_mi_flush(struct brw_context *brw, enum brw_gpu_ring ring);
 
-bool brw_check_dirty(struct brw_context *ctx, brw_bo *bo);
+/**
+ * Checks whether a BO has been rendered to within the same batchbuffer as
+ * a read that's about to be emitted, in which case a flush must be emitted.
+ *
+ * The GPU has separate, incoherent caches for the render cache and the
+ * sampler cache, along with other caches. Usually data in the different
+ * caches don't interact (e.g. we don't render to our driver-generated
+ * immediate constant data), but for render-to-texture in FBOs we definitely
+ * do. When a batchbuffer is flushed, the kernel will ensure that everything
+ * necessary is flushed before another use of that BO, but for reuse from
+ * different caches within a batchbuffer, it's all our responsibility.
+ */
+static inline bool brw_check_dirty(brw_bo *bo)
+{
+   return bo->dirty;
+}
 
 #ifdef __cplusplus
 }
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index b1e8503..38c3003 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -546,9 +546,9 @@ brw_emit_depthbuffer(struct brw_context *brw)
       height = stencil_irb->Base.Base.Height;
    }
 
-   if (depth_mt && brw_check_dirty(brw, depth_mt->bo))
+   if (depth_mt && brw_check_dirty(depth_mt->bo))
      brw_emit_mi_flush(brw);
-   if (stencil_mt && brw_check_dirty(brw, stencil_mt->bo))
+   if (stencil_mt && brw_check_dirty(stencil_mt->bo))
      brw_emit_mi_flush(brw);
 
    brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index f8ffbeb..85e15c5 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -510,7 +510,7 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
    ADVANCE_BATCH();
 
    /* Mark this buffer as needing a TC flush, as we've rendered to it. */
-   brw_bo_mark_dirty(&brw->batch, mt->bo);
+   assert(mt->bo->dirty);
 
    if (brw_batch_end(&brw->batch)) {
       struct gl_context *ctx = &brw->ctx;
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index a35c8df..2257845 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -560,7 +560,8 @@ intelEmitCopyBlit(struct brw_context *brw,
     *
     * FIXME: Figure out a way to avoid flushing when not required.
     */
-   brw_mi_flush(brw, BLT_RING);
+   if (brw_check_dirty(dst_buffer))
+      brw_mi_flush(brw, BLT_RING);
 
    assert(cpp <= 16);
    BR13 = br13_for_cpp(cpp);
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 8344791..e2767ac 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -38,7 +38,6 @@
 #include "main/image.h"
 #include "main/condrender.h"
 #include "util/hash_table.h"
-#include "util/set.h"
 #include "swrast/swrast.h"
 #include "drivers/common/meta.h"
 
@@ -1048,40 +1047,6 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
    intel_miptree_release(&new_mt);
 }
 
-void
-brw_batch_clear_dirty(brw_batch *batch)
-{
-   struct set_entry *entry;
-
-   set_foreach(batch->render_cache, entry) {
-      _mesa_set_remove(batch->render_cache, entry);
-   }
-}
-
-void
-brw_bo_mark_dirty(brw_batch *batch, brw_bo *bo)
-{
-   _mesa_set_add(batch->render_cache, bo);
-}
-
-/**
- * Emits an appropriate flush for a BO if it has been rendered to within the
- * same batchbuffer as a read that's about to be emitted.
- *
- * The GPU has separate, incoherent caches for the render cache and the
- * sampler cache, along with other caches. Usually data in the different
- * caches don't interact (e.g. we don't render to our driver-generated
- * immediate constant data), but for render-to-texture in FBOs we definitely
- * do. When a batchbuffer is flushed, the kernel will ensure that everything
- * necessary is flushed before another use of that BO, but for reuse from
- * different caches within a batchbuffer, it's all our responsibility.
- */
-bool
-brw_check_dirty(struct brw_context *brw, brw_bo *bo)
-{
-   return _mesa_set_search(brw->batch.render_cache, bo);
-}
-
 /**
  * Do one-time context initializations related to GL_EXT_framebuffer_object.
  * Hook in device driver functions.
@@ -1102,7 +1067,4 @@ intel_fbo_init(struct brw_context *brw)
    dd->BlitFramebuffer = gen4_blit_framebuffer;
    dd->EGLImageTargetRenderbufferStorage =
       intel_image_target_renderbuffer_storage;
-
-   brw->batch.render_cache = _mesa_set_create(brw, _mesa_hash_pointer,
-                                              _mesa_key_pointer_equal);
 }
-- 
2.5.0
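As an aside, an illustration-only sketch of the intended flow, reusing the
simplified stand-in types from the sketch in the commit message (the real
driver marks the bo implicitly when __brw_batch_reloc() emits a write
relocation, checks with brw_check_dirty(), and cleans with
brw_batch_clear_dirty()):

   static void example_flow(struct batch *b, struct bo *rt)
   {
      /* 1. Rendering into rt dirties it (write relocation). */
      mark_dirty(b, rt);

      /* 2. Before sampling rt in the same batch, a flush is needed. */
      if (check_dirty(rt)) {
         /* ... emit PIPE_CONTROL / MI_FLUSH here ... */

         /* 3. After the flush, every bo in the batch is clean again. */
         rt->dirty = false;
         b->dirty = false;
      }
   }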