Remove the old hashtable approach and switch over to the inline write tracking with brw-batch.
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> --- src/mesa/drivers/dri/i965/brw_batch.c | 70 ++++++++++++++++++++++++++- src/mesa/drivers/dri/i965/brw_batch.h | 10 +--- src/mesa/drivers/dri/i965/brw_context.c | 24 +++++---- src/mesa/drivers/dri/i965/brw_context.h | 17 ++++++- src/mesa/drivers/dri/i965/brw_misc_state.c | 4 +- src/mesa/drivers/dri/i965/brw_pipe_control.c | 2 +- src/mesa/drivers/dri/i965/gen8_depth_state.c | 2 +- src/mesa/drivers/dri/i965/intel_blit.c | 3 +- src/mesa/drivers/dri/i965/intel_fbo.c | 38 --------------- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 2 +- 10 files changed, 108 insertions(+), 64 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c index b257d000f8..515a81bf89 100644 --- a/src/mesa/drivers/dri/i965/brw_batch.c +++ b/src/mesa/drivers/dri/i965/brw_batch.c @@ -990,6 +990,8 @@ uint64_t __brw_batch_reloc(struct brw_batch *batch, batch->needs_pipecontrol_ggtt_wa) target_bo->exec->flags |= EXEC_OBJECT_NEEDS_GTT; } + + batch->flags |= BATCH_DIRTY; } return __brw_reloc_address(target_bo, target_offset); @@ -1056,6 +1058,72 @@ static uint32_t __brw_batch_emit_seqno(struct brw_batch *batch, } /* + * Mark a bo as being written to by this batch. + * + * We frequently dirty a buffer and then emit a global cache flush + * cleaning all the dirty buffers within a batch. Afterwards, we may + * then write to the same buffer, but may not re-emit a relocation and + * so we need to notify that the buffer is now dirty again. Normally + * we can rely on the relocation marking the write buffers as dirty. + * + * All caches are flushed by the kernel between batches, so at the end + * of each batch we can mark all buffers as clean again. (Before we can + * access the buffer, either by the GPU in the next batch or by the CPU + * following a set-domain call, that access will be after the flush has + * finished.) 
+ */ +void brw_bo_mark_dirty(struct brw_batch *batch, struct brw_bo *bo) +{ + if (unlikely(bo->batch != batch)) { + bo = __brw_batch_lookup_handle(batch, bo->handle); + assert(bo); + } + assert(bo->batch == batch); + assert(bo != bo->batch->bo); + + /* We should only be called on objects already in the batch for writing */ + if (bo->exec == NULL) + return; + + assert(brw_fence_get_request(&bo->read[batch->ring]) == batch->next_request); + assert(brw_fence_get_request(&bo->write) == batch->next_request); + + if (bo->dirty) + return; + + list_move(&bo->write.link, &batch->next_request->fences); + bo->dirty = true; + batch->flags |= BATCH_DIRTY; +} + +/* + * At the end of each batch and when explicitly flushing caches within + * a batch, we can mark all the buffers within that batch as now clean. + */ +void brw_batch_clear_dirty(struct brw_batch *batch) +{ + struct list_head * const list = &batch->next_request->fences; + + if (!(batch->flags & BATCH_DIRTY)) + return; + + list_for_each_entry(struct brw_fence, fence, list, link) { + struct brw_bo *bo; + + if (brw_fence_get_signal(fence) != WRITE_SIGNAL) + break; + + bo = container_of(fence, bo, write); + if (!bo->dirty) + break; + + bo->dirty = false; + } + + batch->flags &= ~BATCH_DIRTY; +} + +/* * Close the batch by writing all the tail commands (to store register * values between batches, disable profiling, etc). And then to end it all * we set MI_BATCH_BUFFER_END. 
@@ -1397,8 +1465,6 @@ skip: __brw_batch_throttle(batch, rq); __brw_batch_retire(batch); - brw_batch_clear_dirty(batch); - return __brw_batch_next(batch); } diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h index 264868f253..074a13f550 100644 --- a/src/mesa/drivers/dri/i965/brw_batch.h +++ b/src/mesa/drivers/dri/i965/brw_batch.h @@ -107,7 +107,8 @@ typedef struct brw_batch { uint32_t *tail; uint32_t flags; -#define BATCH_HAS_STATE_BASE (1 << 31) +#define BATCH_DIRTY (1 << 31) +#define BATCH_HAS_STATE_BASE (1 << 30) uint32_t base_flags; enum brw_gpu_ring ring; @@ -185,13 +186,6 @@ typedef struct brw_batch { struct list_head borrowed[1<<BORROWED_BITS]; struct brw_bo *freed_bo; - - /** - * Set of brw_bo* that have been rendered to within this batchbuffer - * and would need flushing before being used from another cache domain that - * isn't coherent with it (i.e. the sampler). - */ - struct set *render_cache; } brw_batch; int brw_batch_init(struct brw_batch *batch, diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 15e467b00a..488d76be8e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -228,6 +228,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) struct brw_context *brw = brw_context(ctx); struct intel_texture_object *tex_obj; struct intel_renderbuffer *depth_irb; + bool flush = false; if (ctx->swrast_context) _swrast_InvalidateState(ctx, new_state); @@ -263,8 +264,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) const int flags = intel_texture_view_requires_resolve(brw, tex_obj) ? 
0 : INTEL_MIPTREE_IGNORE_CCS_E; intel_miptree_all_slices_resolve_color(brw, tex_obj->mt, flags); - if (brw_check_dirty(brw, tex_obj->mt->bo)) - brw_mi_flush(brw, RENDER_RING); + flush |= brw_check_dirty(tex_obj->mt->bo); if (tex_obj->base.StencilSampling || tex_obj->mt->format == MESA_FORMAT_S_UINT8) { @@ -283,8 +283,14 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) struct gl_image_unit *u = &ctx->ImageUnits[shader->Program->sh.ImageUnits[j]]; tex_obj = intel_texture_object(u->TexObj); + if (!tex_obj) + continue; - if (tex_obj && tex_obj->mt) { + if (tex_obj->base.Target == GL_TEXTURE_BUFFER) { + struct intel_buffer_object *intel_obj = + intel_buffer_object(tex_obj->base.BufferObject); + flush |= brw_check_dirty(intel_obj->buffer); + } else if (tex_obj->mt) { /* Access to images is implemented using indirect messages * against data port. Normal render target write understands * lossless compression but unfortunately the typed/untyped @@ -300,8 +306,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) "off lossless compression"); } - if (brw_check_dirty(brw, tex_obj->mt->bo)) - brw_mi_flush(brw, RENDER_RING); + flush |= brw_check_dirty(tex_obj->mt->bo); } } } @@ -321,8 +326,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) intel_miptree_resolve_color( brw, irb->mt, irb->mt_level, irb->mt_layer, irb->layer_count, INTEL_MIPTREE_IGNORE_CCS_E)) - if (brw_check_dirty(brw, irb->mt->bo)) - brw_emit_mi_flush(brw); + flush |= brw_check_dirty(irb->mt->bo); } } @@ -353,11 +357,13 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) */ assert(!intel_miptree_is_lossless_compressed(brw, mt)); intel_miptree_all_slices_resolve_color(brw, mt, 0); - if (brw_check_dirty(brw, mt->bo)) - brw_mi_flush(brw, RENDER_RING); + flush |= brw_check_dirty(mt->bo); } } + if (flush) + brw_mi_flush(brw, RENDER_RING); + _mesa_lock_context_textures(ctx); if (new_state & _NEW_BUFFERS) { diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h index 5e2df95508..cd31b730f5 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1680,7 +1680,22 @@ void gen7_emit_cs_stall_flush(struct brw_context *brw); void brw_mi_flush(struct brw_context *brw, enum brw_gpu_ring ring); void brw_pipe_control_flush(struct brw_context *brw, unsigned flags); -bool brw_check_dirty(struct brw_context *ctx, brw_bo *bo); +/** + * Reports whether a BO has been rendered to within this batchbuffer, so + * the caller can emit an appropriate flush before a read of that BO. + * + * The GPU has separate, incoherent caches for the render cache and the + * sampler cache, along with other caches. Usually data in the different + * caches don't interact (e.g. we don't render to our driver-generated + * immediate constant data), but for render-to-texture in FBOs we definitely + * do. When a batchbuffer is flushed, the kernel will ensure that everything + * necessary is flushed before another use of that BO, but for reuse from + * different caches within a batchbuffer, it's all our responsibility. 
+ */ +static inline bool brw_check_dirty(brw_bo *bo) +{ + return bo->dirty; +} /* brw_queryformat.c */ void brw_query_internal_format(struct gl_context *ctx, GLenum target, diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index f4ea7449f6..9197057e49 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -551,9 +551,9 @@ brw_emit_depthbuffer(struct brw_context *brw) height = stencil_irb->Base.Base.Height; } - if (depth_mt && brw_check_dirty(brw, depth_mt->bo)) + if (depth_mt && brw_check_dirty(depth_mt->bo)) brw_emit_mi_flush(brw); - if (stencil_mt && brw_check_dirty(brw, stencil_mt->bo)) + if (stencil_mt && brw_check_dirty(stencil_mt->bo)) brw_emit_mi_flush(brw); brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset, diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c index aab3bf141a..40102f9aa6 100644 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c @@ -363,7 +363,7 @@ brw_emit_post_sync_nonzero_flush(struct brw_context *brw) void brw_emit_mi_flush(struct brw_context *brw) { - if (brw_batch_count(&brw->batch) == 0) + if (!(brw->batch.flags & BATCH_DIRTY)) return; if (brw->batch.ring == BLT_RING) { diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c index 77424f593a..30f0f11432 100644 --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c @@ -513,7 +513,7 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, ADVANCE_BATCH(); /* Mark this buffer as needing a TC flush, as we've rendered to it. 
*/ - brw_bo_mark_dirty(&brw->batch, mt->bo); + assert(mt->bo->dirty); brw_batch_end(&brw->batch); brw_batch_maybe_flush(&brw->batch); diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 9e961b03a2..825643cf0c 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -645,7 +645,8 @@ intelEmitCopyBlit(struct brw_context *brw, * * FIXME: Figure out a way to avoid flushing when not required. */ - brw_mi_flush(brw, BLT_RING); + if (brw_check_dirty(dst_buffer)) + brw_mi_flush(brw, BLT_RING); assert(cpp <= 16); BR13 = br13_for_cpp(cpp); diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 8f8f7e5ad5..47f9e18a35 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -35,7 +35,6 @@ #include "main/image.h" #include "main/condrender.h" #include "util/hash_table.h" -#include "util/set.h" #include "swrast/swrast.h" #include "drivers/common/meta.h" @@ -1049,40 +1048,6 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, intel_miptree_release(&new_mt); } -void -brw_batch_clear_dirty(brw_batch *batch) -{ - struct set_entry *entry; - - set_foreach(batch->render_cache, entry) { - _mesa_set_remove(batch->render_cache, entry); - } -} - -void -brw_bo_mark_dirty(brw_batch *batch, brw_bo *bo) -{ - _mesa_set_add(batch->render_cache, bo); -} - -/** - * Emits an appropriate flush for a BO if it has been rendered to within the - * same batchbuffer as a read that's about to be emitted. - * - * The GPU has separate, incoherent caches for the render cache and the - * sampler cache, along with other caches. Usually data in the different - * caches don't interact (e.g. we don't render to our driver-generated - * immediate constant data), but for render-to-texture in FBOs we definitely - * do. 
When a batchbuffer is flushed, the kernel will ensure that everything - * necessary is flushed before another use of that BO, but for reuse from - * different caches within a batchbuffer, it's all our responsibility. - */ -bool -brw_check_dirty(struct brw_context *brw, brw_bo *bo) -{ - return _mesa_set_search(brw->batch.render_cache, bo); -} - /** * Do one-time context initializations related to GL_EXT_framebuffer_object. * Hook in device driver functions. @@ -1102,7 +1067,4 @@ intel_fbo_init(struct brw_context *brw) dd->BlitFramebuffer = gen4_blit_framebuffer; dd->EGLImageTargetRenderbufferStorage = intel_image_target_renderbuffer_storage; - - brw->batch.render_cache = _mesa_set_create(brw, _mesa_hash_pointer, - _mesa_key_pointer_equal); } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 76e8923b1b..3befcc271e 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -2492,7 +2492,7 @@ intel_update_r8stencil(struct brw_context *brw, } } - if (brw_check_dirty(brw, dst->bo)) + if (brw_check_dirty(dst->bo)) brw_emit_mi_flush(brw); src->r8stencil_needs_update = false; } -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev