Refactor the aperture-test, rollback and retry logic into a common idiom.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 src/mesa/drivers/dri/i965/brw_batch.h         | 18 ++++++++
 src/mesa/drivers/dri/i965/brw_compute.c       | 36 +++-------------
 src/mesa/drivers/dri/i965/brw_draw.c          | 33 ++++-----------
 src/mesa/drivers/dri/i965/genX_blorp_exec.c   | 49 ++++------------------
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 59 +++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/intel_blit.c        | 49 +++++++---------------
 6 files changed, 112 insertions(+), 132 deletions(-)
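The new idiom, for reference: brw_batch_begin() reserves space, saves the
rollback state and arms a setjmp target (nested begin/end pairs are counted,
so only the outermost pair does the work); brw_batch_end() runs the aperture
check and, on failure, resets to the saved state, flushes and longjmps back
to the begin point so the emission is replayed once, falling back to a flush
and a one-time warning if the retry still does not fit.  A caller then looks
roughly like the sketch below -- the function name and the 512-byte estimate
are placeholders for illustration, not values used anywhere in this patch;
the real call sites are in the brw_draw.c, brw_compute.c, blorp and blit
hunks that follow.

  /* Illustrative caller only: "emit_foo" and the 512-byte estimate are
   * placeholders, not part of the patch.
   */
  static void
  emit_foo(struct brw_context *brw)
  {
     /* Reserve space, save the rollback state and arm the setjmp target;
      * returns negative on error, and returns again with a positive value
      * when the emission below is being replayed via longjmp.
      */
     if (brw_batch_begin(&brw->batch, 512, RENDER_RING) < 0)
        return;

     /* ... upload state and emit commands (may run twice) ... */

     /* Run the aperture check; on failure reset to the saved state, flush
      * and longjmp back to brw_batch_begin() for a single retry.
      */
     brw_batch_end(&brw->batch);
  }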
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index 3899f18f83..95cdbca4fd 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -31,6 +31,8 @@
 extern "C" {
 #endif
 
+#include <setjmp.h>
+
 #include <intel_bufmgr.h>
 
 #include "util/list.h"
@@ -64,6 +66,9 @@ typedef struct brw_batch {
    bool state_base_address_emitted;
    int gen;
 
+   jmp_buf jmpbuf;
+   bool repeat;
+   unsigned begin_count;
    bool no_batch_wrap;
 
    struct {
@@ -275,6 +280,19 @@ intel_batchbuffer_space(struct brw_batch *batch)
       - USED_BATCH(batch)*4;
 }
 
+int __brw_batch_begin(struct brw_batch *batch,
+                      const int sz_bytes,
+                      enum brw_gpu_ring ring);
+#define brw_batch_begin(batch, sz, ring) ({ \
+   int __ret = 0; \
+   if ((batch)->begin_count++ == 0) { \
+      __ret = __brw_batch_begin((batch), (sz), (ring)); \
+      if (__ret == 0) \
+         __ret = setjmp((batch)->jmpbuf); \
+   } \
+   __ret; })
+void brw_batch_end(struct brw_batch *batch);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index 4e76817661..da10c6fba6 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -178,7 +178,7 @@ brw_dispatch_compute_common(struct gl_context *ctx)
 {
    struct brw_context *brw = brw_context(ctx);
    int estimated_buffer_space_needed;
-   bool fail_next = false;
+   int ret;
 
    if (!_mesa_check_conditional_render(ctx))
       return;
@@ -196,40 +196,16 @@ brw_dispatch_compute_common(struct gl_context *ctx)
    estimated_buffer_space_needed += 1024; /* push constants */
    estimated_buffer_space_needed += 512; /* misc. pad */
 
-   /* Flush the batch if it's approaching full, so that we don't wrap while
-    * we've got validated state that needs to be in the same batch as the
-    * primitives.
-    */
-   intel_batchbuffer_require_space(&brw->batch, estimated_buffer_space_needed,
-                                   RENDER_RING);
-   intel_batchbuffer_save_state(&brw->batch);
+   ret = brw_batch_begin(&brw->batch, estimated_buffer_space_needed,
+                         RENDER_RING);
+   if (ret < 0)
+      return;
 
- retry:
-   brw->batch.no_batch_wrap = true;
    brw_upload_compute_state(brw);
 
    brw_emit_gpgpu_walker(brw);
 
-   brw->batch.no_batch_wrap = false;
-
-   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
-      if (!fail_next) {
-         intel_batchbuffer_reset_to_saved(&brw->batch);
-         brw_batch_flush(&brw->batch, NULL);
-         fail_next = true;
-         goto retry;
-      } else {
-         if (brw_batch_flush(&brw->batch, NULL) == -ENOSPC) {
-            static bool warned = false;
-
-            if (!warned) {
-               fprintf(stderr, "i965: Single compute shader dispatch "
-                       "exceeded available aperture space\n");
-               warned = true;
-            }
-         }
-      }
-   }
+   brw_batch_end(&brw->batch);
 
    /* Now that we know we haven't run out of aperture space, we can safely
     * reset the dirty bits.
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index afbee5167b..e2007774d7 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -442,7 +442,6 @@ brw_try_draw_prims(struct gl_context *ctx,
 {
    struct brw_context *brw = brw_context(ctx);
    GLuint i;
-   bool fail_next = false;
 
    if (ctx->NewState)
       _mesa_update_state(ctx);
@@ -495,6 +494,7 @@ brw_try_draw_prims(struct gl_context *ctx,
    for (i = 0; i < nr_prims; i++) {
       int estimated_max_prim_size;
      const int sampler_state_size = 16;
+      int ret;
 
       estimated_max_prim_size = 512; /* batchbuffer commands */
       estimated_max_prim_size += BRW_MAX_TEX_UNIT *
@@ -507,9 +507,9 @@ brw_try_draw_prims(struct gl_context *ctx,
        * we've got validated state that needs to be in the same batch as the
        * primitives.
        */
-      intel_batchbuffer_require_space(&brw->batch,
-                                      estimated_max_prim_size, RENDER_RING);
-      intel_batchbuffer_save_state(&brw->batch);
+      ret = brw_batch_begin(&brw->batch, estimated_max_prim_size, RENDER_RING);
+      if (ret < 0)
+         break;
 
       if (brw->num_instances != prims[i].num_instances ||
           brw->basevertex != prims[i].basevertex ||
@@ -581,35 +581,16 @@ brw_try_draw_prims(struct gl_context *ctx,
       else
          gen6_set_prim(brw, &prims[i]);
 
-retry:
-
       /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and
        * that the state updated in the loop outside of this block is that in
-       * *_set_prim or brw_batch_flush(), which only impacts
-       * brw->ctx.NewDriverState.
+       * *_set_prim, which only impacts brw->ctx.NewDriverState.
        */
-      if (brw->ctx.NewDriverState) {
-         brw->batch.no_batch_wrap = true;
+      if (brw->ctx.NewDriverState)
         brw_upload_render_state(brw);
-      }
 
       brw_emit_prim(brw, &prims[i], brw->primitive, xfb_obj, stream);
 
-      brw->batch.no_batch_wrap = false;
-
-      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
-         if (!fail_next) {
-            intel_batchbuffer_reset_to_saved(&brw->batch);
-            brw_batch_flush(&brw->batch, NULL);
-            fail_next = true;
-            goto retry;
-         } else {
-            int ret = brw_batch_flush(&brw->batch, NULL);
-            WARN_ONCE(ret == -ENOSPC,
-                      "i965: Single primitive emit exceeded "
-                      "available aperture space\n");
-         }
-      }
+      brw_batch_end(&brw->batch);
 
       /* Now that we know we haven't run out of aperture space, we can safely
        * reset the dirty bits.
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 6b75a3b727..58a891f383 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -157,9 +157,10 @@ genX(blorp_exec)(struct blorp_batch *batch,
 {
    assert(batch->blorp->driver_ctx == batch->driver_batch);
    struct brw_context *brw = batch->driver_batch;
-   struct gl_context *ctx = &brw->ctx;
    const uint32_t estimated_max_batch_usage = GEN_GEN >= 8 ? 1800 : 1500;
-   bool check_aperture_failed_once = false;
+
+   if (brw_batch_begin(&brw->batch, estimated_max_batch_usage, RENDER_RING) < 0)
+      return;
 
    /* Flush the sampler and render caches.  We definitely need to flush the
     * sampler cache so that we get updated contents from the render cache for
@@ -172,14 +173,6 @@ genX(blorp_exec)(struct blorp_batch *batch,
 
    brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
 
-retry:
-   intel_batchbuffer_require_space(&brw->batch,
-                                   estimated_max_batch_usage, RENDER_RING);
-   intel_batchbuffer_save_state(&brw->batch);
-   brw_bo *saved_bo = brw->batch.bo;
-   uint32_t saved_used = USED_BATCH(&brw->batch);
-   uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;
-
 #if GEN_GEN == 6
    /* Emit workaround flushes when we switch from drawing to blorping. */
    brw_emit_post_sync_nonzero_flush(brw);
@@ -203,35 +196,12 @@ retry:
 
    blorp_exec(batch, params);
 
-   /* Make sure we didn't wrap the batch unintentionally, and make sure we
-    * reserved enough space that a wrap will never happen.
-    */
-   assert(brw->batch.bo == saved_bo);
-   assert((USED_BATCH(brw->batch) - saved_used) * 4 +
-          (saved_state_batch_offset - brw->batch.state_batch_offset) <
-          estimated_max_batch_usage);
-   /* Shut up compiler warnings on release build */
-   (void)saved_bo;
-   (void)saved_used;
-   (void)saved_state_batch_offset;
-
-   /* Check if the blorp op we just did would make our batch likely to fail to
-    * map all the BOs into the GPU at batch exec time later. If so, flush the
-    * batch and try again with nothing else in the batch.
+   /* Flush the sampler cache so any texturing from the destination is
+    * coherent.
     */
-   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
-      if (!check_aperture_failed_once) {
-         check_aperture_failed_once = true;
-         intel_batchbuffer_reset_to_saved(&brw->batch);
-         brw_batch_flush(&brw->batch, NULL);
-         goto retry;
-      } else {
-         int ret = brw_batch_flush(&brw->batch, NULL);
-         WARN_ONCE(ret == -ENOSPC,
-                   "i965: blorp emit exceeded available aperture space\n");
-      }
-   }
+   brw_emit_mi_flush(brw);
+   brw_batch_end(&brw->batch);
 
    brw_batch_maybe_flush(&brw->batch);
 
   /* We've smashed all state compared to what the normal 3D pipeline
@@ -240,9 +210,4 @@ retry:
    brw->ctx.NewDriverState |= BRW_NEW_BLORP;
    brw->no_depth_or_stencil = false;
    brw->ib.type = -1;
-
-   /* Flush the sampler cache so any texturing from the destination is
-    * coherent.
-    */
-   brw_emit_mi_flush(brw);
 }
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index e0ba259e5e..4c5a640aff 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -348,7 +348,9 @@ brw_batch_flush(struct brw_batch *batch, struct perf_debug *info)
 
    brw->batch.reserved_space = 0;
 
+   brw->batch.begin_count++;
    brw_batch_finish_hook(&brw->batch);
+   brw->batch.begin_count--;
 
    /* Mark the end of the buffer. */
    intel_batchbuffer_emit_dword(&brw->batch, MI_BATCH_BUFFER_END);
@@ -431,3 +433,60 @@ intel_batchbuffer_data(struct brw_context *brw,
    memcpy(brw->batch.map_next, data, bytes);
    brw->batch.map_next += bytes >> 2;
 }
+
+int __brw_batch_begin(struct brw_batch *batch,
+                      const int sz_bytes,
+                      enum brw_gpu_ring ring)
+{
+   assert(batch->begin_count == 1);
+   assert(!batch->repeat);
+
+   intel_batchbuffer_require_space(batch, sz_bytes, ring);
+   intel_batchbuffer_save_state(batch);
+
+   assert(!batch->no_batch_wrap);
+   batch->no_batch_wrap = true;
+
+   return 0;
+}
+
+void brw_batch_end(struct brw_batch *batch)
+{
+   assert(batch->begin_count);
+   if (--batch->begin_count)
+      return;
+
+   assert(batch->no_batch_wrap);
+   batch->no_batch_wrap = false;
+
+   if (dri_bufmgr_check_aperture_space(&batch->bo, 1)) {
+      if (!batch->repeat) {
+         enum brw_gpu_ring ring = batch->ring;
+
+         intel_batchbuffer_reset_to_saved(batch);
+         brw_batch_flush(batch, NULL);
+
+         batch->begin_count++;
+         batch->no_batch_wrap = true;
+
+         batch->ring = ring;
+         batch->repeat = true;
+         longjmp(batch->jmpbuf, 1);
+      }
+
+      if (unlikely(brw_batch_flush(batch, NULL) == -ENOSPC)) {
+         static GLuint msg_id;
+         if (!msg_id) {
+            struct brw_context *brw = container_of(batch, brw, batch);
+            fprintf(stderr, "WARNING: Aperture space exceeded!\n");
+            _mesa_gl_debug(&brw->ctx, &msg_id,
+                           MESA_DEBUG_SOURCE_API,
+                           MESA_DEBUG_TYPE_OTHER,
+                           MESA_DEBUG_SEVERITY_HIGH,
+                           "Aperture space exceeded!\n");
+         }
+      }
+   }
+
+   batch->repeat = false;
+}
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 8c832ac249..6208cc5e18 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -591,10 +591,9 @@ intelEmitCopyBlit(struct brw_context *brw,
                   GLshort w, GLshort h,
                   GLenum logic_op)
 {
-   GLuint CMD, BR13, pass = 0;
+   GLuint CMD, BR13;
    int dst_y2 = dst_y + h;
    int dst_x2 = dst_x + w;
-   brw_bo *aper_array[3];
    bool dst_y_tiled = dst_tiling == I915_TILING_Y;
    bool src_y_tiled = src_tiling == I915_TILING_Y;
    bool use_fast_copy_blit = false;
@@ -604,25 +603,8 @@ intelEmitCopyBlit(struct brw_context *brw,
    if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
       return false;
 
-   /* do space check before going any further */
-   do {
-      aper_array[0] = brw->batch.bo;
-      aper_array[1] = dst_buffer;
-      aper_array[2] = src_buffer;
-
-      if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
-         brw_batch_flush(&brw->batch, NULL);
-         pass++;
-      } else
-         break;
-   } while (pass < 2);
-
-   if (pass >= 2)
-      return false;
-
-   unsigned length = brw->gen >= 8 ? 10 : 8;
+   const unsigned length = brw->gen >= 8 ? 10 : 8;
 
-   intel_batchbuffer_require_space(&brw->batch, length * 4, BLT_RING);
    DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
        __func__,
        src_buffer, src_pitch, src_offset, src_x, src_y,
@@ -732,6 +714,9 @@ intelEmitCopyBlit(struct brw_context *brw,
    assert(dst_x < dst_x2);
    assert(dst_y < dst_y2);
 
+   if (brw_batch_begin(&brw->batch, 100, BLT_RING) < 0)
+      return false;
+
    BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled);
    OUT_BATCH(CMD | (length - 2));
    OUT_BATCH(BR13 | (uint16_t)dst_pitch);
@@ -762,6 +747,7 @@ intelEmitCopyBlit(struct brw_context *brw,
 
    brw_emit_mi_flush(brw);
 
+   brw_batch_end(&brw->batch);
    return true;
 }
 
@@ -799,10 +785,6 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
        dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
 
    unsigned xy_setup_blt_length = brw->gen >= 8 ? 10 : 8;
-   intel_batchbuffer_require_space(&brw->batch,
-                                   (xy_setup_blt_length * 4) +
-                                   (3 * 4) + dwords * 4, BLT_RING);
-
    opcode = XY_SETUP_BLT_CMD;
    if (cpp == 4)
       opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
@@ -818,6 +800,9 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
    if (dst_tiling != I915_TILING_NONE)
       blit_cmd |= XY_DST_TILED;
 
+   if (brw_batch_begin(&brw->batch, 60 + 4*dwords, BLT_RING) < 0)
+      return false;
+
    BEGIN_BATCH_BLT(xy_setup_blt_length + 3);
    OUT_BATCH(opcode | (xy_setup_blt_length - 2));
    OUT_BATCH(br13);
@@ -847,6 +832,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
 
    brw_emit_mi_flush(brw);
 
+   brw_batch_end(&brw->batch);
    return true;
 }
 
@@ -921,7 +907,6 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
 {
    uint32_t BR13, CMD;
    int pitch, cpp;
-   brw_bo *aper_array[2];
 
    pitch = mt->pitch;
    cpp = mt->cpp;
@@ -939,16 +924,10 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
    }
    BR13 |= pitch;
 
-   /* do space check before going any further */
-   aper_array[0] = brw->batch.bo;
-   aper_array[1] = mt->bo;
+   if (brw_batch_begin(&brw->batch, 80, BLT_RING) < 0)
+      return;
 
-   if (drm_intel_bufmgr_check_aperture_space(aper_array,
-                                             ARRAY_SIZE(aper_array)) != 0) {
-      brw_batch_flush(&brw->batch, NULL);
-   }
-
-   unsigned length = brw->gen >= 8 ? 7 : 6;
+   const unsigned length = brw->gen >= 8 ? 7 : 6;
    bool dst_y_tiled = mt->tiling == I915_TILING_Y;
 
    /* We need to split the blit into chunks that each fit within the blitter's
@@ -991,4 +970,6 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
    }
 
    brw_emit_mi_flush(brw);
+
+   brw_batch_end(&brw->batch);
 }
-- 
2.11.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev