Refactor the aperture-space check, rollback, and retry logic that was open-coded in the blorp, compute, draw, and blit paths into a common brw_batch_begin()/brw_batch_end() idiom. Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> --- src/mesa/drivers/dri/i965/brw_batch.h | 9 +++++ src/mesa/drivers/dri/i965/brw_blorp.cpp | 43 +++++------------------ src/mesa/drivers/dri/i965/brw_compute.c | 39 +++++---------------- src/mesa/drivers/dri/i965/brw_draw.c | 37 ++++++-------------- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 50 +++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/intel_blit.c | 47 +++++++------------------ 6 files changed, 99 insertions(+), 126 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h index 4a50e1b..bef544d 100644 --- a/src/mesa/drivers/dri/i965/brw_batch.h +++ b/src/mesa/drivers/dri/i965/brw_batch.h @@ -31,6 +31,8 @@ extern "C" { #endif +#include <setjmp.h> + #include <intel_bufmgr.h> #include "util/list.h" @@ -63,6 +65,9 @@ typedef struct brw_batch { bool needs_sol_reset; int gen; + jmp_buf jmpbuf; + bool repeat; + unsigned begin_count; bool no_batch_wrap; struct { @@ -261,6 +266,10 @@ intel_batchbuffer_require_space(struct brw_batch *batch, GLuint sz, intel_batchbuffer_emit_render_ring_prelude(batch); } +int brw_batch_begin(struct brw_batch *batch, + const int sz_bytes, + enum brw_gpu_ring ring); +int brw_batch_end(struct brw_batch *batch); #ifdef __cplusplus } diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index 0b0cc8d..d4d5457 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -210,7 +210,9 @@ brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params) { struct gl_context *ctx = &brw->ctx; uint32_t estimated_max_batch_usage = 1500; - bool check_aperture_failed_once = false; + + if (brw_batch_begin(&brw->batch, estimated_max_batch_usage, RENDER_RING) < 0) + return; /* Flush the sampler and render caches. 
We definitely need to flush the * sampler cache so that we get updated contents from the render cache for @@ -221,13 +223,6 @@ brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params) */ brw_emit_mi_flush(brw); -retry: - intel_batchbuffer_require_space(&brw->batch, estimated_max_batch_usage, RENDER_RING); - intel_batchbuffer_save_state(&brw->batch); - drm_intel_bo *saved_bo = brw->batch.bo; - uint32_t saved_used = USED_BATCH(&brw->batch); - uint32_t saved_state_batch_offset = brw->batch.state_batch_offset; - switch (brw->gen) { case 6: gen6_blorp_exec(brw, params); @@ -240,33 +235,18 @@ retry: unreachable("not reached"); } - /* Make sure we didn't wrap the batch unintentionally, and make sure we - * reserved enough space that a wrap will never happen. + /* Flush the sampler cache so any texturing from the destination is + * coherent. */ - assert(brw->batch.bo == saved_bo); - assert((USED_BATCH(&brw->batch) - saved_used) * 4 + - (saved_state_batch_offset - brw->batch.state_batch_offset) < - estimated_max_batch_usage); - /* Shut up compiler warnings on release build */ - (void)saved_bo; - (void)saved_used; - (void)saved_state_batch_offset; + brw_emit_mi_flush(brw); /* Check if the blorp op we just did would make our batch likely to fail to * map all the BOs into the GPU at batch exec time later. If so, flush the * batch and try again with nothing else in the batch. 
*/ - if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) { - if (!check_aperture_failed_once) { - check_aperture_failed_once = true; - intel_batchbuffer_reset_to_saved(&brw->batch); - brw_batch_flush(&brw->batch, NULL); - goto retry; - } else { - int ret = brw_batch_flush(&brw->batch, NULL); - WARN_ONCE(ret == -ENOSPC, - "i965: blorp emit exceeded available aperture space\n"); - } + if (brw_batch_end(&brw->batch)) { + WARN_ONCE(1, "i965: blorp emit exceeded available aperture space\n"); + return; } brw_batch_maybe_flush(&brw->batch); @@ -277,11 +257,6 @@ retry: brw->ctx.NewDriverState = ~0ull; brw->no_depth_or_stencil = false; brw->ib.type = -1; - - /* Flush the sampler cache so any texturing from the destination is - * coherent. - */ - brw_emit_mi_flush(brw); } brw_hiz_op_params::brw_hiz_op_params(struct intel_mipmap_tree *mt, diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index 4bff716..817d9ef 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -86,7 +86,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) { struct brw_context *brw = brw_context(ctx); int estimated_buffer_space_needed; - bool fail_next = false; + int ret; if (!_mesa_check_conditional_render(ctx)) return; @@ -104,40 +104,17 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) estimated_buffer_space_needed += 1024; /* push constants */ estimated_buffer_space_needed += 512; /* misc. pad */ - /* Flush the batch if it's approaching full, so that we don't wrap while - * we've got validated state that needs to be in the same batch as the - * primitives. 
- */ - intel_batchbuffer_require_space(&brw->batch, estimated_buffer_space_needed, - RENDER_RING); - intel_batchbuffer_save_state(&brw->batch); + ret = brw_batch_begin(&brw->batch, estimated_buffer_space_needed, + RENDER_RING); + if (ret < 0) + return; - retry: - brw->batch.no_batch_wrap = true; brw_upload_compute_state(brw); - brw_emit_gpgpu_walker(brw, num_groups); - brw->batch.no_batch_wrap = false; - - if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) { - if (!fail_next) { - intel_batchbuffer_reset_to_saved(&brw->batch); - brw_batch_flush(&brw->batch, NULL); - fail_next = true; - goto retry; - } else { - if (brw_batch_flush(&brw->batch, NULL) == -ENOSPC) { - static bool warned = false; - - if (!warned) { - fprintf(stderr, "i965: Single compute shader dispatch " - "exceeded available aperture space\n"); - warned = true; - } - } - } - } + WARN_ONCE(brw_batch_end(&brw->batch) == -ENOSPC, + "i965: Single compute shader dispatch exceeded " + "available aperture space\n"); /* Now that we know we haven't run out of aperture space, we can safely * reset the dirty bits. diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index c2e7bda..c2ae0fc 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -404,7 +404,6 @@ brw_try_draw_prims(struct gl_context *ctx, { struct brw_context *brw = brw_context(ctx); GLuint i; - bool fail_next = false; if (ctx->NewState) _mesa_update_state(ctx); @@ -451,6 +450,7 @@ brw_try_draw_prims(struct gl_context *ctx, for (i = 0; i < nr_prims; i++) { int estimated_max_prim_size; const int sampler_state_size = 16; + int ret; estimated_max_prim_size = 512; /* batchbuffer commands */ estimated_max_prim_size += BRW_MAX_TEX_UNIT * @@ -463,9 +463,9 @@ brw_try_draw_prims(struct gl_context *ctx, * we've got validated state that needs to be in the same batch as the * primitives. 
*/ - intel_batchbuffer_require_space(&brw->batch, - estimated_max_prim_size, RENDER_RING); - intel_batchbuffer_save_state(&brw->batch); + ret = brw_batch_begin(&brw->batch, estimated_max_prim_size, RENDER_RING); + if (ret < 0) + break; if (brw->num_instances != prims[i].num_instances || brw->basevertex != prims[i].basevertex) { @@ -501,35 +501,18 @@ brw_try_draw_prims(struct gl_context *ctx, else gen6_set_prim(brw, &prims[i]); -retry: - /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and * that the state updated in the loop outside of this block is that in - * *_set_prim or brw_batch_flush(), which only impacts - * brw->ctx.NewDriverState. + * *_set_prim, which only impacts brw->ctx.NewDriverState. */ - if (brw->ctx.NewDriverState) { - brw->batch.no_batch_wrap = true; - brw_upload_render_state(brw); - } + if (brw->ctx.NewDriverState) + brw_upload_render_state(brw); brw_emit_prim(brw, &prims[i], brw->primitive); - brw->batch.no_batch_wrap = false; - - if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) { - if (!fail_next) { - intel_batchbuffer_reset_to_saved(&brw->batch); - brw_batch_flush(&brw->batch, NULL); - fail_next = true; - goto retry; - } else { - int ret = brw_batch_flush(&brw->batch, NULL); - WARN_ONCE(ret == -ENOSPC, - "i965: Single primitive emit exceeded " - "available aperture space\n"); - } - } + WARN_ONCE(brw_batch_end(&brw->batch) == -ENOSPC, + "i965: Single primitive emit exceeded " + "available aperture space\n"); /* Now that we know we haven't run out of aperture space, we can safely * reset the dirty bits. 
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 2b5acd1..49c1c27 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -321,7 +321,9 @@ brw_batch_flush(struct brw_batch *batch, struct perf_debug *info) brw->batch.reserved_space = 0; + brw->batch.begin_count++; brw_batch_finish_hook(&brw->batch); + brw->batch.begin_count--; /* Mark the end of the buffer. */ intel_batchbuffer_emit_dword(brw, MI_BATCH_BUFFER_END); @@ -404,3 +406,51 @@ intel_batchbuffer_data(struct brw_context *brw, memcpy(brw->batch.map_next, data, bytes); brw->batch.map_next += bytes >> 2; } + +int brw_batch_begin(struct brw_batch *batch, + const int sz_bytes, + enum brw_gpu_ring ring) +{ + if (batch->begin_count++) + return 0; + + intel_batchbuffer_require_space(batch, sz_bytes, ring); + intel_batchbuffer_save_state(batch); + + batch->repeat = false; + batch->no_batch_wrap = true; + + return setjmp(batch->jmpbuf); +} + +int brw_batch_end(struct brw_batch *batch) +{ + assert(batch->begin_count); + if (--batch->begin_count) + return 0; + + batch->no_batch_wrap = false; + + if (dri_bufmgr_check_aperture_space(&batch->bo, 1)) { + if (!batch->repeat) { + enum brw_gpu_ring ring = batch->ring; + + intel_batchbuffer_reset_to_saved(batch); + brw_batch_flush(batch, NULL); + + batch->begin_count++; + batch->no_batch_wrap = true; + + batch->ring = ring; + if (ring == RENDER_RING) + intel_batchbuffer_emit_render_ring_prelude(batch); + + batch->repeat = true; + longjmp(batch->jmpbuf, 1); + } + + return brw_batch_flush(batch, NULL); + } + + return 0; +} diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 22a5c5d..b6ad969 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -521,10 +521,9 @@ intelEmitCopyBlit(struct brw_context *brw, GLshort w, GLshort h, GLenum logic_op) { - GLuint CMD, BR13, pass = 
0; + GLuint CMD, BR13; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; - brw_bo *aper_array[3]; bool dst_y_tiled = dst_tiling == I915_TILING_Y; bool src_y_tiled = src_tiling == I915_TILING_Y; bool use_fast_copy_blit = false; @@ -532,25 +531,8 @@ intelEmitCopyBlit(struct brw_context *brw, if ((dst_y_tiled || src_y_tiled) && brw->gen < 6) return false; - /* do space check before going any further */ - do { - aper_array[0] = brw->batch.bo; - aper_array[1] = dst_buffer; - aper_array[2] = src_buffer; - - if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { - brw_batch_flush(&brw->batch, NULL); - pass++; - } else - break; - } while (pass < 2); - - if (pass >= 2) - return false; - unsigned length = brw->gen >= 8 ? 10 : 8; - intel_batchbuffer_require_space(&brw->batch, length * 4, BLT_RING); DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __func__, src_buffer, src_pitch, src_offset, src_x, src_y, @@ -661,6 +643,9 @@ intelEmitCopyBlit(struct brw_context *brw, assert(dst_offset + (dst_y + h - 1) * abs(dst_pitch) + (w * cpp) <= dst_buffer->size); + if (brw_batch_begin(&brw->batch, 60, BLT_RING) < 0) + return false; + BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled); OUT_BATCH(CMD | (length - 2)); OUT_BATCH(BR13 | (uint16_t)dst_pitch); @@ -691,7 +676,7 @@ intelEmitCopyBlit(struct brw_context *brw, brw_emit_mi_flush(brw); - return true; + return brw_batch_end(&brw->batch) == 0; } bool @@ -728,10 +713,6 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw, dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); unsigned xy_setup_blt_length = brw->gen >= 8 ? 
10 : 8; - intel_batchbuffer_require_space(&brw->batch, - (xy_setup_blt_length * 4) + - (3 * 4) + dwords * 4, BLT_RING); - opcode = XY_SETUP_BLT_CMD; if (cpp == 4) opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; @@ -747,6 +728,9 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw, if (dst_tiling != I915_TILING_NONE) blit_cmd |= XY_DST_TILED; + if (brw_batch_begin(&brw->batch, 20 + dwords, BLT_RING) < 0) + return false; + BEGIN_BATCH_BLT(xy_setup_blt_length + 3); OUT_BATCH(opcode | (xy_setup_blt_length - 2)); OUT_BATCH(br13); @@ -776,7 +760,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw, brw_emit_mi_flush(brw); - return true; + return brw_batch_end(&brw->batch) == 0; } /* We don't have a memmove-type blit like some other hardware, so we'll do a @@ -854,7 +838,6 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw, { uint32_t BR13, CMD; int pitch, cpp; - brw_bo *aper_array[2]; pitch = mt->pitch; cpp = mt->cpp; @@ -872,14 +855,8 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw, } BR13 |= pitch; - /* do space check before going any further */ - aper_array[0] = brw->batch.bo; - aper_array[1] = mt->bo; - - if (drm_intel_bufmgr_check_aperture_space(aper_array, - ARRAY_SIZE(aper_array)) != 0) { - brw_batch_flush(&brw->batch, NULL); - } + if (brw_batch_begin(&brw->batch, 20, BLT_RING) < 0) + return; unsigned length = brw->gen >= 8 ? 7 : 6; bool dst_y_tiled = mt->tiling == I915_TILING_Y; @@ -902,4 +879,6 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw, ADVANCE_BATCH_TILED(dst_y_tiled, false); brw_emit_mi_flush(brw); + + brw_batch_end(&brw->batch); } -- 2.5.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev