This keeps us from needing to re-emit all of the other per-stage state just because a surface changed.
Improves unoptimized glamor x11perf -f8text by 1.10201% +/- 0.489869% (n=296). --- src/mesa/drivers/dri/i965/brw_binding_tables.c | 51 ++++++++++++++++++++------ src/mesa/drivers/dri/i965/gen7_gs_state.c | 7 ---- src/mesa/drivers/dri/i965/gen7_vs_state.c | 7 ---- src/mesa/drivers/dri/i965/gen7_wm_state.c | 7 ---- 4 files changed, 39 insertions(+), 33 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c index b39bd10..ca42472 100644 --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c @@ -50,7 +50,7 @@ * This copies brw_stage_state::surf_offset[] into the indirect state section * of the batchbuffer (allocated by brw_state_batch()). */ -static void +static bool brw_upload_binding_table(struct brw_context *brw, GLbitfield brw_new_binding_table, struct brw_stage_state *stage_state) @@ -63,8 +63,9 @@ brw_upload_binding_table(struct brw_context *brw, if (stage_state->bind_bo_offset != 0) { brw->state.dirty.brw |= brw_new_binding_table; stage_state->bind_bo_offset = 0; + return true; } - return; + return false; } if (INTEL_DEBUG & DEBUG_SHADER_TIME) { @@ -81,6 +82,7 @@ brw_upload_binding_table(struct brw_context *brw, memcpy(bind, stage_state->surf_offset, prog_data->binding_table.size_bytes); brw->state.dirty.brw |= brw_new_binding_table; + return true; } /** @@ -92,15 +94,23 @@ brw_upload_binding_table(struct brw_context *brw, static void brw_vs_upload_binding_table(struct brw_context *brw) { - brw_upload_binding_table(brw, BRW_NEW_VS_BINDING_TABLE, &brw->vs.base); + if (brw_upload_binding_table(brw, BRW_NEW_VS_BINDING_TABLE, &brw->vs.base)) { + if (brw->gen >= 7) { + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2)); + OUT_BATCH(brw->vs.base.bind_bo_offset); + ADVANCE_BATCH(); + } + } } const struct brw_tracked_state brw_vs_binding_table = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_VS_CONSTBUF | - 
BRW_NEW_SURFACES, + .brw = (BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS | + BRW_NEW_VS_CONSTBUF | + BRW_NEW_SURFACES), .cache = CACHE_NEW_VS_PROG }, .emit = brw_vs_upload_binding_table, @@ -111,13 +121,22 @@ const struct brw_tracked_state brw_vs_binding_table = { static void brw_upload_wm_binding_table(struct brw_context *brw) { - brw_upload_binding_table(brw, BRW_NEW_PS_BINDING_TABLE, &brw->wm.base); + if (brw_upload_binding_table(brw, BRW_NEW_PS_BINDING_TABLE, &brw->wm.base)) { + if (brw->gen >= 7) { + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2)); + OUT_BATCH(brw->wm.base.bind_bo_offset); + ADVANCE_BATCH(); + } + } } const struct brw_tracked_state brw_wm_binding_table = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH | BRW_NEW_SURFACES, + .brw = (BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS | + BRW_NEW_SURFACES), .cache = CACHE_NEW_WM_PROG }, .emit = brw_upload_wm_binding_table, @@ -131,15 +150,23 @@ brw_gs_upload_binding_table(struct brw_context *brw) if (brw->geometry_program == NULL) return; - brw_upload_binding_table(brw, BRW_NEW_GS_BINDING_TABLE, &brw->gs.base); + if (brw_upload_binding_table(brw, BRW_NEW_GS_BINDING_TABLE, &brw->gs.base)) { + if (brw->gen >= 7) { + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_GS << 16 | (2 - 2)); + OUT_BATCH(brw->gs.base.bind_bo_offset); + ADVANCE_BATCH(); + } + } } const struct brw_tracked_state brw_gs_binding_table = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_GS_CONSTBUF | - BRW_NEW_SURFACES, + .brw = (BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS | + BRW_NEW_GS_CONSTBUF | + BRW_NEW_SURFACES), .cache = CACHE_NEW_GS_PROG }, .emit = brw_gs_upload_binding_table, diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index d2ba354..92b872f 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -66,12 +66,6 @@ upload_gs_state(struct brw_context *brw) /* CACHE_NEW_GS_PROG 
*/ const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base; - /* BRW_NEW_GS_BINDING_TABLE */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_GS << 16 | (2 - 2)); - OUT_BATCH(stage_state->bind_bo_offset); - ADVANCE_BATCH(); - /* CACHE_NEW_SAMPLER */ BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2)); @@ -199,7 +193,6 @@ const struct brw_tracked_state gen7_gs_state = { .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_CONTEXT | BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_GS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION), .cache = CACHE_NEW_GS_PROG | CACHE_NEW_SAMPLER diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 1e76eb1..f6a9e0f 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -74,12 +74,6 @@ upload_vs_state(struct brw_context *brw) gen7_emit_vs_workaround_flush(brw); - /* BRW_NEW_VS_BINDING_TABLE */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2)); - OUT_BATCH(stage_state->bind_bo_offset); - ADVANCE_BATCH(); - /* CACHE_NEW_SAMPLER */ BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2)); @@ -128,7 +122,6 @@ const struct brw_tracked_state gen7_vs_state = { .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION), .cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index b6561bb..586ad67 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -143,12 +143,6 @@ upload_ps_state(struct brw_context *brw) const int max_threads_shift = brw->is_haswell ? 
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; - /* BRW_NEW_PS_BINDING_TABLE */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2)); - OUT_BATCH(brw->wm.base.bind_bo_offset); - ADVANCE_BATCH(); - /* CACHE_NEW_SAMPLER */ BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); @@ -285,7 +279,6 @@ const struct brw_tracked_state gen7_ps_state = { _NEW_BUFFERS | _NEW_MULTISAMPLE), .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_PS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION), .cache = (CACHE_NEW_SAMPLER | -- 1.8.5.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev