Abdiel Janulgue <abdiel.janul...@linux.intel.com> writes: > Use the gather table generated from the uniform uploads and > ir_binop_ubo_load to gather and pack the constants to the gather pool. > > Note that the 3DSTATE_CONSTANT_* packet now refers to the gather > pool generated by the resource streamer instead of the constant buffer > pointed to by an offset of the dynamic state base address. > > v2: Support GEN8 + non-trivial rebase. > > Signed-off-by: Abdiel Janulgue <abdiel.janul...@linux.intel.com> > --- > src/mesa/drivers/dri/i965/brw_state.h | 2 +- > src/mesa/drivers/dri/i965/gen6_gs_state.c | 2 +- > src/mesa/drivers/dri/i965/gen6_vs_state.c | 2 +- > src/mesa/drivers/dri/i965/gen6_wm_state.c | 2 +- > src/mesa/drivers/dri/i965/gen7_vs_state.c | 84 > +++++++++++++++++++++++++++---- > 5 files changed, 79 insertions(+), 13 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_state.h > b/src/mesa/drivers/dri/i965/brw_state.h > index c7c7e0b..a1e6c73 100644 > --- a/src/mesa/drivers/dri/i965/brw_state.h > +++ b/src/mesa/drivers/dri/i965/brw_state.h > @@ -361,7 +361,7 @@ brw_upload_pull_constants(struct brw_context *brw, > void > gen7_upload_constant_state(struct brw_context *brw, > const struct brw_stage_state *stage_state, > - bool active, unsigned opcode); > + bool active, unsigned opcode, unsigned gather_op); > > void gen7_rs_control(struct brw_context *brw, int enable); > > diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c > b/src/mesa/drivers/dri/i965/gen6_gs_state.c > index eb4c586..79a899e 100644 > --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c > +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c > @@ -48,7 +48,7 @@ gen6_upload_gs_push_constants(struct brw_context *brw) > } > > if (brw->gen >= 7) > - gen7_upload_constant_state(brw, stage_state, gp, _3DSTATE_CONSTANT_GS); > + gen7_upload_constant_state(brw, stage_state, gp, _3DSTATE_CONSTANT_GS, > _3DSTATE_GATHER_CONSTANT_GS); > } > > const struct brw_tracked_state gen6_gs_push_constants = { > diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c > b/src/mesa/drivers/dri/i965/gen6_vs_state.c > index d43af5b..bb375b9 100644 > --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c > +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c > @@ -174,7 +174,7 @@ gen6_upload_vs_push_constants(struct brw_context *brw) > gen7_emit_vs_workaround_flush(brw); > > gen7_upload_constant_state(brw, stage_state, true /* active */, > - _3DSTATE_CONSTANT_VS); > + _3DSTATE_CONSTANT_VS, > _3DSTATE_GATHER_CONSTANT_VS); > } > } > > diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c > b/src/mesa/drivers/dri/i965/gen6_wm_state.c > index d1748ba..c7b37c6 100644 > --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c > +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c > @@ -51,7 +51,7 @@ gen6_upload_wm_push_constants(struct brw_context *brw) > > if (brw->gen >= 7) { > gen7_upload_constant_state(brw, &brw->wm.base, true, > - _3DSTATE_CONSTANT_PS); > + _3DSTATE_CONSTANT_PS, > _3DSTATE_GATHER_CONSTANT_PS); > } > } > > diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c > b/src/mesa/drivers/dri/i965/gen7_vs_state.c > index b7e4858..cf07658 100644 > --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c > +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c > @@ -28,19 +28,70 @@ > #include "program/prog_parameter.h" > #include "program/prog_statevars.h" > #include "intel_batchbuffer.h" > +#include "glsl/glsl_parser_extras.h" > > +static void > +gen7_submit_gather_table(struct brw_context* brw, > + const struct brw_stage_state *stage_state, > + const struct brw_stage_prog_data *prog_data, > + unsigned gather_opcode) > +{ > + uint32_t gather_dwords = 3 + prog_data->nr_gather_table; > + > + /* Ordinary uniforms are assigned to the first constant buffer slot */ > + unsigned cb_valid = 1; > + /* Assign subsequent constant buffer slots to UBOs if any */ > + cb_valid |= (prog_data->nr_ubo_params > 0) ? > + (2 << (BRW_UBO_GATHER_INDEX_APPEND + prog_data->max_ubo_const_block)) > - 1 : 0; > + > + assert(cb_valid < 0xffff); > + > + BEGIN_BATCH(gather_dwords); > + OUT_BATCH(gather_opcode << 16 | (gather_dwords - 2)); > + OUT_BATCH(SET_FIELD(cb_valid, BRW_GATHER_BUFFER_VALID) | > + SET_FIELD(BRW_UNIFORM_GATHER_INDEX_START / 16, > + BRW_GATHER_BINDING_TABLE_BLOCK)); > + OUT_BATCH(stage_state->push_const_offset); > + for (int i = 0; i < prog_data->nr_gather_table; i++) { > + /* Which bo are we referring to? The uniform constant buffer or > + * the UBO block? > + */ > + bool is_uniform = prog_data->gather_table[i].reg == -1; > + int cb_offset = is_uniform ? i : > + (prog_data->gather_table[i].const_offset / 16); > + int bt_offset = is_uniform ? 0 : > + (prog_data->gather_table[i].const_block + > BRW_UBO_GATHER_INDEX_APPEND); > + > + assert(cb_offset < 256); > + assert(bt_offset < 16); > + > + OUT_BATCH(SET_FIELD(cb_offset, BRW_GATHER_CONST_BUFFER_OFFSET) | > + SET_FIELD(prog_data->gather_table[i].channel_mask, > + BRW_GATHER_CHANNEL_MASK) | bt_offset); > + } > + ADVANCE_BATCH();
This is called in response to the _NEW_PROGRAM_CONSTANTS state flag, e.g. when uniforms from the default block are updated using glUniform*(), but not necessarily if, say, a new buffer object is bound to a UBO binding point -- Or worse, if something (e.g. glBufferSubData) somehow modifies a buffer object while it's already bound to the pipeline as UBO. It doesn't look like this would notice that a buffer has been modified and resend appropriate _3DSTATE_GATHER_CONSTANT_*S commands so that the new buffer contents are re-gathered. I suspect that my "arb_shader_image_load_store/host-mem-barrier/Uniform buffer/RaW" piglit test would almost reproduce the issue (it binds the same buffer as UBO and shader image simultaneously, then writes to the buffer using imageStore(), calls glMemoryBarrier(GL_UNIFORM_BARRIER_BIT) so that subsequent uniform memory access is coherent with previous image writes, and then reads back the same UBO from the shader) -- If it weren't because it uses non-constant indexing of the uniform array (which you don't promote to gather constants), and because it changes the value of a uniform between the two draw calls so you might actually end up here by luck. ;) > +} > > void > gen7_upload_constant_state(struct brw_context *brw, > const struct brw_stage_state *stage_state, > - bool active, unsigned opcode) > + bool active, unsigned opcode, unsigned > gather_opcode) > { > uint32_t mocs = brw->gen < 8 ? GEN7_MOCS_L3 : 0; > > /* Disable if the shader stage is inactive or there are no push > constants. */ > active = active && stage_state->push_const_size != 0; > > + bool use_gather = (brw->gather_pool.bo != NULL); > + > + int const_loc = use_gather ? 16 : 0; > int dwords = brw->gen >= 8 ? 11 : 7; > + > + struct brw_stage_prog_data *prog_data = stage_state->prog_data; > + if (prog_data && use_gather && active) { > + gen7_submit_gather_table(brw, stage_state, prog_data, gather_opcode); > + } > + > BEGIN_BATCH(dwords); > OUT_BATCH(opcode << 16 | (dwords - 2)); > > @@ -59,10 +110,9 @@ gen7_upload_constant_state(struct brw_context *brw, > OUT_BATCH(0); > OUT_BATCH(stage_state->push_const_size); > } else { > - OUT_BATCH(active ? stage_state->push_const_size : 0); > + OUT_BATCH(active ? stage_state->push_const_size << const_loc : 0); > OUT_BATCH(0); > } > - > /* Pointer to the constant buffer. Covered by the set of state flags > * from gen6_prepare_wm_contants > */ > @@ -79,23 +129,39 @@ gen7_upload_constant_state(struct brw_context *brw, > OUT_BATCH(0); > OUT_BATCH(0); > } else if (brw->gen>= 8) { > - OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); > - OUT_BATCH(0); > - OUT_BATCH(0); > - OUT_BATCH(0); > + if (use_gather) { > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); > + OUT_BATCH(0); > + } else { > + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + } > OUT_BATCH(0); > OUT_BATCH(0); > OUT_BATCH(0); > OUT_BATCH(0); > } else { > - OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); > - OUT_BATCH(0); > + if (use_gather) { > + OUT_BATCH(0); > + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); > + } else { > + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); > + OUT_BATCH(0); > + } > OUT_BATCH(0); > OUT_BATCH(0); > } > > ADVANCE_BATCH(); > > + /* Re-enable gather again if required */ > + if (gather_switched_off) > + gen7_toggle_gather_constants(brw, true); > + > /* On SKL+ the new constants don't take effect until the next > corresponding > * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure > * that is sent > -- > 1.9.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev