On Tue, May 24, 2016 at 12:18 AM, Francisco Jerez <curroje...@riseup.net> wrote:
> Alternatively we could have extended the current semantics to 32-wide > mode by changing brw_broadcast() to emit multiple indexed MOV > instructions in the generator copying the selected value to all > destination registers, but it seemed rather silly to waste EU cycles > unnecessarily copying the exact same value 32 times in the GRF. > It appears as if emit_uniformize in fs_builder sets stride == 0 on the result the version in vec4_visitor does not. It's probably not needed for correctness since I think the generator will always take channel 0 anyway, but we should fix it none the less. > The vstride change in the Align16 path is required to avoid assertions > in validate_reg() since the change causes the execution size of the > MOV and SEL instructions to be equal to the source region width. > --- > src/mesa/drivers/dri/i965/brw_defines.h | 6 ++++++ > src/mesa/drivers/dri/i965/brw_eu_emit.c | 12 +++++++++--- > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 1 + > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 1 + > 4 files changed, 17 insertions(+), 3 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 702eb5a..8794d44 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -1080,6 +1080,12 @@ enum opcode { > /** > * Pick the channel from its first source register given by the index > * specified as second source. Useful for variable indexing of > surfaces. > + * > + * Note that because the result of this instruction is by definition > + * uniform and it can always be splatted to multiple channels using a > + * scalar regioning mode, only the first channel of the destination > region > + * is guaranteed to be updated, which implies that BROADCAST > instructions > + * should usually be marked force_writemask_all. > */ > SHADER_OPCODE_BROADCAST, > > diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c > b/src/mesa/drivers/dri/i965/brw_eu_emit.c > index b11398c..ee7462f 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c > +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c > @@ -3425,6 +3425,10 @@ brw_broadcast(struct brw_codegen *p, > const bool align1 = brw_inst_access_mode(devinfo, p->current) == > BRW_ALIGN_1; > brw_inst *inst; > > + brw_push_insn_state(p); > + brw_set_default_mask_control(p, BRW_MASK_DISABLE); > + brw_set_default_exec_size(p, align1 ? BRW_EXECUTE_1 : BRW_EXECUTE_4); > + > assert(src.file == BRW_GENERAL_REGISTER_FILE && > src.address_mode == BRW_ADDRESS_DIRECT); > > @@ -3475,19 +3479,21 @@ brw_broadcast(struct brw_codegen *p, > */ > inst = brw_MOV(p, > brw_null_reg(), > - stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 0, 4, > 1)); > + stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 4, 4, > 1)); > brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE); > brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ); > brw_inst_set_flag_reg_nr(devinfo, inst, 1); > > /* and use predicated SEL to pick the right channel. */ > inst = brw_SEL(p, dst, > - stride(suboffset(src, 4), 0, 4, 1), > - stride(src, 0, 4, 1)); > + stride(suboffset(src, 4), 4, 4, 1), > + stride(src, 4, 4, 1)); > brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL); > brw_inst_set_flag_reg_nr(devinfo, inst, 1); > } > } > + > + brw_pop_insn_state(p); > } > > /** > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index 6421870..804c639 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -2017,6 +2017,7 @@ fs_generator::generate_code(const cfg_t *cfg, int > dispatch_width) > break; > > case SHADER_OPCODE_BROADCAST: > + assert(inst->force_writemask_all); > brw_broadcast(p, dst, src[0], src[1]); > break; > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index baf4422..bb0254e 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -1886,6 +1886,7 @@ generate_code(struct brw_codegen *p, > break; > > case SHADER_OPCODE_BROADCAST: > + assert(inst->force_writemask_all); > brw_broadcast(p, dst, src[0], src[1]); > break; > > -- > 2.7.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev