On Fri, Feb 20, 2015 at 11:48 AM, Francisco Jerez <curroje...@riseup.net> wrote: > The BROADCAST instruction picks the channel from its first source > given by an index passed in as second source. This will be used in > situations where all channels from the same SIMD thread have to agree > on the value of something, e.g. a surface binding table index. > --- > src/mesa/drivers/dri/i965/brw_defines.h | 6 ++ > src/mesa/drivers/dri/i965/brw_eu.h | 6 ++ > src/mesa/drivers/dri/i965/brw_eu_emit.c | 77 > ++++++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 4 ++ > src/mesa/drivers/dri/i965/brw_shader.cpp | 3 + > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 4 ++ > 6 files changed, 100 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 17c27dd..d4930e3 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -911,6 +911,12 @@ enum opcode { > > SHADER_OPCODE_URB_WRITE_SIMD8, > > + /** > + * Pick the channel from its first source register given by the index > + * specified as second source. Useful for variable indexing of surfaces. > + */ > + SHADER_OPCODE_BROADCAST, > + > VEC4_OPCODE_MOV_BYTES, > VEC4_OPCODE_PACK_BYTES, > VEC4_OPCODE_UNPACK_UNIFORM, > diff --git a/src/mesa/drivers/dri/i965/brw_eu.h > b/src/mesa/drivers/dri/i965/brw_eu.h > index a94ea42..2505480 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu.h > +++ b/src/mesa/drivers/dri/i965/brw_eu.h > @@ -413,6 +413,12 @@ brw_pixel_interpolator_query(struct brw_compile *p, > unsigned msg_length, > unsigned response_length); > > +void > +brw_broadcast(struct brw_compile *p, > + struct brw_reg dst, > + struct brw_reg src, > + struct brw_reg idx); > + > /*********************************************************************** > * brw_eu_util.c: > */ > diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c > b/src/mesa/drivers/dri/i965/brw_eu_emit.c > index 1d6fd67..d7e3995 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c > +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c > @@ -2854,6 +2854,83 @@ brw_pixel_interpolator_query(struct brw_compile *p, > brw_inst_set_pi_message_data(brw, insn, data); > } > > +void > +brw_broadcast(struct brw_compile *p, > + struct brw_reg dst, > + struct brw_reg src, > + struct brw_reg idx) > +{ > + const struct brw_context *brw = p->brw; > + const bool align1 = (brw_inst_access_mode(brw, p->current) == > BRW_ALIGN_1);
Unnecessary parentheses. > + brw_inst *inst; > + > + assert(src.file == BRW_GENERAL_REGISTER_FILE && > + src.address_mode == BRW_ADDRESS_DIRECT); > + > + if ((src.vstride == 0 && (src.hstride == 0 || !align1)) || > + idx.file == BRW_IMMEDIATE_VALUE) { > + /* Trivial, the source is already uniform or the index is a constant. > + * We will typically not get here if the optimizer is doing its job, > but > + * asserting would be mean. > + */ > + const unsigned i = (idx.file == BRW_IMMEDIATE_VALUE ? idx.dw1.ud : 0); Unnecessary parentheses. > + brw_MOV(p, dst, > + (align1 ? stride(suboffset(src, i), 0, 1, 0) : > + stride(suboffset(src, 4 * i), 0, 4, 1))); > + Extra new line. > + } else { > + if (align1) { > + const struct brw_reg addr = > + retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); > + const unsigned offset = src.nr * REG_SIZE + src.subnr; > + /* Limit in bytes of the signed indirect addressing immediate. */ > + const unsigned limit = 512; > + > + brw_push_insn_state(p); > + brw_set_default_mask_control(p, BRW_MASK_DISABLE); > + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); > + > + /* Take into account the component size and horizontal stride. */ > + assert(src.vstride == src.hstride + src.width); > + brw_SHL(p, addr, vec1(idx), > + brw_imm_ud(_mesa_logbase2(type_sz(src.type)) + > + src.hstride - 1)); > + > + /* We can only address up to limit bytes using the indirect > + * addressing immediate, account for the difference if the source > + * register is above this limit. > + */ > + if (offset >= limit) > + brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit)); > + > + brw_pop_insn_state(p); > + > + /* Use indirect addressing to fetch the specified component. */ > + brw_MOV(p, dst, > + retype(brw_vec1_indirect(addr.subnr, offset % limit), > + src.type)); > + Extra new line. Putting some of Ian's explanation for why this is needed into the commit message might be good. I had to go read the piglit tests before I really understood. _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev