This instruction calculates the index of an arbitrary channel enabled in the current execution mask. It's expected to be used as input for the BROADCAST opcode. It's implemented as a separate instruction rather than being baked into BROADCAST because FIND_LIVE_CHANNEL has no dependencies, so it can always be CSE'ed (merged by common subexpression elimination) with other instances of the same instruction within a basic block. --- src/mesa/drivers/dri/i965/brw_defines.h | 8 +++ src/mesa/drivers/dri/i965/brw_eu.h | 4 ++ src/mesa/drivers/dri/i965/brw_eu_emit.c | 70 ++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 4 ++ src/mesa/drivers/dri/i965/brw_shader.cpp | 2 + src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 4 ++ 6 files changed, 92 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index d4930e3..2b52fb2 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -912,6 +912,14 @@ enum opcode { SHADER_OPCODE_URB_WRITE_SIMD8, /** + * Return the index of an arbitrary live channel (i.e. one of the channels + * enabled in the current execution mask) and assign it to the first + * component of the destination. Expected to be used as input for the + * BROADCAST pseudo-opcode. + */ + SHADER_OPCODE_FIND_LIVE_CHANNEL, + + /** * Pick the channel from its first source register given by the index * specified as second source. Useful for variable indexing of surfaces. */ diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 2505480..1a8b38c 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -414,6 +414,10 @@ brw_pixel_interpolator_query(struct brw_compile *p, unsigned response_length); void +brw_find_live_channel(struct brw_compile *p, + struct brw_reg dst); + +void brw_broadcast(struct brw_compile *p, struct brw_reg dst, struct brw_reg src, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index d7e3995..7899f83 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2855,6 +2855,76 @@ brw_pixel_interpolator_query(struct brw_compile *p, } void +brw_find_live_channel(struct brw_compile *p, struct brw_reg dst) +{ + const struct brw_context *brw = p->brw; + brw_inst *inst; + + assert(brw->gen >= 7); + + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + + if (brw_inst_access_mode(brw, p->current) == BRW_ALIGN_1) { + if (brw->gen >= 8) { + /* Getting the first active channel index is easy on Gen8: Just find + * the first bit set in the mask register. 
The same register exists + * on HSW already but it reads back as all ones when the current + * instruction has execution masking disabled, so it's kind of + * useless. + */ + inst = brw_FBL(p, vec1(dst), + retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)); + + /* Quarter control has the effect of magically shifting the value of + * this register. Make sure it's set to zero. + */ + brw_inst_set_qtr_control(brw, inst, GEN6_COMPRESSION_1Q); + + } else { + const struct brw_reg flag = retype(brw_flag_reg(1, 0), + BRW_REGISTER_TYPE_UD); + + brw_MOV(p, flag, brw_imm_ud(0)); + + /* Run a 16-wide instruction returning zero with execution masking + * and a conditional modifier enabled in order to get the current + * execution mask in f1.0. + */ + inst = brw_MOV(p, vec16(brw_null_reg()), brw_imm_ud(0)); + brw_inst_set_mask_control(brw, inst, BRW_MASK_ENABLE); + brw_inst_set_cond_modifier(brw, inst, BRW_CONDITIONAL_Z); + brw_inst_set_flag_reg_nr(brw, inst, 1); + + brw_FBL(p, vec1(dst), flag); + } + + } else { + if (brw->gen >= 8) { + /* In SIMD4x2 mode the first active channel index is just the + * negation of the first bit of the mask register. + */ + inst = brw_AND(p, brw_writemask(dst, WRITEMASK_X), + negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)), + brw_imm_ud(1)); + + } else { + /* Overwrite the destination without and with execution masking to + * find out which of the channels is active. 
+ */ + brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X), + brw_imm_ud(1)); + + inst = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X), + brw_imm_ud(0)); + brw_inst_set_mask_control(brw, inst, BRW_MASK_ENABLE); + } + } + + brw_pop_insn_state(p); +} + +void brw_broadcast(struct brw_compile *p, struct brw_reg dst, struct brw_reg src, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index b611641..554f7e0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -2015,6 +2015,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_set_simd4x2_offset(inst, dst, src[0]); break; + case SHADER_OPCODE_FIND_LIVE_CHANNEL: + brw_find_live_channel(p, dst); + break; + case SHADER_OPCODE_BROADCAST: brw_broadcast(p, dst, src[0], src[1]); break; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index bbb5532..5926f25 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -471,6 +471,8 @@ brw_instruction_name(enum opcode op) case SHADER_OPCODE_URB_WRITE_SIMD8: return "gen8_urb_write_simd8"; + case SHADER_OPCODE_FIND_LIVE_CHANNEL: + return "find_live_channel"; case SHADER_OPCODE_BROADCAST: return "broadcast"; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 60384c3..649dca9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1508,6 +1508,10 @@ vec4_generator::generate_code(const cfg_t *cfg) generate_untyped_surface_read(inst, dst, src[0]); break; + case SHADER_OPCODE_FIND_LIVE_CHANNEL: + brw_find_live_channel(p, dst); + break; + case SHADER_OPCODE_BROADCAST: brw_broadcast(p, dst, src[0], src[1]); break; -- 2.1.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/mesa-dev