Given that we have multiple possible uses for such an opcode, I've been wondering whether it would be better to simply have a SHADER_OPCODE_INDIRECT_MOV opcode that works on pretty much any register type. Since they all get lowered away to HW_REG before the end, the emit code wouldn't have to do anything special. The opcode in this patch could then simply be an INDIRECT_MOV with an ATTR source, while my uniform opcode would use a UNIFORM source. If we did this, the immediate "range" argument would have to be in bytes, but that's not a huge deal.
On Sat, Nov 7, 2015 at 9:03 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > The geometry and tessellation control shader stages both read from > multiple URB entries (one per vertex). The thread payload contains > several URB handles which reference these separate memory segments. > > In GLSL, these inputs are represented as per-vertex arrays; the > outermost array index selects which vertex's inputs to read. This > array index does not necessarily need to be constant. > > To handle that, we need to use indirect addressing on GRFs to select > which of the thread payload registers has the appropriate URB handle. > (This is before we can even think about applying the pull model!) > > This patch introduces a new opcode which performs a MOV from a > source using VxH indirect addressing (which allows each of the 8 > SIMD channels to select distinct data.) It also marks a whole > segment of the payload as "used", so the register allocator recognizes > the read and avoids reusing those registers. > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/brw_defines.h | 11 ++++++++ > src/mesa/drivers/dri/i965/brw_fs.h | 4 +++ > src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 1 + > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 32 > +++++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 10 +++++++ > src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ > 6 files changed, 60 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 6433cff..288d8b2 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -1264,6 +1264,17 @@ enum opcode { > * Calculate the high 32-bits of a 32x32 multiply. > */ > SHADER_OPCODE_MULH, > + > + /** > + * A SIMD8 VxH indirect addressed MOV from the thread payload. > + * > + * This can be used to select GS or TCS input URB handles. 
> + * > + * Source 0: Immediate offset in bytes (UD immediate). > + * Source 1: Indirect offset in bytes (UD GRF). > + * Source 2: Number of registers that could be indirectly addressed. > + */ > + SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV, > }; > > enum brw_urb_write_flags { > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 8a93b56..fb70f0c 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -526,6 +526,10 @@ private: > struct brw_reg offset, > struct brw_reg value); > > + void generate_indirect_thread_payload_mov(struct brw_reg dst, > + struct brw_reg imm_byte_offset, > + struct brw_reg > indirect_byte_offset); > + > bool patch_discard_jumps_to_fb_writes(); > > const struct brw_compiler *compiler; > diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp > b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp > index 3a28c8d..699baab 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp > @@ -78,6 +78,7 @@ is_expression(const fs_visitor *v, const fs_inst *const > inst) > case FS_OPCODE_LINTERP: > case SHADER_OPCODE_FIND_LIVE_CHANNEL: > case SHADER_OPCODE_BROADCAST: > + case SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV: > return true; > case SHADER_OPCODE_RCP: > case SHADER_OPCODE_RSQ: > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index e207a77..7d51c0e 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -368,6 +368,33 @@ fs_generator::generate_fb_write(fs_inst *inst, struct > brw_reg payload) > } > > void > +fs_generator::generate_indirect_thread_payload_mov(struct brw_reg dst, > + struct brw_reg > imm_byte_offset_reg, > + struct brw_reg > indirect_byte_offset) > +{ > + assert(imm_byte_offset_reg.type == BRW_REGISTER_TYPE_UD); > + assert(imm_byte_offset_reg.file == BRW_IMMEDIATE_VALUE); > + 
assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD); > + assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE); > + unsigned imm_byte_offset = imm_byte_offset_reg.dw1.ud; > + > + /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ > + struct brw_reg addr = vec8(brw_address_reg(0)); > + > + /* The destination stride of an instruction (in bytes) must be greater > + * than or equal to the size of the rest of the instruction. Since the > + * address register is of type UW, we can't use a D-type instruction. > + * In order to get around this, we re-type to UW and use a stride. > + */ > + indirect_byte_offset = > + retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); > + > + brw_MOV(p, addr, indirect_byte_offset); > + brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE); > + brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type)); > +} > + > +void > fs_generator::generate_urb_read(fs_inst *inst, > struct brw_reg dst, > struct brw_reg header) > @@ -2085,6 +2112,11 @@ fs_generator::generate_code(const cfg_t *cfg, int > dispatch_width) > fill_count++; > break; > > + case SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV: > + assert(inst->exec_size == 8); > + generate_indirect_thread_payload_mov(dst, src[0], src[1]); > + break; > + > case SHADER_OPCODE_URB_READ_SIMD8: > generate_urb_read(inst, dst, src[0]); > break; > diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp > b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp > index 9251d95..648a0f8 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp > @@ -389,6 +389,16 @@ void fs_visitor::calculate_payload_ranges(int > payload_node_count, > case CS_OPCODE_CS_TERMINATE: > payload_last_use_ip[0] = use_ip; > break; > + case SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV: { > + assert(inst->src[0].file == IMM && inst->src[2].file == IMM); > + int first_reg = inst->src[0].fixed_hw_reg.dw1.ud /
REG_SIZE; > + int num_regs = inst->src[2].fixed_hw_reg.dw1.ud; > + > + for (int i = 0; i < num_regs; i++) { > + payload_last_use_ip[first_reg + i] = use_ip; > + } > + break; > + } > > default: > if (inst->eot) { > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index 4ea297a..5e407e9 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -546,6 +546,8 @@ brw_instruction_name(enum opcode op) > return "barrier"; > case SHADER_OPCODE_MULH: > return "mulh"; > + case SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV: > + return "indirect_thread_payload_mov"; > } > > unreachable("not reached"); > -- > 2.6.2 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev