On Thursday, November 12, 2015 07:27:31 PM Jason Ekstrand wrote: > On Wed, Nov 11, 2015 at 12:02 PM, Kenneth Graunke <kenn...@whitecape.org> > wrote: > > The geometry and tessellation control shader stages both read from > > multiple URB entries (one per vertex). The thread payload contains > > several URB handles which reference these separate memory segments. > > > > In GLSL, these inputs are represented as per-vertex arrays; the > > outermost array index selects which vertex's inputs to read. This > > array index does not necessarily need to be constant. > > > > To handle that, we need to use indirect addressing on GRFs to select > > which of the thread payload registers has the appropriate URB handle. > > (This is before we can even think about applying the pull model!) > > > > This patch introduces a new opcode which performs a MOV from a > > source using VxH indirect addressing (which allows each of the 8 > > SIMD channels to select distinct data.) > > > > Based on a patch by Jason Ekstrand. > > > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > > --- > > src/mesa/drivers/dri/i965/brw_defines.h | 9 +++++++ > > src/mesa/drivers/dri/i965/brw_fs.cpp | 24 ++++++++++++++++++ > > src/mesa/drivers/dri/i965/brw_fs.h | 5 ++++ > > src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 1 + > > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 34 > > ++++++++++++++++++++++++++ > > src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ > > 6 files changed, 75 insertions(+) > > > > Jason, > > > > I omitted a few things from your patch: > > > > - UNIFORM handling in regs_read() - didn't want to ship it without testing > > it > > So you'll need to add this back in when you start using it. > > - Gen7 base_offset munging - on IRC you suggested dropping imm_offset, so > > it > > didn't appear to actually do anything. Feel free to put it back in if > > you > > need it, or switch your code to not use base_offset. > > > > But we're using regs_read() now rather than RA hackery, and using a register > > with a HW_REG file instead of an IMM offset, so that should be a lot more > > reusable. > > > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > > b/src/mesa/drivers/dri/i965/brw_defines.h > > index 99a3a2d..6e1cfed 100644 > > --- a/src/mesa/drivers/dri/i965/brw_defines.h > > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > > @@ -1268,6 +1268,15 @@ enum opcode { > > * Calculate the high 32-bits of a 32x32 multiply. > > */ > > SHADER_OPCODE_MULH, > > + > > + /** > > + * A MOV that uses VxH indirect addressing. > > + * > > + * Source 0: A register to start from (HW_REG). > > + * Source 1: An indirect offset (in bytes, UD GRF). > > + * Source 2: The maximum value of the indirect offset (in bytes, UD > > IMM). > > + */ > > + SHADER_OPCODE_MOV_INDIRECT, > > }; > > > > enum brw_urb_write_flags { > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > > b/src/mesa/drivers/dri/i965/brw_fs.cpp > > index be712e5..bf8a4a6 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > > @@ -841,6 +841,30 @@ fs_inst::regs_read(int arg) const > > case SHADER_OPCODE_BARRIER: > > return 1; > > > > + case SHADER_OPCODE_MOV_INDIRECT: > > + if (arg == 0) { > > + assert(src[2].file == IMM); > > + unsigned max_indirect = src[2].fixed_hw_reg.dw1.ud; > > + > > + if (src[0].file == HW_REG) { > > + assert(src[0].file == HW_REG); > > + > > + unsigned regs = DIV_ROUND_UP(max_indirect, REG_SIZE); > > + > > + /* If the start of the region is not register aligned, then > > + * we only read part of the register at the beginning, but > > + * overlap into another register at the end. So add 1. > > + */ > > + if (src[0].fixed_hw_reg.subnr) > > + regs++; > > This isn't quite correct. It's actually more complicated based on the > register stride, the actual subnr, and the actual (not rounded) size > of max_indirect. However, this will give you enough registers and, > since we're dealing with HW_REG, that's good enough.
Was your code more correct? I'm happy to change it to be more accurate. I suppose this could be pretty off if the strides/types are small. Do you have an example that's wrong? > > + > > + return regs; > > + } else { > > + assert(!"Invalid register file"); > > + } > > + } > > + break; > > + > > default: > > if (is_tex() && arg == 0 && src[0].file == GRF) > > return mlen; > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > > b/src/mesa/drivers/dri/i965/brw_fs.h > > index 8a93b56..c599cde 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs.h > > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > > @@ -526,6 +526,11 @@ private: > > struct brw_reg offset, > > struct brw_reg value); > > > > + void generate_mov_indirect(fs_inst *inst, > > + struct brw_reg dst, > > + struct brw_reg reg, > > + struct brw_reg indirect_byte_offset); > > + > > bool patch_discard_jumps_to_fb_writes(); > > > > const struct brw_compiler *compiler; > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp > > b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp > > index 3a28c8d..ffb5f87 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp > > @@ -78,6 +78,7 @@ is_expression(const fs_visitor *v, const fs_inst *const > > inst) > > case FS_OPCODE_LINTERP: > > case SHADER_OPCODE_FIND_LIVE_CHANNEL: > > case SHADER_OPCODE_BROADCAST: > > + case SHADER_OPCODE_MOV_INDIRECT: > > return true; > > case SHADER_OPCODE_RCP: > > case SHADER_OPCODE_RSQ: > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > > index 974219f..97a85bb 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > > @@ -368,6 +368,36 @@ fs_generator::generate_fb_write(fs_inst *inst, struct > > brw_reg payload) > > } > > > > void > > +fs_generator::generate_mov_indirect(fs_inst *inst, > > + struct brw_reg dst, > > + struct brw_reg reg, > > + struct brw_reg indirect_byte_offset) > > +{ > > + assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD); > > + assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE); > > + > > + unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr; > > + > > + /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ > > + struct brw_reg addr = vec8(brw_address_reg(0)); > > + > > + /* The destination stride of an instruction (in bytes) must be greater > > + * than or equal to the size of the rest of the instruction. Since the > > + * address register is of type UW, we can't use a D-type instruction. > > + * In order to get around this, re re-type to UW and use a stride. > > + */ > > + indirect_byte_offset = > > + retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); > > + > > + /* Prior to Broadwell, there are only 8 address registers. */ > > + assert(inst->exec_size == 8 || devinfo->gen >= 8); > > + > > + brw_MOV(p, addr, indirect_byte_offset); > > + brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE); > > Why are we disabling the mask? I suppose it's not required, as none of the unset values will actually be used by the next instruction. I can drop that. > > + brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type)); > > +} > > + > > +void > > fs_generator::generate_urb_read(fs_inst *inst, > > struct brw_reg dst, > > struct brw_reg header) > > @@ -2070,6 +2100,10 @@ fs_generator::generate_code(const cfg_t *cfg, int > > dispatch_width) > > fill_count++; > > break; > > > > + case SHADER_OPCODE_MOV_INDIRECT: > > + generate_mov_indirect(inst, dst, src[0], src[1]); > > + break; > > + > > case SHADER_OPCODE_URB_READ_SIMD8: > > generate_urb_read(inst, dst, src[0]); > > break; > > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > > b/src/mesa/drivers/dri/i965/brw_shader.cpp > > index a6b57c3..9550a62 100644 > > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > > @@ -552,6 +552,8 @@ brw_instruction_name(enum opcode op) > > return "barrier"; > > case SHADER_OPCODE_MULH: > > return "mulh"; > > + case SHADER_OPCODE_MOV_INDIRECT: > > + return "mov_indirect"; > > } > > > > unreachable("not reached"); > > -- > > 2.6.2 > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev