On 2015-04-04 01:23:28, Kenneth Graunke wrote: > This allows SIMD16 mode to work for a lot more programs. Texturing is > also more efficient in SIMD16 mode than SIMD8. Several messages don't > actually exist in SIMD8 mode, so we did SIMD16 messages and threw away > half of the data. Now we compute real data in both halves. > > Also, the SIMD16 "sample" message doesn't require all three coordinate > components to exist (like the SIMD8 one), so we can shorten the message > lengths, cutting register usage a bit. > > I chose to implement the visitor functionality in a separate function, > since mixing true SIMD16 with SIMD8 code that uses SIMD16 fallbacks > seemed like a mess. The new code bails on a few cases where we'd > have to do two SIMD8 messages - we just fall back to SIMD8 for now. > > Improves performance in "Shadowrun: Dragonfall - Director's Cut" by > about 20% on GM45 (measured with LIBGL_SHOW_FPS=1 while standing around > in the first mission). > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/brw_fs.h | 4 ++ > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 28 ++++++++--- > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 68 > +++++++++++++++++++++++++- > 3 files changed, 90 insertions(+), 10 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 278a8ee..cfdbf55 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -271,6 +271,10 @@ public: > fs_reg shadow_comp, > fs_reg lod, fs_reg lod2, int grad_components, > uint32_t sampler); > + fs_inst *emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, > + fs_reg coordinate, int vector_elements, > + fs_reg shadow_c, fs_reg lod, > + uint32_t sampler); > fs_inst *emit_texture_gen5(ir_texture_opcode op, fs_reg dst, > fs_reg coordinate, int coord_components, > fs_reg shadow_comp, > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index 40e51aa..2743297 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -622,16 +622,26 @@ fs_generator::generate_tex(fs_inst *inst, struct > brw_reg dst, struct brw_reg src > /* Note that G45 and older determines shadow compare and dispatch > width > * from message length for most messages. > */ > - assert(dispatch_width == 8); > - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; > - if (inst->shadow_compare) { > - assert(inst->mlen == 6); > - } else { > - assert(inst->mlen <= 4); > - } > + if (dispatch_width == 8) { > + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; > + if (inst->shadow_compare) { > + assert(inst->mlen == 6); > + } else { > + assert(inst->mlen <= 4); > + } > + } else { > + if (inst->shadow_compare) { > + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; > + assert(inst->mlen == 9); > + } else { > + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; > + assert(inst->mlen <= 7 && inst->mlen % 2 == 1); > + } > + } > break; > case FS_OPCODE_TXB: > if (inst->shadow_compare) { > + assert(dispatch_width == 8); > assert(inst->mlen == 6); > msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE; > } else { > @@ -642,6 +652,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg > dst, struct brw_reg src > break; > case SHADER_OPCODE_TXL: > if (inst->shadow_compare) { > + assert(dispatch_width == 8); > assert(inst->mlen == 6); > msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE; > } else { > @@ -652,11 +663,12 @@ fs_generator::generate_tex(fs_inst *inst, struct > brw_reg dst, struct brw_reg src > break; > case SHADER_OPCODE_TXD: > /* There is no sample_d_c message; comparisons are done manually */ > + assert(dispatch_width == 8); > assert(inst->mlen == 7 || inst->mlen == 10); > msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; > break; > case SHADER_OPCODE_TXF: > - assert(inst->mlen == 9); > + assert(inst->mlen <= 9 && 
inst->mlen % 2 == 1); > msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD; > simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; > break; > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index 8c0ec33..25c424a 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -1435,8 +1435,6 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, > fs_reg dst, > bool simd16 = false; > fs_reg orig_dst; > > - no16("SIMD16 texturing on Gen4 not supported yet."); > - > /* g0 header. */ > mlen = 1; > > @@ -1588,6 +1586,69 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, > fs_reg dst, > return inst; > } > > +fs_inst * > +fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, > + fs_reg coordinate, int vector_elements, > + fs_reg shadow_c, fs_reg lod, > + uint32_t sampler) > +{ > + fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width); > + bool has_lod = op == ir_txl || op == ir_txb; > + > + if (has_lod && shadow_c.file != BAD_FILE) > + no16("TXB and TXL with shadow comparison unsupported in SIMD16."); > + > + if (op == ir_txd) > + no16("textureGrad unsupported in SIMD16."); > + > + /* Copy the coordinates. */ > + for (int i = 0; i < vector_elements; i++) { > + emit(MOV(retype(offset(message, i), coordinate.type), coordinate)); > + coordinate = offset(coordinate, 1); > + } > + > + fs_reg msg_end = offset(message, vector_elements); > + > + /* Messages other than sample and ld require all three components */ > + if (has_lod || shadow_c.file != BAD_FILE) { > + for (int i = vector_elements; i < 3; i++) { > + emit(MOV(offset(message, i), fs_reg(0.0f))); > + } > + } > + > + if (has_lod) { > + fs_reg msg_lod = retype(offset(message, 3), op == ir_txf ? > + BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
From above: has_lod = (op == ir_txl || op == ir_txb), so inside this if (has_lod) block the op == ir_txf check is always false and msg_lod is always retyped to BRW_REGISTER_TYPE_F, right? Should has_lod also include ir_txf? Otherwise, Series Reviewed-by: Jordan Justen <jordan.l.jus...@intel.com> > + emit(MOV(msg_lod, lod)); > + msg_end = offset(msg_lod, 1); > + } > + > + if (shadow_c.file != BAD_FILE) { > + fs_reg msg_ref = offset(message, 3 + has_lod); > + emit(MOV(msg_ref, shadow_c)); > + msg_end = offset(msg_ref, 1); > + } > + > + enum opcode opcode; > + switch (op) { > + case ir_tex: opcode = SHADER_OPCODE_TEX; break; > + case ir_txb: opcode = FS_OPCODE_TXB; break; > + case ir_txd: opcode = SHADER_OPCODE_TXD; break; > + case ir_txl: opcode = SHADER_OPCODE_TXL; break; > + case ir_txs: opcode = SHADER_OPCODE_TXS; break; > + case ir_txf: opcode = SHADER_OPCODE_TXF; break; > + default: unreachable("not reached"); > + } > + > + fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); > + inst->base_mrf = message.reg - 1; > + inst->mlen = msg_end.reg - inst->base_mrf; > + inst->header_present = true; > + inst->regs_written = 8; > + > + return inst; > +} > + > /* gen5's sampler has slots for u, v, r, array index, then optional > * parameters like shadow comparitor or LOD bias. 
If optional > * parameters aren't present, those base slots are optional and don't > @@ -2150,6 +2211,9 @@ fs_visitor::emit_texture(ir_texture_opcode op, > shadow_c, lod, lod2, grad_components, > sample_index, sampler, > offset_value.file != BAD_FILE); > + } else if (dispatch_width == 16) { > + inst = emit_texture_gen4_simd16(op, dst, coordinate, coord_components, > + shadow_c, lod, sampler); > } else { > inst = emit_texture_gen4(op, dst, coordinate, coord_components, > shadow_c, lod, lod2, grad_components, > -- > 2.1.2 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev