On Mon, Dec 22, 2014 at 7:29 PM, Ben Widawsky <benjamin.widaw...@intel.com> wrote: > The fancy DW * DW = QW that was enabled earlier in the series for the fs does > not work for the vec4 paths. vec4 paths use ALIGN16 mode, which is restricted > from the extended precision granted by gen8. > > This is based on a similar patch from Ken for the FS backend which was no > longer > needed after I wrote equivalent code using the QW type. > > Cc: Kenneth Graunke <kenn...@whitecape.org> > Signed-off-by: Ben Widawsky <b...@bwidawsk.net> > --- > src/mesa/drivers/dri/i965/brw_defines.h | 3 ++- > src/mesa/drivers/dri/i965/brw_eu.h | 21 +++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 21 --------------------- > src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 12 ++++++++++++ > src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 10 ++++++++-- > 6 files changed, 45 insertions(+), 24 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 102ba4a..77e1008 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -1101,7 +1101,8 @@ enum opcode { > */ > GS_OPCODE_FF_SYNC_SET_PRIMITIVES, > > - SHADER_OPCODE_MOV64, > + SHADER_OPCODE_MOV64, /* Used by the fs backend*/
Space before */ > + SHADER_OPCODE_MUL64, /* Used by vec4 backend */ > }; > > enum brw_urb_write_flags { > diff --git a/src/mesa/drivers/dri/i965/brw_eu.h > b/src/mesa/drivers/dri/i965/brw_eu.h > index 22d5a0a..1ca19bc 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu.h > +++ b/src/mesa/drivers/dri/i965/brw_eu.h > @@ -326,6 +326,27 @@ void brw_shader_time_add(struct brw_compile *p, > uint32_t surf_index); > > /** > + * Change the register's data type from UD to W, doubling the strides in > order > + * to compensate for halving the data type width. > + */ > +static inline struct brw_reg > +ud_reg_to_w(struct brw_reg r) > +{ > + assert(r.type == BRW_REGISTER_TYPE_UD); > + r.type = BRW_REGISTER_TYPE_W; > + > + /* The BRW_*_STRIDE enums are defined so that incrementing the field > + * doubles the real stride. > + */ > + if (r.hstride != 0) > + ++r.hstride; > + if (r.vstride != 0) > + ++r.vstride; > + > + return r; > +} > + > +/** > * Return the generation-specific jump distance scaling factor. > * > * Given the number of instructions to jump, we need to scale by > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index 3a15837..15854b0 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -1375,27 +1375,6 @@ fs_generator::generate_set_sample_id(fs_inst *inst, > brw_pop_insn_state(p); > } > > -/** > - * Change the register's data type from UD to W, doubling the strides in > order > - * to compensate for halving the data type width. > - */ > -static struct brw_reg > -ud_reg_to_w(struct brw_reg r) > -{ > - assert(r.type == BRW_REGISTER_TYPE_UD); > - r.type = BRW_REGISTER_TYPE_W; > - > - /* The BRW_*_STRIDE enums are defined so that incrementing the field > - * doubles the real stride. > - */ > - if (r.hstride != 0) > - ++r.hstride; > - if (r.vstride != 0) > - ++r.vstride; > - > - return r; > -} > - > void > fs_generator::generate_pack_half_2x16_split(fs_inst *inst, > struct brw_reg dst, > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index 5373b75..3891f43 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -404,6 +404,8 @@ brw_instruction_name(enum opcode op) > case FS_OPCODE_REP_FB_WRITE: > return "rep_fb_write"; > > + case SHADER_OPCODE_MUL64: > + return "mov64"; "mul64" > case SHADER_OPCODE_MOV64: > return "mov64"; > case SHADER_OPCODE_RCP: > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index b88a579..8994a02 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -1197,6 +1197,18 @@ vec4_generator::generate_code(const cfg_t *cfg) > case BRW_OPCODE_MUL: > brw_MUL(p, dst, src[0], src[1]); > break; > + > + case SHADER_OPCODE_MUL64: { > + assert(brw->gen >= 8); > + struct brw_reg acc = retype(brw_acc_reg(8), dst.type); > + struct brw_reg src1_w = > + ud_reg_to_w(retype(src[1], BRW_REGISTER_TYPE_UD)); > + if (src[1].type == BRW_REGISTER_TYPE_UD) > + src1_w.type = BRW_REGISTER_TYPE_UW; > + > + brw_MUL(p, acc, src[0], src1_w); > + break; > + } > case BRW_OPCODE_MACH: > brw_MACH(p, dst, src[0], src[1]); > break; > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > index 09d79c8..8d52a89 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > @@ -1492,13 +1492,19 @@ vec4_visitor::visit(ir_expression *ir) > emit(MOV(result_dst, src_reg(acc))); > } > } else { > + /* Gen8+ can natively multiply a DW * DW chopping off the upper > bits of > + * the operation. No MACH is needed > + */ This hunk is supposed to be in some other patch. > emit(MUL(result_dst, op[0], op[1])); > } > break; > case ir_binop_imul_high: { > struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type); > - > - emit(MUL(acc, op[0], op[1])); > + if (brw->gen >= 8) { > + emit(SHADER_OPCODE_MUL64, acc, op[0], op[1]); I don't think we still want to be using the accumulator? Or, we don't want to be using MUL64 here? The MUL+MACH macro only works when the MUL does a 16x32 multiply, and we're doing a 32x32 multiply here. > + } else { > + emit(MUL(acc, op[0], op[1])); > + } > emit(MACH(result_dst, op[0], op[1])); > break; > } > -- > 2.2.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev