The fancy DW * DW = QW that was enabled earlier in the series for the fs does not work for the vec4 paths. vec4 paths use ALIGN16 mode, which is restricted from the extended precision granted by gen8.
This is based on a similar patch from Ken for the FS backend which was no longer needed after I wrote equivalent code using the QW type. Cc: Kenneth Graunke <kenn...@whitecape.org> Signed-off-by: Ben Widawsky <b...@bwidawsk.net> --- src/mesa/drivers/dri/i965/brw_defines.h | 3 ++- src/mesa/drivers/dri/i965/brw_eu.h | 21 +++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 21 --------------------- src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 12 ++++++++++++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 10 ++++++++-- 6 files changed, 45 insertions(+), 24 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 102ba4a..77e1008 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1101,7 +1101,8 @@ enum opcode { */ GS_OPCODE_FF_SYNC_SET_PRIMITIVES, - SHADER_OPCODE_MOV64, + SHADER_OPCODE_MOV64, /* Used by the fs backend*/ + SHADER_OPCODE_MUL64, /* Used by vec4 backend */ }; enum brw_urb_write_flags { diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 22d5a0a..1ca19bc 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -326,6 +326,27 @@ void brw_shader_time_add(struct brw_compile *p, uint32_t surf_index); /** + * Change the register's data type from UD to W, doubling the strides in order + * to compensate for halving the data type width. + */ +static inline struct brw_reg +ud_reg_to_w(struct brw_reg r) +{ + assert(r.type == BRW_REGISTER_TYPE_UD); + r.type = BRW_REGISTER_TYPE_W; + + /* The BRW_*_STRIDE enums are defined so that incrementing the field + * doubles the real stride. + */ + if (r.hstride != 0) + ++r.hstride; + if (r.vstride != 0) + ++r.vstride; + + return r; +} + +/** * Return the generation-specific jump distance scaling factor. * * Given the number of instructions to jump, we need to scale by diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 3a15837..15854b0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1375,27 +1375,6 @@ fs_generator::generate_set_sample_id(fs_inst *inst, brw_pop_insn_state(p); } -/** - * Change the register's data type from UD to W, doubling the strides in order - * to compensate for halving the data type width. - */ -static struct brw_reg -ud_reg_to_w(struct brw_reg r) -{ - assert(r.type == BRW_REGISTER_TYPE_UD); - r.type = BRW_REGISTER_TYPE_W; - - /* The BRW_*_STRIDE enums are defined so that incrementing the field - * doubles the real stride. - */ - if (r.hstride != 0) - ++r.hstride; - if (r.vstride != 0) - ++r.vstride; - - return r; -} - void fs_generator::generate_pack_half_2x16_split(fs_inst *inst, struct brw_reg dst, diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 5373b75..3891f43 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -404,6 +404,8 @@ brw_instruction_name(enum opcode op) case FS_OPCODE_REP_FB_WRITE: return "rep_fb_write"; + case SHADER_OPCODE_MUL64: + return "mov64"; case SHADER_OPCODE_MOV64: return "mov64"; case SHADER_OPCODE_RCP: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index b88a579..8994a02 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1197,6 +1197,18 @@ vec4_generator::generate_code(const cfg_t *cfg) case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; + + case SHADER_OPCODE_MUL64: { + assert(brw->gen >= 8); + struct brw_reg acc = retype(brw_acc_reg(8), dst.type); + struct brw_reg src1_w = + ud_reg_to_w(retype(src[1], BRW_REGISTER_TYPE_UD)); + if (src[1].type == BRW_REGISTER_TYPE_UD) + src1_w.type = BRW_REGISTER_TYPE_UW; + + brw_MUL(p, acc, src[0], src1_w); + break; + } case BRW_OPCODE_MACH: brw_MACH(p, dst, src[0], src[1]); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 09d79c8..8d52a89 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1492,13 +1492,19 @@ vec4_visitor::visit(ir_expression *ir) emit(MOV(result_dst, src_reg(acc))); } } else { + /* Gen8+ can natively multiply a DW * DW chopping off the upper bits of + * the operation. No MACH is needed + */ emit(MUL(result_dst, op[0], op[1])); } break; case ir_binop_imul_high: { struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type); - - emit(MUL(acc, op[0], op[1])); + if (brw->gen >= 8) { + emit(SHADER_OPCODE_MUL64, acc, op[0], op[1]); + } else { + emit(MUL(acc, op[0], op[1])); + } emit(MACH(result_dst, op[0], op[1])); break; } -- 2.2.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev