Even though the hardware spec claims that any "integer DWord multiply" operation is affected by the regioning restrictions of CHV/BXT/GLK, this is inconsistent with the behavior of the simulator and with empirical evidence, both of which suggest that only 32x32-bit integer multiplication is restricted. As a micro-optimization, stop reporting the integer-multiply restriction from has_dst_aligned_region_restriction() for MUL/MAD instructions in which either multiplicand is narrower than 32 bits. --- src/intel/compiler/brw_ir_fs.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 0a0ba1d363a..c50df45922a 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -543,11 +543,19 @@ has_dst_aligned_region_restriction(const gen_device_info *devinfo, const fs_inst *inst) { const brw_reg_type exec_type = get_exec_type(inst); - const bool is_int_multiply = !brw_reg_type_is_floating_point(exec_type) && - (inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD); + /* Even though the hardware spec claims that "integer DWord multiply" + * operations are restricted, empirical evidence and the behavior of the + * simulator suggest that only 32x32-bit integer multiplication is + * restricted. + */ + const bool is_dword_multiply = !brw_reg_type_is_floating_point(exec_type) && + ((inst->opcode == BRW_OPCODE_MUL && + MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4) || + (inst->opcode == BRW_OPCODE_MAD && + MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4)); if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 || - (type_sz(exec_type) == 4 && is_int_multiply)) + (type_sz(exec_type) == 4 && is_dword_multiply)) return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo); else return false; -- 2.19.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev