We need to split DF instructions in two on IVB/BYT, as these platforms need an exec size of 8 to process 4 DF values (one GRF in total).
v2: - Rename helper and make it static inline function (Matt). - Fix indention and add braces (Matt). v3: - Don't edit IR instruction when doubling exec_size (Curro) - Add comment into the code (Curro). - Manage ARF registers like the others (Curro) Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> --- src/intel/compiler/brw_ir_vec4.h | 14 ++++++++++++++ src/intel/compiler/brw_vec4.cpp | 10 ++++++++++ src/intel/compiler/brw_vec4_generator.cpp | 17 +++++++++++++++-- 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_ir_vec4.h b/src/intel/compiler/brw_ir_vec4.h index bd026eb2aeb..6130950c72f 100644 --- a/src/intel/compiler/brw_ir_vec4.h +++ b/src/intel/compiler/brw_ir_vec4.h @@ -404,6 +404,20 @@ regs_read(const vec4_instruction *inst, unsigned i) reg_size); } +static inline unsigned +get_exec_type_size(const vec4_instruction *inst) +{ + unsigned exec_type_size = 0; + + for (int i = 0; i < 3; i++) { + if (inst->src[i].type != BAD_FILE) { + exec_type_size = MAX2(exec_type_size, type_sz(inst->src[i].type)); + } + } + + return exec_type_size; +} + } /* namespace brw */ #endif diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index d7c09093032..a213bbb4707 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -2093,6 +2093,16 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, if (inst->opcode == BRW_OPCODE_SEL && type_sz(inst->dst.type) == 8) lowered_width = MIN2(lowered_width, 4); + /* IvyBridge can manage a maximum of 4 DFs per SIMD4x2 instruction, + * as we need to duplicate the instruction's exec_size at generation time + * to work with doubles. No matter if it has force_writemask_all enabled + * or disabled (the latter is affected by compressed instruction bug in + * gen7, which is another reason to force this limit). 
+ */ + if (devinfo->gen == 7 && !devinfo->is_haswell && + (get_exec_type_size(inst) == 8 || type_sz(inst->dst.type) == 8)) + lowered_width = MIN2(lowered_width, 4); + /* HSW PRM, 3D Media GPGPU Engine, Region Alignment Rules for Direct * Register Addressing: * diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index d3192ab7db3..0d70690b057 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -1522,14 +1522,27 @@ generate_code(struct brw_codegen *p, brw_set_default_saturate(p, inst->saturate); brw_set_default_mask_control(p, inst->force_writemask_all); brw_set_default_acc_write_control(p, inst->writes_accumulator); - brw_set_default_exec_size(p, cvt(inst->exec_size) - 1); - assert(inst->group % inst->exec_size == 0); + bool is_ivb_df = devinfo->gen == 7 && + !devinfo->is_haswell && + (get_exec_type_size(inst) == 8 || + inst->dst.type == BRW_REGISTER_TYPE_DF); + + assert(inst->group % inst->exec_size == 0 || + is_ivb_df); + assert(inst->group % 8 == 0 || inst->dst.type == BRW_REGISTER_TYPE_DF || inst->src[0].type == BRW_REGISTER_TYPE_DF || inst->src[1].type == BRW_REGISTER_TYPE_DF || inst->src[2].type == BRW_REGISTER_TYPE_DF); + + unsigned exec_size = inst->exec_size; + if (is_ivb_df && inst->exec_size < 8) + exec_size *= 2; + + brw_set_default_exec_size(p, cvt(exec_size) - 1); + if (!inst->force_writemask_all) brw_set_default_group(p, inst->group); -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev