On IVB/VLV we need to split DF instructions in two, because the hardware needs an execsize of 8 to process 4 DF values (one GRF in total).
Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> --- src/mesa/drivers/dri/i965/brw_ir_vec4.h | 1 + src/mesa/drivers/dri/i965/brw_vec4.cpp | 20 +++++++++++++++++++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 15 +++++++++++++-- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index 57fc6be..9291be0 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -291,6 +291,7 @@ public: bool can_do_writemask(const struct gen_device_info *devinfo); bool can_change_types() const; bool has_source_and_destination_hazard() const; + unsigned exec_data_size() const; bool is_align1_partial_write() { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index b5e846d..6ba85d7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -301,6 +301,19 @@ vec4_instruction::can_change_types() const !src[1].abs && !src[1].negate)); } +unsigned +vec4_instruction::exec_data_size() const +{ + unsigned exec_data_size = 0; + + for (int i = 0; i < 3; i++) { + if (this->src[i].type != BAD_FILE) + exec_data_size = MAX2(exec_data_size, type_sz(this->src[i].type)); + } + + return exec_data_size; +} + /** * Returns how many MRFs an opcode will write over. 
* @@ -2087,6 +2100,10 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, if (inst->opcode == BRW_OPCODE_SEL && type_sz(inst->dst.type) == 8) lowered_width = MIN2(lowered_width, 4); + if (devinfo->gen == 7 && !devinfo->is_haswell && + (inst->exec_data_size() == 8 || type_sz(inst->dst.type) == 8)) + lowered_width = MIN2(lowered_width, 4); + /* HSW PRM, 3D Media GPGPU Engine, Region Alignment Rules for Direct * Register Addressing: * @@ -2194,7 +2211,8 @@ vec4_visitor::lower_simd_width() inst->insert_before(block, copy); } } else { - dst = horiz_offset(inst->dst, channel_offset); + if (inst->dst.file != ARF) + dst = horiz_offset(inst->dst, channel_offset); } linst->dst = dst; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 3d688cf..0eaa91b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1511,14 +1511,25 @@ generate_code(struct brw_codegen *p, brw_set_default_saturate(p, inst->saturate); brw_set_default_mask_control(p, inst->force_writemask_all); brw_set_default_acc_write_control(p, inst->writes_accumulator); - brw_set_default_exec_size(p, cvt(inst->exec_size) - 1); - assert(inst->group % inst->exec_size == 0); + bool is_ivb_df = devinfo->gen == 7 && + !devinfo->is_haswell && + (inst->exec_data_size() == 8 || + inst->dst.type == BRW_REGISTER_TYPE_DF); + + assert(inst->group % inst->exec_size == 0 || + is_ivb_df); + assert(inst->group % 8 == 0 || inst->dst.type == BRW_REGISTER_TYPE_DF || inst->src[0].type == BRW_REGISTER_TYPE_DF || inst->src[1].type == BRW_REGISTER_TYPE_DF || inst->src[2].type == BRW_REGISTER_TYPE_DF); + + if (is_ivb_df && inst->exec_size < 8) + inst->exec_size *= 2; + brw_set_default_exec_size(p, cvt(inst->exec_size) - 1); + if (!inst->force_writemask_all) brw_set_default_group(p, inst->group); -- 2.9.3 _______________________________________________ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev