From: "Juan A. Suarez Romero" <jasua...@igalia.com> When splitting VEC4_OPCODE_FROM_DOUBLE in Ivybridge/Baytrail, the second part should use a temporal register, and then move the values to the second half of the original destination, so we get all the results in the same register.
v2: - Fix typos (Matt). --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 17 +++++++++++++---- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 64b435f3ec4..adcde085305 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -2191,9 +2191,15 @@ vec4_visitor::lower_simd_width() linst->group = channel_offset; linst->size_written = size_written; + /* When splitting VEC4_OPCODE_FROM_DOUBLE on Ivybridge, the second part + * should use in a temporal register. Later we will move the values + * to the second half of the original destination, so we get all the + * results in the same register. We use d2f_pass to detect this case. + */ + bool d2f_pass = (inst->opcode == VEC4_OPCODE_FROM_DOUBLE && n > 0); /* Compute split dst region */ dst_reg dst; - if (needs_temp) { + if (needs_temp || d2f_pass) { unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE); dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)), inst->dst.type); @@ -2226,9 +2232,12 @@ vec4_visitor::lower_simd_width() /* If we used a temporary to store the result of the split * instruction, copy the result to the original destination */ - if (needs_temp) { - vec4_instruction *mov = - MOV(offset(inst->dst, lowered_width, n), src_reg(dst)); + if (needs_temp || d2f_pass) { + vec4_instruction *mov; + if (d2f_pass) + mov = MOV(horiz_offset(inst->dst, n * type_sz(inst->dst.type)), src_reg(dst)); + else + mov = MOV(offset(inst->dst, lowered_width, n), src_reg(dst)); mov->exec_size = lowered_width; mov->group = channel_offset; mov->size_written = size_written; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 7fa1afc9073..b570792badd 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1532,6 +1532,7 @@ generate_code(struct brw_codegen *p, is_ivb_df); assert(inst->group % 8 == 0 || + (inst->exec_size == 4 && inst->group % 4 == 0) || inst->dst.type == BRW_REGISTER_TYPE_DF || inst->src[0].type == BRW_REGISTER_TYPE_DF || inst->src[1].type == BRW_REGISTER_TYPE_DF || -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev