From: "Juan A. Suarez Romero" <jasua...@igalia.com> When splitting double_to_single() in Ivybridge/Valleyview, the second part should use a temporary register, and then move the values to the second half of the original destination, so we get all the results in the same register. --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 17 +++++++++++++---- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 1 + 2 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index f533207..afabc22 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -2199,9 +2199,15 @@ vec4_visitor::lower_simd_width() linst->group = channel_offset; linst->size_written = size_written; + /* When spliting double_to_single() in Ivybridge, the second part + * should use in a temporal register. Later we will move the values + * to the second half of the original destination, so we get all the + * results in the same register. We use d2f_pass to detect this case. + */ + bool d2f_pass = (inst->opcode == VEC4_OPCODE_FROM_DOUBLE && n > 0); /* Compute split dst region */ dst_reg dst; - if (needs_temp) { + if (needs_temp || d2f_pass) { unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE); dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)), inst->dst.type); @@ -2234,9 +2240,12 @@ vec4_visitor::lower_simd_width() /* If we used a temporary to store the result of the split * instruction, copy the result to the original destination */ - if (needs_temp) { - vec4_instruction *mov = - MOV(offset(inst->dst, lowered_width, n), src_reg(dst)); + if (needs_temp || d2f_pass) { + vec4_instruction *mov; + if (d2f_pass) + mov = MOV(horiz_offset(inst->dst, n * type_sz(inst->dst.type)), src_reg(dst)); + else + mov = MOV(offset(inst->dst, lowered_width, n), src_reg(dst)); mov->exec_size = lowered_width; mov->group = channel_offset; mov->size_written = size_written; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index a68e14c..9409e9d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1521,6 +1521,7 @@ generate_code(struct brw_codegen *p, is_ivb_df); assert(inst->group % 8 == 0 || + (inst->exec_size == 4 && inst->group % 4 == 0) || inst->dst.type == BRW_REGISTER_TYPE_DF || inst->src[0].type == 
BRW_REGISTER_TYPE_DF || inst->src[1].type == BRW_REGISTER_TYPE_DF || -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev