From: "Juan A. Suarez Romero" <jasua...@igalia.com> When splitting a CMP/MOV instruction with NULL dest, DF sources, and conditional modifier; we can't use directly the flag registers, as they will have the wrong results in IVB/VLV after the scalarization.
Rather, we need to store the result in a temporary register, and then use that register to set proper the flags values. If a MOV has a null destination register and a conditional modifier, it can be replaced with a CMP against zero with the same conditional modifier. By doing this replacement, we can do the SIMD lowering without any problem. Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> Signed-off-by: Juan A. Suarez Romero <jasua...@igalia.com --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 80 +++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index c654c8b..74ab866 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -2190,6 +2190,46 @@ vec4_visitor::lower_simd_width() * value of the instruction's dst. */ bool needs_temp = dst_src_regions_overlap(inst); + + /* When splitting instructions with conditional modifiers and NULL + * dest we can't rely directly on the flags to store the result. Rather, + * we need first to enqueue the result in a temporary register, and then + * move those values into flags. + */ + bool inst_df_dst_null = + inst->dst.is_null() && inst->exec_data_size() == 8 && + inst->conditional_mod != BRW_CONDITIONAL_NONE; + + if (inst_df_dst_null) { + /* If there are other DF instructions with NULL destination, + * we need to verify if we can use the temporary register or + * if we need an extra lowering step. + */ + assert(inst->opcode == BRW_OPCODE_MOV || + inst->opcode == BRW_OPCODE_CMP); + + /* Replace MOV.XX with null destination with the equivalente CMP.XX + * with null destination, so we can lower it as explained before. + */ + if (inst->opcode == BRW_OPCODE_MOV) { + vec4_instruction *cmp = + new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst_null_df(), + inst->src[0], + setup_imm_df(0.0, block, inst)); + cmp->conditional_mod = inst->conditional_mod; + cmp->exec_size = inst->exec_size; + cmp->group = inst->group; + cmp->size_written = inst->size_written; + inst->insert_before(block, cmp); + inst->remove(block); + inst = cmp; + } + } + dst_reg inst_dst; + if (inst_df_dst_null) + inst_dst = + retype(dst_reg(VGRF, alloc.allocate(1)), BRW_REGISTER_TYPE_F); + for (unsigned n = 0; n < inst->exec_size / lowered_width; n++) { unsigned channel_offset = lowered_width * n; @@ -2212,7 +2252,7 @@ vec4_visitor::lower_simd_width() bool d2f_pass = (inst->opcode == VEC4_OPCODE_FROM_DOUBLE && n > 0); /* Compute split dst region */ dst_reg dst; - if (needs_temp || d2f_pass) { + if (needs_temp || d2f_pass || inst_df_dst_null) { unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE); dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)), inst->dst.type); @@ -2242,24 +2282,52 @@ vec4_visitor::lower_simd_width() inst->insert_before(block, linst); + dst_reg d2f_dst; + if (inst_df_dst_null) { + unsigned num_regs = DIV_ROUND_UP(lowered_width, type_sz(BRW_REGISTER_TYPE_F)); + d2f_dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)), BRW_REGISTER_TYPE_F); + vec4_instruction *d2f = new(mem_ctx) vec4_instruction(VEC4_OPCODE_FROM_DOUBLE, d2f_dst, src_reg(dst)); + d2f->group = channel_offset; + d2f->exec_size = lowered_width; + d2f->size_written = lowered_width * type_sz(d2f_dst.type); + d2f->predicate = inst->predicate; + inst->insert_before(block, d2f); + } + /* If we used a temporary to store the result of the split * instruction, copy the result to the original destination */ - if (needs_temp || d2f_pass) { + if (needs_temp || d2f_pass || inst_df_dst_null) { vec4_instruction *mov; - if (d2f_pass) + if (d2f_pass) { mov = MOV(horiz_offset(inst->dst, n * type_sz(inst->dst.type)), src_reg(dst)); - else + mov->size_written = size_written; + } else if (inst_df_dst_null) { + mov = MOV(horiz_offset(inst_dst, n * 4), src_reg(d2f_dst)); + mov->size_written = lowered_width * type_sz(inst_dst.type); + } else { mov = MOV(offset(inst->dst, lowered_width, n), src_reg(dst)); + mov->size_written = size_written; + } mov->exec_size = lowered_width; mov->group = channel_offset; - mov->size_written = size_written; mov->predicate = inst->predicate; inst->insert_before(block, mov); } } - inst->remove(block); + /* For CMP.XX instruction, we need to set the flags correctly. We do + * this by comparing the register that has the results against 0.0, + * getting the values in the flags. + */ + if (inst_df_dst_null) { + inst->dst.type = BRW_REGISTER_TYPE_F; + inst->src[0] = src_reg(inst_dst); + inst->src[1] = brw_imm_f(0.0f); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } else { + inst->remove(block); + } progress = true; } -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev