https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109088
--- Comment #15 from JuzheZhong <juzhe.zhong at rivai dot ai> ---
Hi, Richard.
Confirmed: Robin's patch doesn't help with this issue.
The root cause is that if-conversion fails to recognize the loop as a
possible conditional reduction, i.e. is_cond_scalar_reduction returns
FALSE.
The following patch enhances if-conv's conditional reduction
recognition. It has been bootstrapped and regression-tested on X86,
and the regression tests also pass on aarch64.
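For illustration, here is a minimal C sketch of the shape described in
the patch comments below (an assumed example, not the PR's testcase):
a conditional reduction whose guarded block chains several additions
onto the reduction variable, which is what is_cond_scalar_reduction
currently rejects.

int
cond_reduc (int *a, int *b, int *c, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    if (a[i] > 0)
      {
        /* In GIMPLE this becomes tmp1 = sum + a[i];
           tmp2 = tmp1 + b[i]; reduc_3 = tmp2 + c[i];
           i.e. a chain of adds starting from the reduction PHI.  */
        sum += a[i];
        sum += b[i];
        sum += c[i];
      }
  return sum;
}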
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index a8c915913ae..2bdd3710a65 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1784,14 +1784,119 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc,
tree arg_0, tree arg_1,
r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
/* Make R_OP1 to hold reduction variable. */
+ gimple *nonphi_use_stmt = NULL;
if (r_nop2 == PHI_RESULT (header_phi)
&& commutative_tree_code (reduction_op))
{
std::swap (r_op1, r_op2);
std::swap (r_nop1, r_nop2);
}
- else if (r_nop1 != PHI_RESULT (header_phi))
- return false;
+ else if (r_nop1 == PHI_RESULT (header_phi))
+ ;
+ else
+ {
+ /* Analyze the statement chain of STMT so that we can generate a
+ better if-conversion code sequence. We are trying to catch the
+ following situation:
+
+ loop-header:
+ reduc_1 = PHI <..., reduc_2>
+ ...
+ if (...)
+ tmp1 = reduc_1 + rhs1;
+ tmp2 = tmp1 + rhs2;
+ tmp3 = tmp2 + rhs3;
+ ...
+ reduc_3 = tmpN-1 + rhsN-1;
+
+ reduc_2 = PHI <reduc_1, reduc_3>
+
+ and convert to
+
+ reduc_1 = PHI <..., reduc_2>
+ tmp1 = rhs1;
+ tmp2 = tmp1 + rhs2;
+ tmp3 = tmp2 + rhs3;
+ ...
+ reduc_3 = tmpN-1 + rhsN-1;
+ ifcvt = cond_expr ? reduc_3 : 0;
+ reduc_2 = reduc_1 +/- ifcvt; */
+ if (num_imm_uses (PHI_RESULT (header_phi)) != 2)
+ return false;
+ if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
+ && !(FLOAT_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
+ && !HONOR_SIGNED_ZEROS (TREE_TYPE (PHI_RESULT (phi)))
+ && !HONOR_SIGN_DEPENDENT_ROUNDING (TREE_TYPE (PHI_RESULT (phi)))
+ && !HONOR_NANS (TREE_TYPE (PHI_RESULT (phi)))))
+ return false;
+ gimple *prev_use_stmt, *curr_use_stmt;
+ use_operand_p use;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, PHI_RESULT (header_phi))
+ {
+ prev_use_stmt = curr_use_stmt = USE_STMT (use_p);
+ if (is_gimple_assign (curr_use_stmt))
+ {
+ if (TREE_CODE (gimple_assign_lhs (curr_use_stmt)) != SSA_NAME)
+ return false;
+ if (*has_nop)
+ {
+ if (!CONVERT_EXPR_CODE_P (
+ gimple_assign_rhs_code (curr_use_stmt)))
+ return false;
+ }
+ else
+ {
+ if (gimple_assign_rhs_code (curr_use_stmt) != reduction_op)
+ return false;
+ }
+
+ bool visited_p = false;
+ nonphi_use_stmt = curr_use_stmt;
+ while (!visited_p)
+ {
+ if (!single_imm_use (gimple_assign_lhs (prev_use_stmt), &use,
+ &curr_use_stmt)
+ || gimple_bb (curr_use_stmt) != gimple_bb (stmt)
+ || !is_gimple_assign (curr_use_stmt)
+ || TREE_CODE (gimple_assign_lhs (curr_use_stmt))
+ != SSA_NAME
+ || gimple_assign_rhs_code (curr_use_stmt) != reduction_op)
+ return false;
+ if (curr_use_stmt == stmt)
+ {
+ if (*has_nop)
+ {
+ if (!single_imm_use (gimple_assign_lhs (
+ nonphi_use_stmt),
+ &use, &curr_use_stmt))
+ return false;
+ r_op1 = gimple_assign_lhs (nonphi_use_stmt);
+ r_nop1 = PHI_RESULT (header_phi);
+ nonphi_use_stmt = curr_use_stmt;
+ }
+ else
+ r_op1 = PHI_RESULT (header_phi);
+
+ if (*has_nop)
+ {
+ if (!single_imm_use (gimple_assign_lhs (stmt), &use,
+ &curr_use_stmt))
+ return false;
+ r_op2 = gimple_assign_lhs (stmt);
+ r_nop2 = gimple_assign_lhs (curr_use_stmt);
+ }
+ else
+ r_op2 = gimple_assign_lhs (stmt);
+ visited_p = true;
+ }
+ else
+ prev_use_stmt = curr_use_stmt;
+ }
+ }
+ else if (curr_use_stmt != phi)
+ return false;
+ }
+ }
if (*has_nop)
{
@@ -1816,12 +1921,41 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc,
tree arg_0, tree arg_1,
continue;
if (use_stmt == stmt)
continue;
+ if (use_stmt == nonphi_use_stmt)
+ continue;
if (gimple_code (use_stmt) != GIMPLE_PHI)
return false;
}
*op0 = r_op1; *op1 = r_op2;
*reduc = stmt;
+
+ if (nonphi_use_stmt)
+ {
+ /* Transform:
+
+ if (...)
+ tmp1 = reduc_1 + rhs1;
+ tmp2 = tmp1 + rhs2;
+ tmp3 = tmp2 + rhs3;
+
+ into:
+
+ tmp1 = rhs1 + 0; ---> We replace reduc_1 with '0'
+ tmp2 = tmp1 + rhs2;
+ tmp3 = tmp2 + rhs3;
+ ...
+ reduc_3 = tmpN-1 + rhsN-1;
+ ifcvt = cond_expr ? reduc_3 : 0; */
+ gcc_assert (gimple_assign_rhs_code (nonphi_use_stmt) == reduction_op);
+ if (gimple_assign_rhs1 (nonphi_use_stmt) == r_op1)
+ gimple_assign_set_rhs1 (nonphi_use_stmt,
+ build_zero_cst (TREE_TYPE (r_op1)));
+ else if (gimple_assign_rhs2 (nonphi_use_stmt) == r_op1)
+ gimple_assign_set_rhs2 (nonphi_use_stmt,
+ build_zero_cst (TREE_TYPE (r_op1)));
+ update_stmt (nonphi_use_stmt);
+ }
return true;
}
@@ -1886,12 +2020,17 @@ convert_scalar_cond_reduction (gimple *reduc,
gimple_stmt_iterator *gsi,
gsi_remove (&stmt_it, true);
release_defs (nop_reduc);
}
+
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
/* Delete original reduction stmt. */
- stmt_it = gsi_for_stmt (reduc);
- gsi_remove (&stmt_it, true);
- release_defs (reduc);
+ if (op1 != gimple_assign_lhs (reduc))
+ {
+ stmt_it = gsi_for_stmt (reduc);
+ gsi_remove (&stmt_it, true);
+ release_defs (reduc);
+ }
+
return rhs;
}
I have fully tested it with different kinds of conditional reductions;
all of them can now be vectorized.
I am not sure whether this is a feasible solution.
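For reference, this is roughly how I check each case (command line and
file name are just an assumed sketch, not part of the patch): build
with the if-conversion and vectorizer dumps enabled and confirm the
loop is reported as vectorized, e.g.

  gcc -O3 -fdump-tree-ifcvt-details -fdump-tree-vect-details -S cond_reduc.c
  grep "vectorized 1 loops" cond_reduc.c.*.vect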