https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109088
--- Comment #13 from JuzheZhong <juzhe.zhong at rivai dot ai> --- Hi, Richi. This is my draft approach to enhance the detection of more potential conditional reductions. diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index a8c915913ae..c25d2038f16 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -1790,8 +1790,72 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1, std::swap (r_op1, r_op2); std::swap (r_nop1, r_nop2); } - else if (r_nop1 != PHI_RESULT (header_phi)) - return false; + else if (r_nop1 == PHI_RESULT (header_phi)) + ; + else + { + /* Analyze the statement chain of STMT so that we could teach generate + better if-converison code sequence. We are trying to catch this + following situation: + + loop-header: + reduc_1 = PHI <..., reduc_2> + ... + if (...) + tmp1 = reduc_1 + rhs1; + tmp2 = tmp1 + rhs2; + tmp3 = tmp2 + rhs3; + ... + reduc_3 = tmpN-1 + rhsN-1; + + reduc_2 = PHI <reduc_1, reduc_3> + + and convert to + + reduc_2 = PHI <0, reduc_1> + tmp1 = rhs1 + rhs2; + tmp2 = tmp1 + rhs3; + tmp3 = tmp2 + rhs4; + ... + tmpN-1 = tmpN-2 + rhsN; + ifcvt = cond_expr ? 
tmpN-1 : 0 + reduc_1 = tmpN-1 +/- ifcvt; */ + if (num_imm_uses (PHI_RESULT (header_phi)) != 2) + return false; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, PHI_RESULT (header_phi)) + { + gimple *use_stmt = USE_STMT (use_p); + if (is_gimple_assign (use_stmt)) + { + if (gimple_assign_rhs_code (use_stmt) != reduction_op) + return false; + if (TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME) + return false; + + bool visited_p = false; + while (!visited_p) + { + use_operand_p use; + if (!single_imm_use (gimple_assign_lhs (use_stmt), &use, + &use_stmt) + || gimple_bb (use_stmt) != gimple_bb (stmt) + || !is_gimple_assign (use_stmt) + || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME + || gimple_assign_rhs_code (use_stmt) != reduction_op) + return false; + + if (gimple_assign_lhs (use_stmt) == gimple_assign_lhs (stmt)) + { + r_op2 = r_op1; + r_op1 = PHI_RESULT (header_phi); + visited_p = true; + } + } + } + else if (use_stmt != phi) + return false; + } + } My approach performs the check on a statement chain such as the following: tmp1 = reduc_1 + rhs1; tmp2 = tmp1 + rhs2; tmp3 = tmp2 + rhs3; ... reduc_3 = tmpN-1 + rhsN-1; The walk starts at "tmp1 = reduc_1 + rhs1;" and follows the chain until it reaches "reduc_3 = tmpN-1 + rhsN-1;", making sure that each statement is a PLUS_EXPR for a sum reduction. Does it look reasonable? With this change, vectorization succeeds.