Richard Biener <rguent...@suse.de> writes: > It shouldn't be neccessary.
SVE is the counter-example :-) But the fix is simpler than the code you removed, so it's still a net win. Fixes various vect.exp and aarch64-sve.exp ICEs. OK if it passes proper testing? Thanks, Richard 2019-09-19 Richard Sandiford <richard.sandif...@arm.com> gcc/ * tree-vectorizer.h (vectorizable_condition): Take an int reduction index instead of a boolean flag. * tree-vect-stmts.c (vectorizable_condition): Likewise. Swap the "then" and "else" values for EXTRACT_LAST_REDUCTION reductions if the reduction accumulator is the "then" rather than the "else" value. (vect_analyze_stmt): Update call accordingly. (vect_transform_stmt): Likewise. * tree-vect-loop.c (vectorizable_reduction): Likewise, asserting that the index is > 0. Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h 2019-09-05 08:49:30.717740417 +0100 +++ gcc/tree-vectorizer.h 2019-09-19 08:43:53.965866883 +0100 @@ -1541,7 +1541,7 @@ extern void vect_remove_stores (stmt_vec extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree, slp_instance, stmt_vector_for_cost *); extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *, - stmt_vec_info *, bool, slp_tree, + stmt_vec_info *, int, slp_tree, stmt_vector_for_cost *); extern bool vectorizable_shift (stmt_vec_info, gimple_stmt_iterator *, stmt_vec_info *, slp_tree, Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c 2019-09-17 15:27:13.090053952 +0100 +++ gcc/tree-vect-stmts.c 2019-09-19 08:43:53.965866883 +0100 @@ -9778,7 +9778,7 @@ vect_is_simple_cond (tree cond, vec_info bool vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - stmt_vec_info *vec_stmt, bool for_reduction, + stmt_vec_info *vec_stmt, int reduc_index, slp_tree slp_node, stmt_vector_for_cost *cost_vec) { vec_info *vinfo = stmt_info->vinfo; @@ -9807,6 +9807,7 @@ vectorizable_condition (stmt_vec_info st vec<tree> vec_oprnds3 = vNULL; tree vec_cmp_type; bool masked = false; + bool for_reduction = (reduc_index > 0); if (for_reduction && STMT_SLP_TYPE (stmt_info)) return false; @@ -9888,6 +9889,29 @@ vectorizable_condition (stmt_vec_info st cond_expr1 = TREE_OPERAND (cond_expr, 1); } + /* For conditional reductions, the "then" value needs to be the candidate + value calculated by this iteration while the "else" value needs to be + the result carried over from previous iterations. If the COND_EXPR + is the other way around, we need to swap it. */ + bool must_invert_cmp_result = false; + if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1) + { + if (masked) + must_invert_cmp_result = true; + else + { + bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0)); + tree_code new_code = invert_tree_comparison (cond_code, honor_nans); + if (new_code == ERROR_MARK) + must_invert_cmp_result = true; + else + cond_code = new_code; + } + /* Make sure we don't accidentally use the old condition. */ + cond_expr = NULL_TREE; + std::swap (then_clause, else_clause); + } + if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype)) { /* Boolean values may have another representation in vectors @@ -10102,6 +10126,15 @@ vectorizable_condition (stmt_vec_info st vect_finish_stmt_generation (stmt_info, new_stmt, gsi); vec_compare = vec_compare_name; } + if (must_invert_cmp_result) + { + tree vec_compare_name = make_ssa_name (vec_cmp_type); + gassign *new_stmt = gimple_build_assign (vec_compare_name, + BIT_NOT_EXPR, + vec_compare); + vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + vec_compare = vec_compare_name; + } gcall *new_stmt = gimple_build_call_internal (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare, vec_then_clause); @@ -10635,7 +10668,7 @@ vect_analyze_stmt (stmt_vec_info stmt_in node_instance, cost_vec) || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec) || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec) - || vectorizable_condition (stmt_info, NULL, NULL, false, node, + || vectorizable_condition (stmt_info, NULL, NULL, 0, node, cost_vec) || vectorizable_comparison (stmt_info, NULL, NULL, node, cost_vec)); @@ -10654,7 +10687,7 @@ vect_analyze_stmt (stmt_vec_info stmt_in || vectorizable_load (stmt_info, NULL, NULL, node, node_instance, cost_vec) || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec) - || vectorizable_condition (stmt_info, NULL, NULL, false, node, + || vectorizable_condition (stmt_info, NULL, NULL, 0, node, cost_vec) || vectorizable_comparison (stmt_info, NULL, NULL, node, cost_vec)); @@ -10759,7 +10792,7 @@ vect_transform_stmt (stmt_vec_info stmt_ break; case condition_vec_info_type: - done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false, + done = vectorizable_condition (stmt_info, gsi, &vec_stmt, 0, slp_node, NULL); gcc_assert (done); break; Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c 2019-09-17 15:27:13.078054042 +0100 +++ gcc/tree-vect-loop.c 2019-09-19 08:43:53.961866913 +0100 @@ -6774,8 +6774,9 @@ vectorizable_reduction (stmt_vec_info st { /* Only call during the analysis stage, otherwise we'll lose STMT_VINFO_TYPE. */ + gcc_assert (reduc_index > 0); if (!vec_stmt && !vectorizable_condition (stmt_info, gsi, NULL, - true, NULL, cost_vec)) + reduc_index, NULL, cost_vec)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -7228,9 +7229,9 @@ vectorizable_reduction (stmt_vec_info st if (reduction_type == EXTRACT_LAST_REDUCTION) { - gcc_assert (!slp_node); + gcc_assert (!slp_node && reduc_index > 0); return vectorizable_condition (stmt_info, gsi, vec_stmt, - true, NULL, NULL); + reduc_index, NULL, NULL); } /* Create the destination vector */ @@ -7260,9 +7261,9 @@ vectorizable_reduction (stmt_vec_info st { if (code == COND_EXPR) { - gcc_assert (!slp_node); + gcc_assert (!slp_node && reduc_index > 0); vectorizable_condition (stmt_info, gsi, vec_stmt, - true, NULL, NULL); + reduc_index, NULL, NULL); break; } if (code == LSHIFT_EXPR