With the relaxed outer loop reduction support we need to avoid picking up a different nested cycle's reduction def. That's easy given we record the PHI we are looking at - almost, at least.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2018-11-26  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/88182
        * tree-vect-loop.c (vectorizable_reduction): Pick up single
        correct reduc_def_info.
        * tree-vect-slp.c (vect_analyze_slp_instance): Set
        STMT_VINFO_REDUC_DEF of the first stmt.

        libgomp/
        * testsuite/libgomp.c++/pr88182.C: New testcase.

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c (revision 266446)
+++ gcc/tree-vect-loop.c (working copy)
@@ -6223,7 +6223,14 @@ vectorizable_reduction (stmt_vec_info st
      The last use is the reduction variable. In case of nested cycle this
      assumption is not true: we use reduc_index to record the index of the
      reduction variable. */
-  stmt_vec_info reduc_def_info = NULL;
+  stmt_vec_info reduc_def_info;
+  if (orig_stmt_info)
+    reduc_def_info = STMT_VINFO_REDUC_DEF (orig_stmt_info);
+  else
+    reduc_def_info = STMT_VINFO_REDUC_DEF (stmt_info);
+  gcc_assert (reduc_def_info);
+  gphi *reduc_def_phi = as_a <gphi *> (reduc_def_info->stmt);
+  tree reduc_def = PHI_RESULT (reduc_def_phi);
   int reduc_index = -1;
   for (i = 0; i < op_type; i++)
     {
@@ -6236,9 +6243,9 @@ vectorizable_reduction (stmt_vec_info st
                                           &def_stmt_info);
       dt = dts[i];
       gcc_assert (is_simple_use);
-      if (dt == vect_reduction_def)
+      if (dt == vect_reduction_def
+          && ops[i] == reduc_def)
         {
-          reduc_def_info = def_stmt_info;
           reduc_index = i;
           continue;
         }
@@ -6259,10 +6266,10 @@ vectorizable_reduction (stmt_vec_info st
           && !(dt == vect_nested_cycle && nested_cycle))
         return false;
 
-      if (dt == vect_nested_cycle)
+      if (dt == vect_nested_cycle
+          && ops[i] == reduc_def)
         {
           found_nested_cycle_def = true;
-          reduc_def_info = def_stmt_info;
           reduc_index = i;
         }
 
@@ -6298,20 +6305,8 @@ vectorizable_reduction (stmt_vec_info st
                              "in-order reduction chain without SLP.\n");
           return false;
         }
-
-      if (orig_stmt_info)
-        reduc_def_info = STMT_VINFO_REDUC_DEF (orig_stmt_info);
-      else
-        reduc_def_info = STMT_VINFO_REDUC_DEF (stmt_info);
     }
 
-  if (! reduc_def_info)
-    return false;
-
-  gphi *reduc_def_phi = dyn_cast <gphi *> (reduc_def_info->stmt);
-  if (!reduc_def_phi)
-    return false;
-
   if (!(reduc_index == -1
         || dts[reduc_index] == vect_reduction_def
         || dts[reduc_index] == vect_nested_cycle
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c (revision 266446)
+++ gcc/tree-vect-slp.c (working copy)
@@ -1952,6 +1952,8 @@ vect_analyze_slp_instance (vec_info *vin
              transform the node. In the reduction analysis phase only the last
              element of the chain is marked as reduction. */
           STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
+          STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
+            = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
         }
       else
         {
Index: libgomp/testsuite/libgomp.c++/pr88182.C
===================================================================
--- libgomp/testsuite/libgomp.c++/pr88182.C (nonexistent)
+++ libgomp/testsuite/libgomp.c++/pr88182.C (working copy)
@@ -0,0 +1,42 @@
+// { dg-do run }
+// { dg-options "-O -fopenmp -ftree-loop-if-convert -fno-ssa-phiopt" }
+
+#pragma omp declare simd simdlen(4) notinbranch
+__attribute__((noinline)) int
+foo (double c1, double c2)
+{
+  double z1 = c1, z2 = c2;
+  int res = 100, i;
+
+  for (i = 0; i < 5; i++)
+    {
+      res = (z1 * z1 + z2 * z2 > 4.0) ? (i < res ? i : res) : res;
+      z1 = c1 + z1 * z1 - z2 * z2;
+      z2 = c2 + 2.0 * z1 * z2;
+      c1 += 0.5;
+      c2 += 0.5;
+    }
+  return res;
+}
+
+__attribute__((noinline, noclone)) void
+bar (double *x, double *y)
+{
+  asm volatile ("" : : "rm" (x), "rm" (y) : "memory");
+}
+
+int
+main ()
+{
+  int i;
+  double c[4] = { 0.0, 1.0, 0.0, 1.0 };
+  double d[4] = { 0.0, 1.0, 2.0, 0.0 };
+  int e[4];
+  bar (c, d);
+#pragma omp simd safelen(4)
+  for (i = 0; i < 4; i++)
+    e[i] = foo (c[i], d[i]);
+  if (e[0] != 3 || e[1] != 1 || e[2] != 1 || e[3] != 2)
+    __builtin_abort ();
+  return 0;
+}