On Mon, Dec 4, 2017 at 1:11 PM, Richard Biener <rguent...@suse.de> wrote: > > I've noticed we perform FP reduction association without the required > checks for associative math. I've added > gcc.dg/tree-ssa/loop-interchange-1b.c to cover this. > > I also noticed we happily interchange a loop with a reduction like > > sum = a[i] - sum; > > where a change in order of elements isn't ok. Unfortunately bwaves > is exactly a case where single_use != next_def (tried to simply remove > that case for now), because reassoc didn't have a chance to fix the > operand order. Thus this patch exports the relevant handling from > the vectorizer (for stage1 having a separate infrastructure gathering / > analyzing of reduction/induction infrastructure would be nice...) > and uses it from interchange. We then don't handle > gcc.dg/tree-ssa/loop-interchange-4.c anymore (similar vectorizer > missed-opt is PR65930). I didn't bother to split up the vectorizer > code further to implement relaxed validity checking but simply XFAILed > this testcase. > > Earlier I simplified allocation stuff in the main loop which is why > this part is included in this patch. > > Bootstrap running on x86_64-unknown-linux-gnu. > > I'll see to craft a testcase with the sum = a[i] - sum; mis-handling. > > Ok? Sure. Just for the record. There is also similar associative check in predcom. As you suggested, a path extraction/checking interface for associative checking would be great, given we have multiple users now.
Thanks, bin > > Thanks, > Richard. > > 2017-12-04 Richard Biener <rguent...@suse.de> > > * tree-vectorizer.h (check_reduction_path): Declare. > * tree-vect-loop.c (check_reduction_path): New function, split out > from ... > (vect_is_simple_reduction): ... here. > * gimple-loop-interchange.cc: Include tree-vectorizer.h. > (loop_cand::analyze_iloop_reduction_var): Use single_imm_use. > Properly check for a supported reduction operation and a > valid expression if the reduction covers multiple stmts. > (prepare_perfect_loop_nest): Simpify allocation. > (pass_linterchange::execute): Likewise. > > * gcc.dg/tree-ssa/loop-interchange-1.c: Add fast-math flags. > * gcc.dg/tree-ssa/loop-interchange-1b.c: New test variant. > * gcc.dg/tree-ssa/loop-interchange-4.c: XFAIL. > > > Index: gcc/gimple-loop-interchange.cc > =================================================================== > --- gcc/gimple-loop-interchange.cc (revision 255375) > +++ gcc/gimple-loop-interchange.cc (working copy) > @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. > #include "tree-ssa-loop-ivopts.h" > #include "tree-ssa-dce.h" > #include "tree-data-ref.h" > +#include "tree-vectorizer.h" > > /* This pass performs loop interchange: for example, the loop nest > > @@ -551,23 +552,29 @@ loop_cand::analyze_iloop_reduction_var ( > in a way that reduction operation is seen as black box. In general, > we can ignore reassociation of reduction operator; we can handle fake > reductions in which VAR is not even used to compute NEXT. */ > - FOR_EACH_IMM_USE_FAST (use_p, iterator, var) > - { > - stmt = USE_STMT (use_p); > - if (is_gimple_debug (stmt)) > - continue; > - > - if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt))) > - return false; > - > - if (single_use != NULL) > - return false; > + if (! single_imm_use (var, &use_p, &single_use) > + || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use))) > + return false; > > - single_use = stmt; > - } > + /* Check the reduction operation. We require a commutative or > + left-associative operation. For FP math we also need to be allowed > + to associate operations. */ > + if (! is_gimple_assign (single_use) > + || ! (commutative_tree_code (gimple_assign_rhs_code (single_use)) > + || (commutative_ternary_tree_code > + (gimple_assign_rhs_code (single_use)) > + && (use_p->use == gimple_assign_rhs1_ptr (single_use) > + || use_p->use == gimple_assign_rhs2_ptr (single_use))) > + || (gimple_assign_rhs_code (single_use) == MINUS_EXPR > + && use_p->use == gimple_assign_rhs1_ptr (single_use))) > + || (FLOAT_TYPE_P (TREE_TYPE (var)) > + && ! flag_associative_math)) > + return false; > > + /* Handle and verify a series of stmts feeding the reduction op. */ > if (single_use != next_def > - && !stmt_dominates_stmt_p (single_use, next_def)) > + && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next, > + gimple_assign_rhs_code (single_use))) > return false; > > /* Only support cases in which INIT is used in inner loop. */ > @@ -1964,7 +1971,7 @@ prepare_perfect_loop_nest (struct loop * > vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs) > { > struct loop *start_loop = NULL, *innermost = loop; > - struct loop *outermost = superloop_at_depth (loop, 0); > + struct loop *outermost = loops_for_fn (cfun)->tree_root; > > /* Find loop nest from the innermost loop. The outermost is the innermost > outer*/ > @@ -1985,21 +1992,26 @@ prepare_perfect_loop_nest (struct loop * > if (!start_loop || !start_loop->inner) > return false; > > + /* Prepare the data reference vector for the loop nest, pruning outer > + loops we cannot handle. */ > datarefs->create (20); > - if ((start_loop = prepare_data_references (start_loop, datarefs)) == NULL > + start_loop = prepare_data_references (start_loop, datarefs); > + if (!start_loop > /* Check if there is no data reference. */ > || datarefs->is_empty () > /* Check if there are too many of data references. */ > - || (int) datarefs->length () > MAX_DATAREFS > - /* Compute access strides for all data references. */ > - || ((start_loop = compute_access_strides (start_loop, innermost, > - *datarefs)) == NULL) > - /* Check if loop nest should be interchanged. */ > - || !should_interchange_loop_nest (start_loop, innermost, *datarefs)) > - { > - free_data_refs_with_aux (*datarefs); > - return false; > - } > + || (int) datarefs->length () > MAX_DATAREFS) > + return false; > + > + /* Compute access strides for all data references, pruning outer > + loops we cannot analyze refs in. */ > + start_loop = compute_access_strides (start_loop, innermost, *datarefs); > + if (!start_loop) > + return false; > + > + /* Check if any interchange is profitable in the loop nest. */ > + if (!should_interchange_loop_nest (start_loop, innermost, *datarefs)) > + return false; > > /* Check if data dependences can be computed for loop nest starting from > start_loop. */ > @@ -2029,14 +2041,15 @@ prepare_perfect_loop_nest (struct loop * > return true; > } > > + /* ??? We should be able to adjust DDRs we already computed by > + truncating distance vectors. */ > free_dependence_relations (*ddrs); > + *ddrs = vNULL; > /* Try to compute data dependences with the outermost loop stripped. */ > loop = start_loop; > start_loop = start_loop->inner; > } while (start_loop && start_loop->inner); > > - loop_nest->release (); > - free_data_refs_with_aux (*datarefs); > return false; > } > > @@ -2050,19 +2063,20 @@ pass_linterchange::execute (function *fu > > bool changed_p = false; > struct loop *loop; > - vec<loop_p> loop_nest; > - vec<data_reference_p> datarefs; > - vec<ddr_p> ddrs; > - > FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) > - if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs)) > - { > - tree_loop_interchange loop_interchange (loop_nest); > - changed_p |= loop_interchange.interchange (datarefs, ddrs); > - free_dependence_relations (ddrs); > - free_data_refs_with_aux (datarefs); > - loop_nest.release (); > - } > + { > + vec<loop_p> loop_nest = vNULL; > + vec<data_reference_p> datarefs = vNULL; > + vec<ddr_p> ddrs = vNULL; > + if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs)) > + { > + tree_loop_interchange loop_interchange (loop_nest); > + changed_p |= loop_interchange.interchange (datarefs, ddrs); > + } > + free_dependence_relations (ddrs); > + free_data_refs_with_aux (datarefs); > + loop_nest.release (); > + } > > if (changed_p) > scev_reset_htab (); > Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c (revision 255375) > +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do run } */ > -/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } > */ > +/* { dg-options "-O2 -floop-interchange -fassociative-math -fno-signed-zeros > -fno-trapping-math -fdump-tree-linterchange-details" } */ > > /* Copied from graphite/interchange-4.c */ > > Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c (nonexistent) > +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c (working copy) > @@ -0,0 +1,52 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } > */ > + > +/* Copied from graphite/interchange-4.c */ > + > +#define DEBUG 0 > +#if DEBUG > +#include <stdio.h> > +#endif > + > +double u[1782225]; > + > +static void __attribute__((noinline)) > +foo (int N, double *res) > +{ > + int i, j; > + double sum = 0; > + for (i = 0; i < N; i++) > + for (j = 0; j < N; j++) > + sum = sum + u[i + 1335 * j]; > + > + *res = sum; > +} > + > +extern void abort (); > + > +int > +main (void) > +{ > + int i, j; > + double res; > + > + for (i = 0; i < 1782225; i++) > + u[i] = 0; > + u[0] = __DBL_MAX__; > + u[1335] = -__DBL_MAX__; > + u[1] = __DBL_MAX__; > + u[1336] = -__DBL_MAX__; > + > + foo (1335, &res); > + > +#if DEBUG > + fprintf (stderr, "res = %d \n", res); > +#endif > + > + if (res != 0.0) > + abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-not "is interchanged" "linterchange"} } */ > Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c (revision 255375) > +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c (working copy) > @@ -47,4 +47,4 @@ main (void) > return 0; > } > > -/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is > interchanged" 1 "linterchange"} } */ > +/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is > interchanged" 1 "linterchange" { xfail *-*-* } } } */ > Index: gcc/tree-vectorizer.h > =================================================================== > --- gcc/tree-vectorizer.h (revision 255375) > +++ gcc/tree-vectorizer.h (working copy) > @@ -1255,6 +1255,9 @@ extern tree vect_create_addr_base_for_ve > /* FORNOW: Used in tree-parloops.c. */ > extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, > bool *, bool); > +/* Used in gimple-loop-interchange.c. */ > +extern bool check_reduction_path (location_t, loop_p, gphi *, tree, > + enum tree_code); > /* Drive for loop analysis stage. */ > extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info); > extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); > Index: gcc/tree-vect-loop.c > =================================================================== > --- gcc/tree-vect-loop.c (revision 255375) > +++ gcc/tree-vect-loop.c (working copy) > @@ -2626,6 +2626,114 @@ vect_is_slp_reduction (loop_vec_info loo > } > > > +/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and > + reduction operation CODE has a handled computation expression. */ > + > +bool > +check_reduction_path (location_t loc, loop_p loop, gphi *phi, tree loop_arg, > + enum tree_code code) > +{ > + auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; > + auto_bitmap visited; > + tree lookfor = PHI_RESULT (phi); > + ssa_op_iter curri; > + use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE); > + while (USE_FROM_PTR (curr) != loop_arg) > + curr = op_iter_next_use (&curri); > + curri.i = curri.numops; > + do > + { > + path.safe_push (std::make_pair (curri, curr)); > + tree use = USE_FROM_PTR (curr); > + if (use == lookfor) > + break; > + gimple *def = SSA_NAME_DEF_STMT (use); > + if (gimple_nop_p (def) > + || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) > + { > +pop: > + do > + { > + std::pair<ssa_op_iter, use_operand_p> x = path.pop (); > + curri = x.first; > + curr = x.second; > + do > + curr = op_iter_next_use (&curri); > + /* Skip already visited or non-SSA operands (from iterating > + over PHI args). */ > + while (curr != NULL_USE_OPERAND_P > + && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME > + || ! bitmap_set_bit (visited, > + SSA_NAME_VERSION > + (USE_FROM_PTR (curr))))); > + } > + while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); > + if (curr == NULL_USE_OPERAND_P) > + break; > + } > + else > + { > + if (gimple_code (def) == GIMPLE_PHI) > + curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), > SSA_OP_USE); > + else > + curr = op_iter_init_use (&curri, def, SSA_OP_USE); > + while (curr != NULL_USE_OPERAND_P > + && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME > + || ! bitmap_set_bit (visited, > + SSA_NAME_VERSION > + (USE_FROM_PTR (curr))))) > + curr = op_iter_next_use (&curri); > + if (curr == NULL_USE_OPERAND_P) > + goto pop; > + } > + } > + while (1); > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + dump_printf_loc (MSG_NOTE, loc, "reduction path: "); > + unsigned i; > + std::pair<ssa_op_iter, use_operand_p> *x; > + FOR_EACH_VEC_ELT (path, i, x) > + { > + dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second)); > + dump_printf (MSG_NOTE, " "); > + } > + dump_printf (MSG_NOTE, "\n"); > + } > + > + /* Check whether the reduction path detected is valid. */ > + bool fail = path.length () == 0; > + bool neg = false; > + for (unsigned i = 1; i < path.length (); ++i) > + { > + gimple *use_stmt = USE_STMT (path[i].second); > + tree op = USE_FROM_PTR (path[i].second); > + if (! has_single_use (op) > + || ! is_gimple_assign (use_stmt)) > + { > + fail = true; > + break; > + } > + if (gimple_assign_rhs_code (use_stmt) != code) > + { > + if (code == PLUS_EXPR > + && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) > + { > + /* Track whether we negate the reduction value each iteration. > */ > + if (gimple_assign_rhs2 (use_stmt) == op) > + neg = ! neg; > + } > + else > + { > + fail = true; > + break; > + } > + } > + } > + return ! fail && ! neg; > +} > + > + > /* Function vect_is_simple_reduction > > (1) Detect a cross-iteration def-use cycle that represents a simple > @@ -3128,106 +3236,8 @@ vect_is_simple_reduction (loop_vec_info > } > > /* Look for the expression computing loop_arg from loop PHI result. */ > - auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; > - auto_bitmap visited; > - tree lookfor = PHI_RESULT (phi); > - ssa_op_iter curri; > - use_operand_p curr = op_iter_init_phiuse (&curri, as_a <gphi *>(phi), > - SSA_OP_USE); > - while (USE_FROM_PTR (curr) != loop_arg) > - curr = op_iter_next_use (&curri); > - curri.i = curri.numops; > - do > - { > - path.safe_push (std::make_pair (curri, curr)); > - tree use = USE_FROM_PTR (curr); > - if (use == lookfor) > - break; > - gimple *def = SSA_NAME_DEF_STMT (use); > - if (gimple_nop_p (def) > - || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) > - { > -pop: > - do > - { > - std::pair<ssa_op_iter, use_operand_p> x = path.pop (); > - curri = x.first; > - curr = x.second; > - do > - curr = op_iter_next_use (&curri); > - /* Skip already visited or non-SSA operands (from iterating > - over PHI args). */ > - while (curr != NULL_USE_OPERAND_P > - && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME > - || ! bitmap_set_bit (visited, > - SSA_NAME_VERSION > - (USE_FROM_PTR (curr))))); > - } > - while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); > - if (curr == NULL_USE_OPERAND_P) > - break; > - } > - else > - { > - if (gimple_code (def) == GIMPLE_PHI) > - curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), > SSA_OP_USE); > - else > - curr = op_iter_init_use (&curri, def, SSA_OP_USE); > - while (curr != NULL_USE_OPERAND_P > - && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME > - || ! bitmap_set_bit (visited, > - SSA_NAME_VERSION > - (USE_FROM_PTR (curr))))) > - curr = op_iter_next_use (&curri); > - if (curr == NULL_USE_OPERAND_P) > - goto pop; > - } > - } > - while (1); > - if (dump_file && (dump_flags & TDF_DETAILS)) > - { > - dump_printf_loc (MSG_NOTE, vect_location, > - "reduction path: "); > - unsigned i; > - std::pair<ssa_op_iter, use_operand_p> *x; > - FOR_EACH_VEC_ELT (path, i, x) > - { > - dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second)); > - dump_printf (MSG_NOTE, " "); > - } > - dump_printf (MSG_NOTE, "\n"); > - } > - > - /* Check whether the reduction path detected is valid. */ > - bool fail = path.length () == 0; > - bool neg = false; > - for (unsigned i = 1; i < path.length (); ++i) > - { > - gimple *use_stmt = USE_STMT (path[i].second); > - tree op = USE_FROM_PTR (path[i].second); > - if (! has_single_use (op) > - || ! is_gimple_assign (use_stmt)) > - { > - fail = true; > - break; > - } > - if (gimple_assign_rhs_code (use_stmt) != code) > - { > - if (code == PLUS_EXPR > - && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) > - { > - /* Track whether we negate the reduction value each iteration. > */ > - if (gimple_assign_rhs2 (use_stmt) == op) > - neg = ! neg; > - } > - else > - { > - fail = true; > - break; > - } > - } > - } > - if (! fail && ! neg) > + if (check_reduction_path (vect_location, loop, as_a <gphi *> (phi), > loop_arg, > + code)) > return def_stmt; > > if (dump_enabled_p ())