On Thu, Nov 12, 2020 at 11:11 AM Frederik Harwath <frede...@codesourcery.com> wrote: > > > This patch enables the use of Graphite for the analysis of OpenACC > "auto" loops. The goal is to decide if a loop may be parallelized > (i.e. converted to an "independent" loop) or not. Graphite and the > functionality on which it relies (scalar evolution, data references) are > extended to interpret the internal representation of OpenACC loop > constructs that is encoded (e.g. through calls to OpenACC-specific > internal functions) in the OpenACC outlined functions (".omp_fn") and to > ignore some artifacts of the outlining process that are not relevant for > the analysis of the original loops (e.g. pointers introduced for the > purpose of offloading are irrelevant to the question whether the > original loops can be parallelized or not). This is done in a way that > does not impact code which does not use OpenACC. Furthermore, Graphite > is extended by functionality that extends its applicability to > real-world code (e.g. runtime alias checking). The OpenACC lowering is > extended to use the result of Graphite's analysis to assign > "independent" clauses to loops.
I wonder if this can be split into a refactoring of graphite and adding runtime alias capability and a part doing the OpenACC pieces. Richard. > --- > gcc/common.opt | 8 + > gcc/graphite-dependences.c | 12 +- > gcc/graphite-isl-ast-to-gimple.c | 77 +- > gcc/graphite-oacc.h | 90 ++ > gcc/graphite-scop-detection.c | 828 ++++++++++++++---- > gcc/graphite-sese-to-poly.c | 26 +- > gcc/graphite.c | 403 ++++++++- > gcc/graphite.h | 11 +- > gcc/internal-fn.h | 7 +- > gcc/omp-expand.c | 26 +- > gcc/omp-offload.c | 173 +++- > gcc/predict.c | 2 +- > .../graphite/alias-0-no-runtime-check.c | 20 + > .../gcc.dg/graphite/alias-0-runtime-check.c | 21 + > gcc/testsuite/gcc.dg/graphite/alias-1.c | 22 + > gcc/tree-chrec-oacc.h | 45 + > gcc/tree-chrec.c | 16 +- > gcc/tree-data-ref.c | 112 ++- > gcc/tree-data-ref.h | 8 +- > gcc/tree-loop-distribution.c | 17 +- > gcc/tree-scalar-evolution.c | 257 +++++- > gcc/tree-ssa-loop-ivcanon.c | 9 +- > gcc/tree-ssa-loop-niter.c | 13 + > 23 files changed, 1870 insertions(+), 333 deletions(-) > create mode 100644 gcc/graphite-oacc.h > create mode 100644 gcc/testsuite/gcc.dg/graphite/alias-0-no-runtime-check.c > create mode 100644 gcc/testsuite/gcc.dg/graphite/alias-0-runtime-check.c > create mode 100644 gcc/testsuite/gcc.dg/graphite/alias-1.c > create mode 100644 gcc/tree-chrec-oacc.h > > diff --git a/gcc/common.opt b/gcc/common.opt > index dfed6ec76ba..caaeaa1aa6f 100644 > --- a/gcc/common.opt > +++ b/gcc/common.opt > @@ -1600,6 +1600,14 @@ fgraphite-identity > Common Report Var(flag_graphite_identity) Optimization > Enable Graphite Identity transformation. > > +fgraphite-non-affine-accesses > +Common Report Var(flag_graphite_non_affine_accesses) Init(0) > +Allow Graphite to handle non-affine data accesses. > + > +fgraphite-runtime-alias-checks > +Common Report Var(flag_graphite_runtime_alias_checks) Optimization Init(1) > +Allow Graphite to add runtime alias checks to loops if aliasing cannot be > resolved statically. 
> + > fhoist-adjacent-loads > Common Report Var(flag_hoist_adjacent_loads) Optimization > Enable hoisting adjacent loads to encourage generating conditional move > diff --git a/gcc/graphite-dependences.c b/gcc/graphite-dependences.c > index 7078c949800..76ba027cdf3 100644 > --- a/gcc/graphite-dependences.c > +++ b/gcc/graphite-dependences.c > @@ -82,7 +82,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map > *&reads, > { > if (dump_file) > { > - fprintf (dump_file, "Adding read to depedence graph: "); > + fprintf (dump_file, "Adding read to dependence graph: "); > print_pdr (dump_file, pdr); > } > isl_union_map *um > @@ -90,7 +90,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map > *&reads, > reads = isl_union_map_union (reads, um); > if (dump_file) > { > - fprintf (dump_file, "Reads depedence graph: "); > + fprintf (dump_file, "Reads dependence graph: "); > print_isl_union_map (dump_file, reads); > } > } > @@ -98,7 +98,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map > *&reads, > { > if (dump_file) > { > - fprintf (dump_file, "Adding must write to depedence graph: "); > + fprintf (dump_file, "Adding must write to dependence graph: > "); > print_pdr (dump_file, pdr); > } > isl_union_map *um > @@ -106,7 +106,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map > *&reads, > must_writes = isl_union_map_union (must_writes, um); > if (dump_file) > { > - fprintf (dump_file, "Must writes depedence graph: "); > + fprintf (dump_file, "Must writes dependence graph: "); > print_isl_union_map (dump_file, must_writes); > } > } > @@ -114,7 +114,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map > *&reads, > { > if (dump_file) > { > - fprintf (dump_file, "Adding may write to depedence graph: "); > + fprintf (dump_file, "Adding may write to dependence graph: "); > print_pdr (dump_file, pdr); > } > isl_union_map *um > @@ -122,7 +122,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map > *&reads, > may_writes = isl_union_map_union 
(may_writes, um); > if (dump_file) > { > - fprintf (dump_file, "May writes depedence graph: "); > + fprintf (dump_file, "May writes dependence graph: "); > print_isl_union_map (dump_file, may_writes); > } > } > diff --git a/gcc/graphite-isl-ast-to-gimple.c > b/gcc/graphite-isl-ast-to-gimple.c > index ef93fda2233..98c61ff864e 100644 > --- a/gcc/graphite-isl-ast-to-gimple.c > +++ b/gcc/graphite-isl-ast-to-gimple.c > @@ -57,6 +57,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-ssa.h" > #include "tree-vectorizer.h" > #include "graphite.h" > +#include "graphite-oacc.h" > > struct ast_build_info > { > @@ -635,12 +636,18 @@ translate_isl_ast_for_loop (loop_p context_loop, > redirect_edge_succ_nodup (next_e, after); > set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src); > > - if (flag_loop_parallelize_all) > + if (flag_loop_parallelize_all || oacc_function_p (cfun)) > { > isl_id *id = isl_ast_node_get_annotation (node_for); > gcc_assert (id); > ast_build_info *for_info = (ast_build_info *) isl_id_get_user (id); > loop->can_be_parallel = for_info->is_parallelizable; > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + dump_user_location_t loc = find_loop_location (loop); > + dump_printf_loc (MSG_NOTE, loc, "loop can be parallel: %d \n", > + loop->can_be_parallel); > + } > free (for_info); > isl_id_free (id); > } > @@ -1027,7 +1034,7 @@ gsi_insert_earliest (gimple_seq seq) > basic_block begin_bb = get_entry_bb (codegen_region); > > /* Inserting the gimple statements in a vector because gimple_seq behave > - in strage ways when inserting the stmts from it into different basic > + in strange ways when inserting the stmts from it into different basic > blocks one at a time. 
*/ > auto_vec<gimple *, 3> stmts; > for (gimple_stmt_iterator gsi = gsi_start (seq); !gsi_end_p (gsi); > @@ -1397,7 +1404,7 @@ scop_to_isl_ast (scop_p scop) > (isl_schedule_copy (scop->transformed_schedule), set_separate_option, > NULL); > isl_ast_build *context_isl = generate_isl_context (scop); > > - if (flag_loop_parallelize_all) > + if (flag_loop_parallelize_all || oacc_function_p (cfun)) > { > scop_get_dependences (scop); > context_isl = > @@ -1464,6 +1471,42 @@ generate_entry_out_of_ssa_copies (edge false_entry, > } > } > > +/* Defined in tree-loop-distribution.c */ > +/* TODO Move this function to tree-data-ref.c? */ > + > +void > +compute_alias_check_pairs (class loop *loop, vec<ddr_p> *alias_ddrs, > + vec<dr_with_seg_len_pair_t> *comp_alias_pairs); > + > + > +/* Create a condition that evaluates to TRUE if some ALIAS_DDRS > + do alias. */ > + > +static tree > +generate_alias_cond (vec<ddr_p>& alias_ddrs, loop_p context_loop) > +{ > + gcc_checking_assert (flag_graphite_runtime_alias_checks > + && alias_ddrs.length () > 0); > + gcc_checking_assert (context_loop); > + > + auto_vec<dr_with_seg_len_pair_t> check_pairs; > + compute_alias_check_pairs (context_loop, &alias_ddrs, &check_pairs); > + gcc_checking_assert (check_pairs.length () > 0); > + > + tree alias_cond = NULL_TREE; > + create_runtime_alias_checks (context_loop, &check_pairs, &alias_cond); > + gcc_checking_assert (alias_cond); > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "Generated runtime alias check: "); > + print_generic_expr (dump_file, alias_cond, dump_flags); > + fprintf (dump_file, "\n"); > + } > + > + return alias_cond; > +} > + > /* GIMPLE Loop Generator: generates loops in GIMPLE form for the given SCOP. > Return true if code generation succeeded. 
*/ > > @@ -1504,12 +1547,38 @@ graphite_regenerate_ast_isl (scop_p scop) > region->if_region = if_region; > > loop_p context_loop = region->region.entry->src->loop_father; > + gcc_checking_assert (context_loop); > edge e = single_succ_edge (if_region->true_region->region.entry->dest); > basic_block bb = split_edge (e); > > /* Update the true_region exit edge. */ > region->if_region->true_region->region.exit = single_succ_edge (bb); > > + if (flag_graphite_runtime_alias_checks > + && scop->unhandled_alias_ddrs.length () > 0) > + { > + /* SCoP detection has failed to handle the aliasing between some > + data-references of the SCoP statically. Generate an alias > + check that selects the newly generated version of the SCoP > + (in the true-branch of the conditional) if aliasing can be > + ruled out at runtime and the original version of the SCoP, > + otherwise. */ > + > + loop_p loop > + = find_common_loop (scop->scop_info->region.entry->dest->loop_father, > + scop->scop_info->region.exit->src->loop_father); > + > + tree cond = generate_alias_cond (scop->unhandled_alias_ddrs, loop); > + tree non_alias_cond = build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); > + set_ifsese_condition (region->if_region, non_alias_cond); > + /* The loop nest is shared by all DDRs, cf. build_alias_set. */ > + DDR_LOOP_NEST (scop->unhandled_alias_ddrs[0]).release (); > + free_dependence_relations (scop->unhandled_alias_ddrs); > + } > + > + if (dump_file) > + fprintf (dump_file, "[codegen] isl AST to Gimple succeeded.\n"); > + > t.translate_isl_ast (context_loop, root_node, e, ip); > if (! 
t.codegen_error_p ()) > { > @@ -1520,8 +1589,6 @@ graphite_regenerate_ast_isl (scop_p scop) > if_region->region->region.exit->src, > if_region->false_region->region.exit, > if_region->true_region->region.exit); > - if (dump_file) > - fprintf (dump_file, "[codegen] isl AST to Gimple succeeded.\n"); > } > > if (t.codegen_error_p ()) > diff --git a/gcc/graphite-oacc.h b/gcc/graphite-oacc.h > new file mode 100644 > index 00000000000..5978f428974 > --- /dev/null > +++ b/gcc/graphite-oacc.h > @@ -0,0 +1,90 @@ > +/* Graphite OpenACC helpers > + Copyright (C) 2006-2020 Free Software Foundation, Inc. > + Contributed by Sebastian Pop <sebastian....@inria.fr>. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify > +it under the terms of the GNU General Public License as published by > +the Free Software Foundation; either version 3, or (at your option) > +any later version. > + > +GCC is distributed in the hope that it will be useful, > +but WITHOUT ANY WARRANTY; without even the implied warranty of > +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > +GNU General Public License for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>. */ > + > +/* This pass converts GIMPLE to GRAPHITE, performs some loop > + transformations and then converts the resulting representation back > + to GIMPLE. > + > + An early description of this pass can be found in the GCC Summit'06 > + paper "GRAPHITE: Polyhedral Analyses and Optimizations for GCC". > + The wiki page http://gcc.gnu.org/wiki/Graphite contains pointers to > + the related work. */ > + > +#include "omp-general.h" > +#include "attribs.h" > + > +static inline bool oacc_function_p (function *fun) > +{ > + return oacc_get_fn_attrib (fun->decl); > +} > + > +/* Represents a field of the ".omp_data_i" argument of > + an outlined OpenACC function. 
Each such field > + is used to pass a unique variable from the function > + that originally contained the loop to the outlined > + function. */ > + > +struct oacc_data_field { > + /* The variable of the source function that > + gets passed through this field. */ > + tree src_var; > + > + /* The variable that holds the dereferenced value of the > + field. This might be left NULL for reduction variables. */ > + // TODO-kernels Should we also set this for reduction variables? > + // This seems to be unnecessary since we do not create data-refs > + // for reduction variables. > + > + tree tgt_var; > +}; > + > +class oacc_omp_data > +{ > +private: > + hash_map<tree, oacc_data_field> field_map; > + > + void gather_assignments (struct function *fn); > + tree get_accessed_field (tree t); > +public: > + tree src_fn_arg; > + tree tgt_fn_arg; > + gimple* src_fn_def; > + // TODO-kernels This belongs into the oacc_context > + loop_p loop; > + static oacc_omp_data construct (struct function* fn); > + tree redirect_data_ref (tree ref); > +}; > + > +class oacc_context { > +public: > + oacc_omp_data omp_data; > + > +private: > + bool valid; > + oacc_context () : omp_data (), valid (false) {} > + oacc_context (oacc_omp_data omp_data) : > + omp_data (omp_data), > + valid (true) {} > +public: > + static oacc_context build_context (); > + static oacc_context invalid_context () { return oacc_context (); } > + > + bool is_valid () { return valid; } > +}; > diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c > index 75f81227f8a..ccdf3aa4d5b 100644 > --- a/gcc/graphite-scop-detection.c > +++ b/gcc/graphite-scop-detection.c > @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-into-ssa.h" > #include "tree-ssa.h" > #include "cfgloop.h" > +#include "tree-chrec-oacc.h" > #include "tree-data-ref.h" > #include "tree-scalar-evolution.h" > #include "tree-pass.h" > @@ -49,6 +50,9 @@ along with GCC; see the file COPYING3. 
If not see > #include "gimple-pretty-print.h" > #include "cfganal.h" > #include "graphite.h" > +#include "omp-general.h" > +#include "graphite-oacc.h" > +#include "print-tree.h" > > class debug_printer > { > @@ -69,12 +73,27 @@ public: > fprintf (output.dump_file, "%d", i); > return output; > } > + > friend debug_printer & > operator<< (debug_printer &output, const char *s) > { > fprintf (output.dump_file, "%s", s); > return output; > } > + > + friend debug_printer & > + operator<< (debug_printer &output, gimple* stmt) > + { > + print_gimple_stmt (output.dump_file, stmt, 0, TDF_VOPS | TDF_MEMSYMS); > + return output; > + } > + > + friend debug_printer & > + operator<< (debug_printer &output, tree t) > + { > + print_generic_expr (output.dump_file, t, TDF_SLIM); > + return output; > + } > } dp; > > #define DEBUG_PRINT(args) do \ > @@ -286,7 +305,8 @@ namespace > class scop_detection > { > public: > - scop_detection () : scops (vNULL) {} > + scop_detection (oacc_context *oacc_ctx) > + : scops (vNULL), oacc_ctx (oacc_ctx) {} > > ~scop_detection () > { > @@ -354,24 +374,6 @@ public: > bool stmt_simple_for_scop_p (sese_l scop, gimple *stmt, > basic_block bb) const; > > - /* Something like "n * m" is not allowed. */ > - > - static bool graphite_can_represent_init (tree e); > - > - /* Return true when SCEV can be represented in the polyhedral model. > - > - An expression can be represented, if it can be expressed as an > - affine expression. For loops (i, j) and parameters (m, n) all > - affine expressions are of the form: > - > - x1 * i + x2 * j + x3 * m + x4 * n + x5 * 1 where x1..x5 element of Z > - > - 1 i + 20 j + (-2) m + 25 > - > - Something like "i * n" or "n * m" is not allowed. */ > - > - static bool graphite_can_represent_scev (sese_l scop, tree scev); > - > /* Return true when EXPR can be represented in the polyhedral model. 
> > This means an expression can be represented, if it is linear with > respect > @@ -382,9 +384,9 @@ public: > tree expr); > > /* Return true if the data references of STMT can be represented by > Graphite. > - We try to analyze the data references in a loop contained in the SCOP. > */ > + We try to analyze the data references in a loop contained in the SCOP. > */ > > - static bool stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt); > + static bool stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt, > oacc_context *oacc_ctx); > > /* Remove the close phi node at GSI and replace its rhs with the rhs > of PHI. */ > @@ -403,6 +405,7 @@ public: > > private: > vec<sese_l> scops; > + oacc_context *oacc_ctx; > }; > > sese_l scop_detection::invalid_sese (NULL, NULL); > @@ -560,14 +563,58 @@ scop_detection::can_represent_loop (loop_p loop, sese_l > scop) > || !single_pred_p (loop->latch) > || exit->src != single_pred (loop->latch) > || !empty_block_p (loop->latch)) > - return false; > + { > + DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop shape invalid.\n"); > + return false; > + > + } > + > + bool edge_irreducible = loop_preheader_edge (loop)->flags & > EDGE_IRREDUCIBLE_LOOP; > + if (edge_irreducible) > + { > + DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop is not a natural > loop.\n"); > + return false; > + } > + > + bool niter_is_unconditional = > + number_of_iterations_exit (loop, single_exit (loop), &niter_desc, false); > + if (!niter_is_unconditional) > + { > + DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop niter not > unconditional.\n"); > + return false; > + } > + > + if (!niter_desc.control.no_overflow) > + { > + DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop niter can > overflow.\n"); > + return false; > + } > + > + niter = number_of_latch_executions (loop); > + if (!niter) > + { > + DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop niter unknown.\n"); > + return false; > + } > + > + bool undetermined_coefficients = 
chrec_contains_undetermined (niter); > + if (undetermined_coefficients) > + { > + DEBUG_PRINT (dp << "[can_represent_loop-fail] " > + << "Loop niter chrec contains undetermined > coefficients.\n"); > + return false; > + } > > - return !(loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP) > - && number_of_iterations_exit (loop, single_exit (loop), &niter_desc, > false) > - && niter_desc.control.no_overflow > - && (niter = number_of_latch_executions (loop)) > - && !chrec_contains_undetermined (niter) > - && graphite_can_represent_expr (scop, loop, niter); > + bool can_represent_expr = graphite_can_represent_expr (scop, loop, niter); > + if (!can_represent_expr) > + { > + DEBUG_PRINT (dp << "[can_represent_loop-fail] " > + << "Loop niter expression cannot be represented: " > + << niter << "\n"); > + return false; > + } > + > + return true; > } > > /* Return true when BEGIN is the preheader edge of a loop with a single exit > @@ -615,13 +662,12 @@ scop_detection::add_scop (sese_l s) > s.exit = single_succ_edge (s.exit->dest); > } > > - /* Do not add scops with only one loop. */ > - if (region_has_one_loop (s)) > - { > - DEBUG_PRINT (dp << "[scop-detection-fail] Discarding one loop SCoP: "; > - print_sese (dump_file, s)); > - return; > - } > + if (!oacc_function_p (cfun) && region_has_one_loop (s)) > + { > + DEBUG_PRINT (dp << "[scop-detection-fail] Discarding one loop SCoP: "; > + print_sese (dump_file, s)); > + return; > + } > > if (get_exit_bb (s) == EXIT_BLOCK_PTR_FOR_FN (cfun)) > { > @@ -805,140 +851,87 @@ scop_detection::remove_intersecting_scops (sese_l s1) > } > } > > -/* Something like "n * m" is not allowed. */ > +/* Return true when EXPR can be represented in the polyhedral model. > + > + This means an expression can be represented, if it is linear with respect > to > + the loops and the strides are non parametric. LOOP is the place where the > + expr will be evaluated. SCOP defines the region we analyse. 
*/ > > bool > -scop_detection::graphite_can_represent_init (tree e) > +scop_detection::graphite_can_represent_expr (sese_l scop, loop_p loop, > + tree expr) > { > - switch (TREE_CODE (e)) > + if (TREE_CODE (expr) == SSA_NAME) > { > - case POLYNOMIAL_CHREC: > - return graphite_can_represent_init (CHREC_LEFT (e)) > - && graphite_can_represent_init (CHREC_RIGHT (e)); > - > - case MULT_EXPR: > - if (chrec_contains_symbols (TREE_OPERAND (e, 0))) > - return graphite_can_represent_init (TREE_OPERAND (e, 0)) > - && tree_fits_shwi_p (TREE_OPERAND (e, 1)); > - else > - return graphite_can_represent_init (TREE_OPERAND (e, 1)) > - && tree_fits_shwi_p (TREE_OPERAND (e, 0)); > + tree name = SSA_NAME_IDENTIFIER (expr); > > - case PLUS_EXPR: > - case POINTER_PLUS_EXPR: > - case MINUS_EXPR: > - return graphite_can_represent_init (TREE_OPERAND (e, 0)) > - && graphite_can_represent_init (TREE_OPERAND (e, 1)); > - > - case NEGATE_EXPR: > - case BIT_NOT_EXPR: > - CASE_CONVERT: > - case NON_LVALUE_EXPR: > - return graphite_can_represent_init (TREE_OPERAND (e, 0)); > - > - default: > - break; > + if (name) > + { > + const char* id = IDENTIFIER_POINTER (name); > + if (strncmp (id, ".bound", 6) == 0 > + || strncmp (id, ".offset", 7) == 0 > + || strncmp (id, ".chunk_max", 11) == 0 > + || strncmp (id, ".chunk_no", 10) == 0 > + || strncmp (id, ".step", 5) == 0) > + return true; > + } > } > + tree scev = cached_scalar_evolution_in_region (scop, loop, expr); > + bool can_represent = graphite_can_represent_scev (scop, scev); > > - return true; > -} > - > -/* Return true when SCEV can be represented in the polyhedral model. > - > - An expression can be represented, if it can be expressed as an > - affine expression. For loops (i, j) and parameters (m, n) all > - affine expressions are of the form: > - > - x1 * i + x2 * j + x3 * m + x4 * n + x5 * 1 where x1..x5 element of Z > - > - 1 i + 20 j + (-2) m + 25 > - > - Something like "i * n" or "n * m" is not allowed. 
*/ > - > -bool > -scop_detection::graphite_can_represent_scev (sese_l scop, tree scev) > -{ > - if (chrec_contains_undetermined (scev)) > - return false; > - > - switch (TREE_CODE (scev)) > + if (!can_represent) > { > - case NEGATE_EXPR: > - case BIT_NOT_EXPR: > - CASE_CONVERT: > - case NON_LVALUE_EXPR: > - return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0)); > - > - case PLUS_EXPR: > - case POINTER_PLUS_EXPR: > - case MINUS_EXPR: > - return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0)) > - && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 1)); > - > - case MULT_EXPR: > - return !CONVERT_EXPR_CODE_P (TREE_CODE (TREE_OPERAND (scev, 0))) > - && !CONVERT_EXPR_CODE_P (TREE_CODE (TREE_OPERAND (scev, 1))) > - && !(chrec_contains_symbols (TREE_OPERAND (scev, 0)) > - && chrec_contains_symbols (TREE_OPERAND (scev, 1))) > - && graphite_can_represent_init (scev) > - && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0)) > - && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 1)); > - > - case POLYNOMIAL_CHREC: > - /* Check for constant strides. With a non constant stride of > - 'n' we would have a value of 'iv * n'. Also check that the > - initial value can represented: for example 'n * m' cannot be > - represented. */ > - gcc_assert (loop_in_sese_p (get_loop (cfun, > - CHREC_VARIABLE (scev)), scop)); > - if (!evolution_function_right_is_integer_cst (scev) > - || !graphite_can_represent_init (scev)) > - return false; > - return graphite_can_represent_scev (scop, CHREC_LEFT (scev)); > - > - case ADDR_EXPR: > - /* We cannot encode addresses for ISL. 
*/ > - return false; > - > - default: > - break; > + if (dump_file) > + { > + fprintf (dump_file, "[graphite_can_represent_expr] Cannot represent > scev "); > + print_generic_expr (dump_file, scev, TDF_SLIM); > + fprintf (dump_file, " of expression "); > + print_generic_expr (dump_file, expr, TDF_SLIM); > + fprintf (dump_file, "\n"); > + } > } > - > - /* Only affine functions can be represented. */ > - if (tree_contains_chrecs (scev, NULL) || !scev_is_linear_expression (scev)) > - return false; > - > - return true; > + return can_represent; > } > > -/* Return true when EXPR can be represented in the polyhedral model. > - > - This means an expression can be represented, if it is linear with respect > to > - the loops and the strides are non parametric. LOOP is the place where the > - expr will be evaluated. SCOP defines the region we analyse. */ > +/* Check if STMT is a internal OpenACC function call that should be > + ignored when Graphite checks side effects and data references. */ > > -bool > -scop_detection::graphite_can_represent_expr (sese_l scop, loop_p loop, > - tree expr) > -{ > - tree scev = cached_scalar_evolution_in_region (scop, loop, expr); > - return graphite_can_represent_scev (scop, scev); > +static inline bool > +oacc_ignore_internal_stmt (gimple *stmt) { > + return is_gimple_call (stmt) && > + (gimple_call_internal_p (stmt, IFN_UNIQUE) > + || gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION)); > } > > /* Return true if the data references of STMT can be represented by Graphite. > We try to analyze the data references in a loop contained in the SCOP. 
*/ > > bool > -scop_detection::stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt) > +scop_detection::stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt, > oacc_context *oacc_ctx) > { > + if (oacc_ignore_internal_stmt (stmt)) > + return true; > + > edge nest = scop.entry; > loop_p loop = loop_containing_stmt (stmt); > if (!loop_in_sese_p (loop, scop)) > loop = NULL; > > + bool allow_non_affine_base = flag_graphite_non_affine_accesses; > auto_vec<data_reference_p> drs; > - if (! graphite_find_data_references_in_stmt (nest, loop, stmt, &drs)) > - return false; > + if (! graphite_find_data_references_in_stmt (nest, loop, stmt, &drs, > + oacc_ctx, > allow_non_affine_base)) > + { > + DEBUG_PRINT (dp << "[stmt_has_simple_data_refs_p] Unanalyzable > statement.\n"); > + return false; > + } > + > + /* This flag means that we allow Graphite to overapproximate the > + range of data references. Consequently, we do not need to check > + if Graphite can actually represent the access functions' > + SCEVs. */ > + if (flag_graphite_non_affine_accesses) > + return true; > > int j; > data_reference_p dr; > @@ -946,7 +939,10 @@ scop_detection::stmt_has_simple_data_refs_p (sese_l > scop, gimple *stmt) > { > for (unsigned i = 0; i < DR_NUM_DIMENSIONS (dr); ++i) > if (! 
graphite_can_represent_scev (scop, DR_ACCESS_FN (dr, i))) > - return false; > + { > + DEBUG_PRINT (dp << "[stmt_has_simple_data_refs_p] Cannot > represent access function SCEV: " << DR_ACCESS_FN (dr, i) << "\n"); > + return false; > + } > } > > return true; > @@ -959,6 +955,9 @@ scop_detection::stmt_has_simple_data_refs_p (sese_l scop, > gimple *stmt) > static bool > stmt_has_side_effects (gimple *stmt) > { > + if (oacc_ignore_internal_stmt (stmt)) > + return false; > + > if (gimple_has_volatile_ops (stmt) > || (gimple_code (stmt) == GIMPLE_CALL > && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE))) > @@ -990,11 +989,16 @@ scop_detection::stmt_simple_for_scop_p (sese_l scop, > gimple *stmt, > if (stmt_has_side_effects (stmt)) > return false; > > - if (!stmt_has_simple_data_refs_p (scop, stmt)) > + if (!stmt_has_simple_data_refs_p (scop, stmt, oacc_ctx)) > { > - DEBUG_PRINT (dp << "[scop-detection-fail] " > - << "Graphite cannot handle data-refs in stmt:\n"; > - print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS);); > + DEBUG_PRINT (dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmt, > + "[scop-detection-fail] Graphite cannot " > + "handle data-refs-in-stmt: "); > + print_gimple_stmt (dump_file, stmt, 0, > + TDF_VOPS | TDF_MEMSYMS); > + fprintf (dump_file, "\n"); > + > + ); > return false; > } > > @@ -1027,14 +1031,22 @@ scop_detection::stmt_simple_for_scop_p (sese_l scop, > gimple *stmt, > for (unsigned i = 0; i < 2; ++i) > { > tree op = gimple_op (stmt, i); > - if (!graphite_can_represent_expr (scop, loop, op) > - /* We can only constrain on integer type. */ > - || ! INTEGRAL_TYPE_P (TREE_TYPE (op))) > + if (!graphite_can_represent_expr (scop, loop, op)) > + { > + DEBUG_PRINT (dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmt, > + "[scop-detection-fail] " > + "Graphite cannot represent cond > " > + "stmt operator expression.\n")); > + return false; > + } > + > + if (! 
INTEGRAL_TYPE_P (TREE_TYPE (op))) > { > - DEBUG_PRINT (dp << "[scop-detection-fail] " > - << "Graphite cannot represent stmt:\n"; > - print_gimple_stmt (dump_file, stmt, 0, > - TDF_VOPS | TDF_MEMSYMS)); > + DEBUG_PRINT (dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmt, > + "[scop-detection-fail] " > + "Graphite cannot represent cond > " > + "statement operator. " > + "Type must be integral.\n")); > return false; > } > } > @@ -1151,6 +1163,7 @@ scan_tree_for_params (sese_info_p s, tree e) > break; > > case SSA_NAME: > + //TODO-kernels Stop treating some OpenACC ifn calls as parameters > (reductions?) > assign_parameter_index_in_region (e, s); > break; > > @@ -1288,11 +1301,99 @@ build_cross_bb_scalars_use (scop_p scop, tree use, > gimple *use_stmt, > add_read (reads, use, use_stmt); > } > > +/* This class keeps track of the variables that occur in active > + OpenACC reductions while walking a function's basic blocks during > + SCoP detection. The UPDATE method processes calls to the OpenACC > + internal functions which mark the beginning and the end of the use > + of a reduction variable. It adjusts an internal reference count > + that is maintained for each such variable accordingly (i.e. number > + of reductions using a SSA_NAME with the variable name). */ > + > +class oacc_reductions { > + public: > + bool is_reduction_var (const tree var); > + void update (const gimple* oacc_reduction_call); > + void update (const basic_block); > + private: > + hash_map<tree, unsigned> reductions; > +}; > + > +/* Check if the DEF is a SSA_NAME for a variable that occurs in an > + active reduction. */ > + > +bool oacc_reductions::is_reduction_var (const tree def) { > + if (TREE_CODE (def) != SSA_NAME) > + return false; > + > + tree var = SSA_NAME_VAR (def); > + if (var == NULL_TREE) > + return false; > + > + return reductions.get (var); > +} > + > +/* Update the internal reference count for the variable used by the > + OACC_REDUCTION_CALL if it starts or ends a reduction. 
*/ > + > +void oacc_reductions::update (const gimple* oacc_reduction_call) > +{ > + const gcall* call = GIMPLE_CHECK2<const gcall *> (oacc_reduction_call); > + unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0)); > + > + if (!gimple_has_lhs (call)) > + return; > + > + tree var = SSA_NAME_VAR (gimple_call_lhs (call)); > + > + if (var == NULL) > + return; > + > + switch (code) > + { > + case IFN_GOACC_REDUCTION_SETUP: > + { > + unsigned& ref_count = reductions.get_or_insert (var); > + ref_count++; > + > + break; > + } > + case IFN_GOACC_REDUCTION_FINI: > + { > + unsigned* ref_count = reductions.get (var); > + gcc_checking_assert (ref_count != NULL && *ref_count > 0); > + ref_count--; > + > + if (ref_count == 0) > + reductions.remove (var); > + > + break; > + } > + > + default: > + break; > + } > +} > + > +void oacc_reductions::update (const basic_block bb) > +{ > + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); > + gsi_next (&gsi)) > + { > + gimple *stmt = gsi_stmt (gsi); > + if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION)) > + update (stmt); > + } > +} > + > + > + > /* Generates a polyhedral black box only if the bb contains interesting > information. 
*/ > > static gimple_poly_bb_p > -try_generate_gimple_bb (scop_p scop, basic_block bb) > +try_generate_gimple_bb (scop_p scop, basic_block bb, > + oacc_reductions& oacc_reductions, > + __attribute__ ((unused)) oacc_context* oacc_ctx) > { > vec<data_reference_p> drs = vNULL; > vec<tree> writes = vNULL; > @@ -1304,6 +1405,7 @@ try_generate_gimple_bb (scop_p scop, basic_block bb) > if (!loop_in_sese_p (loop, region)) > loop = NULL; > > + bool allow_non_affine_base = flag_graphite_non_affine_accesses; > for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); > gsi_next (&gsi)) > { > @@ -1311,16 +1413,27 @@ try_generate_gimple_bb (scop_p scop, basic_block bb) > if (is_gimple_debug (stmt)) > continue; > > - graphite_find_data_references_in_stmt (nest, loop, stmt, &drs); > + graphite_find_data_references_in_stmt (nest, loop, stmt, &drs, > + oacc_ctx, allow_non_affine_base); > > tree def = gimple_get_lhs (stmt); > - if (def) > + if (def > + /* When analyzing the outlined function for an OpenACC > + region, no dependencies on reduction variables should be > + generated. Those variables must be ignored when deciding > + if a loop can be parallel. */ > + && !oacc_reductions.is_reduction_var (def)) > build_cross_bb_scalars_def (scop, def, gimple_bb (stmt), &writes); > > ssa_op_iter iter; > tree use; > FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) > - build_cross_bb_scalars_use (scop, use, stmt, &reads); > + { > + if (oacc_reductions.is_reduction_var (use)) > + continue; > + > + build_cross_bb_scalars_use (scop, use, stmt, &reads); > + } > } > > /* Handle defs and uses in PHIs. 
Those need special treatment given > @@ -1332,7 +1445,8 @@ try_generate_gimple_bb (scop_p scop, basic_block bb) > gphi *phi = psi.phi (); > tree res = gimple_phi_result (phi); > if (virtual_operand_p (res) > - || scev_analyzable_p (res, scop->scop_info->region)) > + || scev_analyzable_p (res, scop->scop_info->region) > + || oacc_reductions.is_reduction_var (res)) > continue; > /* To simulate out-of-SSA the block containing the PHI node has > reads of the PHI destination. And to preserve SSA dependences > @@ -1362,13 +1476,15 @@ try_generate_gimple_bb (scop_p scop, basic_block bb) > continue; > /* To simulate out-of-SSA the predecessor of edges into PHI > nodes > has a copy from the PHI argument to the PHI destination. */ > - if (! scev_analyzable_p (res, scop->scop_info->region)) > + if (! scev_analyzable_p (res, scop->scop_info->region) > + && ! oacc_reductions.is_reduction_var (res)) > add_write (&writes, res); > tree use = PHI_ARG_DEF_FROM_EDGE (phi, e); > if (TREE_CODE (use) == SSA_NAME > && ! SSA_NAME_IS_DEFAULT_DEF (use) > && gimple_bb (SSA_NAME_DEF_STMT (use)) != bb_for_succs > - && ! scev_analyzable_p (use, scop->scop_info->region)) > + && ! scev_analyzable_p (use, scop->scop_info->region) > + && ! 
oacc_reductions.is_reduction_var (use)) > add_read (&reads, use, phi); > } > if (e->dest == bb_for_succs->loop_father->latch > @@ -1402,40 +1518,316 @@ try_generate_gimple_bb (scop_p scop, basic_block bb) > return new_gimple_poly_bb (bb, drs, reads, writes); > } > > +/* Return true if the reference of DR is a COMPONENT_REF whose base > + is accessed through an SSA name of CTX->omp_data.tgt_fn_arg. > + Return false for every other reference and if CTX is NULL. */ > + > +bool oacc_is_omp_data_use (oacc_context *ctx, data_reference_p dr) > +{ > + /* Callers pass a NULL context when the current function is not an > + OpenACC outlined function. */ > + if (ctx == NULL) > + return false; > + > + if (TREE_CODE (dr->ref) != COMPONENT_REF) > + return false; > + > + tree referenced = TREE_OPERAND (dr->ref, 0); > + > + /* The base of the component reference may be a declaration, which > + has no operands to inspect. */ > + if (!EXPR_P (referenced) || TREE_OPERAND_LENGTH (referenced) < 1) > + return false; > + > + tree target = TREE_OPERAND (referenced, 0); > + if (TREE_CODE (target) != SSA_NAME > + || SSA_NAME_VAR (target) == NULL_TREE) > + return false; > + > + return SSA_NAME_VAR (target) == ctx->omp_data.tgt_fn_arg; > +} > + > +/* Aliasing involving the pointers contained in the ".omp_data_i" > + struct can be safely ignored. We are analysing the behavior of a > + loop nest with respect to the original function and those pointers > + are artifacts of the outlining process. */ > + > +bool oacc_ignore_alias (oacc_context *ctx, data_reference_p dr1, > data_reference_p dr2) > +{ > + if (oacc_is_omp_data_use (ctx, dr1) > + || oacc_is_omp_data_use (ctx, dr2)) > + return true; > + > + return false; > +} > + > +/* Returns true if expression EXPR is defined between ENTRY and > + EXIT. */ > + > +static bool > +def_in_region_p (edge entry, edge exit, tree expr) > +{ > + basic_block entry_bb = entry->dest; > + basic_block exit_bb = exit->dest; > + basic_block def_bb; > + > + if (! 
expr) > + return false; > + > + if (is_gimple_min_invariant (expr)) > + return false; > + > + if (TREE_CODE (expr) == SSA_NAME) > + { > + def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); > + if (!def_bb) > + return false; > + > + if (dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb) > + && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb)) > + return true; > + > + return false; > + } > + else { > + for (unsigned i = 0; i < TREE_OPERAND_LENGTH (expr); i++) > + if (def_in_region_p (entry, exit, TREE_OPERAND (expr, i))) > + return true; > + } > + > + return false; > +} > + > +static bool > +scev_defs_outside_region_p (edge entry, edge exit, tree scev) > +{ > + if (chrec_contains_undetermined (scev)) > + return false; > + > + switch (TREE_CODE (scev)) > + { > + case NEGATE_EXPR: > + case BIT_NOT_EXPR: > + case NON_LVALUE_EXPR: > + case ADDR_EXPR: > + return scev_defs_outside_region_p (entry, exit, TREE_OPERAND (scev, > 0)); > + > + case PLUS_EXPR: > + case POINTER_PLUS_EXPR: > + case MINUS_EXPR: > + case MULT_EXPR: > + case POLYNOMIAL_CHREC: > + return scev_defs_outside_region_p (entry, exit, TREE_OPERAND (scev, 0)) > + && scev_defs_outside_region_p (entry, exit, TREE_OPERAND (scev, 1)); > + > + default: > + break; > + } > + > + return ! def_in_region_p (entry, exit, scev); > +} > + > +/* Checks if all parts of the DRI are defined outside of the region > + surrounded by the given edges which allows an alias check involving > + DRI to be placed before the region. 
*/ > + > +static bool > +dr_defs_outside_region (edge entry, edge exit, dr_info *dri) > +{ > + data_reference_p dr = dri->dr; > + tree base = DR_BASE_OBJECT (dr); > + if (def_in_region_p (entry, exit, base)) > + { > + DEBUG_PRINT(dp << "base defined inside SCoP.\n"); > + return false; > + } > + > + tree step = DR_STEP (dr); > + if (def_in_region_p (entry, exit, step)) > + { > + DEBUG_PRINT(dp << "step defined inside SCoP.\n"); > + return false; > + } > + > + tree base_addr = DR_BASE_ADDRESS (dr); > + if (def_in_region_p (entry, exit, base_addr)) > + { > + DEBUG_PRINT(dp << "base address defined inside SCoP.\n"); > + return false; > + } > + > + for (unsigned i = 0; i < DR_NUM_DIMENSIONS(dr); ++i) > + { > + tree access = DR_ACCESS_FN (dr, i); > + if (! scev_defs_outside_region_p (entry, exit, access)) > + { > + DEBUG_PRINT(fprintf (dump_file, "%d-th access function uses > definitions from SCoP.\n", i); > + print_generic_expr (dump_file, access, dump_flags); > + fprintf (dump_file, "\n");); > + return false; > + } > + } > + > + return true; > +} > + > + > + > +/* Return TRUE if a runtime alias check to resolve the aliasing > + between the DDRs DR1 and DR2 which belong to the LOOP in the region > + delineated by SCOP_ENTRY and SCOP_EXIT may be created and placed > + before that region. */ > + > +static opt_result > +graphite_runtime_alias_check_p (dr_info* dr1, dr_info* dr2, class loop *loop, > + edge scop_entry, edge scop_exit) > +{ > + gcc_checking_assert (loop); > + gcc_checking_assert (dr1); > + gcc_checking_assert (dr2); > + > + if (dump_enabled_p ()) > + dump_printf (MSG_NOTE, > + "consider run-time aliasing test between %T and %T\n", > + DR_REF (dr1->dr), DR_REF (dr2->dr)); > + > + > + if (! 
optimize_loop_for_speed_p (loop)) > + return opt_result::failure_at (DR_STMT (dr1->dr), > + "runtime alias check not supported when" > + " optimizing for size.\n"); > + > + > + /* Verify that we have enough information about the data-references > + and context loop to construct a runtime alias check with > + "compute_alias_check_pairs". */ > + > + if (loop->num != 0) { > + tree niters = number_of_latch_executions (loop); > + if (niters == NULL_TREE || niters == chrec_dont_know) > + return opt_result::failure_at (DR_STMT (dr1->dr), > + "could not determine number of > iterations " > + "of the SCoP's context loop. " > + "Aborting runtime alias checks.\n"); > + } > + > + /* The runtime alias check selects between the optimized and the > + original version of a SCoP. Hence, it must be placed before the > + SCoP which is not possible if some of the data reference's fields > + refer to definitions inside of the SCoP. */ > + > + if (! dr_defs_outside_region (scop_entry, scop_exit, dr1) > + || ! dr_defs_outside_region (scop_entry, scop_exit, dr2)) > + return opt_result::failure_at (DR_STMT (dr1->dr), > + "data-references use definitions inside > of " > + "SCoP. " > + "Aborting runtime alias checks.\n"); > + > + > + return opt_result::success (); > +} > + > /* Compute alias-sets for all data references in DRS. 
*/ > > -static bool > -build_alias_set (scop_p scop) > +static bool > +build_alias_set (scop_p scop, oacc_context *oacc_ctx) > { > int num_vertices = scop->drs.length (); > struct graph *g = new_graph (num_vertices); > dr_info *dr1, *dr2; > int i, j; > int *all_vertices; > + edge scop_entry = scop->scop_info->region.entry; > + edge scop_exit = scop->scop_info->region.exit; > > struct loop *nest > - = find_common_loop (scop->scop_info->region.entry->dest->loop_father, > - scop->scop_info->region.exit->src->loop_father); > + = find_common_loop (scop_entry->dest->loop_father, > + scop_exit->src->loop_father); > + > + gcc_checking_assert (nest); > + > + DEBUG_PRINT(dp << "[build_alias_set]: Data references:\n"; > + dr_info *dr; > + FOR_EACH_VEC_ELT (scop->drs, i, dr) > + { > + dump_data_reference (dump_file, dr->dr); > + } > + ); > + > + auto_vec<loop_p, 1> nest_vec; > + if (flag_graphite_runtime_alias_checks) > + { > + scop->unhandled_alias_ddrs.create (1); > + nest_vec.safe_push (nest); > + } > > FOR_EACH_VEC_ELT (scop->drs, i, dr1) > for (j = i+1; scop->drs.iterate (j, &dr2); j++) > - if (dr_may_alias_p (dr1->dr, dr2->dr, nest)) > + if (! (DR_IS_READ (dr1->dr) && DR_IS_READ (dr2->dr)) > + && dr_may_alias_p (dr1->dr, dr2->dr, nest)) > { > - /* Dependences in the same alias set need to be handled > - by just looking at DR_ACCESS_FNs. */ > - if (DR_NUM_DIMENSIONS (dr1->dr) == 0 > - || DR_NUM_DIMENSIONS (dr1->dr) != DR_NUM_DIMENSIONS (dr2->dr) > - || ! operand_equal_p (DR_BASE_OBJECT (dr1->dr), > - DR_BASE_OBJECT (dr2->dr), > - OEP_ADDRESS_OF) > - || ! types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (dr1->dr)), > - TREE_TYPE (DR_BASE_OBJECT (dr2->dr)))) > + if (! oacc_ignore_alias (oacc_ctx, dr1->dr, dr2->dr)) > { > - free_graph (g); > - return false; > + /* Dependences in the same alias set need to be handled > + by just looking at DR_ACCESS_FNs. 
*/ > + bool dimension_zero = DR_NUM_DIMENSIONS (dr1->dr) == 0; > + if (dimension_zero) > + DEBUG_PRINT(dp << "[build_alias_set] DR1 has dimension > 0\n"); > + > + bool different_dimensions = > + DR_NUM_DIMENSIONS (dr1->dr) != DR_NUM_DIMENSIONS (dr2->dr); > + if (different_dimensions) > + DEBUG_PRINT (dp << "[build_alias_set] " > + "DRs have different dimensions\n"); > + > + bool different_base_objects = > + ! operand_equal_p (DR_BASE_OBJECT (dr1->dr), > + DR_BASE_OBJECT (dr2->dr), OEP_ADDRESS_OF); > + if (different_base_objects) > + DEBUG_PRINT (dp << "[build_alias_set] " > + "DRs access different objects\n"); > + > + bool incompatible_types = > + ! types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (dr1->dr)), > + TREE_TYPE (DR_BASE_OBJECT (dr2->dr))); > + if (incompatible_types) > + DEBUG_PRINT (dp << "[build_alias_set] " > + "DRs with incompatible base object types"); > + > + if (dimension_zero || different_dimensions > + || different_base_objects || incompatible_types) > + { > + if (! flag_graphite_runtime_alias_checks) > + goto FAIL; > + > + if (! graphite_runtime_alias_check_p (dr1, dr2, nest, > + scop_entry, > scop_exit)) > + goto FAIL; > + > + ddr_p ddr = initialize_data_dependence_relation > + (dr1->dr, dr2->dr, nest_vec); > + scop->unhandled_alias_ddrs.safe_push(ddr); > + } > } > add_edge (g, i, j); > add_edge (g, j, i); > + continue; > + > + FAIL: > + DEBUG_PRINT (dp << > + "[build_alias_set] " > + "Cannot handle dependency between data references: \n"; > + print_gimple_stmt (dump_file, dr1->dr->stmt, 2, > TDF_DETAILS); > + print_gimple_stmt (dump_file, dr2->dr->stmt, 2, > TDF_DETAILS); > + dp << "\n"); > + > + if (flag_graphite_runtime_alias_checks) > + { > + if (scop->unhandled_alias_ddrs.length () > 0) > + /* The loop-nest vec is shared by all DDRs. 
*/ > + DDR_LOOP_NEST (scop->unhandled_alias_ddrs[0]).release (); > + free_dependence_relations (scop->unhandled_alias_ddrs); > + } > + > + free_graph (g); > + return false; > } > > all_vertices = XNEWVEC (int, num_vertices); > @@ -1457,7 +1849,7 @@ build_alias_set (scop_p scop) > class gather_bbs : public dom_walker > { > public: > - gather_bbs (cdi_direction, scop_p, int *); > + gather_bbs (cdi_direction, scop_p, int *, oacc_reductions&, oacc_context*); > > virtual edge before_dom_children (basic_block); > virtual void after_dom_children (basic_block); > @@ -1465,12 +1857,18 @@ public: > private: > auto_vec<gimple *, 3> conditions, cases; > scop_p scop; > + oacc_reductions &reductions; > + oacc_context *oacc_ctx; > }; > -} > -gather_bbs::gather_bbs (cdi_direction direction, scop_p scop, int *bb_to_rpo) > - : dom_walker (direction, ALL_BLOCKS, bb_to_rpo), scop (scop) > -{ > -} > + > +gather_bbs::gather_bbs > +(cdi_direction direction, scop_p scop, int *bb_to_rpo, > + oacc_reductions& reductions, oacc_context *oacc_ctx) > +: dom_walker (direction, ALL_BLOCKS, bb_to_rpo) > + , scop (scop) > + , reductions (reductions) > + , oacc_ctx (oacc_ctx) > +{} > > /* Call-back for dom_walk executed before visiting the dominated > blocks. 
*/ > @@ -1478,6 +1876,8 @@ gather_bbs::gather_bbs (cdi_direction direction, scop_p > scop, int *bb_to_rpo) > edge > gather_bbs::before_dom_children (basic_block bb) > { > + reductions.update (bb); > + > sese_info_p region = scop->scop_info; > if (!bb_in_sese_p (bb, region->region)) > return dom_walker::STOP; > @@ -1514,7 +1914,8 @@ gather_bbs::before_dom_children (basic_block bb) > > scop->scop_info->bbs.safe_push (bb); > > - gimple_poly_bb_p gbb = try_generate_gimple_bb (scop, bb); > + gimple_poly_bb_p gbb = try_generate_gimple_bb (scop, bb, reductions, > oacc_ctx); > + > if (!gbb) > return NULL; > > @@ -1563,6 +1964,44 @@ gather_bbs::after_dom_children (basic_block bb) > } > } > > +/* Update the OpenACC reductions information for all basic blocks > + encountered by the dom_walker. This is used to adjust the > + reduction information for the basic blocks between the SCoPs (which > + are processed by GATHER_BBS) in the BUILD_SCOPS function. */ > + > +class oacc_reduction_walker : public dom_walker > +{ > +public: > + oacc_reduction_walker (oacc_reductions& reductions, edge end, int *); > + > + virtual edge before_dom_children (basic_block); > + > +private: > + auto_vec<gimple *, 3> conditions, cases; > + oacc_reductions& reductions; > + edge end; > +}; > + > +oacc_reduction_walker::oacc_reduction_walker > +(oacc_reductions& reductions, edge end, int *bb_to_rpo) > +: dom_walker (CDI_DOMINATORS, ALL_BLOCKS, bb_to_rpo), > + reductions (reductions), > + end (end) > +{ > +} > + > +edge > +oacc_reduction_walker::before_dom_children (basic_block bb) > +{ > + reductions.update (bb); > + > + if (bb == end->src) > + return dom_walker::STOP; > + else > + return NULL; > +} > + > +} > > /* Compute sth like an execution order, dominator order with first executing > edges that stay inside the current loop, delaying processing exit edges. > */ > @@ -1590,12 +2029,12 @@ cmp_pbbs (const void *pa, const void *pb) > them to SCOPS. 
*/ > > void > -build_scops (vec<scop_p> *scops) > +build_scops (vec<scop_p> *scops, oacc_context *oacc_ctx) > { > if (dump_file) > dp.set_dump_file (dump_file); > > - scop_detection sb; > + scop_detection sb (oacc_ctx); > sb.build_scop_depth (current_loops->tree_root); > > /* Now create scops from the lightweight SESEs. */ > @@ -1611,17 +2050,26 @@ build_scops (vec<scop_p> *scops) > > int i; > sese_l *s; > + basic_block reduction_walk_start = ENTRY_BLOCK_PTR_FOR_FN (cfun); > + oacc_reductions reductions; > + > FOR_EACH_VEC_ELT (scops_l, i, s) > { > scop_p scop = new_scop (s->entry, s->exit); > > + edge reduction_walk_end = s->entry; > + oacc_reduction_walker (reductions, reduction_walk_end, bb_to_rpo) > + .walk (reduction_walk_start); > + reduction_walk_start = s->exit->dest; > + > /* Record all basic blocks and their conditions in REGION. */ > - gather_bbs (CDI_DOMINATORS, scop, bb_to_rpo).walk (s->entry->dest); > + gather_bbs (CDI_DOMINATORS, scop, bb_to_rpo, reductions, oacc_ctx) > + .walk (s->entry->dest); > > /* Sort pbbs after execution order for initial schedule generation. */ > scop->pbbs.qsort (cmp_pbbs); > > - if (! build_alias_set (scop)) > + if (! build_alias_set (scop, oacc_ctx)) > { > DEBUG_PRINT (dp << "[scop-detection-fail] cannot handle > dependences\n"); > free_scop (scop); > diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c > index c42415e0554..c6f07ea9a99 100644 > --- a/gcc/graphite-sese-to-poly.c > +++ b/gcc/graphite-sese-to-poly.c > @@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. If not see > #include "gimplify.h" > #include "gimplify-me.h" > #include "tree-cfg.h" > +#include "tree-chrec-oacc.h" > #include "tree-ssa-loop-manip.h" > #include "tree-ssa-loop-niter.h" > #include "tree-ssa-loop.h" > @@ -209,6 +210,8 @@ parameter_index_in_region (tree name, sese_info_p region) > return -1; > } > > +tree oacc_ifn_call_extract (gimple*); > + > /* Extract an affine expression from the tree E in the scop S. 
*/ > > static isl_pw_aff * > @@ -275,6 +278,13 @@ extract_affine (scop_p s, tree e, __isl_take isl_space > *space) > case SSA_NAME: > { > gcc_assert (! defined_in_sese_p (e, s->scop_info->region)); > + if (is_oacc_loop_ifn_call_def (e)) > + { > + gimple* stmt = SSA_NAME_DEF_STMT (e); > + return extract_affine (s, oacc_ifn_call_extract (stmt), space); > + > + } > + > int dim = parameter_index_in_region (e, s->scop_info); > gcc_assert (dim != -1); > /* No need to wrap a parameter. */ > @@ -643,8 +653,20 @@ build_poly_dr (dr_info &dri) > subscript_sizes = pdr_add_data_dimensions (subscript_sizes, scop, dr); > } > > - new_poly_dr (pbb, DR_STMT (dr), DR_IS_READ (dr) ? PDR_READ : PDR_WRITE, > - acc, subscript_sizes); > + bool representable = true; > + for (unsigned i = 0; i < DR_NUM_DIMENSIONS (dr); ++i) > + if (! graphite_can_represent_scev > + (scop->scop_info->region, DR_ACCESS_FN (dr, i))) > + representable = false; > + > + /* If non-affine access functions are not enabled, the DR should > + have been rejected during SCoP detection. */ > + gcc_checking_assert (representable || flag_graphite_non_affine_accesses); > + > + poly_dr_type write_type = representable ? PDR_WRITE : PDR_MAY_WRITE; > + poly_dr_type type = DR_IS_READ (dr) ? PDR_READ : write_type; > + > + new_poly_dr (pbb, DR_STMT (dr), type, acc, subscript_sizes); > } > > static void > diff --git a/gcc/graphite.c b/gcc/graphite.c > index 27f1e486e1f..3661d92e601 100644 > --- a/gcc/graphite.c > +++ b/gcc/graphite.c > @@ -43,6 +43,8 @@ along with GCC; see the file COPYING3. If not see > #include "cfghooks.h" > #include "tree.h" > #include "gimple.h" > +#include "gimple-iterator.h" > +#include "gimplify-me.h" > #include "ssa.h" > #include "fold-const.h" > #include "gimple-iterator.h" > @@ -58,6 +60,18 @@ along with GCC; see the file COPYING3. 
If not see > #include "tree-ssa.h" > #include "tree-into-ssa.h" > #include "graphite.h" > +#include "graphite-oacc.h" > +#include "cgraph.h" > +#include "gimple-pretty-print.h" > +#include "print-tree.h" > + > +static bool have_isl = true; > + > +#ifdef ACCEL_COMPILER > +static bool accel_compiler = true; > +#else > +static bool accel_compiler = false; > +#endif > > /* Print global statistics to FILE. */ > > @@ -348,6 +362,220 @@ canonicalize_loop_closed_ssa (loop_p loop, edge e) > } > } > > +/* Result of find_goacc_parallel_call: the call statement and the > + loop_father of the basic block that contains it. */ > + > +struct goacc_parallel_info { > + gcall* call; > + loop_p loop; > +}; > + > +/* Find the call to BUILT_IN_GOACC_PARALLEL in FN whose function > + argument (argument 1) is the address of the current function. > + Return that call together with the loop_father of its basic > + block; if several calls match, the last one visited wins. Both > + fields of the result are NULL if there is no such call in > + FN. */ > + > +static goacc_parallel_info find_goacc_parallel_call (struct function* fn) > +{ > + /* Start from a well-defined "not found" value; the caller asserts > + on both fields. */ > + goacc_parallel_info result = { NULL, NULL }; > + > + basic_block bb; > + FOR_EACH_BB_FN (bb, fn) > + { > + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); > + gsi_next (&gsi)) > + { > + gimple *call = gsi_stmt (gsi); > + if (! is_gimple_call (call) > + || ! 
gimple_call_builtin_p (call, BUILT_IN_NORMAL)) > + continue; > + > + built_in_function code = > + DECL_FUNCTION_CODE (gimple_call_fndecl (call)); > + if (code != BUILT_IN_GOACC_PARALLEL) > + continue; > + > + tree called_fn_addr = gimple_call_arg (call, 1); > + gcc_checking_assert (TREE_CODE (called_fn_addr) == ADDR_EXPR); > + tree called_fn = TREE_OPERAND (called_fn_addr, 0); > + > + /* Skip calls whose function argument is not the current > + function. */ > + if (called_fn != cfun->decl) > + continue; > + > + result.call = (gcall*)call; > + result.loop = bb->loop_father; > + } > + } > + > + return result; > +} > + > +/* Return operand 0 of argument 3 of the BUILT_IN_GOACC_PARALLEL call > + CALL. NOTE(review): presumably this is the ".omp_data_arr" > + object whose address is passed to the call; confirm. */ > + > +static tree > +get_goacc_parallel_omp_data_arg (gimple* call) > +{ > + gcc_checking_assert (is_gimple_call (call) > + && gimple_call_builtin_p (call, BUILT_IN_NORMAL) > + && DECL_FUNCTION_CODE (gimple_call_fndecl (call)) > + == BUILT_IN_GOACC_PARALLEL); > + tree omp_data_ptr = gimple_call_arg (call, 3); > + tree omp_data = TREE_OPERAND (omp_data_ptr, 0); > + > + return omp_data; > +} > + > +/* TODO-kernels This was meant as a way to allow peeking into the > + original function from an outlined function to see, for instance, > + if some values are known to be constant since constant propagation > + fails to propagate values into the outlined function. Currently > + unused and can be removed. */ > + > +/* Gather direct assignments to the fields of SRC_FN_ARG in FN in > + FIELD_MAP. Each field that gets assigned to in FN is mapped to the > + corresponding rhs of the last encountered assignment. */ > + > +void > +oacc_omp_data::gather_assignments (struct function *fn) > +{ > + if (!fn) > + return; > + > + basic_block bb; > + FOR_EACH_BB_FN (bb, fn) > + { > + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); > + gsi_next (&gsi)) > + { > + gimple *assign = gsi_stmt (gsi); > + > + if (! 
is_gimple_assign (assign)) > + continue; > + > + tree lhs = gimple_assign_lhs (assign); > + if (TREE_CODE (lhs) != COMPONENT_REF > + || TREE_OPERAND (lhs, 0) != src_fn_arg) > + continue; > + > + tree field = DECL_NAME (TREE_OPERAND (lhs, 1)); > + > + oacc_data_field& value = field_map.get_or_insert (field); > + value.src_var = gimple_assign_rhs1 (assign); > + } > + } > + > + unsigned i; > + tree var; > + FOR_EACH_VEC_SAFE_ELT (cfun->local_decls, i, var) > + { > + oacc_data_field* value = field_map.get (DECL_NAME (var)); > + if (value) > + value->tgt_var = var; > + } > +} > + > +/* TODO-kernels This will have to be adapted to work with the > +"exploded arguments" patch because the mapping of names from the > +outlined function to the original function relies on the omp_data > +arguments to the outlined function which don't exist after "exploding" > +the arguments. */ > + > +oacc_omp_data > +oacc_omp_data::construct (struct function* fn) > +{ > + tree omp_data_arr_arg = NULL_TREE; > + loop_p call_loop = NULL; > + > + if (fn) > + { > + goacc_parallel_info info = find_goacc_parallel_call (fn); > + gcall *parallel_call = info.call; > + call_loop = info.loop; > + gcc_checking_assert (parallel_call); > + gcc_checking_assert (call_loop); > + > + omp_data_arr_arg = get_goacc_parallel_omp_data_arg (parallel_call); > + gcc_checking_assert (omp_data_arr_arg); > + } > + tree omp_data_i_arg = DECL_ARGUMENTS (cfun->decl); > + > + oacc_omp_data omp_data; > + omp_data.src_fn_arg = omp_data_arr_arg; > + omp_data.tgt_fn_arg = omp_data_i_arg; > + omp_data.loop = call_loop; > + > + omp_data.gather_assignments (fn); > + > + return omp_data; > +} > + > +/* TODO-kernels How to implement this in a stable way? The name of the > + original function may change (constprop?) which causes the search > + to fail. */ > + > +/* Return the function from which the OpenACC OUTLINED_FN > + has been outlined. */ > + > +static function* > +find_oacc_src_fn (function* outlined_fn) > +{ > + gcc_assert (! 
accel_compiler); > + gcc_assert (oacc_function_p (outlined_fn)); > + > + tree name = DECL_ASSEMBLER_NAME (outlined_fn->decl); > + const char* id = IDENTIFIER_POINTER (name); > + const unsigned len = IDENTIFIER_LENGTH (name); > + > + /* id is the name of the function from which the current > + function has been outlined, followed by a suffix that starts > + with ".omp_fn" */ > + unsigned i = 0; > + for (; i < len; ++i) > + if (id[i] == '.') > + break; > + > + gcc_checking_assert (id[i] == '.'); > + > + const char* src_id = ggc_alloc_string (id, i); > + /* fprintf (stderr, "[%s] Looking for source function '%s'\n. ", > __FUNCTION__, src_id); */ > + tree id_node = get_identifier (src_id); > + > + cgraph_node* n = cgraph_node::get_for_asmname (id_node); > + > + return n ? n->get_fun () : NULL; > + > + /* cgraph_node* node; */ > + /* FOR_EACH_FUNCTION (node) */ > + /* { */ > + /* tree node_name = DECL_ASSEMBLER_NAME (node->decl); */ > + /* const char* node_id = IDENTIFIER_POINTER (node_name); */ > + > + /* fprintf (stderr, "[%s] Function '%s'\n. ", __FUNCTION__, node_id); > */ > + > + /* if (strcmp (src_id, node_id) == 0) */ > + /* return node->get_fun (); */ > + /* } */ > + /* return NULL; */ > +} > + > +oacc_context > +oacc_context::build_context () { > + if (! oacc_function_p (cfun)) > + return oacc_context::invalid_context (); > + > + struct function* src_fn (find_oacc_src_fn (cfun)); > + if (! src_fn) > + { > + if (dump_file) > + fprintf (dump_file, > + "Source function for outlined function %s not found.\n", > + IDENTIFIER_POINTER (DECL_NAME (cfun->decl))); > + /* return oacc_context::invalid_context (); */ > + } > + > + oacc_context context (oacc_omp_data::construct (src_fn)); > + > + context.valid = true; > + return context; > +} > + > /* Converts the current loop closed SSA form to a canonical form > expected by the Graphite code generation. 
> > @@ -405,6 +633,8 @@ canonicalize_loop_form (void) > > isl_ctx *the_isl_ctx; > > +extern void oacc_set_arg_evolutions (); > + > /* Perform a set of linear transforms on the loops of the current > function. */ > > @@ -417,10 +647,34 @@ graphite_transform_loops (void) > vec<scop_p> scops = vNULL; > isl_ctx *ctx; > > + bool is_oacc_function = oacc_function_p (cfun); > + > + oacc_context oacc_ctx(oacc_context::build_context ()); > + // TODO-kernels Clean this up > + /* if (is_oacc_function && ! oacc_ctx.is_valid ()) */ > + /* { */ > + /* if (dump_file) */ > + /* fprintf (dump_file, "Could not build OpenACC context for function %s. > " */ > + /* "Aborting Graphite.\n", current_function_name ()); */ > + /* return; */ > + /* } */ > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, "\nTransforming Function: %s\n", > + current_function_name ()); > + > /* If a function is parallel it was most probably already run through > graphite > once. No need to run again. */ > - if (parallelized_function_p (cfun->decl)) > - return; > + > + /* TODO-kernels Stop marking kernels regions that should be > + processed here as "parallelized". */ > + if (parallelized_function_p (cfun->decl) && ! is_oacc_function) > + { > + > + if (dump_file) > + fprintf (dump_file, "\nAlready parallelized function.\n"); > + return; > + } > > calculate_dominance_info (CDI_DOMINATORS); > > @@ -445,7 +699,9 @@ graphite_transform_loops (void) > seir_cache = new hash_map<sese_scev_hash, tree>; > > calculate_dominance_info (CDI_POST_DOMINATORS); > - build_scops (&scops); > + > + oacc_set_arg_evolutions (); > + build_scops (&scops, oacc_ctx.is_valid () ? &oacc_ctx : NULL); > free_dominance_info (CDI_POST_DOMINATORS); > > /* Remove the fake exits before transform given they are not reflected > @@ -520,6 +776,8 @@ graphite_transform_loops (void) > > #else /* If isl is not available: #ifndef HAVE_isl. 
*/ > > +static bool have_isl = false; > + > static void > graphite_transform_loops (void) > { > @@ -532,7 +790,9 @@ graphite_transform_loops (void) > static unsigned int > graphite_transforms (struct function *fun) > { > - if (number_of_loops (fun) <= 1) > + unsigned num_loops = number_of_loops (fun); > + if (num_loops == 0 > + || (num_loops == 1 && !oacc_function_p (fun))) > return 0; > > graphite_transform_loops (); > @@ -540,9 +800,34 @@ graphite_transforms (struct function *fun) > return 0; > } > > + > +/* Return true if FUN is an OpenACC outlined function that should be > + handled by Graphite, i.e. if this is not the accelerator compiler, > + OpenACC is enabled and FUN carries an OpenACC function attribute. */ > + > +static inline bool oacc_enable_graphite_p (function *fun) > +{ > + return !accel_compiler && flag_openacc && oacc_get_fn_attrib (fun->decl); > +} > + > + > static bool > -gate_graphite_transforms (void) > +gate_graphite_transforms (function *fun) > { > + bool oacc_enabled_graphite = oacc_enable_graphite_p (fun); > + bool not_optimizing = global_options.x_optimize <= 0; > + > + /* Enabling Graphite if isl is not available aborts compilation. > + Hence, without isl, only set the flag when not optimizing and > + emit a warning otherwise. NOTE(review): confirm that setting > + the flag is harmless when isl is missing and not optimizing. */ > + if (have_isl || not_optimizing) > + flag_graphite_identity |= oacc_enabled_graphite; > + else { > + if (oacc_enabled_graphite) > + warning (OPT_Wall, "Unable to enable Graphite on OpenACC regions, " > + "because isl is not available"); > + } > + > + > /* Enable -fgraphite pass if any one of the graphite optimization flags > is turned on. 
*/ > if (flag_graphite_identity > @@ -576,7 +861,7 @@ public: > {} > > /* opt_pass methods: */ > - virtual bool gate (function *) { return gate_graphite_transforms (); } > + virtual bool gate (function *fun) { return gate_graphite_transforms (fun); > } > > }; // class pass_graphite > > @@ -611,7 +896,7 @@ public: > {} > > /* opt_pass methods: */ > - virtual bool gate (function *) { return gate_graphite_transforms (); } > + virtual bool gate (function *fun) { return gate_graphite_transforms (fun); > } > virtual unsigned int execute (function *fun) { return graphite_transforms > (fun); } > > }; // class pass_graphite_transforms > @@ -624,4 +909,108 @@ make_pass_graphite_transforms (gcc::context *ctxt) > return new pass_graphite_transforms (ctxt); > } > > +/* Something like "n * m" is not allowed. */ > + > +static bool > +graphite_can_represent_init (tree e) > +{ > + switch (TREE_CODE (e)) > + { > + case POLYNOMIAL_CHREC: > + return graphite_can_represent_init (CHREC_LEFT (e)) > + && graphite_can_represent_init (CHREC_RIGHT (e)); > + > + case MULT_EXPR: > + if (chrec_contains_symbols (TREE_OPERAND (e, 0))) > + return graphite_can_represent_init (TREE_OPERAND (e, 0)) > + && tree_fits_shwi_p (TREE_OPERAND (e, 1)); > + else > + return graphite_can_represent_init (TREE_OPERAND (e, 1)) > + && tree_fits_shwi_p (TREE_OPERAND (e, 0)); > + > + case PLUS_EXPR: > + case POINTER_PLUS_EXPR: > + case MINUS_EXPR: > + return graphite_can_represent_init (TREE_OPERAND (e, 0)) > + && graphite_can_represent_init (TREE_OPERAND (e, 1)); > + > + case NEGATE_EXPR: > + case BIT_NOT_EXPR: > + CASE_CONVERT: > + case NON_LVALUE_EXPR: > + return graphite_can_represent_init (TREE_OPERAND (e, 0)); > + > + default: > + break; > + } > + > + return true; > +} > + > +/* Return true when SCEV can be represented in the polyhedral model. > + > + An expression can be represented, if it can be expressed as an > + affine expression. 
For loops (i, j) and parameters (m, n) all > + affine expressions are of the form: > + > + x1 * i + x2 * j + x3 * m + x4 * n + x5 * 1 where x1..x5 element of Z > + > + 1 i + 20 j + (-2) m + 25 > + > + Something like "i * n" or "n * m" is not allowed. */ > > +bool > +graphite_can_represent_scev (sese_l scop, tree scev) > +{ > + if (chrec_contains_undetermined (scev)) > + return false; > + > + switch (TREE_CODE (scev)) > + { > + case NEGATE_EXPR: > + case BIT_NOT_EXPR: > + CASE_CONVERT: > + case NON_LVALUE_EXPR: > + return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0)); > + > + case PLUS_EXPR: > + case POINTER_PLUS_EXPR: > + case MINUS_EXPR: > + return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0)) > + && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 1)); > + > + case MULT_EXPR: > + return !CONVERT_EXPR_CODE_P (TREE_CODE (TREE_OPERAND (scev, 0))) > + && !CONVERT_EXPR_CODE_P (TREE_CODE (TREE_OPERAND (scev, 1))) > + && !(chrec_contains_symbols (TREE_OPERAND (scev, 0)) > + && chrec_contains_symbols (TREE_OPERAND (scev, 1))) > + && graphite_can_represent_init (scev) > + && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0)) > + && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 1)); > + > + case POLYNOMIAL_CHREC: > + /* Check for constant strides. With a non constant stride of > + 'n' we would have a value of 'iv * n'. Also check that the > + initial value can represented: for example 'n * m' cannot be > + represented. */ > + gcc_assert (loop_in_sese_p (get_loop (cfun, > + CHREC_VARIABLE (scev)), scop)); > + if (!evolution_function_right_is_integer_cst (scev) > + || !graphite_can_represent_init (scev)) > + return false; > + return graphite_can_represent_scev (scop, CHREC_LEFT (scev)); > + > + case ADDR_EXPR: > + /* We cannot encode addresses for ISL. */ > + return false; > + > + default: > + break; > + } > + > + /* Only affine functions can be represented. 
*/ > + if (tree_contains_chrecs (scev, NULL) || !scev_is_linear_expression (scev)) > + return false; > + > + return true; > +} > diff --git a/gcc/graphite.h b/gcc/graphite.h > index 3fe1345cf96..1e0ccd2de7b 100644 > --- a/gcc/graphite.h > +++ b/gcc/graphite.h > @@ -384,6 +384,10 @@ struct scop > /* The maximum alias set as assigned to drs by build_alias_sets. */ > unsigned max_alias_set; > > + /* The set of dependent ddrs that were rejected by build_alias_set > + and that must be handled by other means (runtime checking). */ > + auto_vec<ddr_p> unhandled_alias_ddrs; > + > /* All the basic blocks in this scop that contain memory references > and that will be represented as statements in the polyhedral > representation. */ > @@ -459,10 +463,15 @@ carries_deps (__isl_keep isl_union_map *schedule, > > extern bool build_poly_scop (scop_p); > extern bool graphite_regenerate_ast_isl (scop_p); > -extern void build_scops (vec<scop_p> *); > + > +class oacc_context; > +extern void build_scops (vec<scop_p> *, oacc_context*); > + > extern tree cached_scalar_evolution_in_region (const sese_l &, loop_p, tree); > extern void dot_all_sese (FILE *, vec<sese_l> &); > extern void dot_sese (sese_l &); > extern void dot_cfg (); > > +extern bool graphite_can_represent_scev (sese_l, tree); > + > #endif > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h > index 2ec3c504cd6..73c0e7d1880 100644 > --- a/gcc/internal-fn.h > +++ b/gcc/internal-fn.h > @@ -52,10 +52,13 @@ enum ifn_unique_kind { > > CHUNK_MAX = LOOP (CODE_CHUNKS, DIR, RANGE, STEP, CHUNK_SIZE, MASK) > STEP = LOOP (CODE_STEP, DIR, RANGE, STEP, CHUNK_SIZE, MASK) > - OFFSET = LOOP (CODE_OFFSET, DIR, RANGE, STEP, CHUNK_SIZE, MASK, > CHUNK_NO) > - BOUND = LOOP (CODE_BOUND, DIR, RANGE, STEP, CHUNK_SIZE, MASK, OFFSET) > + OFFSET = LOOP (CODE_OFFSET, DIR, RANGE, STEP, CHUNK_SIZE, MASK, BASE, > CHUNK_NO) > + BOUND = LOOP (CODE_BOUND, DIR, RANGE, STEP, CHUNK_SIZE, MASK, END, > OFFSET) > > DIR - +1 for up loop, -1 for down loop > + BASE - 
Initial value of the loop's iteration variable. > + END - Last value of the loop's iteration variable +1 or -1, depending > on the > + direction of the iteration. > RANGE - Range of loop (END - BASE) > STEP - iteration step size > CHUNKING - size of chunking, (constant zero for no chunking) > diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c > index 08afaceb87e..b3d21c1181d 100644 > --- a/gcc/omp-expand.c > +++ b/gcc/omp-expand.c > @@ -6015,8 +6015,8 @@ expand_omp_taskloop_for_inner (struct omp_region > *region, > T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV); > > <head_bb> [created by splitting end of entry_bb] > - T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no); > - T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset); > + T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, b, > chunk_no); > + T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, e, offset); > if (!(offset LTGT bound)) goto bottom_bb; > > <body_bb> [incoming] > @@ -6270,20 +6270,22 @@ expand_oacc_for (struct omp_region *region, struct > omp_for_data *fd) > /* Loop offset & bound go into head_bb. 
*/ > gsi = gsi_start_bb (head_bb); > > - call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, > + tree begin = force_gimple_operand_gsi (&gsi, unshare_expr (fd->loop.n1), > true, NULL_TREE, true, GSI_SAME_STMT); > + call = gimple_build_call_internal (IFN_GOACC_LOOP, 8, > build_int_cst (integer_type_node, > IFN_GOACC_LOOP_OFFSET), > dir, range, s, > - chunk_size, gwv, chunk_no); > + chunk_size, gwv, begin, chunk_no); > gimple_call_set_lhs (call, offset_init); > gimple_set_location (call, loc); > gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); > > - call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, > + tree end = force_gimple_operand_gsi (&gsi, unshare_expr (fd->loop.n2), > true, NULL_TREE, true, GSI_SAME_STMT); > + call = gimple_build_call_internal (IFN_GOACC_LOOP, 8, > build_int_cst (integer_type_node, > IFN_GOACC_LOOP_BOUND), > dir, range, s, > - chunk_size, gwv, offset_init); > + chunk_size, gwv, end, offset_init); > gimple_call_set_lhs (call, bound); > gimple_set_location (call, loc); > gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); > @@ -6332,16 +6334,20 @@ expand_oacc_for (struct omp_region *region, struct > omp_for_data *fd) > tree t, e_gwv = integer_minus_one_node; > tree chunk = build_int_cst (diff_type, 0); /* Never chunked. 
*/ > + tree begin = force_gimple_operand_gsi (&gsi, unshare_expr > (fd->loop.n1), > + true, NULL_TREE, true, > GSI_SAME_STMT); > t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET); > - call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, > e_range, > - element_s, chunk, e_gwv, chunk); > + call = gimple_build_call_internal (IFN_GOACC_LOOP, 8, t, dir, > e_range, > + element_s, chunk, e_gwv, begin, > chunk); > gimple_call_set_lhs (call, e_offset); > gimple_set_location (call, loc); > gsi_insert_before (&gsi, call, GSI_SAME_STMT); > > + tree end = force_gimple_operand_gsi (&gsi, unshare_expr > (fd->loop.n2), > + true, NULL_TREE, true, > GSI_SAME_STMT); > t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND); > - call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, > e_range, > - element_s, chunk, e_gwv, > e_offset); > + call = gimple_build_call_internal (IFN_GOACC_LOOP, 8, t, dir, > e_range, > + element_s, chunk, e_gwv, end, > e_offset); > gimple_call_set_lhs (call, e_bound); > gimple_set_location (call, loc); > gsi_insert_before (&gsi, call, GSI_SAME_STMT); > diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c > index a73b73fb41a..3e81e878cf9 100644 > --- a/gcc/omp-offload.c > +++ b/gcc/omp-offload.c > @@ -83,6 +83,8 @@ struct oacc_loop > vec<gcall *> ifns; /* Contained loop abstraction functions. */ > tree chunk_size; /* Chunk size. */ > gcall *head_end; /* Final marker of head sequence. */ > + > + bool can_be_parallel; /* Can the loop be parallelized? */ > }; > > /* Holds offload tables with decls. 
*/ > @@ -657,7 +659,7 @@ oacc_xform_loop (gcall *call) > > if (chunking) > { > - tree chunk = fold_convert (diff_type, gimple_call_arg (call, > 6)); > + tree chunk = fold_convert (diff_type, gimple_call_arg (call, > 7)); > tree per > = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size); > per = build2 (MULT_EXPR, diff_type, per, chunk); > @@ -697,7 +699,7 @@ oacc_xform_loop (gcall *call) > > r = fold_build2 (MULT_EXPR, diff_type, span, step); > > - tree offset = gimple_call_arg (call, 6); > + tree offset = gimple_call_arg (call, 7); > r = build2 (PLUS_EXPR, diff_type, r, > fold_convert (diff_type, offset)); > r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR, > @@ -911,7 +913,8 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int > level, unsigned used) > check = false; > #endif > if (check > - && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn))) > + && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn)) > + && !lookup_attribute ("oacc parallel_kernels_graphite", > DECL_ATTRIBUTES (fn))) > { > static char const *const axes[] = > /* Must be kept in sync with GOMP_DIM enumeration. */ > @@ -1015,6 +1018,11 @@ new_oacc_loop (oacc_loop *parent, gcall *marker) > flags. */ > > loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3)); > + if (dump_file && (dump_flags & TDF_DETAILS)) > + dump_printf_loc (MSG_NOTE, > + dump_user_location_t::from_location_t (loop->loc), > + "[new_oacc_loop] Found loop %s 'auto' directive.\n", > + loop->flags & OLF_AUTO ? 
"with" : "without"); > > tree chunk_size = integer_zero_node; > if (loop->flags & OLF_GANG_STATIC) > @@ -1217,45 +1225,62 @@ oacc_loop_discover_walk (oacc_loop *loop, basic_block > bb) > break; > > case IFN_UNIQUE: > - enum ifn_unique_kind kind > - = (enum ifn_unique_kind) (TREE_INT_CST_LOW > - (gimple_call_arg (call, 0))); > - if (kind == IFN_UNIQUE_OACC_HEAD_MARK > - || kind == IFN_UNIQUE_OACC_TAIL_MARK) > - { > - if (gimple_call_num_args (call) == 2) > - { > - gcc_assert (marker && !remaining); > - marker = 0; > - if (kind == IFN_UNIQUE_OACC_TAIL_MARK) > - loop = finish_oacc_loop (loop); > - else > - loop->head_end = call; > - } > - else > - { > - int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); > + { > + enum ifn_unique_kind kind > + = (enum ifn_unique_kind) (TREE_INT_CST_LOW > + (gimple_call_arg (call, 0))); > + if (kind == IFN_UNIQUE_OACC_HEAD_MARK > + || kind == IFN_UNIQUE_OACC_TAIL_MARK) > + { > + if (gimple_call_num_args (call) == 2) > + { > + gcc_assert (marker && !remaining); > + marker = 0; > + if (kind == IFN_UNIQUE_OACC_TAIL_MARK) > + loop = finish_oacc_loop (loop); > + else > + loop->head_end = call; > + } > + else > + { > + int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); > > - if (!marker) > - { > - if (kind == IFN_UNIQUE_OACC_HEAD_MARK) > - loop = new_oacc_loop (loop, call); > - remaining = count; > - } > - gcc_assert (count == remaining); > - if (remaining) > - { > - remaining--; > - if (kind == IFN_UNIQUE_OACC_HEAD_MARK) > - loop->heads[marker] = call; > - else > - loop->tails[remaining] = call; > - } > - marker++; > - } > - } > + if (!marker) > + { > + if (kind == IFN_UNIQUE_OACC_HEAD_MARK) > + loop = new_oacc_loop (loop, call); > + > + remaining = count; > + } > + gcc_assert (count == remaining); > + if (remaining) > + { > + remaining--; > + if (kind == IFN_UNIQUE_OACC_HEAD_MARK) > + loop->heads[marker] = call; > + else > + loop->tails[remaining] = call; > + } > + marker++; > + } > + } > + break; > + } > + > + case 
IFN_GOACC_REDUCTION: > + break; > } > } > + > + if (bb->loop_father->can_be_parallel) > + { > + loop->can_be_parallel = true; > + const dump_user_location_t loc > + = dump_user_location_t::from_location_t (loop->loc); > + if (dump_file) > + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Detected > parallelizable loop."); > + } > + > if (remaining || marker) > { > bb = single_succ (bb); > @@ -1411,12 +1436,61 @@ oacc_loop_process (oacc_loop *loop) > oacc_loop_process (loop->sibling); > } > > -/* Walk the OpenACC loop heirarchy checking and assigning the > +/* Interpret the "can_be_parallel" flag of the LOOP to decide > + if it can be made "independent" */ > + > +static bool oacc_loop_parallelize (oacc_loop *loop) { > + if (loop->routine) > + return false; > + > + if (!(loop->flags & OLF_AUTO)) > + { > + if (dump_file && (dump_flags & TDF_DETAILS)) > + dump_printf_loc (MSG_NOTE, > + dump_user_location_t::from_location_t (loop->loc), > + "[oacc_loop_parallelize] Not an 'auto' loop.\n"); > + > + > + return false; > + } > + > + if (!loop->can_be_parallel) > + { > + if (dump_file && (dump_flags & TDF_DETAILS)) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, > + dump_user_location_t::from_location_t (loop->loc), > + "'auto' loop cannot be parallel.\n"); > + return false; > + } > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, > + dump_user_location_t::from_location_t (loop->loc), > + "'auto' loop can be parallel.\n"); > + > + loop->flags |= OLF_INDEPENDENT; > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, "[oacc_loop_parallelize] %s:%u Transformed 'auto' > into 'independent'.\n", > + LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc)); > + > + // loop->flags &= ~OLF_AUTO; > + /* TODO-kernels Right now we need to keep the OLF_AUTO flag for > + further processing in oacc_loop_fixed_partitions and > + oacc_loop_auto_partitions. 
We should remove it here and use > + another flag to indicate that the partitioning must be > + assigned. */ > + > + return true; > +} > + > +/* Walk the OpenACC loop hierarchy checking and assigning the > programmer-specified partitionings. OUTER_MASK is the partitioning > this loop is contained within. Return mask of partitioning > encountered. If any auto loops are discovered, set GOMP_DIM_MAX > bit. */ > > + > static unsigned > oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) > { > @@ -1446,14 +1520,18 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned > outer_mask) > > if ((this_mask != 0) + auto_par + seq_par > 1) > { > - if (noisy) > - error_at (loop->loc, > - seq_par > - ? G_("%<seq%> overrides other OpenACC loop specifiers") > - : G_("%<auto%> conflicts with other OpenACC loop " > - "specifiers")); > + if (seq_par && noisy) > + error_at (loop->loc, G_("%<seq%> overrides other OpenACC loop > specifiers")); > maybe_auto = false; > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + dump_printf_loc (MSG_NOTE, > + dump_user_location_t::from_location_t > (loop->loc), > + "[oacc_loop_fixed_partitions] Removed > 'auto'.\n"); > + > + > loop->flags &= ~OLF_AUTO; > + > if (seq_par) > { > loop->flags > @@ -1467,6 +1545,9 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned > outer_mask) > loop->flags |= OLF_AUTO; > mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX); > } > + > + if (oacc_loop_parallelize (loop)) > + mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX); > } > > if (this_mask & outer_mask) > diff --git a/gcc/predict.c b/gcc/predict.c > index a7ae977c866..c44aac58f28 100644 > --- a/gcc/predict.c > +++ b/gcc/predict.c > @@ -4035,7 +4035,7 @@ pass_profile::execute (function *fun) > class loop *loop; > FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) > if (loop->header->count.initialized_p ()) > - fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n", > + fprintf (dump_file, "Loop %d got predicted to iterate %i times.\n", > loop->num, > 
(int)expected_loop_iterations_unbounded (loop)); > } > diff --git a/gcc/testsuite/gcc.dg/graphite/alias-0-no-runtime-check.c > b/gcc/testsuite/gcc.dg/graphite/alias-0-no-runtime-check.c > new file mode 100644 > index 00000000000..7228fb09818 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/graphite/alias-0-no-runtime-check.c > @@ -0,0 +1,20 @@ > +/* This test demonstrates a loop nest that Graphite cannot handle > + because of aliasing. But the loop nest can be handled with enabled > + runtime alias checking. */ > + > +/* { dg-options "-O2 -fgraphite-identity -fno-graphite-runtime-alias-checks > -fdump-tree-graphite-details" } */ > + > +void sum(int *x, int *y, unsigned *sum) > +{ > + unsigned i,j; > + *sum = 0; > + > + for (i = 0; i < 10000; i=i+1) > + { > + int xi = x[i]; > + for (j = 0; j < 22222; j=j+1) > + *sum += xi + y[j]; > + } > +} > + > +/* { dg-final { scan-tree-dump "number of SCoPs: 0" "graphite"} } */ > diff --git a/gcc/testsuite/gcc.dg/graphite/alias-0-runtime-check.c > b/gcc/testsuite/gcc.dg/graphite/alias-0-runtime-check.c > new file mode 100644 > index 00000000000..a9f9ef99908 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/graphite/alias-0-runtime-check.c > @@ -0,0 +1,21 @@ > +/* This test demonstrates a loop nest that Graphite cannot handle > + because of aliasing. But the loop nest can be handled with enabled > + runtime alias checking. 
*/ > + > +/* { dg-options "-O2 -fgraphite-identity -fgraphite-runtime-alias-checks > -fdump-tree-graphite-details" } */ > + > +void sum(int *x, int *y, unsigned *sum) > +{ > + unsigned i,j; > + *sum = 0; > + > + for (i = 0; i < 10000; i=i+1) > + { > + int xi = x[i]; > + for (j = 0; j < 22222; j=j+1) > + *sum += xi + y[j]; > + } > +} > + > +/* { dg-final { scan-tree-dump "number of SCoPs: 1" "graphite"} } */ > +/* { dg-final { scan-tree-dump "Generated runtime alias > check.*?sum_.*?x_.*?y_.*?\n" "graphite"} } */ > diff --git a/gcc/testsuite/gcc.dg/graphite/alias-1.c > b/gcc/testsuite/gcc.dg/graphite/alias-1.c > new file mode 100644 > index 00000000000..ee80dae1df3 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/graphite/alias-1.c > @@ -0,0 +1,22 @@ > +/* This test demonstrates a loop nest that Graphite cannot handle > + because of aliasing. It should be possible to handle this loop nest > + by creating a runtime alias check like in the very similar test > + alias-0-runtime-check.c. However, because Graphite analyses the data > + reference with respect to the innermost loop that contains the data > + reference, the variable "i" remains uninstantiated (in contrast to > + "j"), and consequently the alias check cannot be placed outside of > + the SCoP since "i" is not defined there. */ > + > +/* { dg-options "-O2 -fgraphite-identity -fgraphite-runtime-alias-checks > -fdump-tree-graphite-details" } */ > + > +void sum(int *x, int *y, unsigned *sum) > +{ > + unsigned i,j; > + *sum = 0; > + > + for (i = 0; i < 10000; i=i+1) > + for (j = 0; j < 22222; j=j+1) > + *sum += x[i] + y[j]; > +} > + > +/* { dg-final { scan-tree-dump "number of SCoPs: 1" "graphite" { xfail *-*-* > } } } */ > diff --git a/gcc/tree-chrec-oacc.h b/gcc/tree-chrec-oacc.h > new file mode 100644 > index 00000000000..bcbb1e03657 > --- /dev/null > +++ b/gcc/tree-chrec-oacc.h > @@ -0,0 +1,45 @@ > +/* OpenACC helpers for Chains of recurrences. > + Copyright (C) 2003-2020 Free Software Foundation, Inc. 
> + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it under > +the terms of the GNU General Public License as published by the Free > +Software Foundation; either version 3, or (at your option) any later > +version. > + > +GCC is distributed in the hope that it will be useful, but WITHOUT ANY > +WARRANTY; without even the implied warranty of MERCHANTABILITY or > +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > +for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>. */ > + > +#ifndef GCC_TREE_CHREC_OACC_H > +#define GCC_TREE_CHREC_OACC_H > + > +#include <gimple.h> > +#include <internal-fn.h> > + > +/* Check if the tree is an SSA_NAME whose defining statement > + is a call to an IFN_GOACC_LOOP function. */ > +static inline bool > +is_oacc_loop_ifn_call_def (tree t) { > + tree_code code = TREE_CODE (t); > + return (code == SSA_NAME > + && gimple_call_internal_p (SSA_NAME_DEF_STMT (t), IFN_GOACC_LOOP)); > +} > + > +/* Check if the tree is an SSA_NAME whose defining statement > + is a call to an IFN_GOACC_LOOP or IFN_GOACC_REDUCTION function. */ > +static inline bool > +is_oacc_ifn_call_def (tree t) { > + if (TREE_CODE (t) != SSA_NAME) > + return false; > + > + return (gimple_call_internal_p (SSA_NAME_DEF_STMT (t), IFN_GOACC_LOOP) > + || gimple_call_internal_p (SSA_NAME_DEF_STMT (t), > IFN_GOACC_REDUCTION)); > +} > +#endif /* GCC_TREE_CHREC_OACC_H */ > diff --git a/gcc/tree-chrec.c b/gcc/tree-chrec.c > index a8848067040..f536d6001ce 100644 > --- a/gcc/tree-chrec.c > +++ b/gcc/tree-chrec.c > @@ -1744,8 +1744,17 @@ scev_is_linear_expression (tree scev) > } > } > > -/* Determines whether the expression CHREC contains only interger consts > - in the right parts. 
*/ > +static bool > +is_oacc_loop_call (tree chrec) { > + return TREE_CODE (chrec) == SSA_NAME > + && gimple_call_internal_p (SSA_NAME_DEF_STMT (chrec), > + IFN_GOACC_LOOP); > + > +} > + > +/* Determines whether the expression CHREC contains only integer > + consts in the right parts. OpenACC internal function calls > + which encode integer constants are also admitted. */ > > bool > evolution_function_right_is_integer_cst (const_tree chrec) > @@ -1759,7 +1768,8 @@ evolution_function_right_is_integer_cst (const_tree > chrec) > return true; > > case POLYNOMIAL_CHREC: > - return TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST > + return (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST > + || is_oacc_loop_call (CHREC_RIGHT (chrec))) > && (TREE_CODE (CHREC_LEFT (chrec)) != POLYNOMIAL_CHREC > || evolution_function_right_is_integer_cst (CHREC_LEFT (chrec))); > > diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c > index 5505ba46778..9094b2ac45d 100644 > --- a/gcc/tree-data-ref.c > +++ b/gcc/tree-data-ref.c > @@ -85,6 +85,7 @@ along with GCC; see the file COPYING3. If not see > #include "fold-const.h" > #include "expr.h" > #include "gimple-iterator.h" > +#include "tree-chrec-oacc.h" > #include "tree-ssa-loop-niter.h" > #include "tree-ssa-loop.h" > #include "tree-ssa.h" > @@ -97,6 +98,8 @@ along with GCC; see the file COPYING3. If not see > #include "tree-eh.h" > #include "ssa.h" > #include "internal-fn.h" > +#include "print-tree.h" > +#include "graphite-oacc.h" > > static struct datadep_stats > { > @@ -884,18 +887,23 @@ canonicalize_base_object_address (tree addr) > dummy outermost loop. In other cases perform loop analysis. > > Return true if the analysis succeeded and store the results in DRB if so. > - BB analysis can only fail for bitfield or reversed-storage accesses. */ > + BB analysis can only fail for bitfield or reversed-storage accesses. > + > + If ALLOW_NON_AFFINE_BASE is true, the function will not return false if > + the base is non-affine. 
*/ > > opt_result > dr_analyze_innermost (innermost_loop_behavior *drb, tree ref, > - class loop *loop, const gimple *stmt) > + class loop *loop, const gimple *stmt, > + // TODO-kernels Rename (also allows non affine offset) > + bool allow_non_affine_base) > { > poly_int64 pbitsize, pbitpos; > tree base, poffset; > machine_mode pmode; > int punsignedp, preversep, pvolatilep; > affine_iv base_iv, offset_iv; > - tree init, dinit, step; > + tree init, dinit; > bool in_loop = (loop && loop->num); > > if (dump_file && (dump_flags & TDF_DETAILS)) > @@ -945,17 +953,20 @@ dr_analyze_innermost (innermost_loop_behavior *drb, > tree ref, > else > base = build_fold_addr_expr (base); > > + bool affine_base = true; > if (in_loop) > { > - if (!simple_iv (loop, loop, base, &base_iv, true)) > + affine_base = simple_iv (loop, loop, base, &base_iv, true); > + if (!affine_base && !allow_non_affine_base) > return opt_result::failure_at > (stmt, "failed: evolution of base is not affine.\n"); > } > - else > + > + if (!in_loop || !affine_base) > { > base_iv.base = base; > base_iv.step = ssize_int (0); > - base_iv.no_overflow = true; > + base_iv.no_overflow = affine_base ? false : true; > } > > if (!poffset) > @@ -965,14 +976,18 @@ dr_analyze_innermost (innermost_loop_behavior *drb, > tree ref, > } > else > { > - if (!in_loop) > - { > - offset_iv.base = poffset; > - offset_iv.step = ssize_int (0); > - } > - else if (!simple_iv (loop, loop, poffset, &offset_iv, true)) > - return opt_result::failure_at > - (stmt, "failed: evolution of offset is not affine.\n"); > + offset_iv.base = poffset; > + offset_iv.step = ssize_int (0); > + > + if (in_loop && ! 
simple_iv (loop, loop, poffset, &offset_iv, true)) { > + if (!allow_non_affine_base) > + return opt_result::failure_at > + (stmt, "failed: evolution of offset is not affine.\n"); > + offset_iv.base = poffset; > + offset_iv.step = ssize_int (0); > + > + > + } > } > > init = ssize_int (pbytepos); > @@ -983,14 +998,8 @@ dr_analyze_innermost (innermost_loop_behavior *drb, tree > ref, > init = size_binop (PLUS_EXPR, init, dinit); > base_misalignment -= TREE_INT_CST_LOW (dinit); > > - split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); > - init = size_binop (PLUS_EXPR, init, dinit); > - > - step = size_binop (PLUS_EXPR, > - fold_convert (ssizetype, base_iv.step), > - fold_convert (ssizetype, offset_iv.step)); > - > base = canonicalize_base_object_address (base_iv.base); > + split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); > > /* See if get_pointer_alignment can guarantee a higher alignment than > the one we calculated above. */ > @@ -1013,7 +1022,10 @@ dr_analyze_innermost (innermost_loop_behavior *drb, > tree ref, > drb->base_address = base; > drb->offset = fold_convert (ssizetype, offset_iv.base); > drb->init = init; > - drb->step = step; > + drb->step = size_binop (PLUS_EXPR, > + fold_convert (ssizetype, base_iv.step), > + fold_convert (ssizetype, offset_iv.step)); > + > if (known_misalignment (base_misalignment, base_alignment, > &drb->base_misalignment)) > drb->base_alignment = base_alignment; > @@ -1023,7 +1035,7 @@ dr_analyze_innermost (innermost_loop_behavior *drb, > tree ref, > drb->base_misalignment = 0; > } > drb->offset_alignment = highest_pow2_factor (offset_iv.base); > - drb->step_alignment = highest_pow2_factor (step); > + drb->step_alignment = highest_pow2_factor (drb->step); > > if (dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, "success.\n"); > @@ -1096,7 +1108,9 @@ dr_analyze_indices (struct data_reference *dr, edge > nest, loop_p loop) > { > op = TREE_OPERAND (ref, 1); > access_fn = analyze_scalar_evolution 
(loop, op); > - access_fn = instantiate_scev (nest, loop, access_fn); > + tree instantiated_fn = instantiate_scev (nest, loop, access_fn); > + if (instantiated_fn) > + access_fn = instantiated_fn; > access_fns.safe_push (access_fn); > } > else if (TREE_CODE (ref) == COMPONENT_REF > @@ -1128,7 +1142,9 @@ dr_analyze_indices (struct data_reference *dr, edge > nest, loop_p loop) > { > op = TREE_OPERAND (ref, 0); > access_fn = analyze_scalar_evolution (loop, op); > - access_fn = instantiate_scev (nest, loop, access_fn); > + tree instantiated_fn = instantiate_scev (nest, loop, access_fn); > + if (instantiated_fn) > + access_fn = instantiated_fn; > if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC) > { > tree orig_type; > @@ -1230,11 +1246,15 @@ free_data_ref (data_reference_p dr) > > Return the data_reference description of MEMREF. NEST is the outermost > loop in which the reference should be instantiated, LOOP is the loop > - in which the data reference should be analyzed. */ > + in which the data reference should be analyzed. > + > + If ALLOW_NON_AFFINE_BASE is true, the function will not fail if the > + base is non-affine. */ > > struct data_reference * > create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt, > - bool is_read, bool is_conditional_in_stmt) > + bool is_read, bool is_conditional_in_stmt, > + bool allow_non_affine_base) > { > struct data_reference *dr; > > @@ -1252,7 +1272,8 @@ create_data_ref (edge nest, loop_p loop, tree memref, > gimple *stmt, > DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt; > > dr_analyze_innermost (&DR_INNERMOST (dr), memref, > - nest != NULL ? loop : NULL, stmt); > + nest != NULL ? loop : NULL, stmt, > + allow_non_affine_base); > dr_analyze_indices (dr, nest, loop); > dr_analyze_alias (dr); > > @@ -5422,6 +5443,7 @@ struct data_ref_loc > bool is_conditional_in_stmt; > }; > > +tree oacc_ifn_call_extract (gimple *call); > > /* Stores the locations of memory references in STMT to REFERENCES. 
Returns > true if STMT clobbers memory, false otherwise. */ > @@ -5444,6 +5466,10 @@ get_references_in_stmt (gimple *stmt, > vec<data_ref_loc, va_heap> *references) > if (gimple_call_internal_p (stmt)) > switch (gimple_call_internal_fn (stmt)) > { > + case IFN_GOACC_REDUCTION: > + case IFN_UNIQUE: > + case IFN_GOACC_LOOP: > + break; > case IFN_GOMP_SIMD_LANE: > { > class loop *loop = gimple_bb (stmt)->loop_father; > @@ -5519,6 +5545,25 @@ get_references_in_stmt (gimple *stmt, > vec<data_ref_loc, va_heap> *references) > ptr); > references->safe_push (ref); > return false; > + case IFN_GOACC_LOOP: > + /* Treat this like a reference to the data from the > + original loop (offset, bound etc.) that has been > + replaced by the internal function call in > + omp-expand.c. */ > + > + op0 = gimple_call_lhs (stmt); > + op1 = oacc_ifn_call_extract (stmt); > + > + if (DECL_P (op1) > + || (REFERENCE_CLASS_P (op1) && get_base_address (op1))) > + { > + ref.ref = op1; > + ref.is_read = true; > + ref.is_conditional_in_stmt = false; > + references->safe_push (ref); > + } > + return false; > + > default: > break; > } > @@ -5616,11 +5661,15 @@ find_data_references_in_stmt (class loop *nest, > gimple *stmt, > unanalyzable reference, returns false, otherwise returns true. > NEST is the outermost loop of the loop nest in which the references > should be instantiated, LOOP is the loop in which the references > - should be analyzed. */ > + should be analyzed. > + If ALLOW_NON_AFFINE_BASE is true, the data references are allowed > + to have a non-affine base. 
*/ > > bool > graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt, > - vec<data_reference_p> *datarefs) > + vec<data_reference_p> *datarefs, > + oacc_context *oacc_ctx, > + bool allow_non_affine_base) > { > unsigned i; > auto_vec<data_ref_loc, 2> references; > @@ -5634,7 +5683,8 @@ graphite_find_data_references_in_stmt (edge nest, > loop_p loop, gimple *stmt, > FOR_EACH_VEC_ELT (references, i, ref) > { > dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read, > - ref->is_conditional_in_stmt); > + ref->is_conditional_in_stmt, > + allow_non_affine_base); > gcc_assert (dr != NULL); > datarefs->safe_push (dr); > } > diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h > index 771d20fbbc3..2d82e0ad923 100644 > --- a/gcc/tree-data-ref.h > +++ b/gcc/tree-data-ref.h > @@ -515,7 +515,7 @@ typedef struct data_dependence_relation *ddr_p; > > > opt_result dr_analyze_innermost (innermost_loop_behavior *, tree, > - class loop *, const gimple *); > + class loop *, const gimple *, bool = false); > extern bool compute_data_dependences_for_loop (class loop *, bool, > vec<loop_p> *, > vec<data_reference_p> *, > @@ -539,12 +539,14 @@ extern void free_data_ref (data_reference_p); > extern void free_data_refs (vec<data_reference_p> ); > extern opt_result find_data_references_in_stmt (class loop *, gimple *, > vec<data_reference_p> *); > +class oacc_context; > extern bool graphite_find_data_references_in_stmt (edge, loop_p, gimple *, > - vec<data_reference_p> *); > + vec<data_reference_p> *, > + oacc_context *, bool); > tree find_data_references_in_loop (class loop *, vec<data_reference_p> *); > bool loop_nest_has_data_refs (loop_p loop); > struct data_reference *create_data_ref (edge, loop_p, tree, gimple *, bool, > - bool); > + bool, bool = false); > extern bool find_loop_nest (class loop *, vec<loop_p> *); > extern struct data_dependence_relation *initialize_data_dependence_relation > (struct data_reference *, struct data_reference *, vec<loop_p>); > 
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c > index 888af48946f..1ac27569a03 100644 > --- a/gcc/tree-loop-distribution.c > +++ b/gcc/tree-loop-distribution.c > @@ -2572,15 +2572,24 @@ latch_dominated_by_data_ref (class loop *loop, > data_reference *dr) > /* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's > data dependence relations ALIAS_DDRS. */ > > -static void > +void > compute_alias_check_pairs (class loop *loop, vec<ddr_p> *alias_ddrs, > vec<dr_with_seg_len_pair_t> *comp_alias_pairs) > { > unsigned int i; > unsigned HOST_WIDE_INT factor = 1; > - tree niters_plus_one, niters = number_of_latch_executions (loop); > + tree niters_plus_one, niters; > > + if (loop->num == 0) > + { > + /* Loop 0 is not a real loop and hence it has no niter information. > + It executes once. */ > + niters = build_int_cst (integer_type_node, 1); > + } > + else > + niters = number_of_latch_executions (loop); > gcc_assert (niters != NULL_TREE && niters != chrec_dont_know); > + > niters = fold_convert (sizetype, niters); > niters_plus_one = size_binop (PLUS_EXPR, niters, size_one_node); > > @@ -2595,12 +2604,12 @@ compute_alias_check_pairs (class loop *loop, > vec<ddr_p> *alias_ddrs, > struct data_reference *dr_b = DDR_B (ddr); > tree seg_length_a, seg_length_b; > > - if (latch_dominated_by_data_ref (loop, dr_a)) > + if (loop->num != 0 && latch_dominated_by_data_ref (loop, dr_a)) > seg_length_a = data_ref_segment_size (dr_a, niters_plus_one); > else > seg_length_a = data_ref_segment_size (dr_a, niters); > > - if (latch_dominated_by_data_ref (loop, dr_b)) > + if (loop->num != 0 && latch_dominated_by_data_ref (loop, dr_b)) > seg_length_b = data_ref_segment_size (dr_b, niters_plus_one); > else > seg_length_b = data_ref_segment_size (dr_b, niters); > diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c > index edab778277b..466aa65d899 100644 > --- a/gcc/tree-scalar-evolution.c > +++ b/gcc/tree-scalar-evolution.c > @@ 
-264,6 +264,8 @@ along with GCC; see the file COPYING3. If not see > #include "gimple.h" > #include "ssa.h" > #include "gimple-pretty-print.h" > +#include "tree-pretty-print.h" > +#include "print-tree.h" > #include "fold-const.h" > #include "gimplify.h" > #include "gimple-iterator.h" > @@ -276,6 +278,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-ssa.h" > #include "cfgloop.h" > #include "tree-chrec.h" > +#include "tree-chrec-oacc.h" > #include "tree-affine.h" > #include "tree-scalar-evolution.h" > #include "dumpfile.h" > @@ -284,6 +287,8 @@ along with GCC; see the file COPYING3. If not see > #include "tree-into-ssa.h" > #include "builtins.h" > #include "case-cfn-macros.h" > +#include "omp-offload.h" > +#include "internal-fn.h" > > static tree analyze_scalar_evolution_1 (class loop *, tree); > static tree analyze_scalar_evolution_for_address_of (class loop *loop, > @@ -550,11 +555,32 @@ get_scalar_evolution (basic_block instantiated_below, > tree scalar) > switch (TREE_CODE (scalar)) > { > case SSA_NAME: > + { > if (SSA_NAME_IS_DEFAULT_DEF (scalar)) > res = scalar; > else > - res = *find_var_scev_info (instantiated_below, scalar); > + { > + // TODO-kernels Should no longer be necessary, cf. 
> oacc_set_arg_evolutions > + res = *find_var_scev_info (instantiated_below, scalar); > + if (res) > + break; > + > + tree name = SSA_NAME_IDENTIFIER (scalar); > + > + if (name) > + { > + const char* id = IDENTIFIER_POINTER (name); > + if (strncmp (id, ".bound", 6) == 0 > + || strncmp (id, ".offset", 7) == 0 > + || strncmp (id, ".chunk_max", 11) == 0 > + || strncmp (id, ".chunk_no", 10) == 0 > + || strncmp (id, ".step", 5) == 0) > + res = scalar; > + } > + > + } > break; > + } > > case REAL_CST: > case FIXED_CST: > @@ -1115,6 +1141,7 @@ follow_ssa_edge_inner_loop_phi (class loop *outer_loop, > return follow_ssa_edge_expr (outer_loop, loop_phi_node, ev, halting_phi, > evolution_of_loop, limit); > } > +tree interpret_gimple_call (class loop *loop, gimple *call); > > /* Follow the ssa edge into the expression EXPR. > Return true if the strongly connected component has been found. */ > @@ -1125,7 +1152,9 @@ follow_ssa_edge_expr (class loop *loop, gimple > *at_stmt, tree expr, > int limit) > { > enum tree_code code; > - tree type, rhs0, rhs1 = NULL_TREE; > + tree type = NULL_TREE; > + tree rhs0 = NULL_TREE; > + tree rhs1 = NULL_TREE; > > /* The EXPR is one of the following cases: > - an SSA_NAME, > @@ -1189,26 +1218,36 @@ tail_recurse: > > /* At this level of abstraction, the program is just a set > of GIMPLE_ASSIGNs and PHI_NODEs. In principle there is no > - other def to be handled. */ > - if (!is_gimple_assign (def)) > - return t_false; > - > - code = gimple_assign_rhs_code (def); > - switch (get_gimple_rhs_class (code)) > + other def to be handled except for OpenACC internal function calls. 
> + */ > + if (is_gimple_assign (def)) { > + code = gimple_assign_rhs_code (def); > + switch (get_gimple_rhs_class (code)) > + { > + case GIMPLE_BINARY_RHS: > + rhs0 = gimple_assign_rhs1 (def); > + rhs1 = gimple_assign_rhs2 (def); > + break; > + case GIMPLE_UNARY_RHS: > + case GIMPLE_SINGLE_RHS: > + rhs0 = gimple_assign_rhs1 (def); > + break; > + default: > + return t_false; > + } > + type = TREE_TYPE (gimple_assign_lhs (def)); > + at_stmt = def; > + } > + else if (is_oacc_ifn_call_def (expr)) { > + rhs0 = interpret_gimple_call (loop, def); > + type = TREE_TYPE (gimple_call_lhs (def)); > + at_stmt = def; > + } > + else > { > - case GIMPLE_BINARY_RHS: > - rhs0 = gimple_assign_rhs1 (def); > - rhs1 = gimple_assign_rhs2 (def); > - break; > - case GIMPLE_UNARY_RHS: > - case GIMPLE_SINGLE_RHS: > - rhs0 = gimple_assign_rhs1 (def); > - break; > - default: > return t_false; > } > - type = TREE_TYPE (gimple_assign_lhs (def)); > - at_stmt = def; > + > } > else > { > @@ -1920,7 +1959,75 @@ interpret_gimple_assign (class loop *loop, gimple > *stmt) > gimple_assign_rhs2 (stmt)); > } > > - > +/* Extract loop information from an OpenACC internal function call. */ > +tree > +oacc_ifn_call_extract (gimple *call) { > + gcc_assert (gimple_call_internal_p (call, IFN_GOACC_LOOP)); > + > + enum ifn_goacc_loop_kind code > + = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, > 0)); > + > + tree expr; > + switch (code) > + { > + case IFN_GOACC_LOOP_STEP: > + { > + expr = gimple_call_arg (call, 3); > + break; > + } > + case IFN_GOACC_LOOP_CHUNKS: > + { > + expr = gimple_call_arg (call, 4); > + break; > + } > + case IFN_GOACC_LOOP_OFFSET: > + { > + expr = gimple_call_arg (call, 6); > + break; > + } > + case IFN_GOACC_LOOP_BOUND: > + { > + expr = gimple_call_arg (call, 2); > + break; > + } > + default: > + gcc_unreachable(); > + } > + > + gcc_assert (scev_is_linear_expression (expr)); > + return expr; > +} > + > +/* Interpret a gimple call statement.
*/ > +tree > +interpret_gimple_call (class loop *loop, gimple *call) > +{ > + /* Only IFN_GOACC_LOOP calls are handled here. > + SCEV computation for those calls is only really relevant > + for Graphite's execution on OpenACC functions in the host > + compiler. */ > + > +#ifndef ACCEL_COMPILER > + if (!gimple_call_internal_p (call, IFN_GOACC_LOOP)) > + return chrec_dont_know; > +#else > + return chrec_dont_know; > +#endif > + > + /* Information about OpenACC loops is encoded in internal function calls. > + Extract loop information from those calls, but ignore other calls. */ > + if (!gimple_call_internal_p (call, IFN_GOACC_LOOP)) > + return chrec_dont_know; > + > + tree expr = oacc_ifn_call_extract (call); > + tree analyzed = analyze_scalar_evolution (loop, expr); > + gcc_checking_assert (expr == analyzed); > + > + tree lhs = gimple_call_lhs (call); > + gcc_assert (lhs); > + > + return chrec_convert (TREE_TYPE (lhs), analyzed, call); > +} > > /* This section contains all the entry points: > - number_of_iterations_in_loop, > @@ -1969,6 +2076,10 @@ analyze_scalar_evolution_1 (class loop *loop, tree var) > res = interpret_gimple_assign (loop, def); > break; > > + case GIMPLE_CALL: > + res = interpret_gimple_call (loop, def); > + break; > + > case GIMPLE_PHI: > if (loop_phi_node_p (def)) > res = interpret_loop_phi (loop, as_a <gphi *> (def)); > @@ -2049,6 +2160,91 @@ analyze_scalar_evolution (class loop *loop, tree var) > return res; > } > > +/* Check if VAR represents a parameter of an OpenACC region in an > + offloaded function. 
That is, check that VAR's defining statement > + has the shape: > + > + VAR2 = *.omp_data_i(D).field > + VAR = *_VAR2 > + */ > +static bool > +is_oacc_arg (tree var) > +{ > + gimple* def = SSA_NAME_DEF_STMT (var); > + > + if (!def || !is_gimple_assign (def)) > + return false; > + > + tree rhs = gimple_assign_rhs1 (def); > + > + if (TREE_CODE (rhs) != MEM_REF) > + return false; > + > + tree ref = TREE_OPERAND (rhs, 0); > + > + if (TREE_CODE (ref) != SSA_NAME) > + return false; > + > + gimple* ref_def = SSA_NAME_DEF_STMT (ref); > + > + if (!ref_def || !is_gimple_assign (ref_def)) > + return false; > + > + rhs = gimple_assign_rhs1 (ref_def); > + if (TREE_CODE (rhs) != COMPONENT_REF) > + return false; > + > + tree base_ref = TREE_OPERAND (rhs, 0); > + > + if (TREE_CODE (base_ref) != MEM_REF) > + return false; > + > + tree base = TREE_OPERAND (base_ref, 0); > + > + if (!SSA_NAME_IDENTIFIER (base)) > + return false; > + > + char* base_id = > + const_cast<char*>(IDENTIFIER_POINTER (SSA_NAME_IDENTIFIER (base))); > + > + if (strncmp (base_id, ".omp_data_i", 11)) > + return false; > + > + return true; > +} > + > +/* Search for SSA_NAMEs which represent parameters of an offloaded > + OpenACC region and set their SCEV values to a parametric chrec > + containing the variable itself. > + > + We do not have a way to perform scalar evolution on the function > + from which an OpenACC outlined function was extracted while > + executing on the outlined function. Analysing those SSA_NAMEs > + would lead to chrec_dont_know because of the pointer indirection > + introduced by the outlining. We are better off treating the > + names as parameters. */ > + > +/* TODO Come up with a way to determine the scalar evolution > + in the original function */ > + > +void > +oacc_set_arg_evolutions () { > + unsigned i; > + tree var; > + > + FOR_EACH_SSA_NAME (i, var, cfun) > + { > + if (!
is_oacc_arg (var)) > + continue; > + > + basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var)); > + > + tree *chrec = find_var_scev_info (bb, var); > + *chrec = var; > + } > + > +} > + > /* Analyzes and returns the scalar evolution of VAR address in LOOP. */ > > static tree > @@ -2261,6 +2457,15 @@ instantiate_scev_name (edge instantiate_below, > class loop *def_loop; > basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (chrec)); > > + /* Do not instantiate names which dereference ".omp_data_i" field. > + Cf. oacc_set_arg_evolutions. */ > + if (is_oacc_arg (chrec)) > + return chrec; > + > + if (is_oacc_loop_ifn_call_def (chrec)) > + return interpret_gimple_call (evolution_loop, SSA_NAME_DEF_STMT (chrec)); > + > + > /* A parameter, nothing to do. */ > if (!def_bb > || !dominated_by_p (CDI_DOMINATORS, def_bb, instantiate_below->dest)) > @@ -3221,10 +3426,14 @@ simple_iv_with_niters (class loop *wrto_loop, class > loop *use_loop, > || chrec_contains_symbols_defined_in_loop (ev, wrto_loop->num)) > return false; > > + tree ev_type = TREE_TYPE (ev); > + if (is_oacc_loop_ifn_call_def (ev)) > + type = integer_type_node; > + > if (tree_does_not_contain_chrecs (ev)) > { > iv->base = ev; > - iv->step = build_int_cst (TREE_TYPE (ev), 0); > + iv->step = build_int_cst (ev_type, 0); > iv->no_overflow = true; > return true; > } > @@ -3240,6 +3449,9 @@ simple_iv_with_niters (class loop *wrto_loop, class > loop *use_loop, > return false; > > iv->step = CHREC_RIGHT (ev); > + if (is_oacc_loop_ifn_call_def (iv->step)) > + iv->step = interpret_gimple_call (use_loop, SSA_NAME_DEF_STMT > (iv->step)); > + > if ((!allow_nonconstant_step && TREE_CODE (iv->step) != INTEGER_CST) > || tree_contains_chrecs (iv->step, NULL)) > return false; > @@ -3385,6 +3597,9 @@ expression_expensive_p (tree expr, hash_map<tree, > uint64_t> &cache, > return true; > } > > + if (is_oacc_ifn_call_def (expr)) > + return false; > + > bool visited_p; > uint64_t &local_cost = cache.get_or_insert (expr, &visited_p); > if 
(visited_p) > diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c > index 6c1268e84ad..0f3d7ce3e76 100644 > --- a/gcc/tree-ssa-loop-ivcanon.c > +++ b/gcc/tree-ssa-loop-ivcanon.c > @@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. If not see > #include "builtins.h" > #include "tree-ssa-sccvn.h" > #include "dbgcnt.h" > +#include "omp-general.h" > > /* Specifies types of loops that may be unrolled. */ > > @@ -1256,7 +1257,13 @@ canonicalize_loop_induction_variables (class loop > *loop, > populates the loop bounds. */ > modified |= remove_redundant_iv_tests (loop); > > - if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul, > + /* Skip unrolling on OpenACC outlined functions. Those functions > + contain loops (e.g. the top loop for a region) that never iterate > + and that should not be removed. */ > + > + bool skip_unrolling = oacc_get_fn_attrib (cfun->decl); > + if (!skip_unrolling && > + try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul, > maxiter, locus, allow_peel)) > return true; > > diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c > index 7d61ef080eb..c54a0277670 100644 > --- a/gcc/tree-ssa-loop-niter.c > +++ b/gcc/tree-ssa-loop-niter.c > @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-ssa-loop.h" > #include "cfgloop.h" > #include "tree-chrec.h" > +#include "tree-chrec-oacc.h" > #include "tree-scalar-evolution.h" > #include "tree-dfa.h" > > @@ -1980,6 +1981,9 @@ simplify_replace_tree (tree expr, tree old, tree > new_tree, > return (ret ? (do_fold ? fold (ret) : ret) : expr); > } > > +tree oacc_ifn_call_extract (gimple*); > +tree interpret_gimple_call (class loop *loop, gimple *call); > + > /* Expand definitions of ssa names in EXPR as long as they are simple > enough, and return the new expression. If STOP is specified, stop > expanding if EXPR equals to it. 
*/ > @@ -1995,6 +1999,12 @@ expand_simple_operations (tree expr, tree stop, > hash_map<tree, tree> &cache) > if (expr == NULL_TREE) > return expr; > > + if (is_oacc_ifn_call_def (expr)) > + { > + //expr = oacc_ifn_call_extract (SSA_NAME_DEF_STMT (expr)); > + expr = interpret_gimple_call (NULL, SSA_NAME_DEF_STMT (expr)); > + } > + > if (is_gimple_min_invariant (expr)) > return expr; > > @@ -2465,6 +2475,9 @@ number_of_iterations_exit_assumptions (class loop > *loop, edge exit, > if (iv0_niters && iv1_niters) > return false; > > + type = TREE_TYPE (iv0.step); > + > + > /* We don't want to see undefined signed overflow warnings while > computing the number of iterations. */ > fold_defer_overflow_warnings (); > -- > 2.17.1 > > ----------------- > Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany > Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, > Alexander Walter