On Thu, Nov 12, 2020 at 11:11 AM Frederik Harwath
<frede...@codesourcery.com> wrote:
>
>
> This patch enables the use of Graphite for the analysis of OpenACC
> "auto" loops. The goal is to decide if a loop may be parallelized
> (i.e. converted to an "independent" loop) or not.  Graphite and the
> functionality on which it relies (scalar evolution, data references) are
> extended to interpret the internal representation of OpenACC loop
> constructs that is encoded (e.g. through calls to OpenACC-specific
> internal functions) in the OpenACC outlined functions (".omp_fn") and to
> ignore some artifacts of the outlining process that are not relevant for
> the analysis of the original loops (e.g. pointers introduced for the
> purpose of offloading are irrelevant to the question whether the
> original loops can be parallelized or not). This is done in a way that
> does not impact code which does not use OpenACC.  Furthermore, Graphite
> is extended by functionality that extends its applicability to
> real-world code (e.g. runtime alias checking).  The OpenACC lowering is
> extended to use the result of Graphite's analysis to assign
> "independent" clauses to loops.

I wonder if this can be split into one part that refactors Graphite and
adds the runtime alias checking capability, and another part that does
the OpenACC pieces.

Richard.

> ---
>  gcc/common.opt                                |   8 +
>  gcc/graphite-dependences.c                    |  12 +-
>  gcc/graphite-isl-ast-to-gimple.c              |  77 +-
>  gcc/graphite-oacc.h                           |  90 ++
>  gcc/graphite-scop-detection.c                 | 828 ++++++++++++++----
>  gcc/graphite-sese-to-poly.c                   |  26 +-
>  gcc/graphite.c                                | 403 ++++++++-
>  gcc/graphite.h                                |  11 +-
>  gcc/internal-fn.h                             |   7 +-
>  gcc/omp-expand.c                              |  26 +-
>  gcc/omp-offload.c                             | 173 +++-
>  gcc/predict.c                                 |   2 +-
>  .../graphite/alias-0-no-runtime-check.c       |  20 +
>  .../gcc.dg/graphite/alias-0-runtime-check.c   |  21 +
>  gcc/testsuite/gcc.dg/graphite/alias-1.c       |  22 +
>  gcc/tree-chrec-oacc.h                         |  45 +
>  gcc/tree-chrec.c                              |  16 +-
>  gcc/tree-data-ref.c                           | 112 ++-
>  gcc/tree-data-ref.h                           |   8 +-
>  gcc/tree-loop-distribution.c                  |  17 +-
>  gcc/tree-scalar-evolution.c                   | 257 +++++-
>  gcc/tree-ssa-loop-ivcanon.c                   |   9 +-
>  gcc/tree-ssa-loop-niter.c                     |  13 +
>  23 files changed, 1870 insertions(+), 333 deletions(-)
>  create mode 100644 gcc/graphite-oacc.h
>  create mode 100644 gcc/testsuite/gcc.dg/graphite/alias-0-no-runtime-check.c
>  create mode 100644 gcc/testsuite/gcc.dg/graphite/alias-0-runtime-check.c
>  create mode 100644 gcc/testsuite/gcc.dg/graphite/alias-1.c
>  create mode 100644 gcc/tree-chrec-oacc.h
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index dfed6ec76ba..caaeaa1aa6f 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -1600,6 +1600,14 @@ fgraphite-identity
>  Common Report Var(flag_graphite_identity) Optimization
>  Enable Graphite Identity transformation.
>
> +fgraphite-non-affine-accesses
> +Common Report Var(flag_graphite_non_affine_accesses) Init(0)
> +Allow Graphite to handle non-affine data accesses.
> +
> +fgraphite-runtime-alias-checks
> +Common Report Var(flag_graphite_runtime_alias_checks) Optimization Init(1)
> +Allow Graphite to add runtime alias checks to loops if aliasing cannot be 
> resolved statically.
> +
>  fhoist-adjacent-loads
>  Common Report Var(flag_hoist_adjacent_loads) Optimization
>  Enable hoisting adjacent loads to encourage generating conditional move
> diff --git a/gcc/graphite-dependences.c b/gcc/graphite-dependences.c
> index 7078c949800..76ba027cdf3 100644
> --- a/gcc/graphite-dependences.c
> +++ b/gcc/graphite-dependences.c
> @@ -82,7 +82,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map 
> *&reads,
>           {
>             if (dump_file)
>               {
> -               fprintf (dump_file, "Adding read to depedence graph: ");
> +               fprintf (dump_file, "Adding read to dependence graph: ");
>                 print_pdr (dump_file, pdr);
>               }
>             isl_union_map *um
> @@ -90,7 +90,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map 
> *&reads,
>             reads = isl_union_map_union (reads, um);
>             if (dump_file)
>               {
> -               fprintf (dump_file, "Reads depedence graph: ");
> +               fprintf (dump_file, "Reads dependence graph: ");
>                 print_isl_union_map (dump_file, reads);
>               }
>           }
> @@ -98,7 +98,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map 
> *&reads,
>           {
>             if (dump_file)
>               {
> -               fprintf (dump_file, "Adding must write to depedence graph: ");
> +               fprintf (dump_file, "Adding must write to dependence graph: 
> ");
>                 print_pdr (dump_file, pdr);
>               }
>             isl_union_map *um
> @@ -106,7 +106,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map 
> *&reads,
>             must_writes = isl_union_map_union (must_writes, um);
>             if (dump_file)
>               {
> -               fprintf (dump_file, "Must writes depedence graph: ");
> +               fprintf (dump_file, "Must writes dependence graph: ");
>                 print_isl_union_map (dump_file, must_writes);
>               }
>           }
> @@ -114,7 +114,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map 
> *&reads,
>           {
>             if (dump_file)
>               {
> -               fprintf (dump_file, "Adding may write to depedence graph: ");
> +               fprintf (dump_file, "Adding may write to dependence graph: ");
>                 print_pdr (dump_file, pdr);
>               }
>             isl_union_map *um
> @@ -122,7 +122,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map 
> *&reads,
>             may_writes = isl_union_map_union (may_writes, um);
>             if (dump_file)
>               {
> -               fprintf (dump_file, "May writes depedence graph: ");
> +               fprintf (dump_file, "May writes dependence graph: ");
>                 print_isl_union_map (dump_file, may_writes);
>               }
>           }
> diff --git a/gcc/graphite-isl-ast-to-gimple.c 
> b/gcc/graphite-isl-ast-to-gimple.c
> index ef93fda2233..98c61ff864e 100644
> --- a/gcc/graphite-isl-ast-to-gimple.c
> +++ b/gcc/graphite-isl-ast-to-gimple.c
> @@ -57,6 +57,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-ssa.h"
>  #include "tree-vectorizer.h"
>  #include "graphite.h"
> +#include "graphite-oacc.h"
>
>  struct ast_build_info
>  {
> @@ -635,12 +636,18 @@ translate_isl_ast_for_loop (loop_p context_loop,
>      redirect_edge_succ_nodup (next_e, after);
>    set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src);
>
> -  if (flag_loop_parallelize_all)
> +  if (flag_loop_parallelize_all || oacc_function_p (cfun))
>      {
>        isl_id *id = isl_ast_node_get_annotation (node_for);
>        gcc_assert (id);
>        ast_build_info *for_info = (ast_build_info *) isl_id_get_user (id);
>        loop->can_be_parallel = for_info->is_parallelizable;
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +       {
> +         dump_user_location_t loc = find_loop_location (loop);
> +           dump_printf_loc (MSG_NOTE, loc, "loop can be parallel: %d \n",
> +                            loop->can_be_parallel);
> +       }
>        free (for_info);
>        isl_id_free (id);
>      }
> @@ -1027,7 +1034,7 @@ gsi_insert_earliest (gimple_seq seq)
>    basic_block begin_bb = get_entry_bb (codegen_region);
>
>    /* Inserting the gimple statements in a vector because gimple_seq behave
> -     in strage ways when inserting the stmts from it into different basic
> +     in strange ways when inserting the stmts from it into different basic
>       blocks one at a time.  */
>    auto_vec<gimple *, 3> stmts;
>    for (gimple_stmt_iterator gsi = gsi_start (seq); !gsi_end_p (gsi);
> @@ -1397,7 +1404,7 @@ scop_to_isl_ast (scop_p scop)
>      (isl_schedule_copy (scop->transformed_schedule), set_separate_option, 
> NULL);
>    isl_ast_build *context_isl = generate_isl_context (scop);
>
> -  if (flag_loop_parallelize_all)
> +  if (flag_loop_parallelize_all || oacc_function_p (cfun))
>      {
>        scop_get_dependences (scop);
>        context_isl =
> @@ -1464,6 +1471,42 @@ generate_entry_out_of_ssa_copies (edge false_entry,
>      }
>  }
>
> +/* Defined in tree-loop-distribution.c */
> +/* TODO Move this function to tree-data-ref.c? */
> +
> +void
> +compute_alias_check_pairs (class loop *loop, vec<ddr_p> *alias_ddrs,
> +                          vec<dr_with_seg_len_pair_t> *comp_alias_pairs);
> +
> +
> +/* Create a condition that evaluates to TRUE if some ALIAS_DDRS
> +   do alias. */
> +
> +static tree
> +generate_alias_cond (vec<ddr_p>& alias_ddrs, loop_p context_loop)
> +{
> +  gcc_checking_assert (flag_graphite_runtime_alias_checks
> +                      && alias_ddrs.length () > 0);
> +  gcc_checking_assert (context_loop);
> +
> +  auto_vec<dr_with_seg_len_pair_t> check_pairs;
> +  compute_alias_check_pairs (context_loop, &alias_ddrs, &check_pairs);
> +  gcc_checking_assert (check_pairs.length () > 0);
> +
> +  tree alias_cond = NULL_TREE;
> +  create_runtime_alias_checks (context_loop, &check_pairs, &alias_cond);
> +  gcc_checking_assert (alias_cond);
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "Generated runtime alias check: ");
> +      print_generic_expr (dump_file, alias_cond, dump_flags);
> +      fprintf (dump_file, "\n");
> +    }
> +
> +  return alias_cond;
> +}
> +
>  /* GIMPLE Loop Generator: generates loops in GIMPLE form for the given SCOP.
>     Return true if code generation succeeded.  */
>
> @@ -1504,12 +1547,38 @@ graphite_regenerate_ast_isl (scop_p scop)
>    region->if_region = if_region;
>
>    loop_p context_loop = region->region.entry->src->loop_father;
> +  gcc_checking_assert (context_loop);
>    edge e = single_succ_edge (if_region->true_region->region.entry->dest);
>    basic_block bb = split_edge (e);
>
>    /* Update the true_region exit edge.  */
>    region->if_region->true_region->region.exit = single_succ_edge (bb);
>
> +  if (flag_graphite_runtime_alias_checks
> +      && scop->unhandled_alias_ddrs.length () > 0)
> +    {
> +      /* SCoP detection has failed to handle the aliasing between some
> +        data-references of the SCoP statically. Generate an alias
> +        check that selects the newly generated version of the SCoP
> +        (in the true-branch of the conditional) if aliasing can be
> +        ruled out at runtime and the original version of the SCoP,
> +        otherwise. */
> +
> +      loop_p loop
> +       = find_common_loop (scop->scop_info->region.entry->dest->loop_father,
> +                           scop->scop_info->region.exit->src->loop_father);
> +
> +      tree cond = generate_alias_cond (scop->unhandled_alias_ddrs, loop);
> +      tree non_alias_cond = build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
> +      set_ifsese_condition (region->if_region, non_alias_cond);
> +      /* The loop nest is shared by all DDRs, cf. build_alias_set. */
> +      DDR_LOOP_NEST (scop->unhandled_alias_ddrs[0]).release ();
> +      free_dependence_relations (scop->unhandled_alias_ddrs);
> +    }
> +
> +  if (dump_file)
> +    fprintf (dump_file, "[codegen] isl AST to Gimple succeeded.\n");
> +
>    t.translate_isl_ast (context_loop, root_node, e, ip);
>    if (! t.codegen_error_p ())
>      {
> @@ -1520,8 +1589,6 @@ graphite_regenerate_ast_isl (scop_p scop)
>                                      if_region->region->region.exit->src,
>                                      if_region->false_region->region.exit,
>                                      if_region->true_region->region.exit);
> -      if (dump_file)
> -       fprintf (dump_file, "[codegen] isl AST to Gimple succeeded.\n");
>      }
>
>    if (t.codegen_error_p ())
> diff --git a/gcc/graphite-oacc.h b/gcc/graphite-oacc.h
> new file mode 100644
> index 00000000000..5978f428974
> --- /dev/null
> +++ b/gcc/graphite-oacc.h
> @@ -0,0 +1,90 @@
> +/* Graphite OpenACC helpers
> +   Copyright (C) 2006-2020 Free Software Foundation, Inc.
> +   Contributed by Sebastian Pop <sebastian....@inria.fr>.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify
> +it under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or (at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful,
> +but WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +GNU General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +/* This pass converts GIMPLE to GRAPHITE, performs some loop
> +   transformations and then converts the resulting representation back
> +   to GIMPLE.
> +
> +   An early description of this pass can be found in the GCC Summit'06
> +   paper "GRAPHITE: Polyhedral Analyses and Optimizations for GCC".
> +   The wiki page http://gcc.gnu.org/wiki/Graphite contains pointers to
> +   the related work.  */
> +
> +#include "omp-general.h"
> +#include "attribs.h"
> +
> +static inline bool oacc_function_p (function *fun)
> +{
> +  return oacc_get_fn_attrib (fun->decl);
> +}
> +
> +/* Represents a field of the ".omp_data_i" argument of
> +   an outlined OpenACC function. Each such field
> +   is used to pass a unique variable from the function
> +   that originally contained the loop to the outlined
> +   function. */
> +
> +struct oacc_data_field {
> +  /* The variable of the source function that
> +     gets passed through this field.  */
> +  tree src_var;
> +
> +  /* The variable that holds the dereferenced value of the
> +     field. This might be left NULL for reduction variables. */
> +  // TODO-kernels Should we also set this for reduction variables?
> +  // This seems to be unnecessary since we do not create data-refs
> +  // for reduction variables.
> +
> +  tree tgt_var;
> +};
> +
> +class oacc_omp_data
> +{
> +private:
> +  hash_map<tree, oacc_data_field> field_map;
> +
> +  void gather_assignments (struct function *fn);
> +  tree get_accessed_field (tree t);
> +public:
> +  tree src_fn_arg;
> +  tree tgt_fn_arg;
> +  gimple* src_fn_def;
> +  // TODO-kernels This belongs into the oacc_context
> +  loop_p loop;
> +  static oacc_omp_data construct (struct function* fn);
> +  tree redirect_data_ref (tree ref);
> +};
> +
> +class oacc_context {
> +public:
> +  oacc_omp_data omp_data;
> +
> +private:
> +  bool valid;
> +  oacc_context () : omp_data (), valid (false) {}
> +  oacc_context (oacc_omp_data omp_data) :
> +                   omp_data (omp_data),
> +                   valid (true) {}
> +public:
> +  static oacc_context build_context ();
> +  static oacc_context invalid_context () { return oacc_context (); }
> +
> +  bool is_valid () { return valid; }
> +};
> diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c
> index 75f81227f8a..ccdf3aa4d5b 100644
> --- a/gcc/graphite-scop-detection.c
> +++ b/gcc/graphite-scop-detection.c
> @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-into-ssa.h"
>  #include "tree-ssa.h"
>  #include "cfgloop.h"
> +#include "tree-chrec-oacc.h"
>  #include "tree-data-ref.h"
>  #include "tree-scalar-evolution.h"
>  #include "tree-pass.h"
> @@ -49,6 +50,9 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimple-pretty-print.h"
>  #include "cfganal.h"
>  #include "graphite.h"
> +#include "omp-general.h"
> +#include "graphite-oacc.h"
> +#include "print-tree.h"
>
>  class debug_printer
>  {
> @@ -69,12 +73,27 @@ public:
>      fprintf (output.dump_file, "%d", i);
>      return output;
>    }
> +
>    friend debug_printer &
>    operator<< (debug_printer &output, const char *s)
>    {
>      fprintf (output.dump_file, "%s", s);
>      return output;
>    }
> +
> +  friend debug_printer &
> +  operator<< (debug_printer &output, gimple* stmt)
> +  {
> +    print_gimple_stmt (output.dump_file, stmt, 0, TDF_VOPS | TDF_MEMSYMS);
> +    return output;
> +  }
> +
> +  friend debug_printer &
> +  operator<< (debug_printer &output, tree t)
> +  {
> +    print_generic_expr (output.dump_file, t, TDF_SLIM);
> +    return output;
> +  }
>  } dp;
>
>  #define DEBUG_PRINT(args) do \
> @@ -286,7 +305,8 @@ namespace
>  class scop_detection
>  {
>  public:
> -  scop_detection () : scops (vNULL) {}
> +  scop_detection (oacc_context *oacc_ctx)
> +    : scops (vNULL), oacc_ctx (oacc_ctx) {}
>
>    ~scop_detection ()
>    {
> @@ -354,24 +374,6 @@ public:
>    bool stmt_simple_for_scop_p (sese_l scop, gimple *stmt,
>                                basic_block bb) const;
>
> -  /* Something like "n * m" is not allowed.  */
> -
> -  static bool graphite_can_represent_init (tree e);
> -
> -  /* Return true when SCEV can be represented in the polyhedral model.
> -
> -     An expression can be represented, if it can be expressed as an
> -     affine expression.  For loops (i, j) and parameters (m, n) all
> -     affine expressions are of the form:
> -
> -     x1 * i + x2 * j + x3 * m + x4 * n + x5 * 1 where x1..x5 element of Z
> -
> -     1 i + 20 j + (-2) m + 25
> -
> -     Something like "i * n" or "n * m" is not allowed.  */
> -
> -  static bool graphite_can_represent_scev (sese_l scop, tree scev);
> -
>    /* Return true when EXPR can be represented in the polyhedral model.
>
>       This means an expression can be represented, if it is linear with 
> respect
> @@ -382,9 +384,9 @@ public:
>                                            tree expr);
>
>    /* Return true if the data references of STMT can be represented by 
> Graphite.
> -     We try to analyze the data references in a loop contained in the SCOP.  
> */
> +     We try to analyze the data references in a loop contained in the SCOP. 
> */
>
> -  static bool stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt);
> +  static bool stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt, 
> oacc_context *oacc_ctx);
>
>    /* Remove the close phi node at GSI and replace its rhs with the rhs
>       of PHI.  */
> @@ -403,6 +405,7 @@ public:
>
>  private:
>    vec<sese_l> scops;
> +  oacc_context *oacc_ctx;
>  };
>
>  sese_l scop_detection::invalid_sese (NULL, NULL);
> @@ -560,14 +563,58 @@ scop_detection::can_represent_loop (loop_p loop, sese_l 
> scop)
>        || !single_pred_p (loop->latch)
>        || exit->src != single_pred (loop->latch)
>        || !empty_block_p (loop->latch))
> -    return false;
> +    {
> +      DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop shape invalid.\n");
> +      return false;
> +
> +    }
> +
> +  bool edge_irreducible = loop_preheader_edge (loop)->flags & 
> EDGE_IRREDUCIBLE_LOOP;
> +  if (edge_irreducible)
> +    {
> +      DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop is not a natural 
> loop.\n");
> +      return false;
> +    }
> +
> +  bool niter_is_unconditional =
> +    number_of_iterations_exit (loop, single_exit (loop), &niter_desc, false);
> +  if (!niter_is_unconditional)
> +    {
> +      DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop niter not 
> unconditional.\n");
> +      return false;
> +    }
> +
> +  if (!niter_desc.control.no_overflow)
> +    {
> +      DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop niter can 
> overflow.\n");
> +      return false;
> +    }
> +
> +  niter = number_of_latch_executions (loop);
> +  if (!niter)
> +    {
> +      DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop niter unknown.\n");
> +      return false;
> +    }
> +
> +  bool undetermined_coefficients = chrec_contains_undetermined (niter);
> +  if (undetermined_coefficients)
> +    {
> +      DEBUG_PRINT (dp << "[can_represent_loop-fail] "
> +                  << "Loop niter chrec contains undetermined 
> coefficients.\n");
> +      return false;
> +    }
>
> -  return !(loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
> -    && number_of_iterations_exit (loop, single_exit (loop), &niter_desc, 
> false)
> -    && niter_desc.control.no_overflow
> -    && (niter = number_of_latch_executions (loop))
> -    && !chrec_contains_undetermined (niter)
> -    && graphite_can_represent_expr (scop, loop, niter);
> +  bool can_represent_expr = graphite_can_represent_expr (scop, loop, niter);
> +  if (!can_represent_expr)
> +    {
> +      DEBUG_PRINT (dp << "[can_represent_loop-fail] "
> +                  << "Loop niter expression cannot be represented: "
> +                  << niter << "\n");
> +      return false;
> +    }
> +
> +  return true;
>  }
>
>  /* Return true when BEGIN is the preheader edge of a loop with a single exit
> @@ -615,13 +662,12 @@ scop_detection::add_scop (sese_l s)
>        s.exit = single_succ_edge (s.exit->dest);
>      }
>
> -  /* Do not add scops with only one loop.  */
> -  if (region_has_one_loop (s))
> -    {
> -      DEBUG_PRINT (dp << "[scop-detection-fail] Discarding one loop SCoP: ";
> -                  print_sese (dump_file, s));
> -      return;
> -    }
> +  if (!oacc_function_p (cfun) && region_has_one_loop (s))
> +     {
> +       DEBUG_PRINT (dp << "[scop-detection-fail] Discarding one loop SCoP: ";
> +                  print_sese (dump_file, s));
> +       return;
> +     }
>
>    if (get_exit_bb (s) == EXIT_BLOCK_PTR_FOR_FN (cfun))
>      {
> @@ -805,140 +851,87 @@ scop_detection::remove_intersecting_scops (sese_l s1)
>      }
>  }
>
> -/* Something like "n * m" is not allowed.  */
> +/* Return true when EXPR can be represented in the polyhedral model.
> +
> +   This means an expression can be represented, if it is linear with respect 
> to
> +   the loops and the strides are non parametric.  LOOP is the place where the
> +   expr will be evaluated.  SCOP defines the region we analyse.  */
>
>  bool
> -scop_detection::graphite_can_represent_init (tree e)
> +scop_detection::graphite_can_represent_expr (sese_l scop, loop_p loop,
> +                                            tree expr)
>  {
> -  switch (TREE_CODE (e))
> +  if (TREE_CODE (expr) == SSA_NAME)
>      {
> -    case POLYNOMIAL_CHREC:
> -      return graphite_can_represent_init (CHREC_LEFT (e))
> -       && graphite_can_represent_init (CHREC_RIGHT (e));
> -
> -    case MULT_EXPR:
> -      if (chrec_contains_symbols (TREE_OPERAND (e, 0)))
> -       return graphite_can_represent_init (TREE_OPERAND (e, 0))
> -         && tree_fits_shwi_p (TREE_OPERAND (e, 1));
> -      else
> -       return graphite_can_represent_init (TREE_OPERAND (e, 1))
> -         && tree_fits_shwi_p (TREE_OPERAND (e, 0));
> +      tree name = SSA_NAME_IDENTIFIER (expr);
>
> -    case PLUS_EXPR:
> -    case POINTER_PLUS_EXPR:
> -    case MINUS_EXPR:
> -      return graphite_can_represent_init (TREE_OPERAND (e, 0))
> -       && graphite_can_represent_init (TREE_OPERAND (e, 1));
> -
> -    case NEGATE_EXPR:
> -    case BIT_NOT_EXPR:
> -    CASE_CONVERT:
> -    case NON_LVALUE_EXPR:
> -      return graphite_can_represent_init (TREE_OPERAND (e, 0));
> -
> -    default:
> -      break;
> +      if (name)
> +       {
> +         const char* id = IDENTIFIER_POINTER (name);
> +         if (strncmp (id, ".bound", 6) == 0
> +             || strncmp (id, ".offset", 7) == 0
> +             || strncmp (id, ".chunk_max", 11) == 0
> +             || strncmp (id, ".chunk_no", 10) == 0
> +             || strncmp (id, ".step", 5) == 0)
> +           return true;
> +       }
>      }
> +  tree scev = cached_scalar_evolution_in_region (scop, loop, expr);
> +  bool can_represent = graphite_can_represent_scev (scop, scev);
>
> -  return true;
> -}
> -
> -/* Return true when SCEV can be represented in the polyhedral model.
> -
> -   An expression can be represented, if it can be expressed as an
> -   affine expression.  For loops (i, j) and parameters (m, n) all
> -   affine expressions are of the form:
> -
> -   x1 * i + x2 * j + x3 * m + x4 * n + x5 * 1 where x1..x5 element of Z
> -
> -   1 i + 20 j + (-2) m + 25
> -
> -   Something like "i * n" or "n * m" is not allowed.  */
> -
> -bool
> -scop_detection::graphite_can_represent_scev (sese_l scop, tree scev)
> -{
> -  if (chrec_contains_undetermined (scev))
> -    return false;
> -
> -  switch (TREE_CODE (scev))
> +  if (!can_represent)
>      {
> -    case NEGATE_EXPR:
> -    case BIT_NOT_EXPR:
> -    CASE_CONVERT:
> -    case NON_LVALUE_EXPR:
> -      return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0));
> -
> -    case PLUS_EXPR:
> -    case POINTER_PLUS_EXPR:
> -    case MINUS_EXPR:
> -      return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0))
> -       && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 1));
> -
> -    case MULT_EXPR:
> -      return !CONVERT_EXPR_CODE_P (TREE_CODE (TREE_OPERAND (scev, 0)))
> -       && !CONVERT_EXPR_CODE_P (TREE_CODE (TREE_OPERAND (scev, 1)))
> -       && !(chrec_contains_symbols (TREE_OPERAND (scev, 0))
> -            && chrec_contains_symbols (TREE_OPERAND (scev, 1)))
> -       && graphite_can_represent_init (scev)
> -       && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0))
> -       && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 1));
> -
> -    case POLYNOMIAL_CHREC:
> -      /* Check for constant strides.  With a non constant stride of
> -        'n' we would have a value of 'iv * n'.  Also check that the
> -        initial value can represented: for example 'n * m' cannot be
> -        represented.  */
> -      gcc_assert (loop_in_sese_p (get_loop (cfun,
> -                                           CHREC_VARIABLE (scev)), scop));
> -      if (!evolution_function_right_is_integer_cst (scev)
> -         || !graphite_can_represent_init (scev))
> -       return false;
> -      return graphite_can_represent_scev (scop, CHREC_LEFT (scev));
> -
> -    case ADDR_EXPR:
> -      /* We cannot encode addresses for ISL.  */
> -      return false;
> -
> -    default:
> -      break;
> +      if (dump_file)
> +       {
> +         fprintf (dump_file, "[graphite_can_represent_expr] Cannot represent 
> scev ");
> +         print_generic_expr (dump_file, scev, TDF_SLIM);
> +         fprintf (dump_file, " of expression ");
> +         print_generic_expr (dump_file, expr, TDF_SLIM);
> +         fprintf (dump_file, "\n");
> +       }
>      }
> -
> -  /* Only affine functions can be represented.  */
> -  if (tree_contains_chrecs (scev, NULL) || !scev_is_linear_expression (scev))
> -    return false;
> -
> -  return true;
> +  return can_represent;
>  }
>
> -/* Return true when EXPR can be represented in the polyhedral model.
> -
> -   This means an expression can be represented, if it is linear with respect 
> to
> -   the loops and the strides are non parametric.  LOOP is the place where the
> -   expr will be evaluated.  SCOP defines the region we analyse.  */
> +/* Check if STMT is an internal OpenACC function call that should be
> +   ignored when Graphite checks side effects and data references. */
>
> -bool
> -scop_detection::graphite_can_represent_expr (sese_l scop, loop_p loop,
> -                                            tree expr)
> -{
> -  tree scev = cached_scalar_evolution_in_region (scop, loop, expr);
> -  return graphite_can_represent_scev (scop, scev);
> +static inline bool
> +oacc_ignore_internal_stmt (gimple *stmt) {
> +  return is_gimple_call (stmt) &&
> +    (gimple_call_internal_p (stmt, IFN_UNIQUE)
> +     || gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION));
>  }
>
>  /* Return true if the data references of STMT can be represented by Graphite.
>     We try to analyze the data references in a loop contained in the SCOP.  */
>
>  bool
> -scop_detection::stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt)
> +scop_detection::stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt, 
> oacc_context *oacc_ctx)
>  {
> +  if (oacc_ignore_internal_stmt (stmt))
> +    return true;
> +
>    edge nest = scop.entry;
>    loop_p loop = loop_containing_stmt (stmt);
>    if (!loop_in_sese_p (loop, scop))
>      loop = NULL;
>
> +  bool allow_non_affine_base = flag_graphite_non_affine_accesses;
>    auto_vec<data_reference_p> drs;
> -  if (! graphite_find_data_references_in_stmt (nest, loop, stmt, &drs))
> -    return false;
> +  if (! graphite_find_data_references_in_stmt (nest, loop, stmt, &drs,
> +                                              oacc_ctx, 
> allow_non_affine_base))
> +    {
> +      DEBUG_PRINT (dp << "[stmt_has_simple_data_refs_p] Unanalyzable 
> statement.\n");
> +      return false;
> +    }
> +
> +  /* This flag means that we allow Graphite to overapproximate the
> +     range of data references. Consequently, we do not need to check
> +     if Graphite can actually represent the access functions'
> +     SCEVs. */
> +  if (flag_graphite_non_affine_accesses)
> +    return true;
>
>    int j;
>    data_reference_p dr;
> @@ -946,7 +939,10 @@ scop_detection::stmt_has_simple_data_refs_p (sese_l 
> scop, gimple *stmt)
>      {
>        for (unsigned i = 0; i < DR_NUM_DIMENSIONS (dr); ++i)
>         if (! graphite_can_represent_scev (scop, DR_ACCESS_FN (dr, i)))
> -         return false;
> +         {
> +           DEBUG_PRINT (dp << "[stmt_has_simple_data_refs_p] Cannot 
> represent access function SCEV: " << DR_ACCESS_FN (dr, i) << "\n");
> +           return false;
> +         }
>      }
>
>    return true;
> @@ -959,6 +955,9 @@ scop_detection::stmt_has_simple_data_refs_p (sese_l scop, 
> gimple *stmt)
>  static bool
>  stmt_has_side_effects (gimple *stmt)
>  {
> +  if (oacc_ignore_internal_stmt (stmt))
> +    return false;
> +
>    if (gimple_has_volatile_ops (stmt)
>        || (gimple_code (stmt) == GIMPLE_CALL
>           && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
> @@ -990,11 +989,16 @@ scop_detection::stmt_simple_for_scop_p (sese_l scop, 
> gimple *stmt,
>    if (stmt_has_side_effects (stmt))
>      return false;
>
> -  if (!stmt_has_simple_data_refs_p (scop, stmt))
> +  if (!stmt_has_simple_data_refs_p (scop, stmt, oacc_ctx))
>      {
> -      DEBUG_PRINT (dp << "[scop-detection-fail] "
> -                     << "Graphite cannot handle data-refs in stmt:\n";
> -       print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS););
> +      DEBUG_PRINT (dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmt,
> +                                   "[scop-detection-fail] Graphite cannot "
> +                                   "handle data-refs-in-stmt: ");
> +                  print_gimple_stmt (dump_file, stmt, 0,
> +                                     TDF_VOPS | TDF_MEMSYMS);
> +                  fprintf (dump_file, "\n");
> +
> +                  );
>        return false;
>      }
>
> @@ -1027,14 +1031,22 @@ scop_detection::stmt_simple_for_scop_p (sese_l scop, 
> gimple *stmt,
>         for (unsigned i = 0; i < 2; ++i)
>           {
>             tree op = gimple_op (stmt, i);
> -           if (!graphite_can_represent_expr (scop, loop, op)
> -               /* We can only constrain on integer type.  */
> -               || ! INTEGRAL_TYPE_P (TREE_TYPE (op)))
> +           if (!graphite_can_represent_expr (scop, loop, op))
> +             {
> +               DEBUG_PRINT (dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmt,
> +                                             "[scop-detection-fail] "
> +                                             "Graphite cannot represent cond 
> "
> +                                             "stmt operator expression.\n"));
> +               return false;
> +             }
> +
> +             if (! INTEGRAL_TYPE_P (TREE_TYPE (op)))
>               {
> -               DEBUG_PRINT (dp << "[scop-detection-fail] "
> -                               << "Graphite cannot represent stmt:\n";
> -                            print_gimple_stmt (dump_file, stmt, 0,
> -                                               TDF_VOPS | TDF_MEMSYMS));
> +               DEBUG_PRINT (dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmt,
> +                                             "[scop-detection-fail] "
> +                                             "Graphite cannot represent cond 
> "
> +                                             "statement operator. "
> +                                             "Type must be integral.\n"));
>                 return false;
>               }
>           }
> @@ -1151,6 +1163,7 @@ scan_tree_for_params (sese_info_p s, tree e)
>        break;
>
>      case SSA_NAME:
> +      //TODO-kernels Stop treating some OpenACC ifn calls as parameters 
> (reductions?)
>        assign_parameter_index_in_region (e, s);
>        break;
>
> @@ -1288,11 +1301,99 @@ build_cross_bb_scalars_use (scop_p scop, tree use, 
> gimple *use_stmt,
>      add_read (reads, use, use_stmt);
>  }
>
> +/* This class keeps track of the variables that occur in active
> +   OpenACC reductions while walking a function's basic blocks during
> +   SCoP detection.  The UPDATE methods process calls to the OpenACC
> +   internal functions which mark the beginning and the end of the use
> +   of a reduction variable.  They adjust an internal reference count
> +   that is maintained for each such variable accordingly (i.e. the
> +   number of active reductions using a SSA_NAME with the variable
> +   name).  */
> +
> +class oacc_reductions {
> + public:
> +  bool is_reduction_var (const tree var);
> +  void update (const gimple* oacc_reduction_call);
> +  void update (const basic_block);
> + private:
> +  /* Maps a reduction variable to the number of currently active
> +     reductions using it.  An entry is removed as soon as its count
> +     drops to zero, so presence in the map means "active".  */
> +  hash_map<tree, unsigned> reductions;
> +};
> +
> +/* Check if DEF is a SSA_NAME for a variable that occurs in an
> +   active reduction.  */
> +
> +bool oacc_reductions::is_reduction_var (const tree def) {
> +  if (TREE_CODE (def) != SSA_NAME)
> +    return false;
> +
> +  tree var = SSA_NAME_VAR (def);
> +  if (var == NULL_TREE)
> +    return false;
> +
> +  return reductions.get (var) != NULL;
> +}
> +
> +/* Update the internal reference count for the variable used by the
> +   OACC_REDUCTION_CALL if it starts or ends a reduction.  Calls whose
> +   lhs is missing, is not a SSA_NAME or has no underlying variable are
> +   ignored, as are calls with any other reduction code.  */
> +
> +void oacc_reductions::update (const gimple* oacc_reduction_call)
> +{
> +  const gcall* call = GIMPLE_CHECK2<const gcall *> (oacc_reduction_call);
> +  unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
> +
> +  tree lhs = gimple_call_lhs (call);
> +  if (lhs == NULL_TREE || TREE_CODE (lhs) != SSA_NAME)
> +    return;
> +
> +  tree var = SSA_NAME_VAR (lhs);
> +  if (var == NULL_TREE)
> +    return;
> +
> +  switch (code)
> +    {
> +    case IFN_GOACC_REDUCTION_SETUP:
> +      {
> +        bool existed = false;
> +        unsigned& ref_count = reductions.get_or_insert (var, &existed);
> +        /* Do not rely on how a fresh slot is initialized.  */
> +        if (!existed)
> +          ref_count = 0;
> +        ref_count++;
> +
> +        break;
> +      }
> +    case IFN_GOACC_REDUCTION_FINI:
> +      {
> +        unsigned* ref_count = reductions.get (var);
> +        gcc_checking_assert (ref_count != NULL && *ref_count > 0);
> +
> +        /* With checking disabled, tolerate an unmatched FINI instead
> +           of dereferencing a null pointer or wrapping the count.  */
> +        if (ref_count == NULL || *ref_count == 0)
> +          break;
> +
> +        /* Decrement the count itself (not the pointer to it) and
> +           forget the variable once no reduction uses it anymore.  */
> +        if (--*ref_count == 0)
> +          reductions.remove (var);
> +
> +        break;
> +      }
> +
> +    default:
> +      break;
> +    }
> +}
> +
> +/* Process all calls to IFN_GOACC_REDUCTION in the basic block BB, in
> +   statement order, with the single-call UPDATE method.  */
> +
> +void oacc_reductions::update (const basic_block bb)
> +{
> +  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
> +       gsi_next (&gsi))
> +    {
> +      gimple *stmt = gsi_stmt (gsi);
> +      if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
> +        update (stmt);
> +    }
> +}
> +
> +
> +
>  /* Generates a polyhedral black box only if the bb contains interesting
>     information.  */
>
>  static gimple_poly_bb_p
> -try_generate_gimple_bb (scop_p scop, basic_block bb)
> +try_generate_gimple_bb (scop_p scop, basic_block bb,
> +                       oacc_reductions& oacc_reductions,
> +                       __attribute__ ((unused)) oacc_context* oacc_ctx)
>  {
>    vec<data_reference_p> drs = vNULL;
>    vec<tree> writes = vNULL;
> @@ -1304,6 +1405,7 @@ try_generate_gimple_bb (scop_p scop, basic_block bb)
>    if (!loop_in_sese_p (loop, region))
>      loop = NULL;
>
> +  bool allow_non_affine_base = flag_graphite_non_affine_accesses;
>    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
>         gsi_next (&gsi))
>      {
> @@ -1311,16 +1413,27 @@ try_generate_gimple_bb (scop_p scop, basic_block bb)
>        if (is_gimple_debug (stmt))
>         continue;
>
> -      graphite_find_data_references_in_stmt (nest, loop, stmt, &drs);
> +      graphite_find_data_references_in_stmt (nest, loop, stmt, &drs,
> +                                            oacc_ctx, allow_non_affine_base);
>
>        tree def = gimple_get_lhs (stmt);
> -      if (def)
> +      if (def
> +         /* When analyzing the outlined function for an OpenACC
> +            region, no dependencies on reduction variables should be
> +            generated.  Those variables must be ignored when deciding
> +            if a loop can be parallel. */
> +         && !oacc_reductions.is_reduction_var (def))
>         build_cross_bb_scalars_def (scop, def, gimple_bb (stmt), &writes);
>
>        ssa_op_iter iter;
>        tree use;
>        FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
> -       build_cross_bb_scalars_use (scop, use, stmt, &reads);
> +       {
> +         if (oacc_reductions.is_reduction_var (use))
> +           continue;
> +
> +         build_cross_bb_scalars_use (scop, use, stmt, &reads);
> +       }
>      }
>
>    /* Handle defs and uses in PHIs.  Those need special treatment given
> @@ -1332,7 +1445,8 @@ try_generate_gimple_bb (scop_p scop, basic_block bb)
>        gphi *phi = psi.phi ();
>        tree res = gimple_phi_result (phi);
>        if (virtual_operand_p (res)
> -         || scev_analyzable_p (res, scop->scop_info->region))
> +         || scev_analyzable_p (res, scop->scop_info->region)
> +         || oacc_reductions.is_reduction_var (res))
>         continue;
>        /* To simulate out-of-SSA the block containing the PHI node has
>           reads of the PHI destination.  And to preserve SSA dependences
> @@ -1362,13 +1476,15 @@ try_generate_gimple_bb (scop_p scop, basic_block bb)
>                 continue;
>               /* To simulate out-of-SSA the predecessor of edges into PHI 
> nodes
>                  has a copy from the PHI argument to the PHI destination.  */
> -             if (! scev_analyzable_p (res, scop->scop_info->region))
> +             if (! scev_analyzable_p (res, scop->scop_info->region)
> +                 && ! oacc_reductions.is_reduction_var (res))
>                 add_write (&writes, res);
>               tree use = PHI_ARG_DEF_FROM_EDGE (phi, e);
>               if (TREE_CODE (use) == SSA_NAME
>                   && ! SSA_NAME_IS_DEFAULT_DEF (use)
>                   && gimple_bb (SSA_NAME_DEF_STMT (use)) != bb_for_succs
> -                 && ! scev_analyzable_p (use, scop->scop_info->region))
> +                 && ! scev_analyzable_p (use, scop->scop_info->region)
> +                 && ! oacc_reductions.is_reduction_var (use))
>                 add_read (&reads, use, phi);
>             }
>           if (e->dest == bb_for_succs->loop_father->latch
> @@ -1402,40 +1518,316 @@ try_generate_gimple_bb (scop_p scop, basic_block bb)
>    return new_gimple_poly_bb (bb, drs, reads, writes);
>  }
>
> +/* Return true if the data reference DR is a COMPONENT_REF whose
> +   containing object is accessed through a SSA_NAME of the variable
> +   recorded as CTX->omp_data.tgt_fn_arg, i.e. if DR reads a field
> +   through the data argument of the outlined function.  */
> +
> +bool oacc_is_omp_data_use (oacc_context *ctx, data_reference_p dr)
> +{
> +  if (TREE_CODE (dr->ref) != COMPONENT_REF)
> +    return false;
> +
> +  /* The object that contains the field.  Only expression nodes have
> +     operands; a field access on a plain declaration cannot go
> +     through the pointer argument and must not be inspected with
> +     TREE_OPERAND.  */
> +  tree referenced = TREE_OPERAND (dr->ref, 0);
> +  if (!EXPR_P (referenced) || TREE_OPERAND_LENGTH (referenced) < 1)
> +    return false;
> +
> +  tree target = TREE_OPERAND (referenced, 0);
> +  if (target == NULL_TREE
> +      || TREE_CODE (target) != SSA_NAME
> +      || SSA_NAME_VAR (target) == NULL_TREE)
> +    return false;
> +
> +  return SSA_NAME_VAR (target) == ctx->omp_data.tgt_fn_arg;
> +}
> +
> +/* Aliasing involving the pointers contained in the ".omp_data_i"
> +   struct can be safely ignored.  We are analysing the behavior of a
> +   loop nest with respect to the original function and those pointers
> +   are artifacts of the outlining process.  Return true if DR1 or DR2
> +   is such a use according to oacc_is_omp_data_use.  */
> +
> +bool
> +oacc_ignore_alias (oacc_context *ctx, data_reference_p dr1,
> +                   data_reference_p dr2)
> +{
> +  return (oacc_is_omp_data_use (ctx, dr1)
> +          || oacc_is_omp_data_use (ctx, dr2));
> +}
> +
> +/* Returns true if expression EXPR, or one of its operands, is a
> +   SSA_NAME whose defining statement lies in a basic block that is
> +   dominated by the destination of ENTRY but not by the destination
> +   of EXIT.  Null expressions and gimple invariants are never
> +   considered to be defined in the region.  */
> +
> +static bool
> +def_in_region_p (edge entry, edge exit, tree expr)
> +{
> +  if (! expr || is_gimple_min_invariant (expr))
> +    return false;
> +
> +  if (TREE_CODE (expr) != SSA_NAME)
> +    {
> +      /* Not a name itself: look for a region-defined name among the
> +         operands.  */
> +      const unsigned num_ops = TREE_OPERAND_LENGTH (expr);
> +      for (unsigned op = 0; op < num_ops; ++op)
> +        if (def_in_region_p (entry, exit, TREE_OPERAND (expr, op)))
> +          return true;
> +
> +      return false;
> +    }
> +
> +  /* Names without a defining block cannot be inside the region.  */
> +  basic_block def_block = gimple_bb (SSA_NAME_DEF_STMT (expr));
> +  if (! def_block)
> +    return false;
> +
> +  return (dominated_by_p (CDI_DOMINATORS, def_block, entry->dest)
> +          && ! dominated_by_p (CDI_DOMINATORS, def_block, exit->dest));
> +}
> +
> +/* Return true if SCEV contains no undetermined part and, according
> +   to def_in_region_p, uses no definition from the region between
> +   ENTRY and EXIT.  The listed unary and binary codes as well as
> +   polynomial chrecs are checked operand by operand.  */
> +
> +static bool
> +scev_defs_outside_region_p (edge entry, edge exit, tree scev)
> +{
> +  if (chrec_contains_undetermined (scev))
> +    return false;
> +
> +  const enum tree_code code = TREE_CODE (scev);
> +
> +  const bool one_operand = (code == NEGATE_EXPR
> +                            || code == BIT_NOT_EXPR
> +                            || code == NON_LVALUE_EXPR
> +                            || code == ADDR_EXPR);
> +  if (one_operand)
> +    return scev_defs_outside_region_p (entry, exit,
> +                                       TREE_OPERAND (scev, 0));
> +
> +  const bool two_operands = (code == PLUS_EXPR
> +                             || code == POINTER_PLUS_EXPR
> +                             || code == MINUS_EXPR
> +                             || code == MULT_EXPR
> +                             || code == POLYNOMIAL_CHREC);
> +  if (two_operands)
> +    {
> +      if (! scev_defs_outside_region_p (entry, exit,
> +                                        TREE_OPERAND (scev, 0)))
> +        return false;
> +
> +      return scev_defs_outside_region_p (entry, exit,
> +                                         TREE_OPERAND (scev, 1));
> +    }
> +
> +  /* Anything else is examined as a whole.  */
> +  return ! def_in_region_p (entry, exit, scev);
> +}
> +
> +/* Checks if all parts of the DRI are defined outside of the region
> +   surrounded by the given edges which allows an alias check involving
> +   DRI to be placed before the region.  The parts examined are the
> +   base object, the step, the base address and every access function
> +   of the data reference.  The first offending part is reported in the
> +   dump file.  */
> +
> +static bool
> +dr_defs_outside_region (edge entry, edge exit, dr_info *dri)
> +{
> +  data_reference_p dr = dri->dr;
> +  tree base = DR_BASE_OBJECT (dr);
> +  if (def_in_region_p (entry, exit, base))
> +    {
> +      DEBUG_PRINT(dp << "base defined inside SCoP.\n");
> +      return false;
> +    }
> +
> +  tree step = DR_STEP (dr);
> +  if (def_in_region_p (entry, exit, step))
> +    {
> +      DEBUG_PRINT(dp << "step defined inside SCoP.\n");
> +      return false;
> +    }
> +
> +  tree base_addr = DR_BASE_ADDRESS (dr);
> +  if (def_in_region_p (entry, exit, base_addr))
> +    {
> +      DEBUG_PRINT(dp << "base address defined inside SCoP.\n");
> +      return false;
> +    }
> +
> +  for (unsigned i = 0; i < DR_NUM_DIMENSIONS(dr); ++i)
> +    {
> +      tree access = DR_ACCESS_FN (dr, i);
> +      if (! scev_defs_outside_region_p (entry, exit, access))
> +        {
> +          /* The index is unsigned, so it is printed with %u.  */
> +          DEBUG_PRINT(fprintf (dump_file,
> +                               "%u-th access function uses definitions "
> +                               "from SCoP.\n", i);
> +                      print_generic_expr (dump_file, access, dump_flags);
> +                      fprintf (dump_file, "\n"););
> +          return false;
> +        }
> +    }
> +
> +  return true;
> +}
> +
> +
> +
> +/* Decide whether a runtime alias check that resolves the aliasing
> +   between the data references DR1 and DR2, which belong to LOOP in
> +   the region delineated by SCOP_ENTRY and SCOP_EXIT, may be created
> +   and placed before that region.  Returns a successful opt_result
> +   if so and a failure describing the reason otherwise.  */
> +
> +static opt_result
> +graphite_runtime_alias_check_p (dr_info* dr1, dr_info* dr2, class loop *loop,
> +                               edge scop_entry, edge scop_exit)
> +{
> +  gcc_checking_assert (loop);
> +  gcc_checking_assert (dr1);
> +  gcc_checking_assert (dr2);
> +
> +  if (dump_enabled_p ())
> +    dump_printf (MSG_NOTE,
> +                 "consider run-time aliasing test between %T and %T\n",
> +                 DR_REF (dr1->dr), DR_REF (dr2->dr));
> +
> +  /* Every failure is reported at the statement of the first
> +     reference.  */
> +  gimple *report_stmt = DR_STMT (dr1->dr);
> +
> +  if (! optimize_loop_for_speed_p (loop))
> +    return opt_result::failure_at (report_stmt,
> +                                   "runtime alias check not supported when"
> +                                   " optimizing for size.\n");
> +
> +  /* Verify that we have enough information about the data-references
> +     and context loop to construct a runtime alias check with
> +     "compute_alias_check_pairs".  The loop with number 0 is exempt
> +     from the iteration count requirement.  */
> +  if (loop->num != 0)
> +    {
> +      tree latch_execs = number_of_latch_executions (loop);
> +      if (latch_execs == NULL_TREE || latch_execs == chrec_dont_know)
> +        return opt_result::failure_at (report_stmt,
> +                                       "could not determine number of "
> +                                       "iterations "
> +                                       "of the SCoP's context loop. "
> +                                       "Aborting runtime alias checks.\n");
> +    }
> +
> +  /* The runtime alias check selects between the optimized and the
> +     original version of a SCoP.  Hence, it must be placed before the
> +     SCoP which is not possible if some of the data reference's fields
> +     refer to definitions inside of the SCoP.  */
> +  const bool hoistable
> +    = (dr_defs_outside_region (scop_entry, scop_exit, dr1)
> +       && dr_defs_outside_region (scop_entry, scop_exit, dr2));
> +  if (! hoistable)
> +    return opt_result::failure_at (report_stmt,
> +                                   "data-references use definitions inside "
> +                                   "of "
> +                                   "SCoP. "
> +                                   "Aborting runtime alias checks.\n");
> +
> +  return opt_result::success ();
> +}
> +
>  /* Compute alias-sets for all data references in DRS.  */
>
> -static bool
> -build_alias_set (scop_p scop)
> +static bool
> +build_alias_set (scop_p scop, oacc_context *oacc_ctx)
>  {
>    int num_vertices = scop->drs.length ();
>    struct graph *g = new_graph (num_vertices);
>    dr_info *dr1, *dr2;
>    int i, j;
>    int *all_vertices;
> +  edge scop_entry = scop->scop_info->region.entry;
> +  edge scop_exit = scop->scop_info->region.exit;
>
>    struct loop *nest
> -    = find_common_loop (scop->scop_info->region.entry->dest->loop_father,
> -                       scop->scop_info->region.exit->src->loop_father);
> +    = find_common_loop (scop_entry->dest->loop_father,
> +                       scop_exit->src->loop_father);
> +
> +  gcc_checking_assert (nest);
> +
> +  DEBUG_PRINT(dp << "[build_alias_set]: Data  references:\n";
> +             dr_info *dr;
> +             FOR_EACH_VEC_ELT (scop->drs, i, dr)
> +             {
> +               dump_data_reference (dump_file, dr->dr);
> +             }
> +             );
> +
> +  auto_vec<loop_p, 1> nest_vec;
> +  if (flag_graphite_runtime_alias_checks)
> +    {
> +      scop->unhandled_alias_ddrs.create (1);
> +      nest_vec.safe_push (nest);
> +    }
>
>    FOR_EACH_VEC_ELT (scop->drs, i, dr1)
>      for (j = i+1; scop->drs.iterate (j, &dr2); j++)
> -      if (dr_may_alias_p (dr1->dr, dr2->dr, nest))
> +      if (! (DR_IS_READ (dr1->dr) && DR_IS_READ (dr2->dr))
> +         && dr_may_alias_p (dr1->dr, dr2->dr, nest))
>         {
> -         /* Dependences in the same alias set need to be handled
> -            by just looking at DR_ACCESS_FNs.  */
> -         if (DR_NUM_DIMENSIONS (dr1->dr) == 0
> -             || DR_NUM_DIMENSIONS (dr1->dr) != DR_NUM_DIMENSIONS (dr2->dr)
> -             || ! operand_equal_p (DR_BASE_OBJECT (dr1->dr),
> -                                   DR_BASE_OBJECT (dr2->dr),
> -                                   OEP_ADDRESS_OF)
> -             || ! types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (dr1->dr)),
> -                                      TREE_TYPE (DR_BASE_OBJECT (dr2->dr))))
> +         if (! oacc_ignore_alias (oacc_ctx, dr1->dr, dr2->dr))
>             {
> -             free_graph (g);
> -             return false;
> +             /* Dependences in the same alias set need to be handled
> +                by just looking at DR_ACCESS_FNs.  */
> +             bool dimension_zero = DR_NUM_DIMENSIONS (dr1->dr) == 0;
> +             if (dimension_zero)
> +                 DEBUG_PRINT(dp << "[build_alias_set] DR1 has dimension 
> 0\n");
> +
> +             bool different_dimensions =
> +               DR_NUM_DIMENSIONS (dr1->dr) != DR_NUM_DIMENSIONS (dr2->dr);
> +             if (different_dimensions)
> +               DEBUG_PRINT (dp << "[build_alias_set] "
> +                            "DRs have different dimensions\n");
> +
> +             bool different_base_objects =
> +               ! operand_equal_p (DR_BASE_OBJECT (dr1->dr),
> +                                  DR_BASE_OBJECT (dr2->dr), OEP_ADDRESS_OF);
> +             if (different_base_objects)
> +               DEBUG_PRINT (dp << "[build_alias_set] "
> +                            "DRs access different objects\n");
> +
> +             bool incompatible_types =
> +               ! types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (dr1->dr)),
> +                                     TREE_TYPE (DR_BASE_OBJECT (dr2->dr)));
> +             if (incompatible_types)
> +               DEBUG_PRINT (dp << "[build_alias_set] "
> +                            "DRs with incompatible base object types");
> +
> +             if (dimension_zero || different_dimensions
> +                 || different_base_objects || incompatible_types)
> +               {
> +                 if (! flag_graphite_runtime_alias_checks)
> +                   goto FAIL;
> +
> +                 if (! graphite_runtime_alias_check_p (dr1, dr2, nest,
> +                                                       scop_entry, 
> scop_exit))
> +                   goto FAIL;
> +
> +                 ddr_p ddr = initialize_data_dependence_relation
> +                   (dr1->dr, dr2->dr, nest_vec);
> +                 scop->unhandled_alias_ddrs.safe_push(ddr);
> +               }
>             }
>           add_edge (g, i, j);
>           add_edge (g, j, i);
> +         continue;
> +
> +       FAIL:
> +         DEBUG_PRINT (dp <<
> +                      "[build_alias_set] "
> +                      "Cannot handle dependency between data references: \n";
> +                      print_gimple_stmt (dump_file, dr1->dr->stmt, 2, 
> TDF_DETAILS);
> +                      print_gimple_stmt (dump_file, dr2->dr->stmt, 2, 
> TDF_DETAILS);
> +                      dp << "\n");
> +
> +         if (flag_graphite_runtime_alias_checks)
> +           {
> +             if (scop->unhandled_alias_ddrs.length () > 0)
> +               /* The loop-nest vec is shared by all DDRs. */
> +               DDR_LOOP_NEST (scop->unhandled_alias_ddrs[0]).release ();
> +             free_dependence_relations (scop->unhandled_alias_ddrs);
> +           }
> +
> +         free_graph (g);
> +         return false;
>         }
>
>    all_vertices = XNEWVEC (int, num_vertices);
> @@ -1457,7 +1849,7 @@ build_alias_set (scop_p scop)
>  class gather_bbs : public dom_walker
>  {
>  public:
> -  gather_bbs (cdi_direction, scop_p, int *);
> +  gather_bbs (cdi_direction, scop_p, int *, oacc_reductions&, oacc_context*);
>
>    virtual edge before_dom_children (basic_block);
>    virtual void after_dom_children (basic_block);
> @@ -1465,12 +1857,18 @@ public:
>  private:
>    auto_vec<gimple *, 3> conditions, cases;
>    scop_p scop;
> +  oacc_reductions &reductions;
> +  oacc_context *oacc_ctx;
>  };
> -}
> -gather_bbs::gather_bbs (cdi_direction direction, scop_p scop, int *bb_to_rpo)
> -  : dom_walker (direction, ALL_BLOCKS, bb_to_rpo), scop (scop)
> -{
> -}
> +
> +/* Set up a walk in DIRECTION over all blocks, using BB_TO_RPO, that
> +   records its results in SCOP and consults REDUCTIONS and OACC_CTX.  */
> +
> +gather_bbs::gather_bbs (cdi_direction direction, scop_p scop, int *bb_to_rpo,
> +                       oacc_reductions &reductions, oacc_context *oacc_ctx)
> +  : dom_walker (direction, ALL_BLOCKS, bb_to_rpo),
> +    scop (scop),
> +    reductions (reductions),
> +    oacc_ctx (oacc_ctx)
> +{
> +}
>
>  /* Call-back for dom_walk executed before visiting the dominated
>     blocks.  */
> @@ -1478,6 +1876,8 @@ gather_bbs::gather_bbs (cdi_direction direction, scop_p 
> scop, int *bb_to_rpo)
>  edge
>  gather_bbs::before_dom_children (basic_block bb)
>  {
> +  reductions.update (bb);
> +
>    sese_info_p region = scop->scop_info;
>    if (!bb_in_sese_p (bb, region->region))
>      return dom_walker::STOP;
> @@ -1514,7 +1914,8 @@ gather_bbs::before_dom_children (basic_block bb)
>
>    scop->scop_info->bbs.safe_push (bb);
>
> -  gimple_poly_bb_p gbb = try_generate_gimple_bb (scop, bb);
> +  gimple_poly_bb_p gbb = try_generate_gimple_bb (scop, bb, reductions, 
> oacc_ctx);
> +
>    if (!gbb)
>      return NULL;
>
> @@ -1563,6 +1964,44 @@ gather_bbs::after_dom_children (basic_block bb)
>      }
>  }
>
> +/* Update the OpenACC reductions information for all basic blocks
> +   encountered by the dom_walker.  This is used to adjust the
> +   reduction information for the basic blocks between the SCoPs (which
> +   are processed by GATHER_BBS) in the BUILD_SCOPS function.  The
> +   walker returns dom_walker::STOP at the source block of the END
> +   edge, after having processed that block.  */
> +
> +class oacc_reduction_walker : public dom_walker
> +{
> +public:
> +  oacc_reduction_walker (oacc_reductions& reductions, edge end, int *);
> +
> +  virtual edge before_dom_children (basic_block);
> +
> +private:
> +  /* Reduction state shared with the caller; updated for each visited
> +     block.  */
> +  oacc_reductions& reductions;
> +
> +  /* Edge whose source block ends the walk.  */
> +  edge end;
> +};
> +
> +oacc_reduction_walker::oacc_reduction_walker
> +(oacc_reductions& reductions, edge end, int *bb_to_rpo)
> +: dom_walker (CDI_DOMINATORS, ALL_BLOCKS, bb_to_rpo),
> +  reductions (reductions),
> +  end (end)
> +{
> +}
> +
> +/* Call-back for dom_walk executed before visiting the dominated
> +   blocks: record the reduction calls of BB and stop at END's source.  */
> +
> +edge
> +oacc_reduction_walker::before_dom_children (basic_block bb)
> +{
> +  reductions.update (bb);
> +
> +  if (bb == end->src)
> +    return dom_walker::STOP;
> +  else
> +    return NULL;
> +}
> +
> +}
>
>  /* Compute sth like an execution order, dominator order with first executing
>     edges that stay inside the current loop, delaying processing exit edges.  
> */
> @@ -1590,12 +2029,12 @@ cmp_pbbs (const void *pa, const void *pb)
>     them to SCOPS.  */
>
>  void
> -build_scops (vec<scop_p> *scops)
> +build_scops (vec<scop_p> *scops, oacc_context *oacc_ctx)
>  {
>    if (dump_file)
>      dp.set_dump_file (dump_file);
>
> -  scop_detection sb;
> +  scop_detection sb (oacc_ctx);
>    sb.build_scop_depth (current_loops->tree_root);
>
>    /* Now create scops from the lightweight SESEs.  */
> @@ -1611,17 +2050,26 @@ build_scops (vec<scop_p> *scops)
>
>    int i;
>    sese_l *s;
> +  basic_block reduction_walk_start = ENTRY_BLOCK_PTR_FOR_FN (cfun);
> +  oacc_reductions reductions;
> +
>    FOR_EACH_VEC_ELT (scops_l, i, s)
>      {
>        scop_p scop = new_scop (s->entry, s->exit);
>
> +      edge reduction_walk_end = s->entry;
> +      oacc_reduction_walker (reductions,  reduction_walk_end, bb_to_rpo)
> +       .walk (reduction_walk_start);
> +      reduction_walk_start = s->exit->dest;
> +
>        /* Record all basic blocks and their conditions in REGION.  */
> -      gather_bbs (CDI_DOMINATORS, scop, bb_to_rpo).walk (s->entry->dest);
> +      gather_bbs (CDI_DOMINATORS, scop, bb_to_rpo, reductions, oacc_ctx)
> +       .walk (s->entry->dest);
>
>        /* Sort pbbs after execution order for initial schedule generation.  */
>        scop->pbbs.qsort (cmp_pbbs);
>
> -      if (! build_alias_set (scop))
> +      if (! build_alias_set (scop, oacc_ctx))
>         {
>           DEBUG_PRINT (dp << "[scop-detection-fail] cannot handle 
> dependences\n");
>           free_scop (scop);
> diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
> index c42415e0554..c6f07ea9a99 100644
> --- a/gcc/graphite-sese-to-poly.c
> +++ b/gcc/graphite-sese-to-poly.c
> @@ -36,6 +36,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimplify.h"
>  #include "gimplify-me.h"
>  #include "tree-cfg.h"
> +#include "tree-chrec-oacc.h"
>  #include "tree-ssa-loop-manip.h"
>  #include "tree-ssa-loop-niter.h"
>  #include "tree-ssa-loop.h"
> @@ -209,6 +210,8 @@ parameter_index_in_region (tree name, sese_info_p region)
>    return -1;
>  }
>
> +tree oacc_ifn_call_extract (gimple*);
> +
>  /* Extract an affine expression from the tree E in the scop S.  */
>
>  static isl_pw_aff *
> @@ -275,6 +278,13 @@ extract_affine (scop_p s, tree e, __isl_take isl_space 
> *space)
>      case SSA_NAME:
>        {
>         gcc_assert (! defined_in_sese_p (e, s->scop_info->region));
> +       if (is_oacc_loop_ifn_call_def (e))
> +         {
> +           gimple* stmt = SSA_NAME_DEF_STMT (e);
> +           return extract_affine (s, oacc_ifn_call_extract (stmt), space);
> +
> +         }
> +
>         int dim = parameter_index_in_region (e, s->scop_info);
>         gcc_assert (dim != -1);
>         /* No need to wrap a parameter.  */
> @@ -643,8 +653,20 @@ build_poly_dr (dr_info &dri)
>      subscript_sizes = pdr_add_data_dimensions (subscript_sizes, scop, dr);
>    }
>
> -  new_poly_dr (pbb, DR_STMT (dr), DR_IS_READ (dr) ? PDR_READ : PDR_WRITE,
> -              acc, subscript_sizes);
> +  bool representable = true;
> +  for (unsigned i = 0; i < DR_NUM_DIMENSIONS (dr); ++i)
> +    if (! graphite_can_represent_scev
> +       (scop->scop_info->region, DR_ACCESS_FN (dr, i)))
> +      representable = false;
> +
> +  /* If non-affine access functions are not enabled, the DR should
> +     have been rejected during SCoP detection. */
> +  gcc_checking_assert (representable || flag_graphite_non_affine_accesses);
> +
> +  poly_dr_type write_type = representable ? PDR_WRITE : PDR_MAY_WRITE;
> +  poly_dr_type type = DR_IS_READ (dr) ? PDR_READ : write_type;
> +
> +  new_poly_dr (pbb, DR_STMT (dr), type, acc, subscript_sizes);
>  }
>
>  static void
> diff --git a/gcc/graphite.c b/gcc/graphite.c
> index 27f1e486e1f..3661d92e601 100644
> --- a/gcc/graphite.c
> +++ b/gcc/graphite.c
> @@ -43,6 +43,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "cfghooks.h"
>  #include "tree.h"
>  #include "gimple.h"
> +#include "gimple-iterator.h"
> +#include "gimplify-me.h"
>  #include "ssa.h"
>  #include "fold-const.h"
>  #include "gimple-iterator.h"
> @@ -58,6 +60,18 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-ssa.h"
>  #include "tree-into-ssa.h"
>  #include "graphite.h"
> +#include "graphite-oacc.h"
> +#include "cgraph.h"
> +#include "gimple-pretty-print.h"
> +#include "print-tree.h"
> +
> +static bool have_isl = true;
> +
> +#ifdef ACCEL_COMPILER
> +static bool accel_compiler = true;
> +#else
> +static bool accel_compiler = false;
> +#endif
>
>  /* Print global statistics to FILE.  */
>
> @@ -348,6 +362,220 @@ canonicalize_loop_closed_ssa (loop_p loop, edge e)
>      }
>  }
>
> +/* Result of find_goacc_parallel_call: the call that was found and the
> +   loop that contains it.  Both fields are NULL if nothing was
> +   found.  */
> +
> +struct goacc_parallel_info {
> +  gcall* call;
> +  loop_p loop;
> +};
> +
> +/* Find the first call to BUILT_IN_GOACC_PARALLEL in the function FN
> +   whose function argument (argument 1) is the address of the current
> +   function's declaration (cfun->decl).  Return this call together
> +   with the loop father of its basic block, or a result whose fields
> +   are both NULL if there is no such call in FN.  */
> +
> +static goacc_parallel_info find_goacc_parallel_call (struct function* fn)
> +{
> +  /* Start from a well-defined "not found" result; callers inspect the
> +     fields even if no call matches.  */
> +  goacc_parallel_info result = { NULL, NULL };
> +
> +  basic_block bb;
> +  FOR_EACH_BB_FN (bb, fn)
> +    {
> +      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
> +           gsi_next (&gsi))
> +        {
> +          gimple *call = gsi_stmt (gsi);
> +          if (! is_gimple_call (call)
> +              || ! gimple_call_builtin_p (call, BUILT_IN_NORMAL))
> +            continue;
> +
> +          built_in_function code =
> +            DECL_FUNCTION_CODE (gimple_call_fndecl (call));
> +          if (code != BUILT_IN_GOACC_PARALLEL)
> +            continue;
> +
> +          tree called_fn_addr = gimple_call_arg (call, 1);
> +          gcc_checking_assert (TREE_CODE (called_fn_addr) == ADDR_EXPR);
> +          tree called_fn = TREE_OPERAND (called_fn_addr, 0);
> +
> +          /* Skip launches of other outlined functions.  */
> +          if (called_fn != cfun->decl)
> +            continue;
> +
> +          result.call = (gcall*)call;
> +          result.loop = bb->loop_father;
> +          return result;
> +        }
> +    }
> +
> +  return result;
> +}
> +
> +/* Return operand 0 of argument 3 of CALL, which must be a call to
> +   BUILT_IN_GOACC_PARALLEL.
> +   NOTE(review): presumably that argument is the address of the
> +   ".omp_data_arr" object, which would make the result the object
> +   itself -- confirm against the code that emits the call.  */
> +
> +static tree
> +get_goacc_parallel_omp_data_arg (gimple* call)
> +{
> +  gcc_checking_assert (is_gimple_call (call)
> +                       && gimple_call_builtin_p (call, BUILT_IN_NORMAL)
> +                       && DECL_FUNCTION_CODE (gimple_call_fndecl (call))
> +                       == BUILT_IN_GOACC_PARALLEL);
> +
> +  return TREE_OPERAND (gimple_call_arg (call, 3), 0);
> +}
> +
> +/* TODO-kernels This was meant as a way to allow to peek into the
> +   original function from an outlined function to see, for instance,
> +   if some values are known to be constant since constant propagation
> +   fails to propagate values into the outlined function. Currently
> +   unused and can be removed. */
> +
> +/* Gather direct assignments to fields of SRC_FN_ARG in FN into
> +   FIELD_MAP.  Each field that gets assigned to in FN is mapped, by
> +   its name, to the rhs of the last encountered assignment.
> +   Afterwards, each local declaration of the current function whose
> +   name has an entry in FIELD_MAP is recorded as that entry's target
> +   variable.  Does nothing if FN is NULL.  */
> +
> +void
> +oacc_omp_data::gather_assignments (struct function *fn)
> +{
> +  if (!fn)
> +    return;
> +
> +  basic_block bb;
> +  FOR_EACH_BB_FN (bb, fn)
> +    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
> +         gsi_next (&gsi))
> +      {
> +        gimple *stmt = gsi_stmt (gsi);
> +        if (! is_gimple_assign (stmt))
> +          continue;
> +
> +        /* Only stores of the form SRC_FN_ARG.field = ... matter.  */
> +        tree lhs = gimple_assign_lhs (stmt);
> +        const bool stores_field
> +          = (TREE_CODE (lhs) == COMPONENT_REF
> +             && TREE_OPERAND (lhs, 0) == src_fn_arg);
> +        if (! stores_field)
> +          continue;
> +
> +        tree field_name = DECL_NAME (TREE_OPERAND (lhs, 1));
> +        oacc_data_field& entry = field_map.get_or_insert (field_name);
> +        entry.src_var = gimple_assign_rhs1 (stmt);
> +      }
> +
> +  unsigned idx;
> +  tree decl;
> +  FOR_EACH_VEC_SAFE_ELT (cfun->local_decls, idx, decl)
> +    {
> +      oacc_data_field* entry = field_map.get (DECL_NAME (decl));
> +      if (entry)
> +        entry->tgt_var = decl;
> +    }
> +}
> +
> +/* TODO-kernels This will have to be adapted to work with the
> +"exploded arguments" patch because the mapping of names from the
> +outlined function to the original function relies on the omp_data
> +arguments to the outlined function which don't exist after "exploding"
> +the arguments. */
> +
> +/* Build the data mapping information for the current function.  If
> +   FN is non-NULL, the source argument and the loop are taken from
> +   the GOACC parallel call found in FN; otherwise both stay NULL.
> +   The target argument is the first argument of the current
> +   function.  */
> +
> +oacc_omp_data
> +oacc_omp_data::construct (struct function* fn)
> +{
> +  tree src_arg = NULL_TREE;
> +  loop_p launch_loop = NULL;
> +
> +  if (fn)
> +    {
> +      goacc_parallel_info found = find_goacc_parallel_call (fn);
> +      launch_loop = found.loop;
> +      gcc_checking_assert (found.call);
> +      gcc_checking_assert (launch_loop);
> +
> +      src_arg = get_goacc_parallel_omp_data_arg (found.call);
> +      gcc_checking_assert (src_arg);
> +    }
> +
> +  tree tgt_arg = DECL_ARGUMENTS (cfun->decl);
> +
> +  oacc_omp_data omp_data;
> +  omp_data.src_fn_arg = src_arg;
> +  omp_data.tgt_fn_arg = tgt_arg;
> +  omp_data.loop = launch_loop;
> +
> +  /* Needs SRC_FN_ARG to be set; a NULL FN is handled there.  */
> +  omp_data.gather_assignments (fn);
> +
> +  return omp_data;
> +}
> +
> +/* TODO-kernels How to implement this in a stable way? The name of the
> +   original function may change (constprop?) which causes the search
> +   to fail. */
> +
> +/* Return the function from which the OpenACC OUTLINED_FN
> +   has been outlined, or NULL if it cannot be found.  The lookup is
> +   by assembler name: the part of OUTLINED_FN's assembler name that
> +   precedes the first '.' is taken to be the assembler name of the
> +   source function.  */
> +
> +static function*
> +find_oacc_src_fn (function* outlined_fn)
> +{
> +  gcc_assert (! accel_compiler);
> +  gcc_assert (oacc_function_p (outlined_fn));
> +
> +  tree name = DECL_ASSEMBLER_NAME (outlined_fn->decl);
> +  const char* id = IDENTIFIER_POINTER (name);
> +  const unsigned len = IDENTIFIER_LENGTH (name);
> +
> +  /* id is the name of the function from which the current
> +     function has been outlined, followed by a suffix that starts
> +     with ".omp_fn" */
> +  unsigned prefix_len = 0;
> +  while (prefix_len < len && id[prefix_len] != '.')
> +    ++prefix_len;
> +
> +  gcc_checking_assert (prefix_len < len);
> +
> +  /* Without a suffix the whole name would be looked up, which would
> +     find OUTLINED_FN itself instead of its source function.  */
> +  if (prefix_len == len)
> +    return NULL;
> +
> +  tree id_node = get_identifier_with_length (id, prefix_len);
> +
> +  cgraph_node* n = cgraph_node::get_for_asmname (id_node);
> +
> +  return n ? n->get_fun () : NULL;
> +}
> +
> +/* Build the OpenACC context for the current function (cfun).  Return
> +   an invalid context if cfun is not an OpenACC function.  */
> +
> +oacc_context
> +oacc_context::build_context ()
> +{
> +  if (!oacc_function_p (cfun))
> +    return oacc_context::invalid_context ();
> +
> +  struct function *src_fn = find_oacc_src_fn (cfun);
> +  if (src_fn == NULL && dump_file)
> +    fprintf (dump_file,
> +             "Source function for outlined function %s not found.\n",
> +             IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
> +
> +  /* NOTE(review): a missing source function does not invalidate the
> +     context; construct () is still called, with a NULL argument, and
> +     the result is marked valid.  Confirm that this is intended.  */
> +  oacc_context context (oacc_omp_data::construct (src_fn));
> +  context.valid = true;
> +
> +  return context;
> +}
> +
>  /* Converts the current loop closed SSA form to a canonical form
>     expected by the Graphite code generation.
>
> @@ -405,6 +633,8 @@ canonicalize_loop_form (void)
>
>  isl_ctx *the_isl_ctx;
>
> +extern void oacc_set_arg_evolutions ();
> +
>  /* Perform a set of linear transforms on the loops of the current
>     function.  */
>
> @@ -417,10 +647,34 @@ graphite_transform_loops (void)
>    vec<scop_p> scops = vNULL;
>    isl_ctx *ctx;
>
> +  bool is_oacc_function = oacc_function_p (cfun);
> +
> +  oacc_context oacc_ctx(oacc_context::build_context ());
> +  // TODO-kernels Clean this up
> +  /* if (is_oacc_function && ! oacc_ctx.is_valid ()) */
> +  /*   { */
> +  /*     if (dump_file) */
> +  /*   fprintf (dump_file, "Could not build OpenACC context for function %s. 
> " */
> +  /*            "Aborting Graphite.\n", current_function_name ()); */
> +  /*     return; */
> +  /*   } */
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    fprintf (dump_file, "\nTransforming Function: %s\n",
> +            current_function_name ());
> +
>    /* If a function is parallel it was most probably already run through 
> graphite
>       once. No need to run again.  */
> -  if (parallelized_function_p (cfun->decl))
> -    return;
> +
> +  /* TODO-kernels Stop marking kernels regions that should be
> +     processed here as "parallelized". */
> +  if (parallelized_function_p (cfun->decl) && ! is_oacc_function)
> +    {
> +
> +      if (dump_file)
> +       fprintf (dump_file, "\nAlready parallelized function.\n");
> +      return;
> +    }
>
>    calculate_dominance_info (CDI_DOMINATORS);
>
> @@ -445,7 +699,9 @@ graphite_transform_loops (void)
>    seir_cache = new hash_map<sese_scev_hash, tree>;
>
>    calculate_dominance_info (CDI_POST_DOMINATORS);
> -  build_scops (&scops);
> +
> +  oacc_set_arg_evolutions ();
> +  build_scops (&scops, oacc_ctx.is_valid () ? &oacc_ctx : NULL);
>    free_dominance_info (CDI_POST_DOMINATORS);
>
>    /* Remove the fake exits before transform given they are not reflected
> @@ -520,6 +776,8 @@ graphite_transform_loops (void)
>
>  #else /* If isl is not available: #ifndef HAVE_isl.  */
>
> +static bool have_isl = false;
> +
>  static void
>  graphite_transform_loops (void)
>  {
> @@ -532,7 +790,9 @@ graphite_transform_loops (void)
>  static unsigned int
>  graphite_transforms (struct function *fun)
>  {
> -  if (number_of_loops (fun) <= 1)
> +  unsigned num_loops = number_of_loops (fun);
> +  if (num_loops == 0
> +      || (num_loops == 1 && !oacc_function_p (cfun)))
>      return 0;
>
>    graphite_transform_loops ();
> @@ -540,9 +800,34 @@ graphite_transforms (struct function *fun)
>    return 0;
>  }
>
> +
> +/* Return true if FUN is an OpenACC outlined function that should be
> +   handled by Graphite: this is not the accelerator compiler, OpenACC
> +   is enabled, and FUN's declaration has an OpenACC function
> +   attribute.  */
> +
> +static inline bool
> +oacc_enable_graphite_p (function *fun)
> +{
> +  if (accel_compiler || !flag_openacc)
> +    return false;
> +
> +  return oacc_get_fn_attrib (fun->decl) != NULL_TREE;
> +}
> +
> +
>  static bool
> -gate_graphite_transforms (void)
> +gate_graphite_transforms (function *fun)
>  {
> +  bool oacc_enabled_graphite = oacc_enable_graphite_p (fun);
> +  bool not_optimizing = global_options.x_optimize <= 0;
> +
> +  /* Enabling Graphite if isl is not available aborts compilation, so
> +     in that case only warn instead of enabling it for OpenACC
> +     functions.  NOTE(review): the flag is still set when compiling
> +     without optimisations, presumably because the pass is not
> +     executed then -- confirm.  */
> +  if (have_isl || not_optimizing)
> +    flag_graphite_identity |= oacc_enabled_graphite;
> +  else if (oacc_enabled_graphite)
> +    warning (OPT_Wall, "Unable to enable Graphite on OpenACC regions, "
> +            "because isl is not available");
> +
> +
>    /* Enable -fgraphite pass if any one of the graphite optimization flags
>       is turned on.  */
>    if (flag_graphite_identity
> @@ -576,7 +861,7 @@ public:
>    {}
>
>    /* opt_pass methods: */
> -  virtual bool gate (function *) { return gate_graphite_transforms (); }
> +  virtual bool gate (function *fun) { return gate_graphite_transforms (fun); 
> }
>
>  }; // class pass_graphite
>
> @@ -611,7 +896,7 @@ public:
>    {}
>
>    /* opt_pass methods: */
> -  virtual bool gate (function *) { return gate_graphite_transforms (); }
> +  virtual bool gate (function *fun) { return gate_graphite_transforms (fun); 
> }
>    virtual unsigned int execute (function *fun) { return graphite_transforms 
> (fun); }
>
>  }; // class pass_graphite_transforms
> @@ -624,4 +909,108 @@ make_pass_graphite_transforms (gcc::context *ctxt)
>    return new pass_graphite_transforms (ctxt);
>  }
>
> +/* Return true if the expression E can be represented by Graphite.
> +   A product is only accepted if one factor fits a signed
> +   HOST_WIDE_INT; something like "n * m" is not allowed.  */
> +
> +static bool
> +graphite_can_represent_init (tree e)
> +{
> +  switch (TREE_CODE (e))
> +    {
> +    case POLYNOMIAL_CHREC:
> +      if (!graphite_can_represent_init (CHREC_LEFT (e)))
> +        return false;
> +      return graphite_can_represent_init (CHREC_RIGHT (e));
> +
> +    case MULT_EXPR:
> +      {
> +        tree op0 = TREE_OPERAND (e, 0);
> +        tree op1 = TREE_OPERAND (e, 1);
> +
> +        /* If the first operand contains symbols, it is checked
> +           recursively and the second one must be the constant
> +           factor; otherwise the roles are swapped.  */
> +        bool first_is_symbolic = chrec_contains_symbols (op0);
> +        tree symbolic = first_is_symbolic ? op0 : op1;
> +        tree factor = first_is_symbolic ? op1 : op0;
> +
> +        return graphite_can_represent_init (symbolic)
> +               && tree_fits_shwi_p (factor);
> +      }
> +
> +    case PLUS_EXPR:
> +    case POINTER_PLUS_EXPR:
> +    case MINUS_EXPR:
> +      if (!graphite_can_represent_init (TREE_OPERAND (e, 0)))
> +        return false;
> +      return graphite_can_represent_init (TREE_OPERAND (e, 1));
> +
> +    case NEGATE_EXPR:
> +    case BIT_NOT_EXPR:
> +    CASE_CONVERT:
> +    case NON_LVALUE_EXPR:
> +      return graphite_can_represent_init (TREE_OPERAND (e, 0));
> +
> +    default:
> +      /* Every other expression is accepted.  */
> +      return true;
> +    }
> +}
> +
> +/* Return true when SCEV can be represented in the polyhedral model.
> +
> +   An expression can be represented, if it can be expressed as an
> +   affine expression.  For loops (i, j) and parameters (m, n) all
> +   affine expressions are of the form:
> +
> +   x1 * i + x2 * j + x3 * m + x4 * n + x5 * 1 where x1..x5 element of Z
> +
> +   1 i + 20 j + (-2) m + 25
> +
> +   Something like "i * n" or "n * m" is not allowed.  */
>
> +bool
> +graphite_can_represent_scev (sese_l scop, tree scev)
> +{
> +  if (chrec_contains_undetermined (scev))
> +    return false;
> +
> +  switch (TREE_CODE (scev))
> +    {
> +    case NEGATE_EXPR:
> +    case BIT_NOT_EXPR:
> +    CASE_CONVERT:
> +    case NON_LVALUE_EXPR:
> +      /* Unary expressions are decided by their operand.  */
> +      return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0));
> +
> +    case PLUS_EXPR:
> +    case POINTER_PLUS_EXPR:
> +    case MINUS_EXPR:
> +      if (!graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0)))
> +        return false;
> +      return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 1));
> +
> +    case MULT_EXPR:
> +      {
> +        tree op0 = TREE_OPERAND (scev, 0);
> +        tree op1 = TREE_OPERAND (scev, 1);
> +
> +        /* Neither factor may be a conversion.  */
> +        if (CONVERT_EXPR_CODE_P (TREE_CODE (op0))
> +            || CONVERT_EXPR_CODE_P (TREE_CODE (op1)))
> +          return false;
> +
> +        /* At most one factor may contain symbols.  */
> +        if (chrec_contains_symbols (op0) && chrec_contains_symbols (op1))
> +          return false;
> +
> +        if (!graphite_can_represent_init (scev))
> +          return false;
> +
> +        return graphite_can_represent_scev (scop, op0)
> +               && graphite_can_represent_scev (scop, op1);
> +      }
> +
> +    case POLYNOMIAL_CHREC:
> +      /* Check for constant strides.  With a non constant stride of
> +         'n' we would have a value of 'iv * n'.  Also check that the
> +         initial value can be represented: for example 'n * m' cannot
> +         be represented.  */
> +      gcc_assert (loop_in_sese_p (get_loop (cfun,
> +                                            CHREC_VARIABLE (scev)), scop));
> +      if (!evolution_function_right_is_integer_cst (scev))
> +        return false;
> +      if (!graphite_can_represent_init (scev))
> +        return false;
> +      return graphite_can_represent_scev (scop, CHREC_LEFT (scev));
> +
> +    case ADDR_EXPR:
> +      /* We cannot encode addresses for ISL.  */
> +      return false;
> +
> +    default:
> +      break;
> +    }
> +
> +  /* Only affine functions can be represented.  */
> +  return !tree_contains_chrecs (scev, NULL)
> +         && scev_is_linear_expression (scev);
> +}
> diff --git a/gcc/graphite.h b/gcc/graphite.h
> index 3fe1345cf96..1e0ccd2de7b 100644
> --- a/gcc/graphite.h
> +++ b/gcc/graphite.h
> @@ -384,6 +384,10 @@ struct scop
>    /* The maximum alias set as assigned to drs by build_alias_sets.  */
>    unsigned max_alias_set;
>
> +  /* The set of dependent ddrs that were rejected by build_alias_set
> +     and that must be handled by other means (runtime checking). */
> +  auto_vec<ddr_p> unhandled_alias_ddrs;
> +
>    /* All the basic blocks in this scop that contain memory references
>       and that will be represented as statements in the polyhedral
>       representation.  */
> @@ -459,10 +463,15 @@ carries_deps (__isl_keep isl_union_map *schedule,
>
>  extern bool build_poly_scop (scop_p);
>  extern bool graphite_regenerate_ast_isl (scop_p);
> -extern void build_scops (vec<scop_p> *);
> +
> +class oacc_context;
> +extern void build_scops (vec<scop_p> *, oacc_context*);
> +
>  extern tree cached_scalar_evolution_in_region (const sese_l &, loop_p, tree);
>  extern void dot_all_sese (FILE *, vec<sese_l> &);
>  extern void dot_sese (sese_l &);
>  extern void dot_cfg ();
>
> +extern bool graphite_can_represent_scev (sese_l, tree);
> +
>  #endif
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index 2ec3c504cd6..73c0e7d1880 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -52,10 +52,13 @@ enum ifn_unique_kind {
>
>       CHUNK_MAX = LOOP (CODE_CHUNKS, DIR, RANGE, STEP, CHUNK_SIZE, MASK)
>       STEP = LOOP (CODE_STEP, DIR, RANGE, STEP, CHUNK_SIZE, MASK)
> -     OFFSET = LOOP (CODE_OFFSET, DIR, RANGE, STEP, CHUNK_SIZE, MASK, 
> CHUNK_NO)
> -     BOUND = LOOP (CODE_BOUND, DIR, RANGE, STEP, CHUNK_SIZE, MASK, OFFSET)
> +     OFFSET = LOOP (CODE_OFFSET, DIR, RANGE, STEP, CHUNK_SIZE, MASK, BASE, 
> CHUNK_NO)
> +     BOUND = LOOP (CODE_BOUND, DIR, RANGE, STEP, CHUNK_SIZE, MASK, END, 
> OFFSET)
>
>       DIR - +1 for up loop, -1 for down loop
> +     BASE - Initial value of the loop's iteration variable.
> +     END - Last value of the loop's iteration variable +1 or -1, depending 
> on the
> +           direction of the iteration.
>       RANGE - Range of loop (END - BASE)
>       STEP - iteration step size
>       CHUNKING - size of chunking, (constant zero for no chunking)
> diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
> index 08afaceb87e..b3d21c1181d 100644
> --- a/gcc/omp-expand.c
> +++ b/gcc/omp-expand.c
> @@ -6015,8 +6015,8 @@ expand_omp_taskloop_for_inner (struct omp_region 
> *region,
>       T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
>
>     <head_bb> [created by splitting end of entry_bb]
> -     T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
> -     T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
> +     T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, b, 
> chunk_no);
> +     T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, e, offset);
>       if (!(offset LTGT bound)) goto bottom_bb;
>
>     <body_bb> [incoming]
> @@ -6270,20 +6270,22 @@ expand_oacc_for (struct omp_region *region, struct 
> omp_for_data *fd)
>    /* Loop offset & bound go into head_bb.  */
>    gsi = gsi_start_bb (head_bb);
>
> -  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
> +  tree begin = force_gimple_operand_gsi (&gsi, unshare_expr (fd->loop.n1), 
> true, NULL_TREE, true, GSI_SAME_STMT);
> +  call = gimple_build_call_internal (IFN_GOACC_LOOP, 8,
>                                      build_int_cst (integer_type_node,
>                                                     IFN_GOACC_LOOP_OFFSET),
>                                      dir, range, s,
> -                                    chunk_size, gwv, chunk_no);
> +                                    chunk_size, gwv, begin, chunk_no);
>    gimple_call_set_lhs (call, offset_init);
>    gimple_set_location (call, loc);
>    gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
>
> -  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
> +  tree end = force_gimple_operand_gsi (&gsi, unshare_expr (fd->loop.n2), 
> true, NULL_TREE, true, GSI_SAME_STMT);
> +  call = gimple_build_call_internal (IFN_GOACC_LOOP, 8,
>                                      build_int_cst (integer_type_node,
>                                                     IFN_GOACC_LOOP_BOUND),
>                                      dir, range, s,
> -                                    chunk_size, gwv, offset_init);
> +                                    chunk_size, gwv, end, offset_init);
>    gimple_call_set_lhs (call, bound);
>    gimple_set_location (call, loc);
>    gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
> @@ -6332,16 +6334,20 @@ expand_oacc_for (struct omp_region *region, struct 
> omp_for_data *fd)
>           tree t, e_gwv = integer_minus_one_node;
>           tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
>
> +         tree begin = force_gimple_operand_gsi (&gsi, unshare_expr 
> (fd->loop.n1),
> +                                                true, NULL_TREE, true, 
> GSI_SAME_STMT);
>           t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
> -         call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, 
> e_range,
> -                                            element_s, chunk, e_gwv, chunk);
> +         call = gimple_build_call_internal (IFN_GOACC_LOOP, 8, t, dir, 
> e_range,
> +                                            element_s, chunk, e_gwv, begin, 
> chunk);
>           gimple_call_set_lhs (call, e_offset);
>           gimple_set_location (call, loc);
>           gsi_insert_before (&gsi, call, GSI_SAME_STMT);
>
> +         tree end = force_gimple_operand_gsi (&gsi, unshare_expr 
> (fd->loop.n2),
> +                                              true, NULL_TREE, true, 
> GSI_SAME_STMT);
>           t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
> -         call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, 
> e_range,
> -                                            element_s, chunk, e_gwv, 
> e_offset);
> +         call = gimple_build_call_internal (IFN_GOACC_LOOP, 8, t, dir, 
> e_range,
> +                                            element_s, chunk, e_gwv, end, 
> e_offset);
>           gimple_call_set_lhs (call, e_bound);
>           gimple_set_location (call, loc);
>           gsi_insert_before (&gsi, call, GSI_SAME_STMT);
> diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
> index a73b73fb41a..3e81e878cf9 100644
> --- a/gcc/omp-offload.c
> +++ b/gcc/omp-offload.c
> @@ -83,6 +83,8 @@ struct oacc_loop
>    vec<gcall *> ifns;  /* Contained loop abstraction functions.  */
>    tree chunk_size; /* Chunk size.  */
>    gcall *head_end; /* Final marker of head sequence.  */
> +
> +  bool can_be_parallel; /* Can the loop be parallelized?. */
>  };
>
>  /* Holds offload tables with decls.  */
> @@ -657,7 +659,7 @@ oacc_xform_loop (gcall *call)
>
>           if (chunking)
>             {
> -             tree chunk = fold_convert (diff_type, gimple_call_arg (call, 
> 6));
> +             tree chunk = fold_convert (diff_type, gimple_call_arg (call, 
> 7));
>               tree per
>                 = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
>               per = build2 (MULT_EXPR, diff_type, per, chunk);
> @@ -697,7 +699,7 @@ oacc_xform_loop (gcall *call)
>
>           r = fold_build2 (MULT_EXPR, diff_type, span, step);
>
> -         tree offset = gimple_call_arg (call, 6);
> +         tree offset = gimple_call_arg (call, 7);
>           r = build2 (PLUS_EXPR, diff_type, r,
>                       fold_convert (diff_type, offset));
>           r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
> @@ -911,7 +913,8 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int 
> level, unsigned used)
>    check = false;
>  #endif
>    if (check
> -      && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn)))
> +      && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn))
> +      && !lookup_attribute ("oacc parallel_kernels_graphite", 
> DECL_ATTRIBUTES (fn)))
>      {
>        static char const *const axes[] =
>        /* Must be kept in sync with GOMP_DIM enumeration.  */
> @@ -1015,6 +1018,11 @@ new_oacc_loop (oacc_loop *parent, gcall *marker)
>       flags.  */
>
>    loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    dump_printf_loc (MSG_NOTE,
> +                    dump_user_location_t::from_location_t (loop->loc),
> +                    "[new_oacc_loop] Found loop %s 'auto' directive.\n",
> +                    loop->flags & OLF_AUTO ? "with" : "without");
>
>    tree chunk_size = integer_zero_node;
>    if (loop->flags & OLF_GANG_STATIC)
> @@ -1217,45 +1225,62 @@ oacc_loop_discover_walk (oacc_loop *loop, basic_block 
> bb)
>           break;
>
>         case IFN_UNIQUE:
> -         enum ifn_unique_kind kind
> -           = (enum ifn_unique_kind) (TREE_INT_CST_LOW
> -                                     (gimple_call_arg (call, 0)));
> -         if (kind == IFN_UNIQUE_OACC_HEAD_MARK
> -             || kind == IFN_UNIQUE_OACC_TAIL_MARK)
> -           {
> -             if (gimple_call_num_args (call) == 2)
> -               {
> -                 gcc_assert (marker && !remaining);
> -                 marker = 0;
> -                 if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
> -                   loop = finish_oacc_loop (loop);
> -                 else
> -                   loop->head_end = call;
> -               }
> -             else
> -               {
> -                 int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
> +         {
> +           enum ifn_unique_kind kind
> +             = (enum ifn_unique_kind) (TREE_INT_CST_LOW
> +                                       (gimple_call_arg (call, 0)));
> +           if (kind == IFN_UNIQUE_OACC_HEAD_MARK
> +               || kind == IFN_UNIQUE_OACC_TAIL_MARK)
> +             {
> +               if (gimple_call_num_args (call) == 2)
> +                 {
> +                   gcc_assert (marker && !remaining);
> +                   marker = 0;
> +                   if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
> +                     loop = finish_oacc_loop (loop);
> +                   else
> +                     loop->head_end = call;
> +                 }
> +               else
> +                 {
> +                   int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
>
> -                 if (!marker)
> -                   {
> -                     if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
> -                       loop = new_oacc_loop (loop, call);
> -                     remaining = count;
> -                   }
> -                 gcc_assert (count == remaining);
> -                 if (remaining)
> -                   {
> -                     remaining--;
> -                     if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
> -                       loop->heads[marker] = call;
> -                     else
> -                       loop->tails[remaining] = call;
> -                   }
> -                 marker++;
> -               }
> -           }
> +                   if (!marker)
> +                     {
> +                       if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
> +                           loop = new_oacc_loop (loop, call);
> +
> +                       remaining = count;
> +                     }
> +                   gcc_assert (count == remaining);
> +                   if (remaining)
> +                     {
> +                       remaining--;
> +                       if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
> +                         loop->heads[marker] = call;
> +                       else
> +                         loop->tails[remaining] = call;
> +                     }
> +                   marker++;
> +                 }
> +             }
> +           break;
> +         }
> +
> +       case IFN_GOACC_REDUCTION:
> +         break;
>         }
>      }
> +
> +  /* Copy the "can_be_parallel" flag of the loop that contains BB to
> +     the OpenACC loop.  */
> +  if (bb->loop_father->can_be_parallel)
> +    {
> +      loop->can_be_parallel = true;
> +      if (dump_file)
> +       {
> +         const dump_user_location_t loc
> +           = dump_user_location_t::from_location_t (loop->loc);
> +         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
> +                          "Detected parallelizable loop.\n");
> +       }
> +    }
> +
>    if (remaining || marker)
>      {
>        bb = single_succ (bb);
> @@ -1411,12 +1436,61 @@ oacc_loop_process (oacc_loop *loop)
>      oacc_loop_process (loop->sibling);
>  }
>
> -/* Walk the OpenACC loop heirarchy checking and assigning the
> +/* Interpret the "can_be_parallel" flag of LOOP to decide if it can
> +   be made "independent".  Return true and set OLF_INDEPENDENT in the
> +   flags of LOOP if LOOP is not a routine, is an 'auto' loop and has
> +   its "can_be_parallel" flag set; return false otherwise.  */
> +
> +static bool
> +oacc_loop_parallelize (oacc_loop *loop)
> +{
> +  const bool details = dump_file && (dump_flags & TDF_DETAILS);
> +
> +  if (loop->routine)
> +    return false;
> +
> +  if ((loop->flags & OLF_AUTO) == 0)
> +    {
> +      if (details)
> +       dump_printf_loc (MSG_NOTE,
> +                        dump_user_location_t::from_location_t (loop->loc),
> +                        "[oacc_loop_parallelize] Not an 'auto' loop.\n");
> +      return false;
> +    }
> +
> +  if (!loop->can_be_parallel)
> +    {
> +      if (details)
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION,
> +                        dump_user_location_t::from_location_t (loop->loc),
> +                        "'auto' loop cannot be parallel.\n");
> +      return false;
> +    }
> +
> +  if (details)
> +    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
> +                    dump_user_location_t::from_location_t (loop->loc),
> +                    "'auto' loop can be parallel.\n");
> +
> +  /* TODO-kernels The OLF_AUTO flag is not cleared here because it is
> +     still needed for further processing in
> +     oacc_loop_fixed_partitions and oacc_loop_auto_partitions.  We
> +     should remove it here and use another flag to indicate that the
> +     partitioning must be assigned.  */
> +  loop->flags |= OLF_INDEPENDENT;
> +
> +  if (details)
> +    fprintf (dump_file,
> +            "[oacc_loop_parallelize] %s:%u Transformed 'auto' "
> +            "into 'independent'.\n",
> +            LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
> +
> +  return true;
> +}
> +
> +/* Walk the OpenACC loop hierarchy checking and assigning the
>     programmer-specified partitionings.  OUTER_MASK is the partitioning
>     this loop is contained within.  Return mask of partitioning
>     encountered.  If any auto loops are discovered, set GOMP_DIM_MAX
>     bit.  */
>
> +
>  static unsigned
>  oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
>  {
> @@ -1446,14 +1520,18 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned 
> outer_mask)
>
>        if ((this_mask != 0) + auto_par + seq_par > 1)
>         {
> -         if (noisy)
> -           error_at (loop->loc,
> -                     seq_par
> -                     ? G_("%<seq%> overrides other OpenACC loop specifiers")
> -                     : G_("%<auto%> conflicts with other OpenACC loop "
> -                          "specifiers"));
> +         if (seq_par && noisy)
> +           error_at (loop->loc, G_("%<seq%> overrides other OpenACC loop 
> specifiers"));
>           maybe_auto = false;
> +
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           dump_printf_loc (MSG_NOTE,
> +                            dump_user_location_t::from_location_t 
> (loop->loc),
> +                            "[oacc_loop_fixed_partitions] Removed 
> 'auto'.\n");
> +
> +
>           loop->flags &= ~OLF_AUTO;
> +
>           if (seq_par)
>             {
>               loop->flags
> @@ -1467,6 +1545,9 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned 
> outer_mask)
>           loop->flags |= OLF_AUTO;
>           mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
>         }
> +
> +      if (oacc_loop_parallelize (loop))
> +         mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
>      }
>
>    if (this_mask & outer_mask)
> diff --git a/gcc/predict.c b/gcc/predict.c
> index a7ae977c866..c44aac58f28 100644
> --- a/gcc/predict.c
> +++ b/gcc/predict.c
> @@ -4035,7 +4035,7 @@ pass_profile::execute (function *fun)
>       class loop *loop;
>       FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
>         if (loop->header->count.initialized_p ())
> -         fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n",
> +         fprintf (dump_file, "Loop %d got predicted to iterate %i times.\n",
>                    loop->num,
>                    (int)expected_loop_iterations_unbounded (loop));
>     }
> diff --git a/gcc/testsuite/gcc.dg/graphite/alias-0-no-runtime-check.c 
> b/gcc/testsuite/gcc.dg/graphite/alias-0-no-runtime-check.c
> new file mode 100644
> index 00000000000..7228fb09818
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/graphite/alias-0-no-runtime-check.c
> @@ -0,0 +1,20 @@
> +/* This test demonstrates a loop nest that Graphite cannot handle
> +   because of aliasing. But the loop nest can be handled with enabled
> +   runtime alias checking. */
> +
> +/* { dg-options "-O2 -fgraphite-identity -fno-graphite-runtime-alias-checks 
> -fdump-tree-graphite-details" } */
> +
> +void sum(int *x, int *y, unsigned *sum)
> +{
> +  unsigned i,j;
> +  *sum = 0;
> +
> +  for (i = 0; i < 10000; i=i+1)
> +    {
> +      int xi = x[i];
> +      for (j = 0; j < 22222; j=j+1)
> +         *sum +=  xi + y[j];
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump "number of SCoPs: 0" "graphite"} } */
> diff --git a/gcc/testsuite/gcc.dg/graphite/alias-0-runtime-check.c 
> b/gcc/testsuite/gcc.dg/graphite/alias-0-runtime-check.c
> new file mode 100644
> index 00000000000..a9f9ef99908
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/graphite/alias-0-runtime-check.c
> @@ -0,0 +1,21 @@
> +/* This test demonstrates a loop nest that Graphite cannot handle
> +   because of aliasing. But the loop nest can be handled with enabled
> +   runtime alias checking. */
> +
> +/* { dg-options "-O2 -fgraphite-identity -fgraphite-runtime-alias-checks 
> -fdump-tree-graphite-details" } */
> +
> +void sum(int *x, int *y, unsigned *sum)
> +{
> +  unsigned i,j;
> +  *sum = 0;
> +
> +  for (i = 0; i < 10000; i=i+1)
> +    {
> +      int xi = x[i];
> +      for (j = 0; j < 22222; j=j+1)
> +         *sum +=  xi + y[j];
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump "number of SCoPs: 1" "graphite"} } */
> +/* { dg-final { scan-tree-dump "Generated runtime alias 
> check.*?sum_.*?x_.*?y_.*?\n" "graphite"} } */
> diff --git a/gcc/testsuite/gcc.dg/graphite/alias-1.c 
> b/gcc/testsuite/gcc.dg/graphite/alias-1.c
> new file mode 100644
> index 00000000000..ee80dae1df3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/graphite/alias-1.c
> @@ -0,0 +1,22 @@
> +/* This test demonstrates a loop nest that Graphite cannot handle
> +   because of aliasing. It should be possible to handle this loop nest
> +   by creating a runtime alias check like in the very similar test
> +   alias-0-runtime-check.c. However Graphite analyses the data
> +   reference with respect to the innermost loop that contains the data
> +   reference, the variable "i" remains uninstantiated (in contrast to
> +   "j"), and consequently the alias check cannot be placed outside of
> +   the SCoP since "i" is not defined there. */
> +
> +/* { dg-options "-O2 -fgraphite-identity -fgraphite-runtime-alias-checks 
> -fdump-tree-graphite-details" } */
> +
> +void sum(int *x, int *y, unsigned *sum)
> +{
> +  unsigned i,j;
> +  *sum = 0;
> +
> +  for (i = 0; i < 10000; i=i+1)
> +    for (j = 0; j < 22222; j=j+1)
> +      *sum +=  x[i] + y[j];
> +}
> +
> +/* { dg-final { scan-tree-dump "number of SCoPs: 1" "graphite" { xfail *-*-* 
> } } } */
> diff --git a/gcc/tree-chrec-oacc.h b/gcc/tree-chrec-oacc.h
> new file mode 100644
> index 00000000000..bcbb1e03657
> --- /dev/null
> +++ b/gcc/tree-chrec-oacc.h
> @@ -0,0 +1,45 @@
> +/* OpenACC helpers for Chains of recurrences.
> +   Copyright (C) 2003-2020 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
> +for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +#ifndef GCC_TREE_CHREC_OACC_H
> +#define GCC_TREE_CHREC_OACC_H
> +
> +#include <gimple.h>
> +#include <internal-fn.h>
> +
> +/* Return true if T is an SSA_NAME whose defining statement is a call
> +   to the internal function IFN_GOACC_LOOP.  */
> +static inline bool
> +is_oacc_loop_ifn_call_def (tree t)
> +{
> +  if (TREE_CODE (t) != SSA_NAME)
> +    return false;
> +
> +  return gimple_call_internal_p (SSA_NAME_DEF_STMT (t), IFN_GOACC_LOOP);
> +}
> +
> +/* Return true if T is an SSA_NAME whose defining statement is a call
> +   to one of the internal functions IFN_GOACC_LOOP or
> +   IFN_GOACC_REDUCTION.  */
> +static inline bool
> +is_oacc_ifn_call_def (tree t)
> +{
> +  if (TREE_CODE (t) != SSA_NAME)
> +    return false;
> +
> +  gimple *def_stmt = SSA_NAME_DEF_STMT (t);
> +  return (gimple_call_internal_p (def_stmt, IFN_GOACC_LOOP)
> +         || gimple_call_internal_p (def_stmt, IFN_GOACC_REDUCTION));
> +}
> +#endif  /* GCC_TREE_CHREC_OACC_H  */
> diff --git a/gcc/tree-chrec.c b/gcc/tree-chrec.c
> index a8848067040..f536d6001ce 100644
> --- a/gcc/tree-chrec.c
> +++ b/gcc/tree-chrec.c
> @@ -1744,8 +1744,17 @@ scev_is_linear_expression (tree scev)
>      }
>  }
>
> -/* Determines whether the expression CHREC contains only interger consts
> -   in the right parts.  */
> +/* Return true if CHREC is an SSA_NAME whose defining statement is a
> +   call to the internal function IFN_GOACC_LOOP.  */
> +
> +static bool
> +is_oacc_loop_call (tree chrec)
> +{
> +  if (TREE_CODE (chrec) != SSA_NAME)
> +    return false;
> +
> +  return gimple_call_internal_p (SSA_NAME_DEF_STMT (chrec), IFN_GOACC_LOOP);
> +}
> +
> +/* Determines whether the expression CHREC contains only integer
> +   consts in the right parts.  OpenACC internal function calls
> +   which encode integer constants are also admitted. */
>
>  bool
>  evolution_function_right_is_integer_cst (const_tree chrec)
> @@ -1759,7 +1768,8 @@ evolution_function_right_is_integer_cst (const_tree 
> chrec)
>        return true;
>
>      case POLYNOMIAL_CHREC:
> -      return TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST
> +      return (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST
> +             || is_oacc_loop_call (CHREC_RIGHT (chrec)))
>         && (TREE_CODE (CHREC_LEFT (chrec)) != POLYNOMIAL_CHREC
>             || evolution_function_right_is_integer_cst (CHREC_LEFT (chrec)));
>
> diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
> index 5505ba46778..9094b2ac45d 100644
> --- a/gcc/tree-data-ref.c
> +++ b/gcc/tree-data-ref.c
> @@ -85,6 +85,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "fold-const.h"
>  #include "expr.h"
>  #include "gimple-iterator.h"
> +#include "tree-chrec-oacc.h"
>  #include "tree-ssa-loop-niter.h"
>  #include "tree-ssa-loop.h"
>  #include "tree-ssa.h"
> @@ -97,6 +98,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-eh.h"
>  #include "ssa.h"
>  #include "internal-fn.h"
> +#include "print-tree.h"
> +#include "graphite-oacc.h"
>
>  static struct datadep_stats
>  {
> @@ -884,18 +887,23 @@ canonicalize_base_object_address (tree addr)
>     dummy outermost loop.  In other cases perform loop analysis.
>
>     Return true if the analysis succeeded and store the results in DRB if so.
> -   BB analysis can only fail for bitfield or reversed-storage accesses.  */
> +   BB analysis can only fail for bitfield or reversed-storage accesses.
> +
> +   If ALLOW_NON_AFFINE_BASE is true, the analysis does not fail when the
> +   evolution of the base or of the offset is not affine; such a base or
> +   offset is then used as is, with a step of zero.  */
>
>  opt_result
>  dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
> -                     class loop *loop, const gimple *stmt)
> +                     class loop *loop, const gimple *stmt,
> +                     // TODO-kernels Rename (also allows non affine offset)
> +                     bool allow_non_affine_base)
>  {
>    poly_int64 pbitsize, pbitpos;
>    tree base, poffset;
>    machine_mode pmode;
>    int punsignedp, preversep, pvolatilep;
>    affine_iv base_iv, offset_iv;
> -  tree init, dinit, step;
> +  tree init, dinit;
>    bool in_loop = (loop && loop->num);
>
>    if (dump_file && (dump_flags & TDF_DETAILS))
> @@ -945,17 +953,20 @@ dr_analyze_innermost (innermost_loop_behavior *drb, 
> tree ref,
>    else
>      base = build_fold_addr_expr (base);
>
> +  bool affine_base = true;
>    if (in_loop)
>      {
> -      if (!simple_iv (loop, loop, base, &base_iv, true))
> +      affine_base = simple_iv (loop, loop, base, &base_iv, true);
> +      if (!affine_base && !allow_non_affine_base)
>         return opt_result::failure_at
>           (stmt, "failed: evolution of base is not affine.\n");
>      }
> -  else
> +
> +  if (!in_loop || !affine_base)
>      {
>        base_iv.base = base;
>        base_iv.step = ssize_int (0);
> -      base_iv.no_overflow = true;
> +      base_iv.no_overflow = affine_base ? false : true;
>      }
>
>    if (!poffset)
> @@ -965,14 +976,18 @@ dr_analyze_innermost (innermost_loop_behavior *drb, 
> tree ref,
>      }
>    else
>      {
> -      if (!in_loop)
> -        {
> -          offset_iv.base = poffset;
> -          offset_iv.step = ssize_int (0);
> -        }
> -      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
> -       return opt_result::failure_at
> -         (stmt, "failed: evolution of offset is not affine.\n");
> +      offset_iv.base = poffset;
> +      offset_iv.step = ssize_int (0);
> +
> +      if (in_loop && ! simple_iv (loop, loop, poffset, &offset_iv, true)) {
> +       if (!allow_non_affine_base)
> +         return opt_result::failure_at
> +           (stmt, "failed: evolution of offset is not affine.\n");
> +       offset_iv.base = poffset;
> +       offset_iv.step = ssize_int (0);
> +
> +
> +      }
>      }
>
>    init = ssize_int (pbytepos);
> @@ -983,14 +998,8 @@ dr_analyze_innermost (innermost_loop_behavior *drb, tree 
> ref,
>    init = size_binop (PLUS_EXPR, init, dinit);
>    base_misalignment -= TREE_INT_CST_LOW (dinit);
>
> -  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
> -  init = size_binop (PLUS_EXPR, init, dinit);
> -
> -  step = size_binop (PLUS_EXPR,
> -                    fold_convert (ssizetype, base_iv.step),
> -                    fold_convert (ssizetype, offset_iv.step));
> -
>    base = canonicalize_base_object_address (base_iv.base);
> +  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
>
>    /* See if get_pointer_alignment can guarantee a higher alignment than
>       the one we calculated above.  */
> @@ -1013,7 +1022,10 @@ dr_analyze_innermost (innermost_loop_behavior *drb, 
> tree ref,
>    drb->base_address = base;
>    drb->offset = fold_convert (ssizetype, offset_iv.base);
>    drb->init = init;
> -  drb->step = step;
> +  drb->step = size_binop (PLUS_EXPR,
> +                         fold_convert (ssizetype, base_iv.step),
> +                         fold_convert (ssizetype, offset_iv.step));
> +
>    if (known_misalignment (base_misalignment, base_alignment,
>                           &drb->base_misalignment))
>      drb->base_alignment = base_alignment;
> @@ -1023,7 +1035,7 @@ dr_analyze_innermost (innermost_loop_behavior *drb, 
> tree ref,
>        drb->base_misalignment = 0;
>      }
>    drb->offset_alignment = highest_pow2_factor (offset_iv.base);
> -  drb->step_alignment = highest_pow2_factor (step);
> +  drb->step_alignment = highest_pow2_factor (drb->step);
>
>    if (dump_file && (dump_flags & TDF_DETAILS))
>      fprintf (dump_file, "success.\n");
> @@ -1096,7 +1108,9 @@ dr_analyze_indices (struct data_reference *dr, edge 
> nest, loop_p loop)
>         {
>           op = TREE_OPERAND (ref, 1);
>           access_fn = analyze_scalar_evolution (loop, op);
> -         access_fn = instantiate_scev (nest, loop, access_fn);
> +         tree instantiated_fn = instantiate_scev (nest, loop, access_fn);
> +         if (instantiated_fn)
> +           access_fn = instantiated_fn;
>           access_fns.safe_push (access_fn);
>         }
>        else if (TREE_CODE (ref) == COMPONENT_REF
> @@ -1128,7 +1142,9 @@ dr_analyze_indices (struct data_reference *dr, edge 
> nest, loop_p loop)
>      {
>        op = TREE_OPERAND (ref, 0);
>        access_fn = analyze_scalar_evolution (loop, op);
> -      access_fn = instantiate_scev (nest, loop, access_fn);
> +      tree instantiated_fn = instantiate_scev (nest, loop, access_fn);
> +      if (instantiated_fn)
> +       access_fn = instantiated_fn;
>        if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
>         {
>           tree orig_type;
> @@ -1230,11 +1246,15 @@ free_data_ref (data_reference_p dr)
>
>     Return the data_reference description of MEMREF.  NEST is the outermost
>     loop in which the reference should be instantiated, LOOP is the loop
> -   in which the data reference should be analyzed.  */
> +   in which the data reference should be analyzed.
> +
> +   If ALLOW_NON_AFFINE_BASE is true, the function will not fail if the
> +   base is non-affine. */
>
>  struct data_reference *
>  create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
> -                bool is_read, bool is_conditional_in_stmt)
> +                bool is_read, bool is_conditional_in_stmt,
> +                bool allow_non_affine_base)
>  {
>    struct data_reference *dr;
>
> @@ -1252,7 +1272,8 @@ create_data_ref (edge nest, loop_p loop, tree memref, 
> gimple *stmt,
>    DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
>
>    dr_analyze_innermost (&DR_INNERMOST (dr), memref,
> -                       nest != NULL ? loop : NULL, stmt);
> +                       nest != NULL ? loop : NULL, stmt,
> +                       allow_non_affine_base);
>    dr_analyze_indices (dr, nest, loop);
>    dr_analyze_alias (dr);
>
> @@ -5422,6 +5443,7 @@ struct data_ref_loc
>    bool is_conditional_in_stmt;
>  };
>
> +tree oacc_ifn_call_extract (gimple *call);
>
>  /* Stores the locations of memory references in STMT to REFERENCES.  Returns
>     true if STMT clobbers memory, false otherwise.  */
> @@ -5444,6 +5466,10 @@ get_references_in_stmt (gimple *stmt, 
> vec<data_ref_loc, va_heap> *references)
>        if (gimple_call_internal_p (stmt))
>         switch (gimple_call_internal_fn (stmt))
>           {
> +         case IFN_GOACC_REDUCTION:
> +         case IFN_UNIQUE:
> +         case IFN_GOACC_LOOP:
> +           break;
>           case IFN_GOMP_SIMD_LANE:
>             {
>               class loop *loop = gimple_bb (stmt)->loop_father;
> @@ -5519,6 +5545,25 @@ get_references_in_stmt (gimple *stmt, 
> vec<data_ref_loc, va_heap> *references)
>                                    ptr);
>             references->safe_push (ref);
>             return false;
> +         case IFN_GOACC_LOOP:
> +           /* Treat this like a reference to the data from the
> +              original loop (offset, bound etc.) that has been
> +              replaced by the internal function call in
> +              omp-expand.c. */
> +
> +           op0 = gimple_call_lhs (stmt);
> +           op1 = oacc_ifn_call_extract (stmt);
> +
> +           if (DECL_P (op1)
> +               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
> +             {
> +               ref.ref = op1;
> +               ref.is_read = true;
> +               ref.is_conditional_in_stmt = false;
> +               references->safe_push (ref);
> +             }
> +           return false;
> +
>           default:
>             break;
>           }
> @@ -5616,11 +5661,15 @@ find_data_references_in_stmt (class loop *nest, 
> gimple *stmt,
>     unanalyzable reference, returns false, otherwise returns true.
>     NEST is the outermost loop of the loop nest in which the references
>     should be instantiated, LOOP is the loop in which the references
> -   should be analyzed.  */
> +   should be analyzed.
> +   If ALLOW_NON_AFFINE_BASE is true, the data references are allowed
> +   to have a non-affine base. */
>
>  bool
>  graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
> -                                      vec<data_reference_p> *datarefs)
> +                                      vec<data_reference_p> *datarefs,
> +                                      oacc_context *oacc_ctx,
> +                                      bool allow_non_affine_base)
>  {
>    unsigned i;
>    auto_vec<data_ref_loc, 2> references;
> @@ -5634,7 +5683,8 @@ graphite_find_data_references_in_stmt (edge nest, 
> loop_p loop, gimple *stmt,
>    FOR_EACH_VEC_ELT (references, i, ref)
>      {
>        dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
> -                           ref->is_conditional_in_stmt);
> +                           ref->is_conditional_in_stmt,
> +                           allow_non_affine_base);
>        gcc_assert (dr != NULL);
>        datarefs->safe_push (dr);
>      }
> diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
> index 771d20fbbc3..2d82e0ad923 100644
> --- a/gcc/tree-data-ref.h
> +++ b/gcc/tree-data-ref.h
> @@ -515,7 +515,7 @@ typedef struct data_dependence_relation *ddr_p;
>
>
>  opt_result dr_analyze_innermost (innermost_loop_behavior *, tree,
> -                                class loop *, const gimple *);
> +                                class loop *, const gimple *, bool = false);
>  extern bool compute_data_dependences_for_loop (class loop *, bool,
>                                                vec<loop_p> *,
>                                                vec<data_reference_p> *,
> @@ -539,12 +539,14 @@ extern void free_data_ref (data_reference_p);
>  extern void free_data_refs (vec<data_reference_p> );
>  extern opt_result find_data_references_in_stmt (class loop *, gimple *,
>                                                 vec<data_reference_p> *);
> +class oacc_context;
>  extern bool graphite_find_data_references_in_stmt (edge, loop_p, gimple *,
> -                                                  vec<data_reference_p> *);
> +                                                  vec<data_reference_p> *,
> +                                                  oacc_context *, bool);
>  tree find_data_references_in_loop (class loop *, vec<data_reference_p> *);
>  bool loop_nest_has_data_refs (loop_p loop);
>  struct data_reference *create_data_ref (edge, loop_p, tree, gimple *, bool,
> -                                       bool);
> +                                       bool, bool = false);
>  extern bool find_loop_nest (class loop *, vec<loop_p> *);
>  extern struct data_dependence_relation *initialize_data_dependence_relation
>       (struct data_reference *, struct data_reference *, vec<loop_p>);
> diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
> index 888af48946f..1ac27569a03 100644
> --- a/gcc/tree-loop-distribution.c
> +++ b/gcc/tree-loop-distribution.c
> @@ -2572,15 +2572,24 @@ latch_dominated_by_data_ref (class loop *loop, 
> data_reference *dr)
>  /* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's
>     data dependence relations ALIAS_DDRS.  */
>
> -static void
> +void
>  compute_alias_check_pairs (class loop *loop, vec<ddr_p> *alias_ddrs,
>                            vec<dr_with_seg_len_pair_t> *comp_alias_pairs)
>  {
>    unsigned int i;
>    unsigned HOST_WIDE_INT factor = 1;
> -  tree niters_plus_one, niters = number_of_latch_executions (loop);
> +  tree niters_plus_one, niters;
>
> +  if (loop->num == 0)
> +    {
> +      /* Loop 0 is not a real loop and hence it has no niter information.
> +        It executes once. */
> +      niters = build_int_cst (integer_type_node, 1);
> +    }
> +  else
> +     niters = number_of_latch_executions (loop);
>    gcc_assert (niters != NULL_TREE && niters != chrec_dont_know);
> +
>    niters = fold_convert (sizetype, niters);
>    niters_plus_one = size_binop (PLUS_EXPR, niters, size_one_node);
>
> @@ -2595,12 +2604,12 @@ compute_alias_check_pairs (class loop *loop, 
> vec<ddr_p> *alias_ddrs,
>        struct data_reference *dr_b = DDR_B (ddr);
>        tree seg_length_a, seg_length_b;
>
> -      if (latch_dominated_by_data_ref (loop, dr_a))
> +      if (loop->num != 0 && latch_dominated_by_data_ref (loop, dr_a))
>         seg_length_a = data_ref_segment_size (dr_a, niters_plus_one);
>        else
>         seg_length_a = data_ref_segment_size (dr_a, niters);
>
> -      if (latch_dominated_by_data_ref (loop, dr_b))
> +      if (loop->num != 0 && latch_dominated_by_data_ref (loop, dr_b))
>         seg_length_b = data_ref_segment_size (dr_b, niters_plus_one);
>        else
>         seg_length_b = data_ref_segment_size (dr_b, niters);
> diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
> index edab778277b..466aa65d899 100644
> --- a/gcc/tree-scalar-evolution.c
> +++ b/gcc/tree-scalar-evolution.c
> @@ -264,6 +264,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimple.h"
>  #include "ssa.h"
>  #include "gimple-pretty-print.h"
> +#include "tree-pretty-print.h"
> +#include "print-tree.h"
>  #include "fold-const.h"
>  #include "gimplify.h"
>  #include "gimple-iterator.h"
> @@ -276,6 +278,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-ssa.h"
>  #include "cfgloop.h"
>  #include "tree-chrec.h"
> +#include "tree-chrec-oacc.h"
>  #include "tree-affine.h"
>  #include "tree-scalar-evolution.h"
>  #include "dumpfile.h"
> @@ -284,6 +287,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-into-ssa.h"
>  #include "builtins.h"
>  #include "case-cfn-macros.h"
> +#include "omp-offload.h"
> +#include "internal-fn.h"
>
>  static tree analyze_scalar_evolution_1 (class loop *, tree);
>  static tree analyze_scalar_evolution_for_address_of (class loop *loop,
> @@ -550,11 +555,32 @@ get_scalar_evolution (basic_block instantiated_below, 
> tree scalar)
>      switch (TREE_CODE (scalar))
>        {
>        case SSA_NAME:
> +       {
>          if (SSA_NAME_IS_DEFAULT_DEF (scalar))
>           res = scalar;
>         else
> -         res = *find_var_scev_info (instantiated_below, scalar);
> +         {
> +           // TODO-kernels Should no longer be necessary, cf. 
> oacc_set_arg_evolutions
> +           res = *find_var_scev_info (instantiated_below, scalar);
> +           if (res)
> +             break;
> +
> +           tree name = SSA_NAME_IDENTIFIER (scalar);
> +
> +           if (name)
> +             {
> +               const char* id = IDENTIFIER_POINTER (name);
> +               if (strncmp (id, ".bound", 6) == 0
> +                   || strncmp (id, ".offset", 7) == 0
> +                   || strncmp (id, ".chunk_max", 11) == 0
> +                   || strncmp (id, ".chunk_no", 10) == 0
> +                   || strncmp (id, ".step", 5) == 0)
> +                 res = scalar;
> +             }
> +
> +         }
>         break;
> +       }
>
>        case REAL_CST:
>        case FIXED_CST:
> @@ -1115,6 +1141,7 @@ follow_ssa_edge_inner_loop_phi (class loop *outer_loop,
>    return follow_ssa_edge_expr (outer_loop, loop_phi_node, ev, halting_phi,
>                                evolution_of_loop, limit);
>  }
> +tree interpret_gimple_call (class loop *loop, gimple *call);
>
>  /* Follow the ssa edge into the expression EXPR.
>     Return true if the strongly connected component has been found.  */
> @@ -1125,7 +1152,9 @@ follow_ssa_edge_expr (class loop *loop, gimple 
> *at_stmt, tree expr,
>                       int limit)
>  {
>    enum tree_code code;
> -  tree type, rhs0, rhs1 = NULL_TREE;
> +  tree type = NULL_TREE;
> +  tree rhs0 = NULL_TREE;
> +  tree rhs1 = NULL_TREE;
>
>    /* The EXPR is one of the following cases:
>       - an SSA_NAME,
> @@ -1189,26 +1218,36 @@ tail_recurse:
>
>        /* At this level of abstraction, the program is just a set
>          of GIMPLE_ASSIGNs and PHI_NODEs.  In principle there is no
> -        other def to be handled.  */
> -      if (!is_gimple_assign (def))
> -       return t_false;
> -
> -      code = gimple_assign_rhs_code (def);
> -      switch (get_gimple_rhs_class (code))
> +        other def to be handled except for OpenACC internal function calls.
> +      */
> +      if (is_gimple_assign (def)) {
> +       code = gimple_assign_rhs_code (def);
> +       switch (get_gimple_rhs_class (code))
> +         {
> +         case GIMPLE_BINARY_RHS:
> +           rhs0 = gimple_assign_rhs1 (def);
> +           rhs1 = gimple_assign_rhs2 (def);
> +           break;
> +         case GIMPLE_UNARY_RHS:
> +         case GIMPLE_SINGLE_RHS:
> +           rhs0 = gimple_assign_rhs1 (def);
> +           break;
> +         default:
> +           return t_false;
> +         }
> +       type = TREE_TYPE (gimple_assign_lhs (def));
> +       at_stmt = def;
> +      }
> +      else if (is_oacc_ifn_call_def (expr)) {
> +       rhs0 = interpret_gimple_call (loop, def);
> +       type = TREE_TYPE (gimple_call_lhs (def));
> +       at_stmt = def;
> +      }
> +      else
>         {
> -       case GIMPLE_BINARY_RHS:
> -         rhs0 = gimple_assign_rhs1 (def);
> -         rhs1 = gimple_assign_rhs2 (def);
> -         break;
> -       case GIMPLE_UNARY_RHS:
> -       case GIMPLE_SINGLE_RHS:
> -         rhs0 = gimple_assign_rhs1 (def);
> -         break;
> -       default:
>           return t_false;
>         }
> -      type = TREE_TYPE (gimple_assign_lhs (def));
> -      at_stmt = def;
> +
>      }
>    else
>      {
> @@ -1920,7 +1959,75 @@ interpret_gimple_assign (class loop *loop, gimple 
> *stmt)
>                              gimple_assign_rhs2 (stmt));
>  }
>
> -
> +/* Extract loop information from an OpenACC internal function call. */
> +tree
> +oacc_ifn_call_extract (gimple *call) {
> +  gcc_assert (gimple_call_internal_p (call, IFN_GOACC_LOOP));
> +
> +  enum ifn_goacc_loop_kind code
> +    = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 
> 0));
> +
> +  tree expr;
> +  switch (code)
> +    {
> +    case IFN_GOACC_LOOP_STEP:
> +      {
> +       expr = gimple_call_arg (call, 3);
> +       break;
> +      }
> +    case IFN_GOACC_LOOP_CHUNKS:
> +      {
> +       expr = gimple_call_arg (call, 4);
> +       break;
> +      }
> +    case IFN_GOACC_LOOP_OFFSET:
> +      {
> +       expr = gimple_call_arg (call, 6);
> +       break;
> +      }
> +    case IFN_GOACC_LOOP_BOUND:
> +      {
> +       expr = gimple_call_arg (call, 2);
> +       break;
> +      }
> +    default:
> +      gcc_unreachable();
> +    }
> +
> +  gcc_assert (scev_is_linear_expression (expr));
> +  return expr;
> +}
> +
> +/* Interpret a gimple call statement. */
> +tree
> +interpret_gimple_call (class loop *loop, gimple *call)
> +{
> +  /* Only IFN_GOACC_LOOP calls are handled here.
> +     SCEV computation for those calls is only really relevant
> +     for Graphite's execution on OpenACC functions in the host
> +     compiler. */
> +
> +#ifndef ACCEL_COMPILER
> +  if (!gimple_call_internal_p (call, IFN_GOACC_LOOP))
> +    return chrec_dont_know;
> +#else
> +  return chrec_dont_know;
> +#endif
> +
> +  /* Information about OpenACC loops is encoded in internal function calls.
> +     Extract loop information from those calls, but ignore other calls. */
> +  if (!gimple_call_internal_p (call, IFN_GOACC_LOOP))
> +    return chrec_dont_know;
> +
> +  tree expr = oacc_ifn_call_extract (call);
> +  tree analyzed = analyze_scalar_evolution (loop, expr);
> +  gcc_checking_assert (expr == analyzed);
> +
> +  tree lhs = gimple_call_lhs (call);
> +  gcc_assert (lhs);
> +
> +  return chrec_convert (TREE_TYPE (lhs), analyzed, call);
> +}
>
>  /* This section contains all the entry points:
>     - number_of_iterations_in_loop,
> @@ -1969,6 +2076,10 @@ analyze_scalar_evolution_1 (class loop *loop, tree var)
>        res = interpret_gimple_assign (loop, def);
>        break;
>
> +    case GIMPLE_CALL:
> +      res = interpret_gimple_call (loop, def);
> +      break;
> +
>      case GIMPLE_PHI:
>        if (loop_phi_node_p (def))
>         res = interpret_loop_phi (loop, as_a <gphi *> (def));
> @@ -2049,6 +2160,91 @@ analyze_scalar_evolution (class loop *loop, tree var)
>    return res;
>  }
>
> +/* Check if VAR represents a parameter of an OpenACC region in an
> +   offloaded function.  That is, check that VAR's defining statement
> +   has the shape:
> +
> +   VAR2 = *.omp_data_i(D).field
> +   VAR = *_VAR2
> + */
> +static bool
> +is_oacc_arg (tree var)
> +{
> +  gimple* def =  SSA_NAME_DEF_STMT (var);
> +
> +  if (!def || !is_gimple_assign (def))
> +    return false;
> +
> +  tree rhs = gimple_assign_rhs1 (def);
> +
> +  if (TREE_CODE (rhs) != MEM_REF)
> +    return false;
> +
> +  tree ref = TREE_OPERAND (rhs, 0);
> +
> +  if (TREE_CODE (ref) != SSA_NAME)
> +    return false;
> +
> +  gimple* ref_def = SSA_NAME_DEF_STMT (ref);
> +
> +  if (!ref_def || !is_gimple_assign (ref_def))
> +    return false;
> +
> +  rhs = gimple_assign_rhs1 (ref_def);
> +  if (TREE_CODE (rhs) != COMPONENT_REF)
> +    return false;
> +
> +  tree base_ref = TREE_OPERAND (rhs, 0);
> +
> +  if (TREE_CODE (base_ref) != MEM_REF)
> +    return false;
> +
> +  tree base = TREE_OPERAND (base_ref, 0);
> +
> +  if (!SSA_NAME_IDENTIFIER (base))
> +    return false;
> +
> +  char* base_id =
> +    const_cast<char*>(IDENTIFIER_POINTER (SSA_NAME_IDENTIFIER (base)));
> +
> +  if (strncmp (base_id, ".omp_data_i", 11))
> +    return false;
> +
> +  return true;
> +}
> +
> +/* Search for SSA_NAMEs which represent parameters of an offloaded
> +   OpenACC region and set their SCEV values to a parametric chrec
> +   containing the variable itself.
> +
> +   We do not have a way to perform scalar evolution on the function
> +   from which an OpenACC outlined function was extracted while
> +   executing on the outlined function. Analysing those SSA_NAMEs
> +   would lead to chrec_dont_know because of the pointer indirection
> +   introduced by the outlining. We are better off treating the
> +   names as parameters. */
> +
> +/* TODO Come up with a way to determine the scalar evolution
> +   in the original function */
> +
> +void
> +oacc_set_arg_evolutions () {
> +  unsigned i;
> +  tree var;
> +
> +  FOR_EACH_SSA_NAME (i, var, cfun)
> +    {
> +      if (! is_oacc_arg (var))
> +       continue;
> +
> +      basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
> +
> +      tree *chrec = find_var_scev_info (bb, var);
> +      *chrec = var;
> +    }
> +
> +}
> +
>  /* Analyzes and returns the scalar evolution of VAR address in LOOP.  */
>
>  static tree
> @@ -2261,6 +2457,15 @@ instantiate_scev_name (edge instantiate_below,
>    class loop *def_loop;
>    basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (chrec));
>
> +  /* Do not instantiate names which dereference ".omp_data_i" field.
> +     Cf. oacc_set_arg_evolutions. */
> +  if (is_oacc_arg (chrec))
> +      return chrec;
> +
> +  if (is_oacc_loop_ifn_call_def (chrec))
> +    return interpret_gimple_call (evolution_loop, SSA_NAME_DEF_STMT (chrec));
> +
> +
>    /* A parameter, nothing to do.  */
>    if (!def_bb
>        || !dominated_by_p (CDI_DOMINATORS, def_bb, instantiate_below->dest))
> @@ -3221,10 +3426,14 @@ simple_iv_with_niters (class loop *wrto_loop, class 
> loop *use_loop,
>        || chrec_contains_symbols_defined_in_loop (ev, wrto_loop->num))
>      return false;
>
> +  tree ev_type = TREE_TYPE (ev);
> +  if (is_oacc_loop_ifn_call_def (ev))
> +    type = integer_type_node;
> +
>    if (tree_does_not_contain_chrecs (ev))
>      {
>        iv->base = ev;
> -      iv->step = build_int_cst (TREE_TYPE (ev), 0);
> +      iv->step = build_int_cst (ev_type, 0);
>        iv->no_overflow = true;
>        return true;
>      }
> @@ -3240,6 +3449,9 @@ simple_iv_with_niters (class loop *wrto_loop, class 
> loop *use_loop,
>      return false;
>
>    iv->step = CHREC_RIGHT (ev);
> +  if (is_oacc_loop_ifn_call_def (iv->step))
> +    iv->step = interpret_gimple_call (use_loop, SSA_NAME_DEF_STMT 
> (iv->step));
> +
>    if ((!allow_nonconstant_step && TREE_CODE (iv->step) != INTEGER_CST)
>        || tree_contains_chrecs (iv->step, NULL))
>      return false;
> @@ -3385,6 +3597,9 @@ expression_expensive_p (tree expr, hash_map<tree, 
> uint64_t> &cache,
>         return true;
>      }
>
> +  if (is_oacc_ifn_call_def (expr))
> +      return false;
> +
>    bool visited_p;
>    uint64_t &local_cost = cache.get_or_insert (expr, &visited_p);
>    if (visited_p)
> diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
> index 6c1268e84ad..0f3d7ce3e76 100644
> --- a/gcc/tree-ssa-loop-ivcanon.c
> +++ b/gcc/tree-ssa-loop-ivcanon.c
> @@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "builtins.h"
>  #include "tree-ssa-sccvn.h"
>  #include "dbgcnt.h"
> +#include "omp-general.h"
>
>  /* Specifies types of loops that may be unrolled.  */
>
> @@ -1256,7 +1257,13 @@ canonicalize_loop_induction_variables (class loop 
> *loop,
>       populates the loop bounds.  */
>    modified |= remove_redundant_iv_tests (loop);
>
> -  if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
> +  /* Skip unrolling on OpenACC outlined functions. Those functions
> +     contain loops (e.g. the top loop for a region) that never iterate
> +     and that should not be removed. */
> +
> +  bool skip_unrolling = oacc_get_fn_attrib (cfun->decl);
> +  if (!skip_unrolling &&
> +      try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
>                                   maxiter, locus, allow_peel))
>      return true;
>
> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index 7d61ef080eb..c54a0277670 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-ssa-loop.h"
>  #include "cfgloop.h"
>  #include "tree-chrec.h"
> +#include "tree-chrec-oacc.h"
>  #include "tree-scalar-evolution.h"
>  #include "tree-dfa.h"
>
> @@ -1980,6 +1981,9 @@ simplify_replace_tree (tree expr, tree old, tree 
> new_tree,
>    return (ret ? (do_fold ? fold (ret) : ret) : expr);
>  }
>
> +tree oacc_ifn_call_extract (gimple*);
> +tree interpret_gimple_call (class loop *loop, gimple *call);
> +
>  /* Expand definitions of ssa names in EXPR as long as they are simple
>     enough, and return the new expression.  If STOP is specified, stop
>     expanding if EXPR equals to it.  */
> @@ -1995,6 +1999,12 @@ expand_simple_operations (tree expr, tree stop, 
> hash_map<tree, tree> &cache)
>    if (expr == NULL_TREE)
>      return expr;
>
> +  if (is_oacc_ifn_call_def (expr))
> +    {
> +      //expr = oacc_ifn_call_extract (SSA_NAME_DEF_STMT (expr));
> +      expr = interpret_gimple_call (NULL, SSA_NAME_DEF_STMT (expr));
> +    }
> +
>    if (is_gimple_min_invariant (expr))
>      return expr;
>
> @@ -2465,6 +2475,9 @@ number_of_iterations_exit_assumptions (class loop 
> *loop, edge exit,
>    if (iv0_niters && iv1_niters)
>      return false;
>
> +  type = TREE_TYPE (iv0.step);
> +
> +
>    /* We don't want to see undefined signed overflow warnings while
>       computing the number of iterations.  */
>    fold_defer_overflow_warnings ();
> --
> 2.17.1
>
> -----------------
> Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
> Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, 
> Alexander Walter

Reply via email to