On Tue, 21 Apr 2015, Thomas Schwinge wrote: > Hi! > > On Tue, 25 Nov 2014 12:27:34 +0100, Tom de Vries <tom_devr...@mentor.com> > wrote: > > On 15-11-14 18:21, Tom de Vries wrote: > > > On 15-11-14 13:14, Tom de Vries wrote: > > >> Hi, > > >> > > >> I'm submitting a patch series with initial support for the oacc kernels > > >> directive. > > >> > > >> The patch series uses pass_parallelize_loops to implement > > >> parallelization of > > >> loops in the oacc kernels region. > > >> > > >> The patch series consists of these 8 patches: > > >> ... > > >> 1 Expand oacc kernels after pass_build_ealias > > >> 2 Add pass_oacc_kernels > > >> 3 Add pass_ch_oacc_kernels to pass_oacc_kernels > > >> 4 Add pass_tree_loop_{init,done} to pass_oacc_kernels > > >> 5 Add pass_loop_im to pass_oacc_kernels > > >> 6 Add pass_ccp to pass_oacc_kernels > > >> 7 Add pass_parloops_oacc_kernels to pass_oacc_kernels > > >> 8 Do simple omp lowering for no address taken var > > >> ... > > > > > > This patch adds a pass_ch_oacc_kernels to the pass group > > > pass_oacc_kernels. > > > > > > The idea is that pass_parallelize_loops only deals with loops for which > > > the > > > header has been copied, so the easiest way to meet that requirement when > > > running > > > pass_parallelize_loops in group pass_oacc_kernels, is to run pass_ch as a > > > part > > > of pass_oacc_kernels. > > > > > > We define a separate pass pass_ch_oacc_kernels, to leave all loops that > > > aren't > > > part of a kernels region alone. > > > > > > > Updated for moving pass_oacc_kernels down past pass_fre in the pass list. > > > > Bootstrapped and reg-tested as before. > > > > OK for trunk? > > Committed to gomp-4_0-branch in r222281: > > commit 58c33a7965c379b55b549d50e3b79b2252bcc876 > Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> > Date: Tue Apr 21 19:48:16 2015 +0000 > > Add pass_ch_oacc_kernels to pass_oacc_kernels > > gcc/ > * omp-low.c (loop_in_oacc_kernels_region_p): New function. 
> * omp-low.h (loop_in_oacc_kernels_region_p): Declare. > * passes.def: Add pass_ch_oacc_kernels to pass group pass_oacc_kernels. > * tree-pass.h (make_pass_ch_oacc_kernels): Declare > * tree-ssa-loop-ch.c: Include omp-low.h. > (pass_ch_execute): Declare. > (pass_ch::execute): Factor out ... > (pass_ch_execute): ... this new function. If handling oacc kernels, > skip loops that are not in oacc kernels region. > (pass_ch_oacc_kernels::execute): > (pass_data_ch_oacc_kernels): New pass_data. > (class pass_ch_oacc_kernels): New pass. > (pass_ch_oacc_kernels::execute, make_pass_ch_oacc_kernels): New > function. > > git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@222281 > 138bc75d-0d04-0410-961f-82ee72b054a4 > --- > gcc/ChangeLog.gomp | 15 ++++++++ > gcc/omp-low.c | 91 > ++++++++++++++++++++++++++++++++++++++++++++++++ > gcc/omp-low.h | 2 ++ > gcc/passes.def | 1 + > gcc/tree-pass.h | 1 + > gcc/tree-ssa-loop-ch.c | 59 +++++++++++++++++++++++++++++-- > 6 files changed, 167 insertions(+), 2 deletions(-) > > diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp > index 8a53ad8..d00c5e0 100644 > --- gcc/ChangeLog.gomp > +++ gcc/ChangeLog.gomp > @@ -1,5 +1,20 @@ > 2015-04-21 Tom de Vries <t...@codesourcery.com> > > + * omp-low.c (loop_in_oacc_kernels_region_p): New function. > + * omp-low.h (loop_in_oacc_kernels_region_p): Declare. > + * passes.def: Add pass_ch_oacc_kernels to pass group pass_oacc_kernels. > + * tree-pass.h (make_pass_ch_oacc_kernels): Declare > + * tree-ssa-loop-ch.c: Include omp-low.h. > + (pass_ch_execute): Declare. > + (pass_ch::execute): Factor out ... > + (pass_ch_execute): ... this new function. If handling oacc kernels, > + skip loops that are not in oacc kernels region. > + (pass_ch_oacc_kernels::execute): > + (pass_data_ch_oacc_kernels): New pass_data. > + (class pass_ch_oacc_kernels): New pass. > + (pass_ch_oacc_kernels::execute, make_pass_ch_oacc_kernels): New > + function. > + > * passes.def: Add pass group pass_oacc_kernels. 
> * tree-pass.h (make_pass_oacc_kernels): Declare. > * tree-ssa-loop.c (gate_oacc_kernels): New static function. > diff --git gcc/omp-low.c gcc/omp-low.c > index 16d9a5e..1b03ae6 100644 > --- gcc/omp-low.c > +++ gcc/omp-low.c > @@ -13920,4 +13920,95 @@ gimple_stmt_omp_data_i_init_p (gimple stmt) > SSA_OP_DEF); > } > > +/* Return true if LOOP is inside a kernels region. */ > + > +bool > +loop_in_oacc_kernels_region_p (struct loop *loop, basic_block *region_entry, > + basic_block *region_exit)
Ehm. So why not simply add a flag to struct loop instead and set it during OMP region parsing/lowering? It's also very odd that you disable transforms on OMP regions but at the same time do all the OMP processing _after_ those transforms. Something feels backward here. Richard. > +{ > + bitmap excludes_bitmap = BITMAP_GGC_ALLOC (); > + bitmap region_bitmap = BITMAP_GGC_ALLOC (); > + bitmap_clear (region_bitmap); > + > + if (region_entry != NULL) > + *region_entry = NULL; > + if (region_exit != NULL) > + *region_exit = NULL; > + > + basic_block bb; > + gimple last; > + FOR_EACH_BB_FN (bb, cfun) > + { > + if (bitmap_bit_p (region_bitmap, bb->index)) > + continue; > + > + last = last_stmt (bb); > + if (!last) > + continue; > + > + if (gimple_code (last) != GIMPLE_OMP_TARGET > + || (gimple_omp_target_kind (last) != GF_OMP_TARGET_KIND_OACC_KERNELS)) > + continue; > + > + bitmap_clear (excludes_bitmap); > + bitmap_set_bit (excludes_bitmap, bb->index); > + > + vec<basic_block> dominated > + = get_all_dominated_blocks (CDI_DOMINATORS, bb); > + > + unsigned di; > + basic_block dom; > + > + basic_block end_region = NULL; > + FOR_EACH_VEC_ELT (dominated, di, dom) > + { > + if (dom == bb) > + continue; > + > + last = last_stmt (dom); > + if (!last) > + continue; > + > + if (gimple_code (last) != GIMPLE_OMP_RETURN) > + continue; > + > + if (end_region == NULL > + || dominated_by_p (CDI_DOMINATORS, end_region, dom)) > + end_region = dom; > + } > + > + if (end_region == NULL) > + { > + gimple kernels = last_stmt (bb); > + fatal_error (gimple_location (kernels), > + "End of kernel region unreachable"); > + } > + > + vec<basic_block> excludes > + = get_all_dominated_blocks (CDI_DOMINATORS, end_region); > + > + unsigned di2; > + basic_block exclude; > + > + FOR_EACH_VEC_ELT (excludes, di2, exclude) > + if (exclude != end_region) > + bitmap_set_bit (excludes_bitmap, exclude->index); > + > + FOR_EACH_VEC_ELT (dominated, di, dom) > + if (!bitmap_bit_p (excludes_bitmap, dom->index)) > + 
bitmap_set_bit (region_bitmap, dom->index); > + > + if (bitmap_bit_p (region_bitmap, loop->header->index)) > + { > + if (region_entry != NULL) > + *region_entry = bb; > + if (region_exit != NULL) > + *region_exit = end_region; > + return true; > + } > + } > + > + return false; > +} > + > #include "gt-omp-low.h" > diff --git gcc/omp-low.h gcc/omp-low.h > index 3d30c3b..ae63c9f 100644 > --- gcc/omp-low.h > +++ gcc/omp-low.h > @@ -29,6 +29,8 @@ extern tree omp_reduction_init (tree, tree); > extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *); > extern void omp_finish_file (void); > extern bool gimple_stmt_omp_data_i_init_p (gimple); > +extern bool loop_in_oacc_kernels_region_p (struct loop *, basic_block *, > + basic_block *); > > extern GTY(()) vec<tree, va_gc> *offload_funcs; > extern GTY(()) vec<tree, va_gc> *offload_vars; > diff --git gcc/passes.def gcc/passes.def > index 854c5b8..5cdbc87 100644 > --- gcc/passes.def > +++ gcc/passes.def > @@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see > function. 
*/ > NEXT_PASS (pass_oacc_kernels); > PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels) > + NEXT_PASS (pass_ch_oacc_kernels); > NEXT_PASS (pass_expand_omp_ssa); > POP_INSERT_PASSES () > NEXT_PASS (pass_merge_phi); > diff --git gcc/tree-pass.h gcc/tree-pass.h > index 35778f2..321229a 100644 > --- gcc/tree-pass.h > +++ gcc/tree-pass.h > @@ -379,6 +379,7 @@ extern gimple_opt_pass *make_pass_loop_prefetch > (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_iv_optimize (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_tree_loop_done (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_ch (gcc::context *ctxt); > +extern gimple_opt_pass *make_pass_ch_oacc_kernels (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_ccp (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_phi_only_cprop (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_build_ssa (gcc::context *ctxt); > diff --git gcc/tree-ssa-loop-ch.c gcc/tree-ssa-loop-ch.c > index d759de7..5f24bcb 100644 > --- gcc/tree-ssa-loop-ch.c > +++ gcc/tree-ssa-loop-ch.c > @@ -54,12 +54,15 @@ along with GCC; see the file COPYING3. If not see > #include "tree-inline.h" > #include "flags.h" > #include "tree-ssa-threadedge.h" > +#include "omp-low.h" > > /* Duplicates headers of loops if they are small enough, so that the > statements > in the loop body are always executed when the loop is entered. This > increases effectiveness of code motion optimizations, and reduces the need > for loop preconditioning. */ > > +static unsigned int pass_ch_execute (function *, bool); > + > /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT > instructions should be duplicated, limit is decreased by the actual > amount. 
*/ > @@ -178,6 +181,14 @@ public: > unsigned int > pass_ch::execute (function *fun) > { > + return pass_ch_execute (fun, false); > +} > + > +} // anon namespace > + > +static unsigned int > +pass_ch_execute (function *fun, bool oacc_kernels_p) > +{ > struct loop *loop; > basic_block header; > edge exit, entry; > @@ -211,6 +222,10 @@ pass_ch::execute (function *fun) > if (do_while_loop_p (loop)) > continue; > > + if (oacc_kernels_p > + && !loop_in_oacc_kernels_region_p (loop, NULL, NULL)) > + continue; > + > /* Iterate the header copying up to limit; this takes care of the cases > like while (a && b) {...}, where we want to have both of the conditions > copied. TODO -- handle while (a || b) - like cases, by not requiring > @@ -301,10 +316,50 @@ pass_ch::execute (function *fun) > return 0; > } > > -} // anon namespace > - > gimple_opt_pass * > make_pass_ch (gcc::context *ctxt) > { > return new pass_ch (ctxt); > } > + > +namespace { > + > +const pass_data pass_data_ch_oacc_kernels = > +{ > + GIMPLE_PASS, /* type */ > + "ch_oacc_kernels", /* name */ > + OPTGROUP_LOOP, /* optinfo_flags */ > + TV_TREE_CH, /* tv_id */ > + ( PROP_cfg | PROP_ssa ), /* properties_required */ > + 0, /* properties_provided */ > + 0, /* properties_destroyed */ > + 0, /* todo_flags_start */ > + TODO_cleanup_cfg, /* todo_flags_finish */ > +}; > + > + class pass_ch_oacc_kernels : public gimple_opt_pass > +{ > +public: > + pass_ch_oacc_kernels (gcc::context *ctxt) > + : gimple_opt_pass (pass_data_ch_oacc_kernels, ctxt) > + {} > + > + /* opt_pass methods: */ > + virtual bool gate (function *) { return true; } > + virtual unsigned int execute (function *); > + > +}; // class pass_ch_oacc_kernels > + > +unsigned int > +pass_ch_oacc_kernels::execute (function *fun) > +{ > + return pass_ch_execute (fun, true); > +} > + > +} // anon namespace > + > +gimple_opt_pass * > +make_pass_ch_oacc_kernels (gcc::context *ctxt) > +{ > + return new pass_ch_oacc_kernels (ctxt); > +} > > > Grüße, > Thomas > -- Richard 
Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Jennifer Guild, Dilip Upmanyu, Graham Norton HRB 21284 (AG Nuernberg)