On 09/11/15 16:35, Tom de Vries wrote:
Hi,
this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.
The patch series contains these patches:
1 Insert new exit block only when needed in
transform_to_exit_first_loop_alt
2 Make create_parallel_loop return void
3 Ignore reduction clause on kernels directive
4 Implement -foffload-alias
5 Add in_oacc_kernels_region in struct loop
6 Add pass_oacc_kernels
7 Add pass_dominator_oacc_kernels
8 Add pass_ch_oacc_kernels
9 Add pass_parallelize_loops_oacc_kernels
10 Add pass_oacc_kernels pass group in passes.def
11 Update testcases after adding kernels pass group
12 Handle acc loop directive
13 Add c-c++-common/goacc/kernels-*.c
14 Add gfortran.dg/goacc/kernels-*.f95
15 Add libgomp.oacc-c-c++-common/kernels-*.c
16 Add libgomp.oacc-fortran/kernels-*.f95
The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.
Bootstrapped and reg-tested on x86_64.
Built and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).
I'll post the individual patches in reply to this message.
This patch adds a pass pass_ch_oacc_kernels, which is like pass_ch, but
only runs for loops with in_oacc_kernels_region set.
[ But... thinking about it a bit more, I think that we could use a
regular pass_ch instead. We only use the kernels pass group for a single
loop nest in a kernels region, and we mark all the loops in the loop
nest with in_oacc_kernels_region. So I think that the in_oacc_kernels_region
test in pass_ch_oacc_kernels::process_loop_p always evaluates to true. ]
So, I'll try to confirm with retesting that we can drop this patch.
Thanks,
- Tom
Add pass_ch_oacc_kernels
2015-11-09 Tom de Vries <t...@codesourcery.com>
* tree-pass.h (make_pass_ch_oacc_kernels): Declare.
* tree-ssa-loop-ch.c (pass_ch::pass_ch (pass_data, gcc::context)): New
constructor.
(pass_data_ch_oacc_kernels): New pass_data.
(class pass_ch_oacc_kernels): New pass.
(pass_ch_oacc_kernels::process_loop_p): New function.
(make_pass_ch_oacc_kernels): New function.
---
gcc/tree-pass.h | 1 +
gcc/tree-ssa-loop-ch.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 2825aea..f95a820 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -389,6 +389,7 @@ extern gimple_opt_pass *make_pass_iv_optimize (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_loop_done (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_ch (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_ch_vect (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_ch_oacc_kernels (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_ccp (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_phi_only_cprop (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_build_ssa (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c
index 7e618bf..8bf47fe 100644
--- a/gcc/tree-ssa-loop-ch.c
+++ b/gcc/tree-ssa-loop-ch.c
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "tree-ssa-scopedtables.h"
#include "tree-ssa-threadedge.h"
+#include "omp-low.h"
/* Duplicates headers of loops if they are small enough, so that the statements
in the loop body are always executed when the loop is entered. This
@@ -124,7 +125,7 @@ do_while_loop_p (struct loop *loop)
namespace {
-/* Common superclass for both header-copying phases. */
+/* Common superclass for header-copying phases. */
class ch_base : public gimple_opt_pass
{
protected:
@@ -159,6 +160,10 @@ public:
: ch_base (pass_data_ch, ctxt)
{}
+ pass_ch (pass_data data, gcc::context *ctxt)
+ : ch_base (data, ctxt)
+ {}
+
/* opt_pass methods: */
virtual bool gate (function *) { return flag_tree_ch != 0; }
@@ -414,3 +419,50 @@ make_pass_ch (gcc::context *ctxt)
{
return new pass_ch (ctxt);
}
+
+namespace {
+
+const pass_data pass_data_ch_oacc_kernels =
+{
+ GIMPLE_PASS, /* type */
+ "ch_oacc_kernels", /* name */
+ OPTGROUP_LOOP, /* optinfo_flags */
+ TV_TREE_CH, /* tv_id */
+ ( PROP_cfg | PROP_ssa ), /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_cleanup_cfg, /* todo_flags_finish */
+};
+
+class pass_ch_oacc_kernels : public pass_ch
+{
+public:
+ pass_ch_oacc_kernels (gcc::context *ctxt)
+ : pass_ch (pass_data_ch_oacc_kernels, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *) { return true; }
+
+protected:
+ /* ch_base method: */
+ virtual bool process_loop_p (struct loop *loop);
+}; // class pass_ch_oacc_kernels
+
+} // anon namespace
+
+bool
+pass_ch_oacc_kernels::process_loop_p (struct loop *loop)
+{
+ if (!loop->in_oacc_kernels_region)
+ return false;
+
+ return pass_ch::process_loop_p (loop);
+}
+
+gimple_opt_pass *
+make_pass_ch_oacc_kernels (gcc::context *ctxt)
+{
+ return new pass_ch_oacc_kernels (ctxt);
+}
--
1.9.1