On 15-11-14 13:14, Tom de Vries wrote:
Hi,

I'm submitting a patch series with initial support for the oacc kernels 
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
     1  Expand oacc kernels after pass_build_ealias
     2  Add pass_oacc_kernels
     3  Add pass_ch_oacc_kernels to pass_oacc_kernels
     4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
     5  Add pass_loop_im to pass_oacc_kernels
     6  Add pass_ccp to pass_oacc_kernels
     7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
     8  Do simple omp lowering for no address taken var
...

This patch lowers integer variables that do not have their address taken as local variables. A copy at region entry and a copy at region exit are used to transfer the value in and out.

In the context of reduction handling in a kernels region, this allows the parloops reduction analysis to recognize the reduction, even after oacc lowering has been done in pass_lower_omp.

In more detail, without this patch, the omp_data_i loads and stores are generated in place (in this case, in the loop):
...
                {
                  .omp_data_iD.2201 = &.omp_data_arr.15D.2220;
                  {
                    unsigned intD.9 iD.2146;

                    iD.2146 = 0;
                    goto <D.2207>;
                    <D.2208>:
                    D.2216 = .omp_data_iD.2201->cD.2203;
                    c.9D.2176 = *D.2216;
                    D.2177 = (long unsigned intD.10) iD.2146;
                    D.2178 = D.2177 * 4;
                    D.2179 = c.9D.2176 + D.2178;
                    D.2180 = *D.2179;
                    D.2217 = .omp_data_iD.2201->sumD.2205;
                    D.2218 = *D.2217;
                    D.2217 = .omp_data_iD.2201->sumD.2205;
                    D.2219 = D.2180 + D.2218;
                    *D.2217 = D.2219;
                    iD.2146 = iD.2146 + 1;
                    <D.2207>:
                    if (iD.2146 <= 524287) goto <D.2208>; else goto <D.2209>;
                    <D.2209>:
                  }
...

With this patch, the omp_data_i load and store for sum are generated at region entry and exit, respectively:
...
                {
                  .omp_data_iD.2201 = &.omp_data_arr.15D.2218;
                  D.2216 = .omp_data_iD.2201->sumD.2205;
                  sumD.2206 = *D.2216;
                  {
                    unsigned intD.9 iD.2146;

                    iD.2146 = 0;
                    goto <D.2207>;
                    <D.2208>:
                    D.2217 = .omp_data_iD.2201->cD.2203;
                    c.9D.2176 = *D.2217;
                    D.2177 = (long unsigned intD.10) iD.2146;
                    D.2178 = D.2177 * 4;
                    D.2179 = c.9D.2176 + D.2178;
                    D.2180 = *D.2179;
                    sumD.2206 = D.2180 + sumD.2206;
                    iD.2146 = iD.2146 + 1;
                    <D.2207>:
                    if (iD.2146 <= 524287) goto <D.2208>; else goto <D.2209>;
                    <D.2209>:
                  }
                  *D.2216 = sumD.2206;
                  #pragma omp return
                }
...


So, without the patch the reduction operation looks like this:
...
    *(.omp_data_iD.2201->sumD.2205) = *(.omp_data_iD.2201->sumD.2205) + x
...

And with this patch the reduction operation is simply:
...
    sumD.2206 = sumD.2206 + x
...

OK for trunk?

Thanks,
- Tom

2014-11-03  Tom de Vries  <t...@codesourcery.com>

	* gimple.c (gimple_seq_ior_addresses_taken_op)
	(gimple_seq_ior_addresses_taken): New functions.
	* gimple.h (gimple_seq_ior_addresses_taken): Declare.
	* omp-low.c (addresses_taken): New static variable.
	(lower_omp_target): Lower integral variables that do not have their
	address taken as local variables.  Use a copy at region entry and exit
	to copy the value in and out.
	(execute_lower_omp): Calculate addresses_taken.
---
 gcc/gimple.c  | 35 +++++++++++++++++++++++++++++++++++
 gcc/gimple.h  |  1 +
 gcc/omp-low.c | 25 ++++++++++++++++++++++---
 3 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/gcc/gimple.c b/gcc/gimple.c
index a9174e6..107eb26 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -2428,6 +2428,41 @@ gimple_ior_addresses_taken (bitmap addresses_taken, gimple stmt)
 					gimple_ior_addresses_taken_1);
 }
 
+/* Helper function for gimple_seq_ior_addresses_taken.  */
+
+static tree
+gimple_seq_ior_addresses_taken_op (tree *tp,
+				   int *walk_subtrees ATTRIBUTE_UNUSED,
+				   void *data)
+{
+  struct walk_stmt_info *wi = (struct walk_stmt_info *)data;
+  bitmap addresses_taken = (bitmap)wi->info;
+
+  tree t = *tp;
+  if (TREE_CODE (t) != ADDR_EXPR)
+    return NULL_TREE;
+
+  tree var = TREE_OPERAND (t, 0);
+  if (!DECL_P (var))
+    return NULL_TREE;
+
+  bitmap_set_bit (addresses_taken, DECL_UID (var));
+
+  return NULL_TREE;
+}
+
+/* Find the decls in SEQ that have their address taken, and set the
+   corresponding decl_uid in ADDRESSES_TAKEN.  */
+
+void
+gimple_seq_ior_addresses_taken (gimple_seq seq, bitmap addresses_taken)
+{
+  struct walk_stmt_info wi;
+  memset (&wi, 0, sizeof (wi));
+  wi.info = addresses_taken;
+
+  walk_gimple_seq (seq, NULL, gimple_seq_ior_addresses_taken_op, &wi);
+}
 
 /* Return true if TYPE1 and TYPE2 are compatible enough for builtin
    processing.  */
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 4faeaaa..528a9df 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -1316,6 +1316,7 @@ extern tree gimple_unsigned_type (tree);
 extern tree gimple_signed_type (tree);
 extern alias_set_type gimple_get_alias_set (tree);
 extern bool gimple_ior_addresses_taken (bitmap, gimple);
+extern void gimple_seq_ior_addresses_taken (gimple_seq, bitmap);
 extern bool gimple_builtin_call_types_compatible_p (const_gimple, tree);
 extern bool gimple_call_builtin_p (const_gimple);
 extern bool gimple_call_builtin_p (const_gimple, enum built_in_class);
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index e35fa8b..ff78b04 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -229,6 +229,7 @@ static int target_nesting_level;
 static struct omp_region *root_omp_region;
 static bitmap task_shared_vars;
 static vec<omp_context *> taskreg_contexts;
+static bitmap addresses_taken;
 
 static void scan_omp (gimple_seq *, omp_context *);
 static tree scan_omp_1_op (tree *, int *, void *);
@@ -11307,7 +11308,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   tree child_fn, t, c;
   gimple stmt = gsi_stmt (*gsi_p);
   gimple tgt_bind, bind;
-  gimple_seq tgt_body, olist, ilist, orlist, irlist, new_body;
+  gimple_seq tgt_body, olist, ilist, orlist, irlist, olist2, ilist2, new_body;
   location_t loc = gimple_location (stmt);
   bool offloaded, data_region;
   unsigned int map_cnt = 0;
@@ -11368,6 +11369,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   irlist = NULL;
   orlist = NULL;
+  ilist2 = NULL;
+  olist2 = NULL;
   switch (gimple_code (stmt))
     {
     case GIMPLE_OACC_KERNELS:
@@ -11451,8 +11454,18 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 		&& !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
 		&& TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
 	      x = build_simple_mem_ref (x);
-	    SET_DECL_VALUE_EXPR (new_var, x);
-	    DECL_HAS_VALUE_EXPR_P (new_var) = 1;
+	    if (gimple_code (stmt) == GIMPLE_OACC_KERNELS
+		&& !bitmap_bit_p (addresses_taken, DECL_UID (var))
+		&& INTEGRAL_TYPE_P (TREE_TYPE (var)))
+	      {
+		gimplify_assign (new_var, x, &ilist2);
+		gimplify_assign (unshare_expr (x), new_var, &olist2);
+	      }
+	    else
+	      {
+		SET_DECL_VALUE_EXPR (new_var, x);
+		DECL_HAS_VALUE_EXPR_P (new_var) = 1;
+	      }
 	  }
 	map_cnt++;
       }
@@ -11719,7 +11732,9 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   if (offloaded)
     {
+      gimple_seq_add_seq (&new_body, ilist2);
       gimple_seq_add_seq (&new_body, tgt_body);
+      gimple_seq_add_seq (&new_body, olist2);
       new_body = maybe_catch_exception (new_body);
     }
   else if (data_region)
@@ -12054,6 +12069,9 @@ execute_lower_omp (void)
       && flag_cilkplus == 0)
     return 0;
 
+  addresses_taken = BITMAP_ALLOC (NULL);
+  gimple_seq_ior_addresses_taken (gimple_body (cfun->decl), addresses_taken);
+
   all_contexts = splay_tree_new (splay_tree_compare_pointers, 0,
 				 delete_omp_context);
 
@@ -12079,6 +12097,7 @@ execute_lower_omp (void)
       all_contexts = NULL;
     }
   BITMAP_FREE (task_shared_vars);
+  BITMAP_FREE (addresses_taken);
   return 0;
 }
 
-- 
1.9.1





Reply via email to