On 15-11-14 13:14, Tom de Vries wrote:
Hi,
I'm submitting a patch series with initial support for the oacc kernels
directive.
The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.
The patch series consists of these 8 patches:
...
1 Expand oacc kernels after pass_build_ealias
2 Add pass_oacc_kernels
3 Add pass_ch_oacc_kernels to pass_oacc_kernels
4 Add pass_tree_loop_{init,done} to pass_oacc_kernels
5 Add pass_loop_im to pass_oacc_kernels
6 Add pass_ccp to pass_oacc_kernels
7 Add pass_parloops_oacc_kernels to pass_oacc_kernels
8 Do simple omp lowering for no address taken var
...
This patch lowers integer variables that do not have their address taken as
local variables. We copy the value in at region entry and copy it back out at
region exit.
In the context of reduction handling in a kernels region, this allows the
parloops reduction analysis to recognize the reduction, even after oacc lowering
has been done in pass_lower_omp.
In more detail, without this patch, the omp_data_i loads and stores are generated
in place (in this case, in the loop):
...
{
.omp_data_iD.2201 = &.omp_data_arr.15D.2220;
{
unsigned intD.9 iD.2146;
iD.2146 = 0;
goto <D.2207>;
<D.2208>:
D.2216 = .omp_data_iD.2201->cD.2203;
c.9D.2176 = *D.2216;
D.2177 = (long unsigned intD.10) iD.2146;
D.2178 = D.2177 * 4;
D.2179 = c.9D.2176 + D.2178;
D.2180 = *D.2179;
D.2217 = .omp_data_iD.2201->sumD.2205;
D.2218 = *D.2217;
D.2217 = .omp_data_iD.2201->sumD.2205;
D.2219 = D.2180 + D.2218;
*D.2217 = D.2219;
iD.2146 = iD.2146 + 1;
<D.2207>:
if (iD.2146 <= 524287) goto <D.2208>; else goto <D.2209>;
<D.2209>:
}
...
With this patch, the omp_data_i load and store for sum are generated at region
entry and exit:
...
{
.omp_data_iD.2201 = &.omp_data_arr.15D.2218;
D.2216 = .omp_data_iD.2201->sumD.2205;
sumD.2206 = *D.2216;
{
unsigned intD.9 iD.2146;
iD.2146 = 0;
goto <D.2207>;
<D.2208>:
D.2217 = .omp_data_iD.2201->cD.2203;
c.9D.2176 = *D.2217;
D.2177 = (long unsigned intD.10) iD.2146;
D.2178 = D.2177 * 4;
D.2179 = c.9D.2176 + D.2178;
D.2180 = *D.2179;
sumD.2206 = D.2180 + sumD.2206;
iD.2146 = iD.2146 + 1;
<D.2207>:
if (iD.2146 <= 524287) goto <D.2208>; else goto <D.2209>;
<D.2209>:
}
*D.2216 = sumD.2206;
#pragma omp return
}
...
So, without the patch the reduction operation looks like this:
...
*(.omp_data_iD.2201->sumD.2205) = *(.omp_data_iD.2201->sumD.2205) + x
...
And with this patch the reduction operation is simply:
...
sumD.2206 = sumD.2206 + x
...
OK for trunk?
Thanks,
- Tom
2014-11-03 Tom de Vries <t...@codesourcery.com>
* gimple.c (gimple_seq_ior_addresses_taken_op)
(gimple_seq_ior_addresses_taken): New functions.
* gimple.h (gimple_seq_ior_addresses_taken): Declare.
* omp-low.c (addresses_taken): New static variable.
(lower_omp_target): Lower integral variables that do not have their address
taken as local variables. Use a copy at region entry and exit to copy
the value in and out.
(execute_lower_omp): Calculate addresses_taken.
---
gcc/gimple.c | 35 +++++++++++++++++++++++++++++++++++
gcc/gimple.h | 1 +
gcc/omp-low.c | 25 ++++++++++++++++++++++---
3 files changed, 58 insertions(+), 3 deletions(-)
diff --git a/gcc/gimple.c b/gcc/gimple.c
index a9174e6..107eb26 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -2428,6 +2428,41 @@ gimple_ior_addresses_taken (bitmap addresses_taken, gimple stmt)
gimple_ior_addresses_taken_1);
}
+/* Helper function for gimple_seq_ior_addresses_taken. */
+
+static tree
+gimple_seq_ior_addresses_taken_op (tree *tp,
+ int *walk_subtrees ATTRIBUTE_UNUSED,
+ void *data)
+{
+ struct walk_stmt_info *wi = (struct walk_stmt_info *)data;
+ bitmap addresses_taken = (bitmap)wi->info;
+
+ tree t = *tp;
+ if (TREE_CODE (t) != ADDR_EXPR)
+ return NULL_TREE;
+
+ tree var = TREE_OPERAND (t, 0);
+ if (!DECL_P (var))
+ return NULL_TREE;
+
+ bitmap_set_bit (addresses_taken, DECL_UID (var));
+
+ return NULL_TREE;
+}
+
+/* Find the decls in SEQ that have their address taken, and set the
+ corresponding decl_uid in ADDRESSES_TAKEN. */
+
+void
+gimple_seq_ior_addresses_taken (gimple_seq seq, bitmap addresses_taken)
+{
+ struct walk_stmt_info wi;
+ memset (&wi, 0, sizeof (wi));
+ wi.info = addresses_taken;
+
+ walk_gimple_seq (seq, NULL, gimple_seq_ior_addresses_taken_op, &wi);
+}
/* Return true if TYPE1 and TYPE2 are compatible enough for builtin
processing. */
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 4faeaaa..528a9df 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -1316,6 +1316,7 @@ extern tree gimple_unsigned_type (tree);
extern tree gimple_signed_type (tree);
extern alias_set_type gimple_get_alias_set (tree);
extern bool gimple_ior_addresses_taken (bitmap, gimple);
+extern void gimple_seq_ior_addresses_taken (gimple_seq, bitmap);
extern bool gimple_builtin_call_types_compatible_p (const_gimple, tree);
extern bool gimple_call_builtin_p (const_gimple);
extern bool gimple_call_builtin_p (const_gimple, enum built_in_class);
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index e35fa8b..ff78b04 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -229,6 +229,7 @@ static int target_nesting_level;
static struct omp_region *root_omp_region;
static bitmap task_shared_vars;
static vec<omp_context *> taskreg_contexts;
+static bitmap addresses_taken;
static void scan_omp (gimple_seq *, omp_context *);
static tree scan_omp_1_op (tree *, int *, void *);
@@ -11307,7 +11308,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
tree child_fn, t, c;
gimple stmt = gsi_stmt (*gsi_p);
gimple tgt_bind, bind;
- gimple_seq tgt_body, olist, ilist, orlist, irlist, new_body;
+ gimple_seq tgt_body, olist, ilist, orlist, irlist, olist2, ilist2, new_body;
location_t loc = gimple_location (stmt);
bool offloaded, data_region;
unsigned int map_cnt = 0;
@@ -11368,6 +11369,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
irlist = NULL;
orlist = NULL;
+ ilist2 = NULL;
+ olist2 = NULL;
switch (gimple_code (stmt))
{
case GIMPLE_OACC_KERNELS:
@@ -11451,8 +11454,18 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
&& !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
&& TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
x = build_simple_mem_ref (x);
- SET_DECL_VALUE_EXPR (new_var, x);
- DECL_HAS_VALUE_EXPR_P (new_var) = 1;
+ if (gimple_code (stmt) == GIMPLE_OACC_KERNELS
+ && !bitmap_bit_p (addresses_taken, DECL_UID (var))
+ && INTEGRAL_TYPE_P (TREE_TYPE (var)))
+ {
+ gimplify_assign (new_var, x, &ilist2);
+ gimplify_assign (unshare_expr (x), new_var, &olist2);
+ }
+ else
+ {
+ SET_DECL_VALUE_EXPR (new_var, x);
+ DECL_HAS_VALUE_EXPR_P (new_var) = 1;
+ }
}
map_cnt++;
}
@@ -11719,7 +11732,9 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
if (offloaded)
{
+ gimple_seq_add_seq (&new_body, ilist2);
gimple_seq_add_seq (&new_body, tgt_body);
+ gimple_seq_add_seq (&new_body, olist2);
new_body = maybe_catch_exception (new_body);
}
else if (data_region)
@@ -12054,6 +12069,9 @@ execute_lower_omp (void)
&& flag_cilkplus == 0)
return 0;
+ addresses_taken = BITMAP_ALLOC (NULL);
+ gimple_seq_ior_addresses_taken (gimple_body (cfun->decl), addresses_taken);
+
all_contexts = splay_tree_new (splay_tree_compare_pointers, 0,
delete_omp_context);
@@ -12079,6 +12097,7 @@ execute_lower_omp (void)
all_contexts = NULL;
}
BITMAP_FREE (task_shared_vars);
+ BITMAP_FREE (addresses_taken);
return 0;
}
--
1.9.1