Hi,
this patch fixes PR46032.
In ipa-pta, it handles a call:
...
__builtin_GOMP_parallel (fn, data, num_threads, flags)
...
as if it were the direct call:
...
fn (data)
...
This improves the ipa-pta alias analysis for the parallelized function fn,
and allows the loop in the testcase to be vectorized without versioning
for alias (i.e. without a runtime alias test).
Bootstrapped and reg-tested on x86_64.
OK for stage3 trunk?
Thanks,
- Tom
Handle BUILT_IN_GOMP_PARALLEL in pta
2015-11-30 Tom de Vries <t...@codesourcery.com>
PR tree-optimization/46032
* tree-ssa-structalias.c (find_func_aliases_for_builtin_call)
(find_func_clobbers): Handle BUILT_IN_GOMP_PARALLEL.
(ipa_pta_execute): Same. Handle node->parallelized_function as a local
function.
gcc/testsuite/:
* gcc.dg/pr46032.c: New test.
libgomp/:
* testsuite/libgomp.c/pr46032.c: New test.
---
gcc/testsuite/gcc.dg/pr46032.c | 47 ++++++++++++++++++++++
gcc/tree-ssa-structalias.c | 73 ++++++++++++++++++++++++++++++++++-
libgomp/testsuite/libgomp.c/pr46032.c | 44 +++++++++++++++++++++
3 files changed, 162 insertions(+), 2 deletions(-)
diff --git a/gcc/testsuite/gcc.dg/pr46032.c b/gcc/testsuite/gcc.dg/pr46032.c
new file mode 100644
index 0000000..b91190e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr46032.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fopenmp -ftree-vectorize -std=c99 -fipa-pta -fdump-tree-vect-all" } */
+
+extern void abort (void);
+
+#define nEvents 1000
+
+static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize")))
+init (unsigned *results, unsigned *pData)
+{
+ unsigned int i;
+ for (i = 0; i < nEvents; ++i)
+ pData[i] = i % 3;
+}
+
+static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize")))
+check (unsigned *results)
+{
+ unsigned sum = 0;
+ for (int idx = 0; idx < (int)nEvents; idx++)
+ sum += results[idx];
+
+ if (sum != 1998)
+ abort ();
+}
+
+int
+main (void)
+{
+ unsigned results[nEvents];
+ unsigned pData[nEvents];
+ unsigned coeff = 2;
+
+ init (&results[0], &pData[0]);
+
+#pragma omp parallel for
+ for (int idx = 0; idx < (int)nEvents; idx++)
+ results[idx] = coeff * pData[idx];
+
+ check (&results[0]);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "versioning for alias required" "vect" } } */
+
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index f24ebeb..3fe538b 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -4488,6 +4488,39 @@ find_func_aliases_for_builtin_call (struct function *fn, gcall *t)
}
return true;
}
+ case BUILT_IN_GOMP_PARALLEL:
+ {
+ /* Handle
+ __builtin_GOMP_parallel (fn, data, num_threads, flags). */
+ if (in_ipa_mode)
+ {
+ tree fnarg = gimple_call_arg (t, 0);
+ gcc_assert (TREE_CODE (fnarg) == ADDR_EXPR);
+ tree fndecl = TREE_OPERAND (fnarg, 0);
+ varinfo_t fi = get_vi_for_tree (fndecl);
+ tree arg = gimple_call_arg (t, 1);
+ gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
+
+ /* Assign the passed argument to the appropriate incoming
+ parameter of the function. */
+ struct constraint_expr lhs ;
+ lhs = get_function_part_constraint (fi, fi_parm_base + 0);
+ auto_vec<ce_s, 2> rhsc;
+ struct constraint_expr *rhsp;
+ get_constraint_for_rhs (arg, &rhsc);
+ while (rhsc.length () != 0)
+ {
+ rhsp = &rhsc.last ();
+ process_constraint (new_constraint (lhs, *rhsp));
+ rhsc.pop ();
+ }
+
+ return true;
+ }
+ /* Else fallthru to generic handling which will let
+ the frame escape. */
+ break;
+ }
/* printf-style functions may have hooks to set pointers to
point to somewhere into the generated string. Leave them
for a later exercise... */
@@ -5036,6 +5069,37 @@ find_func_clobbers (struct function *fn, gimple *origt)
case BUILT_IN_VA_START:
case BUILT_IN_VA_END:
return;
+ case BUILT_IN_GOMP_PARALLEL:
+ {
+ /* Handle
+ __builtin_GOMP_parallel (fn, data, num_threads, flags). */
+ tree fnarg = gimple_call_arg (t, 0);
+ gcc_assert (TREE_CODE (fnarg) == ADDR_EXPR);
+ tree fndecl = TREE_OPERAND (fnarg, 0);
+ varinfo_t cfi = get_vi_for_tree (fndecl);
+ tree arg = gimple_call_arg (t, 1);
+ gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
+
+ /* Parameter passed by value is used. */
+ lhs = get_function_part_constraint (fi, fi_uses);
+ struct constraint_expr *rhsp;
+ get_constraint_for_address_of (arg, &rhsc);
+ FOR_EACH_VEC_ELT (rhsc, j, rhsp)
+ process_constraint (new_constraint (lhs, *rhsp));
+ rhsc.truncate (0);
+
+ /* The caller clobbers what the callee does. */
+ lhs = get_function_part_constraint (fi, fi_clobbers);
+ rhs = get_function_part_constraint (cfi, fi_clobbers);
+ process_constraint (new_constraint (lhs, rhs));
+
+ /* The caller uses what the callee does. */
+ lhs = get_function_part_constraint (fi, fi_uses);
+ rhs = get_function_part_constraint (cfi, fi_uses);
+ process_constraint (new_constraint (lhs, rhs));
+
+ return;
+ }
/* printf-style functions may have hooks to set pointers to
point to somewhere into the generated string. Leave them
for a later exercise... */
@@ -7352,7 +7416,8 @@ ipa_pta_execute (void)
bool nonlocal_p = (node->used_from_other_partition
|| node->externally_visible
|| node->force_output
- || node->address_taken);
+ || (node->address_taken
+ && !node->parallelized_function));
vi = create_function_info_for (node->decl,
alias_get_name (node->decl), false,
@@ -7504,7 +7569,11 @@ ipa_pta_execute (void)
continue;
/* Handle direct calls to functions with body. */
- decl = gimple_call_fndecl (stmt);
+ if (gimple_call_builtin_p (stmt, BUILT_IN_GOMP_PARALLEL))
+ decl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
+ else
+ decl = gimple_call_fndecl (stmt);
+
if (decl
&& (fi = lookup_vi_for_tree (decl))
&& fi->is_fn_info)
diff --git a/libgomp/testsuite/libgomp.c/pr46032.c b/libgomp/testsuite/libgomp.c/pr46032.c
new file mode 100644
index 0000000..2178aa7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr46032.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -std=c99 -fipa-pta" } */
+
+
+extern void abort (void);
+
+#define nEvents 1000
+
+static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize")))
+init (unsigned *results, unsigned *pData)
+{
+ unsigned int i;
+ for (i = 0; i < nEvents; ++i)
+ pData[i] = i % 3;
+}
+
+static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize")))
+check (unsigned *results)
+{
+ unsigned sum = 0;
+ for (int idx = 0; idx < (int)nEvents; idx++)
+ sum += results[idx];
+
+ if (sum != 1998)
+ abort ();
+}
+
+int
+main (void)
+{
+ unsigned results[nEvents];
+ unsigned pData[nEvents];
+ unsigned coeff = 2;
+
+ init (&results[0], &pData[0]);
+
+#pragma omp parallel for
+ for (int idx = 0; idx < (int)nEvents; idx++)
+ results[idx] = coeff * pData[idx];
+
+ check (&results[0]);
+
+ return 0;
+}