Hi Richard,

On 26.09.22 10:32, Richard Biener wrote:

On Fri, Sep 23, 2022 at 5:25 PM Tobias Burnus 
<tob...@codesourcery.com><mailto:tob...@codesourcery.com> wrote:



This fixes a tree-sharing ICE. It seems as if all unshare_expr
I added were required in this case. [...]


looks like v1/v2/v3 are now unshared twice

According to the assert, that's not the case. 'var' is a memory
reference – and taking out any of the newly added unshare_expr
will give an ICE with the new *8.c testcase.

better done when its used.  That said, please put the unshares
at places where new things are built, that's much clearer.  That means
the 'outgoing' at
       gimplify_assign (outgoing, teardown_call, &after_join);


The most localized change is the 'else' branch:

       else
-         v1 = v2 = v3 = var;
+         {
+           /* Note that 'var' might be a mem ref.  */
+           v1 = unshare_expr (var);
+           v2 = unshare_expr (var);
+           v3 = unshare_expr (var);
+           incoming = unshare_expr (incoming);
+           outgoing = unshare_expr (outgoing);
+         }

But then I still need to unshare v1/v2/v3 at one other place. Namely:

Either in

-       gimplify_assign (v1, setup_call, &before_fork);
+       gimplify_assign (unshare_expr (v1), setup_call, &before_fork);

or in
         = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
                                         TREE_TYPE (var), 6, init_code,
                                         unshare_expr (ref_to_res),
-                                         v1, level, op, off);
+                                         unshare_expr (v1), level, op, off);


Alternatively, I keep the
       else
         v1 = v2 = v3 = var;
as is, possible adding the comment there, – and then add the unshare_expr
for v1/v2/v3/incoming to build_call_expr_internal_loc
*and* for v1/v2/v3/outgoind to gimplify_assign.

Which variant do you prefer?
(I have attached both – and the only difference is in omp-low.cc.)

(Certainly, other permutations are possible, one is the one in the first patch,
but I like either of the two new patches more.)

Tobias


-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
 gcc/omp-low.cc                                 | 15 +++++++++++----
 gcc/testsuite/c-c++-common/goacc/reduction-7.c | 22 ++++++++++++++++++++++
 gcc/testsuite/c-c++-common/goacc/reduction-8.c | 12 ++++++++++++
 3 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index d9f9aaebc0b..144ccd4bd3d 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -7631,7 +7631,14 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
 	      incoming = build_simple_mem_ref (incoming);
 	  }
 	else
-	  v1 = v2 = v3 = var;
+	  {
+	    /* Note that 'var' might be a mem ref.  */
+	    v1 = unshare_expr (var);
+	    v2 = unshare_expr (var);
+	    v3 = unshare_expr (var);
+	    incoming = unshare_expr (incoming);
+	    outgoing = unshare_expr (outgoing);
+	  }
 
 	/* Determine position in reduction buffer, which may be used
 	   by target.  The parser has ensured that this is not a
@@ -7675,9 +7682,9 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
 					  TREE_TYPE (var), 6, teardown_code,
 					  ref_to_res, v3, level, op, off);
 
-	gimplify_assign (v1, setup_call, &before_fork);
-	gimplify_assign (v2, init_call, &after_fork);
-	gimplify_assign (v3, fini_call, &before_join);
+	gimplify_assign (unshare_expr (v1), setup_call, &before_fork);
+	gimplify_assign (unshare_expr (v2), init_call, &after_fork);
+	gimplify_assign (unshare_expr (v3), fini_call, &before_join);
 	gimplify_assign (outgoing, teardown_call, &after_join);
       }
 
diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-7.c b/gcc/testsuite/c-c++-common/goacc/reduction-7.c
new file mode 100644
index 00000000000..482b0ab1984
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/reduction-7.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+
+/* PR middle-end/106982 */
+
+long long n = 100;
+int multiplicitive_n = 128;
+
+void test1(double *rand, double *a, double *b, double *c)
+{
+#pragma acc data copyin(a[0:10*multiplicitive_n], b[0:10*multiplicitive_n]) copyout(c[0:10])
+    {
+#pragma acc parallel loop
+        for (int i = 0; i < 10; ++i)
+        {
+        double temp = 1.0;
+#pragma acc loop vector reduction(*:temp)
+        for (int j = 0; j < multiplicitive_n; ++j)
+          temp *= a[(i * multiplicitive_n) + j] + b[(i * multiplicitive_n) + j];
+        c[i] = temp;
+        }
+    }
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-8.c b/gcc/testsuite/c-c++-common/goacc/reduction-8.c
new file mode 100644
index 00000000000..2c3ed499d5b
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/reduction-8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+
+/* PR middle-end/106982 */
+
+void test1(double *c)
+{
+    double reduced[5];
+#pragma acc parallel loop gang private(reduced)
+    for (int x = 0; x < 5; ++x)
+#pragma acc loop worker reduction(*:reduced)
+      for (int y = 0; y < 5; ++y) { }
+}
OpenACC: Fix reduction tree-sharing issue [PR106982]

The tree for var == incoming == outgound was
'MEM <double[5]> [(double *)&reduced]' which caused the ICE
"incorrect sharing of tree nodes".

	PR middle-end/106982

gcc/ChangeLog:

	* omp-low.cc (lower_oacc_reductions): Add some unshare_expr.

gcc/testsuite/ChangeLog:

	* c-c++-common/goacc/reduction-7.c: New test.
	* c-c++-common/goacc/reduction-8.c: New test.

 gcc/omp-low.cc                                 | 19 +++++++++++--------
 gcc/testsuite/c-c++-common/goacc/reduction-7.c | 22 ++++++++++++++++++++++
 gcc/testsuite/c-c++-common/goacc/reduction-8.c | 12 ++++++++++++
 3 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index d9f9aaebc0b..dc42c752017 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -7631,6 +7631,7 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
 	      incoming = build_simple_mem_ref (incoming);
 	  }
 	else
+	  /* Note that 'var' might be a mem ref.  */
 	  v1 = v2 = v3 = var;
 
 	/* Determine position in reduction buffer, which may be used
@@ -7659,26 +7660,28 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
 	  = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
 					  TREE_TYPE (var), 6, setup_code,
 					  unshare_expr (ref_to_res),
-					  incoming, level, op, off);
+					  unshare_expr (incoming),
+					  level, op, off);
 	tree init_call
 	  = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
 					  TREE_TYPE (var), 6, init_code,
 					  unshare_expr (ref_to_res),
-					  v1, level, op, off);
+					  unshare_expr (v1), level, op, off);
 	tree fini_call
 	  = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
 					  TREE_TYPE (var), 6, fini_code,
 					  unshare_expr (ref_to_res),
-					  v2, level, op, off);
+					  unshare_expr (v2), level, op, off);
 	tree teardown_call
 	  = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
 					  TREE_TYPE (var), 6, teardown_code,
-					  ref_to_res, v3, level, op, off);
+					  ref_to_res, unshare_expr (v3),
+					  level, op, off);
 
-	gimplify_assign (v1, setup_call, &before_fork);
-	gimplify_assign (v2, init_call, &after_fork);
-	gimplify_assign (v3, fini_call, &before_join);
-	gimplify_assign (outgoing, teardown_call, &after_join);
+	gimplify_assign (unshare_expr (v1), setup_call, &before_fork);
+	gimplify_assign (unshare_expr (v2), init_call, &after_fork);
+	gimplify_assign (unshare_expr (v3), fini_call, &before_join);
+	gimplify_assign (unshare_expr (outgoing), teardown_call, &after_join);
       }
 
   /* Now stitch things together.  */
diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-7.c b/gcc/testsuite/c-c++-common/goacc/reduction-7.c
new file mode 100644
index 00000000000..482b0ab1984
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/reduction-7.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+
+/* PR middle-end/106982 */
+
+long long n = 100;
+int multiplicitive_n = 128;
+
+void test1(double *rand, double *a, double *b, double *c)
+{
+#pragma acc data copyin(a[0:10*multiplicitive_n], b[0:10*multiplicitive_n]) copyout(c[0:10])
+    {
+#pragma acc parallel loop
+        for (int i = 0; i < 10; ++i)
+        {
+        double temp = 1.0;
+#pragma acc loop vector reduction(*:temp)
+        for (int j = 0; j < multiplicitive_n; ++j)
+          temp *= a[(i * multiplicitive_n) + j] + b[(i * multiplicitive_n) + j];
+        c[i] = temp;
+        }
+    }
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-8.c b/gcc/testsuite/c-c++-common/goacc/reduction-8.c
new file mode 100644
index 00000000000..2c3ed499d5b
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/reduction-8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+
+/* PR middle-end/106982 */
+
+void test1(double *c)
+{
+    double reduced[5];
+#pragma acc parallel loop gang private(reduced)
+    for (int x = 0; x < 5; ++x)
+#pragma acc loop worker reduction(*:reduced)
+      for (int y = 0; y < 5; ++y) { }
+}

Reply via email to