Hi!

As the new testcase shows, it is not safe to assume we can optimize
a conditional store into an automatic non-addressable var, we can do it
only if we can prove that the unconditional load or store actually will
not be outside of the boundaries of the variable.
If the offset and size are constant, we can, but this is already all
checked in !tree_could_trap_p, otherwise we really need to check for
a dominating unconditional store, or for the specific case of automatic
non-addressable variables, it is enough if there is a dominating load
(that is what those 4 testcases have).  tree-ssa-phiopt.c has some
infrastructure for this already, see the add_or_mark_expr method etc.,
but right now it handles only MEM_REFs with SSA_NAME first operand
and some integral offset.  So, I think it can be for GCC11 extended
to handle other memory references, possibly up to just doing
get_inner_reference and hasing based on the base, offset expressions
and bit_offset and bit_size, and have also a special case that for
!TREE_ADDRESSABLE automatic variables it could ignore whether something
is a load or store because the local stack should be always writable.
But it feels way too dangerous to do this this late for GCC10, so this
patch just restricts the optimization to the safe case (where lhs doesn't
trap), and on Richi's request also ignores TREE_ADDRESSABLE bit if
flag_store_data_races, because my understanding the reason for
TREE_ADDRESSABLE check is that we want to avoid introducing
store data races (if address of an automatic var escapes, some other thread
could be accessing it concurrently).

Bootstrapped/regtested on powerpc64{,le}-linux, ok for trunk?

2020-04-24  Jakub Jelinek  <ja...@redhat.com>
            Richard Biener  <rguent...@suse.de>

        PR tree-optimization/94734
        PR tree-optimization/89430
        * tree-ssa-phiopt.c: Include tree-eh.h.
        (cond_store_replacement): Return false if an automatic variable
        access could trap.  If -fstore-data-races, don't return false
        just because an automatic variable is addressable.

        * gcc.dg/tree-ssa/pr89430-1.c: Add xfail.
        * gcc.dg/tree-ssa/pr89430-2.c: Add xfail.
        * gcc.dg/tree-ssa/pr89430-5.c: Add xfail.
        * gcc.dg/tree-ssa/pr89430-6.c: Add xfail.
        * gcc.c-torture/execute/pr94734.c: New test.

--- gcc/tree-ssa-phiopt.c.jj    2020-03-19 10:23:50.542872359 +0100
+++ gcc/tree-ssa-phiopt.c       2020-04-24 12:33:02.368355735 +0200
@@ -45,6 +45,7 @@ along with GCC; see the file COPYING3.
 #include "tree-scalar-evolution.h"
 #include "tree-inline.h"
 #include "case-cfn-macros.h"
+#include "tree-eh.h"
 
 static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
 static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
@@ -2237,10 +2238,13 @@ cond_store_replacement (basic_block midd
      whose value is not available readily, which we want to avoid.  */
   if (!nontrap->contains (lhs))
     {
-      /* If LHS is a local variable without address-taken, we could
+      /* If LHS is an access to a local variable without address-taken
+        (or when we allow data races) and known not to trap, we could
         always safely move down the store.  */
       tree base = get_base_address (lhs);
-      if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
+      if (!auto_var_p (base)
+         || (TREE_ADDRESSABLE (base) && !flag_store_data_races)
+         || tree_could_trap_p (lhs))
        return false;
     }
 
--- gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c.jj        2020-01-12 
11:54:37.606395410 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c   2020-04-24 11:26:45.021425675 
+0200
@@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) {
         return a[0]+a[1];
 }
 
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { 
xfail *-*-* } } } */
--- gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c.jj        2020-01-12 
11:54:37.606395410 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c   2020-04-24 11:26:52.319316212 
+0200
@@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) {
         return a[0]+a[1];
 }
 
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { 
xfail *-*-* } } } */
--- gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c.jj        2020-01-12 
11:54:37.606395410 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c   2020-04-24 11:26:59.782204277 
+0200
@@ -13,4 +13,4 @@ int test(int b, int k) {
     return a.data[0] + a.data[1];
 }
 
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { 
xfail *-*-* } } } */
--- gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c.jj        2020-01-12 
11:54:37.606395410 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c   2020-04-24 11:27:06.456104170 
+0200
@@ -16,4 +16,4 @@ int test(int b, int k) {
     return a.data[0].x + a.data[1].x;
 }
 
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { 
xfail *-*-* } } } */
--- gcc/testsuite/gcc.c-torture/execute/pr94734.c.jj    2020-04-24 
11:19:13.745211960 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr94734.c       2020-04-24 
11:18:45.426639974 +0200
@@ -0,0 +1,59 @@
+/* PR tree-optimization/94734 */
+
+__attribute__((noipa)) int
+foo (int n)
+{
+  int arr[16], s = 0;
+  for (int i = 0; i < n; i++)
+    {
+      if (i < 16)
+       arr[i] = i;
+    }
+  for (int i = 0; i < 16; i++)
+    s += arr[i];
+  return s;
+}
+
+__attribute__((noipa)) int
+bar (int n, int x, unsigned long y, unsigned long z)
+{
+  int arr[16], s = 0;
+  arr[4] = 42;
+  for (int i = 0; i < n; i++)
+    {
+      if (x == (i & 0x25))
+       arr[y] = i;
+    }
+  return arr[z];
+}
+
+__attribute__((noipa)) int
+baz (int n, int x, unsigned long z)
+{
+  int arr[16], s = 0;
+  arr[12] = 42;
+  for (int i = 0; i < n; i++)
+    {
+      if (x == (i & 0x25))
+       arr[7] = i;
+    }
+  return arr[z];
+}
+
+int
+main ()
+{
+  if (foo (10374) != 15 * 16 / 2)
+    __builtin_abort ();
+  if (bar (25, 0x25, (unsigned long) 0xdeadbeefbeefdeadULL, 4) != 42)
+    __builtin_abort ();
+  if (bar (25, 4, 15, 15) != 22)
+    __builtin_abort ();
+  if (baz (25, 0x25, 12) != 42)
+    __builtin_abort ();
+  if (baz (25, 4, 7) != 22)
+    __builtin_abort ();
+  if (baz (25, 4, 12) != 42)
+    __builtin_abort ();
+  return 0;
+}

        Jakub

Reply via email to