Hi!

The following patch contains 2 changes:
1) BIT_NOT_EXPR on a load from memory is handled, including when one
   or both BIT_{AND,IOR,XOR}_EXPR operands is BIT_NOT_EXPR of a memory load
2) it changes the aliasing handling, because the old
   ao_ref_init_from_ptr_and_size caused way too many unnecessary chain
   terminations
For 2), I initially wrote a completely different patch, which computed
base_ref for the store chain and minimum and maximum offsets from it which
then were used in the ref_maybe_used_by_stmt_p/stmt_may_clobber_ref_p_1
calls.  But in the end this seems to be far simpler, it checks the aliasing
the way it was checking it for the chain_info chain before, I see no reason
to handle other chains differently.  All that is needed is that even a
successful addition of a store into a chain needs to terminate other chains
that could possibly alias, just not the current chain, so the chain_info
argument is repurposed for this - terminate chains that alias except for
the one referenced by chain_info if non-NULL.
To show in more detail what it does on store_merging_2.c:
struct bar { int a; unsigned char b, c; short d; unsigned char e, f, g; };
__attribute__ ((noinline)) void
foo2 (struct bar *p, struct bar *p2)
{
  p->b = 0xff;
  p2->b = 0xa;
  p->a = 0xfffff;
  p2->c = 0xc;
  p->c = 0xff;
  p2->d = 0xbf;
  p->d = 0xfff;
}
Previously the p2->* stores would terminate the p based chains and
vice versa, no store merging would occur.  With this patch, p2->b
may alias p->b in the other chain, so we terminate that (and do nothing
for p->b, because it has just a single store).  Then we process p->a
store, but that doesn't alias with p2->b, so we have 2 different chains
with 1 store in it each.  Then we process p2->c store, which again doesn't
alias with p->a, so we have 2 stores in p2 based chain, 1 store in p based.
Next, p->c aliases with p2->c in the other chain, so we terminate that
chain, and optimize the p2->{b,c} stores into one combined one.
So we have just one chain with p->{a,c} in it.  Then we process p2->d,
not aliasing with anything, create p2 based chain for it.  Finally,
p->d aliases with p2->d, so terminate that chain (no useful merging),
and finally the p->{a,c,d} chain is not merged, because there is a gap
between a and c, can't modify b, and while {c,d} is adjacent, it is 3 bytes
and we'd need 2 stores like before.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-11-06  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/78821
        * gimple-ssa-store-merging.c (struct store_operand_info): Add bit_not_p
        data member.
        (store_operand_info::store_operand_info): Initialize it to false.
        (pass_store_merging::terminate_all_aliasing_chains): Rewritten to use
        ref_maybe_used_by_stmt_p and stmt_may_clobber_ref_p on lhs of each
        store in the group, and if chain_info is non-NULL, to ignore altogether
        that chain.
        (compatible_load_p): Fail if bit_not_p does not match.
        (imm_store_chain_info::output_merged_store): Handle bit_not_p loads.
        (handled_load): Fill in bit_not_p.  Handle BIT_NOT_EXPR.
        (pass_store_merging::process_store): Adjust
        terminate_all_aliasing_chains calls to pass NULL in all current spots,
        call terminate_all_aliasing_chains newly when adding a store into
        a chain with non-NULL chain_info.

        * gcc.dg/store_merging_2.c: Expect 3 store mergings instead of 2.
        * gcc.dg/store_merging_13.c (f7, f8, f9, f10, f11, f12, f13): New
        functions.
        (main): Test also those.  Expect 13 store mergings instead of 6.
        * gcc.dg/store_merging_14.c (f7, f8, f9): New functions.
        (main): Test also those.  Expect 9 store mergings instead of 6.

--- gcc/gimple-ssa-store-merging.c.jj   2017-11-06 08:51:15.313990289 +0100
+++ gcc/gimple-ssa-store-merging.c      2017-11-06 08:57:07.621523803 +0100
@@ -183,12 +183,13 @@ struct store_operand_info
   unsigned HOST_WIDE_INT bitregion_start;
   unsigned HOST_WIDE_INT bitregion_end;
   gimple *stmt;
+  bool bit_not_p;
   store_operand_info ();
 };
 
 store_operand_info::store_operand_info ()
   : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
-    bitregion_start (0), bitregion_end (0), stmt (NULL)
+    bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
 {
 }
 
@@ -910,8 +911,7 @@ private:
 
   void process_store (gimple *);
   bool terminate_and_process_all_chains ();
-  bool terminate_all_aliasing_chains (imm_store_chain_info **,
-                                     gimple *);
+  bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
   bool terminate_and_release_chain (imm_store_chain_info *);
 }; // class pass_store_merging
 
@@ -930,13 +930,9 @@ pass_store_merging::terminate_and_proces
   return ret;
 }
 
-/* Terminate all chains that are affected by the assignment to DEST, appearing
-   in statement STMT and ultimately points to the object BASE.  Return true if
-   at least one aliasing chain was terminated.  BASE and DEST are allowed to
-   be NULL_TREE.  In that case the aliasing checks are performed on the whole
-   statement rather than a particular operand in it.  VAR_OFFSET_P signifies
-   whether STMT represents a store to BASE offset by a variable amount.
-   If that is the case we have to terminate any chain anchored at BASE.  */
+/* Terminate all chains that are affected by the statement STMT.
+   CHAIN_INFO is the chain we should ignore from the checks if
+   non-NULL.  */
 
 bool
 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
@@ -949,14 +945,18 @@ pass_store_merging::terminate_all_aliasi
   if (!gimple_vuse (stmt))
     return false;
 
-  /* Check if the assignment destination (BASE) is part of a store chain.
-     This is to catch non-constant stores to destinations that may be part
-     of a chain.  */
-  if (chain_info)
+  for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = 
next)
     {
+      next = cur->next;
+
+      /* We already checked all the stores in chain_info and terminated the
+        chain if necessary.  Skip it here.  */
+      if (chain_info && *chain_info == cur)
+       continue;
+
       store_immediate_info *info;
       unsigned int i;
-      FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info)
+      FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
        {
          if (ref_maybe_used_by_stmt_p (stmt, gimple_assign_lhs (info->stmt))
              || stmt_may_clobber_ref_p (stmt, gimple_assign_lhs (info->stmt)))
@@ -966,37 +966,13 @@ pass_store_merging::terminate_all_aliasi
                  fprintf (dump_file, "stmt causes chain termination:\n");
                  print_gimple_stmt (dump_file, stmt, 0);
                }
-             terminate_and_release_chain (*chain_info);
+             terminate_and_release_chain (cur);
              ret = true;
              break;
            }
        }
     }
 
-  /* Check for aliasing with all other store chains.  */
-  for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = 
next)
-    {
-      next = cur->next;
-
-      /* We already checked all the stores in chain_info and terminated the
-        chain if necessary.  Skip it here.  */
-      if (chain_info && (*chain_info) == cur)
-       continue;
-
-      /* We can't use the base object here as that does not reliably exist.
-        Build a ao_ref from the base object address (if we know the
-        minimum and maximum offset and the maximum size we could improve
-        things here).  */
-      ao_ref chain_ref;
-      ao_ref_init_from_ptr_and_size (&chain_ref, cur->base_addr, NULL_TREE);
-      if (ref_maybe_used_by_stmt_p (stmt, &chain_ref)
-         || stmt_may_clobber_ref_p_1 (stmt, &chain_ref))
-       {
-         terminate_and_release_chain (cur);
-         ret = true;
-       }
-    }
-
   return ret;
 }
 
@@ -1053,6 +1029,7 @@ compatible_load_p (merged_store_group *m
 {
   store_immediate_info *infof = merged_store->stores[0];
   if (!info->ops[idx].base_addr
+      || info->ops[idx].bit_not_p != infof->ops[idx].bit_not_p
       || (info->ops[idx].bitpos - infof->ops[idx].bitpos
          != info->bitpos - infof->bitpos)
       || !operand_equal_p (info->ops[idx].base_addr,
@@ -1755,6 +1732,19 @@ imm_store_chain_info::output_merged_stor
                      gimple_seq_add_stmt_without_update (&seq, stmt);
                    }
                  ops[j] = gimple_assign_lhs (stmt);
+                 if (op.bit_not_p)
+                   {
+                     stmt = gimple_build_assign (make_ssa_name (int_type),
+                                                 BIT_NOT_EXPR, ops[j]);
+                     gimple_set_location (stmt, load_loc);
+                     ops[j] = gimple_assign_lhs (stmt);
+
+                     if (gsi_bb (load_gsi[j]))
+                       gimple_seq_add_stmt_without_update (&load_seq[j],
+                                                           stmt);
+                     else
+                       gimple_seq_add_stmt_without_update (&seq, stmt);
+                   }
                }
              else
                ops[j] = native_interpret_expr (int_type,
@@ -2100,9 +2090,23 @@ handled_load (gimple *stmt, store_operan
              unsigned HOST_WIDE_INT bitregion_start,
              unsigned HOST_WIDE_INT bitregion_end)
 {
-  if (!is_gimple_assign (stmt) || !gimple_vuse (stmt))
+  if (!is_gimple_assign (stmt))
     return false;
-  if (gimple_assign_load_p (stmt)
+  if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
+    {
+      tree rhs1 = gimple_assign_rhs1 (stmt);
+      if (TREE_CODE (rhs1) == SSA_NAME
+         && has_single_use (rhs1)
+         && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
+                          bitregion_start, bitregion_end))
+       {
+         op->bit_not_p = !op->bit_not_p;
+         return true;
+       }
+      return false;
+    }
+  if (gimple_vuse (stmt)
+      && gimple_assign_load_p (stmt)
       && !stmt_can_throw_internal (stmt)
       && !gimple_has_volatile_ops (stmt))
     {
@@ -2119,6 +2123,7 @@ handled_load (gimple *stmt, store_operan
        {
          op->stmt = stmt;
          op->val = mem;
+         op->bit_not_p = false;
          return true;
        }
     }
@@ -2202,16 +2207,16 @@ pass_store_merging::process_store (gimpl
          }
     }
 
-  struct imm_store_chain_info **chain_info = NULL;
-  if (base_addr)
-    chain_info = m_stores.get (base_addr);
-
   if (invalid)
     {
-      terminate_all_aliasing_chains (chain_info, stmt);
+      terminate_all_aliasing_chains (NULL, stmt);
       return;
     }
 
+  struct imm_store_chain_info **chain_info = NULL;
+  if (base_addr)
+    chain_info = m_stores.get (base_addr);
+
   store_immediate_info *info;
   if (chain_info)
     {
@@ -2225,6 +2230,7 @@ pass_store_merging::process_store (gimpl
          print_gimple_stmt (dump_file, stmt, 0);
        }
       (*chain_info)->m_store_info.safe_push (info);
+      terminate_all_aliasing_chains (chain_info, stmt);
       /* If we reach the limit of stores to merge in a chain terminate and
         process the chain now.  */
       if ((*chain_info)->m_store_info.length ()
@@ -2239,7 +2245,7 @@ pass_store_merging::process_store (gimpl
     }
 
   /* Store aliases any existing chain?  */
-  terminate_all_aliasing_chains (chain_info, stmt);
+  terminate_all_aliasing_chains (NULL, stmt);
   /* Start a new chain.  */
   struct imm_store_chain_info *new_chain
     = new imm_store_chain_info (m_stores_head, base_addr);
--- gcc/testsuite/gcc.dg/store_merging_2.c.jj   2017-11-06 08:46:29.996607515 
+0100
+++ gcc/testsuite/gcc.dg/store_merging_2.c      2017-11-06 08:51:20.347926470 
+0100
@@ -77,4 +77,4 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } 
} */
+/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" } 
} */
--- gcc/testsuite/gcc.dg/store_merging_13.c.jj  2017-11-06 08:46:29.962607947 
+0100
+++ gcc/testsuite/gcc.dg/store_merging_13.c     2017-11-06 08:51:20.347926470 
+0100
@@ -104,6 +104,90 @@ f6 (struct S *p, struct S *q)
   p->g = pg;
 }
 
+__attribute__((noipa)) void
+f7 (struct S *__restrict p, struct S *__restrict q)
+{
+  p->a |= q->a;
+  p->b |= q->b;
+  p->c |= q->c;
+  p->d |= q->d;
+  p->e |= q->e;
+  p->f |= q->f;
+  p->g |= q->g;
+}
+
+__attribute__((noipa)) void
+f8 (struct S *__restrict p, struct S *__restrict q)
+{
+  p->a &= q->a;
+  p->b &= q->b;
+  p->c &= q->c;
+  p->d &= q->d;
+  p->e &= q->e;
+  p->f &= q->f;
+  p->g &= q->g;
+}
+
+__attribute__((noipa)) void
+f9 (struct S *__restrict p, struct S *__restrict q)
+{
+  p->a ^= q->a;
+  p->b ^= q->b;
+  p->c ^= q->c;
+  p->d ^= q->d;
+  p->e ^= q->e;
+  p->f ^= q->f;
+  p->g ^= q->g;
+}
+
+__attribute__((noipa)) void
+f10 (struct S *__restrict p, struct S *__restrict q)
+{
+  p->a = ~q->a;
+  p->b = ~q->b;
+  p->c = ~q->c;
+  p->d = ~q->d;
+  p->e = ~q->e;
+  p->f = ~q->f;
+  p->g = ~q->g;
+}
+
+__attribute__((noipa)) void
+f11 (struct S *__restrict p, struct S *__restrict q)
+{
+  p->a = p->a | (unsigned char) ~q->a;
+  p->b = p->b | (unsigned char) ~q->b;
+  p->c = p->c | (unsigned short) ~q->c;
+  p->d = p->d | (unsigned char) ~q->d;
+  p->e = p->e | (unsigned char) ~q->e;
+  p->f = p->f | (unsigned char) ~q->f;
+  p->g = p->g | (unsigned char) ~q->g;
+}
+
+__attribute__((noipa)) void
+f12 (struct S *__restrict p, struct S *__restrict q)
+{
+  p->a = p->a & (unsigned char) ~q->a;
+  p->b = p->b & (unsigned char) ~q->b;
+  p->c = p->c & (unsigned short) ~q->c;
+  p->d = p->d & (unsigned char) ~q->d;
+  p->e = p->e & (unsigned char) ~q->e;
+  p->f = p->f & (unsigned char) ~q->f;
+  p->g = p->g & (unsigned char) ~q->g;
+}
+
+__attribute__((noipa)) void
+f13 (struct S *__restrict p, struct S *__restrict q)
+{
+  p->a = p->a ^ (unsigned char) ~q->a;
+  p->b = p->b ^ (unsigned char) ~q->b;
+  p->c = p->c ^ (unsigned short) ~q->c;
+  p->d = p->d ^ (unsigned char) ~q->d;
+  p->e = p->e ^ (unsigned char) ~q->e;
+  p->f = p->f ^ (unsigned char) ~q->f;
+  p->g = p->g ^ (unsigned char) ~q->g;
+}
+
 struct S s = { 20, 21, 22, 23, 24, 25, 26, 27 };
 struct S t = { 0x71, 0x72, 0x7f04, 0x78, 0x31, 0x32, 0x34, 
0xf1f2f3f4f5f6f7f8ULL };
 struct S u = { 28, 29, 30, 31, 32, 33, 34, 35 };
@@ -151,7 +235,62 @@ main ()
       || s.e != (40 ^ 0x31) || s.f != (41 ^ 0x32)
       || s.g != (42 ^ 0x34) || s.h != 27)
     __builtin_abort ();
+  f3 (&s, &v);
+  f7 (&s, &t);
+  asm volatile ("" : : : "memory");
+  if (s.a != (36 | 0x71) || s.b != (37 | 0x72)
+      || s.c != (38 | 0x7f04) || s.d != (39 | 0x78)
+      || s.e != (40 | 0x31) || s.f != (41 | 0x32)
+      || s.g != (42 | 0x34) || s.h != 27)
+    __builtin_abort ();
+  f3 (&s, &u);
+  f8 (&s, &t);
+  asm volatile ("" : : : "memory");
+  if (s.a != (28 & 0x71) || s.b != (29 & 0x72)
+      || s.c != (30 & 0x7f04) || s.d != (31 & 0x78)
+      || s.e != (32 & 0x31) || s.f != (33 & 0x32)
+      || s.g != (34 & 0x34) || s.h != 27)
+    __builtin_abort ();
+  f2 (&s, &v);
+  f9 (&s, &t);
+  asm volatile ("" : : : "memory");
+  if (s.a != (36 ^ 0x71) || s.b != (37 ^ 0x72)
+      || s.c != (38 ^ 0x7f04) || s.d != (39 ^ 0x78)
+      || s.e != (40 ^ 0x31) || s.f != (41 ^ 0x32)
+      || s.g != (42 ^ 0x34) || s.h != 27)
+    __builtin_abort ();
+  f10 (&s, &u);
+  asm volatile ("" : : : "memory");
+  if (s.a != (unsigned char) ~28 || s.b != (unsigned char) ~29
+      || s.c != (unsigned short) ~30 || s.d != (unsigned char) ~31
+      || s.e != (unsigned char) ~32 || s.f != (unsigned char) ~33
+      || s.g != (unsigned char) ~34 || s.h != 27)
+    __builtin_abort ();
+  f3 (&s, &v);
+  f11 (&s, &t);
+  asm volatile ("" : : : "memory");
+  if (s.a != (36 | (unsigned char) ~0x71) || s.b != (37 | (unsigned char) 
~0x72)
+      || s.c != (38 | (unsigned short) ~0x7f04) || s.d != (39 | (unsigned 
char) ~0x78)
+      || s.e != (40 | (unsigned char) ~0x31) || s.f != (41 | (unsigned char) 
~0x32)
+      || s.g != (42 | (unsigned char) ~0x34) || s.h != 27)
+    __builtin_abort ();
+  f3 (&s, &u);
+  f12 (&s, &t);
+  asm volatile ("" : : : "memory");
+  if (s.a != (28 & (unsigned char) ~0x71) || s.b != (29 & (unsigned char) 
~0x72)
+      || s.c != (30 & (unsigned short) ~0x7f04) || s.d != (31 & (unsigned 
char) ~0x78)
+      || s.e != (32 & (unsigned char) ~0x31) || s.f != (33 & (unsigned char) 
~0x32)
+      || s.g != (34 & (unsigned char) ~0x34) || s.h != 27)
+    __builtin_abort ();
+  f2 (&s, &v);
+  f13 (&s, &t);
+  asm volatile ("" : : : "memory");
+  if (s.a != (36 ^ (unsigned char) ~0x71) || s.b != (37 ^ (unsigned char) 
~0x72)
+      || s.c != (38 ^ (unsigned short) ~0x7f04) || s.d != (39 ^ (unsigned 
char) ~0x78)
+      || s.e != (40 ^ (unsigned char) ~0x31) || s.f != (41 ^ (unsigned char) 
~0x32)
+      || s.g != (42 ^ (unsigned char) ~0x34) || s.h != 27)
+    __builtin_abort ();
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "Merging successful" 6 "store-merging" } 
} */
+/* { dg-final { scan-tree-dump-times "Merging successful" 13 "store-merging" } 
} */
--- gcc/testsuite/gcc.dg/store_merging_14.c.jj  2017-11-06 08:46:29.919608492 
+0100
+++ gcc/testsuite/gcc.dg/store_merging_14.c     2017-11-06 08:51:20.347926470 
+0100
@@ -104,6 +104,42 @@ f6 (struct S *p, struct S *q)
   p->g = pg;
 }
 
+__attribute__((noipa)) void
+f7 (struct S *__restrict p, struct S *__restrict q)
+{
+  p->a |= q->a;
+  p->b |= q->b;
+  p->c |= q->c;
+  p->d |= q->d;
+  p->e |= q->e;
+  p->f |= q->f;
+  p->g |= q->g;
+}
+
+__attribute__((noipa)) void
+f8 (struct S *__restrict p, struct S *__restrict q)
+{
+  p->a &= q->a;
+  p->b &= q->b;
+  p->c &= q->c;
+  p->d &= q->d;
+  p->e &= q->e;
+  p->f &= q->f;
+  p->g &= q->g;
+}
+
+__attribute__((noipa)) void
+f9 (struct S *__restrict p, struct S *__restrict q)
+{
+  p->a ^= q->a;
+  p->b ^= q->b;
+  p->c ^= q->c;
+  p->d ^= q->d;
+  p->e ^= q->e;
+  p->f ^= q->f;
+  p->g ^= q->g;
+}
+
 struct S s = { 72, 20, 21, 73, 22, 23, 24, 25, 26, 74, 27 };
 struct S t = { 75, 0x71, 0x72, 76, 0x7f04, 0x78, 0x31, 0x32, 0x34, 77, 
0xf1f2f3f4f5f6f7f8ULL };
 struct S u = { 78, 28, 29, 79, 30, 31, 32, 33, 34, 80, 35 };
@@ -151,7 +187,31 @@ main ()
       || s.e != (40 ^ 0x31) || s.f != (41 ^ 0x32)
       || s.g != (42 ^ 0x34) || s.k != 74 || s.h != 27)
     __builtin_abort ();
+  f3 (&s, &v);
+  f7 (&s, &t);
+  asm volatile ("" : : : "memory");
+  if (s.i != 72 || s.a != (36 | 0x71) || s.b != (37 | 0x72) || s.j != 73
+      || s.c != (38 | 0x7f04) || s.d != (39 | 0x78)
+      || s.e != (40 | 0x31) || s.f != (41 | 0x32)
+      || s.g != (42 | 0x34) || s.k != 74 || s.h != 27)
+    __builtin_abort ();
+  f3 (&s, &u);
+  f8 (&s, &t);
+  asm volatile ("" : : : "memory");
+  if (s.i != 72 || s.a != (28 & 0x71) || s.b != (29 & 0x72) || s.j != 73
+      || s.c != (30 & 0x7f04) || s.d != (31 & 0x78)
+      || s.e != (32 & 0x31) || s.f != (33 & 0x32)
+      || s.g != (34 & 0x34) || s.k != 74 || s.h != 27)
+    __builtin_abort ();
+  f2 (&s, &v);
+  f9 (&s, &t);
+  asm volatile ("" : : : "memory");
+  if (s.i != 72 || s.a != (36 ^ 0x71) || s.b != (37 ^ 0x72) || s.j != 73
+      || s.c != (38 ^ 0x7f04) || s.d != (39 ^ 0x78)
+      || s.e != (40 ^ 0x31) || s.f != (41 ^ 0x32)
+      || s.g != (42 ^ 0x34) || s.k != 74 || s.h != 27)
+    __builtin_abort ();
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "Merging successful" 6 "store-merging" } 
} */
+/* { dg-final { scan-tree-dump-times "Merging successful" 9 "store-merging" } 
} */

        Jakub

Reply via email to