Hi!

My PR60663 fix unfortunately stopped CSE of all inline-asms, even those
whose only clobbers are the ones added by default.

This patch attempts to restore the old behavior, with the exceptions:
1) as always, asm volatile is not CSEd
2) inline-asms with multiple outputs are not CSEd
3) on request from Richard (which Segher on IRC argues against), a "memory"
   clobber also prevents CSE; this restriction can be dropped by removing
   the "int j, lim = XVECLEN (x, 0);" declaration and the loop below it
4) an inline-asm with clobbers is never copied into an insn that wasn't
   an inline-asm before, so if there are clobbers, we still allow CSEing
   of e.g. two identical inline-asms, but only by reusing the result of
   one of them

Bootstrapped/regtested on x86_64-linux and i686-linux, tested also
with arm cross after reverting the PR60663 arm cost fix.

Ok for trunk this way, or with 3) removed?

2015-01-13  Jakub Jelinek  <ja...@redhat.com>

        PR rtl-optimization/63637
        PR rtl-optimization/60663
        * cse.c (merge_equiv_classes): Set new_elt->cost to MAX_COST
        if elt->cost is MAX_COST for ASM_OPERANDS.
        (find_sets_in_insn): Fix up comment typo.
        (cse_insn): Don't set src_volatile for all non-volatile
        ASM_OPERANDS in PARALLELs, but just those with multiple outputs
        or with "memory" clobber.  Set elt->cost to MAX_COST
        for ASM_OPERANDS in PARALLEL.  Set src_elt->cost to MAX_COST
        if new_src is ASM_OPERANDS and elt->cost is MAX_COST.

        * gcc.dg/pr63637-1.c: New test.
        * gcc.dg/pr63637-2.c: New test.
        * gcc.dg/pr63637-3.c: New test.
        * gcc.dg/pr63637-4.c: New test.
        * gcc.dg/pr63637-5.c: New test.
        * gcc.dg/pr63637-6.c: New test.
        * gcc.target/i386/pr63637-1.c: New test.
        * gcc.target/i386/pr63637-2.c: New test.
        * gcc.target/i386/pr63637-3.c: New test.
        * gcc.target/i386/pr63637-4.c: New test.
        * gcc.target/i386/pr63637-5.c: New test.
        * gcc.target/i386/pr63637-6.c: New test.

--- gcc/cse.c.jj        2015-01-09 21:59:44.000000000 +0100
+++ gcc/cse.c   2015-01-13 13:26:23.391216064 +0100
@@ -1792,6 +1792,8 @@ merge_equiv_classes (struct table_elt *c
            }
          new_elt = insert (exp, class1, hash, mode);
          new_elt->in_memory = hash_arg_in_memory;
+         if (GET_CODE (exp) == ASM_OPERANDS && elt->cost == MAX_COST)
+           new_elt->cost = MAX_COST;
        }
     }
 }
@@ -4258,7 +4260,7 @@ find_sets_in_insn (rtx_insn *insn, struc
     {
       int i, lim = XVECLEN (x, 0);
 
-      /* Go over the epressions of the PARALLEL in forward order, to
+      /* Go over the expressions of the PARALLEL in forward order, to
         put them in the same order in the SETS array.  */
       for (i = 0; i < lim; i++)
        {
@@ -4634,12 +4636,27 @@ cse_insn (rtx_insn *insn)
          && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
        sets[i].src_volatile = 1;
 
-      /* Also do not record result of a non-volatile inline asm with
-        more than one result or with clobbers, we do not want CSE to
-        break the inline asm apart.  */
       else if (GET_CODE (src) == ASM_OPERANDS
               && GET_CODE (x) == PARALLEL)
-       sets[i].src_volatile = 1;
+       {
+         /* Do not record result of a non-volatile inline asm with
+            more than one result.  */
+         if (n_sets > 1)
+           sets[i].src_volatile = 1;
+
+         int j, lim = XVECLEN (x, 0);
+         for (j = 0; j < lim; j++)
+           {
+             rtx y = XVECEXP (x, 0, j);
+             /* And do not record result of a non-volatile inline asm
+                with "memory" clobber.  */
+             if (GET_CODE (y) == CLOBBER && MEM_P (XEXP (y, 0)))
+               {
+                 sets[i].src_volatile = 1;
+                 break;
+               }
+           }
+       }
 
 #if 0
       /* It is no longer clear why we used to do this, but it doesn't
@@ -5230,8 +5247,8 @@ cse_insn (rtx_insn *insn)
            ;
 
          /* Look for a substitution that makes a valid insn.  */
-         else if (validate_unshare_change
-                    (insn, &SET_SRC (sets[i].rtl), trial, 0))
+         else if (validate_unshare_change (insn, &SET_SRC (sets[i].rtl),
+                                           trial, 0))
            {
              rtx new_rtx = canon_reg (SET_SRC (sets[i].rtl), insn);
 
@@ -5593,6 +5610,12 @@ cse_insn (rtx_insn *insn)
                  }
                elt = insert (src, classp, sets[i].src_hash, mode);
                elt->in_memory = sets[i].src_in_memory;
+               /* If inline asm has any clobbers, ensure we only reuse
+                  existing inline asms and never try to put the ASM_OPERANDS
+                  into an insn that isn't inline asm.  */
+               if (GET_CODE (src) == ASM_OPERANDS
+                   && GET_CODE (x) == PARALLEL)
+                 elt->cost = MAX_COST;
                sets[i].src_elt = classp = elt;
              }
            if (sets[i].src_const && sets[i].src_const_elt == 0
@@ -5906,6 +5929,9 @@ cse_insn (rtx_insn *insn)
                      }
                    src_elt = insert (new_src, classp, src_hash, new_mode);
                    src_elt->in_memory = elt->in_memory;
+                   if (GET_CODE (new_src) == ASM_OPERANDS
+                       && elt->cost == MAX_COST)
+                     src_elt->cost = MAX_COST;
                  }
                else if (classp && classp != src_elt->first_same_value)
                  /* Show that two things that we've seen before are
--- gcc/testsuite/gcc.dg/pr63637-1.c.jj 2015-01-13 13:40:56.385782037 +0100
+++ gcc/testsuite/gcc.dg/pr63637-1.c    2015-01-13 13:41:08.931559978 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a));
+  asm ("# Magic instruction" : "=r" (b));
+  asm ("# Magic instruction" : "=r" (c));
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */
--- gcc/testsuite/gcc.dg/pr63637-2.c.jj 2015-01-13 13:41:36.967063752 +0100
+++ gcc/testsuite/gcc.dg/pr63637-2.c    2015-01-13 13:42:04.758571844 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : "r" (0));
+  asm ("# Magic instruction" : "=r" (b) : "r" (0));
+  asm ("# Magic instruction" : "=r" (c) : "r" (0));
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */
--- gcc/testsuite/gcc.dg/pr63637-3.c.jj 2015-01-13 13:43:58.820552956 +0100
+++ gcc/testsuite/gcc.dg/pr63637-3.c    2015-01-13 13:44:21.702147954 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : : "memory");
+  asm ("# Magic instruction" : "=r" (b) : : "memory");
+  asm ("# Magic instruction" : "=r" (c) : : "memory");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.dg/pr63637-4.c.jj 2015-01-13 13:44:01.624503326 +0100
+++ gcc/testsuite/gcc.dg/pr63637-4.c    2015-01-13 13:44:44.220749376 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : "r" (0) : "memory");
+  asm ("# Magic instruction" : "=r" (b) : "r" (0) : "memory");
+  asm ("# Magic instruction" : "=r" (c) : "r" (0) : "memory");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.dg/pr63637-5.c.jj 2015-01-13 13:46:36.837756064 +0100
+++ gcc/testsuite/gcc.dg/pr63637-5.c    2015-01-13 13:47:01.461320229 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c, d, e, f;
+  asm ("# Magic instruction" : "=r" (a), "=r" (d));
+  asm ("# Magic instruction" : "=r" (b), "=r" (e));
+  asm ("# Magic instruction" : "=r" (c), "=r" (f));
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.dg/pr63637-6.c.jj 2015-01-13 13:46:39.834703018 +0100
+++ gcc/testsuite/gcc.dg/pr63637-6.c    2015-01-13 13:47:27.915851986 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c, d, e, f;
+  asm ("# Magic instruction" : "=r" (a), "=r" (d) : "r" (0));
+  asm ("# Magic instruction" : "=r" (b), "=r" (e) : "r" (0));
+  asm ("# Magic instruction" : "=r" (c), "=r" (f) : "r" (0));
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-1.c.jj        2015-01-13 13:40:13.996531691 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-1.c   2015-01-13 13:42:37.945984430 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : : "eax");
+  asm ("# Magic instruction" : "=r" (b) : : "edx");
+  asm ("# Magic instruction" : "=r" (c) : : "ecx");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-2.c.jj        2015-01-13 13:42:12.557433805 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-2.c   2015-01-13 13:42:30.656113460 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : "r" (0) : "eax");
+  asm ("# Magic instruction" : "=r" (b) : "r" (0) : "edx");
+  asm ("# Magic instruction" : "=r" (c) : "r" (0) : "ecx");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-3.c.jj        2015-01-13 13:43:06.407480663 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-3.c   2015-01-13 13:43:28.600087856 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : : "eax", "memory");
+  asm ("# Magic instruction" : "=r" (b) : : "edx", "memory");
+  asm ("# Magic instruction" : "=r" (c) : : "ecx", "memory");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-4.c.jj        2015-01-13 13:43:09.505425830 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-4.c   2015-01-13 13:43:44.769801653 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : "r" (0) : "eax", "memory");
+  asm ("# Magic instruction" : "=r" (b) : "r" (0) : "edx", "memory");
+  asm ("# Magic instruction" : "=r" (c) : "r" (0) : "ecx", "memory");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-5.c.jj        2015-01-13 13:45:38.747784252 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-5.c   2015-01-13 13:45:34.350862077 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c, d, e, f;
+  asm ("# Magic instruction" : "=r" (a), "=r" (d) : : "eax");
+  asm ("# Magic instruction" : "=r" (b), "=r" (e) : : "edx");
+  asm ("# Magic instruction" : "=r" (c), "=r" (f) : : "ecx");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-6.c.jj        2015-01-13 13:45:54.923497943 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-6.c   2015-01-13 13:46:23.965983893 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c, d, e, f;
+  asm ("# Magic instruction" : "=r" (a), "=r" (d) : "r" (0) : "eax");
+  asm ("# Magic instruction" : "=r" (b), "=r" (e) : "r" (0) : "edx");
+  asm ("# Magic instruction" : "=r" (c), "=r" (f) : "r" (0) : "ecx");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */

        Jakub

Reply via email to