Hi! My PR60663 fix unfortunately stopped CSE of all inline-asms, even those that e.g. only have the clobbers added by default.
This patch attempts to restore the old behavior, with the following exceptions: 1) as always, asm volatile is not CSEd; 2) inline-asms with multiple outputs are not CSEd; 3) on request from Richard (which Segher on IRC argues against), a "memory" clobber also prevents CSE; this can be dropped by removing the "int j, lim = XVECLEN (x, 0);" declaration and the loop below it; 4) an inline-asm with clobbers is never copied into an insn that wasn't an inline-asm before, so if there are clobbers, we allow CSEing of e.g. two identical inline-asms, but only by reusing the results of one of them. Bootstrapped/regtested on x86_64-linux and i686-linux, also tested with an arm cross compiler after reverting the PR60663 arm cost fix. Ok for trunk this way, or with 3) removed? 2015-01-13 Jakub Jelinek <ja...@redhat.com> PR rtl-optimization/63637 PR rtl-optimization/60663 * cse.c (merge_equiv_classes): Set new_elt->cost to MAX_COST if elt->cost is MAX_COST for ASM_OPERANDS. (find_sets_in_insn): Fix up comment typo. (cse_insn): Don't set src_volatile for all non-volatile ASM_OPERANDS in PARALLELs, but just those with multiple outputs or with "memory" clobber. Set elt->cost to MAX_COST for ASM_OPERANDS in PARALLEL. Set src_elt->cost to MAX_COST if new_src is ASM_OPERANDS and elt->cost is MAX_COST. * gcc.dg/pr63637-1.c: New test. * gcc.dg/pr63637-2.c: New test. * gcc.dg/pr63637-3.c: New test. * gcc.dg/pr63637-4.c: New test. * gcc.dg/pr63637-5.c: New test. * gcc.dg/pr63637-6.c: New test. * gcc.target/i386/pr63637-1.c: New test. * gcc.target/i386/pr63637-2.c: New test. * gcc.target/i386/pr63637-3.c: New test. * gcc.target/i386/pr63637-4.c: New test. * gcc.target/i386/pr63637-5.c: New test. * gcc.target/i386/pr63637-6.c: New test. 
--- gcc/cse.c.jj 2015-01-09 21:59:44.000000000 +0100 +++ gcc/cse.c 2015-01-13 13:26:23.391216064 +0100 @@ -1792,6 +1792,8 @@ merge_equiv_classes (struct table_elt *c } new_elt = insert (exp, class1, hash, mode); new_elt->in_memory = hash_arg_in_memory; + if (GET_CODE (exp) == ASM_OPERANDS && elt->cost == MAX_COST) + new_elt->cost = MAX_COST; } } } @@ -4258,7 +4260,7 @@ find_sets_in_insn (rtx_insn *insn, struc { int i, lim = XVECLEN (x, 0); - /* Go over the epressions of the PARALLEL in forward order, to + /* Go over the expressions of the PARALLEL in forward order, to put them in the same order in the SETS array. */ for (i = 0; i < lim; i++) { @@ -4634,12 +4636,27 @@ cse_insn (rtx_insn *insn) && REGNO (dest) >= FIRST_PSEUDO_REGISTER) sets[i].src_volatile = 1; - /* Also do not record result of a non-volatile inline asm with - more than one result or with clobbers, we do not want CSE to - break the inline asm apart. */ else if (GET_CODE (src) == ASM_OPERANDS && GET_CODE (x) == PARALLEL) - sets[i].src_volatile = 1; + { + /* Do not record result of a non-volatile inline asm with + more than one result. */ + if (n_sets > 1) + sets[i].src_volatile = 1; + + int j, lim = XVECLEN (x, 0); + for (j = 0; j < lim; j++) + { + rtx y = XVECEXP (x, 0, j); + /* And do not record result of a non-volatile inline asm + with "memory" clobber. */ + if (GET_CODE (y) == CLOBBER && MEM_P (XEXP (y, 0))) + { + sets[i].src_volatile = 1; + break; + } + } + } #if 0 /* It is no longer clear why we used to do this, but it doesn't @@ -5230,8 +5247,8 @@ cse_insn (rtx_insn *insn) ; /* Look for a substitution that makes a valid insn. 
*/ - else if (validate_unshare_change - (insn, &SET_SRC (sets[i].rtl), trial, 0)) + else if (validate_unshare_change (insn, &SET_SRC (sets[i].rtl), + trial, 0)) { rtx new_rtx = canon_reg (SET_SRC (sets[i].rtl), insn); @@ -5593,6 +5610,12 @@ cse_insn (rtx_insn *insn) } elt = insert (src, classp, sets[i].src_hash, mode); elt->in_memory = sets[i].src_in_memory; + /* If inline asm has any clobbers, ensure we only reuse + existing inline asms and never try to put the ASM_OPERANDS + into an insn that isn't inline asm. */ + if (GET_CODE (src) == ASM_OPERANDS + && GET_CODE (x) == PARALLEL) + elt->cost = MAX_COST; sets[i].src_elt = classp = elt; } if (sets[i].src_const && sets[i].src_const_elt == 0 @@ -5906,6 +5929,9 @@ cse_insn (rtx_insn *insn) } src_elt = insert (new_src, classp, src_hash, new_mode); src_elt->in_memory = elt->in_memory; + if (GET_CODE (new_src) == ASM_OPERANDS + && elt->cost == MAX_COST) + src_elt->cost = MAX_COST; } else if (classp && classp != src_elt->first_same_value) /* Show that two things that we've seen before are --- gcc/testsuite/gcc.dg/pr63637-1.c.jj 2015-01-13 13:40:56.385782037 +0100 +++ gcc/testsuite/gcc.dg/pr63637-1.c 2015-01-13 13:41:08.931559978 +0100 @@ -0,0 +1,15 @@ +/* PR rtl-optimization/63637 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +foo (void) +{ + int a, b, c; + asm ("# Magic instruction" : "=r" (a)); + asm ("# Magic instruction" : "=r" (b)); + asm ("# Magic instruction" : "=r" (c)); + return a + b + c; +} + +/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */ --- gcc/testsuite/gcc.dg/pr63637-2.c.jj 2015-01-13 13:41:36.967063752 +0100 +++ gcc/testsuite/gcc.dg/pr63637-2.c 2015-01-13 13:42:04.758571844 +0100 @@ -0,0 +1,15 @@ +/* PR rtl-optimization/63637 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +foo (void) +{ + int a, b, c; + asm ("# Magic instruction" : "=r" (a) : "r" (0)); + asm ("# Magic instruction" : "=r" (b) : "r" (0)); + asm ("# Magic instruction" : "=r" (c) : "r" 
(0)); + return a + b + c; +} + +/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */ --- gcc/testsuite/gcc.dg/pr63637-3.c.jj 2015-01-13 13:43:58.820552956 +0100 +++ gcc/testsuite/gcc.dg/pr63637-3.c 2015-01-13 13:44:21.702147954 +0100 @@ -0,0 +1,15 @@ +/* PR rtl-optimization/63637 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +foo (void) +{ + int a, b, c; + asm ("# Magic instruction" : "=r" (a) : : "memory"); + asm ("# Magic instruction" : "=r" (b) : : "memory"); + asm ("# Magic instruction" : "=r" (c) : : "memory"); + return a + b + c; +} + +/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */ --- gcc/testsuite/gcc.dg/pr63637-4.c.jj 2015-01-13 13:44:01.624503326 +0100 +++ gcc/testsuite/gcc.dg/pr63637-4.c 2015-01-13 13:44:44.220749376 +0100 @@ -0,0 +1,15 @@ +/* PR rtl-optimization/63637 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +foo (void) +{ + int a, b, c; + asm ("# Magic instruction" : "=r" (a) : "r" (0) : "memory"); + asm ("# Magic instruction" : "=r" (b) : "r" (0) : "memory"); + asm ("# Magic instruction" : "=r" (c) : "r" (0) : "memory"); + return a + b + c; +} + +/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */ --- gcc/testsuite/gcc.dg/pr63637-5.c.jj 2015-01-13 13:46:36.837756064 +0100 +++ gcc/testsuite/gcc.dg/pr63637-5.c 2015-01-13 13:47:01.461320229 +0100 @@ -0,0 +1,15 @@ +/* PR rtl-optimization/63637 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +foo (void) +{ + int a, b, c, d, e, f; + asm ("# Magic instruction" : "=r" (a), "=r" (d)); + asm ("# Magic instruction" : "=r" (b), "=r" (e)); + asm ("# Magic instruction" : "=r" (c), "=r" (f)); + return a + b + c; +} + +/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */ --- gcc/testsuite/gcc.dg/pr63637-6.c.jj 2015-01-13 13:46:39.834703018 +0100 +++ gcc/testsuite/gcc.dg/pr63637-6.c 2015-01-13 13:47:27.915851986 +0100 @@ -0,0 +1,15 @@ +/* PR rtl-optimization/63637 */ +/* { dg-do compile } */ +/* { 
dg-options "-O2" } */ + +int +foo (void) +{ + int a, b, c, d, e, f; + asm ("# Magic instruction" : "=r" (a), "=r" (d) : "r" (0)); + asm ("# Magic instruction" : "=r" (b), "=r" (e) : "r" (0)); + asm ("# Magic instruction" : "=r" (c), "=r" (f) : "r" (0)); + return a + b + c; +} + +/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */ --- gcc/testsuite/gcc.target/i386/pr63637-1.c.jj 2015-01-13 13:40:13.996531691 +0100 +++ gcc/testsuite/gcc.target/i386/pr63637-1.c 2015-01-13 13:42:37.945984430 +0100 @@ -0,0 +1,15 @@ +/* PR rtl-optimization/63637 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +foo (void) +{ + int a, b, c; + asm ("# Magic instruction" : "=r" (a) : : "eax"); + asm ("# Magic instruction" : "=r" (b) : : "edx"); + asm ("# Magic instruction" : "=r" (c) : : "ecx"); + return a + b + c; +} + +/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */ --- gcc/testsuite/gcc.target/i386/pr63637-2.c.jj 2015-01-13 13:42:12.557433805 +0100 +++ gcc/testsuite/gcc.target/i386/pr63637-2.c 2015-01-13 13:42:30.656113460 +0100 @@ -0,0 +1,15 @@ +/* PR rtl-optimization/63637 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +foo (void) +{ + int a, b, c; + asm ("# Magic instruction" : "=r" (a) : "r" (0) : "eax"); + asm ("# Magic instruction" : "=r" (b) : "r" (0) : "edx"); + asm ("# Magic instruction" : "=r" (c) : "r" (0) : "ecx"); + return a + b + c; +} + +/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */ --- gcc/testsuite/gcc.target/i386/pr63637-3.c.jj 2015-01-13 13:43:06.407480663 +0100 +++ gcc/testsuite/gcc.target/i386/pr63637-3.c 2015-01-13 13:43:28.600087856 +0100 @@ -0,0 +1,15 @@ +/* PR rtl-optimization/63637 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +foo (void) +{ + int a, b, c; + asm ("# Magic instruction" : "=r" (a) : : "eax", "memory"); + asm ("# Magic instruction" : "=r" (b) : : "edx", "memory"); + asm ("# Magic instruction" : "=r" (c) : : "ecx", "memory"); + return a + b + c; 
+} + +/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */ --- gcc/testsuite/gcc.target/i386/pr63637-4.c.jj 2015-01-13 13:43:09.505425830 +0100 +++ gcc/testsuite/gcc.target/i386/pr63637-4.c 2015-01-13 13:43:44.769801653 +0100 @@ -0,0 +1,15 @@ +/* PR rtl-optimization/63637 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +foo (void) +{ + int a, b, c; + asm ("# Magic instruction" : "=r" (a) : "r" (0) : "eax", "memory"); + asm ("# Magic instruction" : "=r" (b) : "r" (0) : "edx", "memory"); + asm ("# Magic instruction" : "=r" (c) : "r" (0) : "ecx", "memory"); + return a + b + c; +} + +/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */ --- gcc/testsuite/gcc.target/i386/pr63637-5.c.jj 2015-01-13 13:45:38.747784252 +0100 +++ gcc/testsuite/gcc.target/i386/pr63637-5.c 2015-01-13 13:45:34.350862077 +0100 @@ -0,0 +1,15 @@ +/* PR rtl-optimization/63637 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +foo (void) +{ + int a, b, c, d, e, f; + asm ("# Magic instruction" : "=r" (a), "=r" (d) : : "eax"); + asm ("# Magic instruction" : "=r" (b), "=r" (e) : : "edx"); + asm ("# Magic instruction" : "=r" (c), "=r" (f) : : "ecx"); + return a + b + c; +} + +/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */ --- gcc/testsuite/gcc.target/i386/pr63637-6.c.jj 2015-01-13 13:45:54.923497943 +0100 +++ gcc/testsuite/gcc.target/i386/pr63637-6.c 2015-01-13 13:46:23.965983893 +0100 @@ -0,0 +1,15 @@ +/* PR rtl-optimization/63637 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +foo (void) +{ + int a, b, c, d, e, f; + asm ("# Magic instruction" : "=r" (a), "=r" (d) : "r" (0) : "eax"); + asm ("# Magic instruction" : "=r" (b), "=r" (e) : "r" (0) : "edx"); + asm ("# Magic instruction" : "=r" (c), "=r" (f) : "r" (0) : "ecx"); + return a + b + c; +} + +/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */ Jakub