Hi!

The following patch improves the last 8 cases (both -m64 and ia32)
that were still using a separate load/modify/store (RMW) cycle instead
of a single RMW instruction.  For xor we actually emit
new = old ^ other; new != old
rather than
new = old ^ other; new != 0
and thus the peephole2 needs to recognize that form too.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-03-29  Jakub Jelinek  <ja...@redhat.com>

        PR rtl-optimization/89865
        * config/i386/i386.md: Add peepholes for z = x; x ^= y; x != z.

        * gcc.target/i386/pr49095.c: Don't expect any RMW sequences.

--- gcc/config/i386/i386.md.jj  2019-03-29 15:58:05.350731242 +0100
+++ gcc/config/i386/i386.md     2019-03-29 18:43:45.251208879 +0100
@@ -18922,6 +18922,100 @@ (define_peephole2
                       const0_rtx);
 })
 
+;; Special cases for xor, where (x ^= y) != 0 is (misoptimized)
+;; into x = z; x ^= y; x != z
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")  ; insn 0: op0 = mem op1
+       (match_operand:SWI 1 "memory_operand"))
+   (set (match_operand:SWI 3 "register_operand") (match_dup 0))  ; op3 = op0
+   (parallel [(set (match_operand:SWI 4 "register_operand")  ; op4 ^= op2
+                  (xor:SWI (match_dup 4)
+                           (match_operand:SWI 2 "<nonmemory_operand>")))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 4))  ; mem op1 = op4
+   (set (reg:CCZ FLAGS_REG)  ; flags = compare (op5, op6)
+       (compare:CCZ (match_operand:SWI 5 "register_operand")
+                    (match_operand:SWI 6 "<nonmemory_operand>")))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && (REGNO (operands[4]) == REGNO (operands[0])
+       || REGNO (operands[4]) == REGNO (operands[3]))  /* op4 is op0 or op3 */
+   && (rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
+                            ? 3 : 0], operands[5])  /* op5 == the un-xored copy */
+       ? rtx_equal_p (operands[2], operands[6])  /* ... then op6 must be op2 */
+       : rtx_equal_p (operands[2], operands[5])  /* else swapped: op5 == op2 */
+        && rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
+                                 ? 3 : 0], operands[6]))  /* op6 == un-xored copy */
+   && peep2_reg_dead_p (4, operands[4])  /* xor result checked dead at offset 4 */
+   && peep2_reg_dead_p (5, operands[REGNO (operands[4]) == REGNO (operands[0])
+                                   ? 3 : 0])  /* un-xored copy checked at offset 5 */
+   && !reg_overlap_mentioned_p (operands[0], operands[1])  /* op0 not in mem addr */
+   && !reg_overlap_mentioned_p (operands[0], operands[2])  /* op0 not in op2 */
+   && !reg_overlap_mentioned_p (operands[3], operands[0])  /* op3 distinct from op0 */
+   && !reg_overlap_mentioned_p (operands[3], operands[1])  /* op3 not in mem addr */
+   && !reg_overlap_mentioned_p (operands[3], operands[2])  /* op3 not in op2 */
+   && (<MODE>mode != QImode  /* QImode: op2 must be immediate or any_QIreg */
+       || immediate_operand (operands[2], QImode)
+       || any_QIreg_operand (operands[2], QImode))"
+  [(parallel [(set (match_dup 7) (match_dup 9))  ; op7 = compare (mem ^ op2, 0)
+             (set (match_dup 1) (match_dup 8))])]  ; mem op1 = mem op1 ^ op2
+{
+  operands[7] = SET_DEST (PATTERN (peep2_next_insn (4)));  /* dest set by insn 4 */
+  operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),  /* mem ^ op2 */
+                            operands[2]);
+  operands[9]  /* compare of the xor result against zero, in op7's mode */
+    = gen_rtx_COMPARE (GET_MODE (operands[7]),
+                      copy_rtx (operands[8]),
+                      const0_rtx);
+})
+
+(define_peephole2
+  [(set (match_operand:SWI12 0 "register_operand")  ; insn 0: op0 = mem op1
+       (match_operand:SWI12 1 "memory_operand"))
+   (set (match_operand:SWI12 3 "register_operand") (match_dup 0))  ; op3 = op0
+   (parallel [(set (match_operand:SI 4 "register_operand")  ; op4 ^= op2, in SImode
+                  (xor:SI (match_dup 4)
+                          (match_operand:SI 2 "<nonmemory_operand>")))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))  ; mem op1 = op5
+   (set (reg:CCZ FLAGS_REG)  ; flags = compare (op6, op7)
+       (compare:CCZ (match_operand:SWI12 6 "register_operand")
+                    (match_operand:SWI12 7 "<nonmemory_operand>")))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && (REGNO (operands[5]) == REGNO (operands[0])
+       || REGNO (operands[5]) == REGNO (operands[3]))  /* op5 is op0 or op3 */
+   && REGNO (operands[5]) == REGNO (operands[4])  /* stored reg has op4's REGNO */
+   && (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
+                            ? 3 : 0], operands[6])  /* op6 == the un-xored copy */
+       ? (REG_P (operands[2])  /* ... then op7 must match op2 */
+         ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
+         : rtx_equal_p (operands[2], operands[7]))
+       : (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
+                               ? 3 : 0], operands[7])  /* else op7 == un-xored copy */
+         && REG_P (operands[2])  /* and op6 matches op2 by REGNO; presumably */
+         && REGNO (operands[2]) == REGNO (operands[6])))  /* since modes differ */
+   && peep2_reg_dead_p (4, operands[5])  /* stored reg checked dead at offset 4 */
+   && peep2_reg_dead_p (5, operands[REGNO (operands[5]) == REGNO (operands[0])
+                                   ? 3 : 0])  /* un-xored copy checked at offset 5 */
+   && !reg_overlap_mentioned_p (operands[0], operands[1])  /* op0 not in mem addr */
+   && !reg_overlap_mentioned_p (operands[0], operands[2])  /* op0 not in op2 */
+   && !reg_overlap_mentioned_p (operands[3], operands[0])  /* op3 distinct from op0 */
+   && !reg_overlap_mentioned_p (operands[3], operands[1])  /* op3 not in mem addr */
+   && !reg_overlap_mentioned_p (operands[3], operands[2])  /* op3 not in op2 */
+   && (<MODE>mode != QImode  /* QImode: op2 must be immediate or any_QIreg */
+       || immediate_operand (operands[2], SImode)
+       || any_QIreg_operand (operands[2], SImode))"
+  [(parallel [(set (match_dup 8) (match_dup 10))  ; op8 = compare (mem ^ op2, 0)
+             (set (match_dup 1) (match_dup 9))])]  ; mem op1 = mem op1 ^ op2
+{
+  operands[8] = SET_DEST (PATTERN (peep2_next_insn (4)));  /* dest set by insn 4 */
+  operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),  /* mem ^ op2 */
+                            gen_lowpart (<MODE>mode, operands[2]));  /* op2 narrowed */
+  operands[10]  /* compare of the xor result against zero, in op8's mode */
+    = gen_rtx_COMPARE (GET_MODE (operands[8]),
+                      copy_rtx (operands[9]),
+                      const0_rtx);
+})
+
 ;; Attempt to optimize away memory stores of values the memory already
 ;; has.  See PR79593.
 (define_peephole2
--- gcc/testsuite/gcc.target/i386/pr49095.c.jj  2019-03-29 16:00:58.526926046 +0100
+++ gcc/testsuite/gcc.target/i386/pr49095.c     2019-03-29 17:33:38.749010111 +0100
@@ -71,7 +71,5 @@ G (int)
 G (long)
 
 /* { dg-final { scan-assembler-not "test\[lq\]" } } */
-/* The {f,h}{char,short,int,long}xor functions aren't optimized into
-   a RMW instruction, so need load, modify and store.  FIXME eventually.  */
-/* { dg-final { scan-assembler-times "\\(%eax\\), %" 8 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "\\(%\[re\]di\\), %" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\\(%eax\\), %" { target { ia32 } } } } */
+/* { dg-final { scan-assembler-not "\\(%\[re\]di\\), %" { target { ! ia32 } } } } */

        Jakub

Reply via email to