Hi,

The patch aims to optimize
         movb    (%rdi), %al
         movq    %rdi, %rbx
         xorl    %esi, %eax, %edx
         movb    %dl, (%rdi)
         cmpb    %sil, %al
         jne
to
         xorb    %sil, (%rdi)
         movq    %rdi, %rbx
         jne

This removes two mov instructions and one cmp instruction.

Because APX NDD allows the destination register to differ from the source
registers, some existing peephole2 patterns no longer match.  Add new
peephole2 patterns for APX NDD.

Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?

BRs,
Lin

gcc/ChangeLog:

        * config/i386/i386.md (define_peephole2): Define some new peephole2 for
        APX NDD.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr49095-2.c: New test.
---
 gcc/config/i386/i386.md                   | 135 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr49095-2.c |  73 ++++++++++++
 2 files changed, 208 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr49095-2.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b7a18d583da..398cdf447b3 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -28212,6 +28212,41 @@ (define_peephole2
                       const0_rtx);
 })
 
+;; For APX NDD PLUS/MINUS/LOGIC, similar to the cmpelim-optimized pattern.
+;; When the NDD result register is only stored back to the memory source
+;; and is dead afterwards, fold the store into the arithmetic insn, e.g.
+;; decl (%rdi), %eax
+;; mov %eax, (%rdi)
+;; becomes
+;; decl (%rdi)
+(define_peephole2
+  [(parallel [(set (reg FLAGS_REG)
+                  (compare (match_operator:SWI 2 "plusminuslogic_operator"
+                             [(match_operand:SWI 0 "memory_operand")
+                              (match_operand:SWI 1 "<nonmemory_operand>")])
+                           (const_int 0)))
+             (set (match_operand:SWI 3 "register_operand") (match_dup 2))])
+   (set (match_dup 0) (match_dup 3))]
+  "TARGET_APX_NDD
+   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (2, operands[3])
+   && !reg_overlap_mentioned_p (operands[3], operands[0])
+   && ix86_match_ccmode (peep2_next_insn (0),
+                        (GET_CODE (operands[2]) == PLUS
+                         || GET_CODE (operands[2]) == MINUS)
+                        ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 4) (match_dup 6))
+             (set (match_dup 0) (match_dup 5))])]
+{
+  operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
+  operands[5]
+    = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+                     copy_rtx (operands[0]), operands[1]);
+  operands[6]
+    = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]),
+                      const0_rtx);
+})
+
 ;; Likewise for instances where we have a lea pattern.
 (define_peephole2
   [(set (match_operand:SWI 0 "register_operand")
@@ -28500,6 +28535,54 @@ (define_peephole2
                       const0_rtx);
 })
 
+;; For APX NDD XOR: reduce 2 mov and 1 cmp instruction, from
+;; movq (%rdi), %rax
+;; xorq %rsi, %rax, %rdx
+;; movq %rdx, (%rdi)
+;; cmpq %rsi, %rax
+;; jne
+;; to
+;; xorq %rsi, (%rdi)
+;; jne
+;; The NDD destination (operand 4) must not be used in the memory address.
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+       (match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (match_operand:SWI 4 "register_operand")
+                  (xor:SWI (match_operand:SWI 3 "register_operand")
+                           (match_operand:SWI 2 "<nonmemory_operand>")))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 4))
+   (set (reg:CCZ FLAGS_REG)
+       (compare:CCZ (match_operand:SWI 5 "register_operand")
+                    (match_operand:SWI 6 "<nonmemory_operand>")))]
+  "TARGET_APX_NDD
+   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && REGNO (operands[3]) == REGNO (operands[0])
+   && (rtx_equal_p (operands[0], operands[5])
+       ? rtx_equal_p (operands[2], operands[6])
+       : rtx_equal_p (operands[2], operands[5])
+        && rtx_equal_p (operands[0], operands[6]))
+   && peep2_reg_dead_p (3, operands[4])
+   && peep2_reg_dead_p (4, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && !reg_overlap_mentioned_p (operands[4], operands[1])
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], QImode)
+       || any_QIreg_operand (operands[2], QImode))"
+  [(parallel [(set (match_dup 7) (match_dup 9))
+             (set (match_dup 1) (match_dup 8))])]
+{
+  operands[7] = SET_DEST (PATTERN (peep2_next_insn (3)));
+  operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+                            operands[2]);
+  operands[9]
+    = gen_rtx_COMPARE (GET_MODE (operands[7]),
+                      copy_rtx (operands[8]),
+                      const0_rtx);
+})
+
 (define_peephole2
   [(set (match_operand:SWI12 0 "register_operand")
        (match_operand:SWI12 1 "memory_operand"))
@@ -28548,6 +28631,58 @@ (define_peephole2
                       const0_rtx);
 })
 
+;; For APX NDD XOR: reduce 2 mov and 1 cmp instruction, from
+;; movb (%rdi), %al
+;; xorl %esi, %eax, %edx
+;; movb %dl, (%rdi)
+;; cmpb %sil, %al
+;; jne
+;; to
+;; xorb %sil, (%rdi)
+;; jne
+;; The NDD destination (operand 4) must not be used in the memory address.
+(define_peephole2
+  [(set (match_operand:SWI12 0 "register_operand")
+       (match_operand:SWI12 1 "memory_operand"))
+   (parallel [(set (match_operand:SI 4 "register_operand")
+                  (xor:SI (match_operand:SI 3 "register_operand")
+                          (match_operand:SI 2 "<nonmemory_operand>")))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
+   (set (reg:CCZ FLAGS_REG)
+       (compare:CCZ (match_operand:SWI12 6 "register_operand")
+                    (match_operand:SWI12 7 "<nonmemory_operand>")))]
+  "TARGET_APX_NDD
+   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && REGNO (operands[3]) == REGNO (operands[0])
+   && REGNO (operands[5]) == REGNO (operands[4])
+   && (rtx_equal_p (operands[0], operands[6])
+       ? (REG_P (operands[2])
+         ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
+         : rtx_equal_p (operands[2], operands[7]))
+       : (rtx_equal_p (operands[0], operands[7])
+         && REG_P (operands[2])
+         && REGNO (operands[2]) == REGNO (operands[6])))
+   && peep2_reg_dead_p (3, operands[5])
+   && peep2_reg_dead_p (4, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && !reg_overlap_mentioned_p (operands[4], operands[1])
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], SImode)
+       || any_QIreg_operand (operands[2], SImode))"
+  [(parallel [(set (match_dup 8) (match_dup 10))
+             (set (match_dup 1) (match_dup 9))])]
+{
+  operands[8] = SET_DEST (PATTERN (peep2_next_insn (3)));
+  operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+                            gen_lowpart (<MODE>mode, operands[2]));
+  operands[10]
+    = gen_rtx_COMPARE (GET_MODE (operands[8]),
+                      copy_rtx (operands[9]),
+                      const0_rtx);
+})
+
 ;; Attempt to optimize away memory stores of values the memory already
 ;; has.  See PR79593.
 (define_peephole2
diff --git a/gcc/testsuite/gcc.target/i386/pr49095-2.c 
b/gcc/testsuite/gcc.target/i386/pr49095-2.c
new file mode 100644
index 00000000000..25bc6b79a43
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr49095-2.c
@@ -0,0 +1,73 @@
+/* PR rtl-optimization/49095 - APX NDD read-modify-write folding.  */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-Os -fno-shrink-wrap -masm=att -mapxf" } */
+
+void foo (void *);
+
+int *
+f1 (int *x)
+{
+  if (!--*x)		/* Decrement in memory, then test the result.  */
+    foo (x);
+  return x;
+}
+
+int
+g1 (int x)
+{
+  if (!--x)		/* Same test on a by-value argument.  */
+    foo ((void *) 0);
+  return x;
+}
+
+#define F(T, OP, OPN) \
+T *                    \
+f##T##OPN (T *x, T y)  \
+{                      \
+  *x OP y;             \
+  if (!*x)             \
+    foo (x);           \
+  return x;            \
+}                      \
+                       \
+T                      \
+g##T##OPN (T x, T y)   \
+{                      \
+  x OP y;              \
+  if (!x)              \
+    foo ((void *) 0);  \
+  return x;            \
+}                      \
+                       \
+T *                    \
+h##T##OPN (T *x)       \
+{                      \
+  *x OP 24;            \
+  if (!*x)             \
+    foo (x);           \
+  return x;            \
+}                      \
+                       \
+T                      \
+i##T##OPN (T x, T y)   \
+{                      \
+  x OP 24;             \
+  if (!x)              \
+    foo ((void *) 0);  \
+  return x;            \
+}
+
+#define G(T) \
+F (T, +=, plus)                \
+F (T, -=, minus)       \
+F (T, &=, and)         \
+F (T, |=, or)          \
+F (T, ^=, xor)
+
+G (char)
+G (short)
+G (int)
+G (long)
+
+/* { dg-final { scan-assembler-not "test\[lq\]" } } */
+/* { dg-final { scan-assembler-not "\\(%\[re\]di\\), %" } } */
-- 
2.31.1

Reply via email to