The CMPXCHG instruction sets the ZF flag if the values in the destination
operand and the EAX register are equal; otherwise the ZF flag is cleared and
the value from the destination operand is loaded into EAX. The following assembly:

        movl    %esi, %eax
        lock cmpxchgl   %edx, (%rdi)
        cmpl    %esi, %eax
        sete    %al

can be optimized by removing the unneeded comparison, since a set ZF flag
signals that EAX was not updated.

2020-07-15  Uroš Bizjak  <ubiz...@gmail.com>

gcc/ChangeLog:
    PR target/96189
    * config/i386/sync.md
    (peephole2 to remove unneeded compare after CMPXCHG): New pattern.

gcc/testsuite/ChangeLog:
    PR target/96189
    * gcc.target/i386/pr96189.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 9ab5456b227..d203e9d1ecb 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -594,6 +594,41 @@
   "TARGET_CMPXCHG"
   "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
 
+(define_peephole2  ;; Drop a compare that re-tests the result of a preceding CMPXCHG.
+  [(set (match_operand:SWI 0 "register_operand")  ;; Insn 1: copy operand 1 into operand 0.
+       (match_operand:SWI 1 "general_operand"))
+   (parallel [(set (match_dup 0)  ;; Insn 2: CMPXCHG; operand 0 is both an input and the result.
+                  (unspec_volatile:SWI
+                    [(match_operand:SWI 2 "memory_operand")  ;; Memory operand, also set by this parallel.
+                     (match_dup 0)
+                     (match_operand:SWI 3 "register_operand")
+                     (match_operand:SI 4 "const_int_operand")]
+                    UNSPECV_CMPXCHG))
+             (set (match_dup 2)
+                  (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
+             (set (reg:CCZ FLAGS_REG)  ;; The CMPXCHG parallel itself sets CCZ.
+                  (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])
+   (set (reg:CCZ FLAGS_REG)  ;; Insn 3: explicit CCZ compare of operands 5 and 6.
+       (compare:CCZ (match_operand:SWI 5 "register_operand")
+                    (match_operand:SWI 6 "general_operand")))]  ;; Condition: 5/6 must equal 0/1, in either order.
+  "(rtx_equal_p (operands[0], operands[5])
+    && rtx_equal_p (operands[1], operands[6]))
+   || (rtx_equal_p (operands[0], operands[6])
+       && rtx_equal_p (operands[1], operands[5]))"
+  [(set (match_dup 0)  ;; Replacement: insns 1 and 2 re-emitted unchanged; the compare is omitted.
+       (match_dup 1))
+   (parallel [(set (match_dup 0)
+                  (unspec_volatile:SWI
+                    [(match_dup 2)
+                     (match_dup 0)
+                     (match_dup 3)
+                     (match_dup 4)]
+                    UNSPECV_CMPXCHG))
+             (set (match_dup 2)
+                  (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
+             (set (reg:CCZ FLAGS_REG)
+                  (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])])
+
 ;; For operand 2 nonmemory_operand predicate is used instead of
 ;; register_operand to allow combiner to better optimize atomic
 ;; additions of constants.
diff --git a/gcc/testsuite/gcc.target/i386/pr96189.c b/gcc/testsuite/gcc.target/i386/pr96189.c
new file mode 100644
index 00000000000..1505e483b94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr96189.c
@@ -0,0 +1,12 @@
+/* PR target/96189 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "\tcmpb\t" } } */
+
+_Bool
+foo (unsigned char *x, unsigned char y, unsigned char z)
+{
+  unsigned char y_old = y;  /* Save the expected value; the builtin may overwrite y.  */
+  __atomic_compare_exchange_n (x, &y, z, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+  return y == y_old;  /* This re-comparison must not be emitted as a separate cmpb.  */
+}

Reply via email to