[PATCH, i386]: Fix PR 58945, Improve atomic_compare_and_swap*_doubleword pattern

Uros Bizjak Tue, 31 Mar 2015 10:14:10 -0700

Hello!

As shown in the PR, the attached patch substantially improves the generated
code when a cmpxchg{8,16}b insn is involved. The following testcase:


--cut here--
__int128_t i;

int main()
{
  __atomic_store_16(&i, -1, 0);
  if (i != -1)
    __builtin_abort();
  return 0;
}
--cut here--

compiles with -O2 -mcx16 to:

        movq    i(%rip), %rax
        movq    $-1, %rcx
        movq    i+8(%rip), %rdx
.L2:
        movq    %rcx, %rbx
        lock cmpxchg16b        i(%rip)
        jne     .L2

where without the patch, the compiler generated:

        movq    i(%rip), %rsi
        movq    $-1, %rcx
        movq    i+8(%rip), %rdi
.L2:
        movq    %rsi, %rax
        movq    %rdi, %rdx
        movq    %rcx, %rbx
        lock cmpxchg16b i(%rip)
        movq    %rdx, %rdi
        movq    %rax, %rsi
        jne     .L2

2015-03-31  Uros Bizjak  <ubiz...@gmail.com>

    PR target/58945
    * config/i386/sync.md (atomic_compare_and_swap<dwi>_doubleword):
    Do not split operand 0 and operand 2 to halfmode.
    (atomic_compare_and_swap<mode>): Update for
    atomic_compare_and_swap<dwi>_doubleword changes.

Patch was bootstrapped and regression tested on x86_64-linux-gnu
{,-m32} and was committed to mainline.

Uros.

Index: config/i386/sync.md
===================================================================
--- config/i386/sync.md (revision 221786)
+++ config/i386/sync.md (working copy)
@@ -351,21 +351,12 @@
   else
     {
       machine_mode hmode = <CASHMODE>mode;
-      rtx lo_o, lo_e, lo_n, hi_o, hi_e, hi_n;
 
-      lo_o = operands[1];
-      lo_e = operands[3];
-      lo_n = operands[4];
-      hi_o = gen_highpart (hmode, lo_o);
-      hi_e = gen_highpart (hmode, lo_e);
-      hi_n = gen_highpart (hmode, lo_n);
-      lo_o = gen_lowpart (hmode, lo_o);
-      lo_e = gen_lowpart (hmode, lo_e);
-      lo_n = gen_lowpart (hmode, lo_n);
-
       emit_insn
        (gen_atomic_compare_and_swap<mode>_doubleword
-        (lo_o, hi_o, operands[2], lo_e, hi_e, lo_n, hi_n, operands[6]));
+        (operands[1], operands[2], operands[3],
+        gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]),
+        operands[6]));
     }
 
   ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
@@ -389,31 +380,26 @@
   "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
 
 ;; For double-word compare and swap, we are obliged to play tricks with
-;; the input newval (op5:op6) because the Intel register numbering does
+;; the input newval (op3:op4) because the Intel register numbering does
 ;; not match the gcc register numbering, so the pair must be CX:BX.
-;; That said, in order to take advantage of possible lower-subreg opts,
-;; treat all of the integral operands in the same way.
 
 (define_mode_attr doublemodesuffix [(SI "8") (DI "16")])
 
 (define_insn "atomic_compare_and_swap<dwi>_doubleword"
-  [(set (match_operand:DWIH 0 "register_operand" "=a")
-       (unspec_volatile:DWIH
-         [(match_operand:<DWI> 2 "memory_operand" "+m")
-          (match_operand:DWIH 3 "register_operand" "0")
-          (match_operand:DWIH 4 "register_operand" "1")
-          (match_operand:DWIH 5 "register_operand" "b")
-          (match_operand:DWIH 6 "register_operand" "c")
-          (match_operand:SI 7 "const_int_operand")]
+  [(set (match_operand:<DWI> 0 "register_operand" "=A")
+       (unspec_volatile:<DWI>
+         [(match_operand:<DWI> 1 "memory_operand" "+m")
+          (match_operand:<DWI> 2 "register_operand" "0")
+          (match_operand:DWIH 3 "register_operand" "b")
+          (match_operand:DWIH 4 "register_operand" "c")
+          (match_operand:SI 5 "const_int_operand")]
          UNSPECV_CMPXCHG))
-   (set (match_operand:DWIH 1 "register_operand" "=d")
-       (unspec_volatile:DWIH [(const_int 0)] UNSPECV_CMPXCHG))
-   (set (match_dup 2)
+   (set (match_dup 1)
        (unspec_volatile:<DWI> [(const_int 0)] UNSPECV_CMPXCHG))
    (set (reg:CCZ FLAGS_REG)
         (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
   "TARGET_CMPXCHG<doublemodesuffix>B"
-  "lock{%;} %K7cmpxchg<doublemodesuffix>b\t%2")
+  "lock{%;} %K5cmpxchg<doublemodesuffix>b\t%1")
 
 ;; For operand 2 nonmemory_operand predicate is used instead of
 ;; register_operand to allow combiner to better optimize atomic

[PATCH, i386]: Fix PR 58945, Improve atomic_compare_and_swap*_doubleword pattern

Reply via email to