ARMv8.1 adds instructions for atomic compare-and-swap with optional memory
ordering specifiers. This patch adds the compare-and-swap instructions and
changes the atomic_compare_and_swap patterns to use them.
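For context, the C-level operation affected is the __atomic_compare_exchange
family of built-ins. A minimal illustration (not part of the patch; assumes a
target with the new instructions enabled, e.g. -march=armv8.1-a):

	/* Illustrative only.  On an LSE-capable target, this compare-exchange
	   can expand to a single CAS instruction instead of a
	   load-exclusive/store-exclusive loop.  */
	int
	try_update (int *ptr, int expected, int desired)
	{
	  return __atomic_compare_exchange_n (ptr, &expected, desired,
					      0 /* weak */,
					      __ATOMIC_SEQ_CST,
					      __ATOMIC_SEQ_CST);
	}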
The changes to the atomic_compare_and_swap pattern make the new instructions
available as an alternative for the existing expander, to be used when the
target supports them. The existing expander is reused so that it can generate
the code needed to meet the requirements of the atomic_compare_and_swap name.

Using the atomic CAS instructions, the code generated for a call to
__atomic_compare_exchange (ptr, expected, desired, weak, smodel, fmodel)
becomes:

	mov	r, r1
	cas<mo><sz>	r, r2, [r0]
	cmp	r, r1
	cset	r0, eq
	cbnz	r0, L
	strb	r, [r1]
L:
	ret

where r0 = ptr, r1 = *expected, r2 = *desired and r is some temporary.
<mo> is one of {'', 'a', 'l', 'al'}, depending on smodel, and <sz> is one of
{'', 'b', 'h'}, depending on the data size.

Tested the series for aarch64-none-linux-gnu with native bootstrap and make
check, and for aarch64-none-elf with cross-compiled check-gcc. Also tested
aarch64-none-elf with cross-compiled check-gcc on an emulator that supports
ARMv8.1.

Ok for trunk?
Matthew

2015-08-12  Matthew Wahab  <matthew.wa...@arm.com>

	* config/aarch64/aarch64-protos.h (aarch64_gen_atomic_cas): Declare.
	* config/aarch64/aarch64.c (aarch64_expand_compare_and_swap): Choose
	appropriate instruction pattern for the target.
	(aarch64_gen_atomic_cas): New.
	* config/aarch64/atomics.md (UNSPECV_ATOMIC_CAS): New.
	(atomic_compare_and_swap<mode>_1): Rename to
	aarch64_compare_and_swap<mode>.  Fix some indentation.
	(aarch64_compare_and_swap<mode>_lse): New.
	(aarch64_atomic_cas<mode>): New.
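For reference, the mapping from smodel to the emitted mnemonic, as implemented
by the new aarch64_atomic_cas<mode> patterns in the patch below, is:

	__ATOMIC_RELAXED			cas<sz>
	__ATOMIC_CONSUME, __ATOMIC_ACQUIRE	casa<sz>
	__ATOMIC_RELEASE			casl<sz>
	__ATOMIC_ACQ_REL, __ATOMIC_SEQ_CST	casal<sz>

so, for example, a word-sized compare-exchange with smodel = __ATOMIC_ACQUIRE
emits casa.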
>From a84d8f8202a30fca18ed79e617d5ba3422eb021f Mon Sep 17 00:00:00 2001
From: Matthew Wahab <matthew.wa...@arm.com>
Date: Mon, 10 Aug 2015 16:59:18 +0100
Subject: [PATCH 2/3] Add and use atomic CAS instruction.

Change-Id: Ie40f345d414fc9dc6c8cbac0eb9457547aa9ec2d
---
 gcc/config/aarch64/aarch64-protos.h |   1 +
 gcc/config/aarch64/aarch64.c        |  66 ++++++++++++++++++--
 gcc/config/aarch64/atomics.md       | 117 +++++++++++++++++++++++++++++++++---
 3 files changed, 172 insertions(+), 12 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 32b5d09..0b09d49 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -362,6 +362,7 @@ rtx aarch64_load_tp (rtx);
 
 void aarch64_expand_compare_and_swap (rtx op[]);
 void aarch64_split_compare_and_swap (rtx op[]);
+void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
 void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
 
 bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 6da7245..259e049 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -10749,7 +10749,23 @@ aarch64_expand_compare_and_swap (rtx operands[])
 {
   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
   machine_mode mode, cmp_mode;
-  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+  typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+  int idx;
+  gen_cas_fn gen;
+  const gen_cas_fn split_cas[] =
+  {
+    gen_aarch64_compare_and_swapqi,
+    gen_aarch64_compare_and_swaphi,
+    gen_aarch64_compare_and_swapsi,
+    gen_aarch64_compare_and_swapdi
+  };
+  const gen_cas_fn atomic_cas[] =
+  {
+    gen_aarch64_compare_and_swapqi_lse,
+    gen_aarch64_compare_and_swaphi_lse,
+    gen_aarch64_compare_and_swapsi_lse,
+    gen_aarch64_compare_and_swapdi_lse
+  };
 
   bval = operands[0];
   rval = operands[1];
@@ -10794,13 +10810,17 @@ aarch64_expand_compare_and_swap (rtx operands[])
 
   switch (mode)
     {
-    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
-    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
-    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
-    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
+    case QImode: idx = 0; break;
+    case HImode: idx = 1; break;
+    case SImode: idx = 2; break;
+    case DImode: idx = 3; break;
     default:
      gcc_unreachable ();
    }
+  if (TARGET_LSE)
+    gen = atomic_cas[idx];
+  else
+    gen = split_cas[idx];
 
   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
 
@@ -10829,6 +10849,42 @@ aarch64_emit_post_barrier (enum memmodel model)
     }
 }
 
+/* Emit an atomic compare-and-swap operation.  RVAL is the destination register
+   for the data in memory.  EXPECTED is the value expected to be in memory.
+   DESIRED is the value to store to memory.  MEM is the memory location.  MODEL
+   is the memory ordering to use.  */
+
+void
+aarch64_gen_atomic_cas (rtx rval, rtx mem,
+			rtx expected, rtx desired,
+			rtx model)
+{
+  rtx (*gen) (rtx, rtx, rtx, rtx);
+  machine_mode mode;
+
+  mode = GET_MODE (mem);
+
+  switch (mode)
+    {
+    case QImode: gen = gen_aarch64_atomic_casqi; break;
+    case HImode: gen = gen_aarch64_atomic_cashi; break;
+    case SImode: gen = gen_aarch64_atomic_cassi; break;
+    case DImode: gen = gen_aarch64_atomic_casdi; break;
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Move the expected value into the CAS destination register.  */
+  emit_insn (gen_rtx_SET (rval, expected));
+
+  /* Emit the CAS.  */
+  emit_insn (gen (rval, mem, desired, model));
+
+  /* Compare the expected value with the value loaded by the CAS, to establish
+     whether the swap was made.  */
+  aarch64_gen_compare_reg (EQ, rval, expected);
+}
+
 /* Split a compare and swap pattern.  */
 
 void
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 1a38ac0..7082f61 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -26,6 +26,7 @@
     UNSPECV_STL			; Represent an atomic store or store-release.
     UNSPECV_ATOMIC_CMPSW	; Represent an atomic compare swap.
     UNSPECV_ATOMIC_EXCHG	; Represent an atomic exchange.
+    UNSPECV_ATOMIC_CAS		; Represent an atomic CAS.
     UNSPECV_ATOMIC_OP		; Represent an atomic operation.
 ])
 
@@ -45,10 +46,10 @@
   }
 )
 
-(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+(define_insn_and_split "aarch64_compare_and_swap<mode>"
   [(set (reg:CC CC_REGNUM)					;; bool out
     (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
-   (set (match_operand:SI 0 "register_operand" "=&r")	   ;; val out
+   (set (match_operand:SI 0 "register_operand" "=&r")		;; val out
    (zero_extend:SI
     (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q")))	;; memory
   (set (match_dup 1)
    (unspec_volatile:SHORT
     [(match_operand:SI 2 "aarch64_plus_operand" "rI")	;; expected
      (match_operand:SHORT 3 "register_operand" "r")	;; desired
      (match_operand:SI 4 "const_int_operand")		;; is_weak
      (match_operand:SI 5 "const_int_operand")		;; mod_s
-     (match_operand:SI 6 "const_int_operand")]	;; mod_f
+     (match_operand:SI 6 "const_int_operand")]		;; mod_f
     UNSPECV_ATOMIC_CMPSW))
   (clobber (match_scratch:SI 7 "=&r"))]
  ""
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    aarch64_split_compare_and_swap (operands);
    DONE;
  }
)
@@ -70,17 +71,17 @@
   }
 )
 
-(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+(define_insn_and_split "aarch64_compare_and_swap<mode>"
   [(set (reg:CC CC_REGNUM)					;; bool out
     (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
    (set (match_operand:SI 0 "register_operand" "=&r")		;; val out
-    (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q"))	   ;; memory
+    (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q"))	;; memory
   (set (match_dup 1)
    (unspec_volatile:GPI
     [(match_operand:GPI 2 "aarch64_plus_operand" "rI")	;; expect
      (match_operand:GPI 3 "register_operand" "r")	;; desired
-     (match_operand:SI 4 "const_int_operand")	;; is_weak
-     (match_operand:SI 5 "const_int_operand")	;; mod_s
+     (match_operand:SI 4 "const_int_operand")		;; is_weak
+     (match_operand:SI 5 "const_int_operand")		;; mod_s
      (match_operand:SI 6 "const_int_operand")]		;; mod_f
     UNSPECV_ATOMIC_CMPSW))
   (clobber (match_scratch:SI 7 "=&r"))]
@@ -94,6 +95,57 @@
   }
 )
 
+(define_insn_and_split "aarch64_compare_and_swap<mode>_lse"
+  [(set (reg:CC CC_REGNUM)					;; bool out
+    (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+   (set (match_operand:SI 0 "register_operand" "=&r")		;; val out
+    (zero_extend:SI
+      (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
+   (set (match_dup 1)
+    (unspec_volatile:SHORT
+      [(match_operand:SI 2 "aarch64_plus_operand" "rI")	;; expected
+       (match_operand:SHORT 3 "register_operand" "r")		;; desired
+       (match_operand:SI 4 "const_int_operand")		;; is_weak
+       (match_operand:SI 5 "const_int_operand")		;; mod_s
+       (match_operand:SI 6 "const_int_operand")]		;; mod_f
+      UNSPECV_ATOMIC_CMPSW))]
+  "TARGET_LSE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_gen_atomic_cas (operands[0], operands[1],
+			    operands[2], operands[3],
+			    operands[5]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "aarch64_compare_and_swap<mode>_lse"
+  [(set (reg:CC CC_REGNUM)					;; bool out
+    (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+   (set (match_operand:GPI 0 "register_operand" "=&r")		;; val out
val out + (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory + (set (match_dup 1) + (unspec_volatile:GPI + [(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect + (match_operand:GPI 3 "register_operand" "r") ;; desired + (match_operand:SI 4 "const_int_operand") ;; is_weak + (match_operand:SI 5 "const_int_operand") ;; mod_s + (match_operand:SI 6 "const_int_operand")] ;; mod_f + UNSPECV_ATOMIC_CMPSW))] + "TARGET_LSE " + "#" + "&& reload_completed" + [(const_int 0)] + { + aarch64_gen_atomic_cas (operands[0], operands[1], + operands[2], operands[3], + operands[5]); + DONE; + } +) + (define_insn_and_split "atomic_exchange<mode>" [(set (match_operand:ALLI 0 "register_operand" "=&r") ;; output (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory @@ -370,3 +422,54 @@ return "dmb\\tish"; } ) + +;; ARMv8.1 LSE instructions. + +;; Atomic compare-and-swap: HI and smaller modes. + +(define_insn "aarch64_atomic_cas<mode>" + [(set (match_operand:SI 0 "register_operand" "+&r") ;; out + (zero_extend:SI + (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory. + (set (match_dup 1) + (unspec_volatile:SHORT + [(match_dup 0) + (match_operand:SHORT 2 "register_operand" "r") ;; value. + (match_operand:SI 3 "const_int_operand" "")] ;; model. + UNSPECV_ATOMIC_CAS))] + "TARGET_LSE && reload_completed" +{ + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model)) + return "cas<atomic_sfx>\t%<w>0, %<w>2, %1"; + else if (is_mm_acquire (model) || is_mm_consume (model)) + return "casa<atomic_sfx>\t%<w>0, %<w>2, %1"; + else if (is_mm_release (model)) + return "casl<atomic_sfx>\t%<w>0, %<w>2, %1"; + else + return "casal<atomic_sfx>\t%<w>0, %<w>2, %1"; +}) + +;; Atomic compare-and-swap: SI and larger modes. + +(define_insn "aarch64_atomic_cas<mode>" + [(set (match_operand:GPI 0 "register_operand" "+&r") ;; out + (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory. + (set (match_dup 1) + (unspec_volatile:GPI + [(match_dup 0) + (match_operand:GPI 2 "register_operand" "r") ;; value. + (match_operand:SI 3 "const_int_operand" "")] ;; model. + UNSPECV_ATOMIC_CAS))] + "TARGET_LSE && reload_completed" +{ + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model)) + return "cas<atomic_sfx>\t%<w>0, %<w>2, %1"; + else if (is_mm_acquire (model) || is_mm_consume (model)) + return "casa<atomic_sfx>\t%<w>0, %<w>2, %1"; + else if (is_mm_release (model)) + return "casl<atomic_sfx>\t%<w>0, %<w>2, %1"; + else + return "casal<atomic_sfx>\t%<w>0, %<w>2, %1"; +}) -- 1.9.1