This patch changes the code generated for __sync_type_compare_and_swap to

  ldxr reg; cmp; bne label; stlxr; cbnz; label: dmb ish; mov .., reg

This removes the acquire-barrier from the load and ends the operation with a
fence to prevent memory references appearing after the __sync operation from
being moved ahead of the store-release.

This also strengthens the acquire barrier generated for
__sync_lock_test_and_set (which, like compare-and-swap, is implemented as a
form of atomic exchange):

  ldaxr; stxr; cbnz

becomes

  ldxr; stxr; cbnz; dmb ish

Tested with check-gcc for aarch64-none-linux-gnu.

Ok for trunk?
Matthew

2015-05-21  Matthew Wahab  <matthew.wa...@arm.com>

	* config/aarch64/aarch64.c (aarch64_split_compare_and_swap): Check
	for __sync memory models, emit appropriate initial and final
	barriers.
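For reference, a minimal C sketch of the builtins this patch affects (the
function and variable names below are invented for illustration; they are not
taken from the patch or the testsuite). The comments restate the instruction
sequences described above; the exact code GCC emits will of course depend on
target options and register allocation.

  /* Illustration only, not part of the patch.  */
  static int lock;

  int
  try_lock (void)
  {
    /* Full-barrier compare-and-swap.  With this patch it is expected to
       expand to: ldxr; cmp; bne; stlxr; cbnz; dmb ish; mov.  */
    return __sync_val_compare_and_swap (&lock, 0, 1);
  }

  int
  exchange (void)
  {
    /* Acquire exchange: previously ldaxr; stxr; cbnz, now
       ldxr; stxr; cbnz; dmb ish.  */
    return __sync_lock_test_and_set (&lock, 1);
  }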
From 6f748034d25b75ea7829192d94e54189c2fbf99e Mon Sep 17 00:00:00 2001
From: Matthew Wahab <matthew.wa...@arm.com>
Date: Fri, 15 May 2015 09:31:06 +0100
Subject: [PATCH 2/3] [AArch64] Strengthen barriers for sync-compare-swap
 builtins.

Change-Id: I335771f2f42ea951d227f20f6cb9daa07330614d
---
 gcc/config/aarch64/aarch64.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 778571f..11a8cd0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9275,14 +9275,19 @@ aarch64_split_compare_and_swap (rtx operands[])
   bool is_weak;
   rtx_code_label *label1, *label2;
   rtx x, cond;
+  enum memmodel model;
+  rtx model_rtx;
+  rtx load_model_rtx;
 
   rval = operands[0];
   mem = operands[1];
   oldval = operands[2];
   newval = operands[3];
   is_weak = (operands[4] != const0_rtx);
+  model_rtx = operands[5];
   scratch = operands[7];
   mode = GET_MODE (mem);
+  model = memmodel_from_int (INTVAL (model_rtx));
 
   label1 = NULL;
   if (!is_weak)
@@ -9292,7 +9297,13 @@ aarch64_split_compare_and_swap (rtx operands[])
     }
   label2 = gen_label_rtx ();
 
-  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
+  /* A __sync operation will end with a fence so the load can be relaxed.  */
+  if (is_mm_sync (model))
+    load_model_rtx = GEN_INT (MEMMODEL_RELAXED);
+  else
+    load_model_rtx = model_rtx;
+
+  aarch64_emit_load_exclusive (mode, rval, mem, load_model_rtx);
 
   cond = aarch64_gen_compare_reg (NE, rval, oldval);
   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
@@ -9300,7 +9311,7 @@ aarch64_split_compare_and_swap (rtx operands[])
 			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
   aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
 
-  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
+  aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
 
   if (!is_weak)
     {
@@ -9317,6 +9328,10 @@ aarch64_split_compare_and_swap (rtx operands[])
     }
 
   emit_label (label2);
+
+  /* A __sync operation may need a final fence.  */
+  if (is_mm_sync (model))
+    aarch64_emit_post_barrier (model);
 }
 
 /* Split an atomic operation.  */
-- 
1.9.1
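As a worked example of the reordering the final fence rules out (a sketch
under invented names, not code from the patch or testsuite): the __sync
builtins are full barriers, so a plain store that follows the
compare-and-swap in program order must not become visible before it. The
store-release (stlxr) alone only orders earlier accesses; the trailing
dmb ish also keeps later ones behind.

  /* Sketch only: 'flag' and 'payload' are hypothetical.  */
  static int flag;
  static int payload;

  void
  publish (void)
  {
    __sync_bool_compare_and_swap (&flag, 0, 1);
    /* Under the full-barrier __sync semantics this store must not be
       reordered ahead of the compare-and-swap; on AArch64 the trailing
       dmb ish emitted by this patch is what prevents that.  */
    payload = 42;
  }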