Hi,
With -moutline-atomics, GCC stops generating a barrier for __sync builtins: https://gcc.gnu.org/PR105162

This is a problem on CPUs without LSE instructions, where the ld/st exclusives do not guarantee a full barrier. The first attached patch adds the barrier to the outline-atomics functions on the path without LSE instructions. As a result, under -moutline-atomics the __atomic and __sync builtins now behave the same with and without LSE instructions.

To complete the change, the second patch makes GCC emit the barrier for __atomic builtins as well, i.e., independently of is_mm_sync().

Sebastian
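For illustration, a minimal example of the kind of code affected (not part of the patches; the helper name reflects the usual outline-atomics naming, so treat it as an assumption):

#include <stdint.h>

/* __sync_fetch_and_add implies a full barrier.  Under -moutline-atomics
   this call dispatches at run time to an outline helper (e.g.
   __aarch64_ldadd4_acq_rel); on CPUs without LSE that helper falls back
   to an LDXR/STXR loop, which is why the first patch adds the trailing
   dmb ish.  */
uint32_t
fetch_and_add (uint32_t *counter)
{
  return __sync_fetch_and_add (counter, 1);
}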
From b1ffa7d737427dc9414cb0c315f08b7c84ef647b Mon Sep 17 00:00:00 2001
From: Sebastian Pop <s...@amazon.com>
Date: Wed, 6 Apr 2022 21:42:11 +0000
Subject: [PATCH] [AArch64] add barrier to no LSE path in outline-atomics functions

---
 libgcc/config/aarch64/lse.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
index c353ec2215b..ac77c68e300 100644
--- a/libgcc/config/aarch64/lse.S
+++ b/libgcc/config/aarch64/lse.S
@@ -229,6 +229,7 @@ STARTFN	NAME(swp)
 0:	LDXR	s(0), [x1]
 	STXR	w(tmp1), s(tmp0), [x1]
 	cbnz	w(tmp1), 0b
+	dmb	ish
 	ret
 
 ENDFN	NAME(swp)
@@ -273,6 +274,7 @@ STARTFN	NAME(LDNM)
 	OP	s(tmp1), s(0), s(tmp0)
 	STXR	w(tmp2), s(tmp1), [x1]
 	cbnz	w(tmp2), 0b
+	dmb	ish
 	ret
 
 ENDFN	NAME(LDNM)
-- 
2.25.1
From 68c07f95157057f0167723b182f0ccffdac8a17e Mon Sep 17 00:00:00 2001
From: Sebastian Pop <s...@amazon.com>
Date: Thu, 7 Apr 2022 19:18:57 +0000
Subject: [PATCH 2/2] [AArch64] emit a barrier for __atomic builtins

---
 gcc/config/aarch64/aarch64.cc | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 18f80499079..be1b8d22c6a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22931,9 +22931,7 @@ aarch64_split_compare_and_swap (rtx operands[])
   if (strong_zero_p)
     aarch64_gen_compare_reg (NE, rval, const0_rtx);
 
-  /* Emit any final barrier needed for a __sync operation.  */
-  if (is_mm_sync (model))
-    aarch64_emit_post_barrier (model);
+  aarch64_emit_post_barrier (model);
 }
 
 /* Split an atomic operation.  */
@@ -22948,7 +22946,6 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
   machine_mode mode = GET_MODE (mem);
   machine_mode wmode = (mode == DImode ? DImode : SImode);
   const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
-  const bool is_sync = is_mm_sync (model);
   rtx_code_label *label;
   rtx x;
 
@@ -22966,11 +22963,7 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
 
   /* The initial load can be relaxed for a __sync operation since a final
      barrier will be emitted to stop code hoisting.  */
-  if (is_sync)
-    aarch64_emit_load_exclusive (mode, old_out, mem,
-				 GEN_INT (MEMMODEL_RELAXED));
-  else
-    aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
+  aarch64_emit_load_exclusive (mode, old_out, mem, GEN_INT (MEMMODEL_RELAXED));
 
   switch (code)
     {
@@ -23016,9 +23009,7 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
 			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
   aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
 
-  /* Emit any final barrier needed for a __sync operation.  */
-  if (is_sync)
-    aarch64_emit_post_barrier (model);
+  aarch64_emit_post_barrier (model);
 }
 
 static void
-- 
2.25.1
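For completeness, a similarly minimal (hypothetical) example of an __atomic builtin covered by the second patch:

#include <stdint.h>

/* With the second patch, the final barrier is emitted for __atomic
   builtins like this one as well, independently of is_mm_sync().  */
uint64_t
add_fetch (uint64_t *counter)
{
  return __atomic_add_fetch (counter, 1, __ATOMIC_SEQ_CST);
}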