https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/151354
From 7cc5fed6d24f78114607309a5d1d0aba3a1553e4 Mon Sep 17 00:00:00 2001 From: Simon Tatham <simon.tat...@arm.com> Date: Wed, 30 Jul 2025 13:46:30 +0100 Subject: [PATCH 1/3] [clang][ARM] Fix build failure in <arm_acle.h> for __swp In commit d5985905ae8e5b2 I introduced a Sema check that prohibits `__builtin_arm_ldrex` and `__builtin_arm_strex` for data sizes not supported by the target architecture version. However, `arm_acle.h` unconditionally uses those builtins with a 32-bit data size. So now including that header will cause a build failure on Armv6-M, or historic architectures like Armv5. To fix it, `arm_acle.h` now queries the compiler-defined `__ARM_FEATURE_LDREX` macro (also fixed recently in commit 34f59d79209268e so that it matches the target architecture). If 32-bit LDREX isn't available it will fall back to the older SWP instruction, or failing that (on Armv6-M), a libcall. While I was modifying the header anyway, I also renamed the local variable `v` inside `__swp` so that it starts with `__`, avoiding any risk of user code having #defined `v`. 
--- clang/lib/Headers/arm_acle.h | 22 +++++++++++++++++++--- clang/test/CodeGen/arm_acle_swp.c | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGen/arm_acle_swp.c diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h index 5cfa3d023a7d5..a74144baadaba 100644 --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -55,11 +55,27 @@ __chkfeat(uint64_t __features) { /* 7.5 Swap */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __swp(uint32_t __x, volatile uint32_t *__p) { - uint32_t v; + uint32_t __v; +#if __ARM_FEATURE_LDREX & 4 do - v = __builtin_arm_ldrex(__p); + __v = __builtin_arm_ldrex(__p); while (__builtin_arm_strex(__x, __p)); - return v; +#elif !__ARM_ARCH_6M__ + /* Fall back to the deprecated SWP instruction, on historic architecture + * versions without load/store exclusive instructions on 32-bit data. ACLE is + * clear that we mustn't use SWP in any _other_ situation, but permits us to + * use it if there's no other option. */ + __asm__("swp %0, %1, [%2]" : "=r"(__v) : "r"(__x), "r"(__p) : "memory"); +#else + /* Armv6-M doesn't have either of LDREX or SWP. ACLE suggests this + * implementation, which Clang lowers to the 'cmpxchg' operation in LLVM IR. + * On Armv6-M, LLVM turns that into a libcall to __atomic_compare_exchange_4, + * so the runtime will need to implement that. 
*/ + do + __v = *__p; + while (__sync_bool_compare_and_swap(__p, __v, __x)); +#endif + return __v; } /* 7.6 Memory prefetch intrinsics */ diff --git a/clang/test/CodeGen/arm_acle_swp.c b/clang/test/CodeGen/arm_acle_swp.c new file mode 100644 index 0000000000000..3a99737163afe --- /dev/null +++ b/clang/test/CodeGen/arm_acle_swp.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -ffreestanding -triple thumbv7m-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=LDREX +// RUN: %clang_cc1 -ffreestanding -triple armv7a-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=LDREX +// RUN: %clang_cc1 -ffreestanding -triple armv6-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=LDREX +// RUN: %clang_cc1 -ffreestanding -triple thumbv6m-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=SYNC +// RUN: %clang_cc1 -ffreestanding -triple armv5-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=SWP + +// REQUIRES: arm-registered-target + +#include <arm_acle.h> + +// LDREX: call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) {{.*}}) +// LDREX: call i32 @llvm.arm.strex.p0(i32 {{.*}}, ptr elementtype(i32) {{.*}}) + +// SWP: call i32 asm "swp $0, $1, [$2]", "=r,r,r,~{memory}"(i32 {{.*}}, ptr {{.*}}) + +// SYNC: cmpxchg ptr {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4 +uint32_t test_swp(uint32_t x, volatile void *p) { + return __swp(x, p); +} From f95f6076c91bbdee5220d7d41f2b549d206bde47 Mon Sep 17 00:00:00 2001 From: Simon Tatham <simon.tat...@arm.com> Date: Thu, 31 Jul 2025 09:58:01 +0100 Subject: [PATCH 2/3] Use __atomic_exchange_n to improve the v6-M case --- clang/lib/Headers/arm_acle.h | 11 ++++------- clang/test/CodeGen/arm_acle_swp.c | 10 +++++----- 2 files changed, 9 insertions(+), 12 deletions(-) diff 
--git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h index a74144baadaba..d769e8310847a 100644 --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -67,13 +67,10 @@ __swp(uint32_t __x, volatile uint32_t *__p) { * use it if there's no other option. */ __asm__("swp %0, %1, [%2]" : "=r"(__v) : "r"(__x), "r"(__p) : "memory"); #else - /* Armv6-M doesn't have either of LDREX or SWP. ACLE suggests this - * implementation, which Clang lowers to the 'cmpxchg' operation in LLVM IR. - * On Armv6-M, LLVM turns that into a libcall to __atomic_compare_exchange_4, - * so the runtime will need to implement that. */ - do - __v = *__p; - while (__sync_bool_compare_and_swap(__p, __v, __x)); + /* Armv6-M doesn't have either of LDREX or SWP. LLVM turns the following + * builtin into a libcall to __atomic_exchange_4, so the runtime will need to + * implement that. */ + __v = __atomic_exchange_n(__p, __x, __ATOMIC_RELAXED); #endif return __v; } diff --git a/clang/test/CodeGen/arm_acle_swp.c b/clang/test/CodeGen/arm_acle_swp.c index 3a99737163afe..fe87b8c1f7230 100644 --- a/clang/test/CodeGen/arm_acle_swp.c +++ b/clang/test/CodeGen/arm_acle_swp.c @@ -1,19 +1,19 @@ // RUN: %clang_cc1 -ffreestanding -triple thumbv7m-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=LDREX // RUN: %clang_cc1 -ffreestanding -triple armv7a-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=LDREX // RUN: %clang_cc1 -ffreestanding -triple armv6-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=LDREX -// RUN: %clang_cc1 -ffreestanding -triple thumbv6m-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=SYNC +// RUN: %clang_cc1 -ffreestanding -triple thumbv6m-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s 
-check-prefix=ATOMIC // RUN: %clang_cc1 -ffreestanding -triple armv5-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=SWP // REQUIRES: arm-registered-target #include <arm_acle.h> -// LDREX: call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) {{.*}}) -// LDREX: call i32 @llvm.arm.strex.p0(i32 {{.*}}, ptr elementtype(i32) {{.*}}) +// LDREX: call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) {{.*}}) +// LDREX: call i32 @llvm.arm.strex.p0(i32 {{.*}}, ptr elementtype(i32) {{.*}}) -// SWP: call i32 asm "swp $0, $1, [$2]", "=r,r,r,~{memory}"(i32 {{.*}}, ptr {{.*}}) +// SWP: call i32 asm "swp $0, $1, [$2]", "=r,r,r,~{memory}"(i32 {{.*}}, ptr {{.*}}) -// SYNC: cmpxchg ptr {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4 +// ATOMIC: atomicrmw volatile xchg ptr {{.*}}, i32 {{.*}} monotonic, align 4 uint32_t test_swp(uint32_t x, volatile void *p) { return __swp(x, p); } From bfa6a3d73b7c542017d40e3ea458346a8fd2af78 Mon Sep 17 00:00:00 2001 From: Simon Tatham <simon.tat...@arm.com> Date: Fri, 1 Aug 2025 11:55:26 +0100 Subject: [PATCH 3/3] Use __atomic_exchange_n for modern systems and Linux too --- clang/lib/Headers/arm_acle.h | 41 ++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h index d769e8310847a..fcc2075121b44 100644 --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -56,21 +56,34 @@ __chkfeat(uint64_t __features) { static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __swp(uint32_t __x, volatile uint32_t *__p) { uint32_t __v; -#if __ARM_FEATURE_LDREX & 4 - do - __v = __builtin_arm_ldrex(__p); - while (__builtin_arm_strex(__x, __p)); -#elif !__ARM_ARCH_6M__ - /* Fall back to the deprecated SWP instruction, on historic architecture - * versions without load/store exclusive instructions on 32-bit data. 
ACLE is - * clear that we mustn't use SWP in any _other_ situation, but permits us to - * use it if there's no other option. */ - __asm__("swp %0, %1, [%2]" : "=r"(__v) : "r"(__x), "r"(__p) : "memory"); -#else - /* Armv6-M doesn't have either of LDREX or SWP. LLVM turns the following - * builtin into a libcall to __atomic_exchange_4, so the runtime will need to - * implement that. */ +#if (__ARM_FEATURE_LDREX & 4) || __ARM_ARCH_6M__ || __linux__ + /* + * Using this clang builtin is sensible in most situations. Where + * LDREX and STREX are available, it will compile to a loop using + * them. Otherwise it will compile to a libcall, requiring the + * runtime to provide that library function. + * + * That's unavoidable on Armv6-M, which has no atomic instructions + * at all (not even SWP), so in that situation the user will just + * have to provide an implementation of __atomic_exchange_4 (perhaps + * it would temporarily disable interrupts, and then do a separate + * load and store). + * + * We also use the libcall strategy on pre-Armv7 Linux targets, on + * the theory that Linux's runtime support library _will_ provide a + * suitable libcall, and it's better to use that than the SWP + * instruction because then when the same binary is run on a later + * Linux system the libcall implementation will use LDREX instead. + */ __v = __atomic_exchange_n(__p, __x, __ATOMIC_RELAXED); +#else + /* + * But for older Arm architectures when the target is not Linux, we + * fall back to using the SWP instruction via inline assembler. ACLE + * is clear that we're allowed to do this, but shouldn't do it if we + * have a better alternative. + */ + __asm__("swp %0, %1, [%2]" : "=r"(__v) : "r"(__x), "r"(__p) : "memory"); #endif return __v; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits