https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/115545
>From 9b8cb87e0e12899df3a5af7f312f637a6c242921 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Fri, 8 Nov 2024 15:42:04 -0600 Subject: [PATCH 1/4] [Clang] Add support for scoped atomic thread fence Summary: Previously we added support for all of the atomic GNU extensions with optional memory scopes except for `__atomic_thread_fence`. This patch adds support for that. This should ideally allow us to generically emit these LLVM scopes. --- clang/include/clang/Basic/Builtins.td | 6 + clang/lib/CodeGen/CGBuiltin.cpp | 129 +++++++++++++++++++ clang/test/CodeGen/scoped-fence-ops.c | 179 ++++++++++++++++++++++++++ 3 files changed, 314 insertions(+) create mode 100644 clang/test/CodeGen/scoped-fence-ops.c diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 4360e0bf9840f13..82ba26085e9267f 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1995,6 +1995,12 @@ def AtomicThreadFence : Builtin { let Prototype = "void(int)"; } +def ScopedAtomicThreadFence : Builtin { + let Spellings = ["__scoped_atomic_thread_fence"]; + let Attributes = [NoThrow]; + let Prototype = "void(int, int)"; +} + def AtomicSignalFence : Builtin { let Spellings = ["__atomic_signal_fence"]; let Attributes = [NoThrow]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 430ac5626f89d7f..c8f90df2546f256 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5162,6 +5162,135 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.SetInsertPoint(ContBB); return RValue::get(nullptr); } + case Builtin::BI__scoped_atomic_thread_fence: { + auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic); + + Value *Order = EmitScalarExpr(E->getArg(0)); + Value *Scope = EmitScalarExpr(E->getArg(1)); + if (isa<llvm::ConstantInt>(Order) && isa<llvm::ConstantInt>(Scope)) { + int Ord = 
cast<llvm::ConstantInt>(Order)->getZExtValue(); + int Scp = cast<llvm::ConstantInt>(Scope)->getZExtValue(); + SyncScope SS = ScopeModel->isValid(Scp) + ? ScopeModel->map(Scp) + : ScopeModel->map(ScopeModel->getFallBackValue()); + switch (Ord) { + case 0: // memory_order_relaxed + default: // invalid order + break; + case 1: // memory_order_consume + case 2: // memory_order_acquire + Builder.CreateFence( + llvm::AtomicOrdering::Acquire, + getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS, + llvm::AtomicOrdering::Acquire, + getLLVMContext())); + break; + case 3: // memory_order_release + Builder.CreateFence( + llvm::AtomicOrdering::Release, + getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS, + llvm::AtomicOrdering::Release, + getLLVMContext())); + break; + case 4: // memory_order_acq_rel + Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, + getTargetHooks().getLLVMSyncScopeID( + getLangOpts(), SS, + llvm::AtomicOrdering::AcquireRelease, + getLLVMContext())); + break; + case 5: // memory_order_seq_cst + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, + getTargetHooks().getLLVMSyncScopeID( + getLangOpts(), SS, + llvm::AtomicOrdering::SequentiallyConsistent, + getLLVMContext())); + break; + } + return RValue::get(nullptr); + } + + llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn); + + llvm::DenseMap<llvm::BasicBlock *, llvm::AtomicOrdering> OrderBBs; + if (isa<llvm::ConstantInt>(Order)) { + int Ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); + switch (Ord) { + case 0: // memory_order_relaxed + default: // invalid order + ContBB->eraseFromParent(); + return RValue::get(nullptr); + case 1: // memory_order_consume + case 2: // memory_order_acquire + OrderBBs[Builder.GetInsertBlock()] = llvm::AtomicOrdering::Acquire; + break; + case 3: // memory_order_release + OrderBBs[Builder.GetInsertBlock()] = llvm::AtomicOrdering::Release; + break; + case 4: // memory_order_acq_rel + OrderBBs[Builder.GetInsertBlock()] = 
+ llvm::AtomicOrdering::AcquireRelease; + break; + case 5: // memory_order_seq_cst + OrderBBs[Builder.GetInsertBlock()] = + llvm::AtomicOrdering::SequentiallyConsistent; + break; + } + } else { + llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; + AcquireBB = createBasicBlock("acquire", CurFn); + ReleaseBB = createBasicBlock("release", CurFn); + AcqRelBB = createBasicBlock("acqrel", CurFn); + SeqCstBB = createBasicBlock("seqcst", CurFn); + + Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); + llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); + SI->addCase(Builder.getInt32(1), AcquireBB); + SI->addCase(Builder.getInt32(2), AcquireBB); + SI->addCase(Builder.getInt32(3), ReleaseBB); + SI->addCase(Builder.getInt32(4), AcqRelBB); + SI->addCase(Builder.getInt32(5), SeqCstBB); + + OrderBBs[AcquireBB] = llvm::AtomicOrdering::Acquire; + OrderBBs[ReleaseBB] = llvm::AtomicOrdering::Release; + OrderBBs[AcqRelBB] = llvm::AtomicOrdering::AcquireRelease; + OrderBBs[SeqCstBB] = llvm::AtomicOrdering::SequentiallyConsistent; + } + + for (auto &[OrderBB, Ordering] : OrderBBs) { + Builder.SetInsertPoint(OrderBB); + if (isa<llvm::ConstantInt>(Scope)) { + int Scp = cast<llvm::ConstantInt>(Scope)->getZExtValue(); + SyncScope SS = ScopeModel->isValid(Scp) + ? 
ScopeModel->map(Scp) + : ScopeModel->map(ScopeModel->getFallBackValue()); + Builder.CreateFence(Ordering, + getTargetHooks().getLLVMSyncScopeID( + getLangOpts(), SS, Ordering, getLLVMContext())); + Builder.CreateBr(ContBB); + } else { + llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs; + for (unsigned Scp : ScopeModel->getRuntimeValues()) + BBs[Scp] = createBasicBlock(getAsString(ScopeModel->map(Scp)), CurFn); + + auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false); + llvm::SwitchInst *SI = Builder.CreateSwitch(SC, ContBB); + for (unsigned Scp : ScopeModel->getRuntimeValues()) { + auto *B = BBs[Scp]; + SI->addCase(Builder.getInt32(Scp), B); + + Builder.SetInsertPoint(B); + Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID( + getLangOpts(), ScopeModel->map(Scp), + Ordering, getLLVMContext())); + Builder.CreateBr(ContBB); + } + } + } + + Builder.SetInsertPoint(ContBB); + return RValue::get(nullptr); + } case Builtin::BI__builtin_signbit: case Builtin::BI__builtin_signbitf: diff --git a/clang/test/CodeGen/scoped-fence-ops.c b/clang/test/CodeGen/scoped-fence-ops.c new file mode 100644 index 000000000000000..cb48176d37c8522 --- /dev/null +++ b/clang/test/CodeGen/scoped-fence-ops.c @@ -0,0 +1,179 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \ +// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s +//: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \ +//: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s + +// +// SPIRV-LABEL: define hidden spir_func void @fe1a( +// SPIRV-SAME: ) #[[ATTR0:[0-9]+]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: fence syncscope("workgroup") release +// SPIRV-NEXT: ret void +// AMDGCN-LABEL: define hidden void @fe1a( +// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: fence 
syncscope("workgroup-one-as") release +// AMDGCN-NEXT: ret void +// +void fe1a() { + __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP); +} + +// +// SPIRV-LABEL: define hidden spir_func void @fe1b( +// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR]], align 4 +// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR]], align 4 +// SPIRV-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ +// SPIRV-NEXT: i32 1, label %[[ACQUIRE:.*]] +// SPIRV-NEXT: i32 2, label %[[ACQUIRE]] +// SPIRV-NEXT: i32 3, label %[[RELEASE:.*]] +// SPIRV-NEXT: i32 4, label %[[ACQREL:.*]] +// SPIRV-NEXT: i32 5, label %[[SEQCST:.*]] +// SPIRV-NEXT: ] +// SPIRV: [[ATOMIC_SCOPE_CONTINUE]]: +// SPIRV-NEXT: ret void +// SPIRV: [[ACQUIRE]]: +// SPIRV-NEXT: fence syncscope("workgroup") acquire +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// SPIRV: [[RELEASE]]: +// SPIRV-NEXT: fence syncscope("workgroup") release +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// SPIRV: [[ACQREL]]: +// SPIRV-NEXT: fence syncscope("workgroup") acq_rel +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// SPIRV: [[SEQCST]]: +// SPIRV-NEXT: fence syncscope("workgroup") seq_cst +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-LABEL: define hidden void @fe1b( +// AMDGCN-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr +// AMDGCN-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4 +// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ +// AMDGCN-NEXT: i32 1, label %[[ACQUIRE:.*]] +// AMDGCN-NEXT: i32 2, label %[[ACQUIRE]] +// AMDGCN-NEXT: i32 
3, label %[[RELEASE:.*]] +// AMDGCN-NEXT: i32 4, label %[[ACQREL:.*]] +// AMDGCN-NEXT: i32 5, label %[[SEQCST:.*]] +// AMDGCN-NEXT: ] +// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]: +// AMDGCN-NEXT: ret void +// AMDGCN: [[ACQUIRE]]: +// AMDGCN-NEXT: fence syncscope("workgroup-one-as") acquire +// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN: [[RELEASE]]: +// AMDGCN-NEXT: fence syncscope("workgroup-one-as") release +// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN: [[ACQREL]]: +// AMDGCN-NEXT: fence syncscope("workgroup-one-as") acq_rel +// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN: [[SEQCST]]: +// AMDGCN-NEXT: fence syncscope("workgroup") seq_cst +// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// +void fe1b(int ord) { + __scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP); +} + +// +// SPIRV-LABEL: define hidden spir_func void @fe1c( +// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR]], align 4 +// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR]], align 4 +// SPIRV-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ +// SPIRV-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]] +// SPIRV-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]] +// SPIRV-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]] +// SPIRV-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]] +// SPIRV-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]] +// SPIRV-NEXT: ] +// SPIRV: [[ATOMIC_SCOPE_CONTINUE]]: +// SPIRV-NEXT: ret void +// SPIRV: [[DEVICE_SCOPE]]: +// SPIRV-NEXT: fence syncscope("device") release +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// SPIRV: [[SYSTEM_SCOPE]]: +// SPIRV-NEXT: fence release +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// SPIRV: [[WORKGROUP_SCOPE]]: +// SPIRV-NEXT: fence syncscope("workgroup") release +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// SPIRV: [[WAVEFRONT_SCOPE]]: +// 
SPIRV-NEXT: fence syncscope("subgroup") release +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// SPIRV: [[SINGLE_SCOPE]]: +// SPIRV-NEXT: fence syncscope("singlethread") release +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN-LABEL: define hidden void @fe1c( +// AMDGCN-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr +// AMDGCN-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4 +// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ +// AMDGCN-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]] +// AMDGCN-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]] +// AMDGCN-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]] +// AMDGCN-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]] +// AMDGCN-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]] +// AMDGCN-NEXT: ] +// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]: +// AMDGCN-NEXT: ret void +// AMDGCN: [[DEVICE_SCOPE]]: +// AMDGCN-NEXT: fence syncscope("agent-one-as") release +// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN: [[SYSTEM_SCOPE]]: +// AMDGCN-NEXT: fence syncscope("one-as") release +// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN: [[WORKGROUP_SCOPE]]: +// AMDGCN-NEXT: fence syncscope("workgroup-one-as") release +// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN: [[WAVEFRONT_SCOPE]]: +// AMDGCN-NEXT: fence syncscope("wavefront-one-as") release +// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// AMDGCN: [[SINGLE_SCOPE]]: +// AMDGCN-NEXT: fence syncscope("singlethread-one-as") release +// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// +void fe1c(int scope) { + __scoped_atomic_thread_fence(__ATOMIC_RELEASE, scope); +} + +// +// SPIRV-LABEL: define hidden spir_func void @fe2a( +// SPIRV-SAME: ) 
#[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: ret void +// AMDGCN-LABEL: define hidden void @fe2a( +// AMDGCN-SAME: ) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: ret void +// +void fe2a() { + __scoped_atomic_thread_fence(999, __MEMORY_SCOPE_SYSTEM); +} + +// +// SPIRV-LABEL: define hidden spir_func void @fe2b( +// SPIRV-SAME: ) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: fence release +// SPIRV-NEXT: ret void +// AMDGCN-LABEL: define hidden void @fe2b( +// AMDGCN-SAME: ) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: fence syncscope("one-as") release +// AMDGCN-NEXT: ret void +// +void fe2b() { + __scoped_atomic_thread_fence(__ATOMIC_RELEASE, 999); +} >From 7f6e87d6a09bdc3bcc5de0cdc392459d2fbac40b Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Fri, 8 Nov 2024 16:02:29 -0600 Subject: [PATCH 2/4] x86 test --- clang/test/CodeGen/scoped-fence-ops.c | 216 ++++++++++++++++++-------- 1 file changed, 147 insertions(+), 69 deletions(-) diff --git a/clang/test/CodeGen/scoped-fence-ops.c b/clang/test/CodeGen/scoped-fence-ops.c index cb48176d37c8522..376cb11e84d3da5 100644 --- a/clang/test/CodeGen/scoped-fence-ops.c +++ b/clang/test/CodeGen/scoped-fence-ops.c @@ -1,53 +1,33 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \ // RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s -//: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \ -//: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \ +// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=x86_64-unknown-linux-gnu -ffreestanding \ +// RUN: -fvisibility=hidden | FileCheck --check-prefix=X86_64 %s +// AMDGCN-LABEL: define hidden 
void @fe1a( +// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: fence syncscope("workgroup-one-as") release +// AMDGCN-NEXT: ret void // // SPIRV-LABEL: define hidden spir_func void @fe1a( // SPIRV-SAME: ) #[[ATTR0:[0-9]+]] { // SPIRV-NEXT: [[ENTRY:.*:]] // SPIRV-NEXT: fence syncscope("workgroup") release // SPIRV-NEXT: ret void -// AMDGCN-LABEL: define hidden void @fe1a( -// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: fence syncscope("workgroup-one-as") release -// AMDGCN-NEXT: ret void +// +// X86_64-LABEL: define hidden void @fe1a( +// X86_64-SAME: ) #[[ATTR0:[0-9]+]] { +// X86_64-NEXT: [[ENTRY:.*:]] +// X86_64-NEXT: fence release +// X86_64-NEXT: ret void // void fe1a() { __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP); } -// -// SPIRV-LABEL: define hidden spir_func void @fe1b( -// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] { -// SPIRV-NEXT: [[ENTRY:.*:]] -// SPIRV-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4 -// SPIRV-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR]], align 4 -// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR]], align 4 -// SPIRV-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ -// SPIRV-NEXT: i32 1, label %[[ACQUIRE:.*]] -// SPIRV-NEXT: i32 2, label %[[ACQUIRE]] -// SPIRV-NEXT: i32 3, label %[[RELEASE:.*]] -// SPIRV-NEXT: i32 4, label %[[ACQREL:.*]] -// SPIRV-NEXT: i32 5, label %[[SEQCST:.*]] -// SPIRV-NEXT: ] -// SPIRV: [[ATOMIC_SCOPE_CONTINUE]]: -// SPIRV-NEXT: ret void -// SPIRV: [[ACQUIRE]]: -// SPIRV-NEXT: fence syncscope("workgroup") acquire -// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// SPIRV: [[RELEASE]]: -// SPIRV-NEXT: fence syncscope("workgroup") release -// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// SPIRV: [[ACQREL]]: -// SPIRV-NEXT: fence syncscope("workgroup") acq_rel -// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// SPIRV: [[SEQCST]]: -// SPIRV-NEXT: fence syncscope("workgroup") seq_cst -// 
SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] // AMDGCN-LABEL: define hidden void @fe1b( // AMDGCN-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] @@ -77,41 +57,66 @@ void fe1a() { // AMDGCN-NEXT: fence syncscope("workgroup") seq_cst // AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] // -void fe1b(int ord) { - __scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP); -} - -// -// SPIRV-LABEL: define hidden spir_func void @fe1c( -// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { +// SPIRV-LABEL: define hidden spir_func void @fe1b( +// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] { // SPIRV-NEXT: [[ENTRY:.*:]] -// SPIRV-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4 -// SPIRV-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR]], align 4 -// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR]], align 4 +// SPIRV-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR]], align 4 +// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR]], align 4 // SPIRV-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ -// SPIRV-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]] -// SPIRV-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]] -// SPIRV-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]] -// SPIRV-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]] -// SPIRV-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]] +// SPIRV-NEXT: i32 1, label %[[ACQUIRE:.*]] +// SPIRV-NEXT: i32 2, label %[[ACQUIRE]] +// SPIRV-NEXT: i32 3, label %[[RELEASE:.*]] +// SPIRV-NEXT: i32 4, label %[[ACQREL:.*]] +// SPIRV-NEXT: i32 5, label %[[SEQCST:.*]] // SPIRV-NEXT: ] // SPIRV: [[ATOMIC_SCOPE_CONTINUE]]: // SPIRV-NEXT: ret void -// SPIRV: [[DEVICE_SCOPE]]: -// SPIRV-NEXT: fence syncscope("device") release -// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// SPIRV: [[SYSTEM_SCOPE]]: -// SPIRV-NEXT: fence release +// SPIRV: [[ACQUIRE]]: +// SPIRV-NEXT: fence syncscope("workgroup") acquire // SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// SPIRV: 
[[WORKGROUP_SCOPE]]: +// SPIRV: [[RELEASE]]: // SPIRV-NEXT: fence syncscope("workgroup") release // SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// SPIRV: [[WAVEFRONT_SCOPE]]: -// SPIRV-NEXT: fence syncscope("subgroup") release +// SPIRV: [[ACQREL]]: +// SPIRV-NEXT: fence syncscope("workgroup") acq_rel // SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// SPIRV: [[SINGLE_SCOPE]]: -// SPIRV-NEXT: fence syncscope("singlethread") release +// SPIRV: [[SEQCST]]: +// SPIRV-NEXT: fence syncscope("workgroup") seq_cst // SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// +// X86_64-LABEL: define hidden void @fe1b( +// X86_64-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] { +// X86_64-NEXT: [[ENTRY:.*:]] +// X86_64-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4 +// X86_64-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR]], align 4 +// X86_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR]], align 4 +// X86_64-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ +// X86_64-NEXT: i32 1, label %[[ACQUIRE:.*]] +// X86_64-NEXT: i32 2, label %[[ACQUIRE]] +// X86_64-NEXT: i32 3, label %[[RELEASE:.*]] +// X86_64-NEXT: i32 4, label %[[ACQREL:.*]] +// X86_64-NEXT: i32 5, label %[[SEQCST:.*]] +// X86_64-NEXT: ] +// X86_64: [[ATOMIC_SCOPE_CONTINUE]]: +// X86_64-NEXT: ret void +// X86_64: [[ACQUIRE]]: +// X86_64-NEXT: fence acquire +// X86_64-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// X86_64: [[RELEASE]]: +// X86_64-NEXT: fence release +// X86_64-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// X86_64: [[ACQREL]]: +// X86_64-NEXT: fence acq_rel +// X86_64-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// X86_64: [[SEQCST]]: +// X86_64-NEXT: fence seq_cst +// X86_64-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// +void fe1b(int ord) { + __scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP); +} + // AMDGCN-LABEL: define hidden void @fe1c( // AMDGCN-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] @@ -144,35 +149,108 @@ void fe1b(int ord) { // AMDGCN-NEXT: fence 
syncscope("singlethread-one-as") release // AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] // +// SPIRV-LABEL: define hidden spir_func void @fe1c( +// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR]], align 4 +// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR]], align 4 +// SPIRV-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ +// SPIRV-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]] +// SPIRV-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]] +// SPIRV-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]] +// SPIRV-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]] +// SPIRV-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]] +// SPIRV-NEXT: ] +// SPIRV: [[ATOMIC_SCOPE_CONTINUE]]: +// SPIRV-NEXT: ret void +// SPIRV: [[DEVICE_SCOPE]]: +// SPIRV-NEXT: fence syncscope("device") release +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// SPIRV: [[SYSTEM_SCOPE]]: +// SPIRV-NEXT: fence release +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// SPIRV: [[WORKGROUP_SCOPE]]: +// SPIRV-NEXT: fence syncscope("workgroup") release +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// SPIRV: [[WAVEFRONT_SCOPE]]: +// SPIRV-NEXT: fence syncscope("subgroup") release +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// SPIRV: [[SINGLE_SCOPE]]: +// SPIRV-NEXT: fence syncscope("singlethread") release +// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// +// X86_64-LABEL: define hidden void @fe1c( +// X86_64-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { +// X86_64-NEXT: [[ENTRY:.*:]] +// X86_64-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4 +// X86_64-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR]], align 4 +// X86_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR]], align 4 +// X86_64-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [ +// X86_64-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]] +// X86_64-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]] 
+// X86_64-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]] +// X86_64-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]] +// X86_64-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]] +// X86_64-NEXT: ] +// X86_64: [[ATOMIC_SCOPE_CONTINUE]]: +// X86_64-NEXT: ret void +// X86_64: [[DEVICE_SCOPE]]: +// X86_64-NEXT: fence release +// X86_64-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// X86_64: [[SYSTEM_SCOPE]]: +// X86_64-NEXT: fence release +// X86_64-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// X86_64: [[WORKGROUP_SCOPE]]: +// X86_64-NEXT: fence release +// X86_64-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// X86_64: [[WAVEFRONT_SCOPE]]: +// X86_64-NEXT: fence release +// X86_64-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// X86_64: [[SINGLE_SCOPE]]: +// X86_64-NEXT: fence release +// X86_64-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// void fe1c(int scope) { __scoped_atomic_thread_fence(__ATOMIC_RELEASE, scope); } +// AMDGCN-LABEL: define hidden void @fe2a( +// AMDGCN-SAME: ) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: ret void // // SPIRV-LABEL: define hidden spir_func void @fe2a( // SPIRV-SAME: ) #[[ATTR0]] { // SPIRV-NEXT: [[ENTRY:.*:]] // SPIRV-NEXT: ret void -// AMDGCN-LABEL: define hidden void @fe2a( -// AMDGCN-SAME: ) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: ret void +// +// X86_64-LABEL: define hidden void @fe2a( +// X86_64-SAME: ) #[[ATTR0]] { +// X86_64-NEXT: [[ENTRY:.*:]] +// X86_64-NEXT: ret void // void fe2a() { __scoped_atomic_thread_fence(999, __MEMORY_SCOPE_SYSTEM); } +// AMDGCN-LABEL: define hidden void @fe2b( +// AMDGCN-SAME: ) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: fence syncscope("one-as") release +// AMDGCN-NEXT: ret void // // SPIRV-LABEL: define hidden spir_func void @fe2b( // SPIRV-SAME: ) #[[ATTR0]] { // SPIRV-NEXT: [[ENTRY:.*:]] // SPIRV-NEXT: fence release // SPIRV-NEXT: ret void -// AMDGCN-LABEL: define hidden void @fe2b( -// AMDGCN-SAME: ) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: 
fence syncscope("one-as") release -// AMDGCN-NEXT: ret void +// +// X86_64-LABEL: define hidden void @fe2b( +// X86_64-SAME: ) #[[ATTR0]] { +// X86_64-NEXT: [[ENTRY:.*:]] +// X86_64-NEXT: fence release +// X86_64-NEXT: ret void // void fe2b() { __scoped_atomic_thread_fence(__ATOMIC_RELEASE, 999); >From e8896fdaa21a6b46a45222bd527829dad418dc7b Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Fri, 8 Nov 2024 17:05:13 -0600 Subject: [PATCH 3/4] comments --- clang/lib/CodeGen/CGBuiltin.cpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c8f90df2546f256..24893f6c20340e0 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5213,9 +5213,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn); llvm::DenseMap<llvm::BasicBlock *, llvm::AtomicOrdering> OrderBBs; - if (isa<llvm::ConstantInt>(Order)) { - int Ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); - switch (Ord) { + if (auto Ord = dyn_cast<llvm::ConstantInt>(Order)) { + switch (Ord->getZExtValue()) { case 0: // memory_order_relaxed default: // invalid order ContBB->eraseFromParent(); @@ -5237,11 +5236,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, break; } } else { - llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; - AcquireBB = createBasicBlock("acquire", CurFn); - ReleaseBB = createBasicBlock("release", CurFn); - AcqRelBB = createBasicBlock("acqrel", CurFn); - SeqCstBB = createBasicBlock("seqcst", CurFn); + llvm::BasicBlock *AcquireBB = createBasicBlock("acquire", CurFn); + llvm::BasicBlock *ReleaseBB = createBasicBlock("release", CurFn); + llvm::BasicBlock *AcqRelBB = createBasicBlock("acqrel", CurFn); + llvm::BasicBlock *SeqCstBB = createBasicBlock("seqcst", CurFn); Order = 
Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); @@ -5259,10 +5257,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, for (auto &[OrderBB, Ordering] : OrderBBs) { Builder.SetInsertPoint(OrderBB); - if (isa<llvm::ConstantInt>(Scope)) { - int Scp = cast<llvm::ConstantInt>(Scope)->getZExtValue(); - SyncScope SS = ScopeModel->isValid(Scp) - ? ScopeModel->map(Scp) + if (auto Scp = dyn_cast<llvm::ConstantInt>(Scope)) { + SyncScope SS = ScopeModel->isValid(Scp->getZExtValue()) + ? ScopeModel->map(Scp->getZExtValue()) : ScopeModel->map(ScopeModel->getFallBackValue()); Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID( >From 5b14ae0b1861a058d083c8a6dde2f7b83c791996 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Mon, 11 Nov 2024 17:30:41 -0600 Subject: [PATCH 4/4] Comments --- clang/lib/CodeGen/CGBuiltin.cpp | 36 +++++++++++++++++---------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 24893f6c20340e0..01e76ffeb1d24a1 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5167,13 +5167,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *Order = EmitScalarExpr(E->getArg(0)); Value *Scope = EmitScalarExpr(E->getArg(1)); - if (isa<llvm::ConstantInt>(Order) && isa<llvm::ConstantInt>(Scope)) { - int Ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); - int Scp = cast<llvm::ConstantInt>(Scope)->getZExtValue(); - SyncScope SS = ScopeModel->isValid(Scp) - ? ScopeModel->map(Scp) + if (auto Ord = dyn_cast<llvm::ConstantInt>(Order); + auto Scp = dyn_cast<llvm::ConstantInt>(Scope)) { + SyncScope SS = ScopeModel->isValid(Scp->getZExtValue()) + ? 
ScopeModel->map(Scp->getZExtValue()) : ScopeModel->map(ScopeModel->getFallBackValue()); - switch (Ord) { + switch (Ord->getZExtValue()) { case 0: // memory_order_relaxed default: // invalid order break; @@ -5212,7 +5211,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn); - llvm::DenseMap<llvm::BasicBlock *, llvm::AtomicOrdering> OrderBBs; + llvm::SmallVector<std::pair<llvm::BasicBlock *, llvm::AtomicOrdering>> OrderBBs; if (auto Ord = dyn_cast<llvm::ConstantInt>(Order)) { switch (Ord->getZExtValue()) { case 0: // memory_order_relaxed @@ -5221,18 +5220,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); case 1: // memory_order_consume case 2: // memory_order_acquire - OrderBBs[Builder.GetInsertBlock()] = llvm::AtomicOrdering::Acquire; + OrderBBs.emplace_back(Builder.GetInsertBlock(), + llvm::AtomicOrdering::Acquire); break; case 3: // memory_order_release - OrderBBs[Builder.GetInsertBlock()] = llvm::AtomicOrdering::Release; + OrderBBs.emplace_back(Builder.GetInsertBlock(), + llvm::AtomicOrdering::Release); break; case 4: // memory_order_acq_rel - OrderBBs[Builder.GetInsertBlock()] = - llvm::AtomicOrdering::AcquireRelease; + OrderBBs.emplace_back(Builder.GetInsertBlock(), + llvm::AtomicOrdering::AcquireRelease); break; case 5: // memory_order_seq_cst - OrderBBs[Builder.GetInsertBlock()] = - llvm::AtomicOrdering::SequentiallyConsistent; + OrderBBs.emplace_back(Builder.GetInsertBlock(), + llvm::AtomicOrdering::SequentiallyConsistent); break; } } else { @@ -5249,10 +5250,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, SI->addCase(Builder.getInt32(4), AcqRelBB); SI->addCase(Builder.getInt32(5), SeqCstBB); - OrderBBs[AcquireBB] = llvm::AtomicOrdering::Acquire; - OrderBBs[ReleaseBB] = llvm::AtomicOrdering::Release; - OrderBBs[AcqRelBB] = 
llvm::AtomicOrdering::AcquireRelease; - OrderBBs[SeqCstBB] = llvm::AtomicOrdering::SequentiallyConsistent; + OrderBBs.emplace_back(AcquireBB, llvm::AtomicOrdering::Acquire); + OrderBBs.emplace_back(ReleaseBB, llvm::AtomicOrdering::Release); + OrderBBs.emplace_back(AcqRelBB, llvm::AtomicOrdering::AcquireRelease); + OrderBBs.emplace_back(SeqCstBB, + llvm::AtomicOrdering::SequentiallyConsistent); } for (auto &[OrderBB, Ordering] : OrderBBs) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits