llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Joseph Huber (jhuber6) <details> <summary>Changes</summary> Summary: The documentation at https://llvm.org/docs/AMDGPUUsage.html#memory-scopes states that these 'one-as' modifiers are more specific versions of the scopes that only apply to a specific address space. This doesn't make sense for fences which have no associated address space to use, and it's a more restrictive version the normal scope. This should not tbe the default behavior, but it is currently emitted in all cases except for sequentially consistent. --- Patch is 266.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/120095.diff 8 Files Affected: - (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (-7) - (modified) clang/test/CodeGen/scoped-atomic-ops.c (+2311-161) - (modified) clang/test/CodeGen/scoped-fence-ops.c (+10-10) - (modified) clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu (+30-30) - (modified) clang/test/CodeGenCUDA/atomic-ops.cu (+140-140) - (modified) clang/test/CodeGenOpenCL/atomic-ops.cl (+14-14) - (modified) clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl (+8-8) - (modified) clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl (+6-6) ``````````diff diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 56ad0503a11ab2..ecf044436d8c86 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -537,13 +537,6 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, break; } - if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) { - if (!Name.empty()) - Name = Twine(Twine(Name) + Twine("-")).str(); - - Name = Twine(Twine(Name) + Twine("one-as")).str(); - } - return Ctx.getOrInsertSyncScopeID(Name); } diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c index cf98812a07e91d..c3162ebe66661f 100644 --- a/clang/test/CodeGen/scoped-atomic-ops.c +++ b/clang/test/CodeGen/scoped-atomic-ops.c @@ -5,17 +5,57 @@ // RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s // AMDGCN-LABEL: define hidden i32 @fi1a( -// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] syncscope("one-as") monotonic, align 4 -// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("agent-one-as") monotonic, align 4 -// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// SPIRV: define hidden spir_func i32 @fi1a( -// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] monotonic, align 4 -// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("device") monotonic, align 4 -// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4 -// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("subgroup") monotonic, align 4 -// SPIRV: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread") monotonic, align 4 +// AMDGCN-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// AMDGCN-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// AMDGCN-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// AMDGCN-NEXT: [[V_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr +// AMDGCN-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4 +// AMDGCN-NEXT: store i32 [[TMP1]], ptr [[V_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("agent") monotonic, align 4 +// AMDGCN-NEXT: store i32 [[TMP3]], ptr [[V_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4 +// AMDGCN-NEXT: store i32 [[TMP5]], ptr [[V_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("wavefront") monotonic, align 4 +// AMDGCN-NEXT: store i32 [[TMP7]], ptr [[V_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("singlethread") monotonic, align 4 +// AMDGCN-NEXT: store i32 [[TMP9]], ptr [[V_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP10:%.*]] = load i32, ptr [[V_ASCAST]], align 4 +// AMDGCN-NEXT: ret i32 [[TMP10]] +// +// SPIRV-LABEL: define hidden spir_func i32 @fi1a( +// SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// SPIRV-NEXT: [[V:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4 +// SPIRV-NEXT: store i32 [[TMP1]], ptr [[V]], align 4 +// SPIRV-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("device") monotonic, align 4 +// SPIRV-NEXT: store i32 [[TMP3]], ptr [[V]], align 4 +// SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4 +// SPIRV-NEXT: store i32 [[TMP5]], ptr [[V]], align 4 +// SPIRV-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("subgroup") monotonic, align 4 +// SPIRV-NEXT: store i32 [[TMP7]], ptr [[V]], align 4 +// SPIRV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("singlethread") monotonic, align 4 +// SPIRV-NEXT: store i32 [[TMP9]], ptr [[V]], align 4 +// SPIRV-NEXT: [[TMP10:%.*]] = load i32, ptr [[V]], align 4 +// SPIRV-NEXT: ret i32 [[TMP10]] +// int fi1a(int *i) { int v; __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); @@ -27,17 +67,101 @@ int fi1a(int *i) { } // AMDGCN-LABEL: define hidden i32 @fi1b( -// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 -// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 -// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 -// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 -// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// AMDGCN-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// AMDGCN-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// AMDGCN-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// AMDGCN-NEXT: [[ATOMIC_TEMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP1]] to ptr +// AMDGCN-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr +// AMDGCN-NEXT: [[ATOMIC_TEMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP3]] to ptr +// AMDGCN-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr +// AMDGCN-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4 +// AMDGCN-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP2:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 +// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("agent") monotonic, align 4 +// AMDGCN-NEXT: store i32 [[TMP5]], ptr [[ATOMIC_TEMP1_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP6:%.*]] = load i32, ptr [[ATOMIC_TEMP1_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP7:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4 +// AMDGCN-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("workgroup") monotonic, align 4 +// AMDGCN-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP11:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: store i32 [[TMP10]], ptr [[TMP11]], align 4 +// AMDGCN-NEXT: [[TMP12:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP13:%.*]] = load atomic i32, ptr [[TMP12]] syncscope("wavefront") monotonic, align 4 +// AMDGCN-NEXT: store i32 [[TMP13]], ptr [[ATOMIC_TEMP3_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP14:%.*]] = load i32, ptr [[ATOMIC_TEMP3_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP15:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: store i32 [[TMP14]], ptr [[TMP15]], align 4 +// AMDGCN-NEXT: [[TMP16:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP17:%.*]] = load atomic i32, ptr [[TMP16]] syncscope("singlethread") monotonic, align 4 +// AMDGCN-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP19:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 +// AMDGCN-NEXT: [[TMP20:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// AMDGCN-NEXT: ret i32 [[TMP21]] +// // SPIRV-LABEL: define hidden spir_func i32 @fi1b( -// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] monotonic, align 4 -// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4 -// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4 -// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4 -// SPIRV: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4 +// SPIRV-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP]], align 4 +// SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// SPIRV-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 +// SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("device") monotonic, align 4 +// SPIRV-NEXT: store i32 [[TMP5]], ptr [[ATOMIC_TEMP1]], align 4 +// SPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4 +// SPIRV-NEXT: [[TMP7:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4 +// SPIRV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("workgroup") monotonic, align 4 +// SPIRV-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP2]], align 4 +// SPIRV-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4 +// SPIRV-NEXT: [[TMP11:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: store i32 [[TMP10]], ptr [[TMP11]], align 4 +// SPIRV-NEXT: [[TMP12:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP13:%.*]] = load atomic i32, ptr [[TMP12]] syncscope("subgroup") monotonic, align 4 +// SPIRV-NEXT: store i32 [[TMP13]], ptr [[ATOMIC_TEMP3]], align 4 +// SPIRV-NEXT: [[TMP14:%.*]] = load i32, ptr [[ATOMIC_TEMP3]], align 4 +// SPIRV-NEXT: [[TMP15:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: store i32 [[TMP14]], ptr [[TMP15]], align 4 +// SPIRV-NEXT: [[TMP16:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP17:%.*]] = load atomic i32, ptr [[TMP16]] syncscope("singlethread") monotonic, align 4 +// SPIRV-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP4]], align 4 +// SPIRV-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP4]], align 4 +// SPIRV-NEXT: [[TMP19:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 +// SPIRV-NEXT: [[TMP20:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// SPIRV-NEXT: ret i32 [[TMP21]] +// int fi1b(int *i) { *i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); *i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); @@ -48,17 +172,55 @@ int fi1b(int *i) { } // AMDGCN-LABEL: define hidden void @fi2a( -// AMDGCN: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 -// AMDGCN: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 -// AMDGCN: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 -// AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 -// AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// AMDGCN-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// AMDGCN-NEXT: [[V_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr +// AMDGCN-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: store i32 1, ptr [[V_ASCAST]], align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load i32, ptr [[V_ASCAST]], align 4 +// AMDGCN-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] monotonic, align 4 +// AMDGCN-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP3:%.*]] = load i32, ptr [[V_ASCAST]], align 4 +// AMDGCN-NEXT: store atomic i32 [[TMP3]], ptr [[TMP2]] syncscope("agent") monotonic, align 4 +// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP5:%.*]] = load i32, ptr [[V_ASCAST]], align 4 +// AMDGCN-NEXT: store atomic i32 [[TMP5]], ptr [[TMP4]] syncscope("workgroup") monotonic, align 4 +// AMDGCN-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP7:%.*]] = load i32, ptr [[V_ASCAST]], align 4 +// AMDGCN-NEXT: store atomic i32 [[TMP7]], ptr [[TMP6]] syncscope("wavefront") monotonic, align 4 +// AMDGCN-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP9:%.*]] = load i32, ptr [[V_ASCAST]], align 4 +// AMDGCN-NEXT: store atomic i32 [[TMP9]], ptr [[TMP8]] syncscope("singlethread") monotonic, align 4 +// AMDGCN-NEXT: ret void +// // SPIRV-LABEL: define hidden spir_func void @fi2a( -// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] monotonic, align 4 -// SPIRV: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4 -// SPIRV: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4 -// SPIRV: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4 -// SPIRV: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// SPIRV-NEXT: [[V:%.*]] = alloca i32, align 4 +// SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: store i32 1, ptr [[V]], align 4 +// SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[V]], align 4 +// SPIRV-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] monotonic, align 4 +// SPIRV-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr [[V]], align 4 +// SPIRV-NEXT: store atomic i32 [[TMP3]], ptr [[TMP2]] syncscope("device") monotonic, align 4 +// SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP5:%.*]] = load i32, ptr [[V]], align 4 +// SPIRV-NEXT: store atomic i32 [[TMP5]], ptr [[TMP4]] syncscope("workgroup") monotonic, align 4 +// SPIRV-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP7:%.*]] = load i32, ptr [[V]], align 4 +// SPIRV-NEXT: store atomic i32 [[TMP7]], ptr [[TMP6]] syncscope("subgroup") monotonic, align 4 +// SPIRV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// SPIRV-NEXT: [[TMP9:%.*]] = load i32, ptr [[V]], align 4 +// SPIRV-NEXT: store atomic i32 [[TMP9]], ptr [[TMP8]] syncscope("singlethread") monotonic, align 4 +// SPIRV-NEXT: ret void +// void fi2a(int *i) { int v = 1; __scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); @@ -69,17 +231,75 @@ void fi2a(int *i) { } // AMDGCN-LABEL: define hidden void @fi2b( -// AMDGCN: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 -// AMDGCN: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 -// AMDGCN: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 -// AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 -// AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[DOTATOMICTMP2:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[DOTATOMICTMP4:%.*]] = alloca i32, align 4, addrspace(5) +// AMDGCN-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// AMDGCN-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// AMDGCN-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr +// AMDGCN-NEXT: [[DOTATOMICTMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP2]] to ptr +// AMDGCN-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr +// AMDGCN-NEXT: [[DOTATOMICTMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP4]] to ptr +// AMDGCN-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 +/... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/120095 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits