https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96874
>From e10cf564b3f902655d465a2a62f1d25d3ac82018 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 26 Jun 2024 19:15:26 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 17 ++++++----------- .../CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl | 6 ++++-- .../CodeGenOpenCL/builtins-fp-atomics-gfx940.cl | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 77c652573cae42..0b6e4f55502655 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18922,10 +18922,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { @@ -18935,19 +18933,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: IID = Intrinsic::amdgcn_global_atomic_fmax; break; - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: IID = Intrinsic::amdgcn_flat_atomic_fmin; break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: IID = Intrinsic::amdgcn_flat_atomic_fmax; break; - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; } llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -19350,7 +19341,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19370,6 +19363,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index e6469c189ac91d..9381ce951df3e3 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -45,7 +45,8 @@ void test_global_max_f64(__global double *addr, double x){ } // CHECK-LABEL: test_flat_add_local_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8{{$}} + // GFX90A-LABEL: test_flat_add_local_f64$local // GFX90A: ds_add_rtn_f64 void test_flat_add_local_f64(__local double *addr, double x){ @@ -54,7 +55,8 @@ void test_flat_add_local_f64(__local double *addr, double x){ } // CHECK-LABEL: test_flat_global_add_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: test_flat_global_add_f64$local // GFX90A: global_atomic_add_f64 void test_flat_global_add_f64(__global double *addr, double x){ diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl index b07bdf524f230e..254e2814580ca6 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl @@ -10,7 +10,8 @@ typedef half __attribute__((ext_vector_type(2))) half2; typedef short __attribute__((ext_vector_type(2))) short2; // CHECK-LABEL: test_flat_add_f32 -// CHECK: call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %{{.*}}, float %{{.*}}) +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, float %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} + // GFX940-LABEL: test_flat_add_f32 // GFX940: flat_atomic_add_f32 half2 test_flat_add_f32(__generic float *addr, float x) { _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits