Author: Shilei Tian
Date: 2025-07-18T08:43:08-04:00
New Revision: 602d43cfd1fe7cc47146b6327d8df6e5e0ec47ae

URL: https://github.com/llvm/llvm-project/commit/602d43cfd1fe7cc47146b6327d8df6e5e0ec47ae
DIFF: https://github.com/llvm/llvm-project/commit/602d43cfd1fe7cc47146b6327d8df6e5e0ec47ae.diff

LOG: [Clang][AMDGPU] Add the missing builtin `__builtin_amdgcn_sqrt_bf16` (#149447)

Co-authored-by: Mekhanoshin, Stanislav <stanislav.mekhanos...@amd.com>

Added:

Modified:
    clang/include/clang/Basic/BuiltinsAMDGPU.def
    clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
    clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Removed:

################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index ed51f1d5de447..a916af7e0c2df 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -673,6 +673,7 @@ TARGET_BUILTIN(__builtin_amdgcn_tanhf, "ff", "nc", "tanh-insts")
 TARGET_BUILTIN(__builtin_amdgcn_tanhh, "hh", "nc", "tanh-insts")
 TARGET_BUILTIN(__builtin_amdgcn_tanh_bf16, "yy", "nc", "bf16-trans-insts")
 TARGET_BUILTIN(__builtin_amdgcn_rcp_bf16, "yy", "nc", "bf16-trans-insts")
+TARGET_BUILTIN(__builtin_amdgcn_sqrt_bf16, "yy", "nc", "bf16-trans-insts")
 TARGET_BUILTIN(__builtin_amdgcn_rsq_bf16, "yy", "nc", "bf16-trans-insts")
 TARGET_BUILTIN(__builtin_amdgcn_log_bf16, "yy", "nc", "bf16-trans-insts")
 TARGET_BUILTIN(__builtin_amdgcn_exp2_bf16, "yy", "nc", "bf16-trans-insts")
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index a7d796ecccc61..ee736a2816218 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -416,6 +416,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_sqrt:
   case AMDGPU::BI__builtin_amdgcn_sqrtf:
   case AMDGPU::BI__builtin_amdgcn_sqrth:
+  case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
     return emitBuiltinWithOneOverloadedType<1>(*this, E,
                                                Intrinsic::amdgcn_sqrt);
   case AMDGPU::BI__builtin_amdgcn_rsq:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 738b7ab7f2b75..a9ea17642d6ad 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -119,6 +119,25 @@ void test_rcp_bf16(global __bf16* out, __bf16 a)
   *out = __builtin_amdgcn_rcp_bf16(a);
 }
 
+// CHECK-LABEL: @test_sqrt_bf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = call bfloat @llvm.amdgcn.sqrt.bf16(bfloat [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
+// CHECK-NEXT: ret void
+//
+void test_sqrt_bf16(global __bf16* out, __bf16 a)
+{
+  *out = __builtin_amdgcn_sqrt_bf16(a);
+}
+
 // CHECK-LABEL: @test_rsq_bf16(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits