https://github.com/choikwa updated https://github.com/llvm/llvm-project/pull/129347
>From 00827e0ebb8853dfc886e1d6226136ddbdb6e0a4 Mon Sep 17 00:00:00 2001 From: Kevin Choi <kevin.c...@amd.com> Date: Fri, 28 Feb 2025 16:52:03 -0600 Subject: [PATCH] [AMDGPU][clang] provide device implementation for __builtin_logb and __builtin_scalbn Clang generates library calls for __builtin_* functions which can be a problem for GPUs that cannot handle them. This patch generates device implementations for __builtin_logb and __builtin_scalbn. --- clang/lib/CodeGen/CGBuiltin.cpp | 19 ++++++++- clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 45 +++++++++++++++++++++ clang/test/CodeGen/logb_scalbn.c | 32 +++++++++++++++ 3 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/logb_scalbn.c diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f0ba52fa41ce8..b5c5b5e55efed 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6011,10 +6011,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } } + // These will be emitted as Intrinsic later. + auto NeedsDeviceOverload = [&](unsigned BuiltinID) { + if (getTarget().getTriple().isAMDGCN()) { + switch (BuiltinID) { + default: + return false; + case Builtin::BIlogb: + case Builtin::BI__builtin_logb: + case Builtin::BIscalbn: + case Builtin::BI__builtin_scalbn: + return true; + } + } + return false; + }; + // If this is an alias for a lib function (e.g. __builtin_sin), emit // the call using the normal call path, but using the unmangled // version of the function name. 
- if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) + if (!NeedsDeviceOverload(BuiltinID) && + getContext().BuiltinInfo.isLibFunction(BuiltinID)) return emitLibraryCall(*this, FD, E, CGM.getBuiltinLibFunction(FD, BuiltinID)); diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index f94917c905081..b0e7679b69043 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -31,6 +31,27 @@ using namespace CodeGen; using namespace llvm; namespace { + +// Has second type mangled argument. +static Value * +emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, + Intrinsic::ID IntrinsicID, + Intrinsic::ID ConstrainedIntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); + if (CGF.Builder.getIsFPConstrained()) { + Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, + {Src0->getType(), Src1->getType()}); + return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1}); + } + + Function *F = + CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()}); + return CGF.Builder.CreateCall(F, {Src0, Src1}); +} + // If \p E is not null pointer, insert address space cast to match return // type of \p E if necessary. 
Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF, @@ -1876,6 +1897,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_s_prefetch_data: return emitBuiltinWithOneOverloadedType<2>( *this, E, Intrinsic::amdgcn_s_prefetch_data); + case Builtin::BIlogb: + case Builtin::BI__builtin_logb: { + auto *Src0 = EmitScalarExpr(E->getArg(0)); + auto *FrExpFunc = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, + {Builder.getInt32Ty(), Src0->getType()}); + auto *FrExp = Builder.CreateCall(FrExpFunc, Src0); + auto *Add = Builder.CreateAdd( + FrExp, ConstantInt::getSigned(FrExp->getType(), -1), "", false, true); + auto *SIToFP = Builder.CreateSIToFP(Add, Builder.getDoubleTy()); + auto *Fabs = emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs); + auto *FCmpONE = Builder.CreateFCmpONE( + Fabs, ConstantFP::getInfinity(Builder.getDoubleTy())); + auto *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs); + auto *FCmpOEQ = + Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy())); + auto *Sel2 = Builder.CreateSelect( + FCmpOEQ, ConstantFP::getInfinity(Builder.getDoubleTy(), /*Neg*/ true), + Sel1); + return Sel2; + } + case Builtin::BIscalbn: + case Builtin::BI__builtin_scalbn: + return emitBinaryExpMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp); default: return nullptr; } diff --git a/clang/test/CodeGen/logb_scalbn.c b/clang/test/CodeGen/logb_scalbn.c new file mode 100644 index 0000000000000..51f0fc5639c9f --- /dev/null +++ b/clang/test/CodeGen/logb_scalbn.c @@ -0,0 +1,32 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s +#include <math.h> + +// + +// CHECK-LABEL: define dso_local void @my_kernel( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5) 
+// CHECK-NEXT: [[D2:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr +// CHECK-NEXT: [[D2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D2]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double 1.600000e+01) +// CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[TMP0]], -1 +// CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.fabs.f64(double 1.600000e+01) +// CHECK-NEXT: [[TMP4:%.*]] = fcmp one double [[TMP3]], 0x7FF0000000000000 +// CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], double [[TMP2]], double [[TMP3]] +// CHECK-NEXT: [[TMP6:%.*]] = select i1 false, double 0xFFF0000000000000, double [[TMP5]] +// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP6]] to float +// CHECK-NEXT: store float [[CONV]], ptr [[D1_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = call double @llvm.ldexp.f64.i32(double 1.600000e+01, i32 10) +// CHECK-NEXT: [[CONV1:%.*]] = fptrunc double [[TMP7]] to float +// CHECK-NEXT: store float [[CONV1]], ptr [[D2_ASCAST]], align 4 +// CHECK-NEXT: ret void +// +void my_kernel(){ + float D1 = __builtin_logb((float)16); + float D2 = __builtin_scalbn((float)16, 10); +} + _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits