https://github.com/pravinjagtap updated https://github.com/llvm/llvm-project/pull/71139
>From c28e9f9fb753e41bc539fa4c45bd7896d7c5d04d Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <pravin.jag...@amd.com>
Date: Fri, 3 Nov 2023 00:04:14 -0400
Subject: [PATCH 1/3] [AMDGPU] const-fold imm operands of amdgcn_update_dpp
 intrinsic

---
 clang/lib/CodeGen/CGBuiltin.cpp          | 16 +++++++-
 clang/test/CodeGenHIP/dpp-const-fold.hip | 48 ++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGenHIP/dpp-const-fold.hip

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e047d31c012116f..a4049cbc79d303d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17632,8 +17632,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_mov_dpp:
   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
     llvm::SmallVector<llvm::Value *, 6> Args;
-    for (unsigned I = 0; I != E->getNumArgs(); ++I)
-      Args.push_back(EmitScalarExpr(E->getArg(I)));
+    for (unsigned I = 0; I != E->getNumArgs(); ++I) {
+      llvm::Value *Arg = EmitScalarExpr(E->getArg(I));
+      // Except for the first two input operands, all others are imm
+      // operands of the dpp intrinsic.
+      if (llvm::is_contained(std::initializer_list<unsigned>{2, 3, 4, 5}, I)) {
+        // If this is required to be a constant, constant fold it so that we
+        // know that the generated intrinsic gets a ConstantInt.
+        std::optional<llvm::APSInt> Result =
+            E->getArg(I)->getIntegerConstantExpr(getContext());
+        assert(Result && "Expected argument to be a constant");
+        Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
+      }
+      Args.push_back(Arg);
+    }
     assert(Args.size() == 5 || Args.size() == 6);
     if (Args.size() == 5)
       Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
diff --git a/clang/test/CodeGenHIP/dpp-const-fold.hip b/clang/test/CodeGenHIP/dpp-const-fold.hip
new file mode 100644
index 000000000000000..1d1d135fb06239a
--- /dev/null
+++ b/clang/test/CodeGenHIP/dpp-const-fold.hip
@@ -0,0 +1,48 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang --offload-arch=gfx906 -S -o - -emit-llvm --cuda-device-only \
+// RUN:   %s | FileCheck %s
+
+constexpr static int OpCtrl()
+{
+  return 15 + 1;
+}
+
+constexpr static int RowMask()
+{
+  return 3 + 1;
+}
+
+constexpr static int BankMask()
+{
+  return 2 + 1;
+}
+
+constexpr static bool BoundCtrl()
+{
+  return true & false;
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b)
+{
+  *out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b)
+{
+  *out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b)
+{
+  *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b)
+{
+  *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BoundCtrl());
+}
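A note on the indices hardcoded above: {2, 3, 4, 5} are exactly the operands that BuiltinsAMDGPU.def marks as integer constant expressions. For reference, a sketch of the builtin's signature; the parameter names are annotations following the conventional DPP operand names from the intrinsic documentation, not part of the patch:

  // Only `old` and `src` are runtime values; the four trailing operands
  // must be integer constant expressions (ICEs) and end up as immediate
  // operands of @llvm.amdgcn.update.dpp.i32.
  // int __builtin_amdgcn_update_dpp(int old,          // arg 0: runtime
  //                                 int src,          // arg 1: runtime
  //                                 int dpp_ctrl,     // arg 2: ICE
  //                                 int row_mask,     // arg 3: ICE
  //                                 int bank_mask,    // arg 4: ICE
  //                                 bool bound_ctrl); // arg 5: ICE
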
>From 5c88b0b48c449f7beff887036a3f41b8b50ec19a Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <pravin.jag...@amd.com>
Date: Fri, 3 Nov 2023 04:21:39 -0400
Subject: [PATCH 2/3] Removed the hardcoded look-up for imm arguments

---
 clang/lib/CodeGen/CGBuiltin.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a4049cbc79d303d..b7d2fc738d067c2 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17632,11 +17632,18 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_mov_dpp:
   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
     llvm::SmallVector<llvm::Value *, 6> Args;
+    // Find out if any arguments are required to be integer constant
+    // expressions.
+    unsigned ICEArguments = 0;
+    ASTContext::GetBuiltinTypeError Error;
+    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+    assert(Error == ASTContext::GE_None && "Should not codegen an error");
     for (unsigned I = 0; I != E->getNumArgs(); ++I) {
-      llvm::Value *Arg = EmitScalarExpr(E->getArg(I));
-      // Except for the first two input operands, all others are imm
-      // operands of the dpp intrinsic.
-      if (llvm::is_contained(std::initializer_list<unsigned>{2, 3, 4, 5}, I)) {
+      llvm::Value *Arg = nullptr;
+      // If this is a normal argument, just emit it as a scalar.
+      if ((ICEArguments & (1 << I)) == 0) {
+        Arg = EmitScalarExpr(E->getArg(I));
+      } else {
         // If this is required to be a constant, constant fold it so that we
         // know that the generated intrinsic gets a ConstantInt.
         std::optional<llvm::APSInt> Result =
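Patch 2 replaces the hardcoded index list with the ICEArguments bitmask that ASTContext::GetBuiltinType reports: bit I is set when argument I of the builtin is declared as an integer constant expression. A minimal standalone sketch of how such a mask is decoded; the mask value is an assumption matching a DPP-style builtin whose arguments 2..5 are ICEs:

  #include <cstdio>

  int main() {
    // Hypothetical mask for a 6-argument builtin with ICE args 2..5,
    // i.e. bits 2..5 set.
    unsigned ICEArguments = 0b111100;
    for (unsigned I = 0; I != 6; ++I) {
      bool MustBeICE = (ICEArguments & (1u << I)) != 0;
      std::printf("arg %u: %s\n", I,
                  MustBeICE ? "const-fold to ConstantInt" : "emit as scalar");
    }
  }

This removes duplicated knowledge: which operands are immediates is now read from the builtin's signature instead of being restated in CGBuiltin.cpp.
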
>From 3464dd5c4dfbcc5d6c57ea29160e770a6915c7e2 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <pravin.jag...@amd.com>
Date: Wed, 8 Nov 2023 03:52:30 -0500
Subject: [PATCH 3/3] Refactored the code to avoid duplication using the newly
 added EmitScalarOrConstFoldImmArg API

---
 clang/lib/CodeGen/CGBuiltin.cpp     | 87 ++++++++---------------------
 clang/lib/CodeGen/CodeGenFunction.h |  2 +
 2 files changed, 25 insertions(+), 64 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b7d2fc738d067c2..349c8d3eb0745fd 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5708,18 +5708,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     llvm::FunctionType *FTy = F->getFunctionType();
 
     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
-      Value *ArgValue;
-      // If this is a normal argument, just emit it as a scalar.
-      if ((ICEArguments & (1 << i)) == 0) {
-        ArgValue = EmitScalarExpr(E->getArg(i));
-      } else {
-        // If this is required to be a constant, constant fold it so that we
-        // know that the generated intrinsic gets a ConstantInt.
-        ArgValue = llvm::ConstantInt::get(
-            getLLVMContext(),
-            *E->getArg(i)->getIntegerConstantExpr(getContext()));
-      }
-
+      Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
       // If the intrinsic arg type is different from the builtin arg type
       // we need to do a bit cast.
       llvm::Type *PTy = FTy->getParamType(i);
@@ -8599,15 +8588,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
       }
     }
 
-    if ((ICEArguments & (1 << i)) == 0) {
-      Ops.push_back(EmitScalarExpr(E->getArg(i)));
-    } else {
-      // If this is required to be a constant, constant fold it so that we know
-      // that the generated intrinsic gets a ConstantInt.
-      Ops.push_back(llvm::ConstantInt::get(
-          getLLVMContext(),
-          *E->getArg(i)->getIntegerConstantExpr(getContext())));
-    }
+    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
   }
 
   switch (BuiltinID) {
@@ -11088,15 +11069,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
         continue;
       }
     }
-    if ((ICEArguments & (1 << i)) == 0) {
-      Ops.push_back(EmitScalarExpr(E->getArg(i)));
-    } else {
-      // If this is required to be a constant, constant fold it so that we know
-      // that the generated intrinsic gets a ConstantInt.
-      Ops.push_back(llvm::ConstantInt::get(
-          getLLVMContext(),
-          *E->getArg(i)->getIntegerConstantExpr(getContext())));
-    }
+    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
   }
 
   auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
@@ -13808,16 +13781,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   assert(Error == ASTContext::GE_None && "Should not codegen an error");
 
   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    // If this is a normal argument, just emit it as a scalar.
-    if ((ICEArguments & (1 << i)) == 0) {
-      Ops.push_back(EmitScalarExpr(E->getArg(i)));
-      continue;
-    }
-
-    // If this is required to be a constant, constant fold it so that we know
-    // that the generated intrinsic gets a ConstantInt.
-    Ops.push_back(llvm::ConstantInt::get(
-        getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
+    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
   }
 
   // These exist so that the builtin that takes an immediate can be bounds
@@ -17582,6 +17546,23 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
   SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
 }
 
+llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
+                                                          unsigned Idx,
+                                                          const CallExpr *E) {
+  llvm::Value *Arg = nullptr;
+  if ((ICEArguments & (1 << Idx)) == 0) {
+    Arg = EmitScalarExpr(E->getArg(Idx));
+  } else {
+    // If this is required to be a constant, constant fold it so that we
+    // know that the generated intrinsic gets a ConstantInt.
+    std::optional<llvm::APSInt> Result =
+        E->getArg(Idx)->getIntegerConstantExpr(getContext());
+    assert(Result && "Expected argument to be a constant");
+    Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
+  }
+  return Arg;
+}
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
   llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
@@ -17639,19 +17620,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
     assert(Error == ASTContext::GE_None && "Should not codegen an error");
     for (unsigned I = 0; I != E->getNumArgs(); ++I) {
-      llvm::Value *Arg = nullptr;
-      // If this is a normal argument, just emit it as a scalar.
-      if ((ICEArguments & (1 << I)) == 0) {
-        Arg = EmitScalarExpr(E->getArg(I));
-      } else {
-        // If this is required to be a constant, constant fold it so that we
-        // know that the generated intrinsic gets a ConstantInt.
-        std::optional<llvm::APSInt> Result =
-            E->getArg(I)->getIntegerConstantExpr(getContext());
-        assert(Result && "Expected argument to be a constant");
-        Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
-      }
-      Args.push_back(Arg);
+      Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
     }
     assert(Args.size() == 5 || Args.size() == 6);
     if (Args.size() == 5)
@@ -20636,17 +20605,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
         Ops.push_back(AggValue);
         continue;
       }
-
-      // If this is a normal argument, just emit it as a scalar.
-      if ((ICEArguments & (1 << i)) == 0) {
-        Ops.push_back(EmitScalarExpr(E->getArg(i)));
-        continue;
-      }
-
-      // If this is required to be a constant, constant fold it so that we know
-      // that the generated intrinsic gets a ConstantInt.
-      Ops.push_back(llvm::ConstantInt::get(
-          getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
+      Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
     }
 
     Intrinsic::ID ID = Intrinsic::not_intrinsic;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index e82115e2d706cf1..5adcdafda3b4a06 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4321,6 +4321,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
+                                           const CallExpr *E);
   llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
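Taken together, the series makes every ICE operand of the DPP builtins reach the backend as a ConstantInt. A minimal HIP-style usage sketch; the kernel name and the DPP control encoding below are illustrative assumptions, not taken from the patch:

  __attribute__((global)) void rotate_row(int* out, int v)
  {
    // All four trailing operands are integer constant expressions, so
    // codegen folds them into immediate intrinsic operands, as the CHECK
    // lines in dpp-const-fold.hip verify.
    constexpr int DppCtrl = 0x111; // assumed row_shr:1 encoding
    *out = __builtin_amdgcn_update_dpp(0, v, DppCtrl, 0xF, 0xF, false);
  }

A non-constant argument in one of those positions is rejected by Sema before codegen runs, which is what justifies the assert in EmitScalarOrConstFoldImmArg that the fold always succeeds.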