https://github.com/pravinjagtap updated https://github.com/llvm/llvm-project/pull/71139
>From c28e9f9fb753e41bc539fa4c45bd7896d7c5d04d Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <pravin.jag...@amd.com>
Date: Fri, 3 Nov 2023 00:04:14 -0400
Subject: [PATCH 1/2] [AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic

---
 clang/lib/CodeGen/CGBuiltin.cpp          | 16 +++++++-
 clang/test/CodeGenHIP/dpp-const-fold.hip | 48 ++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGenHIP/dpp-const-fold.hip

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e047d31c012116f..a4049cbc79d303d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17632,8 +17632,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_mov_dpp:
   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
     llvm::SmallVector<llvm::Value *, 6> Args;
-    for (unsigned I = 0; I != E->getNumArgs(); ++I)
-      Args.push_back(EmitScalarExpr(E->getArg(I)));
+    for (unsigned I = 0; I != E->getNumArgs(); ++I) {
+      llvm::Value *Arg = EmitScalarExpr(E->getArg(I));
+      // Except first two input operands, all other are imm operands for dpp
+      // intrinsic.
+      if (llvm::is_contained(std::initializer_list<unsigned>{2, 3, 4, 5}, I)) {
+        // If this is required to be a constant, constant fold it so that we
+        // know that the generated intrinsic gets a ConstantInt.
+        std::optional<llvm::APSInt> Result =
+            E->getArg(I)->getIntegerConstantExpr(getContext());
+        assert(Result && "Expected argument to be a constant");
+        Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
+      }
+      Args.push_back(Arg);
+    }
     assert(Args.size() == 5 || Args.size() == 6);
     if (Args.size() == 5)
       Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
diff --git a/clang/test/CodeGenHIP/dpp-const-fold.hip b/clang/test/CodeGenHIP/dpp-const-fold.hip
new file mode 100644
index 000000000000000..1d1d135fb06239a
--- /dev/null
+++ b/clang/test/CodeGenHIP/dpp-const-fold.hip
@@ -0,0 +1,48 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang --offload-arch=gfx906 -S -o - -emit-llvm --cuda-device-only \
+// RUN: %s | FileCheck %s
+
+constexpr static int OpCtrl()
+{
+    return 15 + 1;
+}
+
+constexpr static int RowMask()
+{
+    return 3 + 1;
+}
+
+constexpr static int BankMask()
+{
+    return 2 + 1;
+}
+
+constexpr static bool BountCtrl()
+{
+    return true & false;
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b)
+{
+    *out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b)
+{
+    *out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b)
+{
+    *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b)
+{
+    *out =
+        __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl());
+}

>From 3f9364ccf22dc7484f8473eb084998f753edbdf8 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <pravin.jag...@amd.com>
Date: Fri, 3 Nov 2023 04:21:39 -0400
Subject: [PATCH 2/2] removed the hardcoded look up for imm arguments

---
 clang/lib/CodeGen/CGBuiltin.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a4049cbc79d303d..30c9451a9a7953b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17632,11 +17632,18 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_mov_dpp:
   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
     llvm::SmallVector<llvm::Value *, 6> Args;
+    // Find out if any arguments are required to be integer constant
+    // expressions.
+    unsigned ICEArguments = 0;
+    ASTContext::GetBuiltinTypeError Error;
+    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+    assert(Error == ASTContext::GE_None && "Should not codegen an error");
     for (unsigned I = 0; I != E->getNumArgs(); ++I) {
-      llvm::Value *Arg = EmitScalarExpr(E->getArg(I));
-      // Except first two input operands, all other are imm operands for dpp
-      // intrinsic.
-      if (llvm::is_contained(std::initializer_list<unsigned>{2, 3, 4, 5}, I)) {
+      llvm::Value* Arg = nullptr;
+      // If this is a normal argument, just emit it as a scalar.
+      if ((ICEArguments & (1 << I)) == 0) {
+        Arg = EmitScalarExpr(E->getArg(I));
+      } else {
         // If this is required to be a constant, constant fold it so that we
         // know that the generated intrinsic gets a ConstantInt.
         std::optional<llvm::APSInt> Result =

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits