https://github.com/pravinjagtap updated https://github.com/llvm/llvm-project/pull/71139
>From c28e9f9fb753e41bc539fa4c45bd7896d7c5d04d Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <pravin.jag...@amd.com>
Date: Fri, 3 Nov 2023 00:04:14 -0400
Subject: [PATCH 1/3] [AMDGPU] const-fold imm operands of amdgcn_update_dpp
 intrinsic

---
 clang/lib/CodeGen/CGBuiltin.cpp          | 16 +++++++-
 clang/test/CodeGenHIP/dpp-const-fold.hip | 48 ++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGenHIP/dpp-const-fold.hip

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e047d31c012116f..a4049cbc79d303d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17632,8 +17632,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_mov_dpp:
   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
     llvm::SmallVector<llvm::Value *, 6> Args;
-    for (unsigned I = 0; I != E->getNumArgs(); ++I)
-      Args.push_back(EmitScalarExpr(E->getArg(I)));
+    for (unsigned I = 0; I != E->getNumArgs(); ++I) {
+      llvm::Value *Arg = EmitScalarExpr(E->getArg(I));
+      // Except for the first two input operands, all others are imm
+      // operands of the dpp intrinsic.
+      if (llvm::is_contained(std::initializer_list<unsigned>{2, 3, 4, 5}, I)) {
+        // If this is required to be a constant, constant fold it so that we
+        // know that the generated intrinsic gets a ConstantInt.
+        std::optional<llvm::APSInt> Result =
+            E->getArg(I)->getIntegerConstantExpr(getContext());
+        assert(Result && "Expected argument to be a constant");
+        Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
+      }
+      Args.push_back(Arg);
+    }
     assert(Args.size() == 5 || Args.size() == 6);
     if (Args.size() == 5)
       Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
diff --git a/clang/test/CodeGenHIP/dpp-const-fold.hip b/clang/test/CodeGenHIP/dpp-const-fold.hip
new file mode 100644
index 000000000000000..1d1d135fb06239a
--- /dev/null
+++ b/clang/test/CodeGenHIP/dpp-const-fold.hip
@@ -0,0 +1,48 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang --offload-arch=gfx906 -S -o - -emit-llvm --cuda-device-only \
+// RUN:   %s | FileCheck %s
+
+constexpr static int OpCtrl()
+{
+  return 15 + 1;
+}
+
+constexpr static int RowMask()
+{
+  return 3 + 1;
+}
+
+constexpr static int BankMask()
+{
+  return 2 + 1;
+}
+
+constexpr static bool BoundCtrl()
+{
+  return true & false;
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b)
+{
+  *out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b)
+{
+  *out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b)
+{
+  *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b)
+{
+  *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BoundCtrl());
+}
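A note on the indices hardcoded above: {2, 3, 4, 5} are exactly the operands that BuiltinsAMDGPU.def marks as integer constant expressions. For reference, a sketch of the builtin's signature; the parameter names are annotations following the conventional DPP operand names from the intrinsic documentation, not part of the patch:

  // Only `old` and `src` are runtime values; the four trailing operands
  // must be integer constant expressions (ICEs) and end up as immediate
  // operands of @llvm.amdgcn.update.dpp.i32.
  // int __builtin_amdgcn_update_dpp(int old,          // arg 0: runtime
  //                                 int src,          // arg 1: runtime
  //                                 int dpp_ctrl,     // arg 2: ICE
  //                                 int row_mask,     // arg 3: ICE
  //                                 int bank_mask,    // arg 4: ICE
  //                                 bool bound_ctrl); // arg 5: ICE
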
>From 5c88b0b48c449f7beff887036a3f41b8b50ec19a Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <pravin.jag...@amd.com>
Date: Fri, 3 Nov 2023 04:21:39 -0400
Subject: [PATCH 2/3] Removed the hardcoded look-up for imm arguments

---
 clang/lib/CodeGen/CGBuiltin.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a4049cbc79d303d..b7d2fc738d067c2 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17632,11 +17632,18 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_mov_dpp:
   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
     llvm::SmallVector<llvm::Value *, 6> Args;
+    // Find out if any arguments are required to be integer constant
+    // expressions.
+    unsigned ICEArguments = 0;
+    ASTContext::GetBuiltinTypeError Error;
+    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+    assert(Error == ASTContext::GE_None && "Should not codegen an error");
     for (unsigned I = 0; I != E->getNumArgs(); ++I) {
-      llvm::Value *Arg = EmitScalarExpr(E->getArg(I));
-      // Except for the first two input operands, all others are imm
-      // operands of the dpp intrinsic.
-      if (llvm::is_contained(std::initializer_list<unsigned>{2, 3, 4, 5}, I)) {
+      llvm::Value *Arg = nullptr;
+      // If this is a normal argument, just emit it as a scalar.
+      if ((ICEArguments & (1 << I)) == 0) {
+        Arg = EmitScalarExpr(E->getArg(I));
+      } else {
         // If this is required to be a constant, constant fold it so that we
         // know that the generated intrinsic gets a ConstantInt.
         std::optional<llvm::APSInt> Result =
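Patch 2 replaces the hardcoded index list with the ICEArguments bitmask that ASTContext::GetBuiltinType reports: bit I is set when argument I of the builtin is declared as an integer constant expression. A minimal standalone sketch of how such a mask is decoded; the mask value is an assumption matching a DPP-style builtin whose arguments 2..5 are ICEs:

  #include <cstdio>

  int main() {
    // Hypothetical mask for a 6-argument builtin with ICE args 2..5,
    // i.e. bits 2..5 set.
    unsigned ICEArguments = 0b111100;
    for (unsigned I = 0; I != 6; ++I) {
      bool MustBeICE = (ICEArguments & (1u << I)) != 0;
      std::printf("arg %u: %s\n", I,
                  MustBeICE ? "const-fold to ConstantInt" : "emit as scalar");
    }
  }

This removes duplicated knowledge: which operands are immediates is now read from the builtin's signature instead of being restated in CGBuiltin.cpp.
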
>From 3464dd5c4dfbcc5d6c57ea29160e770a6915c7e2 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <pravin.jag...@amd.com>
Date: Wed, 8 Nov 2023 03:52:30 -0500
Subject: [PATCH 3/3] Refactored the code to avoid duplication using the newly
 added EmitScalarOrConstFoldImmArg API

---
 clang/lib/CodeGen/CGBuiltin.cpp     | 87 ++++++++---------------------
 clang/lib/CodeGen/CodeGenFunction.h |  2 +
 2 files changed, 25 insertions(+), 64 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b7d2fc738d067c2..349c8d3eb0745fd 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5708,18 +5708,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     llvm::FunctionType *FTy = F->getFunctionType();
 
     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
-      Value *ArgValue;
-      // If this is a normal argument, just emit it as a scalar.
-      if ((ICEArguments & (1 << i)) == 0) {
-        ArgValue = EmitScalarExpr(E->getArg(i));
-      } else {
-        // If this is required to be a constant, constant fold it so that we
-        // know that the generated intrinsic gets a ConstantInt.
-        ArgValue = llvm::ConstantInt::get(
-            getLLVMContext(),
-            *E->getArg(i)->getIntegerConstantExpr(getContext()));
-      }
-
+      Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
       // If the intrinsic arg type is different from the builtin arg type
       // we need to do a bit cast.
       llvm::Type *PTy = FTy->getParamType(i);
@@ -8599,15 +8588,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
       }
     }
 
-    if ((ICEArguments & (1 << i)) == 0) {
-      Ops.push_back(EmitScalarExpr(E->getArg(i)));
-    } else {
-      // If this is required to be a constant, constant fold it so that we know
-      // that the generated intrinsic gets a ConstantInt.
-      Ops.push_back(llvm::ConstantInt::get(
-          getLLVMContext(),
-          *E->getArg(i)->getIntegerConstantExpr(getContext())));
-    }
+    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
   }
 
   switch (BuiltinID) {
@@ -11088,15 +11069,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
         continue;
       }
     }
-    if ((ICEArguments & (1 << i)) == 0) {
-      Ops.push_back(EmitScalarExpr(E->getArg(i)));
-    } else {
-      // If this is required to be a constant, constant fold it so that we know
-      // that the generated intrinsic gets a ConstantInt.
-      Ops.push_back(llvm::ConstantInt::get(
-          getLLVMContext(),
-          *E->getArg(i)->getIntegerConstantExpr(getContext())));
-    }
+    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
   }
 
   auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
@@ -13808,16 +13781,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   assert(Error == ASTContext::GE_None && "Should not codegen an error");
 
   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    // If this is a normal argument, just emit it as a scalar.
-    if ((ICEArguments & (1 << i)) == 0) {
-      Ops.push_back(EmitScalarExpr(E->getArg(i)));
-      continue;
-    }
-
-    // If this is required to be a constant, constant fold it so that we know
-    // that the generated intrinsic gets a ConstantInt.
-    Ops.push_back(llvm::ConstantInt::get(
-        getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
+    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
   }
 
   // These exist so that the builtin that takes an immediate can be bounds
@@ -17582,6 +17546,23 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
   SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
 }
 
+llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
+                                                          unsigned Idx,
+                                                          const CallExpr *E) {
+  llvm::Value *Arg = nullptr;
+  if ((ICEArguments & (1 << Idx)) == 0) {
+    Arg = EmitScalarExpr(E->getArg(Idx));
+  } else {
+    // If this is required to be a constant, constant fold it so that we
+    // know that the generated intrinsic gets a ConstantInt.
+    std::optional<llvm::APSInt> Result =
+        E->getArg(Idx)->getIntegerConstantExpr(getContext());
+    assert(Result && "Expected argument to be a constant");
+    Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
+  }
+  return Arg;
+}
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
   llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
@@ -17639,19 +17620,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
     assert(Error == ASTContext::GE_None && "Should not codegen an error");
     for (unsigned I = 0; I != E->getNumArgs(); ++I) {
-      llvm::Value *Arg = nullptr;
-      // If this is a normal argument, just emit it as a scalar.
-      if ((ICEArguments & (1 << I)) == 0) {
-        Arg = EmitScalarExpr(E->getArg(I));
-      } else {
-        // If this is required to be a constant, constant fold it so that we
-        // know that the generated intrinsic gets a ConstantInt.
-        std::optional<llvm::APSInt> Result =
-            E->getArg(I)->getIntegerConstantExpr(getContext());
-        assert(Result && "Expected argument to be a constant");
-        Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
-      }
-      Args.push_back(Arg);
+      Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
     }
     assert(Args.size() == 5 || Args.size() == 6);
     if (Args.size() == 5)
@@ -20636,17 +20605,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
         Ops.push_back(AggValue);
         continue;
       }
-
-      // If this is a normal argument, just emit it as a scalar.
-      if ((ICEArguments & (1 << i)) == 0) {
-        Ops.push_back(EmitScalarExpr(E->getArg(i)));
-        continue;
-      }
-
-      // If this is required to be a constant, constant fold it so that we know
-      // that the generated intrinsic gets a ConstantInt.
-      Ops.push_back(llvm::ConstantInt::get(
-          getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
+      Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
     }
 
     Intrinsic::ID ID = Intrinsic::not_intrinsic;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index e82115e2d706cf1..5adcdafda3b4a06 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4321,6 +4321,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
+                                           const CallExpr *E);
   llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
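Taken together, the series makes every ICE operand of the DPP builtins reach the backend as a ConstantInt. A minimal HIP-style usage sketch; the kernel name and the DPP control encoding below are illustrative assumptions, not taken from the patch:

  __attribute__((global)) void rotate_row(int* out, int v)
  {
    // All four trailing operands are integer constant expressions, so
    // codegen folds them into immediate intrinsic operands, as the CHECK
    // lines in dpp-const-fold.hip verify.
    constexpr int DppCtrl = 0x111; // assumed row_shr:1 encoding
    *out = __builtin_amdgcn_update_dpp(0, v, DppCtrl, 0xF, 0xF, false);
  }

A non-constant argument in one of those positions is rejected by Sema before codegen runs, which is what justifies the assert in EmitScalarOrConstFoldImmArg that the fold always succeeds.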