This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Pierre-vh marked an inline comment as done.
Closed by commit rG23431b524603: [clang][CodeGen] Fix GPU-specific attributes 
being dropped by bitcode linking (authored by Pierre-vh).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152251/new/

https://reviews.llvm.org/D152251

Files:
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/test/CodeGenCUDA/Inputs/ocml-sample-target-attrs.cl
  clang/test/CodeGenCUDA/link-builtin-bitcode-denormal-fp-mode.cu
  clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu

Index: clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu
@@ -0,0 +1,48 @@
+// Verify the behavior of the +gfxN-insts in the way that
+// rocm-device-libs should be built with. e.g. If the device libraries has a function
+// with "+gfx11-insts", that attribute should still be present after linking and not
+// overwritten with the current target's settings.
+
+// This is important because at this time, many device-libs functions that are only
+// available on some GPUs put an attribute such as "+gfx11-insts" so that
+// AMDGPURemoveIncompatibleFunctions can detect & remove them if needed.
+
+// Build the fake device library in the way rocm-device-libs should be built.
+//
+// RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa\
+// RUN:   -mcode-object-version=none -emit-llvm-bc \
+// RUN:   %S/Inputs/ocml-sample-target-attrs.cl -o %t.bc
+
+// Check the default behavior
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 -fcuda-is-device \
+// RUN:   -mlink-builtin-bitcode %t.bc \
+// RUN:   -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,INTERNALIZE
+
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx1101 -fcuda-is-device \
+// RUN:   -mlink-builtin-bitcode %t.bc -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,INTERNALIZE
+
+// Check the case where no internalization is performed
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
+// RUN:   -fcuda-is-device -mlink-bitcode-file %t.bc -emit-llvm %s -o -  | FileCheck %s --check-prefixes=CHECK,NOINTERNALIZE
+
+// Check the case where no internalization is performed
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx1101 \
+// RUN:   -fcuda-is-device -mlink-bitcode-file %t.bc -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,NOINTERNALIZE
+
+
+// CHECK: define {{.*}} i64 @do_intrin_stuff() #[[ATTR:[0-9]+]]
+// INTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-cpu"="gfx{{.*}}" "target-features"="+gfx11-insts"
+// NOINTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-features"="+gfx11-insts"
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+
+typedef unsigned long ulong;
+
+extern "C" {
+__device__ ulong do_intrin_stuff(void);
+
+__global__ void kernel_f16(ulong* out) {
+    *out = do_intrin_stuff();
+  }
+}
Index: clang/test/CodeGenCUDA/link-builtin-bitcode-denormal-fp-mode.cu
===================================================================
--- clang/test/CodeGenCUDA/link-builtin-bitcode-denormal-fp-mode.cu
+++ clang/test/CodeGenCUDA/link-builtin-bitcode-denormal-fp-mode.cu
@@ -132,24 +132,32 @@
 
 // Default mode relies on the implicit check-not for the denormal-fp-math.
 
-// PSZ: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} }
-// PSZ: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} }
-// PSZ: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} }
+// PSZ: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign"
+// PSZ-SAME: "target-cpu"="gfx803"
+// PSZ: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
+// PSZ-SAME: "target-cpu"="gfx803"
+// PSZ: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
+// PSZ-SAME: "target-cpu"="gfx803"
 
 // FIXME: Should check-not "denormal-fp-math" within the line
-// IEEEF64-PSZF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} }
-// IEEEF64-PSZF32: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} }
-// IEEEF64-PSZF32: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} }
+// IEEEF64-PSZF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
+// IEEEF64-PSZF32-SAME: "target-cpu"="gfx803"
+// IEEEF64-PSZF32: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
+// IEEEF64-PSZF32-SAME: "target-cpu"="gfx803"
+// IEEEF64-PSZF32: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
+// IEEEF64-PSZF32-SAME: "target-cpu"="gfx803"
 
 // IEEEF32-PSZF64-DYNF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}}  }
 // implicit check-not
 // implicit check-not
 
 
-// IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} }
-// IEEEF32-PSZF64-DYNFULL: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}}  }
-// IEEEF32-PSZF64-DYNFULL: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee"  {{.*}} "target-cpu"="gfx803" {{.*}} }
-
+// IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee"
+// IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803"
+// IEEEF32-PSZF64-DYNFULL: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee"
+// IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803"
+// IEEEF32-PSZF64-DYNFULL: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee"
+// IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803"
 
 // -mlink-bitcode-file doesn't internalize or propagate attributes.
 // NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} }
Index: clang/test/CodeGenCUDA/Inputs/ocml-sample-target-attrs.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/Inputs/ocml-sample-target-attrs.cl
@@ -0,0 +1,7 @@
+typedef unsigned long ulong;
+
+__attribute__((target("gfx11-insts")))
+ulong do_intrin_stuff(void)
+{
+  return __builtin_amdgcn_s_sendmsg_rtnl(0x0);
+}
Index: clang/lib/CodeGen/CodeGenModule.h
===================================================================
--- clang/lib/CodeGen/CodeGenModule.h
+++ clang/lib/CodeGen/CodeGenModule.h
@@ -1583,7 +1583,8 @@
                         ForDefinition_t IsForDefinition = NotForDefinition);
 
   bool GetCPUAndFeaturesAttributes(GlobalDecl GD,
-                                   llvm::AttrBuilder &AttrBuilder);
+                                   llvm::AttrBuilder &AttrBuilder,
+                                   bool SetTargetFeatures = true);
   void setNonAliasAttributes(GlobalDecl GD, llvm::GlobalObject *GO);
 
   /// Set function attributes for a function declaration.
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -2226,7 +2226,8 @@
 }
 
 bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
-                                                llvm::AttrBuilder &Attrs) {
+                                                llvm::AttrBuilder &Attrs,
+                                                bool SetTargetFeatures) {
   // Add target-cpu and target-features attributes to functions. If
   // we have a decl for the function and it has a target attribute then
   // parse that and add it to the feature set.
@@ -2286,7 +2287,7 @@
     Attrs.addAttribute("tune-cpu", TuneCPU);
     AddedAttr = true;
   }
-  if (!Features.empty()) {
+  if (!Features.empty() && SetTargetFeatures) {
     llvm::sort(Features);
     Attrs.addAttribute("target-features", llvm::join(Features, ","));
     AddedAttr = true;
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -2025,7 +2025,8 @@
   llvm::AttrBuilder FuncAttrs(F.getContext());
   getTrivialDefaultFunctionAttributes(F.getName(), F.hasOptNone(),
                                       /*AttrOnCallSite=*/false, FuncAttrs);
-  GetCPUAndFeaturesAttributes(GlobalDecl(), FuncAttrs);
+  GetCPUAndFeaturesAttributes(GlobalDecl(), FuncAttrs,
+                              /*AddTargetFeatures=*/false);
 
   if (!WillInternalize && F.isInterposable()) {
     // Do not promote "dynamic" denormal-fp-math to this translation unit's
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to