[PATCH] D47070: [CUDA] Upgrade linked bitcode to enable inlining

Jonas Hahnfeld via Phabricator via cfe-commits Fri, 18 May 2018 08:01:24 -0700

Hahnfeld created this revision.
Hahnfeld added reviewers: tra, jlebar.
Herald added a subscriber: cfe-commits.


Revision https://reviews.llvm.org/rC329829 added the architecture to
"target-features". This prevents inlining of previously generated bitcode
because the feature sets no longer match. This patch therefore duplicates the
information from "target-cpu" into "target-features" when linking bitcode, to
avoid writing special cases in the analysis.

I'm not sure if that will save us in the long term because inlining
will break again when we add new features. Additionally, using later
CUDA versions might raise the PTX version which is also a feature...


Repository:
  rC Clang

https://reviews.llvm.org/D47070

Files:
  lib/CodeGen/CGCall.cpp
  test/CodeGenCUDA/Inputs/device-code-2.ll
  test/CodeGenCUDA/Inputs/device-code.ll
  test/CodeGenCUDA/link-device-bitcode.cu

Index: test/CodeGenCUDA/link-device-bitcode.cu
===================================================================
--- test/CodeGenCUDA/link-device-bitcode.cu
+++ test/CodeGenCUDA/link-device-bitcode.cu
@@ -56,15 +56,24 @@
 // Make sure device_mul_or_add() is present in IR, is internal and
 // calls __nvvm_reflect().
 // CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
+// CHECK-IR-SAME: [[MUL_OR_ADD:#[0-9]+]] {
 // CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff(
 // CHECK-IR: call i32 @__nvvm_reflect
 // CHECK-IR: ret float
 
 // Make sure we've linked in and internalized only needed functions
 // from the second bitcode file.
 // CHECK-IR-2-LABEL: define internal double @__nv_sin
+// CHECK-IR-2-SAME: [[IR2ATTR:#[0-9]+]] {
 // CHECK-IR-2-LABEL: define internal double @__nv_exp
+// CHECK-IR-2-SAME: [[IR2ATTR]] {
 // CHECK-IR-2-NOT: double @__unused
 
+// CHECK-IR: attributes [[MUL_OR_ADD]] = {
+// CHECK-IR-SAME: "target-features"="+ptx42,+sm_35"
+
+// CHECK-IR-2: attributes [[IR2ATTR]] = {
+// CHECK-IR-2-SAME: "target-features"="+sm_35"
+
 // Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
 // CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1
Index: test/CodeGenCUDA/Inputs/device-code.ll
===================================================================
--- test/CodeGenCUDA/Inputs/device-code.ll
+++ test/CodeGenCUDA/Inputs/device-code.ll
@@ -16,7 +16,7 @@
        ret void
 }
 
-define float @_Z17device_mul_or_addff(float %a, float %b) {
+define float @_Z17device_mul_or_addff(float %a, float %b) #0 {
   %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
   %cmp = icmp ne i32 %reflect, 0
   br i1 %cmp, label %use_mul, label %use_add
@@ -36,3 +36,5 @@
 
   ret float %ret
 }
+
+attributes #0 = { "target-cpu"="sm_35" "target-features"="+ptx42" }
Index: test/CodeGenCUDA/Inputs/device-code-2.ll
===================================================================
--- test/CodeGenCUDA/Inputs/device-code-2.ll
+++ test/CodeGenCUDA/Inputs/device-code-2.ll
@@ -2,15 +2,16 @@
 
 target triple = "nvptx-unknown-cuda"
 
-define double @__nv_sin(double %a) {
+define double @__nv_sin(double %a) #0 {
        ret double 1.0
 }
 
-define double @__nv_exp(double %a) {
+define double @__nv_exp(double %a) #0 {
        ret double 3.0
 }
 
 define double @__unused(double %a) {
        ret double 2.0
 }
 
+attributes #0 = { "target-cpu"="sm_35" }
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -1790,12 +1790,45 @@
   }
 }
 
+/// Return true if \p Feature is exactly one of the comma-separated entries of
+/// \p FeatureList. Entries are compared verbatim, including any leading '+'.
+static bool hasTargetFeature(llvm::StringRef FeatureList,
+                             llvm::StringRef Feature) {
+  for (llvm::StringRef Remaining = FeatureList; !Remaining.empty();) {
+    const auto HeadAndTail = Remaining.split(',');
+    if (HeadAndTail.first == Feature)
+      return true;
+    Remaining = HeadAndTail.second;
+  }
+  return false;
+}
+
 void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) {
   llvm::AttrBuilder FuncAttrs;
   ConstructDefaultFnAttrList(F.getName(),
                              F.hasFnAttribute(llvm::Attribute::OptimizeNone),
                              /* AttrOnCallsite = */ false, FuncAttrs);
   F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs);
+
+  if (getTriple().isNVPTX()) {
+    // Revision 329829 added the architecture to "target-features". Duplicate
+    // this information from "target-cpu" to maintain the ability to inline
+    // functions from bitcode files compiled with older versions of LLVM/Clang.
+    auto TargetCpu = F.getFnAttribute("target-cpu");
+    if (TargetCpu.isStringAttribute()) {
+      // Entries are '+'-prefixed: match "+<cpu>", not the bare CPU name.
+      std::string CpuFeature = ("+" + TargetCpu.getValueAsString()).str();
+      auto TargetFeatures = F.getFnAttribute("target-features");
+      if (TargetFeatures.isStringAttribute()) {
+        llvm::StringRef FeatureList = TargetFeatures.getValueAsString();
+        if (!hasTargetFeature(FeatureList, CpuFeature))
+          F.addFnAttr("target-features",
+                      (FeatureList + "," + CpuFeature).str());
+      } else {
+        F.addFnAttr("target-features", CpuFeature);
+      }
+    }
+  }
 }
 
 void CodeGenModule::ConstructAttributeList(

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D47070: [CUDA] Upgrade linked bitcode to enable inlining

Reply via email to