Hahnfeld created this revision. Hahnfeld added reviewers: tra, jlebar. Herald added a subscriber: cfe-commits.
Revision https://reviews.llvm.org/rC329829 added the architecture to "target-features". This prevents inlining of previously generated bitcode because the feature sets don't match. Thus, duplicate the information from "target-cpu" into "target-features" to avoid writing special cases in the analysis. I'm not sure if that will save us in the long term because inlining will break again when we add new features. Additionally, using later CUDA versions might raise the PTX version, which is also a feature... Repository: rC Clang https://reviews.llvm.org/D47070 Files: lib/CodeGen/CGCall.cpp test/CodeGenCUDA/Inputs/device-code-2.ll test/CodeGenCUDA/Inputs/device-code.ll test/CodeGenCUDA/link-device-bitcode.cu
Index: test/CodeGenCUDA/link-device-bitcode.cu =================================================================== --- test/CodeGenCUDA/link-device-bitcode.cu +++ test/CodeGenCUDA/link-device-bitcode.cu @@ -56,15 +56,24 @@ // Make sure device_mul_or_add() is present in IR, is internal and // calls __nvvm_reflect(). // CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff( +// CHECK-IR-SAME: [[MUL_OR_ADD:#[0-9]+]] { // CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff( // CHECK-IR: call i32 @__nvvm_reflect // CHECK-IR: ret float // Make sure we've linked in and internalized only needed functions // from the second bitcode file. // CHECK-IR-2-LABEL: define internal double @__nv_sin +// CHECK-IR-2-SAME: [[IR2ATTR:#[0-9]+]] { // CHECK-IR-2-LABEL: define internal double @__nv_exp +// CHECK-IR-2-SAME: [[IR2ATTR]] { // CHECK-IR-2-NOT: double @__unused +// CHECK-IR: attributes [[MUL_OR_ADD]] = { +// CHECK-IR-SAME: "target-features"="+ptx42,+sm_35" + +// CHECK-IR-2: attributes [[IR2ATTR]] = { +// CHECK-IR-2-SAME: "target-features"="+sm_35" + // Verify that NVVMReflect pass is among the passes run by NVPTX back-end. 
// CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1 Index: test/CodeGenCUDA/Inputs/device-code.ll =================================================================== --- test/CodeGenCUDA/Inputs/device-code.ll +++ test/CodeGenCUDA/Inputs/device-code.ll @@ -16,7 +16,7 @@ ret void } -define float @_Z17device_mul_or_addff(float %a, float %b) { +define float @_Z17device_mul_or_addff(float %a, float %b) #0 { %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*)) %cmp = icmp ne i32 %reflect, 0 br i1 %cmp, label %use_mul, label %use_add @@ -36,3 +36,5 @@ ret float %ret } + +attributes #0 = { "target-cpu"="sm_35" "target-features"="+ptx42" } Index: test/CodeGenCUDA/Inputs/device-code-2.ll =================================================================== --- test/CodeGenCUDA/Inputs/device-code-2.ll +++ test/CodeGenCUDA/Inputs/device-code-2.ll @@ -2,15 +2,16 @@ target triple = "nvptx-unknown-cuda" -define double @__nv_sin(double %a) { +define double @__nv_sin(double %a) #0 { ret double 1.0 } -define double @__nv_exp(double %a) { +define double @__nv_exp(double %a) #0 { ret double 3.0 } define double @__unused(double %a) { ret double 2.0 } +attributes #0 = { "target-cpu"="sm_35" } Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -1790,12 +1790,45 @@ } } +static bool hasTargetFeature(llvm::StringRef FeatureList, + llvm::StringRef Feature) { + StringRef Rest = FeatureList; + while (!Rest.empty()) { + auto Split = Rest.split(','); + if (Split.first == Feature) + return true; + Rest = Split.second; + } + + return false; +} + void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) { llvm::AttrBuilder FuncAttrs; ConstructDefaultFnAttrList(F.getName(), F.hasFnAttribute(llvm::Attribute::OptimizeNone), /* AttrOnCallsite = */ false, FuncAttrs); 
F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); + + if (getTriple().isNVPTX()) { + // Revision 329829 added the architecture as a "target-feature". Duplicate + // this information from "target-cpu" to maintain the ability to inline + // functions from bitcode files compiled with older versions of LLVM/Clang. + auto TargetCpu = F.getFnAttribute("target-cpu"); + if (TargetCpu.isStringAttribute()) { + llvm::StringRef CpuAttr = TargetCpu.getValueAsString(); + + auto TargetFeatures = F.getFnAttribute("target-features"); + if (TargetFeatures.isStringAttribute()) { + llvm::StringRef FeatureList = TargetFeatures.getValueAsString(); + if (!hasTargetFeature(FeatureList, CpuAttr.str())) { + F.addFnAttr("target-features", (FeatureList + ",+" + CpuAttr).str()); + } + } else { + F.addFnAttr("target-features", llvm::Twine("+", CpuAttr).str()); + } + } + } } void CodeGenModule::ConstructAttributeList(
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits