================
@@ -763,6 +765,36 @@ void CodeGenModule::handleAMDGPUWavesPerEUAttr(
assert(Max == 0 && "Max must be zero");
}
+// If the module references both __ocml_sin and __ocml_cos for a given type,
+// inject a declaration + @llvm.compiler.used entry for the corresponding
+// __ocml_sincos so the demand-linker pulls it in from the device library.
+// The @llvm.compiler.used entry prevents early GlobalDCE from removing sincos
+// before the AMDGPUSimplifyLibCallsPass can use it. A late cleanup pass
+// (AMDGPUUnusedLibFuncCleanupPass, registered at OptimizerLastEP) removes
+// unused sincos after optimization.
+void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
+ CodeGen::CodeGenModule &CGM) const {
+ llvm::Module &M = CGM.getModule();
+ llvm::SmallVector<llvm::GlobalValue *, 2> ToAdd;
+
+ for (bool IsF32 : {true, false}) {
+ auto *Sin = M.getFunction(IsF32 ? "__ocml_sin_f32" : "__ocml_sin_f64");
+ auto *Cos = M.getFunction(IsF32 ? "__ocml_cos_f32" : "__ocml_cos_f64");
+ const char *Name = IsF32 ? "__ocml_sincos_f32" : "__ocml_sincos_f64";
+ if (!Sin || !Cos || M.getFunction(Name))
+ continue;
+ llvm::Type *FPTy = IsF32 ? llvm::Type::getFloatTy(M.getContext())
+ : llvm::Type::getDoubleTy(M.getContext());
+ llvm::Type *PtrTy = llvm::PointerType::get(M.getContext(), 5);
+ ToAdd.push_back(llvm::Function::Create(
+ llvm::FunctionType::get(FPTy, {FPTy, PtrTy}, false),
+ llvm::GlobalValue::ExternalLinkage, Name, &M));
+ }
+
+ if (!ToAdd.empty())
+ llvm::appendToCompilerUsed(M, ToAdd);
----------------
jmmartinez wrote:
Would it make sense to move this from Clang's CodeGen into a pass in the
optimization pipeline?
Today, the SPIR-V backend doesn't preserve `llvm.compiler.used`, so these
references would be lost when the module is translated back to LLVM IR and
fed to the AMDGPU backend.
https://github.com/llvm/llvm-project/pull/181774
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits