================
@@ -763,6 +765,36 @@ void CodeGenModule::handleAMDGPUWavesPerEUAttr(
     assert(Max == 0 && "Max must be zero");
 }
 
+// If the module references both __ocml_sin and __ocml_cos for a given type,
+// inject a declaration + @llvm.compiler.used entry for the corresponding
+// __ocml_sincos so the demand-linker pulls it in from the device library.
+// The @llvm.compiler.used entry prevents early GlobalDCE from removing sincos
+// before the AMDGPUSimplifyLibCallsPass can use it.  A late cleanup pass
+// (AMDGPUUnusedLibFuncCleanupPass, registered at OptimizerLastEP) removes
+// unused sincos after optimization.
+void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
+    CodeGen::CodeGenModule &CGM) const {
+  llvm::Module &M = CGM.getModule();
+  llvm::SmallVector<llvm::GlobalValue *, 2> ToAdd;
+
+  for (bool IsF32 : {true, false}) {
+    auto *Sin = M.getFunction(IsF32 ? "__ocml_sin_f32" : "__ocml_sin_f64");
+    auto *Cos = M.getFunction(IsF32 ? "__ocml_cos_f32" : "__ocml_cos_f64");
+    const char *Name = IsF32 ? "__ocml_sincos_f32" : "__ocml_sincos_f64";
+    if (!Sin || !Cos || M.getFunction(Name))
+      continue;
+    llvm::Type *FPTy = IsF32 ? llvm::Type::getFloatTy(M.getContext())
+                              : llvm::Type::getDoubleTy(M.getContext());
+    llvm::Type *PtrTy = llvm::PointerType::get(M.getContext(), 5);
----------------
jmmartinez wrote:

```suggestion
    llvm::Type *PtrTy = llvm::PointerType::get(M.getContext(), AMDGPUAS::PRIVATE_ADDRESS);
```

https://github.com/llvm/llvm-project/pull/181774
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to