================ @@ -424,6 +424,34 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, CGM.CreateRuntimeFunction(FTy, LaunchKernelName); CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(), LaunchKernelArgs); + + // To prevent CUDA device stub functions from being merged by ICF in MSVC + // environment, create an unique global variable for each kernel and write to + // the variable in the device stub. + if (CGM.getContext().getTargetInfo().getCXXABI().isMicrosoft() && + !CGF.getLangOpts().HIP) { + llvm::Function *KernelFunction = llvm::cast<llvm::Function>(Kernel); + if (KernelFunction->hasComdat()) { + std::string KernelName = KernelFunction->getName().str(); + std::string GlobalVarName = KernelName + ".id"; + + llvm::GlobalVariable *HandleVar = + CGM.getModule().getNamedGlobal(GlobalVarName); + if (!HandleVar) { + HandleVar = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, + /*Constant=*/false, KernelFunction->getLinkage(), + llvm::ConstantInt::get(CGM.Int8Ty, 0), GlobalVarName); + HandleVar->setDSOLocal(KernelFunction->isDSOLocal()); + HandleVar->setVisibility(KernelFunction->getVisibility()); + HandleVar->setComdat(CGM.getModule().getOrInsertComdat(GlobalVarName)); + } + + CGF.Builder.CreateAlignedStore(llvm::ConstantInt::get(CGM.Int8Ty, 1), ---------------- rnk wrote:
LLVM knows how to optimize away a single write to an otherwise unused global, so I would mark this store volatile.

https://github.com/llvm/llvm-project/pull/90155
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits