SimeonEhrig updated this revision to Diff 142921.
SimeonEhrig added a comment.

Thank you everyone for your review comments!

We addressed the inline comments and improved the description of the change set 
for clarity and context.
Tests are updated as well.

This now implements the same fix that was previously accepted in
https://reviews.llvm.org/D34059, but only for CUDA.


https://reviews.llvm.org/D44435

Files:
  lib/CodeGen/CGCUDANV.cpp
  unittests/CodeGen/IncrementalProcessingTest.cpp

Index: unittests/CodeGen/IncrementalProcessingTest.cpp
===================================================================
--- unittests/CodeGen/IncrementalProcessingTest.cpp
+++ unittests/CodeGen/IncrementalProcessingTest.cpp
@@ -21,9 +21,11 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Target/TargetOptions.h"
 #include "gtest/gtest.h"
 
 #include <memory>
+#include <string>
 
 using namespace llvm;
 using namespace clang;
@@ -171,4 +173,122 @@
 
 }
 
+
+// In CUDA incremental processing, a CUDA module ctor and dtor are generated
+// for every statement if a fatbinary file exists.
+const char CUDATestProgram1[] =
+    "void cudaFunc1(){}\n";
+
+const char CUDATestProgram2[] =
+    "void cudaFunc2(){}\n";
+
+const Function* getCUDActor(llvm::Module& M) {
+  for (const auto& Func: M)
+    if (Func.hasName() && Func.getName().startswith("__cuda_module_ctor_"))
+      return &Func;
+  // No function in M has a name starting with "__cuda_module_ctor_".
+  return nullptr;
+}
+
+const Function* getCUDAdtor(llvm::Module& M) {
+  for (const auto& Func: M)
+    if (Func.hasName() && Func.getName().startswith("__cuda_module_dtor_"))
+      return &Func;
+  // No function in M has a name starting with "__cuda_module_dtor_".
+  return nullptr;
+}
+
+TEST(IncrementalProcessing, EmitCUDAGlobalInitFunc) {
+    LLVMContext Context;
+    CompilerInstance compiler;
+    // Configure a C++11 CUDA compilation that targets the process triple.
+    compiler.createDiagnostics();
+    compiler.getLangOpts().CPlusPlus = 1;
+    compiler.getLangOpts().CPlusPlus11 = 1;
+    compiler.getLangOpts().CUDA = 1;
+
+    compiler.getTargetOpts().Triple = llvm::Triple::normalize(
+        llvm::sys::getProcessTriple());
+    compiler.setTarget(clang::TargetInfo::CreateTargetInfo(
+      compiler.getDiagnostics(),
+      std::make_shared<clang::TargetOptions>(
+        compiler.getTargetOpts())));
+
+    // Generating CUDA host code requires an aux target, so set up the
+    // AuxTriple (an NVPTX triple matching the host's bitness) and aux target.
+    llvm::Triple hostTriple(llvm::sys::getProcessTriple());
+    compiler.getFrontendOpts().AuxTriple =
+        hostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda";
+    auto targetOptions = std::make_shared<clang::TargetOptions>();
+    targetOptions->Triple = compiler.getFrontendOpts().AuxTriple;
+    targetOptions->HostTriple = compiler.getTarget().getTriple().str();
+    compiler.setAuxTarget(clang::TargetInfo::CreateTargetInfo(
+        compiler.getDiagnostics(), targetOptions));
+
+    // A fatbinary file is required for the code generator to emit the ctor
+    // and dtor, so point it at a temporary dummy file.
+    auto tmpFatbinFileOrError = llvm::sys::fs::TempFile::create("dummy.fatbin");
+    ASSERT_TRUE((bool)tmpFatbinFileOrError);
+    auto tmpFatbinFile = std::move(*tmpFatbinFileOrError);
+    compiler.getCodeGenOpts().CudaGpuBinaryFileName = tmpFatbinFile.TmpName;
+
+    compiler.createFileManager();
+    compiler.createSourceManager(compiler.getFileManager());
+    compiler.createPreprocessor(clang::TU_Prefix);
+    compiler.getPreprocessor().enableIncrementalProcessing();
+
+    compiler.createASTContext();
+
+    CodeGenerator* CG =
+        CreateLLVMCodeGen(
+            compiler.getDiagnostics(),
+            "main-module",
+            compiler.getHeaderSearchOpts(),
+            compiler.getPreprocessorOpts(),
+            compiler.getCodeGenOpts(),
+            Context);
+
+    compiler.setASTConsumer(std::unique_ptr<ASTConsumer>(CG));
+    compiler.createSema(clang::TU_Prefix, nullptr);
+    Sema& S = compiler.getSema();
+
+    std::unique_ptr<Parser> ParseOP(new Parser(S.getPreprocessor(), S,
+                                               /*SkipFunctionBodies*/ false));
+    Parser &P = *ParseOP.get();
+
+    std::array<std::unique_ptr<llvm::Module>, 3> M;
+    M[0] = IncrementalParseAST(compiler, P, *CG, nullptr);
+    ASSERT_TRUE(M[0]);
+
+    M[1] = IncrementalParseAST(compiler, P, *CG, CUDATestProgram1);
+    ASSERT_TRUE(M[1]);
+    ASSERT_TRUE(M[1]->getFunction("_Z9cudaFunc1v"));
+
+    M[2] = IncrementalParseAST(compiler, P, *CG, CUDATestProgram2);
+    ASSERT_TRUE(M[2]);
+    ASSERT_TRUE(M[2]->getFunction("_Z9cudaFunc2v"));
+    // The first program's code must not end up in the second module:
+    ASSERT_FALSE(M[2]->getFunction("_Z9cudaFunc1v"));
+
+    // Make sure that the CUDA ctors and dtors exist:
+    const Function* CUDActor1 = getCUDActor(*M[1]);
+    ASSERT_TRUE(CUDActor1);
+
+    const Function* CUDActor2 = getCUDActor(*M[2]);
+    ASSERT_TRUE(CUDActor2);
+
+    const Function* CUDAdtor1 = getCUDAdtor(*M[1]);
+    ASSERT_TRUE(CUDAdtor1);
+
+    const Function* CUDAdtor2 = getCUDAdtor(*M[2]);
+    ASSERT_TRUE(CUDAdtor2);
+
+    // Compare the ctor names and the dtor names of the two modules to check
+    // that they are unique.
+    ASSERT_FALSE(CUDActor1->getName() == CUDActor2->getName());
+    ASSERT_FALSE(CUDAdtor1->getName() == CUDAdtor2->getName());
+
+    ASSERT_FALSE((bool)tmpFatbinFile.discard());
+}
+
 } // end anonymous namespace
Index: lib/CodeGen/CGCUDANV.cpp
===================================================================
--- lib/CodeGen/CGCUDANV.cpp
+++ lib/CodeGen/CGCUDANV.cpp
@@ -21,6 +21,7 @@
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/Path.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -244,7 +245,7 @@
 
 /// Creates a global constructor function for the module:
 /// \code
-/// void __cuda_module_ctor(void*) {
+/// void __cuda_module_ctor_<ModuleName>(void*) {
 ///     Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
 ///     __cuda_register_globals(Handle);
 /// }
@@ -277,9 +278,26 @@
     return nullptr;
   }
 
+  // get name from the module to generate unique ctor name for every module
+  const SmallString<128> ModuleName
+      = llvm::sys::path::filename(CGM.getModule().getName());
+  SmallString<128> CtorSuffix("");
+  if (!ModuleName.empty()){
+    CtorSuffix.append("_");
+    CtorSuffix.append(ModuleName);
+  }
+
+  for (size_t i = 0; i < CtorSuffix.size(); ++i) {
+    // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens
+    // to be the set of C preprocessing numbers.
+    if (!isPreprocessingNumberBody(CtorSuffix[i]))
+      CtorSuffix[i] = '_';
+  }
+
   llvm::Function *ModuleCtorFunc = llvm::Function::Create(
       llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
-      llvm::GlobalValue::InternalLinkage, "__cuda_module_ctor", &TheModule);
+      llvm::GlobalValue::InternalLinkage, "__cuda_module_ctor" + CtorSuffix,
+      &TheModule);
   llvm::BasicBlock *CtorEntryBB =
       llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc);
   CGBuilderTy CtorBuilder(CGM, Context);
@@ -329,7 +347,7 @@
 /// Creates a global destructor function that unregisters the GPU code blob
 /// registered by constructor.
 /// \code
-/// void __cuda_module_dtor(void*) {
+/// void __cuda_module_dtor_<ModuleName>(void*) {
 ///     __cudaUnregisterFatBinary(Handle);
 /// }
 /// \endcode
@@ -343,9 +361,26 @@
       llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
       "__cudaUnregisterFatBinary");
 
+  // get name from the module to generate unique dtor name for every module
+  const SmallString<128> ModuleName
+      = llvm::sys::path::filename(CGM.getModule().getName());
+  SmallString<128> DtorSuffix("");
+  if (!ModuleName.empty()){
+    DtorSuffix.append("_");
+    DtorSuffix.append(ModuleName);
+  }
+
+  for (size_t i = 0; i < DtorSuffix.size(); ++i) {
+    // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens
+    // to be the set of C preprocessing numbers.
+    if (!isPreprocessingNumberBody(DtorSuffix[i]))
+      DtorSuffix[i] = '_';
+  }
+
   llvm::Function *ModuleDtorFunc = llvm::Function::Create(
       llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
-      llvm::GlobalValue::InternalLinkage, "__cuda_module_dtor", &TheModule);
+      llvm::GlobalValue::InternalLinkage, "__cuda_module_dtor" + DtorSuffix,
+      &TheModule);
   llvm::BasicBlock *DtorEntryBB =
       llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc);
   CGBuilderTy DtorBuilder(CGM, Context);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to