argentite created this revision.
Herald added subscribers: mattd, carlosgalvezp, yaxunl.
Herald added a project: All.
argentite updated this revision to Diff 507049.
argentite added a comment.
argentite updated this revision to Diff 510808.
argentite edited the summary of this revision.
argentite added reviewers: v.g.vassilev, sgraenitz, lhames.
argentite published this revision for review.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
Use the full name of the CUDA runtime library
argentite added a comment.
Clear LinkModules on every interpreter iteration
CUDA support can be enabled in clang-repl with the --cuda flag.
Device code linking is not yet supported, so the `inline` keyword must be
used with all `__device__` functions.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D146389
Files:
clang/include/clang/Interpreter/Interpreter.h
clang/lib/CodeGen/CodeGenAction.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/Interpreter/CMakeLists.txt
clang/lib/Interpreter/IncrementalParser.cpp
clang/lib/Interpreter/IncrementalParser.h
clang/lib/Interpreter/Interpreter.cpp
clang/lib/Interpreter/Offload.cpp
clang/lib/Interpreter/Offload.h
clang/tools/clang-repl/ClangRepl.cpp
Index: clang/tools/clang-repl/ClangRepl.cpp
===================================================================
--- clang/tools/clang-repl/ClangRepl.cpp
+++ clang/tools/clang-repl/ClangRepl.cpp
@@ -23,6 +23,9 @@
#include "llvm/Support/TargetSelect.h" // llvm::Initialize*
#include <optional>
+static llvm::cl::opt<bool> CudaEnabled("cuda", llvm::cl::Hidden);
+static llvm::cl::opt<std::string> OffloadArch("offload-arch", llvm::cl::Hidden);
+
static llvm::cl::list<std::string>
ClangArgs("Xcc",
llvm::cl::desc("Argument to pass to the CompilerInvocation"),
@@ -90,9 +93,29 @@
return 0;
}
+ std::unique_ptr<clang::CompilerInstance> DeviceCI;
+ if (CudaEnabled) {
+ // initialize NVPTX backend
+ LLVMInitializeNVPTXTargetInfo();
+ LLVMInitializeNVPTXTarget();
+ LLVMInitializeNVPTXTargetMC();
+ LLVMInitializeNVPTXAsmPrinter();
+
+ auto DeviceArgv = ClangArgv;
+
+ DeviceCI = ExitOnErr(
+ clang::IncrementalCudaCompilerBuilder::createDevice(DeviceArgv));
+ }
+
// FIXME: Investigate if we could use runToolOnCodeWithArgs from tooling. It
// can replace the boilerplate code for creation of the compiler instance.
- auto CI = ExitOnErr(clang::IncrementalCompilerBuilder::create(ClangArgv));
+ std::unique_ptr<clang::CompilerInstance> CI;
+ if (CudaEnabled) {
+ CI = ExitOnErr(clang::IncrementalCudaCompilerBuilder::createHost(
+ ClangArgv, "/tmp/clang-repl.fatbin"));
+ } else {
+ CI = ExitOnErr(clang::IncrementalCompilerBuilder::createCpp(ClangArgv));
+ }
// Set an error handler, so that any LLVM backend diagnostics go through our
// error handler.
@@ -102,7 +125,19 @@
// Load any requested plugins.
CI->LoadRequestedPlugins();
- auto Interp = ExitOnErr(clang::Interpreter::create(std::move(CI)));
+ std::unique_ptr<clang::Interpreter> Interp;
+ if (CudaEnabled) {
+ if (OffloadArch.empty()) {
+ OffloadArch = "sm_35";
+ }
+ Interp = ExitOnErr(clang::Interpreter::createWithCUDA(
+ std::move(CI), std::move(DeviceCI), OffloadArch,
+ "/tmp/clang-repl.fatbin"));
+
+ ExitOnErr(Interp->LoadDynamicLibrary("libcudart.so"));
+ } else
+ Interp = ExitOnErr(clang::Interpreter::create(std::move(CI)));
+
for (const std::string &input : OptInputs) {
if (auto Err = Interp->ParseAndExecute(input))
llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "error: ");
Index: clang/lib/Interpreter/Offload.h
===================================================================
--- /dev/null
+++ clang/lib/Interpreter/Offload.h
@@ -0,0 +1,47 @@
+//===--------------- Offload.h - CUDA Offloading ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes required for offloading to CUDA devices.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_INTERPRETER_OFFLOAD_H
+#define LLVM_CLANG_LIB_INTERPRETER_OFFLOAD_H
+
+#include "IncrementalParser.h"
+
+namespace clang {
+
+class DeviceCodeInlinerAction;
+
+class IncrementalCUDADeviceParser : public IncrementalParser {
+public:
+ IncrementalCUDADeviceParser(std::unique_ptr<CompilerInstance> Instance,
+ llvm::LLVMContext &LLVMCtx, llvm::StringRef Arch,
+ llvm::StringRef FatbinFile, llvm::Error &Err);
+
+ llvm::Expected<PartialTranslationUnit &>
+ Parse(llvm::StringRef Input) override;
+
+ // Generate PTX for the last PTU
+ llvm::Expected<llvm::StringRef> GeneratePTX();
+
+ // Write last PTX to the fatbinary file
+ llvm::Error WriteFatbinary() const;
+
+ ~IncrementalCUDADeviceParser();
+
+protected:
+ int SMVersion;
+ std::string FatbinFilePath;
+ llvm::SmallString<1024> PTXCode;
+};
+
+} // namespace clang
+
+#endif // LLVM_CLANG_LIB_INTERPRETER_OFFLOAD_H
Index: clang/lib/Interpreter/Offload.cpp
===================================================================
--- /dev/null
+++ clang/lib/Interpreter/Offload.cpp
@@ -0,0 +1,166 @@
+//===-------------- Offload.cpp - CUDA Offloading ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements offloading to CUDA devices.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Offload.h"
+
+#include "clang/Frontend/CompilerInstance.h"
+
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace clang {
+
+IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
+ std::unique_ptr<CompilerInstance> Instance, llvm::LLVMContext &LLVMCtx,
+ llvm::StringRef Arch, llvm::StringRef FatbinFile, llvm::Error &Err)
+ : IncrementalParser(std::move(Instance), LLVMCtx, Err) {
+ if (Err)
+ return;
+
+ if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
+ llvm::errs() << Arch.substr(3) << SMVersion << '\n';
+
+ Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
+ "Invalid CUDA architecture",
+ llvm::inconvertibleErrorCode()));
+ return;
+ }
+
+ FatbinFilePath = FatbinFile.str();
+}
+
+llvm::Expected<PartialTranslationUnit &>
+IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
+ auto PTU = IncrementalParser::Parse(Input);
+ if (!PTU)
+ return PTU.takeError();
+
+ auto PTX = GeneratePTX();
+ if (!PTX)
+ return PTX.takeError();
+
+ auto Err = WriteFatbinary();
+ if (Err)
+ return Err;
+
+ return PTU;
+}
+
+llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
+ auto &PTU = PTUs.back();
+ std::string Error;
+ const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
+ PTU.TheModule->getTargetTriple(), Error);
+ if (!Target)
+ return llvm::make_error<llvm::StringError>(std::move(Error),
+ std::error_code());
+
+ llvm::TargetOptions TO = llvm::TargetOptions();
+ llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
+ PTU.TheModule->getTargetTriple(), "sm_" + llvm::itostr(SMVersion), "", TO,
+ llvm::Reloc::Model::PIC_);
+ PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
+
+ PTXCode.clear();
+ llvm::raw_svector_ostream dest(PTXCode);
+
+ llvm::legacy::PassManager PM;
+ if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
+ llvm::CGFT_AssemblyFile)) {
+ return llvm::make_error<llvm::StringError>(
+ "NVPTX backend cannot produce PTX code.",
+ llvm::inconvertibleErrorCode());
+ }
+
+ if (!PM.run(*PTU.TheModule))
+ return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
+ llvm::inconvertibleErrorCode());
+
+ PTXCode += '\0';
+ while (PTXCode.size() % 8)
+ PTXCode += '\0';
+ return PTXCode.str();
+}
+
+llvm::Error IncrementalCUDADeviceParser::WriteFatbinary() const {
+ enum FatBinFlags {
+ AddressSize64 = 0x01,
+ HasDebugInfo = 0x02,
+ ProducerCuda = 0x04,
+ HostLinux = 0x10,
+ HostMac = 0x20,
+ HostWindows = 0x40
+ };
+
+ struct FatBinInnerHeader {
+ uint16_t Kind; // 0x00
+ uint16_t unknown02; // 0x02
+ uint32_t HeaderSize; // 0x04
+ uint32_t DataSize; // 0x08
+ uint32_t unknown0c; // 0x0c
+ uint32_t CompressedSize; // 0x10
+ uint32_t SubHeaderSize; // 0x14
+ uint16_t VersionMinor; // 0x18
+ uint16_t VersionMajor; // 0x1a
+ uint32_t CudaArch; // 0x1c
+ uint32_t unknown20; // 0x20
+ uint32_t unknown24; // 0x24
+ uint32_t Flags; // 0x28
+ uint32_t unknown2c; // 0x2c
+ uint32_t unknown30; // 0x30
+ uint32_t unknown34; // 0x34
+ uint32_t UncompressedSize; // 0x38
+ uint32_t unknown3c; // 0x3c
+ uint32_t unknown40; // 0x40
+ uint32_t unknown44; // 0x44
+ FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
+ : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
+ DataSize(DataSize), unknown0c(0), CompressedSize(0),
+ SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
+ CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
+ unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
+ unknown3c(0), unknown40(0), unknown44(0) {}
+ };
+
+ struct FatBinHeader {
+ uint32_t Magic; // 0x00
+ uint16_t Version; // 0x04
+ uint16_t HeaderSize; // 0x06
+ uint32_t DataSize; // 0x08
+ uint32_t unknown0c; // 0x0c
+ public:
+ FatBinHeader(uint32_t DataSize)
+ : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
+ DataSize(DataSize), unknown0c(0) {}
+ };
+
+ std::error_code EC;
+ llvm::raw_fd_ostream os(FatbinFilePath.c_str(), EC, llvm::sys::fs::OF_None);
+ if (EC) {
+ return llvm::errorCodeToError(EC);
+ }
+
+ FatBinHeader FatbinOuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
+ os.write((const char *)&FatbinOuterHeader, FatbinOuterHeader.HeaderSize);
+
+ FatBinInnerHeader InnerHeader(
+ PTXCode.size(), 30, FatBinFlags::AddressSize64 | FatBinFlags::HostLinux);
+ os.write((const char *)&InnerHeader, InnerHeader.HeaderSize);
+ os << PTXCode;
+
+ return llvm::Error::success();
+}
+
+IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
+
+} // namespace clang
Index: clang/lib/Interpreter/Interpreter.cpp
===================================================================
--- clang/lib/Interpreter/Interpreter.cpp
+++ clang/lib/Interpreter/Interpreter.cpp
@@ -15,6 +15,7 @@
#include "IncrementalExecutor.h"
#include "IncrementalParser.h"
+#include "Offload.h"
#include "clang/AST/ASTContext.h"
#include "clang/Basic/TargetInfo.h"
@@ -139,7 +140,6 @@
// action and use other actions in incremental mode.
// FIXME: Print proper driver diagnostics if the driver flags are wrong.
// We do C++ by default; append right after argv[0] if no "-x" given
- ClangArgv.insert(ClangArgv.end(), "-xc++");
ClangArgv.insert(ClangArgv.end(), "-Xclang");
ClangArgv.insert(ClangArgv.end(), "-fincremental-extensions");
ClangArgv.insert(ClangArgv.end(), "-c");
@@ -172,6 +172,39 @@
return CreateCI(**ErrOrCC1Args);
}
+llvm::Expected<std::unique_ptr<CompilerInstance>>
+IncrementalCompilerBuilder::createCpp(std::vector<const char *> &ClangArgv) {
+ ClangArgv.insert(ClangArgv.begin(), "-xc++");
+
+ return IncrementalCompilerBuilder::create(ClangArgv);
+}
+
+llvm::Expected<std::unique_ptr<CompilerInstance>>
+IncrementalCudaCompilerBuilder::createDevice(
+ std::vector<const char *> &ClangArgv) {
+ ClangArgv.insert(ClangArgv.begin(), "-xcuda");
+ ClangArgv.insert(ClangArgv.begin(), "--cuda-device-only");
+
+ auto CI = IncrementalCompilerBuilder::create(ClangArgv);
+ assert(!CI.takeError());
+ return CI;
+}
+
+llvm::Expected<std::unique_ptr<CompilerInstance>>
+IncrementalCudaCompilerBuilder::createHost(std::vector<const char *> &ClangArgv,
+ llvm::StringRef FatbinFile) {
+ ClangArgv.insert(ClangArgv.begin(), "-xcuda");
+ ClangArgv.insert(ClangArgv.begin(), "--cuda-host-only");
+
+ auto CI = IncrementalCompilerBuilder::create(ClangArgv);
+ if (!CI)
+ return CI.takeError();
+
+ (*CI)->getCodeGenOpts().CudaGpuBinaryFileName = FatbinFile;
+
+ return CI;
+}
+
Interpreter::Interpreter(std::unique_ptr<CompilerInstance> CI,
llvm::Error &Err) {
llvm::ErrorAsOutParameter EAO(&Err);
@@ -200,6 +233,25 @@
return std::move(Interp);
}
+llvm::Expected<std::unique_ptr<Interpreter>> Interpreter::createWithCUDA(
+ std::unique_ptr<CompilerInstance> CI, std::unique_ptr<CompilerInstance> DCI,
+ llvm::StringRef CudaArch, llvm::StringRef TempDeviceCodeFilename) {
+ auto Interp = Interpreter::create(std::move(CI));
+ if (auto E = Interp.takeError())
+ return E;
+
+ llvm::Error Err = llvm::Error::success();
+ auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
+ std::move(DCI), *(*Interp)->TSCtx->getContext(), CudaArch,
+ TempDeviceCodeFilename, Err);
+ if (Err)
+ return std::move(Err);
+
+ (*Interp)->DeviceParser = std::move(DeviceParser);
+
+ return Interp;
+}
+
const CompilerInstance *Interpreter::getCompilerInstance() const {
return IncrParser->getCI();
}
@@ -215,6 +267,11 @@
llvm::Expected<PartialTranslationUnit &>
Interpreter::Parse(llvm::StringRef Code) {
+ if (DeviceParser) {
+ auto DevicePTU = DeviceParser->Parse(Code);
+ if (auto E = DevicePTU.takeError())
+ return E;
+ }
return IncrParser->Parse(Code);
}
Index: clang/lib/Interpreter/IncrementalParser.h
===================================================================
--- clang/lib/Interpreter/IncrementalParser.h
+++ clang/lib/Interpreter/IncrementalParser.h
@@ -37,6 +37,7 @@
/// changes between the subsequent incremental input.
///
class IncrementalParser {
+protected:
/// Long-lived, incremental parsing action.
std::unique_ptr<IncrementalAction> Act;
@@ -56,17 +57,19 @@
/// of code.
std::list<PartialTranslationUnit> PTUs;
+ IncrementalParser();
+
public:
IncrementalParser(std::unique_ptr<CompilerInstance> Instance,
llvm::LLVMContext &LLVMCtx, llvm::Error &Err);
- ~IncrementalParser();
+ virtual ~IncrementalParser();
const CompilerInstance *getCI() const { return CI.get(); }
/// Parses incremental input by creating an in-memory file.
///\returns a \c PartialTranslationUnit which holds information about the
/// \c TranslationUnitDecl and \c llvm::Module corresponding to the input.
- llvm::Expected<PartialTranslationUnit &> Parse(llvm::StringRef Input);
+ virtual llvm::Expected<PartialTranslationUnit &> Parse(llvm::StringRef Input);
/// Uses the CodeGenModule mangled name cache and avoids recomputing.
///\returns the mangled name of a \c GD.
Index: clang/lib/Interpreter/IncrementalParser.cpp
===================================================================
--- clang/lib/Interpreter/IncrementalParser.cpp
+++ clang/lib/Interpreter/IncrementalParser.cpp
@@ -122,6 +122,10 @@
}
};
+static CodeGenerator *getCodeGen(FrontendAction *Act);
+
+IncrementalParser::IncrementalParser() {}
+
IncrementalParser::IncrementalParser(std::unique_ptr<CompilerInstance> Instance,
llvm::LLVMContext &LLVMCtx,
llvm::Error &Err)
@@ -135,6 +139,21 @@
P.reset(
new Parser(CI->getPreprocessor(), CI->getSema(), /*SkipBodies=*/false));
P->Initialize();
+
+ // An initial PTU is needed as CUDA includes some headers automatically
+ auto PTU = ParseOrWrapTopLevelDecl();
+ if (auto E = PTU.takeError()) {
+ consumeError(std::move(E)); // FIXME
+ return; // PTU.takeError();
+ }
+
+ if (CodeGenerator *CG = getCodeGen(Act.get())) {
+ std::unique_ptr<llvm::Module> M(CG->ReleaseModule());
+ CG->StartModule("incr_module_" + std::to_string(PTUs.size()),
+ M->getContext());
+ PTU->TheModule = std::move(M);
+ assert(PTU->TheModule && "Failed to create initial PTU");
+ }
}
IncrementalParser::~IncrementalParser() {
Index: clang/lib/Interpreter/CMakeLists.txt
===================================================================
--- clang/lib/Interpreter/CMakeLists.txt
+++ clang/lib/Interpreter/CMakeLists.txt
@@ -1,6 +1,7 @@
set(LLVM_LINK_COMPONENTS
core
native
+ MC
Option
OrcJit
Support
@@ -12,6 +13,7 @@
IncrementalExecutor.cpp
IncrementalParser.cpp
Interpreter.cpp
+ Offload.cpp
DEPENDS
intrinsics_gen
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -6253,6 +6253,10 @@
}
void CodeGenModule::EmitTopLevelStmt(const TopLevelStmtDecl *D) {
+ // Device code should not be at top level.
+ if (LangOpts.CUDA && LangOpts.CUDAIsDevice)
+ return;
+
std::unique_ptr<CodeGenFunction> &CurCGF =
GlobalTopLevelStmtBlockInFlight.first;
Index: clang/lib/CodeGen/CodeGenAction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenAction.cpp
+++ clang/lib/CodeGen/CodeGenAction.cpp
@@ -264,6 +264,7 @@
// Links each entry in LinkModules into our module. Returns true on error.
bool LinkInModules() {
for (auto &LM : LinkModules) {
+ assert(LM.Module && "LinkModule does not actually have a module");
if (LM.PropagateAttrs)
for (Function &F : *LM.Module) {
// Skip intrinsics. Keep consistent with how intrinsics are created
@@ -292,6 +293,7 @@
if (Err)
return true;
}
+ LinkModules.clear();
return false; // success
}
Index: clang/include/clang/Interpreter/Interpreter.h
===================================================================
--- clang/include/clang/Interpreter/Interpreter.h
+++ clang/include/clang/Interpreter/Interpreter.h
@@ -42,6 +42,16 @@
public:
static llvm::Expected<std::unique_ptr<CompilerInstance>>
create(std::vector<const char *> &ClangArgv);
+ static llvm::Expected<std::unique_ptr<CompilerInstance>>
+ createCpp(std::vector<const char *> &ClangArgv);
+};
+
+class IncrementalCudaCompilerBuilder {
+public:
+ static llvm::Expected<std::unique_ptr<CompilerInstance>>
+ createHost(std::vector<const char *> &ClangArgv, llvm::StringRef FatbinFile);
+ static llvm::Expected<std::unique_ptr<CompilerInstance>>
+ createDevice(std::vector<const char *> &ClangArgv);
};
/// Provides top-level interfaces for incremental compilation and execution.
@@ -50,6 +60,9 @@
std::unique_ptr<IncrementalParser> IncrParser;
std::unique_ptr<IncrementalExecutor> IncrExecutor;
+ // An optional parser for CUDA offloading
+ std::unique_ptr<IncrementalParser> DeviceParser;
+
Interpreter(std::unique_ptr<CompilerInstance> CI, llvm::Error &Err);
llvm::Error CreateExecutor();
@@ -58,6 +71,11 @@
~Interpreter();
static llvm::Expected<std::unique_ptr<Interpreter>>
create(std::unique_ptr<CompilerInstance> CI);
+ static llvm::Expected<std::unique_ptr<Interpreter>>
+ createWithCUDA(std::unique_ptr<CompilerInstance> CI,
+ std::unique_ptr<CompilerInstance> DCI,
+ llvm::StringRef OffloadArch,
+ llvm::StringRef TempDeviceCodeFilename);
const CompilerInstance *getCompilerInstance() const;
llvm::Expected<llvm::orc::LLJIT &> getExecutionEngine();
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits