jhuber6 created this revision. jhuber6 added reviewers: jdoerfert, gregrodgers, JonChesterfield, ronlieb. Herald added subscribers: ormris, dexonsmith, dang, guansong, hiraditya, yaxunl, mgorny. jhuber6 requested review of this revision. Herald added subscribers: llvm-commits, cfe-commits, sstefan1. Herald added projects: clang, LLVM.
This patch introduces the `-fopenmp-new-driver` option which instructs the compiler to use a new driver scheme for producing offloading code. In this scheme we create a complete offloading object file and then pass it as input to the host compilation phase. This will allow us to embed the object code in the backend phase. [OpenMP] Add a flag for embedding a file into the module This patch adds support for a flag `-fembed-offload-binary` to embed a file as an ELF section in the output by placing it in a global variable. This can be used to bundle offloading files with the host binary so they can be accessed by the linker. The section is named using the `-fembed-offload-section` option. [OpenMP] Embed device files into the host IR This patch adds support for embedding the device object files into the host IR to create a fat binary. Each offloading file will be inserted into a section with the following naming format `.llvm.offloading.<triple>.<arch>`. [Clang] Introduce Clang Linker Wrapper Tool This patch introduces a linker wrapper tool that allows us to preprocess files before they are sent to the linker. This adds a dummy action and job to the driver stage that builds the linker command as usual and then replaces the command line with the wrapper tool. [OpenMP] Add support for extracting device code in linker wrapper This patch adds support for extracting device offloading code from the linker's input files. If the file contains a section with the name `.llvm.offloading.<triple>.<arch>` it will be extracted to a new temporary file to be linked. Additionally, the host file containing it will have the section stripped so it does not remain in the executable once linked. 
Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D116541 Files: clang/include/clang/Basic/CodeGenOptions.h clang/include/clang/CodeGen/BackendUtil.h clang/include/clang/Driver/Action.h clang/include/clang/Driver/Driver.h clang/include/clang/Driver/Job.h clang/include/clang/Driver/Options.td clang/include/clang/Driver/ToolChain.h clang/lib/CodeGen/BackendUtil.cpp clang/lib/CodeGen/CodeGenAction.cpp clang/lib/Driver/Action.cpp clang/lib/Driver/Driver.cpp clang/lib/Driver/ToolChain.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/Clang.h clang/test/Driver/openmp-offload-gpu.c clang/test/Frontend/embed-object.ll clang/tools/CMakeLists.txt clang/tools/clang-linker-wrapper/CMakeLists.txt clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp llvm/include/llvm/Bitcode/BitcodeWriter.h llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4971,3 +4971,42 @@ llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used"); NewUsed->setSection("llvm.metadata"); } + +void llvm::EmbedObjectInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, + StringRef SectionName) { + // Save llvm.compiler.used and remove it. + SmallVector<Constant *, 2> UsedArray; + SmallVector<GlobalValue *, 4> UsedGlobals; + Type *UsedElementType = Type::getInt8Ty(M.getContext())->getPointerTo(0); + GlobalVariable *Used = collectUsedGlobalVariables(M, UsedGlobals, true); + for (auto *GV : UsedGlobals) { + if (!GV->getName().startswith("llvm.embedded.object")) + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + } + if (Used) + Used->eraseFromParent(); + + ArrayRef<uint8_t> ModuleData = ArrayRef<uint8_t>( + (const uint8_t *)Buf.getBufferStart(), Buf.getBufferSize()); + + // Embed the data in the + llvm::Constant *ModuleConstant = + llvm::ConstantDataArray::get(M.getContext(), ModuleData); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage, + ModuleConstant, "llvm.embedded.object"); + GV->setSection(SectionName); + // Set alignment to 1 to prevent padding between two contributions from input + // sections after linking. + GV->setAlignment(Align(1)); + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + + // Recreate llvm.compiler.used. 
+ ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size()); + auto *NewUsed = new GlobalVariable( + M, ATy, false, llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used"); + NewUsed->setSection("llvm.metadata"); +} Index: llvm/include/llvm/Bitcode/BitcodeWriter.h =================================================================== --- llvm/include/llvm/Bitcode/BitcodeWriter.h +++ llvm/include/llvm/Bitcode/BitcodeWriter.h @@ -165,6 +165,11 @@ bool EmbedCmdline, const std::vector<uint8_t> &CmdArgs); + /// Embeds the memory buffer \p Buf into the module \p M as a global using the + /// section name \p SectionName. + void EmbedObjectInModule(Module &M, MemoryBufferRef Buf, + StringRef SectionName); + } // end namespace llvm #endif // LLVM_BITCODE_BITCODEWRITER_H Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp =================================================================== --- /dev/null +++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -0,0 +1,469 @@ +//===-- clang-linker-wrapper/ClangLinkerWrapper.cpp - wrapper over linker-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// This tool works as a wrapper over a linking job. This tool is used to create +// linked device images for offloading. It scans the linker's input for embedded +// device offloading data stored in sections `.llvm.offloading.<triple>.<arch>` +// and extracts it as a temporary file. The extracted device files will then be +// passed to a device linking job to create a final device image. 
+// +//===---------------------------------------------------------------------===// + +#include "clang/Basic/Version.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::object; + +static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden); + +// Mark all our options with this category, everything else (except for -help) +// will be hidden. +static cl::OptionCategory + ClangLinkerWrapperCategory("clang-linker-wrapper options"); + +static cl::opt<bool> StripSections( + "strip-sections", cl::ZeroOrMore, + cl::desc("Strip offloading sections from the host object file."), + cl::init(true), cl::cat(ClangLinkerWrapperCategory)); + +static cl::opt<std::string> LinkerUserPath("linker-path", + cl::desc("Path of linker binary"), + cl::cat(ClangLinkerWrapperCategory)); + +// Do not parse linker options. +static cl::list<std::string> + LinkerArgs(cl::Sink, cl::desc("<options to be passed to linker>...")); + +/// Path of the current binary. +static std::string LinkerExecutable; + +/// Magic section string that marks the existence of offloading data. The +/// section string will be formatted as `.llvm.offloading.<triple>.<arch>`. 
+#define OFFLOAD_SECTION_MAGIC_STR ".llvm.offloading" + +struct DeviceFile { + DeviceFile(StringRef TheTriple, StringRef Arch, StringRef Filename) + : TheTriple(TheTriple), Arch(Arch), Filename(Filename) {} + + const Triple TheTriple; + const std::string Arch; + const std::string Filename; +}; + +namespace { + +Expected<Optional<std::string>> +extractFromBuffer(std::unique_ptr<MemoryBuffer> Buffer, + SmallVectorImpl<DeviceFile> &DeviceFiles); + +Error runLinker(std::string &LinkerPath, SmallVectorImpl<std::string> &Args) { + std::vector<StringRef> LinkerArgs; + LinkerArgs.push_back(LinkerPath); + for (auto &Arg : Args) + LinkerArgs.push_back(Arg); + + if (sys::ExecuteAndWait(LinkerPath, LinkerArgs)) + return createStringError(inconvertibleErrorCode(), "'linker' failed"); + return Error::success(); +} + +void PrintVersion(raw_ostream &OS) { + OS << clang::getClangToolFullVersion("clang-linker-wrapper") << '\n'; +} + +void removeFromCompilerUsed(Module &M, GlobalValue &Value) { + GlobalVariable *GV = M.getGlobalVariable("llvm.compiler.used"); + Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext()); + Constant *ValueToRemove = + ConstantExpr::getPointerBitCastOrAddrSpaceCast(&Value, Int8PtrTy); + SmallPtrSet<Constant *, 16> InitAsSet; + SmallVector<Constant *, 16> Init; + if (GV) { + if (GV->hasInitializer()) { + auto *CA = cast<ConstantArray>(GV->getInitializer()); + for (auto &Op : CA->operands()) { + Constant *C = cast_or_null<Constant>(Op); + if (C != ValueToRemove && InitAsSet.insert(C).second) + Init.push_back(C); + } + } + GV->eraseFromParent(); + } + + if (Init.empty()) + return; + + ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size()); + GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, + ConstantArray::get(ATy, Init), + "llvm.compiler.used"); + GV->setSection("llvm.metadata"); +} + +Expected<Optional<std::string>> +extractFromBinary(const ObjectFile &Obj, + SmallVectorImpl<DeviceFile> &DeviceFiles) { + + StringRef Extension = 
sys::path::extension(Obj.getFileName()).drop_front(); + StringRef Prefix = sys::path::stem(Obj.getFileName()).take_until([](char C) { + return C == '-'; + }); + SmallVector<StringRef, 4> ToBeStripped; + + // Extract data from sections of the form `.llvm.offloading.<triple>.<arch>`. + for (const SectionRef &Sec : Obj.sections()) { + Expected<StringRef> Name = Sec.getName(); + if (!Name || !Name->startswith(OFFLOAD_SECTION_MAGIC_STR)) + continue; + + SmallVector<StringRef, 4> SectionFields; + Name->split(SectionFields, '.', -1, false); + + assert(SectionFields.size() == 4 && + "Offloading section name is missing required fields"); + + const StringRef DeviceTriple = SectionFields[2]; + const StringRef Arch = SectionFields[3]; + + if (Expected<StringRef> Contents = Sec.getContents()) { + SmallString<128> TempFile; + if (std::error_code EC = sys::fs::createTemporaryFile( + Prefix + "-device-" + DeviceTriple, Extension, TempFile)) + return createFileError(TempFile, EC); + + Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr = + FileOutputBuffer::create(TempFile, Sec.getSize()); + if (!OutputOrErr) + return OutputOrErr.takeError(); + std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr); + std::copy(Contents->begin(), Contents->end(), Output->getBufferStart()); + if (Error E = Output->commit()) + return E; + + DeviceFiles.emplace_back(DeviceTriple, Arch, TempFile); + ToBeStripped.push_back(*Name); + } + } + + if (ToBeStripped.empty()) + return None; + + // We will use llvm-strip to remove the now unneeded section containing the + // offloading code. 
+ ErrorOr<std::string> StripPath = sys::findProgramByName( + "llvm-strip", sys::path::parent_path(LinkerExecutable)); + if (!StripPath) + StripPath = sys::findProgramByName("llvm-strip"); + if (!StripPath) + return createStringError(StripPath.getError(), + "Unable to find 'llvm-strip' in path"); + + SmallString<128> TempFile; + if (std::error_code EC = + sys::fs::createTemporaryFile(Prefix + "-host", Extension, TempFile)) + return createFileError(TempFile, EC); + + SmallVector<StringRef, 8> StripArgs; + StripArgs.push_back(*StripPath); + StripArgs.push_back("--no-strip-all"); + StripArgs.push_back(Obj.getFileName()); + for (auto &Section : ToBeStripped) { + StripArgs.push_back("--remove-section"); + StripArgs.push_back(Section); + } + StripArgs.push_back("-o"); + StripArgs.push_back(TempFile); + + if (sys::ExecuteAndWait(*StripPath, StripArgs)) + return createStringError(inconvertibleErrorCode(), "'llvm-strip' failed"); + + return static_cast<std::string>(TempFile); +} + +Expected<Optional<std::string>> +extractFromBitcode(std::unique_ptr<MemoryBuffer> Buffer, + SmallVectorImpl<DeviceFile> &DeviceFiles) { + LLVMContext Context; + SMDiagnostic Err; + std::unique_ptr<Module> M = getLazyIRModule(std::move(Buffer), Err, Context); + if (!M) + return createStringError(inconvertibleErrorCode(), + "Failed to create module"); + + StringRef Extension = sys::path::extension(M->getName()).drop_front(); + StringRef Prefix = + sys::path::stem(M->getName()).take_until([](char C) { return C == '-'; }); + + SmallVector<GlobalVariable *, 4> ToBeDeleted; + + // Extract data from the global string containing a section of the form + // `.llvm.offloading.<triple>.<arch>`. 
+ for (GlobalVariable &GV : M->globals()) { + if (!GV.hasSection() || + !GV.getSection().startswith(OFFLOAD_SECTION_MAGIC_STR)) + continue; + + auto *CDS = dyn_cast<ConstantDataSequential>(GV.getInitializer()); + if (!CDS) + continue; + + SmallVector<StringRef, 4> SectionFields; + GV.getSection().split(SectionFields, '.', -1, false); + + assert(SectionFields.size() == 4 && + "Offloading section name is missing required fields"); + + const StringRef DeviceTriple = SectionFields[2]; + const StringRef Arch = SectionFields[3]; + + StringRef Contents = CDS->getAsString(); + SmallString<128> TempFile; + if (std::error_code EC = sys::fs::createTemporaryFile( + Prefix + "-device-" + DeviceTriple, Extension, TempFile)) + return createFileError(TempFile, EC); + + Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr = + FileOutputBuffer::create(TempFile, Contents.size()); + if (!OutputOrErr) + return OutputOrErr.takeError(); + std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr); + std::copy(Contents.begin(), Contents.end(), Output->getBufferStart()); + if (Error E = Output->commit()) + return E; + + DeviceFiles.emplace_back(DeviceTriple, Arch, TempFile); + ToBeDeleted.push_back(&GV); + } + + if (ToBeDeleted.empty()) + return None; + + // We need to materialize the lazy module before we make any changes. + if (Error Err = M->materializeAll()) + return Err; + + // Remove the global from the module and write it to a new file. 
+ for (GlobalVariable *GV : ToBeDeleted) { + removeFromCompilerUsed(*M, *GV); + GV->eraseFromParent(); + } + + SmallString<128> TempFile; + if (std::error_code EC = + sys::fs::createTemporaryFile(Prefix + "-host", Extension, TempFile)) + return createFileError(TempFile, EC); + std::error_code EC; + raw_fd_ostream HostOutput(TempFile, EC, sys::fs::OF_None); + if (EC) + return createFileError(TempFile, EC); + WriteBitcodeToFile(*M, HostOutput); + return static_cast<std::string>(TempFile); +} + +Expected<Optional<std::string>> +extractFromArchive(const Archive &Library, + SmallVectorImpl<DeviceFile> &DeviceFiles) { + + StringRef Extension = + sys::path::extension(Library.getFileName()).drop_front(); + StringRef Prefix = + sys::path::stem(Library.getFileName()).take_until([](char C) { + return C == '-'; + }); + + bool NewMembers = false; + SmallVector<NewArchiveMember, 8> Members; + + // Try to extract device code from each file stored in the static archive. + // Save the stripped archive members to create a new host archive with the + // offloading code removed. + Error Err = Error::success(); + for (auto Child : Library.children(Err)) { + auto ChildBufferRefOrErr = Child.getMemoryBufferRef(); + if (!ChildBufferRefOrErr) + return ChildBufferRefOrErr.takeError(); + std::unique_ptr<MemoryBuffer> ChildBuffer = + MemoryBuffer::getMemBuffer(*ChildBufferRefOrErr, false); + + auto FileOrErr = extractFromBuffer(std::move(ChildBuffer), DeviceFiles); + if (!FileOrErr) + return FileOrErr.takeError(); + + // If we created a new stripped host file, use it to create a new archive + // member, otherwise use the old member. 
+ if (!FileOrErr->hasValue()) { + Expected<NewArchiveMember> NewMember = + NewArchiveMember::getOldMember(Child, true); + if (!NewMember) + return NewMember.takeError(); + Members.push_back(std::move(*NewMember)); + } else { + Expected<NewArchiveMember> NewMember = + NewArchiveMember::getFile(**FileOrErr, true); + if (!NewMember) + return NewMember.takeError(); + Members.push_back(std::move(*NewMember)); + NewMembers = true; + + // We no longer need the stripped file, remove it. + if (std::error_code EC = sys::fs::remove(**FileOrErr)) + return createFileError(**FileOrErr, EC); + } + } + + if (Err) + return Err; + + if (!NewMembers) + return None; + + // Create a new static library using the stripped host files. + SmallString<128> TempFile; + if (std::error_code EC = + sys::fs::createTemporaryFile(Prefix + "-host", Extension, TempFile)) + return createFileError(TempFile, EC); + + std::unique_ptr<MemoryBuffer> Buffer = + MemoryBuffer::getMemBuffer(Library.getMemoryBufferRef(), false); + if (Error WriteErr = writeArchive(TempFile, Members, true, Library.kind(), + true, Library.isThin(), std::move(Buffer))) + return WriteErr; + + return static_cast<std::string>(TempFile); +} + +/// Extracts embedded device offloading code from a memory \p Buffer to a list +/// of \p DeviceFiles. If device code was extracted a new file with the embedded +/// device code stripped from the buffer will be returned. 
+Expected<Optional<std::string>> +extractFromBuffer(std::unique_ptr<MemoryBuffer> Buffer, + SmallVectorImpl<DeviceFile> &DeviceFiles) { + file_magic Type = identify_magic(Buffer->getBuffer()); + switch (Type) { + case file_magic::bitcode: + return extractFromBitcode(std::move(Buffer), DeviceFiles); + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: { + Expected<std::unique_ptr<ObjectFile>> ObjFile = + ObjectFile::createObjectFile(*Buffer, Type); + if (!ObjFile) + return ObjFile.takeError(); + return extractFromBinary(*ObjFile->get(), DeviceFiles); + } + case file_magic::archive: { + Expected<std::unique_ptr<llvm::object::Archive>> LibFile = + object::Archive::create(*Buffer); + if (!LibFile) + return LibFile.takeError(); + return extractFromArchive(*LibFile->get(), DeviceFiles); + } + default: + return errorCodeToError(object_error::invalid_file_type); + } + + return None; +} + +} // namespace + +int main(int argc, const char **argv) { + InitLLVM X(argc, argv); + + sys::PrintStackTraceOnErrorSignal(argv[0]); + cl::SetVersionPrinter(PrintVersion); + cl::HideUnrelatedOptions(ClangLinkerWrapperCategory); + cl::ParseCommandLineOptions( + argc, argv, + "A wrapper utility over the host linker. It scans the input files for\n" + "sections that require additional processing prior to linking. The tool\n" + "will then transparently pass all arguments and input to the specified\n" + "host linker to create the final binary.\n"); + + if (Help) { + cl::PrintHelpMessage(); + return EXIT_SUCCESS; + } + LinkerExecutable = argv[0]; + + SmallVector<std::string, 4> TempFiles; + SmallVector<DeviceFile, 4> DeviceFiles; + + auto reportError = [argv](Error E) { + logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0])); + exit(EXIT_FAILURE); + }; + + // Try to extract device code from the linker input and replace the linker + // input with a new file that has the device section stripped. 
+ for (std::string &Arg : LinkerArgs) { + if (sys::path::extension(Arg) == ".o" || + sys::path::extension(Arg) == ".a") { + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFileOrSTDIN(Arg); + if (std::error_code EC = BufferOrErr.getError()) + reportError(createFileError(Arg, EC)); + + auto NewFileOrErr = + extractFromBuffer(std::move(*BufferOrErr), DeviceFiles); + + if (!NewFileOrErr) + reportError(NewFileOrErr.takeError()); + + if (NewFileOrErr->hasValue()) { + TempFiles.push_back(**NewFileOrErr); + Arg = **NewFileOrErr; + } + } + } + + // Add the newly extracted device files to the temporary list. + for (const auto &DeviceFile : DeviceFiles) + TempFiles.push_back(DeviceFile.Filename); + + // TODO: Perform appropriate device linking action. + // TODO: Wrap device image in a host binary and pass it to the linker. + WithColor::warning(errs(), argv[0]) << "Offload linking not yet supported.\n"; + + SmallVector<std::string, 16> LinkerArgv; + for (const std::string &Arg : LinkerArgs) + LinkerArgv.push_back(Arg); + + // Run the host linking job. 
+ if (Error Err = runLinker(LinkerUserPath, LinkerArgv)) + reportError(std::move(Err)); + + for (const auto &TempFile : TempFiles) { + if (std::error_code EC = sys::fs::remove(TempFile)) + reportError(createFileError(TempFile, EC)); + } + + return EXIT_SUCCESS; +} Index: clang/tools/clang-linker-wrapper/CMakeLists.txt =================================================================== --- /dev/null +++ clang/tools/clang-linker-wrapper/CMakeLists.txt @@ -0,0 +1,25 @@ +set(LLVM_LINK_COMPONENTS BitWriter Core BinaryFormat IRReader Object Support) + +if(NOT CLANG_BUILT_STANDALONE) + set(tablegen_deps intrinsics_gen) +endif() + +add_clang_executable(clang-linker-wrapper + ClangLinkerWrapper.cpp + + DEPENDS + ${tablegen_deps} + ) + +set(CLANG_LINKER_WRAPPER_LIB_DEPS + clangBasic + ) + +add_dependencies(clang clang-linker-wrapper) + +target_link_libraries(clang-linker-wrapper + PRIVATE + ${CLANG_LINKER_WRAPPER_LIB_DEPS} + ) + +install(TARGETS clang-linker-wrapper RUNTIME DESTINATION bin) Index: clang/tools/CMakeLists.txt =================================================================== --- clang/tools/CMakeLists.txt +++ clang/tools/CMakeLists.txt @@ -9,6 +9,7 @@ add_clang_subdirectory(clang-fuzzer) add_clang_subdirectory(clang-import-test) add_clang_subdirectory(clang-nvlink-wrapper) +add_clang_subdirectory(clang-linker-wrapper) add_clang_subdirectory(clang-offload-bundler) add_clang_subdirectory(clang-offload-wrapper) add_clang_subdirectory(clang-scan-deps) Index: clang/test/Frontend/embed-object.ll =================================================================== --- /dev/null +++ clang/test/Frontend/embed-object.ll @@ -0,0 +1,13 @@ +; RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ +; RUN: -fembed-offload-binary=%S/Inputs/empty.h -fembed-offload-section=section -x ir %s -o - \ +; RUN: | FileCheck %s -check-prefix=CHECK + +; CHECK: @llvm.embedded.object = private constant [0 x i8] zeroinitializer, section ".llvm.offloading.section", align 1 +; CHECK: 
@llvm.compiler.used = appending global [2 x i8*] [i8* @x, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @llvm.embedded.object, i32 0, i32 0)], section "llvm.metadata" + +@x = private constant i8 1 +@llvm.compiler.used = appending global [1 x i8*] [i8* @x], section "llvm.metadata" + +define i32 @foo() { + ret i32 0 +} Index: clang/test/Driver/openmp-offload-gpu.c =================================================================== --- clang/test/Driver/openmp-offload-gpu.c +++ clang/test/Driver/openmp-offload-gpu.c @@ -343,3 +343,13 @@ // RUN: | FileCheck -check-prefix=SAVE_TEMPS_NAMES %s // SAVE_TEMPS_NAMES-NOT: "GNU::Linker"{{.*}}["[[SAVE_TEMPS_INPUT1:.*\.o]]", "[[SAVE_TEMPS_INPUT1]]"] + +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: -fopenmp-new-driver -no-canonical-prefixes -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \ +// RUN: | FileCheck -check-prefix=NEW_DRIVER %s + +// NEW_DRIVER: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_INPUT:.+]]"], output: "[[HOST_BC:.+]]" +// NEW_DRIVER: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[DEVICE_INPUT:.+]]", "[[HOST_BC]]"], output: "[[DEVICE_ASM:.+]]" +// NEW_DRIVER: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "[[DEVICE_OBJ:.+]]" +// NEW_DRIVER: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ]]"], output: "[[HOST_OBJ:.+]]" +// NEW_DRIVER: "x86_64-unknown-linux-gnu" - "[[LINKER:.+]]", inputs: ["[[HOST_OBJ]]"], output: "openmp-offload-gpu" Index: clang/lib/Driver/ToolChains/Clang.h =================================================================== --- clang/lib/Driver/ToolChains/Clang.h +++ clang/lib/Driver/ToolChains/Clang.h @@ -170,6 +170,21 @@ const char *LinkingOutput) const override; }; +/// Linker wrapper tool. 
+class LLVM_LIBRARY_VISIBILITY LinkerWrapper final : public Tool { + const Tool *Linker; + +public: + LinkerWrapper(const ToolChain &TC, const Tool *Linker) + : Tool("Offload::Linker", "linker", TC), Linker(Linker) {} + + bool hasIntegratedCPP() const override { return false; } + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; + } // end namespace tools } // end namespace driver Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4338,6 +4338,7 @@ bool IsHIP = JA.isOffloading(Action::OFK_HIP); bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP); bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP); + bool IsOpenMPHost = JA.isHostOffloading(Action::OFK_OpenMP); bool IsHeaderModulePrecompile = isa<HeaderModulePrecompileJobAction>(JA); bool IsDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) || JA.isDeviceOffloading(Action::OFK_Host)); @@ -4356,6 +4357,7 @@ IsHeaderModulePrecompile ? HeaderModuleInput : Inputs[0]; InputInfoList ModuleHeaderInputs; + InputInfoList OpenMPHostInputs; const InputInfo *CudaDeviceInput = nullptr; const InputInfo *OpenMPDeviceInput = nullptr; for (const InputInfo &I : Inputs) { @@ -4374,6 +4376,8 @@ CudaDeviceInput = &I; } else if (IsOpenMPDevice && !OpenMPDeviceInput) { OpenMPDeviceInput = &I; + } else if (IsOpenMPHost) { + OpenMPHostInputs.push_back(I); } else { llvm_unreachable("unexpectedly given multiple inputs"); } @@ -6866,6 +6870,32 @@ } } + // Host-side OpenMP offloading recieves the device object files and embeds it + // in a named section including the associated target triple and architecture. 
+ if (IsOpenMPHost && !OpenMPHostInputs.empty()) { + SmallString<128> InputFiles("-fembed-offload-binary="); + SmallString<128> InputSections("-fembed-offload-section="); + + auto InputFile = OpenMPHostInputs.begin(); + auto OpenMPTCs = C.getOffloadToolChains<Action::OFK_OpenMP>(); + for (auto TI = OpenMPTCs.first, TE = OpenMPTCs.second; TI != TE; + ++TI, ++InputFile) { + const ToolChain *TC = TI->second; + const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); + InputSections += TC->getTripleString() + "."; + InputSections += TCArgs.getLastArgValue(options::OPT_march_EQ); + InputSections += ","; + + InputFiles += C.getArgs().MakeArgString(TC->getInputFilename(*InputFile)); + InputFiles += ","; + } + InputSections.pop_back(); + InputFiles.pop_back(); + + CmdArgs.push_back(Args.MakeArgString(InputFiles.str())); + CmdArgs.push_back(Args.MakeArgString(InputSections.str())); + } + if (Triple.isAMDGPU()) { handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs); @@ -8092,3 +8122,28 @@ Args.MakeArgString(getToolChain().GetProgramPath(getShortName())), CmdArgs, Inputs, Output)); } + +void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + ArgStringList CmdArgs; + + // Construct the link job so we can wrap around it. + Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput); + const auto &LinkCommand = C.getJobs().getJobs().back(); + + CmdArgs.push_back("-linker-path"); + CmdArgs.push_back(LinkCommand->getExecutable()); + for (const char *LinkArg : LinkCommand->getArguments()) + CmdArgs.push_back(LinkArg); + + const char *Exec = + Args.MakeArgString(getToolChain().GetProgramPath("clang-linker-wrapper")); + + // Replace the executable and arguments associated with the link job to the + // wrapper. 
+ LinkCommand->replaceExecutable(Exec); + LinkCommand->replaceArguments(CmdArgs); +} Index: clang/lib/Driver/ToolChain.cpp =================================================================== --- clang/lib/Driver/ToolChain.cpp +++ clang/lib/Driver/ToolChain.cpp @@ -324,6 +324,12 @@ return OffloadWrapper.get(); } +Tool *ToolChain::getLinkerWrapper() const { + if (!LinkerWrapper) + LinkerWrapper.reset(new tools::LinkerWrapper(*this, getLink())); + return LinkerWrapper.get(); +} + Tool *ToolChain::getTool(Action::ActionClass AC) const { switch (AC) { case Action::AssembleJobClass: @@ -362,6 +368,8 @@ case Action::OffloadWrapperJobClass: return getOffloadWrapper(); + case Action::LinkerWrapperJobClass: + return getLinkerWrapper(); } llvm_unreachable("Invalid tool kind."); Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -3788,6 +3788,11 @@ // Builder to be used to build offloading actions. OffloadingActionBuilder OffloadBuilder(C, Args, Inputs); + // Offload kinds active for this compilation. + unsigned OffloadKinds = Action::OFK_None; + if (C.hasOffloadToolChain<Action::OFK_OpenMP>()) + OffloadKinds |= Action::OFK_OpenMP; + // Construct the actions to perform. HeaderModulePrecompileJobAction *HeaderModuleAction = nullptr; ActionList LinkerInputs; @@ -3808,14 +3813,16 @@ // Use the current host action in any of the offloading actions, if // required. - if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) - break; + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) + break; for (phases::ID Phase : PL) { // Add any offload action the host action depends on. 
- Current = OffloadBuilder.addDeviceDependencesToHostAction( - Current, InputArg, Phase, PL.back(), FullPL); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + Current = OffloadBuilder.addDeviceDependencesToHostAction( + Current, InputArg, Phase, PL.back(), FullPL); if (!Current) break; @@ -3855,6 +3862,11 @@ break; } + // Try to build the offloading actions and add the result as a dependency + // to the host. + if (Args.hasArg(options::OPT_fopenmp_new_driver)) + Current = BuildOffloadingActions(C, Args, I, Current); + // FIXME: Should we include any prior module file outputs as inputs of // later actions in the same command line? @@ -3872,8 +3884,9 @@ // Use the current host action in any of the offloading actions, if // required. - if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) - break; + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) + break; if (Current->getType() == types::TY_Nothing) break; @@ -3884,21 +3897,32 @@ Actions.push_back(Current); // Add any top level actions generated for offloading. - OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg); + else if (Current) + Current->propagateHostOffloadInfo(OffloadKinds, + /*BoundArch=*/nullptr); } // Add a link action if necessary. if (!LinkerInputs.empty()) { - if (Action *Wrapper = OffloadBuilder.makeHostLinkAction()) - LinkerInputs.push_back(Wrapper); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + if (Action *Wrapper = OffloadBuilder.makeHostLinkAction()) + LinkerInputs.push_back(Wrapper); Action *LA; // Check if this Linker Job should emit a static library. 
if (ShouldEmitStaticLibrary(Args)) { LA = C.MakeAction<StaticLibJobAction>(LinkerInputs, types::TY_Image); + } else if (Args.hasArg(options::OPT_fopenmp_new_driver) && + OffloadKinds != Action::OFK_None) { + LA = C.MakeAction<LinkerWrapperJobAction>(LinkerInputs, types::TY_Image); + LA->propagateHostOffloadInfo(OffloadKinds, + /*BoundArch=*/nullptr); } else { LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image); } - LA = OffloadBuilder.processHostLinkAction(LA); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + LA = OffloadBuilder.processHostLinkAction(LA); Actions.push_back(LA); } @@ -3984,6 +4008,68 @@ Args.ClaimAllArgs(options::OPT_cuda_compile_host_device); } +Action *Driver::BuildOffloadingActions(Compilation &C, + llvm::opt::DerivedArgList &Args, + const InputTy &Input, + Action *HostAction) const { + if (!isa<CompileJobAction>(HostAction)) + return HostAction; + + SmallVector<const ToolChain *, 2> ToolChains; + ActionList DeviceActions; + + types::ID InputType = Input.first; + const Arg *InputArg = Input.second; + + auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>(); + for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE; ++TI) + ToolChains.push_back(TI->second); + + for (unsigned I = 0; I < ToolChains.size(); ++I) + DeviceActions.push_back(C.MakeAction<InputAction>(*InputArg, InputType)); + + if (DeviceActions.empty()) + return HostAction; + + auto PL = types::getCompilationPhases(*this, Args, InputType); + + for (phases::ID Phase : PL) { + if (Phase == phases::Link) { + assert(Phase == PL.back() && "linking must be final compilation step."); + break; + } + + auto TC = ToolChains.begin(); + for (Action *&A : DeviceActions) { + A = ConstructPhaseAction(C, Args, Phase, A); + + if (isa<CompileJobAction>(A)) { + HostAction->setCannotBeCollapsedWithNextDependentAction(); + OffloadAction::HostDependence HDep( + *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), + /*BoundArch=*/nullptr, 
Action::OFK_OpenMP); + OffloadAction::DeviceDependences DDep; + DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP); + A = C.MakeAction<OffloadAction>(HDep, DDep); + } + ++TC; + } + } + + OffloadAction::DeviceDependences DDeps; + + auto TC = ToolChains.begin(); + for (Action *A : DeviceActions) { + DDeps.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP); + TC++; + } + + OffloadAction::HostDependence HDep( + *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), + /*BoundArch=*/nullptr, DDeps); + return C.MakeAction<OffloadAction>(HDep, DDeps); +} + Action *Driver::ConstructPhaseAction( Compilation &C, const ArgList &Args, phases::ID Phase, Action *Input, Action::OffloadKind TargetDeviceOffloadKind) const { @@ -4143,7 +4229,7 @@ ArchNames.insert(A->getValue()); // Set of (Action, canonical ToolChain triple) pairs we've built jobs for. - std::map<std::pair<const Action *, std::string>, InputInfo> CachedResults; + std::map<std::pair<const Action *, std::string>, InputInfoList> CachedResults; for (Action *A : C.getActions()) { // If we are linking an image for multiple archs then the linker wants // -arch_multiple and -final_output <final image name>. 
Unfortunately, this @@ -4600,10 +4686,11 @@ return TriplePlusArch; } -InputInfo Driver::BuildJobsForAction( +InputInfoList Driver::BuildJobsForAction( Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults, + std::map<std::pair<const Action *, std::string>, InputInfoList> + &CachedResults, Action::OffloadKind TargetDeviceOffloadKind) const { std::pair<const Action *, std::string> ActionTC = { A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; @@ -4611,17 +4698,18 @@ if (CachedResult != CachedResults.end()) { return CachedResult->second; } - InputInfo Result = BuildJobsForActionNoCache( + InputInfoList Result = BuildJobsForActionNoCache( C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput, CachedResults, TargetDeviceOffloadKind); CachedResults[ActionTC] = Result; return Result; } -InputInfo Driver::BuildJobsForActionNoCache( +InputInfoList Driver::BuildJobsForActionNoCache( Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults, + std::map<std::pair<const Action *, std::string>, InputInfoList> + &CachedResults, Action::OffloadKind TargetDeviceOffloadKind) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); @@ -4659,7 +4747,7 @@ // If there is a single device option, just generate the job for it. 
if (OA->hasSingleDeviceDependence()) { - InputInfo DevA; + InputInfoList DevA; OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { DevA = @@ -4677,7 +4765,7 @@ OA->doOnEachDependence( /*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { - OffloadDependencesInputInfo.push_back(BuildJobsForAction( + OffloadDependencesInputInfo.append(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false, /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults, DepA->getOffloadingDeviceKind())); @@ -4686,6 +4774,17 @@ A = BuildingForOffloadDevice ? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true) : OA->getHostDependence(); + + // We may have already built this action as a part of the offloading + // toolchain, return the cached input if so. + std::pair<const Action *, std::string> ActionTC = { + OA->getHostDependence(), + GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; + if (CachedResults.find(ActionTC) != CachedResults.end()) { + InputInfoList Inputs = CachedResults[ActionTC]; + Inputs.append(OffloadDependencesInputInfo); + return Inputs; + } } if (const InputAction *IA = dyn_cast<InputAction>(A)) { @@ -4695,9 +4794,9 @@ Input.claim(); if (Input.getOption().matches(options::OPT_INPUT)) { const char *Name = Input.getValue(); - return InputInfo(A, Name, /* _BaseInput = */ Name); + return {InputInfo(A, Name, /* _BaseInput = */ Name)}; } - return InputInfo(A, &Input, /* _BaseInput = */ ""); + return {InputInfo(A, &Input, /* _BaseInput = */ "")}; } if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) { @@ -4727,7 +4826,7 @@ const Tool *T = TS.getTool(Inputs, CollapsedOffloadActions); if (!T) - return InputInfo(); + return {InputInfo()}; if (BuildingForOffloadDevice && A->getOffloadingDeviceKind() == Action::OFK_OpenMP) { @@ -4754,7 +4853,7 @@ cast<OffloadAction>(OA)->doOnEachDependence( 
/*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { - OffloadDependencesInputInfo.push_back(BuildJobsForAction( + OffloadDependencesInputInfo.append(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false, /*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults, DepA->getOffloadingDeviceKind())); @@ -4768,7 +4867,7 @@ // FIXME: Clean this up. bool SubJobAtTopLevel = AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A)); - InputInfos.push_back(BuildJobsForAction( + InputInfos.append(BuildJobsForAction( C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput, CachedResults, A->getOffloadingDeviceKind())); } @@ -4852,8 +4951,8 @@ Arch = BoundArch; CachedResults[{A, GetTriplePlusArchString(UI.DependentToolChain, Arch, - UI.DependentOffloadKind)}] = - CurI; + UI.DependentOffloadKind)}] = { + CurI}; } // Now that we have all the results generated, select the one that should be @@ -4862,9 +4961,9 @@ A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; assert(CachedResults.find(ActionTC) != CachedResults.end() && "Result does not exist??"); - Result = CachedResults[ActionTC]; + Result = CachedResults[ActionTC].front(); } else if (JA->getType() == types::TY_Nothing) - Result = InputInfo(A, BaseInput); + Result = {InputInfo(A, BaseInput)}; else { // We only have to generate a prefix for the host if this is not a top-level // action. 
@@ -4917,7 +5016,7 @@ C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), LinkingOutput); } - return Result; + return {Result}; } const char *Driver::getDefaultImageName() const { Index: clang/lib/Driver/Action.cpp =================================================================== --- clang/lib/Driver/Action.cpp +++ clang/lib/Driver/Action.cpp @@ -43,6 +43,8 @@ return "clang-offload-unbundler"; case OffloadWrapperJobClass: return "clang-offload-wrapper"; + case LinkerWrapperJobClass: + return "clang-linker-wrapper"; case StaticLibJobClass: return "static-lib-linker"; } @@ -418,6 +420,12 @@ types::ID Type) : JobAction(OffloadWrapperJobClass, Inputs, Type) {} +void LinkerWrapperJobAction::anchor() {} + +LinkerWrapperJobAction::LinkerWrapperJobAction(ActionList &Inputs, + types::ID Type) + : JobAction(LinkerWrapperJobClass, Inputs, Type) {} + void StaticLibJobAction::anchor() {} StaticLibJobAction::StaticLibJobAction(ActionList &Inputs, types::ID Type) Index: clang/lib/CodeGen/CodeGenAction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenAction.cpp +++ clang/lib/CodeGen/CodeGenAction.cpp @@ -1134,6 +1134,7 @@ TheModule->setTargetTriple(TargetOpts.Triple); } + EmbedBinary(TheModule.get(), CodeGenOpts, Diagnostics); EmbedBitcode(TheModule.get(), CodeGenOpts, *MainFile); LLVMContext &Ctx = TheModule->getContext(); Index: clang/lib/CodeGen/BackendUtil.cpp =================================================================== --- clang/lib/CodeGen/BackendUtil.cpp +++ clang/lib/CodeGen/BackendUtil.cpp @@ -1738,8 +1738,43 @@ llvm::MemoryBufferRef Buf) { if (CGOpts.getEmbedBitcode() == CodeGenOptions::Embed_Off) return; + llvm::EmbedBitcodeInModule( *M, Buf, CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker, CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode, CGOpts.CmdArgs); } + +void clang::EmbedBinary(llvm::Module *M, const CodeGenOptions &CGOpts, + DiagnosticsEngine &Diags) { + if 
(CGOpts.OffloadBinaryString.empty()) + return; + + SmallVector<StringRef, 4> BinaryFilenames; + SmallVector<StringRef, 4> BinarySections; + StringRef(CGOpts.OffloadBinaryString).split(BinaryFilenames, ","); + StringRef(CGOpts.OffloadSectionString).split(BinarySections, ","); + + assert(BinaryFilenames.size() == BinarySections.size() && + "Different number of filenames and section names in embedding"); + + auto BinarySection = BinarySections.begin(); + for (StringRef BinaryFilename : BinaryFilenames) { + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BinaryOrErr = + llvm::MemoryBuffer::getFileOrSTDIN(BinaryFilename); + if (std::error_code EC = BinaryOrErr.getError()) { + auto DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "could not open '%0' for embedding"); + Diags.Report(DiagID) << BinaryFilename; + return; + } + + SmallString<128> SectionName(".llvm.offloading"); + if (!BinarySection->empty()) { + SectionName += "."; + SectionName += *BinarySection; + } + llvm::EmbedObjectInModule(*M, **BinaryOrErr, SectionName); + ++BinarySection; + } +} Index: clang/include/clang/Driver/ToolChain.h =================================================================== --- clang/include/clang/Driver/ToolChain.h +++ clang/include/clang/Driver/ToolChain.h @@ -151,6 +151,7 @@ mutable std::unique_ptr<Tool> IfsMerge; mutable std::unique_ptr<Tool> OffloadBundler; mutable std::unique_ptr<Tool> OffloadWrapper; + mutable std::unique_ptr<Tool> LinkerWrapper; Tool *getClang() const; Tool *getFlang() const; @@ -161,6 +162,7 @@ Tool *getClangAs() const; Tool *getOffloadBundler() const; Tool *getOffloadWrapper() const; + Tool *getLinkerWrapper() const; mutable bool SanitizerArgsChecked = false; mutable std::unique_ptr<XRayArgs> XRayArguments; Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -1148,6 +1148,14 @@ PosFlag<SetTrue, 
[CC1Option], "Enable support for the C++ Coroutines TS">, NegFlag<SetFalse>>; +def fembed_offload_binary_EQ : Joined<["-"], "fembed-offload-binary=">, + Group<f_Group>, Flags<[NoXarchOption, CC1Option]>, + HelpText<"Embed Offloading device-side binary into host object file.">, + MarshallingInfoString<CodeGenOpts<"OffloadBinaryString">>; +def fembed_offload_section_EQ : Joined<["-"], "fembed-offload-section=">, + Group<f_Group>, Flags<[NoXarchOption, CC1Option]>, + HelpText<"Section name to use for the embedded device binary.">, + MarshallingInfoString<CodeGenOpts<"OffloadSectionString">>; def fembed_bitcode_EQ : Joined<["-"], "fembed-bitcode=">, Group<f_Group>, Flags<[NoXarchOption, CC1Option, CC1AsOption]>, MetaVarName<"<option>">, HelpText<"Embed LLVM bitcode (option: off, all, bitcode, marker)">, @@ -2461,6 +2469,8 @@ PosFlag<SetTrue, [CC1Option]>, NegFlag<SetFalse>, BothFlags<[NoArgumentUnused, HelpHidden]>>; def static_openmp: Flag<["-"], "static-openmp">, HelpText<"Use the static host OpenMP runtime while linking.">; +def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>, + HelpText<"Use the new driver for OpenMP offloading.">; def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>; def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>; defm escaping_block_tail_calls : BoolFOption<"escaping-block-tail-calls", Index: clang/include/clang/Driver/Job.h =================================================================== --- clang/include/clang/Driver/Job.h +++ clang/include/clang/Driver/Job.h @@ -208,6 +208,8 @@ Arguments = std::move(List); } + void replaceExecutable(const char *Exe) { Executable = Exe; } + const char *getExecutable() const { return Executable; } const llvm::opt::ArgStringList &getArguments() const { return Arguments; } Index: clang/include/clang/Driver/Driver.h =================================================================== --- 
clang/include/clang/Driver/Driver.h +++ clang/include/clang/Driver/Driver.h @@ -12,6 +12,7 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LLVM.h" #include "clang/Driver/Action.h" +#include "clang/Driver/InputInfo.h" #include "clang/Driver/Options.h" #include "clang/Driver/Phases.h" #include "clang/Driver/ToolChain.h" @@ -38,13 +39,14 @@ namespace driver { - class Command; - class Compilation; - class InputInfo; - class JobList; - class JobAction; - class SanitizerArgs; - class ToolChain; +typedef SmallVector<InputInfo, 4> InputInfoList; + +class Command; +class Compilation; +class JobList; +class JobAction; +class SanitizerArgs; +class ToolChain; /// Describes the kind of LTO mode selected via -f(no-)?lto(=.*)? options. enum LTOKind { @@ -171,9 +173,11 @@ /// The file to log CC_LOG_DIAGNOSTICS output to, if enabled. std::string CCLogDiagnosticsFilename; + /// An input type and its arguments. + using InputTy = std::pair<types::ID, const llvm::opt::Arg *>; + /// A list of inputs and their types for the given arguments. - typedef SmallVector<std::pair<types::ID, const llvm::opt::Arg *>, 16> - InputList; + using InputList = SmallVector<InputTy, 16>; /// Whether the driver should follow g++ like behavior. bool CCCIsCXX() const { return Mode == GXXMode; } @@ -413,6 +417,18 @@ void BuildUniversalActions(Compilation &C, const ToolChain &TC, const InputList &BAInputs) const; + /// BuildOffloadingActions - Construct the list of actions to perform for the + /// offloading toolchain that will be embedded in the host. + /// + /// \param C - The compilation that is being built. + /// \param Args - The input arguments. + /// \param Input - The input type and arguments + /// \param HostAction - The host action used in the offloading toolchain. + Action *BuildOffloadingActions(Compilation &C, + llvm::opt::DerivedArgList &Args, + const InputTy &Input, + Action *HostAction) const; + /// Check that the file referenced by Value exists. 
If it doesn't, /// issue a diagnostic and return false. /// If TypoCorrect is true and the file does not exist, see if it looks @@ -503,13 +519,12 @@ /// BuildJobsForAction - Construct the jobs to perform for the action \p A and /// return an InputInfo for the result of running \p A. Will only construct /// jobs for a given (Action, ToolChain, BoundArch, DeviceKind) tuple once. - InputInfo - BuildJobsForAction(Compilation &C, const Action *A, const ToolChain *TC, - StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, - const char *LinkingOutput, - std::map<std::pair<const Action *, std::string>, InputInfo> - &CachedResults, - Action::OffloadKind TargetDeviceOffloadKind) const; + InputInfoList BuildJobsForAction( + Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, + bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, + std::map<std::pair<const Action *, std::string>, InputInfoList> + &CachedResults, + Action::OffloadKind TargetDeviceOffloadKind) const; /// Returns the default name for linked images (e.g., "a.out"). const char *getDefaultImageName() const; @@ -617,10 +632,10 @@ /// Helper used in BuildJobsForAction. Doesn't use the cache when building /// jobs specifically for the given action, but will use the cache when /// building jobs for the Action's inputs. 
- InputInfo BuildJobsForActionNoCache( + InputInfoList BuildJobsForActionNoCache( Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - std::map<std::pair<const Action *, std::string>, InputInfo> + std::map<std::pair<const Action *, std::string>, InputInfoList> &CachedResults, Action::OffloadKind TargetDeviceOffloadKind) const; Index: clang/include/clang/Driver/Action.h =================================================================== --- clang/include/clang/Driver/Action.h +++ clang/include/clang/Driver/Action.h @@ -73,6 +73,7 @@ OffloadBundlingJobClass, OffloadUnbundlingJobClass, OffloadWrapperJobClass, + LinkerWrapperJobClass, StaticLibJobClass, JobClassFirst = PreprocessJobClass, @@ -642,6 +643,17 @@ } }; +class LinkerWrapperJobAction : public JobAction { + void anchor() override; + +public: + LinkerWrapperJobAction(ActionList &Inputs, types::ID Type); + + static bool classof(const Action *A) { + return A->getKind() == LinkerWrapperJobClass; + } +}; + class StaticLibJobAction : public JobAction { void anchor() override; Index: clang/include/clang/CodeGen/BackendUtil.h =================================================================== --- clang/include/clang/CodeGen/BackendUtil.h +++ clang/include/clang/CodeGen/BackendUtil.h @@ -44,6 +44,9 @@ void EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts, llvm::MemoryBufferRef Buf); + + void EmbedBinary(llvm::Module *M, const CodeGenOptions &CGOpts, + DiagnosticsEngine &Diags); } #endif Index: clang/include/clang/Basic/CodeGenOptions.h =================================================================== --- clang/include/clang/Basic/CodeGenOptions.h +++ clang/include/clang/Basic/CodeGenOptions.h @@ -276,6 +276,14 @@ /// CUDA runtime back-end for incorporating them into host-side object file. 
std::string CudaGpuBinaryFileName; + /// Comma-separated list of files passed with the -fembed-offload-binary + /// option to embed device-side offloading binaries in the host object file. + std::string OffloadBinaryString; + + /// Comma-separated list of section names passed with -fembed-offload-section + /// to use when embedding files passed with -fembed-offload-binary. + std::string OffloadSectionString; + /// The name of the file to which the backend should save YAML optimization /// records. std::string OptRecordFile;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits