llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-mlir-llvm @llvm/pr-subscribers-mlir-gpu Author: Mehdi Amini (joker-eph) <details> <summary>Changes</summary> Some specific implementations of the offload may want more customization, and even avoid using LLVM in-tree to dispatch the ISA translation to a custom solution. This refactoring makes it possible for such implementations to work without even configuring the target backend in LLVM. --- Patch is 20.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71165.diff 9 Files Affected: - (modified) mlir/include/mlir/Target/LLVM/ModuleToObject.h (+20-17) - (modified) mlir/include/mlir/Target/LLVM/NVVM/Utils.h (+1-2) - (modified) mlir/include/mlir/Target/LLVM/ROCDL/Utils.h (+3-6) - (modified) mlir/lib/Conversion/GPUCommon/CMakeLists.txt (+1-1) - (modified) mlir/lib/Dialect/GPU/CMakeLists.txt (+1-1) - (modified) mlir/lib/Target/LLVM/CMakeLists.txt (+1-1) - (modified) mlir/lib/Target/LLVM/ModuleToObject.cpp (+43-36) - (modified) mlir/lib/Target/LLVM/NVVM/Target.cpp (+15-11) - (modified) mlir/lib/Target/LLVM/ROCDL/Target.cpp (+21-15) ``````````diff diff --git a/mlir/include/mlir/Target/LLVM/ModuleToObject.h b/mlir/include/mlir/Target/LLVM/ModuleToObject.h index d17afc1077fb45d..e40d7e9a43dd6b5 100644 --- a/mlir/include/mlir/Target/LLVM/ModuleToObject.h +++ b/mlir/include/mlir/Target/LLVM/ModuleToObject.h @@ -31,7 +31,7 @@ class ModuleToObject { public: ModuleToObject(Operation &module, StringRef triple, StringRef chip, StringRef features = {}, int optLevel = 3); - virtual ~ModuleToObject() = default; + virtual ~ModuleToObject(); /// Returns the operation being serialized. Operation &getOperation(); @@ -42,44 +42,43 @@ class ModuleToObject { protected: // Hooks to be implemented by derived classes. + /// Hook for computing the Datalayout + virtual void setDataLayoutAndTriple(llvm::Module &module); + /// Hook for loading bitcode files, returns std::nullopt on failure. 
virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>> - loadBitcodeFiles(llvm::Module &module, llvm::TargetMachine &targetMachine) { + loadBitcodeFiles(llvm::Module &module) { return SmallVector<std::unique_ptr<llvm::Module>>(); } /// Hook for performing additional actions on a loaded bitcode file. - virtual LogicalResult handleBitcodeFile(llvm::Module &module, - llvm::TargetMachine &targetMachine) { + virtual LogicalResult handleBitcodeFile(llvm::Module &module) { return success(); } /// Hook for performing additional actions on the llvmModule pre linking. - virtual void handleModulePreLink(llvm::Module &module, - llvm::TargetMachine &targetMachine) {} + virtual void handleModulePreLink(llvm::Module &module) {} /// Hook for performing additional actions on the llvmModule post linking. - virtual void handleModulePostLink(llvm::Module &module, - llvm::TargetMachine &targetMachine) {} + virtual void handleModulePostLink(llvm::Module &module) {} /// Serializes the LLVM IR bitcode to an object file, by default it serializes /// to LLVM bitcode. virtual std::optional<SmallVector<char, 0>> - moduleToObject(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine); + moduleToObject(llvm::Module &llvmModule); protected: /// Create the target machine based on the target triple and chip. - std::unique_ptr<llvm::TargetMachine> createTargetMachine(); + /// This can fail if the target is not available. + std::optional<llvm::TargetMachine *> getOrCreateTargetMachine(); /// Loads a bitcode file from path. - std::unique_ptr<llvm::Module> - loadBitcodeFile(llvm::LLVMContext &context, - llvm::TargetMachine &targetMachine, StringRef path); + std::unique_ptr<llvm::Module> loadBitcodeFile(llvm::LLVMContext &context, + StringRef path); /// Loads multiple bitcode files. 
LogicalResult loadBitcodeFilesFromList( - llvm::LLVMContext &context, llvm::TargetMachine &targetMachine, - ArrayRef<std::string> fileList, + llvm::LLVMContext &context, ArrayRef<std::string> fileList, SmallVector<std::unique_ptr<llvm::Module>> &llvmModules, bool failureOnError = true); @@ -92,8 +91,7 @@ class ModuleToObject { SmallVector<std::unique_ptr<llvm::Module>> &&libs); /// Optimize the module. - LogicalResult optimizeModule(llvm::Module &module, - llvm::TargetMachine &targetMachine, int optL); + virtual LogicalResult optimizeModule(llvm::Module &module, int optL); /// Utility function for translating to ISA, returns `std::nullopt` on /// failure. @@ -115,6 +113,11 @@ class ModuleToObject { /// Optimization level. int optLevel; + +private: + /// The TargetMachine created for the given Triple, if available. + /// Accessible through `getOrCreateTargetMachine()`. + std::unique_ptr<llvm::TargetMachine> targetMachine; }; } // namespace LLVM } // namespace mlir diff --git a/mlir/include/mlir/Target/LLVM/NVVM/Utils.h b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h index d5926d15484722c..65ae8a6bdb4ada2 100644 --- a/mlir/include/mlir/Target/LLVM/NVVM/Utils.h +++ b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h @@ -55,8 +55,7 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// Loads the bitcode files in `fileList`. virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>> - loadBitcodeFiles(llvm::Module &module, - llvm::TargetMachine &targetMachine) override; + loadBitcodeFiles(llvm::Module &module) override; protected: /// NVVM target attribute. diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h index c14fa80056a879e..374fa65bd02e3b8 100644 --- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h +++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h @@ -54,16 +54,13 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// Loads the bitcode files in `fileList`. 
virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>> - loadBitcodeFiles(llvm::Module &module, - llvm::TargetMachine &targetMachine) override; + loadBitcodeFiles(llvm::Module &module) override; /// Adds `oclc` control variables to the LLVM module. - void handleModulePreLink(llvm::Module &module, - llvm::TargetMachine &targetMachine) override; + void handleModulePreLink(llvm::Module &module) override; /// Removes unnecessary metadata from the loaded bitcode files. - LogicalResult handleBitcodeFile(llvm::Module &module, - llvm::TargetMachine &targetMachine) override; + LogicalResult handleBitcodeFile(llvm::Module &module) override; protected: /// Appends the paths of common ROCm device libraries to `libs`. diff --git a/mlir/lib/Conversion/GPUCommon/CMakeLists.txt b/mlir/lib/Conversion/GPUCommon/CMakeLists.txt index 255b9efd32f86e5..b15876ab91c13f2 100644 --- a/mlir/lib/Conversion/GPUCommon/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUCommon/CMakeLists.txt @@ -1,4 +1,4 @@ -if (MLIR_ENABLE_CUDA_CONVERSIONS) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) set(NVPTX_LIBS NVPTXCodeGen NVPTXDesc diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index 324d5c136672270..1601413c49f1fc2 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -1,4 +1,4 @@ -if (MLIR_ENABLE_CUDA_CONVERSIONS) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) set(NVPTX_LIBS NVPTXCodeGen NVPTXDesc diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt index ce07c259df83351..cc2c3a00a02eaff 100644 --- a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -21,7 +21,7 @@ add_mlir_library(MLIRTargetLLVM MLIRTargetLLVMIRExport ) -if (MLIR_ENABLE_CUDA_CONVERSIONS) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) set(NVPTX_LIBS NVPTXCodeGen NVPTXDesc diff --git a/mlir/lib/Target/LLVM/ModuleToObject.cpp b/mlir/lib/Target/LLVM/ModuleToObject.cpp index 
e68ae8311ecfb95..6af3d49ab23bf74 100644 --- a/mlir/lib/Target/LLVM/ModuleToObject.cpp +++ b/mlir/lib/Target/LLVM/ModuleToObject.cpp @@ -39,32 +39,32 @@ ModuleToObject::ModuleToObject(Operation &module, StringRef triple, : module(module), triple(triple), chip(chip), features(features), optLevel(optLevel) {} +ModuleToObject::~ModuleToObject() = default; + Operation &ModuleToObject::getOperation() { return module; } -std::unique_ptr<llvm::TargetMachine> ModuleToObject::createTargetMachine() { +std::optional<llvm::TargetMachine *> +ModuleToObject::getOrCreateTargetMachine() { std::string error; // Load the target. const llvm::Target *target = llvm::TargetRegistry::lookupTarget(triple, error); if (!target) { - getOperation().emitError() << "Failed to lookup target: " << error; - return {}; + getOperation().emitError() + << "Failed to lookup target for triple '" << triple << "' " << error; + return std::nullopt; } // Create the target machine using the target. - llvm::TargetMachine *machine = - target->createTargetMachine(triple, chip, features, {}, {}); - if (!machine) { - getOperation().emitError() << "Failed to create the target machine."; - return {}; - } - return std::unique_ptr<llvm::TargetMachine>{machine}; + targetMachine.reset( + target->createTargetMachine(triple, chip, features, {}, {})); + if (!targetMachine) + return std::nullopt; + return targetMachine.get(); } std::unique_ptr<llvm::Module> -ModuleToObject::loadBitcodeFile(llvm::LLVMContext &context, - llvm::TargetMachine &targetMachine, - StringRef path) { +ModuleToObject::loadBitcodeFile(llvm::LLVMContext &context, StringRef path) { llvm::SMDiagnostic error; std::unique_ptr<llvm::Module> library = llvm::getLazyIRFileModule(path, error, context); @@ -73,15 +73,14 @@ ModuleToObject::loadBitcodeFile(llvm::LLVMContext &context, << ", error: " << error.getMessage(); return nullptr; } - if (failed(handleBitcodeFile(*library, targetMachine))) { + if (failed(handleBitcodeFile(*library))) { return nullptr; } 
return library; } LogicalResult ModuleToObject::loadBitcodeFilesFromList( - llvm::LLVMContext &context, llvm::TargetMachine &targetMachine, - ArrayRef<std::string> fileList, + llvm::LLVMContext &context, ArrayRef<std::string> fileList, SmallVector<std::unique_ptr<llvm::Module>> &llvmModules, bool failureOnError) { for (const std::string &str : fileList) { @@ -93,7 +92,7 @@ LogicalResult ModuleToObject::loadBitcodeFilesFromList( return failure(); } // Load the file or abort on error. - if (auto bcFile = loadBitcodeFile(context, targetMachine, pathRef)) + if (auto bcFile = loadBitcodeFile(context, pathRef)) llvmModules.push_back(std::move(bcFile)); else if (failureOnError) return failure(); @@ -137,16 +136,22 @@ ModuleToObject::linkFiles(llvm::Module &module, } LogicalResult ModuleToObject::optimizeModule(llvm::Module &module, - llvm::TargetMachine &targetMachine, + int optLevel) { if (optLevel < 0 || optLevel > 3) return getOperation().emitError() << "Invalid optimization level: " << optLevel << "."; - targetMachine.setOptLevel(static_cast<llvm::CodeGenOptLevel>(optLevel)); + std::optional<llvm::TargetMachine *> targetMachine = + getOrCreateTargetMachine(); + if (!targetMachine) + return getOperation().emitError() + << "Target Machine unavailable for triple " << triple + << ", can't optimize with LLVM\n"; + (*targetMachine)->setOptLevel(static_cast<llvm::CodeGenOptLevel>(optLevel)); auto transformer = - makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine); + makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, *targetMachine); auto error = transformer(&module); if (error) { InFlightDiagnostic mlirError = getOperation().emitError(); @@ -178,9 +183,19 @@ ModuleToObject::translateToISA(llvm::Module &llvmModule, return stream.str(); } +void ModuleToObject::setDataLayoutAndTriple(llvm::Module &module) { + // Create the target machine. 
+ std::optional<llvm::TargetMachine *> targetMachine = + getOrCreateTargetMachine(); + if (targetMachine) { + // Set the data layout and target triple of the module. + module.setDataLayout((*targetMachine)->createDataLayout()); + module.setTargetTriple((*targetMachine)->getTargetTriple().getTriple()); + } +} + std::optional<SmallVector<char, 0>> -ModuleToObject::moduleToObject(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) { +ModuleToObject::moduleToObject(llvm::Module &llvmModule) { SmallVector<char, 0> binaryData; // Write the LLVM module bitcode to a buffer. llvm::raw_svector_ostream outputStream(binaryData); @@ -196,32 +211,24 @@ std::optional<SmallVector<char, 0>> ModuleToObject::run() { getOperation().emitError() << "Failed creating the llvm::Module."; return std::nullopt; } - - // Create the target machine. - std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine(); - if (!targetMachine) - return std::nullopt; - - // Set the data layout and target triple of the module. - llvmModule->setDataLayout(targetMachine->createDataLayout()); - llvmModule->setTargetTriple(targetMachine->getTargetTriple().getTriple()); + setDataLayoutAndTriple(*llvmModule); // Link bitcode files. - handleModulePreLink(*llvmModule, *targetMachine); + handleModulePreLink(*llvmModule); { - auto libs = loadBitcodeFiles(*llvmModule, *targetMachine); + auto libs = loadBitcodeFiles(*llvmModule); if (!libs) return std::nullopt; if (!libs->empty()) if (failed(linkFiles(*llvmModule, std::move(*libs)))) return std::nullopt; - handleModulePostLink(*llvmModule, *targetMachine); + handleModulePostLink(*llvmModule); } // Optimize the module. - if (failed(optimizeModule(*llvmModule, *targetMachine, optLevel))) + if (failed(optimizeModule(*llvmModule, optLevel))) return std::nullopt; // Return the serialized object. 
- return moduleToObject(*llvmModule, *targetMachine); + return moduleToObject(*llvmModule); } diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp index 7f263627db54fbe..eaf94147e2a6f1f 100644 --- a/mlir/lib/Target/LLVM/NVVM/Target.cpp +++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp @@ -106,7 +106,7 @@ void SerializeGPUModuleBase::init() { static llvm::once_flag initializeBackendOnce; llvm::call_once(initializeBackendOnce, []() { // If the `NVPTX` LLVM target was built, initialize it. -#if MLIR_CUDA_CONVERSIONS_ENABLED == 1 +#if LLVM_HAS_NVPTX_TARGET LLVMInitializeNVPTXTarget(); LLVMInitializeNVPTXTargetInfo(); LLVMInitializeNVPTXTargetMC(); @@ -148,11 +148,10 @@ LogicalResult SerializeGPUModuleBase::appendStandardLibs() { } std::optional<SmallVector<std::unique_ptr<llvm::Module>>> -SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module, - llvm::TargetMachine &targetMachine) { +SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { SmallVector<std::unique_ptr<llvm::Module>> bcFiles; - if (failed(loadBitcodeFilesFromList(module.getContext(), targetMachine, - fileList, bcFiles, true))) + if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles, + true))) return std::nullopt; return std::move(bcFiles); } @@ -175,8 +174,7 @@ class NVPTXSerializer : public SerializeGPUModuleBase { compileToBinaryNVPTX(const std::string &ptxCode); std::optional<SmallVector<char, 0>> - moduleToObject(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) override; + moduleToObject(llvm::Module &llvmModule) override; private: using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>; @@ -514,8 +512,7 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) { #endif // MLIR_NVPTXCOMPILER_ENABLED == 1 std::optional<SmallVector<char, 0>> -NVPTXSerializer::moduleToObject(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) { +NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) { 
// Return LLVM IR if the compilation target is offload. #define DEBUG_TYPE "serialize-to-llvm" LLVM_DEBUG({ @@ -526,11 +523,18 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule, }); #undef DEBUG_TYPE if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload) - return SerializeGPUModuleBase::moduleToObject(llvmModule, targetMachine); + return SerializeGPUModuleBase::moduleToObject(llvmModule); // Emit PTX code. + std::optional<llvm::TargetMachine *> targetMachine = + getOrCreateTargetMachine(); + if (!targetMachine) { + getOperation().emitError() << "Target Machine unavailable for triple " + << triple << ", can't optimize with LLVM\n"; + return std::nullopt; + } std::optional<std::string> serializedISA = - translateToISA(llvmModule, targetMachine); + translateToISA(llvmModule, **targetMachine); if (!serializedISA) { getOperation().emitError() << "Failed translating the module to ISA."; return std::nullopt; diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index 23e9a4a52b43530..709275c7ddef20f 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -44,6 +44,7 @@ #include "llvm/TargetParser/TargetParser.h" #include <cstdlib> +#include <optional> using namespace mlir; using namespace mlir::ROCDL; @@ -158,18 +159,15 @@ LogicalResult SerializeGPUModuleBase::appendStandardLibs() { } std::optional<SmallVector<std::unique_ptr<llvm::Module>>> -SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module, - llvm::TargetMachine &targetMachine) { +SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { SmallVector<std::unique_ptr<llvm::Module>> bcFiles; - if (failed(loadBitcodeFilesFromList(module.getContext(), targetMachine, - fileList, bcFiles, true))) + if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles, + true))) return std::nullopt; return std::move(bcFiles); } -LogicalResult 
-SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module, - llvm::TargetMachine &targetMachine) { +LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) { // Some ROCM builds don't strip this like they should if (auto *openclVersion = module.getNamedMetadata("opencl.ocl.version")) module.eraseNamedMetadata(openclVersion); @@ -179,8 +177,10 @@ SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module, return success(); } -void SerializeGPUModuleBase::handleModulePreLink( - llvm::Module &module, llvm::TargetMachine &targetMachine) { +void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) { + std::optional<llvm::TargetMachine *> targetMachine = + getOrCreateTargetMachine(); + assert(targetMachine && "expect a TargetMachine"); addControlVariables(module, target.hasWave64(), target.hasDaz(), target.hasFiniteOnly(), target.hasUnsafeMath(), target.hasFastMath(), target.hasCorrectSqrt(), @@ -332,8 +332,7 @@ class AMDGPUSerializer : public SerializeGPUModuleBase { compileToBinary(const std::string &serializedISA); std::optional<SmallVector<char, 0>> - moduleToObject(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) override; + moduleToObject(llvm::Module &llvmModule) override; private: // Target options. @@ -411,8 +410,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { } std::optional<SmallVector<char, 0>> -AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) { +AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { // Return LLVM IR if the compilation target is offload. 
#define DEBUG_TYPE "serialize-to-llvm" LLVM_DEBUG({ @@ -422,11 +420,19 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule, }); #undef DEBUG_TYPE if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload) - return SerializeGPUModuleBase::moduleToObject(llvmModule, targetMachine); + return SerializeGPUModuleBase::moduleToObject(llvmModule); + + std::optional<llvm::TargetMachine *> targetMachine = + getOrCreateTargetMachine(); + if (!targetMachine) { + getOperation().emitError() << "Target Machine unavailable for triple " + << triple << ", can't compile with LLVM\n"; + return std::nullopt; + } // Translate the Module to ISA. std::optional<std::string> se... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/71165 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits