saiislam created this revision. saiislam added reviewers: jdoerfert, JonChesterfield, jhuber6, yaxunl. Herald added a subscriber: guansong. Herald added a project: All. saiislam requested review of this revision. Herald added subscribers: openmp-commits, cfe-commits, sstefan1. Herald added projects: clang, OpenMP.
This patch adds a "__tgt_image_info" field for each of the images embedded in a multi-arch image. Required changes in libomptarget are also shown. The information in the "__tgt_image_info" struct is provided in the clang-linker-wrapper as a call to __tgt_register_image_info for each image in the library of images also created by the clang-linker-wrapper. __tgt_register_image_info is called for each image BEFORE the single call to __tgt_register_lib so that image information is available before the images are loaded. clang-linker-wrapper gets this image information from command line arguments provided by the clang driver when it creates the call to the __clang-linker-wrapper command. This architecture allows the binary image (pointed to by ImageStart and ImageEnd in __tgt_device_image) to remain architecture-independent. That is, the architecture-independent part of the libomptarget runtime does not need to peer inside the image to determine if it is loadable, even though in most cases the image is an ELF object. There is one __tgt_image_info for each __tgt_device_image. For backward compatibility, no changes are allowed to either __tgt_device_image or __tgt_bin_desc. The absence of __tgt_image_info is the indication that the runtime is being used on a binary created by an old version of the compiler. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D124525 Files: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp clang/tools/clang-linker-wrapper/OffloadWrapper.cpp clang/tools/clang-linker-wrapper/OffloadWrapper.h openmp/libomptarget/include/omptarget.h openmp/libomptarget/src/exports openmp/libomptarget/src/interface.cpp openmp/libomptarget/src/rtl.cpp
Index: openmp/libomptarget/src/rtl.cpp =================================================================== --- openmp/libomptarget/src/rtl.cpp +++ openmp/libomptarget/src/rtl.cpp @@ -13,6 +13,7 @@ #include "rtl.h" #include "device.h" #include "private.h" +#include "llvm/OffloadArch/OffloadArch.h" #include <cassert> #include <cstdlib> @@ -20,6 +21,8 @@ #include <dlfcn.h> #include <mutex> #include <string> +// It's strange we do not have llvm tools for openmp runtime, so we use stat +#include <sys/stat.h> // List of all plugins that can support offloading. static const char *RTLNames[] = { @@ -351,18 +354,109 @@ initRTLonce(R); } +/// Query runtime capabilities of this system by calling offload-arch -c +/// offload_arch_output_buffer is persistant storage returned by this +/// __tgt_get_active_offload_env. +static void +__tgt_get_active_offload_env(__tgt_active_offload_env *active_env, + char *offload_arch_output_buffer, + size_t offload_arch_output_buffer_size) { + + // If OFFLOAD_ARCH_OVERRIDE env varible is present then use its value instead of + // querying it using LLVMOffloadArch library. 
+ if (char *OffloadArchEnvVar = getenv("OFFLOAD_ARCH_OVERRIDE")) { + if (OffloadArchEnvVar) { + active_env->capabilities = OffloadArchEnvVar; + return; + } + } + // Qget runtime capabilities of this system with libLLVMOffloadArch.a + if (int rc = getRuntimeCapabilities(offload_arch_output_buffer, + offload_arch_output_buffer_size)) + return; + active_env->capabilities = offload_arch_output_buffer; + return; +} + +std::vector<std::string> _splitstrings(char *input, const char *sep) { + std::vector<std::string> split_strings; + std::string s(input); + std::string delimiter(sep); + size_t pos = 0; + while ((pos = s.find(delimiter)) != std::string::npos) { + if (pos != 0) + split_strings.push_back(s.substr(0, pos)); + s.erase(0, pos + delimiter.length()); + } + if (s.length() > 1) + split_strings.push_back(s.substr(0, s.length())); + return split_strings; +} + +static bool _ImageIsCompatibleWithEnv(__tgt_image_info *img_info, + __tgt_active_offload_env *active_env) { + // get_image_info will return null if no image information was registered. + // If no image information, assume application built with old compiler and + // check each image. + if (!img_info) + return true; + + if (!active_env->capabilities) + return false; + + // Each runtime requirement for the compiled image is stored in + // the img_info->offload_arch (TargetID) string. + // Each runtime capability obtained from "offload-arch -c" is stored in + // actvie_env->capabilities (TargetID) string. 
+ // If every requirement has a matching capability, then the image + // is compatible with active environment + + std::vector<std::string> reqs = _splitstrings(img_info->offload_arch, ":"); + std::vector<std::string> caps = _splitstrings(active_env->capabilities, ":"); + + bool is_compatible = true; + for (auto req : reqs) { + bool missing_capability = true; + for (auto capability : caps) + if (capability == req) + missing_capability = false; + if (missing_capability) { + DP("Image requires %s but runtime capability %s is missing.\n", + img_info->offload_arch, req.c_str()); + is_compatible = false; + } + } + return is_compatible; +} + +#define MAX_CAPS_STR_SIZE 1024 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) { + + // Get the current active offload environment + __tgt_active_offload_env offload_env = { nullptr }; + // Need a buffer to hold results of offload-arch -c command + size_t offload_arch_output_buffer_size = MAX_CAPS_STR_SIZE; + std::vector<char> offload_arch_output_buffer; + offload_arch_output_buffer.resize(offload_arch_output_buffer_size); + __tgt_get_active_offload_env(&offload_env, offload_arch_output_buffer.data(), + offload_arch_output_buffer_size); + + RTLInfoTy *FoundRTL = NULL; PM->RTLsMtx.lock(); // Register the images with the RTLs that understand them, if any. for (int32_t i = 0; i < desc->NumDeviceImages; ++i) { // Obtain the image. __tgt_device_image *img = &desc->DeviceImages[i]; - RTLInfoTy *FoundRTL = nullptr; - + // Get corresponding image info offload_arch and check with runtime + __tgt_image_info *img_info = __tgt_get_image_info(i); + if (!_ImageIsCompatibleWithEnv(img_info, &offload_env)) + continue; + FoundRTL = NULL; // Scan the RTLs that have associated images until we find one that supports // the current image. 
for (auto &R : AllRTLs) { + if (!R.is_valid_binary(img)) { DP("Image " DPxMOD " is NOT compatible with RTL %s!\n", DPxPTR(img->ImageStart), R.RTLName.c_str()); @@ -407,6 +501,39 @@ } PM->RTLsMtx.unlock(); + if (!FoundRTL) { + if (PM->TargetOffloadPolicy == tgt_mandatory) + fprintf(stderr, "ERROR:\ + Runtime capabilities do NOT meet any offload image offload_arch\n\ + and the OMP_TARGET_OFFLOAD policy is mandatory. Terminating!\n\ + Runtime capabilities : %s\n", + offload_env.capabilities); + else if (PM->TargetOffloadPolicy == tgt_disabled) + fprintf(stderr, "WARNING: Offloading is disabled.\n"); + else + fprintf( + stderr, + "WARNING: Runtime capabilities do NOT meet any image offload_arch.\n\ + So device offloading is now disabled.\n\ + Runtime capabilities : %s\n", + offload_env.capabilities); + if (PM->TargetOffloadPolicy != tgt_disabled) { + for (int32_t i = 0; i < desc->NumDeviceImages; ++i) { + __tgt_image_info *img_info = __tgt_get_image_info(i); + if (img_info) + fprintf(stderr, "\ + Image %d offload_arch : %s\n", + i, img_info->offload_arch); + else + fprintf(stderr, "\ + Image %d has no offload_arch. 
Could be from older compiler\n", + i); + } + } + if (PM->TargetOffloadPolicy == tgt_mandatory) + exit(1); + } + DP("Done registering entries!\n"); } Index: openmp/libomptarget/src/interface.cpp =================================================================== --- openmp/libomptarget/src/interface.cpp +++ openmp/libomptarget/src/interface.cpp @@ -43,6 +43,29 @@ PM->RTLs.RegisterLib(desc); } +static __tgt_image_info **__tgt_AllImageInfos; +static int __tgt_num_registered_images = 0; +EXTERN void __tgt_register_image_info(__tgt_image_info *imageInfo) { + + DP(" register_image_info image %d of %d offload-arch:%s VERSION:%d\n", + imageInfo->image_number, imageInfo->number_images, imageInfo->offload_arch, + imageInfo->version); + if (!__tgt_AllImageInfos) + __tgt_AllImageInfos = (__tgt_image_info **)malloc( + sizeof(__tgt_image_info *) * imageInfo->number_images); + __tgt_AllImageInfos[imageInfo->image_number] = imageInfo; + __tgt_num_registered_images = imageInfo->number_images; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Return pointer to image information if it was registered +EXTERN __tgt_image_info *__tgt_get_image_info(unsigned image_number) { + if (__tgt_num_registered_images) + return __tgt_AllImageInfos[image_number]; + else + return nullptr; +} + //////////////////////////////////////////////////////////////////////////////// /// Initialize all available devices without registering any image EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); } @@ -59,6 +82,10 @@ } } } + if (__tgt_num_registered_images) { + free(__tgt_AllImageInfos); + __tgt_num_registered_images = 0; + } } /// creates host-to-target data mapping, stores it in the Index: openmp/libomptarget/src/exports =================================================================== --- openmp/libomptarget/src/exports +++ openmp/libomptarget/src/exports @@ -2,6 +2,7 @@ global: __tgt_register_requires; __tgt_register_lib; + __tgt_register_image_info; 
__tgt_unregister_lib; __tgt_init_all_rtls; __tgt_target_data_begin; Index: openmp/libomptarget/include/omptarget.h =================================================================== --- openmp/libomptarget/include/omptarget.h +++ openmp/libomptarget/include/omptarget.h @@ -136,6 +136,44 @@ __tgt_offload_entry *HostEntriesEnd; // End of table (non inclusive) }; +/// __tgt_image_info: +/// +/// The information in this struct is provided in the clang-linker-wrapper +/// as a call to __tgt_register_image_info for each image in the library +/// of images also created by the clang-linker-wrapper. +/// __tgt_register_image_info is called for each image BEFORE the single +/// call to __tgt_register_lib so that image information is available +/// before they are loaded. clang-linker-wrapper gets this image information +/// from command line arguments provided by the clang driver when it creates +/// the call to the __clang-linker-wrapper command. +/// This architecture allows the binary image (pointed to by ImageStart and +/// ImageEnd in __tgt_device_image) to remain architecture indenendent. +/// That is, the architecture independent part of the libomptarget runtime +/// does not need to peer inside the image to determine if it is loadable +/// even though in most cases the image is an elf object. +/// There is one __tgt_image_info for each __tgt_device_image. For backward +/// compabibility, no changes are allowed to either __tgt_device_image or +/// __tgt_bin_desc. The absense of __tgt_image_info is the indication that +/// the runtime is being used on a binary created by an old version of +/// the compiler. +/// +struct __tgt_image_info { + int32_t version; // The version of this struct + int32_t image_number; // Image number in image library starting from 0 + int32_t number_images; // Number of images, used for initial allocation + char *offload_arch; // e.g. 
sm_30, sm_70, gfx906, includes features + char *compile_opts; // reserved for future use +}; + +/// __tgt_active_offload_env +/// +/// This structure is created by __tgt_get_active_offload_env and is used +/// to determine compatibility of the images with the current environment +/// that is "in play". +struct __tgt_active_offload_env { +char *capabilities; // string returned by offload-arch -c +}; + /// This struct contains the offload entries identified by the target runtime struct __tgt_target_table { __tgt_offload_entry *EntriesBegin; // Begin of the table with all the entries @@ -236,6 +274,12 @@ /// Initialize all RTLs at once void __tgt_init_all_rtls(); +/// adds an image information struct, called for each image +void __tgt_register_image_info(__tgt_image_info *imageInfo); + +/// gets pointer to image information for specified image number +/// Returns nullptr for apps built with old version of compiler +__tgt_image_info *__tgt_get_image_info(uint32_t image_num); /// removes a target shared library from the target execution image void __tgt_unregister_lib(__tgt_bin_desc *desc); Index: clang/tools/clang-linker-wrapper/OffloadWrapper.h =================================================================== --- clang/tools/clang-linker-wrapper/OffloadWrapper.h +++ clang/tools/clang-linker-wrapper/OffloadWrapper.h @@ -15,6 +15,7 @@ /// Wrap the input device images into the module \p M as global symbols and /// registers the images with the OpenMP Offloading runtime libomptarget. 
llvm::Error wrapBinaries(llvm::Module &M, - llvm::ArrayRef<llvm::ArrayRef<char>> Images); + llvm::ArrayRef<llvm::ArrayRef<char>> Images, + llvm::ArrayRef<llvm::ArrayRef<char>> OffloadArchs); #endif Index: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp =================================================================== --- clang/tools/clang-linker-wrapper/OffloadWrapper.cpp +++ clang/tools/clang-linker-wrapper/OffloadWrapper.cpp @@ -93,6 +93,29 @@ return PointerType::getUnqual(getBinDescTy(M)); } + // This matches the runtime struct definition of __tgt_image_info + // declared in openmp/libomptarget/include/omptarget.h / + // struct __tgt_image_info { + // int32_t version; + // int32_t image_number; + // int32_t number_images; + // char* offload_arch; + // char* target_compile_opts; + // }; + StructType *getImageInfoTy(Module &M) { + LLVMContext &C = M.getContext(); + StructType *ImageInfoTy = StructType::getTypeByName(C, "__tgt_image_info"); + if (!ImageInfoTy) + ImageInfoTy = StructType::create( + "__tgt_image_info", Type::getInt32Ty(C), Type::getInt32Ty(C), + Type::getInt32Ty(C), Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)); + return ImageInfoTy; + } + + PointerType *getImageInfoPtrTy(Module &M) { + return PointerType::getUnqual(getImageInfoTy(M)); + } + /// Creates binary descriptor for the given device images. 
Binary descriptor /// is an object that is passed to the offloading runtime at program startup /// and it describes all device images available in the executable or shared @@ -205,7 +228,7 @@ ".omp_offloading.descriptor"); } -void createRegisterFunction(Module &M, GlobalVariable *BinDesc) { +void createRegisterFunction(Module &M, GlobalVariable *BinDesc, ArrayRef<ArrayRef<char>> OffloadArchs) { LLVMContext &C = M.getContext(); auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, @@ -220,6 +243,56 @@ // Construct function body IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func)); + // Create calls to __tgt_register_image_info for each image + auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); + auto *Zero = ConstantInt::get(getSizeTTy(M), 0u); + auto *RegInfoFuncTy = + FunctionType::get(Type::getVoidTy(C), getImageInfoPtrTy(M), false); + FunctionCallee RegInfoFuncC = + M.getOrInsertFunction("__tgt_register_image_info", RegInfoFuncTy); + unsigned int ImgCount = 0; + std::string OffloadArchBase = "__offload_arch"; + std::string OffloadImageBase = "offload_image_info"; + + for (ArrayRef<char> OArch : OffloadArchs) { + Constant *OArchV = ConstantDataArray::get(C, OArch); + std::string OffloadArchGV(OffloadArchBase), OffloadImageGV(OffloadImageBase); + if(ImgCount) { + auto Suffix = std::to_string(ImgCount); + OffloadArchGV.append(".").append(Suffix); + OffloadImageGV.append(".").append(Suffix); + } + + auto *GV = + new GlobalVariable(M, OArchV->getType(), /*isConstant*/ true, + GlobalValue::InternalLinkage, OArchV, + OffloadArchGV); + GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + + // store value of these variables (i.e. offload archs) into a custom + // section which will be used by "offload-arch -f". It won't be + // removed during binary stripping. 
+ GV->setSection(".offload_arch_list"); + + auto *RequirementVPtr = + ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zero); + RequirementVPtr = + ConstantExpr::getBitCast(RequirementVPtr, Type::getInt8PtrTy(C)); + auto *InfoInit = ConstantStruct::get( + getImageInfoTy(M), ConstantInt::get(Type::getInt32Ty(C), 1), + ConstantInt::get(Type::getInt32Ty(C), ImgCount++), + ConstantInt::get(Type::getInt32Ty(C), (uint32_t)OffloadArchs.size()), + RequirementVPtr, + NullPtr // TODO: capture target-compile-opts from clang driver + ); + auto *ImageInfoGV = new GlobalVariable( + M, InfoInit->getType(), + /*isConstant*/ true, GlobalValue::InternalLinkage, InfoInit, + OffloadImageGV); + ImageInfoGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + Builder.CreateCall(RegInfoFuncC, ImageInfoGV); + } + Builder.CreateCall(RegFuncC, BinDesc); Builder.CreateRetVoid(); @@ -257,12 +330,12 @@ } // namespace -Error wrapBinaries(Module &M, ArrayRef<ArrayRef<char>> Images) { +Error wrapBinaries(Module &M, ArrayRef<ArrayRef<char>> Images, ArrayRef<ArrayRef<char>> OffloadArchs) { GlobalVariable *Desc = createBinDesc(M, Images); if (!Desc) return createStringError(inconvertibleErrorCode(), "No binary descriptors created."); - createRegisterFunction(M, Desc); + createRegisterFunction(M, Desc, OffloadArchs); createUnregisterFunction(M, Desc); return Error::success(); } Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp =================================================================== --- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -172,6 +172,15 @@ std::string Filename; }; +/// Information for an offloading image obtained after linking device images. +struct ImageInfo { + ImageInfo(StringRef Arch, StringRef Imagename) + : Arch(Arch), Imagename(Imagename) {} + + std::string Arch; + std::string Imagename; +}; + namespace llvm { /// Helper that allows DeviceFile to be used as a key in a DenseMap. 
template <> struct DenseMapInfo<DeviceFile> { @@ -702,6 +711,8 @@ CmdArgs.push_back("gnu"); CmdArgs.push_back("--no-undefined"); CmdArgs.push_back("-shared"); + std::string ArchArg = std::string("-plugin-opt=mcpu=").append(Arch.str()); + CmdArgs.push_back(ArchArg); CmdArgs.push_back("-o"); CmdArgs.push_back(TempFile); @@ -1110,7 +1121,7 @@ /// Runs the appropriate linking action on all the device files specified in \p /// DeviceFiles. The linked device images are returned in \p LinkedImages. Error linkDeviceFiles(ArrayRef<DeviceFile> DeviceFiles, - SmallVectorImpl<std::string> &LinkedImages) { + SmallVectorImpl<ImageInfo> &LinkedImages) { // Get the list of inputs for a specific device. DenseMap<DeviceFile, SmallVector<std::string, 4>> LinkerInputMap; for (auto &File : DeviceFiles) @@ -1120,17 +1131,18 @@ for (auto &LinkerInput : LinkerInputMap) { DeviceFile &File = LinkerInput.getFirst(); Triple TheTriple = Triple(File.TheTriple); + std::string TheArch = File.Arch; bool WholeProgram = false; // Run LTO on any bitcode files and replace the input with the result. if (Error Err = linkBitcodeFiles(LinkerInput.getSecond(), TheTriple, - File.Arch, WholeProgram)) + TheArch, WholeProgram)) return Err; // If we are embedding bitcode for JIT, skip the final device linking. if (EmbedBitcode) { assert(!LinkerInput.getSecond().empty() && "No bitcode image to embed"); - LinkedImages.push_back(LinkerInput.getSecond().front()); + LinkedImages.emplace_back(TheArch, LinkerInput.getSecond().front()); continue; } @@ -1138,15 +1150,15 @@ // CUDA in non-RDC mode. 
if (WholeProgram && TheTriple.isNVPTX()) { assert(!LinkerInput.getSecond().empty() && "No non-RDC image to embed"); - LinkedImages.push_back(LinkerInput.getSecond().front()); + LinkedImages.emplace_back(TheArch, LinkerInput.getSecond().front()); continue; } - auto ImageOrErr = linkDevice(LinkerInput.getSecond(), TheTriple, File.Arch); + auto ImageOrErr = linkDevice(LinkerInput.getSecond(), TheTriple, TheArch); if (!ImageOrErr) return ImageOrErr.takeError(); - LinkedImages.push_back(*ImageOrErr); + LinkedImages.emplace_back(TheArch, *ImageOrErr); } return Error::success(); } @@ -1193,11 +1205,14 @@ /// Creates the object file containing the device image and runtime registration /// code from the device images stored in \p Images. -Expected<std::string> wrapDeviceImages(ArrayRef<std::string> Images) { +Expected<std::string> wrapDeviceImages(ArrayRef<ImageInfo> Images) { SmallVector<std::unique_ptr<MemoryBuffer>, 4> SavedBuffers; SmallVector<ArrayRef<char>, 4> ImagesToWrap; - - for (StringRef ImageFilename : Images) { + SmallVector<ArrayRef<char>, 4u> OffloadArchs; + OffloadArchs.reserve(Images.size()); + + for (ImageInfo Image : Images) { + StringRef ImageFilename = Image.Imagename; llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError = llvm::MemoryBuffer::getFileOrSTDIN(ImageFilename); if (std::error_code EC = ImageOrError.getError()) @@ -1205,12 +1220,16 @@ ImagesToWrap.emplace_back((*ImageOrError)->getBufferStart(), (*ImageOrError)->getBufferSize()); SavedBuffers.emplace_back(std::move(*ImageOrError)); + + std::string Arch = Image.Arch; + Arch.append("\0"); + OffloadArchs.emplace_back(Arch.data(), Arch.size() + 1); } LLVMContext Context; Module M("offload.wrapper.module", Context); M.setTargetTriple(HostTriple); - if (Error Err = wrapBinaries(M, ImagesToWrap)) + if (Error Err = wrapBinaries(M, ImagesToWrap, OffloadArchs)) return std::move(Err); if (PrintWrappedModule) @@ -1347,7 +1366,7 @@ DeviceFiles.push_back(getBitcodeLibrary(LibraryStr)); // Link 
the device images extracted from the linker input. - SmallVector<std::string, 16> LinkedImages; + SmallVector<ImageInfo, 16> LinkedImages; if (Error Err = linkDeviceFiles(DeviceFiles, LinkedImages)) return reportError(std::move(Err));
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits