Author: Joseph Huber Date: 2022-08-11T09:08:51+02:00 New Revision: 0b12f770241134865be39d73413466da8430f126
URL: https://github.com/llvm/llvm-project/commit/0b12f770241134865be39d73413466da8430f126 DIFF: https://github.com/llvm/llvm-project/commit/0b12f770241134865be39d73413466da8430f126.diff LOG: [Libomptarget][CUDA] Check CUDA compatibility correctly We recently added support for multi-architecture binaries in libomptarget. This is done by extracting the architecture from the embedded image and comparing it with the major and minor version supported by the current CUDA installation. Previously we just compared these directly, which was not correct for binary compatibility. The CUDA documentation states that an image is compatible with the current device when it has the same major version and a minor version less than or equal to the device's minor version. Change the check to use this new logic in the CUDA plugin. Fixes #57049 Reviewed By: jdoerfert, ye-luo Differential Revision: https://reviews.llvm.org/D131567 (cherry picked from commit fdbb15355e7977b914cbd7e753b5e909d735ad83) Added: Modified: openmp/libomptarget/plugins/cuda/src/rtl.cpp Removed: ################################################################################ diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp index 97fc3e9908ee..6da43ad93da9 100644 --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -10,6 +10,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/StringRef.h" + #include <algorithm> #include <cassert> #include <cstddef> @@ -33,6 +35,8 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" +using namespace llvm; + // Utility for retrieving and printing CUDA error string. #ifdef OMPTARGET_DEBUG #define CUDA_ERR_STRING(err) \ @@ -1529,13 +1533,14 @@ int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image, return false; // A subarchitecture was not specified. Assume it is compatible. 
- if (!info->Arch) + if (!info || !info->Arch) return true; int32_t NumberOfDevices = 0; if (cuDeviceGetCount(&NumberOfDevices) != CUDA_SUCCESS) return false; + StringRef ArchStr = StringRef(info->Arch).drop_front(sizeof("sm_") - 1); for (int32_t DeviceId = 0; DeviceId < NumberOfDevices; ++DeviceId) { CUdevice Device; if (cuDeviceGet(&Device, DeviceId) != CUDA_SUCCESS) @@ -1551,8 +1556,11 @@ int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image, Device) != CUDA_SUCCESS) return false; - std::string ArchStr = "sm_" + std::to_string(Major) + std::to_string(Minor); - if (ArchStr != info->Arch) + // A cubin generated for a certain compute capability is supported to run on + // any GPU with the same major revision and same or higher minor revision. + int32_t ImageMajor = ArchStr[0] - '0'; + int32_t ImageMinor = ArchStr[1] - '0'; + if (Major != ImageMajor || Minor < ImageMinor) return false; } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits