
Author: Jon Chesterfield
Date: 2020-12-09T18:55:53Z
New Revision: cab9f6923522475e0d2137c66622c3fa70b01d3b

URL: https://github.com/llvm/llvm-project/commit/cab9f6923522475e0d2137c66622c3fa70b01d3b
DIFF: https://github.com/llvm/llvm-project/commit/cab9f6923522475e0d2137c66622c3fa70b01d3b.diff

LOG: [libomptarget][amdgpu] Improve diagnostics on arch mismatch

Added:
    openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp
    openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h

Modified:
    openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Removed:



################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
index 0c50ffdf2fa6..38f0afabf3ad 100644
--- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
@@ -57,6 +57,7 @@ add_library(omptarget.rtl.amdgpu SHARED
       impl/atmi.cpp
       impl/atmi_interop_hsa.cpp
       impl/data.cpp
+      impl/get_elf_mach_gfx_name.cpp
       impl/machine.cpp
       impl/system.cpp
       impl/utils.cpp
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp
new file mode 100644
index 000000000000..45af34684117
--- /dev/null
+++ b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp
@@ -0,0 +1,53 @@
+#include "get_elf_mach_gfx_name.h"
+
+// This header conflicts with the system elf.h (macros vs enums of the same
+// identifier) and contains more up to date values for the enum checked here.
+// rtl.cpp uses the system elf.h.
+#include "llvm/BinaryFormat/ELF.h"
+
+const char *get_elf_mach_gfx_name(uint32_t EFlags) {
+  using namespace llvm::ELF;
+  uint32_t Gfx = (EFlags & EF_AMDGPU_MACH);
+  switch (Gfx) {
+  case EF_AMDGPU_MACH_AMDGCN_GFX801:
+    return "gfx801";
+  case EF_AMDGPU_MACH_AMDGCN_GFX802:
+    return "gfx802";
+  case EF_AMDGPU_MACH_AMDGCN_GFX803:
+    return "gfx803";
+  case EF_AMDGPU_MACH_AMDGCN_GFX805:
+    return "gfx805";
+  case EF_AMDGPU_MACH_AMDGCN_GFX810:
+    return "gfx810";
+  case EF_AMDGPU_MACH_AMDGCN_GFX900:
+    return "gfx900";
+  case EF_AMDGPU_MACH_AMDGCN_GFX902:
+    return "gfx902";
+  case EF_AMDGPU_MACH_AMDGCN_GFX904:
+    return "gfx904";
+  case EF_AMDGPU_MACH_AMDGCN_GFX906:
+    return "gfx906";
+  case EF_AMDGPU_MACH_AMDGCN_GFX908:
+    return "gfx908";
+  case EF_AMDGPU_MACH_AMDGCN_GFX909:
+    return "gfx909";
+  case EF_AMDGPU_MACH_AMDGCN_GFX90C:
+    return "gfx90c";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1010:
+    return "gfx1010";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1011:
+    return "gfx1011";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1012:
+    return "gfx1012";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1030:
+    return "gfx1030";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1031:
+    return "gfx1031";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1032:
+    return "gfx1032";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1033:
+    return "gfx1033";
+  default:
+    return "--unknown gfx";
+  }
+}
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
new file mode 100644
index 000000000000..b1be90dc29d5
--- /dev/null
+++ b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
@@ -0,0 +1,8 @@
+#ifndef GET_ELF_MACH_GFX_NAME_H_INCLUDED
+#define GET_ELF_MACH_GFX_NAME_H_INCLUDED
+
+#include <stdint.h>
+
+const char *get_elf_mach_gfx_name(uint32_t EFlags);
+
+#endif
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 252abca08944..60040d1c0da4 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -36,6 +36,7 @@
 #include "internal.h"

 #include "Debug.h"
+#include "get_elf_mach_gfx_name.h"
 #include "omptargetplugin.h"

 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
@@ -92,14 +93,6 @@ uint32_t TgtStackItemSize = 0;

 #include "../../common/elf_common.c"

-static bool elf_machine_id_is_amdgcn(__tgt_device_image *image) {
-  const uint16_t amdgcnMachineID = 224;
-  int32_t r = elf_check_machine(image, amdgcnMachineID);
-  if (!r) {
-    DP("Supported machine ID not found\n");
-  }
-  return r;
-}

 /// Keep entries table per device
 struct FuncOrGblEntryTy {
@@ -319,6 +312,7 @@ class RTLDeviceInfoTy {
   std::vector<int> GroupsPerDevice;
   std::vector<int> ThreadsPerGroup;
   std::vector<int> WarpSize;
+  std::vector<std::string> GPUName;

   // OpenMP properties
   std::vector<int> NumTeams;
@@ -472,6 +466,7 @@ class RTLDeviceInfoTy {
     FuncGblEntries.resize(NumberOfDevices);
     ThreadsPerGroup.resize(NumberOfDevices);
     ComputeUnits.resize(NumberOfDevices);
+    GPUName.resize(NumberOfDevices);
     GroupsPerDevice.resize(NumberOfDevices);
     WarpSize.resize(NumberOfDevices);
     NumTeams.resize(NumberOfDevices);
@@ -642,6 +637,40 @@ void finiAsyncInfoPtr(__tgt_async_info *async_info_ptr) {
   assert(async_info_ptr->Queue);
   async_info_ptr->Queue = 0;
 }
+
+bool elf_machine_id_is_amdgcn(__tgt_device_image *image) {
+  const uint16_t amdgcnMachineID = EM_AMDGPU;
+  int32_t r = elf_check_machine(image, amdgcnMachineID);
+  if (!r) {
+    DP("Supported machine ID not found\n");
+  }
+  return r;
+}
+
+uint32_t elf_e_flags(__tgt_device_image *image) {
+  char *img_begin = (char *)image->ImageStart;
+  size_t img_size = (char *)image->ImageEnd - img_begin;
+
+  Elf *e = elf_memory(img_begin, img_size);
+  if (!e) {
+    DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1));
+    return 0;
+  }
+
+  Elf64_Ehdr *eh64 = elf64_getehdr(e);
+
+  if (!eh64) {
+    DP("Unable to get machine ID from ELF file!\n");
+    elf_end(e);
+    return 0;
+  }
+
+  uint32_t Flags = eh64->e_flags;
+
+  elf_end(e);
+  DP("ELF Flags: 0x%x\n", Flags);
+  return Flags;
+}
 } // namespace

 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
@@ -676,9 +705,20 @@ int32_t __tgt_rtl_init_device(int device_id) {
     DeviceInfo.ComputeUnits[device_id] = compute_units;
     DP("Using %d compute unis per grid\n", DeviceInfo.ComputeUnits[device_id]);
   }
+
+  char GetInfoName[64]; // 64 max size returned by get info
+  err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
+                           (void *) GetInfoName);
+  if (err)
+    DeviceInfo.GPUName[device_id] = "--unknown gpu--";
+  else {
+    DeviceInfo.GPUName[device_id] = GetInfoName;
+  }
+
   if (print_kernel_trace == 4)
-    fprintf(stderr, "Device#%-2d CU's: %2d\n", device_id,
-            DeviceInfo.ComputeUnits[device_id]);
+    fprintf(stderr, "Device#%-2d CU's: %2d %s\n", device_id,
+            DeviceInfo.ComputeUnits[device_id],
+            DeviceInfo.GPUName[device_id].c_str());

   // Query attributes to determine number of threads/block and blocks/grid.
   uint16_t workgroup_max_dim[3];
@@ -1038,22 +1078,18 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
       return ATMI_STATUS_SUCCESS;
     };

-    atmi_status_t err;
     {
-      err = module_register_from_memory_to_place(
+      atmi_status_t err = module_register_from_memory_to_place(
           (void *)image->ImageStart, img_size, get_gpu_place(device_id),
           on_deserialized_data);

       check("Module registering", err);
       if (err != ATMI_STATUS_SUCCESS) {
-        char GPUName[64] = "--unknown gpu--";
-        hsa_agent_t agent = DeviceInfo.HSAAgents[device_id];
-        (void)hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
-                                 (void *)GPUName);
         fprintf(stderr,
-                "Possible gpu arch mismatch: %s, please check"
-                " compiler: -march=<gpu> flag\n",
-                GPUName);
+                "Possible gpu arch mismatch: device:%s, image:%s please check"
+                " compiler flag: -march=<gpu>\n",
+                DeviceInfo.GPUName[device_id].c_str(),
+                get_elf_mach_gfx_name(elf_e_flags(image)));
         return NULL;
       }
     }
@@ -1149,8 +1185,8 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
       void *varptr;
       uint32_t varsize;

-      err = atmi_interop_hsa_get_symbol_info(get_gpu_mem_place(device_id),
-                                             e->name, &varptr, &varsize);
+      atmi_status_t err = atmi_interop_hsa_get_symbol_info(
+          get_gpu_mem_place(device_id), e->name, &varptr, &varsize);

       if (err != ATMI_STATUS_SUCCESS) {
         DP("Loading global '%s' (Failed)\n", e->name);
@@ -1192,7 +1228,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,

       atmi_mem_place_t place = get_gpu_mem_place(device_id);
       uint32_t kernarg_segment_size;
-      err = atmi_interop_hsa_get_kernel_info(
+      atmi_status_t err = atmi_interop_hsa_get_kernel_info(
           place, e->name,
           HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
           &kernarg_segment_size);



_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits