https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/76955
>From 616dda8bc9e000e4243ddb8f6b7f4b04f956a620 Mon Sep 17 00:00:00 2001 From: pvanhout <pierre.vanhoutr...@amd.com> Date: Thu, 4 Jan 2024 14:48:05 +0100 Subject: [PATCH] [AMDGPU] Introduce GFX9/10.1/10.3/11 Generic Targets These generic targets include multiple GPUs and will, in the future, provide a way to build once and run on multiple GPUs, at the cost of fewer optimization opportunities. Note that this is just doing the compiler side of things, device libs and runtimes/loader/etc. don't know about these targets yet, so none of them actually work in practice right now. This is just the initial commit to make LLVM aware of them. No docs in this patch either as I plan to do it all in a follow-up patch. --- clang/lib/Basic/Targets/AMDGPU.cpp | 20 +- clang/test/Driver/amdgpu-macros.cl | 5 + clang/test/Driver/amdgpu-mcpu.cl | 10 + llvm/docs/AMDGPUUsage.rst | 325 +++++++++++++----- llvm/include/llvm/BinaryFormat/ELF.h | 6 +- llvm/include/llvm/TargetParser/TargetParser.h | 10 + llvm/lib/Object/ELFObjectFile.cpp | 10 + llvm/lib/ObjectYAML/ELFYAML.cpp | 4 + llvm/lib/Target/AMDGPU/AMDGPU.td | 87 +++-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 6 + .../AMDGPURemoveIncompatibleFunctions.cpp | 6 +- llvm/lib/Target/AMDGPU/GCNProcessors.td | 22 ++ llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 + .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 26 ++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 11 + llvm/lib/TargetParser/TargetParser.cpp | 46 +++ .../GlobalISel/llvm.amdgcn.workitem.id.ll | 1 + .../CodeGen/AMDGPU/directive-amdgcn-target.ll | 14 + .../CodeGen/AMDGPU/elf-header-flags-mach.ll | 10 + llvm/test/CodeGen/AMDGPU/gds-allocation.ll | 1 + llvm/test/CodeGen/AMDGPU/gds-atomic.ll | 1 + .../AMDGPU/generic-targets-require-v6.ll | 18 + .../AMDGPU/hsa-generic-target-features.ll | 31 ++ .../llvm.amdgcn.image.gather4.d16.dim.ll | 3 + .../AMDGPU/llvm.amdgcn.image.sample.dim.ll | 3 + .../AMDGPU/unsupported-image-sample.ll | 12 +- .../Object/AMDGPU/elf-header-flags-mach.yaml | 29 ++ 
.../llvm-objdump/ELF/AMDGPU/subtarget.ll | 20 ++ .../llvm-readobj/ELF/AMDGPU/elf-headers.test | 12 + llvm/tools/llvm-readobj/ELFDumper.cpp | 128 +++---- 30 files changed, 689 insertions(+), 192 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll create mode 100644 llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 141501e8a4d9a..799634ccec7ba 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -279,13 +279,25 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost) return; - StringRef CanonName = isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind) - : getArchNameR600(GPUKind); + std::string CanonName = (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind) + : getArchNameR600(GPUKind)) + .str(); + + // Sanitize the name of generic targets. + // e.g. gfx10.1-generic -> gfx10_1_generic + if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST && + GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) { + std::replace(CanonName.begin(), CanonName.end(), '.', '_'); + std::replace(CanonName.begin(), CanonName.end(), '-', '_'); + } + Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); // Emit macros for gfx family e.g. 
gfx906 -> __GFX9__, gfx1030 -> __GFX10___ if (isAMDGCN(getTriple()) && !IsHIPHost) { - assert(CanonName.starts_with("gfx") && "Invalid amdgcn canonical name"); - Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) + + assert(StringRef(CanonName).starts_with("gfx") && + "Invalid amdgcn canonical name"); + StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind); + Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) + Twine("__")); Builder.defineMacro("__amdgcn_processor__", Twine("\"") + Twine(CanonName) + Twine("\"")); diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl index 81c22af460d12..3b10444ef71d3 100644 --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -131,6 +131,11 @@ // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1200 -DFAMILY=GFX12 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_generic -DFAMILY=GFX9 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx11_generic -DFAMILY=GFX11 + // ARCH-GCN-DAG: #define FP_FAST_FMA 1 // FAST_FMAF-DAG: #define FP_FAST_FMAF 1 diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl index 
eeb16ae98ebad..6f18ea0615cb6 100644 --- a/clang/test/Driver/amdgpu-mcpu.cl +++ b/clang/test/Driver/amdgpu-mcpu.cl @@ -115,6 +115,11 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s +// RUN: %clang -### -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefix=GFX9_GENERIC %s +// RUN: %clang -### -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s +// RUN: %clang -### -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s +// RUN: %clang -### -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefix=GFX11_GENERIC %s + // GCNDEFAULT-NOT: -target-cpu // GFX600: "-target-cpu" "gfx600" // GFX601: "-target-cpu" "gfx601" @@ -160,3 +165,8 @@ // GFX1151: "-target-cpu" "gfx1151" // GFX1200: "-target-cpu" "gfx1200" // GFX1201: "-target-cpu" "gfx1201" + +// GFX9_GENERIC: "-target-cpu" "gfx9-generic" +// GFX10_1_GENERIC: "-target-cpu" "gfx10.1-generic" +// GFX10_3_GENERIC: "-target-cpu" "gfx10.3-generic" +// GFX11_GENERIC: "-target-cpu" "gfx11-generic" diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 6b2417143ca06..3319524b39bb3 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -520,6 +520,104 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following =========== =============== ============ ===== ================= =============== =============== ====================== +Generic processors also exist. +Generic processor code objects can be executed on any of the processors that +are supported by the generic processor. Such code objects may not perform as well +as those for the non-generic processors. + +Generic processors are only available on code object V6 and above (see :ref:`amdgpu-elf-code-object`). 
+ +Generic processor code objects are versioned (see :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`). +The version number is used by runtimes to determine if a code object can be run on a specific agent. +The version number may be increased as-needed, e.g., when a major codegen change occurs for a generic +target. + + .. table:: AMDGPU Generic Processors + :name: amdgpu-generic-processor-table + + ==================== ============== ================= ================== ================= ================================= + Processor Target Supported Target Features Target Properties Target Restrictions + Triple Processors Supported + Architecture + + ==================== ============== ================= ================== ================= ================================= + ``gfx9-generic`` ``amdgcn`` - ``gfx900`` - xnack - Absolute flat - ``v_mad_mix`` instructions + - ``gfx902`` scratch are not available on + - ``gfx904`` ``gfx900``, ``gfx902``, + - ``gfx906`` ``gfx909``, ``gfx90c`` + - ``gfx909`` - ``v_fma_mix`` instructions + - ``gfx90c`` are not available on ``gfx904`` + - sramecc is not available on + ``gfx906`` + - The following instructions + are not available on ``gfx906``: + + - ``v_fmac_f32`` + - ``v_xnor_b32`` + - ``v_dot4_i32_i8`` + - ``v_dot8_i32_i4`` + - ``v_dot2_i32_i16`` + - ``v_dot2_u32_u16`` + - ``v_dot4_u32_u8`` + - ``v_dot8_u32_u4`` + - ``v_dot2_f32_f16`` + + + ``gfx10.1-generic`` ``amdgcn`` - ``gfx1010`` - xnack - Absolute flat - The following instructions are + - ``gfx1011`` - wavefrontsize64 scratch not available on ``gfx1011`` + - ``gfx1012`` - cumode and ``gfx1012`` + - ``gfx1013`` + - ``v_dot4_i32_i8`` + - ``v_dot8_i32_i4`` + - ``v_dot2_i32_i16`` + - ``v_dot2_u32_u16`` + - ``v_dot2c_f32_f16`` + - ``v_dot4c_i32_i8`` + - ``v_dot4_u32_u8`` + - ``v_dot8_u32_u4`` + - ``v_dot2_f32_f16`` + + - BVH Ray Tracing instructions + are not available on + ``gfx1013`` + + + ``gfx10.3-generic`` ``amdgcn`` - ``gfx1030`` - wavefrontsize64 - Absolute flat 
No restrictions. + - ``gfx1031`` - cumode scratch + - ``gfx1032`` + - ``gfx1033`` + - ``gfx1034`` + - ``gfx1035`` + - ``gfx1036`` + + + ``gfx11-generic`` ``amdgcn`` - ``gfx1100`` - wavefrontsize64 - Architected Various codegen pessimizations + - ``gfx1101`` - cumode flat scratch are applied to work around some + - ``gfx1102`` - Packed hazards specific to some targets + - ``gfx1103`` work-item within this family. + - ``gfx1150`` IDs + - ``gfx1151`` Not all VGPRs can be used on: + + - ``gfx1100`` + - ``gfx1101`` + - ``gfx1151`` + + SALU floating point instructions + and single-use VGPR hint + instructions are not available + on: + + - ``gfx1150`` + - ``gfx1151`` + + SGPRs are not supported for src1 + in dpp instructions for: + + - ``gfx1150`` + - ``gfx1151`` + ==================== ============== ================= ================== ================= ================================= + + .. _amdgpu-target-features: Target Features @@ -533,7 +631,7 @@ generating the code. A mismatch of features may result in incorrect execution, or a reduction in performance. The target features supported by each processor is listed in -:ref:`amdgpu-processor-table`. +:ref:`amdgpu-processors`. Target features are controlled by exactly one of the following Clang options: @@ -1443,6 +1541,7 @@ The AMDGPU backend uses the following ELF header: - ``ELFABIVERSION_AMDGPU_HSA_V3`` - ``ELFABIVERSION_AMDGPU_HSA_V4`` - ``ELFABIVERSION_AMDGPU_HSA_V5`` + - ``ELFABIVERSION_AMDGPU_HSA_V6`` - ``ELFABIVERSION_AMDGPU_PAL`` - ``ELFABIVERSION_AMDGPU_MESA3D`` ``e_type`` - ``ET_REL`` @@ -1451,7 +1550,8 @@ The AMDGPU backend uses the following ELF header: ``e_entry`` 0 ``e_flags`` See :ref:`amdgpu-elf-header-e_flags-v2-table`, :ref:`amdgpu-elf-header-e_flags-table-v3`, - and :ref:`amdgpu-elf-header-e_flags-table-v4-onwards` + :ref:`amdgpu-elf-header-e_flags-table-v4-v5`, + and :ref:`amdgpu-elf-header-e_flags-table-v6-onwards` ========================== =============================== .. 
@@ -1471,6 +1571,7 @@ The AMDGPU backend uses the following ELF header: ``ELFABIVERSION_AMDGPU_HSA_V3`` 1 ``ELFABIVERSION_AMDGPU_HSA_V4`` 2 ``ELFABIVERSION_AMDGPU_HSA_V5`` 3 + ``ELFABIVERSION_AMDGPU_HSA_V6`` 4 ``ELFABIVERSION_AMDGPU_PAL`` 0 ``ELFABIVERSION_AMDGPU_MESA3D`` 0 =============================== ===== @@ -1517,6 +1618,10 @@ The AMDGPU backend uses the following ELF header: ``-mcode-object-version=5``. This is the default code object version if not specified. + * ``ELFABIVERSION_AMDGPU_HSA_V6`` is used to specify the version of AMD HSA + runtime ABI for code object V6. Specify using the Clang option + ``-mcode-object-version=6``. + * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL runtime ABI. @@ -1543,8 +1648,9 @@ The AMDGPU backend uses the following ELF header: ``NT_AMD_HSA_ISA_VERSION`` note record for code object V2 (see :ref:`amdgpu-note-records-v2`) and in the ``EF_AMDGPU_MACH`` bit field of the ``e_flags`` for code object V3 and above (see - :ref:`amdgpu-elf-header-e_flags-table-v3` and - :ref:`amdgpu-elf-header-e_flags-table-v4-onwards`). + :ref:`amdgpu-elf-header-e_flags-table-v3`, + :ref:`amdgpu-elf-header-e_flags-table-v4-v5` and + :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`). ``e_entry`` The entry point is 0 as the entry points for individual kernels must be @@ -1615,8 +1721,8 @@ The AMDGPU backend uses the following ELF header: :ref:`amdgpu-target-features`. ================================= ===== ============================= - .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V4 and After - :name: amdgpu-elf-header-e_flags-table-v4-onwards + .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V4 and V5 + :name: amdgpu-elf-header-e_flags-table-v4-v5 ============================================ ===== =================================== Name Value Description @@ -1642,80 +1748,118 @@ The AMDGPU backend uses the following ELF header: ``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled. 
============================================ ===== =================================== + .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V6 and After + :name: amdgpu-elf-header-e_flags-table-v6-onwards + + ============================================ ========== ========================================= + Name Value Description + ============================================ ========== ========================================= + ``EF_AMDGPU_MACH`` 0x0ff AMDGPU processor selection + mask for + ``EF_AMDGPU_MACH_xxx`` values + defined in + :ref:`amdgpu-ef-amdgpu-mach-table`. + ``EF_AMDGPU_FEATURE_XNACK_V4`` 0x300 XNACK selection mask for + ``EF_AMDGPU_FEATURE_XNACK_*_V4`` + values. + ``EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4`` 0x000 XNACK unsupported. + ``EF_AMDGPU_FEATURE_XNACK_ANY_V4`` 0x100 XNACK can have any value. + ``EF_AMDGPU_FEATURE_XNACK_OFF_V4`` 0x200 XNACK disabled. + ``EF_AMDGPU_FEATURE_XNACK_ON_V4`` 0x300 XNACK enabled. + ``EF_AMDGPU_FEATURE_SRAMECC_V4`` 0xc00 SRAMECC selection mask for + ``EF_AMDGPU_FEATURE_SRAMECC_*_V4`` + values. + ``EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4`` 0x000 SRAMECC unsupported. + ``EF_AMDGPU_FEATURE_SRAMECC_ANY_V4`` 0x400 SRAMECC can have any value. + ``EF_AMDGPU_FEATURE_SRAMECC_OFF_V4`` 0x800 SRAMECC disabled. + ``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled. + ``EF_AMDGPU_GENERIC_VERSION_V<X>`` 0x01000000 Value between 1 and 255 for generic code + to object versioning, stored in the MSB. + 0xff000000 See :ref:`amdgpu-generic-processor-table` + ============================================ ========== ========================================= + .. 
table:: AMDGPU ``EF_AMDGPU_MACH`` Values :name: amdgpu-ef-amdgpu-mach-table - ==================================== ========== ============================= - Name Value Description (see - :ref:`amdgpu-processor-table`) - ==================================== ========== ============================= - ``EF_AMDGPU_MACH_NONE`` 0x000 *not specified* - ``EF_AMDGPU_MACH_R600_R600`` 0x001 ``r600`` - ``EF_AMDGPU_MACH_R600_R630`` 0x002 ``r630`` - ``EF_AMDGPU_MACH_R600_RS880`` 0x003 ``rs880`` - ``EF_AMDGPU_MACH_R600_RV670`` 0x004 ``rv670`` - ``EF_AMDGPU_MACH_R600_RV710`` 0x005 ``rv710`` - ``EF_AMDGPU_MACH_R600_RV730`` 0x006 ``rv730`` - ``EF_AMDGPU_MACH_R600_RV770`` 0x007 ``rv770`` - ``EF_AMDGPU_MACH_R600_CEDAR`` 0x008 ``cedar`` - ``EF_AMDGPU_MACH_R600_CYPRESS`` 0x009 ``cypress`` - ``EF_AMDGPU_MACH_R600_JUNIPER`` 0x00a ``juniper`` - ``EF_AMDGPU_MACH_R600_REDWOOD`` 0x00b ``redwood`` - ``EF_AMDGPU_MACH_R600_SUMO`` 0x00c ``sumo`` - ``EF_AMDGPU_MACH_R600_BARTS`` 0x00d ``barts`` - ``EF_AMDGPU_MACH_R600_CAICOS`` 0x00e ``caicos`` - ``EF_AMDGPU_MACH_R600_CAYMAN`` 0x00f ``cayman`` - ``EF_AMDGPU_MACH_R600_TURKS`` 0x010 ``turks`` - *reserved* 0x011 - Reserved for ``r600`` - 0x01f architecture processors. - ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 0x020 ``gfx600`` - ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 0x021 ``gfx601`` - ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 0x022 ``gfx700`` - ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 0x023 ``gfx701`` - ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 0x024 ``gfx702`` - ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 0x025 ``gfx703`` - ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 0x026 ``gfx704`` - *reserved* 0x027 Reserved. 
- ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 0x028 ``gfx801`` - ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 0x029 ``gfx802`` - ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 0x02a ``gfx803`` - ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810`` - ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900`` - ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902`` - ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904`` - ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906`` - ``EF_AMDGPU_MACH_AMDGCN_GFX908`` 0x030 ``gfx908`` - ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909`` - ``EF_AMDGPU_MACH_AMDGCN_GFX90C`` 0x032 ``gfx90c`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1010`` 0x033 ``gfx1010`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1011`` 0x034 ``gfx1011`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1012`` 0x035 ``gfx1012`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1030`` 0x036 ``gfx1030`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1031`` 0x037 ``gfx1031`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1032`` 0x038 ``gfx1032`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1033`` 0x039 ``gfx1033`` - ``EF_AMDGPU_MACH_AMDGCN_GFX602`` 0x03a ``gfx602`` - ``EF_AMDGPU_MACH_AMDGCN_GFX705`` 0x03b ``gfx705`` - ``EF_AMDGPU_MACH_AMDGCN_GFX805`` 0x03c ``gfx805`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034`` - ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a`` - ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1103`` 0x044 ``gfx1103`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1036`` 0x045 ``gfx1036`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1101`` 0x046 ``gfx1101`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1102`` 0x047 ``gfx1102`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200`` - *reserved* 0x049 Reserved. - ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151`` - ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941`` - ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942`` - *reserved* 0x04d Reserved. 
- ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201`` - ==================================== ========== ============================= + ========================================== ========== ============================= + Name Value Description (see + :ref:`amdgpu-processor-table`) + ========================================== ========== ============================= + ``EF_AMDGPU_MACH_NONE`` 0x000 *not specified* + ``EF_AMDGPU_MACH_R600_R600`` 0x001 ``r600`` + ``EF_AMDGPU_MACH_R600_R630`` 0x002 ``r630`` + ``EF_AMDGPU_MACH_R600_RS880`` 0x003 ``rs880`` + ``EF_AMDGPU_MACH_R600_RV670`` 0x004 ``rv670`` + ``EF_AMDGPU_MACH_R600_RV710`` 0x005 ``rv710`` + ``EF_AMDGPU_MACH_R600_RV730`` 0x006 ``rv730`` + ``EF_AMDGPU_MACH_R600_RV770`` 0x007 ``rv770`` + ``EF_AMDGPU_MACH_R600_CEDAR`` 0x008 ``cedar`` + ``EF_AMDGPU_MACH_R600_CYPRESS`` 0x009 ``cypress`` + ``EF_AMDGPU_MACH_R600_JUNIPER`` 0x00a ``juniper`` + ``EF_AMDGPU_MACH_R600_REDWOOD`` 0x00b ``redwood`` + ``EF_AMDGPU_MACH_R600_SUMO`` 0x00c ``sumo`` + ``EF_AMDGPU_MACH_R600_BARTS`` 0x00d ``barts`` + ``EF_AMDGPU_MACH_R600_CAICOS`` 0x00e ``caicos`` + ``EF_AMDGPU_MACH_R600_CAYMAN`` 0x00f ``cayman`` + ``EF_AMDGPU_MACH_R600_TURKS`` 0x010 ``turks`` + *reserved* 0x011 - Reserved for ``r600`` + 0x01f architecture processors. + ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 0x020 ``gfx600`` + ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 0x021 ``gfx601`` + ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 0x022 ``gfx700`` + ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 0x023 ``gfx701`` + ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 0x024 ``gfx702`` + ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 0x025 ``gfx703`` + ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 0x026 ``gfx704`` + *reserved* 0x027 Reserved. 
+ ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 0x028 ``gfx801`` + ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 0x029 ``gfx802`` + ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 0x02a ``gfx803`` + ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810`` + ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900`` + ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902`` + ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904`` + ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906`` + ``EF_AMDGPU_MACH_AMDGCN_GFX908`` 0x030 ``gfx908`` + ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909`` + ``EF_AMDGPU_MACH_AMDGCN_GFX90C`` 0x032 ``gfx90c`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1010`` 0x033 ``gfx1010`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1011`` 0x034 ``gfx1011`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1012`` 0x035 ``gfx1012`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1030`` 0x036 ``gfx1030`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1031`` 0x037 ``gfx1031`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1032`` 0x038 ``gfx1032`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1033`` 0x039 ``gfx1033`` + ``EF_AMDGPU_MACH_AMDGCN_GFX602`` 0x03a ``gfx602`` + ``EF_AMDGPU_MACH_AMDGCN_GFX705`` 0x03b ``gfx705`` + ``EF_AMDGPU_MACH_AMDGCN_GFX805`` 0x03c ``gfx805`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034`` + ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a`` + ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1103`` 0x044 ``gfx1103`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1036`` 0x045 ``gfx1036`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1101`` 0x046 ``gfx1101`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1102`` 0x047 ``gfx1102`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200`` + *reserved* 0x049 Reserved. + ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151`` + ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941`` + ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942`` + *reserved* 0x04d Reserved. 
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201`` + *reserved* 0x04f Reserved. + *reserved* 0x050 Reserved. + ``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic`` + ``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10.1-generic`` + ``EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC`` 0x053 ``gfx10.3-generic`` + ``EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC`` 0x054 ``gfx11-generic`` + ========================================== ========== ============================= Sections -------- @@ -4143,6 +4287,33 @@ Code object V5 metadata is the same as ====================== ============== ========= ================================ +.. _amdgpu-amdhsa-code-object-metadata-v6: + +Code Object V6 Metadata ++++++++++++++++++++++++ + +.. warning:: + Code object V6 is not the default code object version emitted by this version + of LLVM. + + +Code object V6 metadata is the same as +:ref:`amdgpu-amdhsa-code-object-metadata-v5` with the changes defined in table +:ref:`amdgpu-amdhsa-code-object-metadata-map-table-v6`. + + .. table:: AMDHSA Code Object V6 Metadata Map Changes + :name: amdgpu-amdhsa-code-object-metadata-map-table-v6 + + ================= ============== ========= ======================================= + String Key Value Type Required? Description + ================= ============== ========= ======================================= + "amdhsa.version" sequence of Required - The first integer is the major + 2 integers version. Currently 1. + - The second integer is the minor + version. Currently 3. + ================= ============== ========= ======================================= + + .. 
Kernel Dispatch diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index efd41f9812baa..3eddaee4f7d1a 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -790,11 +790,15 @@ enum : unsigned { EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050, + EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051, + EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052, + EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053, + EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054, // clang-format on // First/last AMDGCN-based processors. EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600, - EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1201, + EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, // Indicates if the "xnack" target feature is enabled for all code contained // in the object. diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h index 6464285980f00..7da11701fefb8 100644 --- a/llvm/include/llvm/TargetParser/TargetParser.h +++ b/llvm/include/llvm/TargetParser/TargetParser.h @@ -111,6 +111,14 @@ enum GPUKind : uint32_t { GK_AMDGCN_FIRST = GK_GFX600, GK_AMDGCN_LAST = GK_GFX1201, + + GK_GFX9_GENERIC = 192, + GK_GFX10_1_GENERIC = 193, + GK_GFX10_3_GENERIC = 194, + GK_GFX11_GENERIC = 195, + + GK_AMDGCN_GENERIC_FIRST = GK_GFX9_GENERIC, + GK_AMDGCN_GENERIC_LAST = GK_GFX11_GENERIC, }; /// Instruction set architecture version. 
@@ -147,6 +155,8 @@ enum ArchFeatureKind : uint32_t { FEATURE_WGP = 1 << 9, }; +StringRef getArchFamilyNameAMDGCN(GPUKind AK); + StringRef getArchNameAMDGCN(GPUKind AK); StringRef getArchNameR600(GPUKind AK); StringRef getCanonicalArchName(const Triple &T, StringRef Arch); diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 8f7eead02a66c..8acd901dbfc0b 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -514,6 +514,16 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const { return "gfx1200"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: return "gfx1201"; + + // Generic AMDGCN targets + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: + return "gfx9-generic"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: + return "gfx10.1-generic"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: + return "gfx10.3-generic"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: + return "gfx11-generic"; default: llvm_unreachable("Unknown EF_AMDGPU_MACH value"); } diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 1436e920c0112..de1ef2458152c 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -612,6 +612,10 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO, BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1151, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, EF_AMDGPU_MACH); switch (Object->Header.ABIVersion) { default: // ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags. 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 55dbc1a803e13..4ab2b124ef530 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1002,6 +1002,12 @@ def FeatureGWS : SubtargetFeature<"gws", "Has Global Wave Sync" >; +def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6", + "RequiresCOV6", + "true", + "Target Requires Code Object V6" +>; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", @@ -1212,6 +1218,17 @@ def FeatureISAVersion9_0_Common : FeatureSet< FeatureImageInsts, FeatureMadMacF32Insts]>; +def FeatureISAVersion9_0_Consumer_Common : FeatureSet< + !listconcat(FeatureISAVersion9_0_Common.Features, + [FeatureImageGather4D16Bug, + FeatureDsSrc2Insts, + FeatureExtendedImageInsts, + FeatureGDS])>; + +def FeatureISAVersion9_Generic : FeatureSet< + !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [FeatureRequiresCOV6])>; + def FeatureISAVersion9_0_MI_Common : FeatureSet< !listconcat(FeatureISAVersion9_0_Common.Features, [FeatureFmaMixInsts, @@ -1230,43 +1247,27 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet< FeatureSupportsSRAMECC])>; def FeatureISAVersion9_0_0 : FeatureSet< - !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureGDS, - FeatureMadMixInsts, - FeatureDsSrc2Insts, - FeatureExtendedImageInsts, - FeatureImageGather4D16Bug])>; + !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [FeatureMadMixInsts])>; def FeatureISAVersion9_0_2 : FeatureSet< - !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureGDS, - FeatureMadMixInsts, - FeatureDsSrc2Insts, - FeatureExtendedImageInsts, - FeatureImageGather4D16Bug])>; + !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [FeatureMadMixInsts])>; def FeatureISAVersion9_0_4 : FeatureSet< - !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureGDS, - FeatureDsSrc2Insts, - FeatureExtendedImageInsts, - 
FeatureFmaMixInsts, - FeatureImageGather4D16Bug])>; + !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [FeatureFmaMixInsts])>; def FeatureISAVersion9_0_6 : FeatureSet< - !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureGDS, - HalfRate64Ops, + !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [HalfRate64Ops, FeatureFmaMixInsts, - FeatureDsSrc2Insts, - FeatureExtendedImageInsts, FeatureDLInsts, FeatureDot1Insts, FeatureDot2Insts, FeatureDot7Insts, FeatureDot10Insts, - FeatureSupportsSRAMECC, - FeatureImageGather4D16Bug])>; + FeatureSupportsSRAMECC])>; def FeatureISAVersion9_0_8 : FeatureSet< !listconcat(FeatureISAVersion9_0_MI_Common.Features, @@ -1279,13 +1280,9 @@ def FeatureISAVersion9_0_8 : FeatureSet< FeatureImageGather4D16Bug])>; def FeatureISAVersion9_0_9 : FeatureSet< - !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureGDS, - FeatureMadMixInsts, - FeatureDsSrc2Insts, - FeatureExtendedImageInsts, - FeatureImageInsts, - FeatureImageGather4D16Bug])>; + !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [FeatureMadMixInsts, + FeatureImageInsts])>; def FeatureISAVersion9_0_A : FeatureSet< !listconcat(FeatureISAVersion9_0_MI_Common.Features, @@ -1301,12 +1298,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureKernargPreload])>; def FeatureISAVersion9_0_C : FeatureSet< - !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureGDS, - FeatureMadMixInsts, - FeatureDsSrc2Insts, - FeatureExtendedImageInsts, - FeatureImageGather4D16Bug])>; + !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [FeatureMadMixInsts])>; def FeatureISAVersion9_4_Common : FeatureSet< [FeatureGFX9, @@ -1387,6 +1380,10 @@ def FeatureISAVersion10_1_Common : FeatureSet< FeatureFlatSegmentOffsetBug, FeatureNegativeUnalignedScratchOffsetBug])>; +def FeatureISAVersion10_1_Generic : FeatureSet< + !listconcat(FeatureISAVersion10_1_Common.Features, + [FeatureRequiresCOV6])>; + def FeatureISAVersion10_1_0 : FeatureSet< 
!listconcat(FeatureISAVersion10_1_Common.Features, [])>; @@ -1426,6 +1423,10 @@ def FeatureISAVersion10_3_0 : FeatureSet< FeatureDot10Insts, FeatureShaderCyclesRegister])>; +def FeatureISAVersion10_3_Generic: FeatureSet< + !listconcat(FeatureISAVersion10_3_0.Features, + [FeatureRequiresCOV6])>; + def FeatureISAVersion11_Common : FeatureSet< [FeatureGFX11, FeatureLDSBankCount32, @@ -1447,6 +1448,16 @@ def FeatureISAVersion11_Common : FeatureSet< FeaturePackedTID, FeatureVcmpxPermlaneHazard]>; +// There are a few workarounds that need to be +// added to all targets. This pessimizes codegen +// a bit on the generic GFX11 target. +def FeatureISAVersion11_Generic: FeatureSet< + !listconcat(FeatureISAVersion11_Common.Features, + [FeatureMSAALoadDstSelBug, + FeatureVALUTransUseHazard, + FeatureUserSGPRInit16Bug, + FeatureRequiresCOV6])>; + def FeatureISAVersion11_0_Common : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, [FeatureMSAALoadDstSelBug, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index db81e1ee9e389..e5bc0233f534b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -156,6 +156,12 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() { const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>(); const Function &F = MF->getFunction(); + // TODO: We're checking this late, would be nice to check it earlier. + if (STM.requiresCodeObjectV6() && CodeObjectVersion < AMDGPU::AMDHSA_COV6) + report_fatal_error( + STM.getCPU() + " is only available on code object version 6 or better", + /*gen_crash_diag*/ false); + // TODO: Which one is called first, emitStartOfAsmFile or // emitFunctionBodyStart? 
if (!getTargetStreamer()->getTargetID()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp index 6f1236fd3b7da..9d44b65d1698c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp @@ -139,10 +139,10 @@ bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) { const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F)); - // Check the GPU isn't generic. Generic is used for testing only - // and we don't want this pass to interfere with it. + // Check the GPU isn't generic or generic-hsa. Generic is used for testing + // only and we don't want this pass to interfere with it. StringRef GPUName = ST->getCPU(); - if (GPUName.empty() || GPUName.contains("generic")) + if (GPUName.empty() || GPUName.starts_with("generic")) return false; // Try to fetch the GPU's info. If we can't, it's likely an unknown processor diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index 96af1a6aab3da..4671e03d43b3a 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -204,6 +204,11 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel, FeatureISAVersion9_4_2.Features >; +// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c] +def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel, + FeatureISAVersion9_Generic.Features +>; + //===----------------------------------------------------------------------===// // GCN GFX10. 
//===----------------------------------------------------------------------===// @@ -252,6 +257,16 @@ def : ProcessorModel<"gfx1036", GFX10SpeedModel, FeatureISAVersion10_3_0.Features >; +// [gfx1010, gfx1011, gfx1012, gfx1013] +def : ProcessorModel<"gfx10.1-generic", GFX10SpeedModel, + FeatureISAVersion10_1_Generic.Features +>; + +// [gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036] +def : ProcessorModel<"gfx10.3-generic", GFX10SpeedModel, + FeatureISAVersion10_3_Generic.Features +>; + //===----------------------------------------------------------------------===// // GCN GFX11. //===----------------------------------------------------------------------===// @@ -280,10 +295,17 @@ def : ProcessorModel<"gfx1151", GFX11SpeedModel, FeatureISAVersion11_5_1.Features >; +// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151] +def : ProcessorModel<"gfx11-generic", GFX11SpeedModel, + FeatureISAVersion11_Generic.Features +>; + //===----------------------------------------------------------------------===// // GCN GFX12. //===----------------------------------------------------------------------===// +// TODO: gfx12-generic ? + def : ProcessorModel<"gfx1200", GFX12SpeedModel, FeatureISAVersion12.Features >; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 4f8eeaaf500b4..b13b4f7d8f9a2 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -224,6 +224,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasVALUTransUseHazard = false; bool HasForceStoreSC0SC1 = false; + bool RequiresCOV6 = false; + // Dummy feature to use for assembler in tablegen. 
bool FeatureDisable = false; @@ -1165,6 +1167,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; } + bool requiresCodeObjectV6() const { return RequiresCOV6; } + bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; } /// Return if operations acting on VGPR tuples require even alignment. diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 5e9b1674d87dc..a25622c06bfa9 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -115,6 +115,10 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break; case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; default: AK = GK_NONE; break; } @@ -193,6 +197,10 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151; case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200; case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201; + case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC; + case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC; + case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC; + case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC; case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; 
} // clang-format on @@ -659,6 +667,24 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() { unsigned Flags = getEFlagsV4(); unsigned Version = ForceGenericVersion; + if (!Version) { + switch (parseArchAMDGCN(STI.getCPU())) { + case AMDGPU::GK_GFX9_GENERIC: + Version = GenericVersion::GFX9; + break; + case AMDGPU::GK_GFX10_1_GENERIC: + Version = GenericVersion::GFX10_1; + break; + case AMDGPU::GK_GFX10_3_GENERIC: + Version = GenericVersion::GFX10_3; + break; + case AMDGPU::GK_GFX11_GENERIC: + Version = GenericVersion::GFX11; + break; + default: + break; + } + } // Versions start at 1. if (Version) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index f24b9f0e3615d..ded252c81af3e 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -42,6 +42,17 @@ namespace AMDGPU { struct IsaVersion; +/// Generic target versions emitted by this version of LLVM. +/// +/// These numbers are incremented every time a codegen breaking change occurs +/// within a generic family. +namespace GenericVersion { +static constexpr unsigned GFX9 = 1; +static constexpr unsigned GFX10_1 = 1; +static constexpr unsigned GFX10_3 = 1; +static constexpr unsigned GFX11 = 1; +} // namespace GenericVersion + enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 }; /// \returns True if \p STI is AMDHSA. 
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 20f324604aa52..684d698521e59 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -126,6 +126,11 @@ constexpr GPUInfo AMDGCNGPUs[] = { {{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, {{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, {{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, + + {{"gfx9-generic"}, {"gfx9-generic"}, GK_GFX9_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"gfx10.1-generic"}, {"gfx10.1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, + {{"gfx10.3-generic"}, {"gfx10.3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, + {{"gfx11-generic"}, {"gfx11-generic"}, GK_GFX11_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, // clang-format on }; @@ -144,6 +149,22 @@ const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) { } // namespace +StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) { + switch (AK) { + case AMDGPU::GK_GFX9_GENERIC: + return "gfx9"; + case AMDGPU::GK_GFX10_1_GENERIC: + case AMDGPU::GK_GFX10_3_GENERIC: + return "gfx10"; + case AMDGPU::GK_GFX11_GENERIC: + return "gfx11"; + default: { + StringRef ArchName = getArchNameAMDGCN(AK); + return ArchName.empty() ? 
"" : ArchName.drop_back(2); + } + } +} + StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) { if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs)) return Entry->CanonicalName; @@ -253,6 +274,24 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { case GK_GFX1151: return {11, 5, 1}; case GK_GFX1200: return {12, 0, 0}; case GK_GFX1201: return {12, 0, 1}; + + // Generic targets return the lowest common denominator + // within their family. That is, the ISA that is the most + // restricted in terms of features. + // + // gfx9-generic is tricky because there is no lowest + // common denominator, so we return gfx900 which has mad-mix + // but this family doesn't have it. + // + // This API should never be used to check for a particular + // feature anyway. + // + // TODO: Split up this API depending on its caller so + // generic target handling is more obvious and less risky. + case GK_GFX9_GENERIC: return {9, 0, 0}; + case GK_GFX10_1_GENERIC: return {10, 1, 0}; + case GK_GFX10_3_GENERIC: return {10, 3, 0}; + case GK_GFX11_GENERIC: return {11, 0, 3}; default: return {0, 0, 0}; } // clang-format on @@ -302,6 +341,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, case GK_GFX1102: case GK_GFX1101: case GK_GFX1100: + case GK_GFX11_GENERIC: Features["ci-insts"] = true; Features["dot5-insts"] = true; Features["dot7-insts"] = true; @@ -327,6 +367,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, case GK_GFX1032: case GK_GFX1031: case GK_GFX1030: + case GK_GFX10_3_GENERIC: Features["ci-insts"] = true; Features["dot1-insts"] = true; Features["dot2-insts"] = true; @@ -357,6 +398,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, [[fallthrough]]; case GK_GFX1013: case GK_GFX1010: + case GK_GFX10_1_GENERIC: Features["dl-insts"] = true; Features["ci-insts"] = true; Features["16-bit-insts"] = true; @@ -424,6 +466,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, case GK_GFX904: case 
GK_GFX902: case GK_GFX900: + case GK_GFX9_GENERIC: Features["gfx9-insts"] = true; [[fallthrough]]; case GK_GFX810: @@ -510,6 +553,9 @@ static bool isWave32Capable(StringRef GPU, const Triple &T) { case GK_GFX1011: case GK_GFX1013: case GK_GFX1010: + case GK_GFX11_GENERIC: + case GK_GFX10_3_GENERIC: + case GK_GFX10_1_GENERIC: IsWave32Capable = true; break; default: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll index 155f4c92f97d3..4e921f2562054 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll @@ -6,6 +6,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s ; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s ; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll index 357fcf8ef1561..038219fc37404 100644 --- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ -108,6 +108,13 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX1200 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1201 < %s | FileCheck --check-prefixes=GFX1201 %s +; RUN: llc 
--amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_NOXNACK %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_XNACK %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_NOXNACK %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_XNACK %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck --check-prefixes=GFX10_3_GENERIC %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck --check-prefixes=GFX11_GENERIC %s + ; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" ; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601" ; GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602" @@ -196,6 +203,13 @@ ; GFX1200: .amdgcn_target "amdgcn-amd-amdhsa--gfx1200" ; GFX1201: .amdgcn_target "amdgcn-amd-amdhsa--gfx1201" +; GFX9_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack-" +; GFX9_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack+" +; GFX10_1_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic:xnack-" +; GFX10_1_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic:xnack+" +; GFX10_3_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.3-generic" +; GFX11_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx11-generic" + define amdgpu_kernel void @directive_amdgcn_target() { ret void } diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll index 380439d8cd9c6..9ba8176947174 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll +++ 
b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll @@ -77,6 +77,11 @@ ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1200 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1200 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1201 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1201 %s +; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx9-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX9_GENERIC %s +; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.1-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_1_GENERIC %s +; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.3-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_3_GENERIC %s +; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx11-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX11_GENERIC %s + ; FIXME: With the default attributes the eflags are not accurate for ; xnack and sramecc. Subsequent Target-ID patches will address this. 
@@ -149,6 +154,11 @@ ; GFX1151: EF_AMDGPU_MACH_AMDGCN_GFX1151 (0x4A) ; GFX1200: EF_AMDGPU_MACH_AMDGCN_GFX1200 (0x48) ; GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E) + +; GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51) +; GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52) +; GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53) +; GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54) ; ALL: ] define amdgpu_kernel void @elf_header() { diff --git a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll index dc6fea4ba1a37..1a9334706cb92 100644 --- a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx9-generic --amdhsa-code-object-version=6 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s @gds0 = internal addrspace(2) global [4 x i32] undef, align 4 @lds0 = internal addrspace(3) global [4 x i32] undef, align 128 diff --git a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll index 3e4e6938d72eb..8d44330b1b973 100644 --- a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; 
FUNC-LABEL: {{^}}atomic_add_ret_gds: ; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s diff --git a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll new file mode 100644 index 0000000000000..e3f4b14bac0c1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll @@ -0,0 +1,18 @@ +; RUN: not llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s +; RUN: not llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s +; RUN: not llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s +; RUN: not llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s + +; RUN: llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s + +; GFX9-V5: gfx9-generic is only available on code object version 6 or better +; GFX101-V5: gfx10.1-generic is only available on code object version 6 or better +; GFX103-V5: gfx10.3-generic is only available on code object version 6 or better +; GFX11-V5: gfx11-generic is only available on code object version 6 or better + +define void @foo() { + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll new file mode 100644 index 0000000000000..4fee563d1cc93 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll @@ -0,0 +1,31 @@ +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+cumode < %s | FileCheck 
-check-prefix=NOCU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic < %s | FileCheck -check-prefix=CU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck -check-prefix=CU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck -check-prefix=CU %s + +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s + +; Checks 10.1, 10.3 and 11 generic targets allow cumode/wave64. 
+ +; NOCU: .amdhsa_workgroup_processor_mode 0 +; NOCU: .workgroup_processor_mode: 0 +; CU: .amdhsa_workgroup_processor_mode 1 +; CU: .workgroup_processor_mode: 1 + +; W64: .amdhsa_wavefront_size32 0 +; W32: .amdhsa_wavefront_size32 1 + +define amdgpu_kernel void @wavefrontsize() { +entry: + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 600} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll index 91284d3838675..cf324d62e1de1 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll @@ -1,8 +1,11 @@ ; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck --check-prefix=GCN %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX12 %s ; GCN-LABEL: {{^}}image_gather4_b_2d_v4f16: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll index 
7dc139ef96974..10e1ae3ecfcbd 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll @@ -1,8 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll index 0878fc689cd4b..b08586efe2f21 100644 --- a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll @@ -1,15 +1,13 @@ -; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s -; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s +; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx906 
-verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s ; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX90A %s ; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX940 %s ; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s ; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s -; GFX906-LABEL: image_sample_test: -; GFX906: image_sample_lz - -; GFX908-LABEL: image_sample_test: -; GFX908: image_sample_lz +; GFX9-LABEL: image_sample_test: +; GFX9: image_sample_lz ; GFX90A: LLVM ERROR: requested image instruction is not supported on this GPU diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml index 7fb33ca662b19..4c2b4479a4aa3 100644 --- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml +++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml @@ -238,6 +238,23 @@ # RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1201 %s # RUN: obj2yaml %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1201 %s +# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX9_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX9_GENERIC +# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX9_GENERIC %s +# RUN: obj2yaml %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX9_GENERIC %s + +# RUN: sed -e 's/<BITS>/64/' -e 
's/<MACH>/AMDGCN_GFX10_1_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_1_GENERIC +# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_1_GENERIC %s +# RUN: obj2yaml %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_1_GENERIC %s + +# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_3_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_3_GENERIC +# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_3_GENERIC %s +# RUN: obj2yaml %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_3_GENERIC %s + +# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX11_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX11_GENERIC +# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX11_GENERIC %s +# RUN: obj2yaml %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX11_GENERIC %s + + # ELF-R600-ALL: Format: elf32-amdgpu # ELF-R600-ALL: Arch: r600 # ELF-R600-ALL: AddressSize: 32bit @@ -435,6 +452,18 @@ # ELF-AMDGCN-GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E) # YAML-AMDGCN-GFX1201: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1201 ] +# ELF-AMDGCN-GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51) +# YAML-AMDGCN-GFX9_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC ] + +# ELF-AMDGCN-GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52) +# YAML-AMDGCN-GFX10_1_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC ] + +# ELF-AMDGCN-GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53) +# YAML-AMDGCN-GFX10_3_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC ] + +# ELF-AMDGCN-GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54) +# YAML-AMDGCN-GFX11_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC ] + # ELF-AMDGCN-ALL: ] diff --git 
a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll index e296d7fb1fc8f..ca136a6a0d5ba 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll @@ -18,6 +18,11 @@ define amdgpu_kernel void @test_kernel() { ; ----------------------------------GFX11-------------------------------------- ; +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx11-generic %t.o > %t-specify.txt +; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 -filetype=obj -O0 -o %t.o %s ; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1151 %t.o > %t-specify.txt ; RUN: llvm-objdump -D %t.o > %t-detect.txt @@ -49,6 +54,11 @@ define amdgpu_kernel void @test_kernel() { ; RUN: diff %t-specify.txt %t-detect.txt ; ----------------------------------GFX10-------------------------------------- +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.3-generic -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.3-generic %t.o > %t-specify.txt +; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1036 -filetype=obj -O0 -o %t.o %s ; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1036 %t.o > %t-specify.txt ; RUN: llvm-objdump -D %t.o > %t-detect.txt @@ -84,6 +94,11 @@ define amdgpu_kernel void @test_kernel() { ; RUN: llvm-objdump -D %t.o > %t-detect.txt ; RUN: diff %t-specify.txt %t-detect.txt +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.1-generic 
-filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.1-generic %t.o > %t-specify.txt +; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -filetype=obj -O0 -o %t.o %s ; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1013 %t.o > %t-specify.txt ; RUN: llvm-objdump -D %t.o > %t-detect.txt @@ -107,6 +122,11 @@ define amdgpu_kernel void @test_kernel() { ; ----------------------------------GFX9--------------------------------------- ; +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-generic -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-generic %t.o > %t-specify.txt +; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s ; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx942 %t.o > %t-specify.txt ; RUN: llvm-objdump -D %t.o > %t-detect.txt diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test index e2266d81d1a59..7fbf4aae6c246 100644 --- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test +++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test @@ -253,6 +253,9 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42 +# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG 
--match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC -DFLAG_VALUE=0x52 + # RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42 @@ -322,6 +325,9 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 -DFLAG_VALUE=0x45 +# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC -DFLAG_VALUE=0x53 + # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3" # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F @@ -355,6 +361,9 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=16 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,UNKNOWN-ABI-VERSION --match-full-lines -DABI_VERSION=16 -DFILE=%t -DFLAG_VALUE=0x3F +# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC -DFLAG_VALUE=0x51 + # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 # RUN: 
llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41 @@ -391,6 +400,9 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103 -DFLAG_VALUE=0x44 +# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC -DFLAG_VALUE=0x54 + # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150 -DFLAG_VALUE=0x43 diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 82bb12f95d3a3..8e68f08c3fa9a 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1559,68 +1559,72 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = { }; // clang-format off -#define AMDGPU_MACH_ENUM_ENTS \ - ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \ - 
ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \ - 
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201") +#define AMDGPU_MACH_ENUM_ENTS \ + ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \ + 
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \ + 
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, "gfx9-generic"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, "gfx10.1-generic"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, "gfx10.3-generic"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, "gfx11-generic") // clang-format on const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits