llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> AMDGPU: Copy correct predicates for SDWA reals The special-case predicate handling is messy in several places. Currently, broad `let` blocks override instruction-specific predicates with more general ones. For instructions with SDWA, the HasSDWA predicate was overriding the SubtargetPredicate for the instruction. This change fixes enough of that to properly reject, on older targets, new instructions that support SDWA. AMDGPU: Add gfx950 subtarget definitions Mostly a stub, but adds some baseline tests and tests for removed instructions. --- Patch is 634.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116307.diff 47 Files Affected: - (modified) clang/docs/ReleaseNotes.rst (+2) - (modified) clang/include/clang/Basic/Cuda.h (+1) - (modified) clang/lib/Basic/Cuda.cpp (+1) - (modified) clang/lib/Basic/Targets/NVPTX.cpp (+1) - (modified) clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp (+1) - (modified) clang/test/CodeGenOpenCL/amdgpu-features.cl (+2) - (modified) clang/test/Driver/amdgpu-macros.cl (+1) - (modified) clang/test/Driver/amdgpu-mcpu.cl (+2) - (modified) clang/test/Misc/target-invalid-cpu-note/amdgcn.c (+1) - (modified) clang/test/Misc/target-invalid-cpu-note/nvptx.c (+1) - (modified) llvm/docs/AMDGPUUsage.rst (+8-1) - (modified) llvm/include/llvm/BinaryFormat/ELF.h (+1-1) - (modified) llvm/include/llvm/TargetParser/TargetParser.h (+13-12) - (modified) llvm/lib/Object/ELFObjectFile.cpp (+2) - (modified) llvm/lib/ObjectYAML/ELFYAML.cpp (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+20-2) - (modified) llvm/lib/Target/AMDGPU/GCNProcessors.td (+4) - (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+1) - (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (+2) - (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+2-2) - (modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+14-12) - 
(modified) llvm/lib/Target/AMDGPU/VOPCInstructions.td (+1-1) - (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+6-6) - (modified) llvm/lib/TargetParser/TargetParser.cpp (+9-2) - (modified) llvm/test/CodeGen/AMDGPU/bf16-conversions.ll (+232-113) - (modified) llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll (+6) - (modified) llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll (+2) - (modified) llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll (+8) - (modified) llvm/test/CodeGen/AMDGPU/fmaximum3.ll (+437-157) - (modified) llvm/test/CodeGen/AMDGPU/fminimum3.ll (+437-157) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll (+2) - (modified) llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll (+586-638) - (modified) llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll (+527-586) - (modified) llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll (+755-814) - (modified) llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll (+586-637) - (modified) llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll (+527-586) - (modified) llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll (+755-814) - (modified) llvm/test/MC/AMDGPU/flat-scratch-gfx940.s (+1) - (modified) llvm/test/MC/AMDGPU/gfx940_asm_features.s (+1) - (added) llvm/test/MC/AMDGPU/gfx950-unsupported.s (+179) - (added) llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt (+13) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt (+1) - (modified) llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml (+7) - (modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll (+5) - (modified) llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test (+9) - (modified) llvm/tools/llvm-readobj/ELFDumper.cpp (+1) - (modified) offload/DeviceRTL/CMakeLists.txt (+1-1) ``````````diff diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 681728d36952c1..5caedd0b6d3cfd 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -710,6 +710,8 @@ Target Specific Changes AMDGPU Support ^^^^^^^^^^^^^^ +* Initial support for 
gfx950 + X86 Support ^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 721e8981af6ffc..c2a4addf488df1 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -107,6 +107,7 @@ enum class OffloadArch { GFX940, GFX941, GFX942, + GFX950, GFX10_1_GENERIC, GFX1010, GFX1011, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 59c932468cd891..d56609a2a8f24a 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -125,6 +125,7 @@ static const OffloadArchToStringMap arch_names[] = { GFX(940), // gfx940 GFX(941), // gfx941 GFX(942), // gfx942 + GFX(950), // gfx950 {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"}, GFX(1010), // gfx1010 GFX(1011), // gfx1011 diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 0897032c4b8546..dbc3fec3657610 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -209,6 +209,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case OffloadArch::GFX940: case OffloadArch::GFX941: case OffloadArch::GFX942: + case OffloadArch::GFX950: case OffloadArch::GFX10_1_GENERIC: case OffloadArch::GFX1010: case OffloadArch::GFX1011: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 43dc0e62284602..b595d3250d6230 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2305,6 +2305,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { case OffloadArch::GFX940: case OffloadArch::GFX941: case OffloadArch::GFX942: + case OffloadArch::GFX950: case OffloadArch::GFX10_1_GENERIC: case OffloadArch::GFX1010: case OffloadArch::GFX1011: diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 8b56ec94f2c4ee..5c324032b51956 100644 --- 
a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -32,6 +32,7 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx941 -emit-llvm -o - %s | FileCheck --check-prefix=GFX941 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s @@ -88,6 +89,7 @@ // GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" // GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" // GFX9_4_Generic: 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl index d354f933c5ad78..d97b2ddb1fc663 100644 --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -110,6 +110,7 @@ // RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF 
%s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx942 -DFAMILY=GFX9 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx950 -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 -DFAMILY=GFX10 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011 -DFAMILY=GFX10 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012 -DFAMILY=GFX10 diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl index ba578435072985..7c34d3ec6c63a9 100644 --- a/clang/test/Driver/amdgpu-mcpu.cl +++ b/clang/test/Driver/amdgpu-mcpu.cl @@ -95,6 +95,7 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s // RUN: %clang -### -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefix=GFX941 %s // RUN: %clang -### -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefix=GFX942 %s +// RUN: %clang -### -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefix=GFX950 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s @@ -150,6 +151,7 @@ // GFX940: "-target-cpu" "gfx940" // GFX941: "-target-cpu" "gfx941" // GFX942: "-target-cpu" "gfx942" +// GFX950: "-target-cpu" "gfx950" // GFX1010: "-target-cpu" "gfx1010" // GFX1011: "-target-cpu" "gfx1011" // GFX1012: "-target-cpu" "gfx1012" diff --git 
a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c index 4e675871f1e5bd..642d2df211c21a 100644 --- a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c +++ b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c @@ -48,6 +48,7 @@ // CHECK-SAME: {{^}}, gfx940 // CHECK-SAME: {{^}}, gfx941 // CHECK-SAME: {{^}}, gfx942 +// CHECK-SAME: {{^}}, gfx950 // CHECK-SAME: {{^}}, gfx1010 // CHECK-SAME: {{^}}, gfx1011 // CHECK-SAME: {{^}}, gfx1012 diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c index 44fe07065b2428..3ea6c02d6b3846 100644 --- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c +++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c @@ -54,6 +54,7 @@ // CHECK-SAME: {{^}}, gfx940 // CHECK-SAME: {{^}}, gfx941 // CHECK-SAME: {{^}}, gfx942 +// CHECK-SAME: {{^}}, gfx950 // CHECK-SAME: {{^}}, gfx10-1-generic // CHECK-SAME: {{^}}, gfx1010 // CHECK-SAME: {{^}}, gfx1011 diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index c180ca5fcebef3..b85b680b9c82d3 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -399,6 +399,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following work-item IDs + ``gfx950`` ``amdgcn`` dGPU - sramecc - Architected *TBA* + - tgsplit flat + - xnack scratch .. TODO:: + - kernarg preload - Packed + work-item Add product + IDs names. + **GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_ ----------------------------------------------------------------------------------------------------------------------- ``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700 @@ -2178,7 +2185,7 @@ The AMDGPU backend uses the following ELF header: ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942`` *reserved* 0x04d Reserved. ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201`` - *reserved* 0x04f Reserved. + ``EF_AMDGPU_MACH_AMDGCN_GFX950`` 0x04f ``gfx950`` *reserved* 0x050 Reserved. 
``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic`` ``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10-1-generic`` diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 6c05ea7208e1f1..fd32a6ec19652b 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -811,7 +811,7 @@ enum : unsigned { EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d, EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e, - EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f, + EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050, EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051, EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052, diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h index c6db4dfd7f5159..55e7b417428c4e 100644 --- a/llvm/include/llvm/TargetParser/TargetParser.h +++ b/llvm/include/llvm/TargetParser/TargetParser.h @@ -86,18 +86,19 @@ enum GPUKind : uint32_t { GK_GFX940 = 68, GK_GFX941 = 69, GK_GFX942 = 70, - - GK_GFX1010 = 71, - GK_GFX1011 = 72, - GK_GFX1012 = 73, - GK_GFX1013 = 74, - GK_GFX1030 = 75, - GK_GFX1031 = 76, - GK_GFX1032 = 77, - GK_GFX1033 = 78, - GK_GFX1034 = 79, - GK_GFX1035 = 80, - GK_GFX1036 = 81, + GK_GFX950 = 71, + + GK_GFX1010 = 72, + GK_GFX1011 = 73, + GK_GFX1012 = 74, + GK_GFX1013 = 75, + GK_GFX1030 = 76, + GK_GFX1031 = 77, + GK_GFX1032 = 78, + GK_GFX1033 = 79, + GK_GFX1034 = 80, + GK_GFX1035 = 81, + GK_GFX1036 = 82, GK_GFX1100 = 90, GK_GFX1101 = 91, diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 9dc39936ffd8bb..2ffb2ac5e7e453 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -550,6 +550,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const { return "gfx941"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: return "gfx942"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: + return "gfx950"; // AMDGCN GFX10. 
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 130b8798ab4a46..ca0ea03452d3be 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -609,6 +609,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO, BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX950, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH); diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index c8ae010414dc40..d028c1f5ca7613 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -360,6 +360,12 @@ def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts", "Additional instructions for GFX940+" >; +def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts", + "GFX950Insts", + "true", + "Additional instructions for GFX950+" +>; + def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts", "GFX10Insts", "true", @@ -1470,6 +1476,14 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; +def FeatureISAVersion9_5_Common : FeatureSet< + !listconcat(FeatureISAVersion9_4_Common.Features, + [FeatureFP8Insts, + FeatureFP8ConversionInsts, + FeatureCvtFP8VOP1Bug, + FeatureGFX950Insts + ])>; + def FeatureISAVersion9_4_0 : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, [ @@ -1503,6 +1517,8 @@ def FeatureISAVersion9_4_Generic : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, [FeatureRequiresCOV6])>; +def FeatureISAVersion9_5_0 : FeatureSet<FeatureISAVersion9_5_Common.Features>; + def FeatureISAVersion10_Common : FeatureSet< [FeatureGFX10, FeatureLDSBankCount32, @@ -2103,8 +2119,10 @@ def 
NotHasMinMaxDenormModes : Predicate<"!Subtarget->supportsMinMaxDenormModes() def HasFminFmaxLegacy : Predicate<"Subtarget->hasFminFmaxLegacy()">; -def HasSDWA : Predicate<"Subtarget->hasSDWA()">, - AssemblerPredicate<(all_of FeatureSDWA, FeatureVolcanicIslands)>; +def HasSDWA : Predicate<"Subtarget->hasSDWA()">; + +def HasSDWA8 : Predicate<"Subtarget->hasSDWA()">, + AssemblerPredicate<(all_of (not FeatureGFX9Insts), FeatureSDWA)>; def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">, diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index 067043d290b760..3403cbab526d46 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -204,6 +204,10 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel, FeatureISAVersion9_4_2.Features >; +def : ProcessorModel<"gfx950", SIDPGFX940FullSpeedModel, + FeatureISAVersion9_5_0.Features +>; + // [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c] def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel, FeatureISAVersion9_Generic.Features diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 6ff964077d8fd0..1b06756a8a1016 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -106,6 +106,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool GFX9Insts = false; bool GFX90AInsts = false; bool GFX940Insts = false; + bool GFX950Insts = false; bool GFX10Insts = false; bool GFX11Insts = false; bool GFX12Insts = false; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 55ba5ebbebb8fd..ffde4d33f1341a 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -96,6 +96,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case 
ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break; @@ -182,6 +183,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940; case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941; case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942; + case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950; case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011; case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index c743eb43e3465c..f7a66a08209397 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -1268,7 +1268,7 @@ multiclass VOP1_Real_vi <bits<10> op> { if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then def _sdwa_vi : - VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, + VOP_SDWA8_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then @@ -1474,7 +1474,7 @@ def : GCNPat < // GFX9 //===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in { +let DecoderNamespace = "GFX9" in { multiclass VOP1_Real_gfx9 <bits<10> op> { defm NAME : VOP1_Real_e32e64_vi <op>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 925b60561c9d68..c0d38fa52b3446 
100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -766,16 +766,16 @@ defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, " defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">; -let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in { +let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in { defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">; defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">; } -let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1, isAdd = 1 in { +let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1, isAdd = 1 in { defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32">; } -let isAdd = 1 in { +let isAdd = 1 in { defm V_ADD_CO_U32 : VO... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/116307 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits