https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117286
>From 7c92bcc28e7327c0637caa0d744d1e4d20eace02 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Mon, 18 Mar 2024 13:45:22 +0530 Subject: [PATCH] AMDGPU: Handle vcmpx+permlane gfx950 hazard Confusingly, this is a different hazard to the one on gfx10 with a subtarget feature. --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 35 ++++- llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 + llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir | 144 ++++++++++++++++++ 3 files changed, 176 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 8008b5f7bcc991..45ff1f4a63cf03 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -168,7 +168,11 @@ static bool isPermlane(const MachineInstr &MI) { Opcode == AMDGPU::V_PERMLANE64_B32 || Opcode == AMDGPU::V_PERMLANEX16_B32_e64 || Opcode == AMDGPU::V_PERMLANE16_VAR_B32_e64 || - Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64; + Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64 || + Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e32 || + Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e64 || + Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e32 || + Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e64; } static bool isLdsDma(const MachineInstr &MI) { @@ -395,6 +399,9 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) { SIInstrInfo::isDS(*MI)) return std::max(WaitStates, checkMAILdStHazards(MI)); + if (ST.hasGFX950Insts() && isPermlane(*MI)) + return std::max(WaitStates, checkPermlaneHazards(MI)); + return WaitStates; } @@ -1200,6 +1207,14 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { fixRequiredExportPriority(MI); } +static bool isVCmpXWritesExec(const SIInstrInfo &TII, + const SIRegisterInfo &TRI, + const MachineInstr &MI) { + return (TII.isVOPC(MI) || + (MI.isCompare() && (TII.isVOP3(MI) || 
TII.isSDWA(MI)))) && + MI.modifiesRegister(AMDGPU::EXEC, &TRI); +} + bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI)) return false; @@ -1207,9 +1222,7 @@ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); auto IsHazardFn = [TII, TRI](const MachineInstr &MI) { - return (TII->isVOPC(MI) || - ((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) && - MI.modifiesRegister(AMDGPU::EXEC, TRI); + return isVCmpXWritesExec(*TII, *TRI, MI); }; auto IsExpiredFn = [](const MachineInstr &MI, int) { @@ -2529,6 +2542,20 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) { return WaitStatesNeeded; } +int GCNHazardRecognizer::checkPermlaneHazards(MachineInstr *MI) { + assert(!ST.hasVcmpxPermlaneHazard() && + "this is a different vcmpx+permlane hazard"); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); + + auto IsVCmpXWritesExecFn = [TII, TRI](const MachineInstr &MI) { + return isVCmpXWritesExec(*TII, *TRI, MI); + }; + + const int NumWaitStates = 4; + return NumWaitStates - getWaitStatesSince(IsVCmpXWritesExecFn, NumWaitStates); +} + static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { // 2 pass -> 4 // 4 pass -> 6 diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index adb2278c48eebe..83ce100c58f0a6 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -134,6 +134,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer { int checkMFMAPadding(MachineInstr *MI); int checkMAIVALUHazards(MachineInstr *MI); int checkMAILdStHazards(MachineInstr *MI); + int checkPermlaneHazards(MachineInstr *MI); public: GCNHazardRecognizer(const MachineFunction &MF); diff --git 
a/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir new file mode 100644 index 00000000000000..97bef7be711ff2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir @@ -0,0 +1,144 @@ +# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s + +--- +# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1 +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vopc_write_exec_permlane16_swap_vop1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop1 +# GCN: V_CMPX_EQ_I32_e64 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vop3_write_exec_permlane16_swap_vop1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop3 +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vopc_write_exec_permlane16_swap_vop3 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec +... 
+ +--- +# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop3 +# GCN: V_CMPX_EQ_I32_e64 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vop3_write_exec_permlane16_swap_vop3 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop1 +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vopc_write_exec_permlane32_swap_vop1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop1 +# GCN: V_CMPX_EQ_I32_e64 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vop3_write_exec_permlane32_swap_vop1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop3 +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vopc_write_exec_permlane32_swap_vop3 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec +... 
+ +--- +# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop3 +# GCN: V_CMPX_EQ_I32_e64 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vop3_write_exec_permlane32_swap_vop3 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec + $vgpr2 = V_MOV_B32_e32 0, implicit $exec + $vgpr3 = V_MOV_B32_e32 0, implicit $exec + $vgpr4 = V_MOV_B32_e32 0, implicit $exec + $vgpr5 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... + +--- +# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1 +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: V_MOV_B32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_PERMLANE +name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec + $vgpr2 = V_MOV_B32_e32 0, implicit $exec + $vgpr3 = V_MOV_B32_e32 0, implicit $exec + $vgpr4 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec +... _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits