https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/95924
>From 5ac4ff3040f8a5a6cc68efffe3349ef9d181ddec Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <[email protected]> Date: Tue, 18 Jun 2024 21:33:25 +0800 Subject: [PATCH 1/3] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?= =?UTF-8?q?itial=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6-beta.1 --- llvm/include/llvm/CodeGen/MachineScheduler.h | 43 ++++++++-- llvm/lib/CodeGen/MachineScheduler.cpp | 34 +------- llvm/lib/Target/RISCV/CMakeLists.txt | 1 + .../Target/RISCV/RISCVMachineScheduler.cpp | 83 +++++++++++++++++++ llvm/lib/Target/RISCV/RISCVMachineScheduler.h | 42 ++++++++++ llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 8 +- llvm/test/CodeGen/RISCV/rvv/schedule.ll | 49 +++++++++++ 7 files changed, 215 insertions(+), 45 deletions(-) create mode 100644 llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp create mode 100644 llvm/lib/Target/RISCV/RISCVMachineScheduler.h create mode 100644 llvm/test/CodeGen/RISCV/rvv/schedule.ll diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index b15abf040058e..d1b5b83e5300b 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -1349,14 +1349,6 @@ class PostGenericScheduler : public GenericSchedulerBase { void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand); }; -/// Create the standard converging machine scheduler. This will be used as the -/// default scheduler if the target does not set a default. -/// Adds default DAG mutations. -ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C); - -/// Create a generic scheduler with no vreg liveness or DAG mutation passes. -ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C); - /// If ReorderWhileClustering is set to true, no attempt will be made to /// reduce reordering due to store clustering. std::unique_ptr<ScheduleDAGMutation> @@ -1375,6 +1367,41 @@ std::unique_ptr<ScheduleDAGMutation> createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI); +/// Create the standard converging machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +/// Adds default DAG mutations. +template <typename Strategy = GenericScheduler> +ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C) { + ScheduleDAGMILive *DAG = + new ScheduleDAGMILive(C, std::make_unique<Strategy>(C)); + // Register DAG post-processors. + // + // FIXME: extend the mutation API to allow earlier mutations to instantiate + // data and pass it to later mutations. Have a single mutation that gathers + // the interesting nodes in one pass. + DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); + + const TargetSubtargetInfo &STI = C->MF->getSubtarget(); + // Add MacroFusion mutation if fusions are not empty. + const auto &MacroFusions = STI.getMacroFusions(); + if (!MacroFusions.empty()) + DAG->addMutation(createMacroFusionDAGMutation(MacroFusions)); + return DAG; +} + +/// Create a generic scheduler with no vreg liveness or DAG mutation passes. +template <typename Strategy = PostGenericScheduler> +ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C) { + ScheduleDAGMI *DAG = new ScheduleDAGMI(C, std::make_unique<Strategy>(C), + /*RemoveKillFlags=*/true); + const TargetSubtargetInfo &STI = C->MF->getSubtarget(); + // Add MacroFusion mutation if fusions are not empty. + const auto &MacroFusions = STI.getMacroFusions(); + if (!MacroFusions.empty()) + DAG->addMutation(createMacroFusionDAGMutation(MacroFusions)); + return DAG; +} + } // end namespace llvm #endif // LLVM_CODEGEN_MACHINESCHEDULER_H diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index cf72f74380835..ac792ad4d5484 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2701,7 +2701,7 @@ void SchedBoundary::bumpNode(SUnit *SU) { unsigned NextCycle = CurrCycle; switch (SchedModel->getMicroOpBufferSize()) { case 0: - assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); + // assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); break; case 1: if (ReadyCycle > NextCycle) { @@ -3847,26 +3847,6 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { } } -/// Create the standard converging machine scheduler. This will be used as the -/// default scheduler if the target does not set a default. -ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { - ScheduleDAGMILive *DAG = - new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)); - // Register DAG post-processors. - // - // FIXME: extend the mutation API to allow earlier mutations to instantiate - // data and pass it to later mutations. Have a single mutation that gathers - // the interesting nodes in one pass. - DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); - - const TargetSubtargetInfo &STI = C->MF->getSubtarget(); - // Add MacroFusion mutation if fusions are not empty. - const auto &MacroFusions = STI.getMacroFusions(); - if (!MacroFusions.empty()) - DAG->addMutation(createMacroFusionDAGMutation(MacroFusions)); - return DAG; -} - static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { return createGenericSchedLive(C); } @@ -4139,18 +4119,6 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { } } -ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { - ScheduleDAGMI *DAG = - new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C), - /*RemoveKillFlags=*/true); - const TargetSubtargetInfo &STI = C->MF->getSubtarget(); - // Add MacroFusion mutation if fusions are not empty. - const auto &MacroFusions = STI.getMacroFusions(); - if (!MacroFusions.empty()) - DAG->addMutation(createMacroFusionDAGMutation(MacroFusions)); - return DAG; -} - //===----------------------------------------------------------------------===// // ILP Scheduler. Currently for experimental analysis of heuristics. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 8715403f3839a..fe3f213b253f7 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -44,6 +44,7 @@ add_llvm_target(RISCVCodeGen RISCVISelDAGToDAG.cpp RISCVISelLowering.cpp RISCVMachineFunctionInfo.cpp + RISCVMachineScheduler.cpp RISCVMergeBaseOffset.cpp RISCVOptWInstrs.cpp RISCVPostRAExpandPseudoInsts.cpp diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp new file mode 100644 index 0000000000000..d993d840c3d3a --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp @@ -0,0 +1,83 @@ +//===- RISCVMachineScheduler.cpp - MI Scheduler for RISC-V ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RISCVMachineScheduler.h" +#include "MCTargetDesc/RISCVBaseInfo.h" +#include "MCTargetDesc/RISCVMCTargetDesc.h" +#include "RISCVInstrInfo.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Debug.h" +#include "llvm/TargetParser/RISCVTargetParser.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-prera-sched-strategy" + +static cl::opt<bool> EnableScheduleSameVType( + "riscv-enable-schedule-same-vtype", cl::init(false), cl::Hidden, + cl::desc("Enable scheduling RVV instructions with same vtype first")); + +SUnit *RISCVPreRAMachineSchedStrategy::pickNode(bool &IsTopNode) { + if (EnableScheduleSameVType) { + for (SUnit *SU : Bot.Available) { + MachineInstr *MI = SU->getInstr(); + const MCInstrDesc &Desc = MI->getDesc(); + if (RISCVII::hasSEWOp(Desc.TSFlags)) { + unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm(); + RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags); + if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) { + Bot.removeReady(SU); + IsTopNode = true; + return SU; + } + } + } + for (SUnit *SU : Bot.Pending) { + MachineInstr *MI = SU->getInstr(); + const MCInstrDesc &Desc = MI->getDesc(); + if (RISCVII::hasSEWOp(Desc.TSFlags)) { + unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm(); + RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags); + if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) { + Bot.removeReady(SU); + IsTopNode = false; + return SU; + } + } + } + } + return GenericScheduler::pickNode(IsTopNode); +} + +bool RISCVPreRAMachineSchedStrategy::tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary *Zone) const { + bool OriginalResult = GenericScheduler::tryCandidate(Cand, TryCand, Zone); + + return OriginalResult; +} + +void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { + GenericScheduler::schedNode(SU, IsTopNode); + MachineInstr *MI = SU->getInstr(); + const MCInstrDesc &Desc = MI->getDesc(); + if (RISCVII::hasSEWOp(Desc.TSFlags)) { + PrevVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm(); + PrevVLMUL = RISCVII::getLMul(Desc.TSFlags); + } + LLVM_DEBUG(dbgs() << "Previous scheduled Unit: "; + dbgs() << "SU(" << SU->NodeNum << ") - "; SU->getInstr()->dump();); + LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n"; + auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL); + dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "") + << LMUL.first << "\n";); +} diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.h b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h new file mode 100644 index 0000000000000..bd806cef57dcb --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h @@ -0,0 +1,42 @@ +//===--- RISCVMachineScheduler.h - Custom RISC-V MI scheduler ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Custom RISC-V MI scheduler. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACHINESCHEDULER_H +#define LLVM_LIB_TARGET_RISCV_RISCVMACHINESCHEDULER_H + +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/TargetParser/RISCVTargetParser.h" + +namespace llvm { + +/// A GenericScheduler implementation for RISCV pre RA scheduling. +class RISCVPreRAMachineSchedStrategy : public GenericScheduler { +private: + RISCVII::VLMUL PrevVLMUL; + unsigned PrevVSEW; + +public: + RISCVPreRAMachineSchedStrategy(const MachineSchedContext *C) + : GenericScheduler(C) {} + +protected: + SUnit *pickNode(bool &IsTopNode) override; + + bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) const override; + + void schedNode(SUnit *SU, bool IsTopNode) override; +}; + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 35d0b3408d09f..e0dcbbddc3f53 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -14,6 +14,7 @@ #include "MCTargetDesc/RISCVBaseInfo.h" #include "RISCV.h" #include "RISCVMachineFunctionInfo.h" +#include "RISCVMachineScheduler.h" #include "RISCVTargetObjectFile.h" #include "RISCVTargetTransformInfo.h" #include "TargetInfo/RISCVTargetInfo.h" @@ -340,12 +341,11 @@ class RISCVPassConfig : public TargetPassConfig { ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override { - ScheduleDAGMILive *DAG = nullptr; - if (EnableMISchedLoadClustering) { - DAG = createGenericSchedLive(C); + ScheduleDAGMILive *DAG = + createGenericSchedLive<RISCVPreRAMachineSchedStrategy>(C); + if (EnableMISchedLoadClustering) DAG->addMutation(createLoadClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); - } return DAG; } diff --git a/llvm/test/CodeGen/RISCV/rvv/schedule.ll b/llvm/test/CodeGen/RISCV/rvv/schedule.ll new file mode 100644 index 0000000000000..baf15ef400df5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/schedule.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=DEFAULT +; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST + +define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) { +; DEFAULT-LABEL: test: +; DEFAULT: # %bb.0: # %entry +; DEFAULT-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; DEFAULT-NEXT: vdiv.vv v12, v8, v9 +; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; DEFAULT-NEXT: vdiv.vv v13, v10, v11 +; DEFAULT-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; DEFAULT-NEXT: vadd.vv v8, v8, v9 +; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; DEFAULT-NEXT: vadd.vv v9, v10, v11 +; DEFAULT-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; DEFAULT-NEXT: vadd.vv v8, v8, v12 +; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; DEFAULT-NEXT: vadd.vv v9, v9, v13 +; DEFAULT-NEXT: vwadd.wv v8, v8, v9 +; DEFAULT-NEXT: ret +; +; SAME-VTYPE-FIRST-LABEL: test: +; SAME-VTYPE-FIRST: # %bb.0: # %entry +; SAME-VTYPE-FIRST-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; SAME-VTYPE-FIRST-NEXT: vadd.vv v12, v8, v9 +; SAME-VTYPE-FIRST-NEXT: vdiv.vv v8, v8, v9 +; SAME-VTYPE-FIRST-NEXT: vadd.vv v8, v12, v8 +; SAME-VTYPE-FIRST-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; SAME-VTYPE-FIRST-NEXT: vadd.vv v9, v10, v11 +; SAME-VTYPE-FIRST-NEXT: vdiv.vv v10, v10, v11 +; SAME-VTYPE-FIRST-NEXT: vadd.vv v9, v9, v10 +; SAME-VTYPE-FIRST-NEXT: vwadd.wv v8, v8, v9 +; SAME-VTYPE-FIRST-NEXT: ret +entry: + %0 = add <vscale x 1 x i64> %v64_0, %v64_1 + %1 = add <vscale x 1 x i32> %v32_0, %v32_1 + %2 = sdiv <vscale x 1 x i64> %v64_0, %v64_1 + %3 = sdiv <vscale x 1 x i32> %v32_0, %v32_1 + %4 = add <vscale x 1 x i64> %0, %2 + %5 = add <vscale x 1 x i32> %1, %3 + + %6 = sext <vscale x 1 x i32> %5 to <vscale x 1 x i64> + %7 = add <vscale x 1 x i64> %4, %6 + ret <vscale x 1 x i64> %7 +} + >From 185e0f8266c2dec9a161328c6c14490fe3cffa69 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <[email protected]> Date: Wed, 19 Jun 2024 18:45:50 +0800 Subject: [PATCH 2/3] Support buttomup/topdown/bidirectional and fix some failures Created using spr 1.3.6-beta.1 --- .../Target/RISCV/RISCVMachineScheduler.cpp | 79 ++++++++--- llvm/test/CodeGen/RISCV/rvv/schedule.ll | 125 +++++++++++++++--- 2 files changed, 165 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp index d993d840c3d3a..530d4f6b2d845 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp @@ -27,34 +27,68 @@ static cl::opt<bool> EnableScheduleSameVType( cl::desc("Enable scheduling RVV instructions with same vtype first")); SUnit *RISCVPreRAMachineSchedStrategy::pickNode(bool &IsTopNode) { - if (EnableScheduleSameVType) { - for (SUnit *SU : Bot.Available) { + auto FindPotentialRVVInstructionInQueue = + [&](SchedBoundary &Boundary, ReadyQueue Q, bool ShouldBeTop) -> SUnit * { + for (SUnit *SU : Q) { + if (SU->isScheduled) + continue; + MachineInstr *MI = SU->getInstr(); const MCInstrDesc &Desc = MI->getDesc(); if (RISCVII::hasSEWOp(Desc.TSFlags)) { unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm(); RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags); + // FIXME: We should consider vl and policy here. if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) { - Bot.removeReady(SU); - IsTopNode = true; + IsTopNode = ShouldBeTop; + // Boundary.removeReady(SU); + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); + LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " + << *SU->getInstr()); return SU; } } } - for (SUnit *SU : Bot.Pending) { - MachineInstr *MI = SU->getInstr(); - const MCInstrDesc &Desc = MI->getDesc(); - if (RISCVII::hasSEWOp(Desc.TSFlags)) { - unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm(); - RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags); - if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) { - Bot.removeReady(SU); - IsTopNode = false; - return SU; - } - } + return nullptr; + }; + + auto FindPotentialRVVInstruction = [&](SchedBoundary &Boundary, + bool ShouldBeTop) -> SUnit * { + if (SUnit *Available = FindPotentialRVVInstructionInQueue( + Boundary, Boundary.Available, ShouldBeTop)) + return Available; + if (SUnit *Pending = FindPotentialRVVInstructionInQueue( + Boundary, Boundary.Pending, ShouldBeTop)) + return Pending; + return nullptr; + }; + + if (EnableScheduleSameVType) { + if (RegionPolicy.OnlyBottomUp) { + if (SUnit *SU = FindPotentialRVVInstruction(Bot, false)) + return SU; + } else if (RegionPolicy.OnlyTopDown) { + if (SUnit *SU = FindPotentialRVVInstruction(Top, true)) + return SU; + } else { + if (SUnit *SU = + FindPotentialRVVInstructionInQueue(Bot, Bot.Available, false)) + return SU; + if (SUnit *SU = + FindPotentialRVVInstructionInQueue(Top, Top.Available, true)) + return SU; + if (SUnit *SU = + FindPotentialRVVInstructionInQueue(Bot, Bot.Pending, false)) + return SU; + if (SUnit *SU = + FindPotentialRVVInstructionInQueue(Top, Top.Pending, true)) + return SU; } } + return GenericScheduler::pickNode(IsTopNode); } @@ -73,11 +107,12 @@ void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { if (RISCVII::hasSEWOp(Desc.TSFlags)) { PrevVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm(); PrevVLMUL = RISCVII::getLMul(Desc.TSFlags); + LLVM_DEBUG(dbgs() << "Previous scheduled Unit: "; + dbgs() << "SU(" << SU->NodeNum << ") - "; + SU->getInstr()->dump();); + LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n"; + auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL); + dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "") + << LMUL.first << "\n";); } - LLVM_DEBUG(dbgs() << "Previous scheduled Unit: "; - dbgs() << "SU(" << SU->NodeNum << ") - "; SU->getInstr()->dump();); - LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n"; - auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL); - dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "") - << LMUL.first << "\n";); } diff --git a/llvm/test/CodeGen/RISCV/rvv/schedule.ll b/llvm/test/CodeGen/RISCV/rvv/schedule.ll index baf15ef400df5..6b466d802ac4a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/schedule.ll +++ b/llvm/test/CodeGen/RISCV/rvv/schedule.ll @@ -1,15 +1,33 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=DEFAULT -; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST +; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=true -misched-topdown=false \ +; RUN: -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-BOTTOMUP +; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=false -misched-topdown=true \ +; RUN: -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-TOPDOWN +; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=false -misched-topdown=false \ +; RUN: -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-BIDIRECTIONAL -define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) { +declare void @consume(i64 %scalar, <vscale x 1 x i64> %vector) + +define void @test(i64 %a, i64 %b, <vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) { ; DEFAULT-LABEL: test: ; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; DEFAULT-NEXT: addi sp, sp, -16 +; DEFAULT-NEXT: .cfi_def_cfa_offset 16 +; DEFAULT-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; DEFAULT-NEXT: .cfi_offset ra, -8 +; DEFAULT-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; DEFAULT-NEXT: vdiv.vv v12, v8, v9 ; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; DEFAULT-NEXT: div a2, a0, a1 +; DEFAULT-NEXT: add a3, a0, a1 +; DEFAULT-NEXT: mul a0, a0, a1 +; DEFAULT-NEXT: add a0, a0, a3 +; DEFAULT-NEXT: add a0, a0, a2 ; DEFAULT-NEXT: vdiv.vv v13, v10, v11 ; DEFAULT-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; DEFAULT-NEXT: vadd.vv v8, v8, v9 @@ -20,30 +38,103 @@ define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v ; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; DEFAULT-NEXT: vadd.vv v9, v9, v13 ; DEFAULT-NEXT: vwadd.wv v8, v8, v9 +; DEFAULT-NEXT: call consume +; DEFAULT-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; DEFAULT-NEXT: addi sp, sp, 16 ; DEFAULT-NEXT: ret ; -; SAME-VTYPE-FIRST-LABEL: test: -; SAME-VTYPE-FIRST: # %bb.0: # %entry -; SAME-VTYPE-FIRST-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; SAME-VTYPE-FIRST-NEXT: vadd.vv v12, v8, v9 -; SAME-VTYPE-FIRST-NEXT: vdiv.vv v8, v8, v9 -; SAME-VTYPE-FIRST-NEXT: vadd.vv v8, v12, v8 -; SAME-VTYPE-FIRST-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; SAME-VTYPE-FIRST-NEXT: vadd.vv v9, v10, v11 -; SAME-VTYPE-FIRST-NEXT: vdiv.vv v10, v10, v11 -; SAME-VTYPE-FIRST-NEXT: vadd.vv v9, v9, v10 -; SAME-VTYPE-FIRST-NEXT: vwadd.wv v8, v8, v9 -; SAME-VTYPE-FIRST-NEXT: ret +; SAME-VTYPE-FIRST-BOTTOMUP-LABEL: test: +; SAME-VTYPE-FIRST-BOTTOMUP: # %bb.0: # %entry +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: addi sp, sp, -16 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: .cfi_def_cfa_offset 16 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: .cfi_offset ra, -8 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vsetvli a2, zero, e64, m1, ta, ma +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vadd.vv v12, v8, v9 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: div a2, a0, a1 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: add a3, a0, a1 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vdiv.vv v8, v8, v9 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: mul a0, a0, a1 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: add a0, a0, a3 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: add a0, a0, a2 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vadd.vv v8, v12, v8 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vadd.vv v9, v10, v11 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vdiv.vv v10, v10, v11 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vadd.vv v9, v9, v10 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vwadd.wv v8, v8, v9 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: call consume +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: addi sp, sp, 16 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: ret +; +; SAME-VTYPE-FIRST-TOPDOWN-LABEL: test: +; SAME-VTYPE-FIRST-TOPDOWN: # %bb.0: # %entry +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: addi sp, sp, -16 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: .cfi_def_cfa_offset 16 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: .cfi_offset ra, -8 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vsetvli a3, zero, e32, mf2, ta, ma +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vadd.vv v12, v10, v11 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vdiv.vv v10, v10, v11 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: add a2, a0, a1 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: mul a3, a0, a1 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: div a0, a0, a1 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: add a2, a2, a3 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vadd.vv v10, v12, v10 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vadd.vv v11, v8, v9 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vdiv.vv v8, v8, v9 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: add a0, a0, a2 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vadd.vv v8, v11, v8 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vwadd.wv v8, v8, v10 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: call consume +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: addi sp, sp, 16 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: ret +; +; SAME-VTYPE-FIRST-BIDIRECTIONAL-LABEL: test: +; SAME-VTYPE-FIRST-BIDIRECTIONAL: # %bb.0: # %entry +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: addi sp, sp, -16 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: .cfi_def_cfa_offset 16 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: .cfi_offset ra, -8 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vsetvli a2, zero, e32, mf2, ta, ma +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vadd.vv v12, v10, v11 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vdiv.vv v10, v10, v11 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: div a2, a0, a1 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: add a3, a0, a1 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: mul a0, a0, a1 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: add a0, a0, a3 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vadd.vv v11, v8, v9 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vdiv.vv v8, v8, v9 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: add a0, a0, a2 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vadd.vv v8, v11, v8 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vadd.vv v9, v12, v10 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vwadd.wv v8, v8, v9 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: call consume +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: addi sp, sp, 16 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: ret entry: %0 = add <vscale x 1 x i64> %v64_0, %v64_1 + %scalar0 = add i64 %a, %b %1 = add <vscale x 1 x i32> %v32_0, %v32_1 %2 = sdiv <vscale x 1 x i64> %v64_0, %v64_1 + %scalar1 = mul i64 %a, %b %3 = sdiv <vscale x 1 x i32> %v32_0, %v32_1 %4 = add <vscale x 1 x i64> %0, %2 + %scalar2 = sdiv i64 %a, %b %5 = add <vscale x 1 x i32> %1, %3 %6 = sext <vscale x 1 x i32> %5 to <vscale x 1 x i64> + %scalar3 = add i64 %scalar0, %scalar1 %7 = add <vscale x 1 x i64> %4, %6 - ret <vscale x 1 x i64> %7 + %scalar4 = add i64 %scalar2, %scalar3 + call void @consume(i64 %scalar4, <vscale x 1 x i64> %7) + ret void } >From b86faa86e6e8e661e460ccbe20048141ff0b2c13 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <[email protected]> Date: Tue, 2 Dec 2025 17:00:12 +0800 Subject: [PATCH 3/3] Revert llvm/lib/CodeGen/MachineScheduler.cpp change Created using spr 1.3.6-beta.1 --- llvm/lib/CodeGen/MachineScheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index efe64788c5efa..de29a9fab876e 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2964,7 +2964,7 @@ void SchedBoundary::bumpNode(SUnit *SU) { unsigned NextCycle = CurrCycle; switch (SchedModel->getMicroOpBufferSize()) { case 0: - // assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); + assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); break; case 1: if (ReadyCycle > NextCycle) { _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
