Author: Kai Yan Date: 2024-09-01T10:00:16+02:00 New Revision: 06d009789f771e8cef82714549c3136e320312be
URL: https://github.com/llvm/llvm-project/commit/06d009789f771e8cef82714549c3136e320312be DIFF: https://github.com/llvm/llvm-project/commit/06d009789f771e8cef82714549c3136e320312be.diff LOG: [llvm][CodeGen] Fixed a bug in stall cycle calculation for window scheduler (#99451) Fixed a bug in stall cycle calculation. When a register defined by an instruction in the current iteration is used by an instruction in the next iteration, we have modified the number of stall cycles that need to be inserted. Added: llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir Modified: llvm/lib/CodeGen/WindowScheduler.cpp Removed: ################################################################################ diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp index 3fe8a1aaafd128..02275afff4e6dd 100644 --- a/llvm/lib/CodeGen/WindowScheduler.cpp +++ b/llvm/lib/CodeGen/WindowScheduler.cpp @@ -488,6 +488,7 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG, // ======================================== int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { int MaxStallCycle = 0; + int CurrentII = MaxCycle + 1; auto Range = getScheduleRange(Offset, SchedInstrNum); for (auto &MI : Range) { auto *SU = TripleDAG->getSUnit(&MI); @@ -495,8 +496,8 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { for (auto &Succ : SU->Succs) { if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU) continue; - // If the expected cycle does not exceed MaxCycle, no check is needed. - if (DefCycle + (int)Succ.getLatency() <= MaxCycle) + // If the expected cycle does not exceed CurrentII, no check is needed. 
+ if (DefCycle + (int)Succ.getLatency() <= CurrentII) continue; // If the cycle of the scheduled MI A is less than that of the scheduled // MI B, the scheduling will fail because the lifetime of the @@ -506,7 +507,7 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { if (DefCycle < UseCycle) return WindowIILimit; // Get the stall cycle introduced by the register between two trips. - int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle; + int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle; MaxStallCycle = std::max(MaxStallCycle, StallCycle); } } diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir new file mode 100644 index 00000000000000..ddba67d78eb58c --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir @@ -0,0 +1,59 @@ +# REQUIRES: asserts +# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \ +# RUN: -window-sched=force -filetype=null -verify-machineinstrs \ +# RUN: -window-region-limit=1 -window-search-ratio=100 -window-diff-limit=0 \ +# RUN: 2>&1 | FileCheck %s + +# CHECK-LABEL: Start analyzing II +# CHECK: MaxStallCycle is 0 +# CHECK-LABEL: Start analyzing II +# CHECK: MaxStallCycle is 0 +# CHECK-LABEL: Start analyzing II +# CHECK: MaxStallCycle is 0 + +--- +name: test_window_stall_cycle +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.3(0x40000000), %bb.1(0x40000000) + liveins: $r0, $r1 + + %0:intregs = COPY $r1 + %1:intregs = COPY $r0 + %2:intregs = nsw A2_add %0, %1 + %3:intregs = S2_lsr_i_r_acc %2, %2, 31 + %4:intregs = S2_asr_i_r killed %3, 1 + %5:predregs = C2_cmpgt %1, %4 + %6:intregs = A2_tfrsi 0 + J2_jumpt killed %5, %bb.3, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.1: + successors: %bb.2(0x80000000) + + %7:intregs = A2_addi %4, 2 + %8:intregs = A2_tfrsi 0 + %9:intregs = A2_sub %4, %1 + %10:intregs = A2_addi %9, 1 + %11:intregs = COPY %10 + J2_loop0r 
%bb.2, %11, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.2 (machine-block-address-taken): + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + + %12:intregs = PHI %7, %bb.1, %13, %bb.2 + %14:intregs = PHI %8, %bb.1, %15, %bb.2 + %16:intregs = PHI %8, %bb.1, %17, %bb.2 + %18:intregs, %13:intregs = L2_loadri_pi %12, -4 + %17:intregs = nsw A2_add killed %18, %16 + %15:intregs = A2_max %17, %14 + ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.3, implicit-def dead $pc + + bb.3: + %19:intregs = PHI %6, %bb.0, %15, %bb.2 + $r0 = COPY %19 + PS_jmpret $r31, implicit-def dead $pc, implicit $r0 + +... _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits