Author: hans Date: Mon Jul 22 12:54:33 2019 New Revision: 366729 URL: http://llvm.org/viewvc/llvm-project?rev=366729&view=rev Log: Merging r366570: ------------------------------------------------------------------------ r366570 | lkail | 2019-07-19 14:58:16 +0200 (Fri, 19 Jul 2019) | 9 lines
[MachineCSE][MachinePRE] Avoid hoisting code from cold regions into hot BBs. Summary: Current PRE hoists common computations into CMBB = DT->findNearestCommonDominator(MBB, MBB1). However, if CMBB is in a hot loop body, we might get performance degradation. Differential Revision: https://reviews.llvm.org/D64394 ------------------------------------------------------------------------ Modified: llvm/branches/release_90/ (props changed) llvm/branches/release_90/lib/CodeGen/MachineCSE.cpp llvm/branches/release_90/test/CodeGen/AArch64/O3-pipeline.ll llvm/branches/release_90/test/CodeGen/ARM/O3-pipeline.ll llvm/branches/release_90/test/CodeGen/PowerPC/machine-pre.ll llvm/branches/release_90/test/CodeGen/X86/O3-pipeline.ll Propchange: llvm/branches/release_90/ ------------------------------------------------------------------------------ --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Jul 22 12:54:33 2019 @@ -1,3 +1,3 @@ /llvm/branches/Apple/Pertwee:110850,110961 /llvm/branches/type-system-rewrite:133420-134817 -/llvm/trunk:155241,366431 +/llvm/trunk:155241,366431,366570 Modified: llvm/branches/release_90/lib/CodeGen/MachineCSE.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_90/lib/CodeGen/MachineCSE.cpp?rev=366729&r1=366728&r2=366729&view=diff ============================================================================== --- llvm/branches/release_90/lib/CodeGen/MachineCSE.cpp (original) +++ llvm/branches/release_90/lib/CodeGen/MachineCSE.cpp Mon Jul 22 12:54:33 2019 @@ -21,6 +21,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -66,6 +67,7 @@ namespace { AliasAnalysis *AA; MachineDominatorTree *DT; MachineRegisterInfo *MRI; + MachineBlockFrequencyInfo *MBFI; public: static char ID; // Pass 
identification @@ -83,6 +85,8 @@ namespace { AU.addPreservedID(MachineLoopInfoID); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); } void releaseMemory() override { @@ -133,6 +137,11 @@ namespace { bool isPRECandidate(MachineInstr *MI); bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB); bool PerformSimplePRE(MachineDominatorTree *DT); + /// Heuristics to see if it's beneficial to move common computations of MBB + /// and MBB1 to CandidateBB. + bool isBeneficalToHoistInto(MachineBasicBlock *CandidateBB, + MachineBasicBlock *MBB, + MachineBasicBlock *MBB1); }; } // end anonymous namespace @@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(Machine if (!CMBB->isLegalToHoistInto()) continue; + if (!isBeneficalToHoistInto(CMBB, MBB, MBB1)) + continue; + // Two instrs are partial redundant if their basic blocks are reachable // from one to another but one doesn't dominate another. 
if (CMBB != MBB1) { @@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(Machin return Changed; } +bool MachineCSE::isBeneficalToHoistInto(MachineBasicBlock *CandidateBB, + MachineBasicBlock *MBB, + MachineBasicBlock *MBB1) { + if (CandidateBB->getParent()->getFunction().hasMinSize()) + return true; + assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB"); + assert(DT->dominates(CandidateBB, MBB1) && + "CandidateBB should dominate MBB1"); + return MBFI->getBlockFreq(CandidateBB) <= + MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1); +} + bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(Ma MRI = &MF.getRegInfo(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); DT = &getAnalysis<MachineDominatorTree>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); LookAheadLimit = TII->getMachineCSELookAheadLimit(); bool ChangedPRE, ChangedCSE; ChangedPRE = PerformSimplePRE(DT); Modified: llvm/branches/release_90/test/CodeGen/AArch64/O3-pipeline.ll URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_90/test/CodeGen/AArch64/O3-pipeline.ll?rev=366729&r1=366728&r2=366729&view=diff ============================================================================== --- llvm/branches/release_90/test/CodeGen/AArch64/O3-pipeline.ll (original) +++ llvm/branches/release_90/test/CodeGen/AArch64/O3-pipeline.ll Mon Jul 22 12:54:33 2019 @@ -98,9 +98,9 @@ ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Early Machine Loop Invariant Code Motion +; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction -; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions Modified: 
llvm/branches/release_90/test/CodeGen/ARM/O3-pipeline.ll URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_90/test/CodeGen/ARM/O3-pipeline.ll?rev=366729&r1=366728&r2=366729&view=diff ============================================================================== --- llvm/branches/release_90/test/CodeGen/ARM/O3-pipeline.ll (original) +++ llvm/branches/release_90/test/CodeGen/ARM/O3-pipeline.ll Mon Jul 22 12:54:33 2019 @@ -72,9 +72,9 @@ ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Early Machine Loop Invariant Code Motion +; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction -; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions Modified: llvm/branches/release_90/test/CodeGen/PowerPC/machine-pre.ll URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_90/test/CodeGen/PowerPC/machine-pre.ll?rev=366729&r1=366728&r2=366729&view=diff ============================================================================== --- llvm/branches/release_90/test/CodeGen/PowerPC/machine-pre.ll (original) +++ llvm/branches/release_90/test/CodeGen/PowerPC/machine-pre.ll Mon Jul 22 12:54:33 2019 @@ -8,25 +8,25 @@ define i32 @t(i32 %n, i32 %delta, i32 %a ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lis r7, 0 ; CHECK-P9-NEXT: li r6, 0 +; CHECK-P9-NEXT: li r8, 0 ; CHECK-P9-NEXT: li r9, 0 -; CHECK-P9-NEXT: li r10, 0 ; CHECK-P9-NEXT: ori r7, r7, 65535 ; CHECK-P9-NEXT: .p2align 5 ; CHECK-P9-NEXT: .LBB0_1: # %header ; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: addi r10, r10, 1 -; CHECK-P9-NEXT: cmpw r10, r3 -; CHECK-P9-NEXT: addi r8, r5, 1024 +; CHECK-P9-NEXT: addi r9, r9, 1 +; CHECK-P9-NEXT: cmpw r9, r3 ; CHECK-P9-NEXT: blt cr0, .LBB0_4 ; CHECK-P9-NEXT: # %bb.2: # %cont ; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: add 
r9, r9, r4 -; CHECK-P9-NEXT: cmpw r9, r7 +; CHECK-P9-NEXT: add r8, r8, r4 +; CHECK-P9-NEXT: cmpw r8, r7 ; CHECK-P9-NEXT: bgt cr0, .LBB0_1 ; CHECK-P9-NEXT: # %bb.3: # %cont.1 -; CHECK-P9-NEXT: mr r6, r8 +; CHECK-P9-NEXT: addi r6, r5, 1024 ; CHECK-P9-NEXT: .LBB0_4: # %return -; CHECK-P9-NEXT: mullw r3, r6, r8 +; CHECK-P9-NEXT: addi r3, r5, 1024 +; CHECK-P9-NEXT: mullw r3, r6, r3 ; CHECK-P9-NEXT: blr entry: br label %header @@ -75,16 +75,19 @@ define dso_local signext i32 @foo(i32 si ; CHECK-P9-NEXT: lis r3, 21845 ; CHECK-P9-NEXT: add r28, r30, r29 ; CHECK-P9-NEXT: ori r27, r3, 21846 -; CHECK-P9-NEXT: b .LBB1_3 +; CHECK-P9-NEXT: b .LBB1_4 ; CHECK-P9-NEXT: .p2align 4 ; CHECK-P9-NEXT: .LBB1_1: # %sw.bb3 ; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: add r28, r3, r28 +; CHECK-P9-NEXT: mulli r3, r30, 23 ; CHECK-P9-NEXT: .LBB1_2: # %sw.epilog ; CHECK-P9-NEXT: # +; CHECK-P9-NEXT: add r28, r3, r28 +; CHECK-P9-NEXT: .LBB1_3: # %sw.epilog +; CHECK-P9-NEXT: # ; CHECK-P9-NEXT: cmpwi r28, 1025 -; CHECK-P9-NEXT: bge cr0, .LBB1_6 -; CHECK-P9-NEXT: .LBB1_3: # %while.cond +; CHECK-P9-NEXT: bge cr0, .LBB1_7 +; CHECK-P9-NEXT: .LBB1_4: # %while.cond ; CHECK-P9-NEXT: # ; CHECK-P9-NEXT: extsw r3, r29 ; CHECK-P9-NEXT: bl bar @@ -101,41 +104,40 @@ define dso_local signext i32 @foo(i32 si ; CHECK-P9-NEXT: add r4, r4, r5 ; CHECK-P9-NEXT: slwi r5, r4, 1 ; CHECK-P9-NEXT: add r4, r4, r5 -; CHECK-P9-NEXT: subf r5, r4, r3 -; CHECK-P9-NEXT: mulli r4, r29, 13 -; CHECK-P9-NEXT: mulli r3, r30, 23 -; CHECK-P9-NEXT: cmplwi r5, 1 +; CHECK-P9-NEXT: subf r3, r4, r3 +; CHECK-P9-NEXT: cmplwi r3, 1 ; CHECK-P9-NEXT: beq cr0, .LBB1_1 -; CHECK-P9-NEXT: # %bb.4: # %while.cond -; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: cmplwi r5, 0 -; CHECK-P9-NEXT: bne cr0, .LBB1_2 -; CHECK-P9-NEXT: # %bb.5: # %sw.bb +; CHECK-P9-NEXT: # %bb.5: # %while.cond ; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: add r28, r4, r28 -; CHECK-P9-NEXT: cmpwi r28, 1025 -; CHECK-P9-NEXT: blt cr0, .LBB1_3 -; CHECK-P9-NEXT: .LBB1_6: # %while.end -; CHECK-P9-NEXT: lis r5, 
-13108 -; CHECK-P9-NEXT: ori r5, r5, 52429 -; CHECK-P9-NEXT: mullw r5, r28, r5 -; CHECK-P9-NEXT: lis r6, 13107 -; CHECK-P9-NEXT: ori r6, r6, 13108 -; CHECK-P9-NEXT: cmplw r5, r6 -; CHECK-P9-NEXT: blt cr0, .LBB1_8 -; CHECK-P9-NEXT: # %bb.7: # %if.then8 -; CHECK-P9-NEXT: extsw r4, r4 -; CHECK-P9-NEXT: extsw r5, r28 +; CHECK-P9-NEXT: cmplwi r3, 0 +; CHECK-P9-NEXT: bne cr0, .LBB1_3 +; CHECK-P9-NEXT: # %bb.6: # %sw.bb +; CHECK-P9-NEXT: # +; CHECK-P9-NEXT: mulli r3, r29, 13 +; CHECK-P9-NEXT: b .LBB1_2 +; CHECK-P9-NEXT: .LBB1_7: # %while.end +; CHECK-P9-NEXT: lis r3, -13108 +; CHECK-P9-NEXT: ori r3, r3, 52429 +; CHECK-P9-NEXT: mullw r3, r28, r3 +; CHECK-P9-NEXT: lis r4, 13107 +; CHECK-P9-NEXT: ori r4, r4, 13108 +; CHECK-P9-NEXT: cmplw r3, r4 +; CHECK-P9-NEXT: blt cr0, .LBB1_9 +; CHECK-P9-NEXT: # %bb.8: # %if.then8 +; CHECK-P9-NEXT: mulli r3, r29, 13 +; CHECK-P9-NEXT: mulli r5, r30, 23 +; CHECK-P9-NEXT: extsw r4, r28 ; CHECK-P9-NEXT: extsw r3, r3 +; CHECK-P9-NEXT: extsw r5, r5 +; CHECK-P9-NEXT: sub r3, r4, r3 ; CHECK-P9-NEXT: sub r4, r5, r4 -; CHECK-P9-NEXT: sub r3, r3, r5 -; CHECK-P9-NEXT: rldicl r4, r4, 1, 63 ; CHECK-P9-NEXT: rldicl r3, r3, 1, 63 -; CHECK-P9-NEXT: or r3, r4, r3 -; CHECK-P9-NEXT: b .LBB1_9 -; CHECK-P9-NEXT: .LBB1_8: # %cleanup20 -; CHECK-P9-NEXT: li r3, 0 +; CHECK-P9-NEXT: rldicl r4, r4, 1, 63 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: b .LBB1_10 ; CHECK-P9-NEXT: .LBB1_9: # %cleanup20 +; CHECK-P9-NEXT: li r3, 0 +; CHECK-P9-NEXT: .LBB1_10: # %cleanup20 ; CHECK-P9-NEXT: addi r1, r1, 80 ; CHECK-P9-NEXT: ld r0, 16(r1) ; CHECK-P9-NEXT: mtlr r0 Modified: llvm/branches/release_90/test/CodeGen/X86/O3-pipeline.ll URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_90/test/CodeGen/X86/O3-pipeline.ll?rev=366729&r1=366728&r2=366729&view=diff ============================================================================== --- llvm/branches/release_90/test/CodeGen/X86/O3-pipeline.ll (original) +++ llvm/branches/release_90/test/CodeGen/X86/O3-pipeline.ll 
Mon Jul 22 12:54:33 2019 @@ -84,9 +84,9 @@ ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Early Machine Loop Invariant Code Motion +; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction -; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits