Author: Nikita Popov Date: 2022-03-07T21:02:28-08:00 New Revision: 67555104d23aaef9b4ce4995ccb98b2ba9aff07d
URL: https://github.com/llvm/llvm-project/commit/67555104d23aaef9b4ce4995ccb98b2ba9aff07d DIFF: https://github.com/llvm/llvm-project/commit/67555104d23aaef9b4ce4995ccb98b2ba9aff07d.diff LOG: [MachineSink] Disable if there are any irreducible cycles This is an alternative to D120330: it disables MachineSink entirely for functions with irreducible cycles. This both avoids the correctness problem and ensures we don't perform non-profitable sinks into cycles. At the same time, it may also disable profitable sinks in the same function. This can be made more precise by using MachineCycleInfo in the future. Fixes https://github.com/llvm/llvm-project/issues/53990. Differential Revision: https://reviews.llvm.org/D120800 (cherry picked from commit 6fde0439512580df793f3f48f95757b47de40d2b) Added: Modified: llvm/lib/CodeGen/MachineSink.cpp llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll llvm/test/CodeGen/X86/pr38795.ll llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll llvm/test/CodeGen/X86/x86-shrink-wrapping.ll Removed: ################################################################################ diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 0dbbc218e9464..bc03776bde19d 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -18,12 +18,14 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -429,6 +431,16 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { AA = 
&getAnalysis<AAResultsWrapperPass>().getAAResults(); RegClassInfo.runOnMachineFunction(MF); + // MachineSink currently uses MachineLoopInfo, which only recognizes natural + // loops. As such, we could sink instructions into irreducible cycles, which + // would be non-profitable. + // WARNING: The current implementation of hasStoreBetween() is incorrect for + // sinking into irreducible cycles (PR53990), this bailout is currently + // necessary for correctness, not just profitability. + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); + if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *LI)) + return false; + bool EverMadeChange = false; while (true) { diff --git a/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll b/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll index 024b6c608abab..f93e181d157c7 100644 --- a/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll +++ b/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll @@ -24,7 +24,7 @@ define dso_local void @n(i32* %o, i32 %p, i32 %u) nounwind { ; CHECK-NEXT: movq %r15, %rdi ; CHECK-NEXT: callq l ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: jne .LBB0_9 ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: cmpl $0, e(%rip) @@ -44,21 +44,19 @@ define dso_local void @n(i32* %o, i32 %p, i32 %u) nounwind { ; CHECK-NEXT: callq i ; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: orl %r14d, %ebp -; CHECK-NEXT: testl %r13d, %r13d -; CHECK-NEXT: je .LBB0_6 -; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: andl $4, %ebx -; CHECK-NEXT: jmp .LBB0_3 -; CHECK-NEXT: .LBB0_6: # %if.end12 +; CHECK-NEXT: testl %r13d, %r13d +; CHECK-NEXT: jne .LBB0_3 +; CHECK-NEXT: # %bb.5: # %if.end12 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: je .LBB0_9 -; CHECK-NEXT: # %bb.7: # %if.then14 +; CHECK-NEXT: je .LBB0_8 +; CHECK-NEXT: # %bb.6: # %if.then14 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP 
-; CHECK-NEXT: jmp .LBB0_10 +; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .Ltmp0: # Block address taken -; CHECK-NEXT: # %bb.8: # %if.then20.critedge +; CHECK-NEXT: # %bb.7: # %if.then20.critedge ; CHECK-NEXT: movl j(%rip), %edi ; CHECK-NEXT: movslq %eax, %rcx ; CHECK-NEXT: movl $1, %esi @@ -71,9 +69,9 @@ define dso_local void @n(i32* %o, i32 %p, i32 %u) nounwind { ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: jmp k # TAILCALL -; CHECK-NEXT: .LBB0_9: # %if.else +; CHECK-NEXT: .LBB0_8: # %if.else ; CHECK-NEXT: incq 0 -; CHECK-NEXT: .LBB0_10: # %cleanup +; CHECK-NEXT: .LBB0_9: # %cleanup ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll index d805dcad8b6e6..b526e4f471b1a 100644 --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -32,13 +32,14 @@ define dso_local void @fn() { ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_16: # %for.inc +; CHECK-NEXT: .LBB0_15: # %for.inc ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_20 Depth 2 +; CHECK-NEXT: # Child Loop BB0_19 Depth 2 ; CHECK-NEXT: cmpb $8, %dl ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: ja .LBB0_3 @@ -55,7 +56,7 @@ define dso_local void @fn() { ; CHECK-NEXT: movb %cl, %dh ; CHECK-NEXT: movl $0, h ; CHECK-NEXT: cmpb $8, %dl -; CHECK-NEXT: jg .LBB0_8 +; CHECK-NEXT: jg .LBB0_9 ; CHECK-NEXT: # %bb.5: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %eax, %esi @@ -64,12 +65,10 @@ define dso_local void @fn() { ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: testb 
%bl, %bl -; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload ; CHECK-NEXT: movb %dh, %dl -; CHECK-NEXT: jne .LBB0_16 +; CHECK-NEXT: jne .LBB0_15 ; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_3: # %if.then @@ -78,82 +77,82 @@ define dso_local void @fn() { ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: # implicit-def: $eax +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_9: # %if.end21 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_10 +; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_6: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movb %dl, %dh ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB0_7 -; CHECK-NEXT: .LBB0_11: # %af +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: movl $0, %edi +; CHECK-NEXT: movb %cl, %dl +; CHECK-NEXT: je .LBB0_19 +; CHECK-NEXT: # %bb.7: # %af ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_12 -; CHECK-NEXT: .LBB0_17: # %if.end39 +; CHECK-NEXT: jne .LBB0_8 +; CHECK-NEXT: .LBB0_16: # %if.end39 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_19 -; CHECK-NEXT: # %bb.18: # %if.then41 +; CHECK-NEXT: je .LBB0_18 +; CHECK-NEXT: # %bb.17: # %if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf -; CHECK-NEXT: .LBB0_19: # %for.end46 +; CHECK-NEXT: .LBB0_18: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; 
CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $dh ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_20 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_8: # %if.end21 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: movb %dl, %dh -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_20: # %for.cond47 +; CHECK-NEXT: .LBB0_19: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_20 -; CHECK-NEXT: # %bb.21: # %for.cond47 -; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 +; CHECK-NEXT: jne .LBB0_19 +; CHECK-NEXT: # %bb.20: # %for.cond47 +; CHECK-NEXT: # in Loop: Header=BB0_19 Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_20 -; CHECK-NEXT: .LBB0_9: # %ae +; CHECK-NEXT: jne .LBB0_19 +; CHECK-NEXT: .LBB0_10: # %ae ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: # %bb.13: # %if.end26 +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT: # %bb.12: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je .LBB0_16 -; CHECK-NEXT: # %bb.14: # %if.end26 +; CHECK-NEXT: je .LBB0_15 +; CHECK-NEXT: # %bb.13: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB0_16 -; CHECK-NEXT: # %bb.15: # %if.then31 +; CHECK-NEXT: jne .LBB0_15 +; CHECK-NEXT: # %bb.14: # %if.then31 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: jmp 
.LBB0_16 +; CHECK-NEXT: jmp .LBB0_15 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %edi, %esi ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_17 -; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: je .LBB0_16 +; CHECK-NEXT: .LBB0_8: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $edi ; CHECK-NEXT: # implicit-def: $cl -; CHECK-NEXT: # kill: killed $cl ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: jmp .LBB0_7 +; CHECK-NEXT: jmp .LBB0_6 entry: br label %for.cond diff --git a/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll b/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll index 3d7ff6cbe676a..4f56d7b16a879 100644 --- a/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll +++ b/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll @@ -7,18 +7,15 @@ define void @test(i1 %c, i64* %p, i64* noalias %p2) nounwind { ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: movq %rsi, %r14 -; CHECK-NEXT: movl %edi, %r15d +; CHECK-NEXT: movl %edi, %r14d +; CHECK-NEXT: movq (%rsi), %rbp ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8) ; CHECK-NEXT: .LBB0_1: # %split.3 -; CHECK-NEXT: movq (%r14), %rbp -; CHECK-NEXT: testb $1, %r15b +; CHECK-NEXT: testb $1, %r14b ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.2: # %clobber ; CHECK-NEXT: callq clobber@PLT diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll index 0f8bb837f82a5..b44895293b411 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll 
@@ -1377,6 +1377,8 @@ define i32 @irreducibleCFG() #4 { ; ENABLE-NEXT: pushq %rbx ; ENABLE-NEXT: pushq %rax ; ENABLE-NEXT: .cfi_offset %rbx, -24 +; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; ENABLE-NEXT: cmpb $0, (%rax) ; ENABLE-NEXT: je LBB16_2 @@ -1386,24 +1388,20 @@ define i32 @irreducibleCFG() #4 { ; ENABLE-NEXT: jmp LBB16_1 ; ENABLE-NEXT: LBB16_2: ## %split ; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax +; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: cmpl $0, (%rax) -; ENABLE-NEXT: je LBB16_3 -; ENABLE-NEXT: ## %bb.4: ## %for.body4.i -; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; ENABLE-NEXT: movl (%rax), %edi +; ENABLE-NEXT: je LBB16_4 +; ENABLE-NEXT: ## %bb.3: ## %for.body4.i ; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: callq _something -; ENABLE-NEXT: jmp LBB16_5 -; ENABLE-NEXT: LBB16_3: -; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: .p2align 4, 0x90 -; ENABLE-NEXT: LBB16_5: ## %for.inc +; ENABLE-NEXT: LBB16_4: ## %for.inc ; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: incl %ebx ; ENABLE-NEXT: cmpl $7, %ebx -; ENABLE-NEXT: jl LBB16_5 -; ENABLE-NEXT: ## %bb.6: ## %fn1.exit +; ENABLE-NEXT: jl LBB16_4 +; ENABLE-NEXT: ## %bb.5: ## %fn1.exit ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: addq $8, %rsp ; ENABLE-NEXT: popq %rbx @@ -1420,6 +1418,8 @@ define i32 @irreducibleCFG() #4 { ; DISABLE-NEXT: pushq %rbx ; DISABLE-NEXT: pushq %rax ; DISABLE-NEXT: .cfi_offset %rbx, -24 +; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; DISABLE-NEXT: cmpb $0, (%rax) ; DISABLE-NEXT: je LBB16_2 @@ -1429,24 +1429,20 @@ define i32 @irreducibleCFG() #4 { ; DISABLE-NEXT: jmp LBB16_1 ; DISABLE-NEXT: LBB16_2: ## %split ; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax +; DISABLE-NEXT: xorl 
%ebx, %ebx ; DISABLE-NEXT: cmpl $0, (%rax) -; DISABLE-NEXT: je LBB16_3 -; DISABLE-NEXT: ## %bb.4: ## %for.body4.i -; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; DISABLE-NEXT: movl (%rax), %edi +; DISABLE-NEXT: je LBB16_4 +; DISABLE-NEXT: ## %bb.3: ## %for.body4.i ; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: callq _something -; DISABLE-NEXT: jmp LBB16_5 -; DISABLE-NEXT: LBB16_3: -; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: .p2align 4, 0x90 -; DISABLE-NEXT: LBB16_5: ## %for.inc +; DISABLE-NEXT: LBB16_4: ## %for.inc ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: incl %ebx ; DISABLE-NEXT: cmpl $7, %ebx -; DISABLE-NEXT: jl LBB16_5 -; DISABLE-NEXT: ## %bb.6: ## %fn1.exit +; DISABLE-NEXT: jl LBB16_4 +; DISABLE-NEXT: ## %bb.5: ## %fn1.exit ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: addq $8, %rsp ; DISABLE-NEXT: popq %rbx _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits