lebedev.ri updated this revision to Diff 352176.
lebedev.ri retitled this revision from "[NewPM] Remove SpeculateAroundPHIs pass 
from pipeline" to "[NewPM] Remove SpeculateAroundPHIs pass".
lebedev.ri added a comment.
Herald added a subscriber: mgorny.

On Tue, Jun 15, 2021 at 8:14 PM Wei Mi <w...@google.com> wrote:

> On Mon, Jun 14, 2021 at 4:52 PM Wei Mi <w...@google.com> wrote:
>
>> On Mon, Jun 14, 2021 at 4:04 PM Xinliang David Li <davi...@google.com> wrote:
>>
>>> On Mon, Jun 14, 2021 at 3:59 PM Roman Lebedev via Phabricator 
>>> <revi...@reviews.llvm.org> wrote:
>>>
>>>> lebedev.ri added a subscriber: MaskRay.
>>>> lebedev.ri added a comment.
>>>>
>>>> In D104099#2815531 <https://reviews.llvm.org/D104099#2815531> 
>>>> https://reviews.llvm.org/D104099#2815531, @wenlei wrote:
>>>>
>>>>> In D104099#2814167 <https://reviews.llvm.org/D104099#2814167> 
>>>>> https://reviews.llvm.org/D104099#2814167, @davidxl wrote:
>>>>>
>>>>>> Adding Wei to help measure performance impact on our internal workloads. 
>>>>>>  Also add Wenlei to help measure impact with FB's workloads.
>>>>>
>>>>> Measured perf using FB internal workload w/ and w/o this pass, result is 
>>>>> neutral.
>>>>
>>>> Thank you for checking!
>>>>
>>>> So far, it seems the reaction to this proposal has been overwhelmingly 
>>>> positive.
>>>> Does anyone else wish to chime in? Should i land this? @asbirlea @MaskRay ?
>>>
>>> Wei is doing more measurement @google. Please wait for the response.
>>>
>>> David
>>
>> Start doing the test. Will report back.
>>
>> Wei.
>
> No performance change found in google internal benchmarks.
>
>  Wei.

Thanks for checking.
So, so far no one can point out why this pass is beneficial? :]

How about this then?

>>  
>>
>>>> Repository:
>>>>   rG <https://reviews.llvm.org/source/llvm-github/> LLVM Github Monorepo
>>>>
>>>> CHANGES SINCE LAST ACTION
>>>>   https://reviews.llvm.org/D104099/new/
>>>>
>>>> https://reviews.llvm.org/D104099


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104099/new/

https://reviews.llvm.org/D104099

Files:
  clang/test/CodeGen/thinlto-distributed-newpm.ll
  llvm/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Passes/PassRegistry.def
  llvm/lib/Transforms/Scalar/CMakeLists.txt
  llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
  llvm/test/Other/new-pm-defaults.ll
  llvm/test/Other/new-pm-thinlto-defaults.ll
  llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
  llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
  llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
  
llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
  llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll
  llvm/test/Transforms/SpeculateAroundPHIs/basic-x86.ll
  llvm/test/Transforms/SpeculateAroundPHIs/convergent.ll
  llvm/test/Transforms/SpeculateAroundPHIs/pr42991.ll
  llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn

Index: llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn
===================================================================
--- llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn
+++ llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn
@@ -83,7 +83,6 @@
     "SimpleLoopUnswitch.cpp",
     "SimplifyCFGPass.cpp",
     "Sink.cpp",
-    "SpeculateAroundPHIs.cpp",
     "SpeculativeExecution.cpp",
     "StraightLineStrengthReduce.cpp",
     "StructurizeCFG.cpp",
Index: llvm/test/Transforms/SpeculateAroundPHIs/pr42991.ll
===================================================================
--- llvm/test/Transforms/SpeculateAroundPHIs/pr42991.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: opt -S -passes=spec-phis %s
-
-; This testcase crashes during the speculate around PHIs pass. The pass however
-; results in no changes.
-
-define i32 @test1() {
-entry:
-  callbr void asm sideeffect "", "X,X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@test1, %return), i8* blockaddress(@test1, %f))
-          to label %asm.fallthrough [label %return, label %f]
-
-asm.fallthrough:
-  br label %return
-
-f:
-  br label %return
-
-return:
-  %retval.0 = phi i32 [ 0, %f ], [ 1, %asm.fallthrough ], [ 1, %entry ]
-  ret i32 %retval.0
-}
-
-define void @test2() {
-entry:
-  br label %tailrecurse
-
-tailrecurse:
-  %call = tail call i32 @test3()
-  %tobool1 = icmp eq i32 %call, 0
-  callbr void asm sideeffect "", "X,X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@test2, %test1.exit), i8* blockaddress(@test2, %f.i))
-          to label %if.end6 [label %test1.exit, label %f.i]
-
-f.i:
-  br label %test1.exit
-
-test1.exit:
-  %retval.0.i = phi i1 [ false, %f.i ], [ true, %tailrecurse ]
-  %brmerge = or i1 %tobool1, %retval.0.i
-  br i1 %brmerge, label %if.end6, label %tailrecurse
-
-if.end6:
-  ret void
-}
-
-declare i32 @test3()
Index: llvm/test/Transforms/SpeculateAroundPHIs/convergent.ll
===================================================================
--- llvm/test/Transforms/SpeculateAroundPHIs/convergent.ll
+++ /dev/null
@@ -1,98 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=spec-phis < %s | FileCheck %s
-; Make sure convergent and noduplicate calls aren't duplicated.
-
-declare i32 @llvm.convergent(i32) #0
-declare i32 @llvm.noduplicate(i32) #1
-declare i32 @llvm.regular(i32) #2
-
-define i32 @test_convergent(i1 %flag, i32 %arg) #0 {
-; CHECK-LABEL: @test_convergent(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ]
-; CHECK-NEXT:    [[SUM:%.*]] = call i32 @llvm.convergent(i32 [[P]])
-; CHECK-NEXT:    ret i32 [[SUM]]
-;
-entry:
-  br i1 %flag, label %a, label %b
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-exit:
-  %p = phi i32 [ 7, %a ], [ 11, %b ]
-  %sum = call i32 @llvm.convergent(i32 %p)
-  ret i32 %sum
-}
-
-define i32 @test_noduplicate(i1 %flag, i32 %arg) #1 {
-; CHECK-LABEL: @test_noduplicate(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ]
-; CHECK-NEXT:    [[SUM:%.*]] = call i32 @llvm.noduplicate(i32 [[P]])
-; CHECK-NEXT:    ret i32 [[SUM]]
-;
-entry:
-  br i1 %flag, label %a, label %b
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-exit:
-  %p = phi i32 [ 7, %a ], [ 11, %b ]
-  %sum = call i32 @llvm.noduplicate(i32 %p)
-  ret i32 %sum
-}
-
-; Otherwise identical function which should be transformed.
-define i32 @test_reference(i1 %flag, i32 %arg) #2 {
-; CHECK-LABEL: @test_reference(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    [[SUM_0:%.*]] = call i32 @llvm.regular(i32 7)
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    [[SUM_1:%.*]] = call i32 @llvm.regular(i32 11)
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ]
-; CHECK-NEXT:    ret i32 [[SUM_PHI]]
-;
-entry:
-  br i1 %flag, label %a, label %b
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-exit:
-  %p = phi i32 [ 7, %a ], [ 11, %b ]
-  %sum = call i32 @llvm.regular(i32 %p)
-  ret i32 %sum
-}
-
-
-attributes #0 = { nounwind readnone convergent speculatable }
-attributes #1 = { nounwind readnone noduplicate speculatable }
-attributes #2 = { nounwind readnone speculatable }
Index: llvm/test/Transforms/SpeculateAroundPHIs/basic-x86.ll
===================================================================
--- llvm/test/Transforms/SpeculateAroundPHIs/basic-x86.ll
+++ /dev/null
@@ -1,639 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; Test the basic functionality of speculating around PHI nodes based on reduced
-; cost of the constant operands to the PHI nodes using the x86 cost model.
-;
-; REQUIRES: x86-registered-target
-; RUN: opt -S -passes=spec-phis < %s | FileCheck %s
-
-target triple = "x86_64-unknown-unknown"
-
-define i32 @test_basic(i1 %flag, i32 %arg) {
-; CHECK-LABEL: @test_basic(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    [[SUM_0:%.*]] = add i32 [[ARG:%.*]], 7
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    [[SUM_1:%.*]] = add i32 [[ARG]], 11
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ]
-; CHECK-NEXT:    ret i32 [[SUM_PHI]]
-;
-entry:
-  br i1 %flag, label %a, label %b
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-exit:
-  %p = phi i32 [ 7, %a ], [ 11, %b ]
-  %sum = add i32 %arg, %p
-  ret i32 %sum
-}
-
-; Check that we handle commuted operands and get the constant onto the RHS.
-define i32 @test_commuted(i1 %flag, i32 %arg) {
-; CHECK-LABEL: @test_commuted(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    [[SUM_0:%.*]] = add i32 [[ARG:%.*]], 7
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    [[SUM_1:%.*]] = add i32 [[ARG]], 11
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ]
-; CHECK-NEXT:    ret i32 [[SUM_PHI]]
-;
-entry:
-  br i1 %flag, label %a, label %b
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-exit:
-  %p = phi i32 [ 7, %a ], [ 11, %b ]
-  %sum = add i32 %p, %arg
-  ret i32 %sum
-}
-
-define i32 @test_split_crit_edge(i1 %flag, i32 %arg) {
-; CHECK-LABEL: @test_split_crit_edge(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[ENTRY_EXIT_CRIT_EDGE:%.*]], label [[A:%.*]]
-; CHECK:       entry.exit_crit_edge:
-; CHECK-NEXT:    [[SUM_0:%.*]] = add i32 [[ARG:%.*]], 7
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    [[SUM_1:%.*]] = add i32 [[ARG]], 11
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[ENTRY_EXIT_CRIT_EDGE]] ], [ [[SUM_1]], [[A]] ]
-; CHECK-NEXT:    ret i32 [[SUM_PHI]]
-;
-entry:
-  br i1 %flag, label %exit, label %a
-
-a:
-  br label %exit
-
-exit:
-  %p = phi i32 [ 7, %entry ], [ 11, %a ]
-  %sum = add i32 %arg, %p
-  ret i32 %sum
-}
-
-define i32 @test_no_spec_dominating_inst(i1 %flag, i32* %ptr) {
-; CHECK-LABEL: @test_no_spec_dominating_inst(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[PTR:%.*]]
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    [[SUM_0:%.*]] = add i32 [[LOAD]], 7
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    [[SUM_1:%.*]] = add i32 [[LOAD]], 11
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ]
-; CHECK-NEXT:    ret i32 [[SUM_PHI]]
-;
-entry:
-  %load = load i32, i32* %ptr
-  br i1 %flag, label %a, label %b
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-exit:
-  %p = phi i32 [ 7, %a ], [ 11, %b ]
-  %sum = add i32 %load, %p
-  ret i32 %sum
-}
-
-; We have special logic handling PHI nodes, make sure it doesn't get confused
-; by a dominating PHI.
-define i32 @test_no_spec_dominating_phi(i1 %flag1, i1 %flag2, i32 %x, i32 %y) {
-; CHECK-LABEL: @test_no_spec_dominating_phi(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG1:%.*]], label [[X_BLOCK:%.*]], label [[Y_BLOCK:%.*]]
-; CHECK:       x.block:
-; CHECK-NEXT:    br label [[MERGE:%.*]]
-; CHECK:       y.block:
-; CHECK-NEXT:    br label [[MERGE]]
-; CHECK:       merge:
-; CHECK-NEXT:    [[XY_PHI:%.*]] = phi i32 [ [[X:%.*]], [[X_BLOCK]] ], [ [[Y:%.*]], [[Y_BLOCK]] ]
-; CHECK-NEXT:    br i1 [[FLAG2:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    [[SUM_0:%.*]] = add i32 [[XY_PHI]], 7
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    [[SUM_1:%.*]] = add i32 [[XY_PHI]], 11
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ]
-; CHECK-NEXT:    ret i32 [[SUM_PHI]]
-;
-entry:
-  br i1 %flag1, label %x.block, label %y.block
-
-x.block:
-  br label %merge
-
-y.block:
-  br label %merge
-
-merge:
-  %xy.phi = phi i32 [ %x, %x.block ], [ %y, %y.block ]
-  br i1 %flag2, label %a, label %b
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-exit:
-  %p = phi i32 [ 7, %a ], [ 11, %b ]
-  %sum = add i32 %xy.phi, %p
-  ret i32 %sum
-}
-
-; Ensure that we will speculate some number of "free" instructions on the given
-; architecture even though they are unrelated to the PHI itself.
-define i32 @test_speculate_free_insts(i1 %flag, i64 %arg) {
-; CHECK-LABEL: @test_speculate_free_insts(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    [[T1_0:%.*]] = trunc i64 [[ARG:%.*]] to i48
-; CHECK-NEXT:    [[T2_0:%.*]] = trunc i48 [[T1_0]] to i32
-; CHECK-NEXT:    [[SUM_0:%.*]] = add i32 [[T2_0]], 7
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    [[T1_1:%.*]] = trunc i64 [[ARG]] to i48
-; CHECK-NEXT:    [[T2_1:%.*]] = trunc i48 [[T1_1]] to i32
-; CHECK-NEXT:    [[SUM_1:%.*]] = add i32 [[T2_1]], 11
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ]
-; CHECK-NEXT:    ret i32 [[SUM_PHI]]
-;
-entry:
-  br i1 %flag, label %a, label %b
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-exit:
-  %p = phi i32 [ 7, %a ], [ 11, %b ]
-  %t1 = trunc i64 %arg to i48
-  %t2 = trunc i48 %t1 to i32
-  %sum = add i32 %t2, %p
-  ret i32 %sum
-}
-
-define i32 @test_speculate_free_phis(i1 %flag, i32 %arg1, i32 %arg2) {
-; CHECK-LABEL: @test_speculate_free_phis(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    [[SUM_0:%.*]] = add i32 [[ARG1:%.*]], 7
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    [[SUM_1:%.*]] = add i32 [[ARG2:%.*]], 11
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ]
-; CHECK-NEXT:    [[P2:%.*]] = phi i32 [ [[ARG1]], [[A]] ], [ [[ARG2]], [[B]] ]
-; CHECK-NEXT:    ret i32 [[SUM_PHI]]
-;
-entry:
-  br i1 %flag, label %a, label %b
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-; We don't DCE the now unused PHI node...
-exit:
-  %p1 = phi i32 [ 7, %a ], [ 11, %b ]
-  %p2 = phi i32 [ %arg1, %a ], [ %arg2, %b ]
-  %sum = add i32 %p2, %p1
-  ret i32 %sum
-}
-
-; We shouldn't speculate multiple uses even if each individually looks
-; profitable because of the total cost.
-define i32 @test_no_spec_multi_uses(i1 %flag, i32 %arg1, i32 %arg2, i32 %arg3) {
-; CHECK-LABEL: @test_no_spec_multi_uses(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ]
-; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[ARG1:%.*]], [[P]]
-; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ARG2:%.*]], [[P]]
-; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[ARG3:%.*]], [[P]]
-; CHECK-NEXT:    [[SUM1:%.*]] = add i32 [[ADD1]], [[ADD2]]
-; CHECK-NEXT:    [[SUM2:%.*]] = add i32 [[SUM1]], [[ADD3]]
-; CHECK-NEXT:    ret i32 [[SUM2]]
-;
-entry:
-  br i1 %flag, label %a, label %b
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-exit:
-  %p = phi i32 [ 7, %a ], [ 11, %b ]
-  %add1 = add i32 %arg1, %p
-  %add2 = add i32 %arg2, %p
-  %add3 = add i32 %arg3, %p
-  %sum1 = add i32 %add1, %add2
-  %sum2 = add i32 %sum1, %add3
-  ret i32 %sum2
-}
-
-define i32 @test_multi_phis1(i1 %flag, i32 %arg) {
-; CHECK-LABEL: @test_multi_phis1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    [[SUM1_0:%.*]] = add i32 [[ARG:%.*]], 1
-; CHECK-NEXT:    [[SUM2_0:%.*]] = add i32 [[SUM1_0]], 3
-; CHECK-NEXT:    [[SUM3_0:%.*]] = add i32 [[SUM2_0]], 5
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    [[SUM1_1:%.*]] = add i32 [[ARG]], 2
-; CHECK-NEXT:    [[SUM2_1:%.*]] = add i32 [[SUM1_1]], 4
-; CHECK-NEXT:    [[SUM3_1:%.*]] = add i32 [[SUM2_1]], 6
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[SUM3_PHI:%.*]] = phi i32 [ [[SUM3_0]], [[A]] ], [ [[SUM3_1]], [[B]] ]
-; CHECK-NEXT:    ret i32 [[SUM3_PHI]]
-;
-entry:
-  br i1 %flag, label %a, label %b
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-exit:
-  %p1 = phi i32 [ 1, %a ], [ 2, %b ]
-  %p2 = phi i32 [ 3, %a ], [ 4, %b ]
-  %p3 = phi i32 [ 5, %a ], [ 6, %b ]
-  %sum1 = add i32 %arg, %p1
-  %sum2 = add i32 %sum1, %p2
-  %sum3 = add i32 %sum2, %p3
-  ret i32 %sum3
-}
-
-; Check that the order of the PHIs doesn't impact the behavior.
-define i32 @test_multi_phis2(i1 %flag, i32 %arg) {
-; CHECK-LABEL: @test_multi_phis2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    [[SUM1_0:%.*]] = add i32 [[ARG:%.*]], 1
-; CHECK-NEXT:    [[SUM2_0:%.*]] = add i32 [[SUM1_0]], 3
-; CHECK-NEXT:    [[SUM3_0:%.*]] = add i32 [[SUM2_0]], 5
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    [[SUM1_1:%.*]] = add i32 [[ARG]], 2
-; CHECK-NEXT:    [[SUM2_1:%.*]] = add i32 [[SUM1_1]], 4
-; CHECK-NEXT:    [[SUM3_1:%.*]] = add i32 [[SUM2_1]], 6
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[SUM3_PHI:%.*]] = phi i32 [ [[SUM3_0]], [[A]] ], [ [[SUM3_1]], [[B]] ]
-; CHECK-NEXT:    ret i32 [[SUM3_PHI]]
-;
-entry:
-  br i1 %flag, label %a, label %b
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-exit:
-  %p3 = phi i32 [ 5, %a ], [ 6, %b ]
-  %p2 = phi i32 [ 3, %a ], [ 4, %b ]
-  %p1 = phi i32 [ 1, %a ], [ 2, %b ]
-  %sum1 = add i32 %arg, %p1
-  %sum2 = add i32 %sum1, %p2
-  %sum3 = add i32 %sum2, %p3
-  ret i32 %sum3
-}
-
-define i32 @test_no_spec_indirectbr(i1 %flag, i32 %arg) {
-; CHECK-LABEL: @test_no_spec_indirectbr(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    indirectbr i8* undef, [label %exit]
-; CHECK:       b:
-; CHECK-NEXT:    indirectbr i8* undef, [label %exit]
-; CHECK:       exit:
-; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ]
-; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[ARG:%.*]], [[P]]
-; CHECK-NEXT:    ret i32 [[SUM]]
-;
-entry:
-  br i1 %flag, label %a, label %b
-
-a:
-  indirectbr i8* undef, [label %exit]
-
-b:
-  indirectbr i8* undef, [label %exit]
-
-exit:
-  %p = phi i32 [ 7, %a ], [ 11, %b ]
-  %sum = add i32 %arg, %p
-  ret i32 %sum
-}
-
-declare void @g()
-
-declare i32 @__gxx_personality_v0(...)
-
-; FIXME: We should be able to handle this case -- only the exceptional edge is
-; impossible to split.
-define i32 @test_no_spec_invoke_continue(i1 %flag, i32 %arg) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-; CHECK-LABEL: @test_no_spec_invoke_continue(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    invoke void @g()
-; CHECK-NEXT:    to label [[EXIT:%.*]] unwind label [[LPAD:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    invoke void @g()
-; CHECK-NEXT:    to label [[EXIT]] unwind label [[LPAD]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ]
-; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[ARG:%.*]], [[P]]
-; CHECK-NEXT:    ret i32 [[SUM]]
-; CHECK:       lpad:
-; CHECK-NEXT:    [[LP:%.*]] = landingpad { i8*, i32 }
-; CHECK-NEXT:    cleanup
-; CHECK-NEXT:    resume { i8*, i32 } undef
-;
-entry:
-  br i1 %flag, label %a, label %b
-
-a:
-  invoke void @g()
-  to label %exit unwind label %lpad
-
-b:
-  invoke void @g()
-  to label %exit unwind label %lpad
-
-exit:
-  %p = phi i32 [ 7, %a ], [ 11, %b ]
-  %sum = add i32 %arg, %p
-  ret i32 %sum
-
-lpad:
-  %lp = landingpad { i8*, i32 }
-  cleanup
-  resume { i8*, i32 } undef
-}
-
-define i32 @test_no_spec_landingpad(i32 %arg, i32* %ptr) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-; CHECK-LABEL: @test_no_spec_landingpad(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    invoke void @g()
-; CHECK-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
-; CHECK:       invoke.cont:
-; CHECK-NEXT:    invoke void @g()
-; CHECK-NEXT:    to label [[EXIT:%.*]] unwind label [[LPAD]]
-; CHECK:       lpad:
-; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 7, [[ENTRY:%.*]] ], [ 11, [[INVOKE_CONT]] ]
-; CHECK-NEXT:    [[LP:%.*]] = landingpad { i8*, i32 }
-; CHECK-NEXT:    cleanup
-; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[ARG:%.*]], [[P]]
-; CHECK-NEXT:    store i32 [[SUM]], i32* [[PTR:%.*]]
-; CHECK-NEXT:    resume { i8*, i32 } undef
-; CHECK:       exit:
-; CHECK-NEXT:    ret i32 0
-;
-entry:
-  invoke void @g()
-  to label %invoke.cont unwind label %lpad
-
-invoke.cont:
-  invoke void @g()
-  to label %exit unwind label %lpad
-
-lpad:
-  %p = phi i32 [ 7, %entry ], [ 11, %invoke.cont ]
-  %lp = landingpad { i8*, i32 }
-  cleanup
-  %sum = add i32 %arg, %p
-  store i32 %sum, i32* %ptr
-  resume { i8*, i32 } undef
-
-exit:
-  ret i32 0
-}
-
-declare i32 @__CxxFrameHandler3(...)
-
-define i32 @test_no_spec_cleanuppad(i32 %arg, i32* %ptr) personality i32 (...)* @__CxxFrameHandler3 {
-; CHECK-LABEL: @test_no_spec_cleanuppad(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    invoke void @g()
-; CHECK-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
-; CHECK:       invoke.cont:
-; CHECK-NEXT:    invoke void @g()
-; CHECK-NEXT:    to label [[EXIT:%.*]] unwind label [[LPAD]]
-; CHECK:       lpad:
-; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 7, [[ENTRY:%.*]] ], [ 11, [[INVOKE_CONT]] ]
-; CHECK-NEXT:    [[CP:%.*]] = cleanuppad within none []
-; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[ARG:%.*]], [[P]]
-; CHECK-NEXT:    store i32 [[SUM]], i32* [[PTR:%.*]]
-; CHECK-NEXT:    cleanupret from [[CP]] unwind to caller
-; CHECK:       exit:
-; CHECK-NEXT:    ret i32 0
-;
-entry:
-  invoke void @g()
-  to label %invoke.cont unwind label %lpad
-
-invoke.cont:
-  invoke void @g()
-  to label %exit unwind label %lpad
-
-lpad:
-  %p = phi i32 [ 7, %entry ], [ 11, %invoke.cont ]
-  %cp = cleanuppad within none []
-  %sum = add i32 %arg, %p
-  store i32 %sum, i32* %ptr
-  cleanupret from %cp unwind to caller
-
-exit:
-  ret i32 0
-}
-
-; Check that we don't fall over when confronted with seemingly reasonable code
-; for us to handle but in an unreachable region and with non-PHI use-def
-; cycles.
-define i32 @test_unreachable_non_phi_cycles(i1 %flag, i32 %arg) {
-; CHECK-LABEL: @test_unreachable_non_phi_cycles(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret i32 42
-; CHECK:       a:
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 7, [[A:%.*]] ], [ 11, [[B:%.*]] ]
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[SUM:%.*]] to i64
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i64 [[ZEXT]] to i32
-; CHECK-NEXT:    [[SUM]] = add i32 [[TRUNC]], [[P]]
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A]], label [[B]]
-;
-entry:
-  ret i32 42
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-exit:
-  %p = phi i32 [ 7, %a ], [ 11, %b ]
-  %zext = zext i32 %sum to i64
-  %trunc = trunc i64 %zext to i32
-  %sum = add i32 %trunc, %p
-  br i1 %flag, label %a, label %b
-}
-
-; Check that we don't speculate in the face of an expensive immediate. There
-; are two reasons this should never speculate. First, even a local analysis
-; should fail because it makes some paths (%a) potentially more expensive due
-; to multiple uses of the immediate. Additionally, when we go to speculate the
-; instructions, their cost will also be too high.
-; FIXME: The goal is really to test the first property, but there doesn't
-; happen to be any way to use free-to-speculate instructions here so that it
-; would be the only interesting property.
-define i64 @test_expensive_imm(i32 %flag, i64 %arg) {
-; CHECK-LABEL: @test_expensive_imm(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[FLAG:%.*]], label [[A:%.*]] [
-; CHECK-NEXT:    i32 1, label [[B:%.*]]
-; CHECK-NEXT:    i32 2, label [[C:%.*]]
-; CHECK-NEXT:    i32 3, label [[D:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       a:
-; CHECK-NEXT:    br label [[EXIT:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       c:
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       d:
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[P:%.*]] = phi i64 [ 4294967296, [[A]] ], [ 1, [[B]] ], [ 1, [[C]] ], [ 1, [[D]] ]
-; CHECK-NEXT:    [[SUM1:%.*]] = add i64 [[ARG:%.*]], [[P]]
-; CHECK-NEXT:    [[SUM2:%.*]] = add i64 [[SUM1]], [[P]]
-; CHECK-NEXT:    ret i64 [[SUM2]]
-;
-entry:
-  switch i32 %flag, label %a [
-  i32 1, label %b
-  i32 2, label %c
-  i32 3, label %d
-  ]
-
-a:
-  br label %exit
-
-b:
-  br label %exit
-
-c:
-  br label %exit
-
-d:
-  br label %exit
-
-exit:
-  %p = phi i64 [ 4294967296, %a ], [ 1, %b ], [ 1, %c ], [ 1, %d ]
-  %sum1 = add i64 %arg, %p
-  %sum2 = add i64 %sum1, %p
-  ret i64 %sum2
-}
-
-define i32 @test_no_spec_non_postdominating_uses(i1 %flag1, i1 %flag2, i32 %arg) {
-; CHECK-LABEL: @test_no_spec_non_postdominating_uses(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG1:%.*]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    [[SUM1_0:%.*]] = add i32 [[ARG:%.*]], 7
-; CHECK-NEXT:    br label [[MERGE:%.*]]
-; CHECK:       b:
-; CHECK-NEXT:    [[SUM1_1:%.*]] = add i32 [[ARG]], 11
-; CHECK-NEXT:    br label [[MERGE]]
-; CHECK:       merge:
-; CHECK-NEXT:    [[SUM1_PHI:%.*]] = phi i32 [ [[SUM1_0]], [[A]] ], [ [[SUM1_1]], [[B]] ]
-; CHECK-NEXT:    [[P2:%.*]] = phi i32 [ 13, [[A]] ], [ 42, [[B]] ]
-; CHECK-NEXT:    br i1 [[FLAG2:%.*]], label [[EXIT1:%.*]], label [[EXIT2:%.*]]
-; CHECK:       exit1:
-; CHECK-NEXT:    ret i32 [[SUM1_PHI]]
-; CHECK:       exit2:
-; CHECK-NEXT:    [[SUM2:%.*]] = add i32 [[ARG]], [[P2]]
-; CHECK-NEXT:    ret i32 [[SUM2]]
-;
-entry:
-  br i1 %flag1, label %a, label %b
-
-a:
-  br label %merge
-
-b:
-  br label %merge
-
-merge:
-  %p1 = phi i32 [ 7, %a ], [ 11, %b ]
-  %p2 = phi i32 [ 13, %a ], [ 42, %b ]
-  %sum1 = add i32 %arg, %p1
-  br i1 %flag2, label %exit1, label %exit2
-
-exit1:
-  ret i32 %sum1
-
-exit2:
-  %sum2 = add i32 %arg, %p2
-  ret i32 %sum2
-}
Index: llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll
+++ llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll
@@ -104,21 +104,18 @@
 ; ROTATED_LATER_NEWPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
 ; ROTATED_LATER_NEWPM:       for.body.preheader:
 ; ROTATED_LATER_NEWPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
-; ROTATED_LATER_NEWPM-NEXT:    [[INC_1:%.*]] = add nuw nsw i32 0, 1
 ; ROTATED_LATER_NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
 ; ROTATED_LATER_NEWPM:       for.cond.cleanup:
 ; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
 ; ROTATED_LATER_NEWPM-NEXT:    tail call void @f2()
 ; ROTATED_LATER_NEWPM-NEXT:    br label [[RETURN]]
 ; ROTATED_LATER_NEWPM:       for.body:
-; ROTATED_LATER_NEWPM-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ]
+; ROTATED_LATER_NEWPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
 ; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
 ; ROTATED_LATER_NEWPM-NEXT:    tail call void @f1()
-; ROTATED_LATER_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]]
-; ROTATED_LATER_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]
-; ROTATED_LATER_NEWPM:       for.body.for.body_crit_edge:
-; ROTATED_LATER_NEWPM-NEXT:    [[INC_0]] = add nuw nsw i32 [[INC_PHI]], 1
-; ROTATED_LATER_NEWPM-NEXT:    br label [[FOR_BODY]]
+; ROTATED_LATER_NEWPM-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1
+; ROTATED_LATER_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
+; ROTATED_LATER_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
 ; ROTATED_LATER_NEWPM:       return:
 ; ROTATED_LATER_NEWPM-NEXT:    ret void
 ;
@@ -155,21 +152,18 @@
 ; ROTATE_NEWPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
 ; ROTATE_NEWPM:       for.body.preheader:
 ; ROTATE_NEWPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
-; ROTATE_NEWPM-NEXT:    [[INC_1:%.*]] = add nuw nsw i32 0, 1
 ; ROTATE_NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
 ; ROTATE_NEWPM:       for.cond.cleanup:
 ; ROTATE_NEWPM-NEXT:    tail call void @f0()
 ; ROTATE_NEWPM-NEXT:    tail call void @f2()
 ; ROTATE_NEWPM-NEXT:    br label [[RETURN]]
 ; ROTATE_NEWPM:       for.body:
-; ROTATE_NEWPM-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ]
+; ROTATE_NEWPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
 ; ROTATE_NEWPM-NEXT:    tail call void @f0()
 ; ROTATE_NEWPM-NEXT:    tail call void @f1()
-; ROTATE_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]]
-; ROTATE_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]
-; ROTATE_NEWPM:       for.body.for.body_crit_edge:
-; ROTATE_NEWPM-NEXT:    [[INC_0]] = add nuw nsw i32 [[INC_PHI]], 1
-; ROTATE_NEWPM-NEXT:    br label [[FOR_BODY]]
+; ROTATE_NEWPM-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1
+; ROTATE_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
+; ROTATE_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
 ; ROTATE_NEWPM:       return:
 ; ROTATE_NEWPM-NEXT:    ret void
 ;
Index: llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
+++ llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
@@ -156,36 +156,27 @@
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 0
-; CHECK-NEXT:    [[DOT017:%.*]] = getelementptr inbounds float, float* [[A]], i64 0
-; CHECK-NEXT:    [[DOT018:%.*]] = getelementptr inbounds float, float* [[B]], i64 0
-; CHECK-NEXT:    [[INDEX_NEXT_0:%.*]] = add nuw i64 0, 4
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX_NEXT_PHI:%.*]] = phi i64 [ [[INDEX_NEXT_0]], [[VECTOR_PH]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE:%.*]] ]
-; CHECK-NEXT:    [[DOTPHI:%.*]] = phi float* [ [[DOT018]], [[VECTOR_PH]] ], [ [[DOT120:%.*]], [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE]] ]
-; CHECK-NEXT:    [[DOTPHI21:%.*]] = phi float* [ [[DOT017]], [[VECTOR_PH]] ], [ [[DOT119:%.*]], [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE]] ]
-; CHECK-NEXT:    [[DOTPHI22:%.*]] = phi i32* [ [[DOT0]], [[VECTOR_PH]] ], [ [[DOT1:%.*]], [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[DOTPHI22]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4, !alias.scope !8
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 20, i32 20, i32 20, i32 20>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[DOTPHI21]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4, !alias.scope !11
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[DOTPHI]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4, !alias.scope !13, !noalias !15
-; CHECK-NEXT:    [[TMP7:%.*]] = fadd <4 x float> [[TMP5]], [[WIDE_LOAD15]]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP5]], <4 x float> [[TMP7]]
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast float* [[DOTPHI]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[PREDPHI]], <4 x float>* [[TMP8]], align 4, !alias.scope !13, !noalias !15
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT_PHI]], 10000
-; CHECK-NEXT:    br i1 [[TMP9]], label [[EXIT:%.*]], label [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE]], !llvm.loop [[LOOP16:![0-9]+]]
-; CHECK:       vector.body.vector.body_crit_edge:
-; CHECK-NEXT:    [[DOT1]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX_NEXT_PHI]]
-; CHECK-NEXT:    [[DOT119]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_NEXT_PHI]]
-; CHECK-NEXT:    [[DOT120]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX_NEXT_PHI]]
-; CHECK-NEXT:    [[INDEX_NEXT_1]] = add nuw i64 [[INDEX_NEXT_PHI]], 4
-; CHECK-NEXT:    br label [[VECTOR_BODY]]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4, !alias.scope !8
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 20, i32 20, i32 20, i32 20>
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4, !alias.scope !11
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4, !alias.scope !13, !noalias !15
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD15]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP7]], <4 x float> [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[PREDPHI]], <4 x float>* [[TMP11]], align 4, !alias.scope !13, !noalias !15
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
+; CHECK-NEXT:    br i1 [[TMP12]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       loop.body:
 ; CHECK-NEXT:    [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[C_GEP:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[IV1]]
Index: llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
===================================================================
--- llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
+++ llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
@@ -97,12 +97,9 @@
 ; CHECK-GENERIC-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef
 ; CHECK-GENERIC-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef
 ; CHECK-GENERIC-NEXT:    [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0
-; CHECK-GENERIC-NEXT:    br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[ENTRY_FOR_BODY6_CRIT_EDGE:%.*]]
-; CHECK-GENERIC:       entry.for.body6_crit_edge:
-; CHECK-GENERIC-NEXT:    [[INC_1:%.*]] = add nuw i32 0, 1
-; CHECK-GENERIC-NEXT:    br label [[FOR_BODY6:%.*]]
+; CHECK-GENERIC-NEXT:    br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6:%.*]]
 ; CHECK-GENERIC:       for.body6:
-; CHECK-GENERIC-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY6_FOR_BODY6_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[ENTRY_FOR_BODY6_CRIT_EDGE]] ]
+; CHECK-GENERIC-NEXT:    [[K_03:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY6]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-GENERIC-NEXT:    [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
 ; CHECK-GENERIC-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
 ; CHECK-GENERIC-NEXT:    [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
@@ -111,11 +108,9 @@
 ; CHECK-GENERIC-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4
 ; CHECK-GENERIC-NEXT:    [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP2]]
 ; CHECK-GENERIC-NEXT:    store i32 [[ADD21]], i32* [[ARRAYIDX20]], align 4
-; CHECK-GENERIC-NEXT:    [[CMP5:%.*]] = icmp ult i32 [[INC_PHI]], [[ARG_0]]
-; CHECK-GENERIC-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6_FOR_BODY6_CRIT_EDGE]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK-GENERIC:       for.body6.for.body6_crit_edge:
-; CHECK-GENERIC-NEXT:    [[INC_0]] = add nuw i32 [[INC_PHI]], 1
-; CHECK-GENERIC-NEXT:    br label [[FOR_BODY6]]
+; CHECK-GENERIC-NEXT:    [[INC]] = add nuw i32 [[K_03]], 1
+; CHECK-GENERIC-NEXT:    [[CMP5:%.*]] = icmp ult i32 [[INC]], [[ARG_0]]
+; CHECK-GENERIC-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-GENERIC:       for.end:
 ; CHECK-GENERIC-NEXT:    ret void
 ;
Index: llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -199,7 +199,6 @@
 ; CHECK-O-NEXT: Running pass: InstSimplifyPass
 ; CHECK-O-NEXT: Running pass: DivRemPairsPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
-; CHECK-O-NEXT: Running pass: SpeculateAroundPHIsPass
 ; CHECK-O-NEXT: Running pass: CGProfilePass
 ; CHECK-O-NEXT: Running pass: GlobalDCEPass
 ; CHECK-O-NEXT: Running pass: ConstantMergePass
Index: llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -187,7 +187,6 @@
 ; CHECK-O-NEXT: Running pass: InstSimplifyPass
 ; CHECK-O-NEXT: Running pass: DivRemPairsPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
-; CHECK-O-NEXT: Running pass: SpeculateAroundPHIsPass
 ; CHECK-O-NEXT: Running pass: CGProfilePass
 ; CHECK-O-NEXT: Running pass: GlobalDCEPass
 ; CHECK-O-NEXT: Running pass: ConstantMergePass
Index: llvm/test/Other/new-pm-thinlto-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -218,7 +218,6 @@
 ; CHECK-POSTLINK-O-NEXT: Running pass: InstSimplifyPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: DivRemPairsPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
-; CHECK-POSTLINK-O-NEXT: Running pass: SpeculateAroundPHIsPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: CGProfilePass
 ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: ConstantMergePass
Index: llvm/test/Other/new-pm-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-defaults.ll
+++ llvm/test/Other/new-pm-defaults.ll
@@ -237,7 +237,6 @@
 ; CHECK-O-NEXT: Running pass: InstSimplifyPass
 ; CHECK-O-NEXT: Running pass: DivRemPairsPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
-; CHECK-O-NEXT: Running pass: SpeculateAroundPHIsPass
 ; CHECK-EP-OPTIMIZER-LAST: Running pass: NoOpFunctionPass
 ; CHECK-O-NEXT: Running pass: CGProfilePass
 ; CHECK-O-NEXT: Running pass: GlobalDCEPass
Index: llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ /dev/null
@@ -1,832 +0,0 @@
-//===- SpeculateAroundPHIs.cpp --------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/Sequence.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "spec-phis"
-
-STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around");
-STATISTIC(NumEdgesSplit,
-          "Number of critical edges which were split for speculation");
-STATISTIC(NumSpeculatedInstructions,
-          "Number of instructions we speculated around the PHI nodes");
-STATISTIC(NumNewRedundantInstructions,
-          "Number of new, redundant instructions inserted");
-
-/// Check whether speculating the users of a PHI node around the PHI
-/// will be safe.
-///
-/// This checks both that all of the users are safe and also that all of their
-/// operands are either recursively safe or already available along an incoming
-/// edge to the PHI.
-///
-/// This routine caches both all the safe nodes explored in `PotentialSpecSet`
-/// and the chain of nodes that definitively reach any unsafe node in
-/// `UnsafeSet`. By preserving these between repeated calls to this routine for
-/// PHIs in the same basic block, the exploration here can be reused. However,
-/// these caches must no be reused for PHIs in a different basic block as they
-/// reflect what is available along incoming edges.
-static bool
-isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT,
-                          SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
-                          SmallPtrSetImpl<Instruction *> &UnsafeSet) {
-  auto *PhiBB = PN.getParent();
-  SmallPtrSet<Instruction *, 4> Visited;
-  SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
-
-  // Walk each user of the PHI node.
-  for (Use &U : PN.uses()) {
-    auto *UI = cast<Instruction>(U.getUser());
-
-    // Ensure the use post-dominates the PHI node. This ensures that, in the
-    // absence of unwinding, the use will actually be reached.
-    // FIXME: We use a blunt hammer of requiring them to be in the same basic
-    // block. We should consider using actual post-dominance here in the
-    // future.
-    if (UI->getParent() != PhiBB) {
-      LLVM_DEBUG(dbgs() << "  Unsafe: use in a different BB: " << *UI << "\n");
-      return false;
-    }
-
-    if (const auto *CS = dyn_cast<CallBase>(UI)) {
-      if (CS->isConvergent() || CS->cannotDuplicate()) {
-        LLVM_DEBUG(dbgs() << "  Unsafe: convergent "
-                   "callsite cannot de duplicated: " << *UI << '\n');
-        return false;
-      }
-    }
-
-    // FIXME: This check is much too conservative. We're not going to move these
-    // instructions onto new dynamic paths through the program unless there is
-    // a call instruction between the use and the PHI node. And memory isn't
-    // changing unless there is a store in that same sequence. We should
-    // probably change this to do at least a limited scan of the intervening
-    // instructions and allow handling stores in easily proven safe cases.
-    if (mayBeMemoryDependent(*UI)) {
-      LLVM_DEBUG(dbgs() << "  Unsafe: can't speculate use: " << *UI << "\n");
-      return false;
-    }
-
-    // Now do a depth-first search of everything these users depend on to make
-    // sure they are transitively safe. This is a depth-first search, but we
-    // check nodes in preorder to minimize the amount of checking.
-    Visited.insert(UI);
-    DFSStack.push_back({UI, UI->value_op_begin()});
-    do {
-      User::value_op_iterator OpIt;
-      std::tie(UI, OpIt) = DFSStack.pop_back_val();
-
-      while (OpIt != UI->value_op_end()) {
-        auto *OpI = dyn_cast<Instruction>(*OpIt);
-        // Increment to the next operand for whenever we continue.
-        ++OpIt;
-        // No need to visit non-instructions, which can't form dependencies.
-        if (!OpI)
-          continue;
-
-        // Now do the main pre-order checks that this operand is a viable
-        // dependency of something we want to speculate.
-
-        // First do a few checks for instructions that won't require
-        // speculation at all because they are trivially available on the
-        // incoming edge (either through dominance or through an incoming value
-        // to a PHI).
-        //
-        // The cases in the current block will be trivially dominated by the
-        // edge.
-        auto *ParentBB = OpI->getParent();
-        if (ParentBB == PhiBB) {
-          if (isa<PHINode>(OpI)) {
-            // We can trivially map through phi nodes in the same block.
-            continue;
-          }
-        } else if (DT.dominates(ParentBB, PhiBB)) {
-          // Instructions from dominating blocks are already available.
-          continue;
-        }
-
-        // Once we know that we're considering speculating the operand, check
-        // if we've already explored this subgraph and found it to be safe.
-        if (PotentialSpecSet.count(OpI))
-          continue;
-
-        // If we've already explored this subgraph and found it unsafe, bail.
-        // If when we directly test whether this is safe it fails, bail.
-        if (UnsafeSet.count(OpI) || ParentBB != PhiBB ||
-            mayBeMemoryDependent(*OpI)) {
-          LLVM_DEBUG(dbgs() << "  Unsafe: can't speculate transitive use: "
-                            << *OpI << "\n");
-          // Record the stack of instructions which reach this node as unsafe
-          // so we prune subsequent searches.
-          UnsafeSet.insert(OpI);
-          for (auto &StackPair : DFSStack) {
-            Instruction *I = StackPair.first;
-            UnsafeSet.insert(I);
-          }
-          return false;
-        }
-
-        // Skip any operands we're already recursively checking.
-        if (!Visited.insert(OpI).second)
-          continue;
-
-        // Push onto the stack and descend. We can directly continue this
-        // loop when ascending.
-        DFSStack.push_back({UI, OpIt});
-        UI = OpI;
-        OpIt = OpI->value_op_begin();
-      }
-
-      // This node and all its operands are safe. Go ahead and cache that for
-      // reuse later.
-      PotentialSpecSet.insert(UI);
-
-      // Continue with the next node on the stack.
-    } while (!DFSStack.empty());
-  }
-
-#ifndef NDEBUG
-  // Every visited operand should have been marked as safe for speculation at
-  // this point. Verify this and return success.
-  for (auto *I : Visited)
-    assert(PotentialSpecSet.count(I) &&
-           "Failed to mark a visited instruction as safe!");
-#endif
-  return true;
-}
-
-/// Check whether, in isolation, a given PHI node is both safe and profitable
-/// to speculate users around.
-///
-/// This handles checking whether there are any constant operands to a PHI
-/// which could represent a useful speculation candidate, whether the users of
-/// the PHI are safe to speculate including all their transitive dependencies,
-/// and whether after speculation there will be some cost savings (profit) to
-/// folding the operands into the users of the PHI node. Returns true if both
-/// safe and profitable with relevant cost savings updated in the map and with
-/// an update to the `PotentialSpecSet`. Returns false if either safety or
-/// profitability are absent. Some new entries may be made to the
-/// `PotentialSpecSet` even when this routine returns false, but they remain
-/// conservatively correct.
-///
-/// The profitability check here is a local one, but it checks this in an
-/// interesting way. Beyond checking that the total cost of materializing the
-/// constants will be less than the cost of folding them into their users, it
-/// also checks that no one incoming constant will have a higher cost when
-/// folded into its users rather than materialized. This higher cost could
-/// result in a dynamic *path* that is more expensive even when the total cost
-/// is lower. Currently, all of the interesting cases where this optimization
-/// should fire are ones where it is a no-loss operation in this sense. If we
-/// ever want to be more aggressive here, we would need to balance the
-/// different incoming edges' cost by looking at their respective
-/// probabilities.
-static bool isSafeAndProfitableToSpeculateAroundPHI(
-    PHINode &PN, SmallDenseMap<PHINode *, InstructionCost, 16> &CostSavingsMap,
-    SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
-    SmallPtrSetImpl<Instruction *> &UnsafeSet, DominatorTree &DT,
-    TargetTransformInfo &TTI) {
-  // First see whether there is any cost savings to speculating around this
-  // PHI, and build up a map of the constant inputs to how many times they
-  // occur.
-  bool NonFreeMat = false;
-  struct CostsAndCount {
-    InstructionCost MatCost = TargetTransformInfo::TCC_Free;
-    InstructionCost FoldedCost = TargetTransformInfo::TCC_Free;
-    int Count = 0;
-  };
-  SmallDenseMap<ConstantInt *, CostsAndCount, 16> CostsAndCounts;
-  SmallPtrSet<BasicBlock *, 16> IncomingConstantBlocks;
-  for (int i : llvm::seq<int>(0, PN.getNumIncomingValues())) {
-    auto *IncomingC = dyn_cast<ConstantInt>(PN.getIncomingValue(i));
-    if (!IncomingC)
-      continue;
-
-    // Only visit each incoming edge with a constant input once.
-    if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second)
-      continue;
-
-    auto InsertResult = CostsAndCounts.insert({IncomingC, {}});
-    // Count how many edges share a given incoming costant.
-    ++InsertResult.first->second.Count;
-    // Only compute the cost the first time we see a particular constant.
-    if (!InsertResult.second)
-      continue;
-
-    InstructionCost &MatCost = InsertResult.first->second.MatCost;
-    MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType(),
-                                TargetTransformInfo::TCK_SizeAndLatency);
-    NonFreeMat |= MatCost != TTI.TCC_Free;
-  }
-  if (!NonFreeMat) {
-    LLVM_DEBUG(dbgs() << "    Free: " << PN << "\n");
-    // No profit in free materialization.
-    return false;
-  }
-
-  // Now check that the uses of this PHI can actually be speculated,
-  // otherwise we'll still have to materialize the PHI value.
-  if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) {
-    LLVM_DEBUG(dbgs() << "    Unsafe PHI: " << PN << "\n");
-    return false;
-  }
-
-  // Compute how much (if any) savings are available by speculating around this
-  // PHI.
-  for (Use &U : PN.uses()) {
-    auto *UserI = cast<Instruction>(U.getUser());
-    // Now check whether there is any savings to folding the incoming constants
-    // into this use.
-    unsigned Idx = U.getOperandNo();
-
-    // If we have a binary operator that is commutative, an actual constant
-    // operand would end up on the RHS, so pretend the use of the PHI is on the
-    // RHS.
-    //
-    // Technically, this is a bit weird if *both* operands are PHIs we're
-    // speculating. But if that is the case, giving an "optimistic" cost isn't
-    // a bad thing because after speculation it will constant fold. And
-    // moreover, such cases should likely have been constant folded already by
-    // some other pass, so we shouldn't worry about "modeling" them terribly
-    // accurately here. Similarly, if the other operand is a constant, it still
-    // seems fine to be "optimistic" in our cost modeling, because when the
-    // incoming operand from the PHI node is also a constant, we will end up
-    // constant folding.
-    if (UserI->isBinaryOp() && UserI->isCommutative() && Idx != 1)
-      // Assume we will commute the constant to the RHS to be canonical.
-      Idx = 1;
-
-    // Get the intrinsic ID if this user is an intrinsic.
-    Intrinsic::ID IID = Intrinsic::not_intrinsic;
-    if (auto *UserII = dyn_cast<IntrinsicInst>(UserI))
-      IID = UserII->getIntrinsicID();
-
-    for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) {
-      ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first;
-      InstructionCost MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
-      InstructionCost &FoldedCost =
-          IncomingConstantAndCostsAndCount.second.FoldedCost;
-      if (IID)
-        FoldedCost +=
-          TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(),
-                                  IncomingC->getType(),
-                                  TargetTransformInfo::TCK_SizeAndLatency);
-      else
-        FoldedCost +=
-            TTI.getIntImmCostInst(UserI->getOpcode(), Idx,
-                                  IncomingC->getValue(), IncomingC->getType(),
-                                  TargetTransformInfo::TCK_SizeAndLatency);
-
-      // If we accumulate more folded cost for this incoming constant than
-      // materialized cost, then we'll regress any edge with this constant so
-      // just bail. We're only interested in cases where folding the incoming
-      // constants is at least break-even on all paths.
-      if (FoldedCost > MatCost) {
-        LLVM_DEBUG(dbgs() << "  Not profitable to fold imm: " << *IncomingC
-                          << "\n"
-                             "    Materializing cost:    "
-                          << MatCost
-                          << "\n"
-                             "    Accumulated folded cost: "
-                          << FoldedCost << "\n");
-        return false;
-      }
-    }
-  }
-
-  // Compute the total cost savings afforded by this PHI node.
-  InstructionCost TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free;
-  for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) {
-    InstructionCost MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
-    InstructionCost FoldedCost =
-        IncomingConstantAndCostsAndCount.second.FoldedCost;
-    int Count = IncomingConstantAndCostsAndCount.second.Count;
-
-    TotalMatCost += MatCost * Count;
-    TotalFoldedCost += FoldedCost * Count;
-  }
-  assert(TotalMatCost.isValid() && "Constants must be  materializable");
-  assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is "
-                                            "less that its materialized cost, "
-                                            "the sum must be as well.");
-  LLVM_DEBUG(dbgs() << "    Cost savings " << (TotalMatCost - TotalFoldedCost)
-                    << ": " << PN << "\n");
-  CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost;
-  return true;
-}
-
-/// Simple helper to walk all the users of a list of phis depth first, and call
-/// a visit function on each one in post-order.
-///
-/// All of the PHIs should be in the same basic block, and this is primarily
-/// used to make a single depth-first walk across their collective users
-/// without revisiting any subgraphs. Callers should provide a fast, idempotent
-/// callable to test whether a node has been visited and the more important
-/// callable to actually visit a particular node.
-///
-/// Depth-first and postorder here refer to the *operand* graph -- we start
-/// from a collection of users of PHI nodes and walk "up" the operands
-/// depth-first.
-template <typename IsVisitedT, typename VisitT>
-static void visitPHIUsersAndDepsInPostOrder(ArrayRef<PHINode *> PNs,
-                                            IsVisitedT IsVisited,
-                                            VisitT Visit) {
-  SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
-  for (auto *PN : PNs)
-    for (Use &U : PN->uses()) {
-      auto *UI = cast<Instruction>(U.getUser());
-      if (IsVisited(UI))
-        // Already visited this user, continue across the roots.
-        continue;
-
-      // Otherwise, walk the operand graph depth-first and visit each
-      // dependency in postorder.
-      DFSStack.push_back({UI, UI->value_op_begin()});
-      do {
-        User::value_op_iterator OpIt;
-        std::tie(UI, OpIt) = DFSStack.pop_back_val();
-        while (OpIt != UI->value_op_end()) {
-          auto *OpI = dyn_cast<Instruction>(*OpIt);
-          // Increment to the next operand for whenever we continue.
-          ++OpIt;
-          // No need to visit non-instructions, which can't form dependencies,
-          // or instructions outside of our potential dependency set that we
-          // were given. Finally, if we've already visited the node, continue
-          // to the next.
-          if (!OpI || IsVisited(OpI))
-            continue;
-
-          // Push onto the stack and descend. We can directly continue this
-          // loop when ascending.
-          DFSStack.push_back({UI, OpIt});
-          UI = OpI;
-          OpIt = OpI->value_op_begin();
-        }
-
-        // Finished visiting children, visit this node.
-        assert(!IsVisited(UI) && "Should not have already visited a node!");
-        Visit(UI);
-      } while (!DFSStack.empty());
-    }
-}
-
-/// Find profitable PHIs to speculate.
-///
-/// For a PHI node to be profitable, we need the cost of speculating its users
-/// (and their dependencies) to not exceed the savings of folding the PHI's
-/// constant operands into the speculated users.
-///
-/// Computing this is surprisingly challenging. Because users of two different
-/// PHI nodes can depend on each other or on common other instructions, it may
-/// be profitable to speculate two PHI nodes together even though neither one
-/// in isolation is profitable. The straightforward way to find all the
-/// profitable PHIs would be to check each combination of PHIs' cost, but this
-/// is exponential in complexity.
-///
-/// Even if we assume that we only care about cases where we can consider each
-/// PHI node in isolation (rather than considering cases where none are
-/// profitable in isolation but some subset are profitable as a set), we still
-/// have a challenge. The obvious way to find all individually profitable PHIs
-/// is to iterate until reaching a fixed point, but this will be quadratic in
-/// complexity. =/
-///
-/// This code currently uses a linear-to-compute order for a greedy approach.
-/// It won't find cases where a set of PHIs must be considered together, but it
-/// handles most cases of order dependence without quadratic iteration. The
-/// specific order used is the post-order across the operand DAG. When the last
-/// user of a PHI is visited in this postorder walk, we check it for
-/// profitability.
-///
-/// There is an orthogonal extra complexity to all of this: computing the cost
-/// itself can easily become a linear computation making everything again (at
-/// best) quadratic. Using a postorder over the operand graph makes it
-/// particularly easy to avoid this through dynamic programming. As we do the
-/// postorder walk, we build the transitive cost of that subgraph. It is also
-/// straightforward to then update these costs when we mark a PHI for
-/// speculation so that subsequent PHIs don't re-pay the cost of already
-/// speculated instructions.
-static SmallVector<PHINode *, 16> findProfitablePHIs(
-    ArrayRef<PHINode *> PNs,
-    const SmallDenseMap<PHINode *, InstructionCost, 16> &CostSavingsMap,
-    const SmallPtrSetImpl<Instruction *> &PotentialSpecSet, int NumPreds,
-    DominatorTree &DT, TargetTransformInfo &TTI) {
-  SmallVector<PHINode *, 16> SpecPNs;
-
-  // First, establish a reverse mapping from immediate users of the PHI nodes
-  // to the nodes themselves, and count how many users each PHI node has in
-  // a way we can update while processing them.
-  SmallDenseMap<Instruction *, TinyPtrVector<PHINode *>, 16> UserToPNMap;
-  SmallDenseMap<PHINode *, int, 16> PNUserCountMap;
-  SmallPtrSet<Instruction *, 16> UserSet;
-  for (auto *PN : PNs) {
-    assert(UserSet.empty() && "Must start with an empty user set!");
-    for (Use &U : PN->uses())
-      UserSet.insert(cast<Instruction>(U.getUser()));
-    PNUserCountMap[PN] = UserSet.size();
-    for (auto *UI : UserSet)
-      UserToPNMap.insert({UI, {}}).first->second.push_back(PN);
-    UserSet.clear();
-  }
-
-  // Now do a DFS across the operand graph of the users, computing cost as we
-  // go and when all costs for a given PHI are known, checking that PHI for
-  // profitability.
-  SmallDenseMap<Instruction *, InstructionCost, 16> SpecCostMap;
-  visitPHIUsersAndDepsInPostOrder(
-      PNs,
-      /*IsVisited*/
-      [&](Instruction *I) {
-        // We consider anything that isn't potentially speculated to be
-        // "visited" as it is already handled. Similarly, anything that *is*
-        // potentially speculated but for which we have an entry in our cost
-        // map, we're done.
-        return !PotentialSpecSet.count(I) || SpecCostMap.count(I);
-      },
-      /*Visit*/
-      [&](Instruction *I) {
-        // We've fully visited the operands, so sum their cost with this node
-        // and update the cost map.
-        InstructionCost Cost = TTI.TCC_Free;
-        for (Value *OpV : I->operand_values())
-          if (auto *OpI = dyn_cast<Instruction>(OpV)) {
-            auto CostMapIt = SpecCostMap.find(OpI);
-            if (CostMapIt != SpecCostMap.end())
-              Cost += CostMapIt->second;
-          }
-        Cost += TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
-        bool Inserted = SpecCostMap.insert({I, Cost}).second;
-        (void)Inserted;
-        assert(Inserted && "Must not re-insert a cost during the DFS!");
-
-        // Now check if this node had a corresponding PHI node using it. If so,
-        // we need to decrement the outstanding user count for it.
-        auto UserPNsIt = UserToPNMap.find(I);
-        if (UserPNsIt == UserToPNMap.end())
-          return;
-        auto &UserPNs = UserPNsIt->second;
-        auto UserPNsSplitIt = std::stable_partition(
-            UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) {
-              int &PNUserCount = PNUserCountMap.find(UserPN)->second;
-              assert(
-                  PNUserCount > 0 &&
-                  "Should never re-visit a PN after its user count hits zero!");
-              --PNUserCount;
-              return PNUserCount != 0;
-            });
-
-        // FIXME: Rather than one at a time, we should sum the savings as the
-        // cost will be completely shared.
-        SmallVector<Instruction *, 16> SpecWorklist;
-        for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) {
-          InstructionCost SpecCost = TTI.TCC_Free;
-          for (Use &U : PN->uses())
-            SpecCost +=
-                SpecCostMap.find(cast<Instruction>(U.getUser()))->second;
-          SpecCost *= (NumPreds - 1);
-          // When the user count of a PHI node hits zero, we should check its
-          // profitability. If profitable, we should mark it for speculation
-          // and zero out the cost of everything it depends on.
-          InstructionCost CostSavings = CostSavingsMap.find(PN)->second;
-          if (SpecCost > CostSavings) {
-            LLVM_DEBUG(dbgs() << "  Not profitable, speculation cost: " << *PN
-                              << "\n"
-                                 "    Cost savings:     "
-                              << CostSavings
-                              << "\n"
-                                 "    Speculation cost: "
-                              << SpecCost << "\n");
-            continue;
-          }
-
-          // We're going to speculate this user-associated PHI. Copy it out and
-          // add its users to the worklist to update their cost.
-          SpecPNs.push_back(PN);
-          for (Use &U : PN->uses()) {
-            auto *UI = cast<Instruction>(U.getUser());
-            auto CostMapIt = SpecCostMap.find(UI);
-            if (CostMapIt->second == 0)
-              continue;
-            // Zero out this cost entry to avoid duplicates.
-            CostMapIt->second = 0;
-            SpecWorklist.push_back(UI);
-          }
-        }
-
-        // Now walk all the operands of the users in the worklist transitively
-        // to zero out all the memoized costs.
-        while (!SpecWorklist.empty()) {
-          Instruction *SpecI = SpecWorklist.pop_back_val();
-          assert(SpecCostMap.find(SpecI)->second == 0 &&
-                 "Didn't zero out a cost!");
-
-          // Walk the operands recursively to zero out their cost as well.
-          for (auto *OpV : SpecI->operand_values()) {
-            auto *OpI = dyn_cast<Instruction>(OpV);
-            if (!OpI)
-              continue;
-            auto CostMapIt = SpecCostMap.find(OpI);
-            if (CostMapIt == SpecCostMap.end() || CostMapIt->second == 0)
-              continue;
-            CostMapIt->second = 0;
-            SpecWorklist.push_back(OpI);
-          }
-        }
-      });
-
-  return SpecPNs;
-}
-
-/// Speculate users around a set of PHI nodes.
-///
-/// This routine does the actual speculation around a set of PHI nodes where we
-/// have determined this to be both safe and profitable.
-///
-/// This routine handles any spliting of critical edges necessary to create
-/// a safe block to speculate into as well as cloning the instructions and
-/// rewriting all uses.
-static void speculatePHIs(ArrayRef<PHINode *> SpecPNs,
-                          SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
-                          SmallSetVector<BasicBlock *, 16> &PredSet,
-                          DominatorTree &DT) {
-  LLVM_DEBUG(dbgs() << "  Speculating around " << SpecPNs.size() << " PHIs!\n");
-  NumPHIsSpeculated += SpecPNs.size();
-
-  // Split any critical edges so that we have a block to hoist into.
-  auto *ParentBB = SpecPNs[0]->getParent();
-  SmallVector<BasicBlock *, 16> SpecPreds;
-  SpecPreds.reserve(PredSet.size());
-  for (auto *PredBB : PredSet) {
-    auto *NewPredBB = SplitCriticalEdge(
-        PredBB, ParentBB,
-        CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges());
-    if (NewPredBB) {
-      ++NumEdgesSplit;
-      LLVM_DEBUG(dbgs() << "  Split critical edge from: " << PredBB->getName()
-                        << "\n");
-      SpecPreds.push_back(NewPredBB);
-    } else {
-      assert(PredBB->getSingleSuccessor() == ParentBB &&
-             "We need a non-critical predecessor to speculate into.");
-      assert(!isa<InvokeInst>(PredBB->getTerminator()) &&
-             "Cannot have a non-critical invoke!");
-
-      // Already non-critical, use existing pred.
-      SpecPreds.push_back(PredBB);
-    }
-  }
-
-  SmallPtrSet<Instruction *, 16> SpecSet;
-  SmallVector<Instruction *, 16> SpecList;
-  visitPHIUsersAndDepsInPostOrder(SpecPNs,
-                                  /*IsVisited*/
-                                  [&](Instruction *I) {
-                                    // This is visited if we don't need to
-                                    // speculate it or we already have
-                                    // speculated it.
-                                    return !PotentialSpecSet.count(I) ||
-                                           SpecSet.count(I);
-                                  },
-                                  /*Visit*/
-                                  [&](Instruction *I) {
-                                    // All operands scheduled, schedule this
-                                    // node.
-                                    SpecSet.insert(I);
-                                    SpecList.push_back(I);
-                                  });
-
-  int NumSpecInsts = SpecList.size() * SpecPreds.size();
-  int NumRedundantInsts = NumSpecInsts - SpecList.size();
-  LLVM_DEBUG(dbgs() << "  Inserting " << NumSpecInsts
-                    << " speculated instructions, " << NumRedundantInsts
-                    << " redundancies\n");
-  NumSpeculatedInstructions += NumSpecInsts;
-  NumNewRedundantInstructions += NumRedundantInsts;
-
-  // Each predecessor is numbered by its index in `SpecPreds`, so for each
-  // instruction we speculate, the speculated instruction is stored in that
-  // index of the vector associated with the original instruction. We also
-  // store the incoming values for each predecessor from any PHIs used.
-  SmallDenseMap<Instruction *, SmallVector<Value *, 2>, 16> SpeculatedValueMap;
-
-  // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming
-  // value. This handles both the PHIs we are speculating around and any other
-  // PHIs that happen to be used.
-  for (auto *OrigI : SpecList)
-    for (auto *OpV : OrigI->operand_values()) {
-      auto *OpPN = dyn_cast<PHINode>(OpV);
-      if (!OpPN || OpPN->getParent() != ParentBB)
-        continue;
-
-      auto InsertResult = SpeculatedValueMap.insert({OpPN, {}});
-      if (!InsertResult.second)
-        continue;
-
-      auto &SpeculatedVals = InsertResult.first->second;
-
-      // Populating our structure for mapping is particularly annoying because
-      // finding an incoming value for a particular predecessor block in a PHI
-      // node is a linear time operation! To avoid quadratic behavior, we build
-      // a map for this PHI node's incoming values and then translate it into
-      // the more compact representation used below.
-      SmallDenseMap<BasicBlock *, Value *, 16> IncomingValueMap;
-      for (int i : llvm::seq<int>(0, OpPN->getNumIncomingValues()))
-        IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i);
-
-      for (auto *PredBB : SpecPreds)
-        SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second);
-    }
-
-  // Speculate into each predecessor.
-  for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) {
-    auto *PredBB = SpecPreds[PredIdx];
-    assert(PredBB->getSingleSuccessor() == ParentBB &&
-           "We need a non-critical predecessor to speculate into.");
-
-    for (auto *OrigI : SpecList) {
-      auto *NewI = OrigI->clone();
-      NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx));
-      NewI->insertBefore(PredBB->getTerminator());
-
-      // Rewrite all the operands to the previously speculated instructions.
-      // Because we're walking in-order, the defs must precede the uses and we
-      // should already have these mappings.
-      for (Use &U : NewI->operands()) {
-        auto *OpI = dyn_cast<Instruction>(U.get());
-        if (!OpI)
-          continue;
-        auto MapIt = SpeculatedValueMap.find(OpI);
-        if (MapIt == SpeculatedValueMap.end())
-          continue;
-        const auto &SpeculatedVals = MapIt->second;
-        assert(SpeculatedVals[PredIdx] &&
-               "Must have a speculated value for this predecessor!");
-        assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() &&
-               "Speculated value has the wrong type!");
-
-        // Rewrite the use to this predecessor's speculated instruction.
-        U.set(SpeculatedVals[PredIdx]);
-      }
-
-      // Commute instructions which now have a constant in the LHS but not the
-      // RHS.
-      if (NewI->isBinaryOp() && NewI->isCommutative() &&
-          isa<Constant>(NewI->getOperand(0)) &&
-          !isa<Constant>(NewI->getOperand(1)))
-        NewI->getOperandUse(0).swap(NewI->getOperandUse(1));
-
-      SpeculatedValueMap[OrigI].push_back(NewI);
-      assert(SpeculatedValueMap[OrigI][PredIdx] == NewI &&
-             "Mismatched speculated instruction index!");
-    }
-  }
-
-  // Walk the speculated instruction list and if they have uses, insert a PHI
-  // for them from the speculated versions, and replace the uses with the PHI.
-  // Then erase the instructions as they have been fully speculated. The walk
-  // needs to be in reverse so that we don't think there are users when we'll
-  // actually eventually remove them later.
-  IRBuilder<> IRB(SpecPNs[0]);
-  for (auto *OrigI : llvm::reverse(SpecList)) {
-    // Check if we need a PHI for any remaining users and if so, insert it.
-    if (!OrigI->use_empty()) {
-      auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(),
-                                    Twine(OrigI->getName()) + ".phi");
-      // Add the incoming values we speculated.
-      auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second;
-      for (int PredIdx : llvm::seq<int>(0, SpecPreds.size()))
-        SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]);
-
-      // And replace the uses with the PHI node.
-      OrigI->replaceAllUsesWith(SpecIPN);
-    }
-
-    // It is important to immediately erase this so that it stops using other
-    // instructions. This avoids inserting needless PHIs of them.
-    OrigI->eraseFromParent();
-  }
-
-  // All of the uses of the speculated phi nodes should be removed at this
-  // point, so erase them.
-  for (auto *SpecPN : SpecPNs) {
-    assert(SpecPN->use_empty() && "All users should have been speculated!");
-    SpecPN->eraseFromParent();
-  }
-}
-
-/// Try to speculate around a series of PHIs from a single basic block.
-///
-/// This routine checks whether any of these PHIs are profitable to speculate
-/// users around. If safe and profitable, it does the speculation. It returns
-/// true when at least some speculation occurs.
-static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
-                               DominatorTree &DT, TargetTransformInfo &TTI) {
-  LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n");
-
-  // Savings in cost from speculating around a PHI node.
-  SmallDenseMap<PHINode *, InstructionCost, 16> CostSavingsMap;
-
-  // Remember the set of instructions that are candidates for speculation so
-  // that we can quickly walk things within that space. This prunes out
-  // instructions already available along edges, etc.
-  SmallPtrSet<Instruction *, 16> PotentialSpecSet;
-
-  // Remember the set of instructions that are (transitively) unsafe to
-  // speculate into the incoming edges of this basic block. This avoids
-  // recomputing them for each PHI node we check. This set is specific to this
-  // block though as things are pruned out of it based on what is available
-  // along incoming edges.
-  SmallPtrSet<Instruction *, 16> UnsafeSet;
-
-  // For each PHI node in this block, check whether there are immediate folding
-  // opportunities from speculation, and whether that speculation will be
-  // valid. This determise the set of safe PHIs to speculate.
-  llvm::erase_if(PNs, [&](PHINode *PN) {
-    return !isSafeAndProfitableToSpeculateAroundPHI(
-        *PN, CostSavingsMap, PotentialSpecSet, UnsafeSet, DT, TTI);
-  });
-  // If no PHIs were profitable, skip.
-  if (PNs.empty()) {
-    LLVM_DEBUG(dbgs() << "  No safe and profitable PHIs found!\n");
-    return false;
-  }
-
-  // We need to know how much speculation will cost which is determined by how
-  // many incoming edges will need a copy of each speculated instruction.
-  SmallSetVector<BasicBlock *, 16> PredSet;
-  for (auto *PredBB : PNs[0]->blocks()) {
-    if (!PredSet.insert(PredBB))
-      continue;
-
-    // We cannot speculate when a predecessor is an indirect branch.
-    // FIXME: We also can't reliably create a non-critical edge block for
-    // speculation if the predecessor is an invoke. This doesn't seem
-    // fundamental and we should probably be splitting critical edges
-    // differently.
-    const auto *TermInst = PredBB->getTerminator();
-    if (isa<IndirectBrInst>(TermInst) ||
-        isa<InvokeInst>(TermInst) ||
-        isa<CallBrInst>(TermInst)) {
-      LLVM_DEBUG(dbgs() << "  Invalid: predecessor terminator: "
-                        << PredBB->getName() << "\n");
-      return false;
-    }
-  }
-  if (PredSet.size() < 2) {
-    LLVM_DEBUG(dbgs() << "  Unimportant: phi with only one predecessor\n");
-    return false;
-  }
-
-  SmallVector<PHINode *, 16> SpecPNs = findProfitablePHIs(
-      PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI);
-  if (SpecPNs.empty())
-    // Nothing to do.
-    return false;
-
-  speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT);
-  return true;
-}
-
-PreservedAnalyses SpeculateAroundPHIsPass::run(Function &F,
-                                               FunctionAnalysisManager &AM) {
-  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
-  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
-
-  bool Changed = false;
-  for (auto *BB : ReversePostOrderTraversal<Function *>(&F)) {
-    SmallVector<PHINode *, 16> PNs;
-    auto BBI = BB->begin();
-    while (auto *PN = dyn_cast<PHINode>(&*BBI)) {
-      PNs.push_back(PN);
-      ++BBI;
-    }
-
-    if (PNs.empty())
-      continue;
-
-    Changed |= tryToSpeculatePHIs(PNs, DT, TTI);
-  }
-
-  if (!Changed)
-    return PreservedAnalyses::all();
-
-  PreservedAnalyses PA;
-  return PA;
-}
Index: llvm/lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -73,7 +73,6 @@
   SimplifyCFGPass.cpp
   Sink.cpp
   SpeculativeExecution.cpp
-  SpeculateAroundPHIs.cpp
   StraightLineStrengthReduce.cpp
   StructurizeCFG.cpp
   TailRecursionElimination.cpp
Index: llvm/lib/Passes/PassRegistry.def
===================================================================
--- llvm/lib/Passes/PassRegistry.def
+++ llvm/lib/Passes/PassRegistry.def
@@ -306,7 +306,6 @@
 FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
 FUNCTION_PASS("slsr", StraightLineStrengthReducePass())
 FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass())
-FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass())
 FUNCTION_PASS("sroa", SROA())
 FUNCTION_PASS("strip-gc-relocates", StripGCRelocates())
 FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -203,7 +203,6 @@
 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/Transforms/Scalar/SimplifyCFG.h"
 #include "llvm/Transforms/Scalar/Sink.h"
-#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
 #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
 #include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
@@ -1448,11 +1447,6 @@
   // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
   OptimizePM.addPass(SimplifyCFGPass());
 
-  // Optimize PHIs by speculating around them when profitable. Note that this
-  // pass needs to be run after any PRE or similar pass as it is essentially
-  // inserting redundancies into the program. This even includes SimplifyCFG.
-  OptimizePM.addPass(SpeculateAroundPHIsPass());
-
   if (PTO.Coroutines)
     OptimizePM.addPass(CoroCleanupPass());
 
Index: llvm/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h
+++ /dev/null
@@ -1,109 +0,0 @@
-//===- SpeculateAroundPHIs.h - Speculate around PHIs ------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_SCALAR_SPECULATEAROUNDPHIS_H
-#define LLVM_TRANSFORMS_SCALAR_SPECULATEAROUNDPHIS_H
-
-#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-
-/// This pass handles simple speculating of  instructions around PHIs when
-/// doing so is profitable for a particular target despite duplicated
-/// instructions.
-///
-/// The motivating example are PHIs of constants which will require
-/// materializing the constants along each edge. If the PHI is used by an
-/// instruction where the target can materialize the constant as part of the
-/// instruction, it is profitable to speculate those instructions around the
-/// PHI node. This can reduce dynamic instruction count as well as decrease
-/// register pressure.
-///
-/// Consider this IR for example:
-///   ```
-///   entry:
-///     br i1 %flag, label %a, label %b
-///
-///   a:
-///     br label %exit
-///
-///   b:
-///     br label %exit
-///
-///   exit:
-///     %p = phi i32 [ 7, %a ], [ 11, %b ]
-///     %sum = add i32 %arg, %p
-///     ret i32 %sum
-///   ```
-/// To materialize the inputs to this PHI node may require an explicit
-/// instruction. For example, on x86 this would turn into something like
-///   ```
-///     testq %eax, %eax
-///     movl $7, %rNN
-///     jne .L
-///     movl $11, %rNN
-///   .L:
-///     addl %edi, %rNN
-///     movl %rNN, %eax
-///     retq
-///   ```
-/// When these constants can be folded directly into another instruction, it
-/// would be preferable to avoid the potential for register pressure (above we
-/// can easily avoid it, but that isn't always true) and simply duplicate the
-/// instruction using the PHI:
-///   ```
-///   entry:
-///     br i1 %flag, label %a, label %b
-///
-///   a:
-///     %sum.1 = add i32 %arg, 7
-///     br label %exit
-///
-///   b:
-///     %sum.2 = add i32 %arg, 11
-///     br label %exit
-///
-///   exit:
-///     %p = phi i32 [ %sum.1, %a ], [ %sum.2, %b ]
-///     ret i32 %p
-///   ```
-/// Which will generate something like the following on x86:
-///   ```
-///     testq %eax, %eax
-///     addl $7, %edi
-///     jne .L
-///     addl $11, %edi
-///   .L:
-///     movl %edi, %eax
-///     retq
-///   ```
-///
-/// It is important to note that this pass is never intended to handle more
-/// complex cases where speculating around PHIs allows simplifications of the
-/// IR itself or other subsequent optimizations. Those can and should already
-/// be handled before this pass is ever run by a more powerful analysis that
-/// can reason about equivalences and common subexpressions. Classically, those
-/// cases would be handled by a GVN-powered PRE or similar transform. This
-/// pass, in contrast, is *only* interested in cases where despite no
-/// simplifications to the IR itself, speculation is *faster* to execute. The
-/// result of this is that the cost models which are appropriate to consider
-/// here are relatively simple ones around execution and codesize cost, without
-/// any need to consider simplifications or other transformations.
-struct SpeculateAroundPHIsPass : PassInfoMixin<SpeculateAroundPHIsPass> {
-  /// Run the pass over the function.
-  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_TRANSFORMS_SCALAR_SPECULATEAROUNDPHIS_H
Index: clang/test/CodeGen/thinlto-distributed-newpm.ll
===================================================================
--- clang/test/CodeGen/thinlto-distributed-newpm.ll
+++ clang/test/CodeGen/thinlto-distributed-newpm.ll
@@ -165,7 +165,6 @@
 ; CHECK-O: Running pass: InstSimplifyPass on main
 ; CHECK-O: Running pass: DivRemPairsPass on main
 ; CHECK-O: Running pass: SimplifyCFGPass on main
-; CHECK-O: Running pass: SpeculateAroundPHIsPass on main
 ; CHECK-O: Running pass: CGProfilePass
 ; CHECK-O: Running pass: GlobalDCEPass
 ; CHECK-O: Running pass: ConstantMergePass
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to