yroux updated this revision to Diff 241188.
yroux added a comment.

Here is a new version, which disables the ARM Low Overhead Loops pass when the
Machine Outliner is enabled.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D57054/new/

https://reviews.llvm.org/D57054

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  llvm/include/llvm/CodeGen/TargetPassConfig.h
  llvm/lib/CodeGen/MachineOutliner.cpp
  llvm/lib/CodeGen/TargetPassConfig.cpp
  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
  llvm/lib/Target/ARM/ARMBaseInstrInfo.h
  llvm/lib/Target/ARM/ARMTargetMachine.cpp
  llvm/test/CodeGen/ARM/O3-pipeline.ll
  llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir
  llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir
  llvm/test/CodeGen/ARM/machine-outliner-tail.ll
  llvm/test/CodeGen/ARM/machine-outliner-thunk.ll
  llvm/test/CodeGen/ARM/machine-outliner.ll
  llvm/test/CodeGen/ARM/machine-outliner.mir

Index: llvm/test/CodeGen/ARM/machine-outliner.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner.mir
@@ -0,0 +1,72 @@
+# RUN: llc -mtriple=arm-- -run-pass=machine-outliner -verify-machineinstrs \
+# RUN: %s -o - | FileCheck %s
+# RUN: llc -mtriple=thumbv7-- -run-pass=machine-outliner -verify-machineinstrs \
+# RUN: %s -o - | FileCheck %s
+
+--- |
+  define void @outline_1() #0 { ret void }
+  define void @outline_2() #0 { ret void }
+  define void @outline_3() #0 { ret void }
+  define void @dont_outline() { ret void }
+
+  attributes #0 = { minsize optsize }
+...
+---
+
+name:           outline_1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: bb.0:
+    ; CHECK: OUTLINED
+    liveins: $r2
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           outline_2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: bb.0:
+    ; CHECK: OUTLINED
+    liveins: $r2
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           outline_3
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: bb.0:
+    ; CHECK: OUTLINED
+    liveins: $r2
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           dont_outline
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: bb.0:
+    ; CHECK-NOT: BL
+    liveins: $lr, $r2
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    $r2 = MOVi 1, 14, $noreg, $noreg
+    BX_RET 14, $noreg
Index: llvm/test/CodeGen/ARM/machine-outliner.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner.ll
@@ -0,0 +1,135 @@
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=arm-- \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ARM
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=thumbv7-- \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,THUMB
+; RUN: llc -verify-machineinstrs -enable-machine-outliner \
+; RUN: -enable-linkonceodr-outlining -mtriple=arm-- < %s | FileCheck %s \
+; RUN: --check-prefix=ODR
+; RUN: llc -verify-machineinstrs -enable-machine-outliner \
+; RUN: -enable-linkonceodr-outlining -mtriple=thumbv7-- < %s | FileCheck %s \
+; RUN: --check-prefix=ODR
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=arm-- \
+; RUN: -stop-after=machine-outliner < %s | FileCheck %s \
+; RUN: --check-prefix=TARGET_FEATURES
+
+
+; Make sure that we inherit target features from functions and make sure we have
+; the right function attributes.
+; TARGET_FEATURES: define internal void @OUTLINED_FUNCTION_{{[0-9]+}}()
+; TARGET_FEATURES-SAME: #[[ATTR_NUM:[0-9]+]]
+; TARGET_FEATURES-DAG: attributes #[[ATTR_NUM]] = {
+; TARGET_FEATURES-SAME: minsize
+; TARGET_FEATURES-SAME: optsize
+; TARGET_FEATURES-SAME: "target-features"="+neon"
+
+define linkonce_odr void @fish() #0 {
+  ; CHECK-LABEL: fish:
+  ; CHECK-NOT: OUTLINED
+  ; ODR: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]]
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  %5 = alloca i32, align 4
+  %6 = alloca i32, align 4
+  store i32 1, i32* %1, align 4
+  store i32 2, i32* %2, align 4
+  store i32 3, i32* %3, align 4
+  store i32 4, i32* %4, align 4
+  store i32 5, i32* %5, align 4
+  store i32 6, i32* %6, align 4
+  ret void
+}
+
+define void @turtle() section "TURTLE,turtle" {
+  ; CHECK-LABEL: turtle:
+  ; ODR-LABEL: turtle:
+  ; CHECK-NOT: OUTLINED
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  %5 = alloca i32, align 4
+  %6 = alloca i32, align 4
+  store i32 1, i32* %1, align 4
+  store i32 2, i32* %2, align 4
+  store i32 3, i32* %3, align 4
+  store i32 4, i32* %4, align 4
+  store i32 5, i32* %5, align 4
+  store i32 6, i32* %6, align 4
+  ret void
+}
+
+define void @cat() #0 {
+  ; CHECK-LABEL: cat:
+  ; CHECK: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]]
+  ; ODR: [[OUTLINED]]
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  %5 = alloca i32, align 4
+  %6 = alloca i32, align 4
+  store i32 1, i32* %1, align 4
+  store i32 2, i32* %2, align 4
+  store i32 3, i32* %3, align 4
+  store i32 4, i32* %4, align 4
+  store i32 5, i32* %5, align 4
+  store i32 6, i32* %6, align 4
+  ret void
+}
+
+define void @dog() #0 {
+  ; CHECK-LABEL: dog:
+  ; CHECK: [[OUTLINED]]
+  ; ODR: [[OUTLINED]]
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  %5 = alloca i32, align 4
+  %6 = alloca i32, align 4
+  store i32 1, i32* %1, align 4
+  store i32 2, i32* %2, align 4
+  store i32 3, i32* %3, align 4
+  store i32 4, i32* %4, align 4
+  store i32 5, i32* %5, align 4
+  store i32 6, i32* %6, align 4
+  ret void
+}
+
+; ODR: [[OUTLINED]]:
+; ARM: .code 32
+; ARM-NEXT: [[OUTLINED]]:
+; ARM:      mov     r0, #2
+; ARM-NEXT: str     r0, [sp, #16]
+; ARM-NEXT: mov     r0, #1
+; ARM-NEXT: str     r0, [sp, #20]
+; ARM-NEXT: mov     r0, #3
+; ARM-NEXT: str     r0, [sp, #12]
+; ARM-NEXT: mov     r0, #4
+; ARM-NEXT: str     r0, [sp, #8]
+; ARM-NEXT: mov     r0, #5
+; ARM-NEXT: str     r0, [sp, #4]
+; ARM-NEXT: mov     r0, #6
+; ARM-NEXT: str     r0, [sp]
+; ARM-NEXT: mov     pc, lr
+
+; THUMB: .code 16
+; THUMB-NEXT: .thumb_func
+; THUMB-NEXT: [[OUTLINED]]:
+; THUMB:      movs    r0, #2
+; THUMB-NEXT: str     r0, [sp, #16]
+; THUMB-NEXT: movs    r0, #1
+; THUMB-NEXT: str     r0, [sp, #20]
+; THUMB-NEXT: movs    r0, #3
+; THUMB-NEXT: str     r0, [sp, #12]
+; THUMB-NEXT: movs    r0, #4
+; THUMB-NEXT: str     r0, [sp, #8]
+; THUMB-NEXT: movs    r0, #5
+; THUMB-NEXT: str     r0, [sp, #4]
+; THUMB-NEXT: movs    r0, #6
+; THUMB-NEXT: str     r0, [sp]
+; THUMB-NEXT: bx      lr
+
+attributes #0 = { nounwind "target-cpu"="cortex-a53" "target-features"="+neon" }
Index: llvm/test/CodeGen/ARM/machine-outliner-thunk.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner-thunk.ll
@@ -0,0 +1,111 @@
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=armv7-- \
+; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv7-- \
+; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB
+; RUN: llc -enable-machine-outliner -verify-machineinstrs \
+; RUN: -mtriple=thumbv7-apple-darwin -stop-after=machine-outliner < %s \
+; RUN: | FileCheck %s --check-prefix=MACHO
+
+declare i32 @thunk_called_fn(i32, i32, i32, i32)
+
+define i32 @a() {
+; ARM-LABEL: name:             a
+; ARM:       bb.0.entry:
+; ARM-NEXT:    liveins: $r11, $lr
+; ARM:         $sp = frame-setup STMDB_UPD $sp, 14, $noreg, killed $r11, killed $lr
+; ARM-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+; ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+; ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $r11, -8
+; ARM-NEXT:    BL @OUTLINED_FUNCTION_0{{.*}}
+; ARM-NEXT:    renamable $r0 = ADDri killed renamable $r0, 8, 14, $noreg, $noreg
+; ARM-NEXT:    $sp = LDMIA_RET $sp, 14, $noreg, def $r11, def $pc, implicit killed $r0
+
+; THUMB-LABEL: name:             a
+; THUMB:       bb.0.entry:
+; THUMB-NEXT:    liveins: $r7, $lr
+; THUMB:         frame-setup tPUSH 14, $noreg, killed $r7, killed $lr
+; THUMB-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+; THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+; THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r7, -8
+; THUMB-NEXT:    tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}}
+; THUMB-NEXT:    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14, $noreg
+; THUMB-NEXT:    tPOP_RET 14, $noreg, def $r7, def $pc
+
+; MACHO-LABEL: name:             a
+; MACHO:       bb.0.entry:
+; MACHO-NEXT:    liveins: $lr
+; MACHO:         early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14, $noreg
+; MACHO-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 4
+; MACHO-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+; MACHO-NEXT:    tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}}
+; MACHO-NEXT:    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14, $noreg
+; MACHO-NEXT:    $lr, $sp = t2LDR_POST $sp, 4, 14, $noreg
+; MACHO-NEXT:    tBX_RET 14, $noreg, implicit killed $r0
+entry:
+  %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
+  %cx = add i32 %call, 8
+  ret i32 %cx
+}
+
+define i32 @b() {
+; ARM-LABEL: name:             b
+; ARM:       bb.0.entry:
+; ARM-NEXT:    liveins: $r11, $lr
+; ARM:         $sp = frame-setup STMDB_UPD $sp, 14, $noreg, killed $r11, killed $lr
+; ARM-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+; ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+; ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $r11, -8
+; ARM-NEXT:    BL @OUTLINED_FUNCTION_0{{.*}}
+; ARM-NEXT:    renamable $r0 = ADDri killed renamable $r0, 88, 14, $noreg, $noreg
+; ARM-NEXT:    $sp = LDMIA_RET $sp, 14, $noreg, def $r11, def $pc, implicit killed $r0
+
+; THUMB-LABEL: name:             b
+; THUMB:       bb.0.entry:
+; THUMB-NEXT:    liveins: $r7, $lr
+; THUMB:         frame-setup tPUSH 14, $noreg, killed $r7, killed $lr
+; THUMB-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+; THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+; THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r7, -8
+; THUMB-NEXT:    tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}}
+; THUMB-NEXT:    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14, $noreg
+; THUMB-NEXT:    tPOP_RET 14, $noreg, def $r7, def $pc
+
+; MACHO-LABEL: name:             b
+; MACHO:       bb.0.entry:
+; MACHO-NEXT:    liveins: $lr
+; MACHO:         early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14, $noreg
+; MACHO-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 4
+; MACHO-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+; MACHO-NEXT:    tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}}
+; MACHO-NEXT:    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14, $noreg
+; MACHO-NEXT:    $lr, $sp = t2LDR_POST $sp, 4, 14, $noreg
+; MACHO-NEXT:    tBX_RET 14, $noreg, implicit killed $r0
+entry:
+  %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
+  %cx = add i32 %call, 88
+  ret i32 %cx
+}
+
+; ARM-LABEL: name:            OUTLINED_FUNCTION_0
+; ARM:        bb.0:
+; ARM-NEXT:   $r0 = MOVi 1, 14, $noreg, $noreg
+; ARM-NEXT:   $r1 = MOVi 2, 14, $noreg, $noreg
+; ARM-NEXT:   $r2 = MOVi 3, 14, $noreg, $noreg
+; ARM-NEXT:   $r3 = MOVi 4, 14, $noreg, $noreg
+; ARM-NEXT:   TAILJMPd @thunk_called_fn, implicit $sp
+
+; THUMB-LABEL: name:            OUTLINED_FUNCTION_0
+; THUMB:        bb.0:
+; THUMB-NEXT:   $r0, dead $cpsr = tMOVi8 1, 14, $noreg
+; THUMB-NEXT:   $r1, dead $cpsr = tMOVi8 2, 14, $noreg
+; THUMB-NEXT:   $r2, dead $cpsr = tMOVi8 3, 14, $noreg
+; THUMB-NEXT:   $r3, dead $cpsr = tMOVi8 4, 14, $noreg
+; THUMB-NEXT:   tTAILJMPdND @thunk_called_fn, 14, $noreg, implicit $sp
+
+; MACHO-LABEL: name:            OUTLINED_FUNCTION_0
+; MACHO:        bb.0:
+; MACHO-NEXT:   $r0, dead $cpsr = tMOVi8 1, 14, $noreg
+; MACHO-NEXT:   $r1, dead $cpsr = tMOVi8 2, 14, $noreg
+; MACHO-NEXT:   $r2, dead $cpsr = tMOVi8 3, 14, $noreg
+; MACHO-NEXT:   $r3, dead $cpsr = tMOVi8 4, 14, $noreg
+; MACHO-NEXT:   tTAILJMPd @thunk_called_fn, 14, $noreg, implicit $sp
Index: llvm/test/CodeGen/ARM/machine-outliner-tail.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner-tail.ll
@@ -0,0 +1,42 @@
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=arm-- \
+; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=thumbv7-- \
+; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB
+; RUN: llc -verify-machineinstrs -enable-machine-outliner \
+; RUN: -mtriple=thumbv7-apple-darwin --stop-after=machine-outliner < %s \
+; RUN: | FileCheck %s --check-prefix=MACHO
+
+; ARM-LABEL: name:            OUTLINED_FUNCTION_0
+; ARM: $r0 = MOVi 1, 14, $noreg, $noreg
+; ARM-NEXT: $r1 = MOVi 2, 14, $noreg, $noreg
+; ARM-NEXT: $r2 = MOVi 3, 14, $noreg, $noreg
+; ARM-NEXT: $r3 = MOVi 4, 14, $noreg, $noreg
+; ARM-NEXT: TAILJMPd @z
+
+; THUMB-LABEL: name:            OUTLINED_FUNCTION_0
+; THUMB: $r0, dead $cpsr = tMOVi8 1, 14, $noreg
+; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg
+; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg
+; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg
+; THUMB-NEXT: tTAILJMPdND @z, 14, $noreg
+
+; MACHO-LABEL: name:            OUTLINED_FUNCTION_0
+; MACHO: $r0, dead $cpsr = tMOVi8 1, 14, $noreg
+; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg
+; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg
+; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg
+; MACHO-NEXT: tTAILJMPd @z, 14, $noreg
+
+define void @a() {
+entry:
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+declare void @z(i32, i32, i32, i32)
+
+define dso_local void @b(i32* nocapture readnone %p) {
+entry:
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
Index: llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir
@@ -0,0 +1,334 @@
+# RUN: llc -mtriple=thumbv7-- -run-pass=prologepilog \
+# RUN: -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+  define void @CheckAddrModeT2_i12() { ret void }
+  define void @CheckAddrModeT2_i8() { ret void }
+  define void @CheckAddrModeT2_i8s4() { ret void }
+  define void @CheckAddrModeT2_ldrex() { ret void }
+  define void @CheckAddrModeT2_i7() { ret void }
+  define void @CheckAddrModeT2_i7s2() { ret void }
+  define void @CheckAddrModeT2_i7s4() { ret void }
+  define void @CheckAddrModeT1_s() { ret void }
+  define void @foo() { ret void }
+
+...
+---
+
+name:           CheckAddrModeT2_i12
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    liveins: $r0
+    ;CHECK-LABEL: name:           CheckAddrModeT2_i12
+    ;CHECK: $r1 = tMOVr killed $r0, 14, $noreg
+    ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I12:[0-9]+]]
+    ;CHECK-NEXT: $r5 = t2LDRi12 $sp, 4088, 14, $noreg
+    $r1 = tMOVr killed $r0, 14, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    $r1 = t2LDRi12 $sp, 0, 14, $noreg
+    $r2 = t2LDRi12 $sp, 8, 14, $noreg
+    $r3 = t2LDRi12 $sp, 10, 14, $noreg
+    $r4 = t2LDRi12 $sp, 4086, 14, $noreg
+    $r5 = t2LDRi12 $sp, 4088, 14, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    $r1 = t2LDRi12 $sp, 0, 14, $noreg
+    $r2 = t2LDRi12 $sp, 8, 14, $noreg
+    $r3 = t2LDRi12 $sp, 10, 14, $noreg
+    $r4 = t2LDRi12 $sp, 4086, 14, $noreg
+    $r5 = t2LDRi12 $sp, 4088, 14, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           CheckAddrModeT2_i8
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    liveins: $r1, $r2, $r3, $r4
+    ;CHECK-LABEL: name:           CheckAddrModeT2_i8
+    ;CHECK: $r0 = tMOVr $r1, 14, $noreg
+    ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I8:[0-9]+]]
+    ;CHECK-NEXT: t2STRHi8 $r4, $sp, 248, 14, $noreg
+    $r0 = tMOVr $r1, 14, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    t2STRHi8 $r0, $sp, 0, 14, $noreg
+    t2STRHi8 $r1, $sp, 8, 14, $noreg
+    t2STRHi8 $r2, $sp, 10, 14, $noreg
+    t2STRHi8 $r3, $sp, 247, 14, $noreg
+    t2STRHi8 $r4, $sp, 248, 14, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    t2STRHi8 $r0, $sp, 0, 14, $noreg
+    t2STRHi8 $r1, $sp, 8, 14, $noreg
+    t2STRHi8 $r2, $sp, 10, 14, $noreg
+    t2STRHi8 $r3, $sp, 247, 14, $noreg
+    t2STRHi8 $r4, $sp, 248, 14, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           CheckAddrModeT2_i8s4
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    liveins: $r1, $r2, $r3, $r4, $r5
+    ;CHECK-LABEL: name:           CheckAddrModeT2_i8s4
+    ;CHECK: $r0 = tMOVr $r2, 14, $noreg
+    ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I8S4:[0-9]+]]
+    ;CHECK-NEXT: t2STRDi8 $r4, $r5, $sp, 254, 14, $noreg
+    $r0 = tMOVr $r2, 14, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    t2STRDi8 $r0, $r1, $sp, 0, 14, $noreg
+    t2STRDi8 $r1, $r2, $sp, 8, 14, $noreg
+    t2STRDi8 $r2, $r3, $sp, 10, 14, $noreg
+    t2STRDi8 $r3, $r4, $sp, 253, 14, $noreg
+    t2STRDi8 $r4, $r5, $sp, 254, 14, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    t2STRDi8 $r0, $r1, $sp, 0, 14, $noreg
+    t2STRDi8 $r1, $r2, $sp, 8, 14, $noreg
+    t2STRDi8 $r2, $r3, $sp, 10, 14, $noreg
+    t2STRDi8 $r3, $r4, $sp, 253, 14, $noreg
+    t2STRDi8 $r4, $r5, $sp, 254, 14, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           CheckAddrModeT2_ldrex
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    liveins: $r1, $r2, $r3, $r4, $r5
+    ;CHECK-LABEL: name:           CheckAddrModeT2_ldrex
+    ;CHECK: $r0 = tMOVr $r2, 14, $noreg
+    ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[LDREX:[0-9]+]]
+    ;CHECK-NEXT: t2LDREX $sp, 254, 14, $noreg
+    $r0 = tMOVr $r2, 14, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    $r0 = t2LDREX $sp, 0, 14, $noreg
+    $r1 = t2LDREX $sp, 8, 14, $noreg
+    $r2 = t2LDREX $sp, 10, 14, $noreg
+    $r3 = t2LDREX $sp, 253, 14, $noreg
+    $r4 = t2LDREX $sp, 254, 14, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    $r0 = t2LDREX $sp, 0, 14, $noreg
+    $r1 = t2LDREX $sp, 8, 14, $noreg
+    $r2 = t2LDREX $sp, 10, 14, $noreg
+    $r3 = t2LDREX $sp, 253, 14, $noreg
+    $r4 = t2LDREX $sp, 254, 14, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           CheckAddrModeT2_i7
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    liveins: $r2, $q0, $q1, $q2, $q3, $q4
+    ;CHECK-LABEL: name:           CheckAddrModeT2_i7
+    ;CHECK: $r0 = tMOVr $r2, 14, $noreg
+    ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I7:[0-9]+]]
+    ;CHECK-NEXT: MVE_VSTRBU8 $q4, $sp, 120, 0, $noreg
+    $r0 = tMOVr $r2, 14, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    MVE_VSTRBU8 $q0, $sp, 0, 0, $noreg
+    MVE_VSTRBU8 $q1, $sp, 8, 0, $noreg
+    MVE_VSTRBU8 $q2, $sp, 10, 0, $noreg
+    MVE_VSTRBU8 $q3, $sp, 119, 0, $noreg
+    MVE_VSTRBU8 $q4, $sp, 120, 0, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    MVE_VSTRBU8 $q0, $sp, 0, 0, $noreg
+    MVE_VSTRBU8 $q1, $sp, 8, 0, $noreg
+    MVE_VSTRBU8 $q2, $sp, 10, 0, $noreg
+    MVE_VSTRBU8 $q3, $sp, 119, 0, $noreg
+    MVE_VSTRBU8 $q4, $sp, 120, 0, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           CheckAddrModeT2_i7s2
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    liveins: $r2, $q0, $q1, $q2, $q3, $q4
+    ;CHECK-LABEL: name:           CheckAddrModeT2_i7s2
+    ;CHECK: $r0 = tMOVr $r2, 14, $noreg
+    ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I7S2:[0-9]+]]
+    ;CHECK-NEXT: MVE_VSTRHU16 $q4, $sp, 124, 0, $noreg
+    $r0 = tMOVr $r2, 14, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    MVE_VSTRHU16 $q0, $sp, 0, 0, $noreg
+    MVE_VSTRHU16 $q1, $sp, 8, 0, $noreg
+    MVE_VSTRHU16 $q2, $sp, 10, 0, $noreg
+    MVE_VSTRHU16 $q3, $sp, 119, 0, $noreg
+    MVE_VSTRHU16 $q4, $sp, 124, 0, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    MVE_VSTRHU16 $q0, $sp, 0, 0, $noreg
+    MVE_VSTRHU16 $q1, $sp, 8, 0, $noreg
+    MVE_VSTRHU16 $q2, $sp, 10, 0, $noreg
+    MVE_VSTRHU16 $q3, $sp, 119, 0, $noreg
+    MVE_VSTRHU16 $q4, $sp, 124, 0, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           CheckAddrModeT2_i7s4
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    liveins: $r2, $q0, $q1, $q2, $q3, $q4
+    ;CHECK-LABEL: name:           CheckAddrModeT2_i7s4
+    ;CHECK: $r0 = tMOVr $r2, 14, $noreg
+    ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I7S4:[0-9]+]]
+    ;CHECK-NEXT: MVE_VSTRWU32 $q4, $sp, 126, 0, $noreg
+    $r0 = tMOVr $r2, 14, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    MVE_VSTRWU32 $q0, $sp, 0, 0, $noreg
+    MVE_VSTRWU32 $q1, $sp, 8, 0, $noreg
+    MVE_VSTRWU32 $q2, $sp, 10, 0, $noreg
+    MVE_VSTRWU32 $q3, $sp, 125, 0, $noreg
+    MVE_VSTRWU32 $q4, $sp, 126, 0, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    MVE_VSTRWU32 $q0, $sp, 0, 0, $noreg
+    MVE_VSTRWU32 $q1, $sp, 8, 0, $noreg
+    MVE_VSTRWU32 $q2, $sp, 10, 0, $noreg
+    MVE_VSTRWU32 $q3, $sp, 125, 0, $noreg
+    MVE_VSTRWU32 $q4, $sp, 126, 0, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           CheckAddrModeT1_s
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6
+    ;CHECK-LABEL: name:           CheckAddrModeT1_s
+    ;CHECK: $r0 = tMOVr $r3, 14, $noreg
+    ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[T1_S:[0-9]+]]
+    ;CHECK-NEXT: tSTRspi $r6, $sp, 254, 14, $noreg
+    $r0 = tMOVr $r3, 14, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    tSTRspi $r0, $sp, 0, 14, $noreg
+    tSTRspi $r1, $sp, 4, 14, $noreg
+    tSTRspi $r2, $sp, 8, 14, $noreg
+    tSTRspi $r3, $sp, 12, 14, $noreg
+    tSTRspi $r4, $sp, 16, 14, $noreg
+    tSTRspi $r5, $sp, 253, 14, $noreg
+    tSTRspi $r6, $sp, 254, 14, $noreg
+    tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    tSTRspi $r0, $sp, 0, 14, $noreg
+    tSTRspi $r1, $sp, 4, 14, $noreg
+    tSTRspi $r2, $sp, 8, 14, $noreg
+    tSTRspi $r3, $sp, 12, 14, $noreg
+    tSTRspi $r4, $sp, 16, 14, $noreg
+    tSTRspi $r5, $sp, 253, 14, $noreg
+    tSTRspi $r6, $sp, 254, 14, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           foo
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $lr
+
+    BX_RET 14, $noreg
+
+    ;CHECK: name:           OUTLINED_FUNCTION_[[I7S4]]
+    ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8
+    ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    ;CHECK-NEXT: MVE_VSTRWU32 $q0, $sp, 2, 0, $noreg
+    ;CHECK-NEXT: MVE_VSTRWU32 $q1, $sp, 10, 0, $noreg
+    ;CHECK-NEXT: MVE_VSTRWU32 $q2, $sp, 12, 0, $noreg
+    ;CHECK-NEXT: MVE_VSTRWU32 $q3, $sp, 127, 0, $noreg
+    ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg
+
+    ;CHECK: name:           OUTLINED_FUNCTION_[[I8]]
+    ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8
+    ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    ;CHECK-NEXT: t2STRHi8 $r0, $sp, 8, 14, $noreg
+    ;CHECK-NEXT: t2STRHi8 $r1, $sp, 16, 14, $noreg
+    ;CHECK-NEXT: t2STRHi8 $r2, $sp, 18, 14, $noreg
+    ;CHECK-NEXT: t2STRHi8 $r3, $sp, 255, 14, $noreg
+    ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg
+
+    ;CHECK: name:           OUTLINED_FUNCTION_[[I7S2]]
+    ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8
+    ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    ;CHECK-NEXT: MVE_VSTRHU16 $q0, $sp, 4, 0, $noreg
+    ;CHECK-NEXT: MVE_VSTRHU16 $q1, $sp, 12, 0, $noreg
+    ;CHECK-NEXT: MVE_VSTRHU16 $q2, $sp, 14, 0, $noreg
+    ;CHECK-NEXT: MVE_VSTRHU16 $q3, $sp, 123, 0, $noreg
+    ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg
+
+    ;CHECK: name:           OUTLINED_FUNCTION_[[LDREX]]
+    ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8
+    ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    ;CHECK-NEXT: $r0 = t2LDREX $sp, 2, 14, $noreg
+    ;CHECK-NEXT: $r1 = t2LDREX $sp, 10, 14, $noreg
+    ;CHECK-NEXT: $r2 = t2LDREX $sp, 12, 14, $noreg
+    ;CHECK-NEXT: $r3 = t2LDREX $sp, 255, 14, $noreg
+    ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg
+
+    ;CHECK: name:           OUTLINED_FUNCTION_[[I7]]
+    ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8
+    ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    ;CHECK-NEXT: MVE_VSTRBU8 $q0, $sp, 8, 0, $noreg
+    ;CHECK-NEXT: MVE_VSTRBU8 $q1, $sp, 16, 0, $noreg
+    ;CHECK-NEXT: MVE_VSTRBU8 $q2, $sp, 18, 0, $noreg
+    ;CHECK-NEXT: MVE_VSTRBU8 $q3, $sp, 127, 0, $noreg
+    ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg
+
+    ;CHECK: name:           OUTLINED_FUNCTION_[[I12]]
+    ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8
+    ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    ;CHECK-NEXT: $r1 = t2LDRi12 $sp, 8, 14, $noreg
+    ;CHECK-NEXT: $r2 = t2LDRi12 $sp, 16, 14, $noreg
+    ;CHECK-NEXT: $r3 = t2LDRi12 $sp, 18, 14, $noreg
+    ;CHECK-NEXT: $r4 = t2LDRi12 $sp, 4094, 14, $noreg
+    ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg
+
+    ;CHECK: name:           OUTLINED_FUNCTION_[[I8S4]]
+    ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8
+    ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 2, 14, $noreg
+    ;CHECK-NEXT: t2STRDi8 $r1, $r2, $sp, 10, 14, $noreg
+    ;CHECK-NEXT: t2STRDi8 $r2, $r3, $sp, 12, 14, $noreg
+    ;CHECK-NEXT: t2STRDi8 $r3, $r4, $sp, 255, 14, $noreg
+    ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg
+
+    ;CHECK: name:           OUTLINED_FUNCTION_[[T1_S]]
+    ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8
+    ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    ;CHECK-NEXT: tSTRspi $r0, $sp, 2, 14, $noreg
+    ;CHECK-NEXT: tSTRspi $r1, $sp, 6, 14, $noreg
+    ;CHECK-NEXT: tSTRspi $r2, $sp, 10, 14, $noreg
+    ;CHECK-NEXT: tSTRspi $r3, $sp, 14, 14, $noreg
+    ;CHECK-NEXT: tSTRspi $r4, $sp, 18, 14, $noreg
+    ;CHECK-NEXT: tSTRspi $r5, $sp, 255, 14, $noreg
+    ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg
Index: llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir
@@ -0,0 +1,169 @@
+# RUN: llc -mtriple=armv7-- -run-pass=prologepilog -run-pass=machine-outliner \
+# RUN: -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+  define void @CheckAddrMode_i12() { ret void }
+  define void @CheckAddrMode3() { ret void }
+  define void @CheckAddrMode5() { ret void }
+  define void @CheckAddrMode5FP16() { ret void }
+  define void @foo() { ret void }
+
+...
+---
+
+name:           CheckAddrMode_i12
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $r0
+    ; CHECK-LABEL: name:           CheckAddrMode_i12
+    ; CHECK: $r1 = MOVr killed $r0, 14, $noreg, $noreg
+    ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I12:[0-9]+]]
+    ; CHECK-NEXT: $r5 = LDRi12 $sp, 4088, 14, $noreg
+    $r1 = MOVr killed $r0, 14, $noreg, $noreg
+    BL @foo, implicit-def dead $lr, implicit $sp
+    $r1 = LDRi12 $sp, 0, 14, $noreg
+    $r2 = LDRi12 $sp, 8, 14, $noreg
+    $r3 = LDRi12 $sp, 10, 14, $noreg
+    $r4 = LDRi12 $sp, 4086, 14, $noreg
+    $r5 = LDRi12 $sp, 4088, 14, $noreg
+    BL @foo, implicit-def dead $lr, implicit $sp
+    $r1 = LDRi12 $sp, 0, 14, $noreg
+    $r2 = LDRi12 $sp, 8, 14, $noreg
+    $r3 = LDRi12 $sp, 10, 14, $noreg
+    $r4 = LDRi12 $sp, 4086, 14, $noreg
+    $r5 = LDRi12 $sp, 4088, 14, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           CheckAddrMode3
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $r1
+    ; CHECK-LABEL: name:           CheckAddrMode3
+    ; CHECK: $r0 = MOVr killed $r1, 14, $noreg, $noreg
+    ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I3:[0-9]+]]
+    ; CHECK-NEXT: $r5 = LDRSH $sp, $noreg, 248, 14, $noreg
+    $r0 = MOVr killed $r1, 14, $noreg, $noreg
+    BL @foo, implicit-def dead $lr, implicit $sp
+    $r1 = LDRSH $sp, $noreg, 0, 14, $noreg
+    $r2 = LDRSH $sp, $noreg, 8, 14, $noreg
+    $r3 = LDRSH $sp, $noreg, 10, 14, $noreg
+    $r4 = LDRSH $sp, $noreg, 247, 14, $noreg
+    $r5 = LDRSH $sp, $noreg, 248, 14, $noreg
+    BL @foo, implicit-def dead $lr, implicit $sp
+    $r1 = LDRSH $sp, $noreg, 0, 14, $noreg
+    $r2 = LDRSH $sp, $noreg, 8, 14, $noreg
+    $r3 = LDRSH $sp, $noreg, 10, 14, $noreg
+    $r4 = LDRSH $sp, $noreg, 247, 14, $noreg
+    $r5 = LDRSH $sp, $noreg, 248, 14, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           CheckAddrMode5
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $r2
+    ; CHECK-LABEL: name:           CheckAddrMode5
+    ; CHECK: $r0 = MOVr killed $r2, 14, $noreg, $noreg
+    ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I5:[0-9]+]]
+    ; CHECK-NEXT: $d4 = VLDRD $sp, 254, 14, $noreg
+    $r0 = MOVr killed $r2, 14, $noreg, $noreg
+    BL @foo, implicit-def dead $lr, implicit $sp
+    $d0 = VLDRD $sp, 0, 14, $noreg
+    $d1 = VLDRD $sp, 8, 14, $noreg
+    $d2 = VLDRD $sp, 10, 14, $noreg
+    $d3 = VLDRD $sp, 253, 14, $noreg
+    $d4 = VLDRD $sp, 254, 14, $noreg
+    BL @foo, implicit-def dead $lr, implicit $sp
+    $d0 = VLDRD $sp, 0, 14, $noreg
+    $d1 = VLDRD $sp, 8, 14, $noreg
+    $d2 = VLDRD $sp, 10, 14, $noreg
+    $d3 = VLDRD $sp, 253, 14, $noreg
+    $d4 = VLDRD $sp, 254, 14, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           CheckAddrMode5FP16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $r3
+    ; CHECK-LABEL: name:           CheckAddrMode5FP16
+    ; CHECK: $r0 = MOVr killed $r3, 14, $noreg, $noreg
+    ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I5FP16:[0-9]+]]
+    ; CHECK-NEXT: $s5 = VLDRH $sp, 252, 14, $noreg
+    $r0 = MOVr killed $r3, 14, $noreg, $noreg
+    BL @foo, implicit-def dead $lr, implicit $sp
+    $s1 = VLDRH $sp, 0, 14, $noreg
+    $s2 = VLDRH $sp, 8, 14, $noreg
+    $s3 = VLDRH $sp, 10, 14, $noreg
+    $s4 = VLDRH $sp, 240, 14, $noreg
+    $s5 = VLDRH $sp, 252, 14, $noreg
+    BL @foo, implicit-def dead $lr, implicit $sp
+    $s1 = VLDRH $sp, 0, 14, $noreg
+    $s2 = VLDRH $sp, 8, 14, $noreg
+    $s3 = VLDRH $sp, 10, 14, $noreg
+    $s4 = VLDRH $sp, 240, 14, $noreg
+    $s5 = VLDRH $sp, 252, 14, $noreg
+    BX_RET 14, $noreg
+...
+---
+
+name:           foo
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $lr
+
+    BX_RET 14, $noreg
+
+    ;CHECK: name:           OUTLINED_FUNCTION_[[I5FP16]]
+    ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14, $noreg
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8
+    ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp
+    ;CHECK-NEXT: $s1 = VLDRH $sp, 4, 14, $noreg
+    ;CHECK-NEXT: $s2 = VLDRH $sp, 12, 14, $noreg
+    ;CHECK-NEXT: $s3 = VLDRH $sp, 14, 14, $noreg
+    ;CHECK-NEXT: $s4 = VLDRH $sp, 244, 14, $noreg
+    ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14, $noreg
+
+    ;CHECK: name:           OUTLINED_FUNCTION_[[I3]]
+    ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14, $noreg
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8
+    ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp
+    ;CHECK-NEXT: $r1 = LDRSH $sp, $noreg, 8, 14, $noreg
+    ;CHECK-NEXT: $r2 = LDRSH $sp, $noreg, 16, 14, $noreg
+    ;CHECK-NEXT: $r3 = LDRSH $sp, $noreg, 18, 14, $noreg
+    ;CHECK-NEXT: $r4 = LDRSH $sp, $noreg, 255, 14, $noreg
+    ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14, $noreg
+
+    ;CHECK: name:           OUTLINED_FUNCTION_[[I5]]
+    ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14, $noreg
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8
+    ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp
+    ;CHECK-NEXT: $d0 = VLDRD $sp, 2, 14, $noreg
+    ;CHECK-NEXT: $d1 = VLDRD $sp, 10, 14, $noreg
+    ;CHECK-NEXT: $d2 = VLDRD $sp, 12, 14, $noreg
+    ;CHECK-NEXT: $d3 = VLDRD $sp, 255, 14, $noreg
+    ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14, $noreg
+
+    ;CHECK: name:           OUTLINED_FUNCTION_[[I12]]
+    ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14, $noreg
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8
+    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8
+    ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp
+    ;CHECK-NEXT: $r1 = LDRi12 $sp, 8, 14, $noreg
+    ;CHECK-NEXT: $r2 = LDRi12 $sp, 16, 14, $noreg
+    ;CHECK-NEXT: $r3 = LDRi12 $sp, 18, 14, $noreg
+    ;CHECK-NEXT: $r4 = LDRi12 $sp, 4094, 14, $noreg
+    ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14, $noreg
+
Index: llvm/test/CodeGen/ARM/O3-pipeline.ll
===================================================================
--- llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -162,15 +162,15 @@
 ; CHECK-NEXT:      Thumb2 instruction size reduce pass
 ; CHECK-NEXT:      Unpack machine instruction bundles
 ; CHECK-NEXT:      optimise barriers pass
+; CHECK-NEXT:      Contiguously Lay Out Funclets
+; CHECK-NEXT:      StackMap Liveness Analysis
+; CHECK-NEXT:      Live DEBUG_VALUE analysis
 ; CHECK-NEXT:      MachineDominator Tree Construction
 ; CHECK-NEXT:      ARM constant island placement and branch shortening pass
 ; CHECK-NEXT:      MachineDominator Tree Construction
 ; CHECK-NEXT:      Machine Natural Loop Construction
 ; CHECK-NEXT:      ReachingDefAnalysis
 ; CHECK-NEXT:      ARM Low Overhead Loops pass
-; CHECK-NEXT:      Contiguously Lay Out Funclets
-; CHECK-NEXT:      StackMap Liveness Analysis
-; CHECK-NEXT:      Live DEBUG_VALUE analysis
 ; CHECK-NEXT:      Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:      Machine Optimization Remark Emitter
 ; CHECK-NEXT:      ARM Assembly Printer
Index: llvm/lib/Target/ARM/ARMTargetMachine.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -244,6 +244,10 @@
   }
 
   initAsmInfo();
+
+  // ARM supports the MachineOutliner.
+  setMachineOutliner(true);
+  setSupportsDefaultOutlining(false);
 }
 
 ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
@@ -359,6 +363,7 @@
   void addPreRegAlloc() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
+  void addPreEmitPass2() override;
 
   std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
 };
@@ -538,9 +543,12 @@
   // Don't optimize barriers at -O0.
   if (getOptLevel() != CodeGenOpt::None)
     addPass(createARMOptimizeBarriersPass());
+}
 
+void ARMPassConfig::addPreEmitPass2() {
   addPass(createARMConstantIslandPass());
-  addPass(createARMLowOverheadLoopsPass());
+  if (!MachineOutlinerEnabled)
+    addPass(createARMLowOverheadLoopsPass());
 
   // Identify valid longjmp targets for Windows Control Flow Guard.
   if (TM->getTargetTriple().isOSWindows())
Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.h
===================================================================
--- llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -343,7 +343,48 @@
   ArrayRef<std::pair<unsigned, const char *>>
   getSerializableBitmaskMachineOperandTargetFlags() const override;
 
+  /// MachineOutliner hooks (ARM supports the MachineOutliner).
+  bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
+                                   bool OutlineFromLinkOnceODRs) const override;
+  outliner::OutlinedFunction getOutliningCandidateInfo(
+      std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+  outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT,
+                                       unsigned Flags) const override;
+  bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+                              unsigned &Flags) const override;
+  void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
+                          const outliner::OutlinedFunction &OF) const override;
+  MachineBasicBlock::iterator
+  insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+                     MachineBasicBlock::iterator &It, MachineFunction &MF,
+                     const outliner::Candidate &C) const override;
+
 private:
+  /// Sets the offsets on outlined instructions in \p MBB which use SP
+  /// so that they will be valid post-outlining.
+  ///
+  /// \param MBB A \p MachineBasicBlock in an outlined function.
+  void fixupPostOutline(MachineBasicBlock &MBB) const;
+
+  /// Returns an unused general-purpose register which can be used for
+  /// constructing an outlined call if one exists. Returns 0 otherwise.
+  unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
+
+  /// Adds an instruction which saves the link register on top of the stack into
+  /// the MachineBasicBlock \p MBB at position \p It.
+  void saveLROnStack(MachineBasicBlock &MBB,
+                     MachineBasicBlock::iterator &It) const;
+
+  /// Adds an instruction which restores the link register from the top of the
+  /// stack into the MachineBasicBlock \p MBB at position \p It.
+  void restoreLRFromStack(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator &It) const;
+
+  /// Returns true if \p MI has no SP use or its SP offset can take \p Fixup.
+  /// When \p Updt is set, the adjusted offset is written back to \p MI.
+  bool checkAndUpdateStackOffset(MachineInstr *MI, int64_t Fixup,
+                                 bool Updt) const;
+
+
   unsigned getInstBundleLength(const MachineInstr &MI) const;
 
   int getVLDMDefCycle(const InstrItineraryData *ItinData,
Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -32,6 +32,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
@@ -5460,3 +5461,892 @@
   return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
          ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
 }
+
+/// Constants defining how certain sequences should be outlined.
+/// This encompasses how an outlined function should be called, and what kind of
+/// frame should be emitted for that outlined function.
+///
+/// \p MachineOutlinerDefault implies that the function should be called with
+/// a save and restore of LR to the stack.
+///
+/// That is,
+///
+/// I1     Save LR                    OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION       I1
+/// I3     Restore LR                 I2
+///                                   I3
+///                                   BX LR
+///
+/// +-------------------------+--------+-----+
+/// |                         | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes  |      8 |  12 |
+/// | Frame overhead in Bytes |      2 |   4 |
+/// | Stack fixup required    |    Yes | Yes |
+/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerRegSave implies that the function should be called with a
+/// save and restore of LR to an available register. This allows us to avoid
+/// stack fixups. Note that this outlining variant is compatible with the
+/// NoLRSave case.
+///
+/// That is,
+///
+/// I1     Save LR                    OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION       I1
+/// I3     Restore LR                 I2
+///                                   I3
+///                                   BX LR
+///
+/// +-------------------------+--------+-----+
+/// |                         | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes  |      8 |  12 |
+/// | Frame overhead in Bytes |      2 |   4 |
+/// | Stack fixup required    |     No |  No |
+/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerTailCall implies that the function is being created from
+/// a sequence of instructions ending in a return.
+///
+/// That is,
+///
+/// I1                                OUTLINED_FUNCTION:
+/// I2    --> B OUTLINED_FUNCTION     I1
+/// BX LR                             I2
+///                                   BX LR
+///
+/// +-------------------------+--------+-----+
+/// |                         | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes  |      4 |   4 |
+/// | Frame overhead in Bytes |      0 |   0 |
+/// | Stack fixup required    |     No |  No |
+/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerNoLRSave implies that the function should be called using
+/// a BL instruction, but doesn't require LR to be saved and restored. This
+/// happens when LR is known to be dead.
+///
+/// That is,
+///
+/// I1                                OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION       I1
+/// I3                                I2
+///                                   I3
+///                                   BX LR
+///
+/// +-------------------------+--------+-----+
+/// |                         | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes  |      4 |   4 |
+/// | Frame overhead in Bytes |      4 |   4 |
+/// | Stack fixup required    |     No |  No |
+/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerThunk implies that the function is being created from
+/// a sequence of instructions ending in a call. The outlined function is
+/// called with a BL instruction, and the outlined function tail-calls the
+/// original call destination.
+///
+/// That is,
+///
+/// I1                                OUTLINED_FUNCTION:
+/// I2   --> BL OUTLINED_FUNCTION     I1
+/// BL f                              I2
+///                                   B f
+///
+/// +-------------------------+--------+-----+
+/// |                         | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes  |      4 |   4 |
+/// | Frame overhead in Bytes |      0 |   0 |
+/// | Stack fixup required    |     No |  No |
+/// +-------------------------+--------+-----+
+
+enum MachineOutlinerClass {
+  MachineOutlinerDefault,  ///< Emit a save, restore, call, and return.
+  MachineOutlinerRegSave,  ///< Same as default, but save to a register.
+  MachineOutlinerTailCall, ///< Only emit a branch.
+  MachineOutlinerThunk,    ///< Emit a call and tail-call.
+  MachineOutlinerNoLRSave  ///< Emit a call and return.
+};
+
+enum MachineOutlinerMBBFlags {
+  LRUnavailableSomewhere = 0x2, ///< LR is used in the block or live out of it.
+  HasCalls = 0x4,               ///< The block contains a call.
+  UnsafeRegsDead = 0x8          ///< R12 and CPSR are untouched in the block.
+};
+
+struct OutlinerCosts {
+  const int CallDefault;   ///< Call bytes when LR is saved on the stack.
+  const int FrameDefault;  ///< Frame bytes for the default frame.
+  const int CallRegSave;   ///< Call bytes when LR is saved to a register.
+  const int FrameRegSave;  ///< Frame bytes for the register-save variant.
+  const int CallNoLRSave;  ///< Call bytes when LR needs no save.
+  const int FrameNoLRSave; ///< Frame bytes when LR needs no save.
+  const int CallTailCall;  ///< Call bytes for a tail call.
+  const int FrameTailCall; ///< Frame bytes for a tail-called function.
+  const int CallThunk;     ///< Call bytes for a thunk.
+  const int FrameThunk;    ///< Frame bytes for a thunk.
+
+  OutlinerCosts(const ARMSubtarget &ST)
+      : CallDefault(ST.isThumb() ? 8 : 12),
+        FrameDefault(ST.isThumb() ? 2 : 4),
+        CallRegSave(ST.isThumb() ? 8 : 12),
+        FrameRegSave(ST.isThumb() ? 2 : 4),
+        CallNoLRSave(ST.isThumb() ? 4 : 4),
+        FrameNoLRSave(ST.isThumb() ? 4 : 4),
+        CallTailCall(ST.isThumb() ? 4 : 4),
+        FrameTailCall(ST.isThumb() ? 0 : 0),
+        CallThunk(ST.isThumb() ? 4 : 4),
+        FrameThunk(ST.isThumb() ? 0 : 0) {}
+};
+
+unsigned
+ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
+  assert(C.LRUWasSet && "LRU wasn't set?");
+  const MachineFunction &Func = *C.getMF();
+  const auto *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+      Func.getSubtarget().getRegisterInfo());
+  const BitVector Reserved = RegInfo->getReservedRegs(Func);
+  // Search tGPR in Thumb mode and GPR otherwise; the first register that is
+  // available in both C.LRU and C.UsedInSequence is returned.
+  const TargetRegisterClass &RC =
+      Subtarget.isThumb() ? ARM::tGPRRegClass : ARM::GPRRegClass;
+  for (unsigned Reg : RC) {
+    // Skip reserved registers, LR (not reserved, but don't use it) and R12
+    // (not guaranteed to be preserved).
+    if ((Reg < Reserved.size() && Reserved.test(Reg)) || Reg == ARM::LR ||
+        Reg == ARM::R12)
+      continue;
+    if (C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
+      return Reg;
+  }
+  return 0u; // No suitable register.
+}
+
+/// Prunes the candidates in \p RepeatedSequenceLocs that are unsafe to outline,
+/// then selects the frame kind and the call kind/cost of the remaining ones.
+outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
+    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
+  unsigned SequenceSize =
+      std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
+                      [this](unsigned Sum, const MachineInstr &MI) {
+                        return Sum + getInstSizeInBytes(MI);
+                      });
+
+  // Properties about candidate MBBs that hold for all of them.
+  unsigned FlagsSetInAll = 0xF;
+
+  // Intersect the MBB flags of every candidate into FlagsSetInAll.
+  const TargetRegisterInfo &TRI = getRegisterInfo();
+  for (const outliner::Candidate &C : RepeatedSequenceLocs)
+    FlagsSetInAll &= C.Flags;
+
+  // According to the ARM Procedure Call Standard, the following are
+  // undefined on entry/exit from a function call:
+  //
+  // * Register R12(IP),
+  // * Condition codes (and thus the CPSR register)
+  //
+  // Because of this, we can't outline any sequence of instructions where one
+  // of these registers is live into/across it. Thus, we need to delete those
+  // candidates.
+  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
+    // If the unsafe registers in this block are all dead, then we don't need
+    // to compute liveness here.
+    if (C.Flags & UnsafeRegsDead)
+      return false;
+    C.initLRU(TRI);
+    return !C.LRU.available(ARM::R12) || !C.LRU.available(ARM::CPSR);
+  };
+
+  // Are there any candidates where those registers are live?
+  if (!(FlagsSetInAll & UnsafeRegsDead)) {
+    // Erase every candidate that violates the restrictions above. (It could be
+    // true that we have viable candidates, so it's not worth bailing out in
+    // the case that, say, 1 out of 20 candidates violate the restrictions.)
+    RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
+                                              RepeatedSequenceLocs.end(),
+                                              CantGuaranteeValueAcrossCall),
+                               RepeatedSequenceLocs.end());
+
+    // If the sequence doesn't have enough candidates left, then we're done.
+    if (RepeatedSequenceLocs.size() < 2)
+      return outliner::OutlinedFunction();
+  }
+
+  // At this point, we have only "safe" candidates to outline. Figure out
+  // frame + call instruction information.
+
+  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
+
+  // Helper lambda which sets call information for every candidate.
+  auto SetCandidateCallInfo =
+      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
+        for (outliner::Candidate &C : RepeatedSequenceLocs)
+          C.setCallInfo(CallID, NumBytesForCall);
+      };
+
+  const OutlinerCosts Costs(Subtarget);
+  unsigned FrameID = MachineOutlinerDefault;
+  unsigned NumBytesToCreateFrame = Costs.FrameDefault;
+
+  // If the last instruction in any candidate is a terminator, then we should
+  // tail call all of the candidates.
+  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
+    FrameID = MachineOutlinerTailCall;
+    NumBytesToCreateFrame = Costs.FrameTailCall;
+    SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
+  } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
+             LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr ||
+             LastInstrOpcode == ARM::tBLXi) {
+    // FIXME: Do we need to check if the code after this uses the value of LR?
+    FrameID = MachineOutlinerThunk;
+    NumBytesToCreateFrame = Costs.FrameThunk;
+    SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
+  } else {
+    // We need to decide how to emit calls + frames. We can always emit the same
+    // frame if we don't need to save to the stack. If we have to save to the
+    // stack, then we need a different frame.
+    unsigned NumBytesNoStackCalls = 0;
+    std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
+
+    for (outliner::Candidate &C : RepeatedSequenceLocs) {
+      C.initLRU(TRI);
+
+      // Is LR available? If so, we don't need a save.
+      if (C.LRU.available(ARM::LR)) {
+        NumBytesNoStackCalls += Costs.CallNoLRSave;
+        C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
+        CandidatesWithoutStackFixups.push_back(C);
+      }
+
+      // Is an unused register available? If so, we won't modify the stack, so
+      // we can outline with the same frame type as those that don't save LR.
+      else if (findRegisterToSaveLRTo(C)) {
+        NumBytesNoStackCalls += Costs.CallRegSave;
+        C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
+        CandidatesWithoutStackFixups.push_back(C);
+      }
+
+      // Is SP used in the sequence at all? If not, we don't have to modify
+      // the stack, so we are guaranteed to get the same frame.
+      else if (C.UsedInSequence.available(ARM::SP)) {
+        NumBytesNoStackCalls += Costs.CallDefault;
+        C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
+        CandidatesWithoutStackFixups.push_back(C);
+      }
+
+      // If we outline this, we need to modify the stack. Pretend we don't
+      // outline this by saving all of its bytes.
+      else {
+        NumBytesNoStackCalls += SequenceSize;
+      }
+    }
+
+    // If there are no places where we have to save LR, then note that we don't
+    // have to update the stack. Otherwise, give every candidate the default
+    // call type.
+    if (NumBytesNoStackCalls <=
+        RepeatedSequenceLocs.size() * Costs.CallDefault) {
+      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
+      FrameID = MachineOutlinerNoLRSave;
+    } else {
+      SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
+    }
+  }
+
+  // Does every candidate's MBB contain a call?  If so, then we might have a
+  // call in the range.
+  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
+    // Check if the range contains a call. These require a save + restore of the
+    // link register.
+    if (std::any_of(FirstCand.front(), FirstCand.back(),
+                    [](const MachineInstr &MI) { return MI.isCall(); }))
+      NumBytesToCreateFrame += Costs.FrameDefault;
+
+    // Handle the last instruction separately. If this is a tail call, then the
+    // last instruction is a call. We don't want to save + restore in this case.
+    // However, it could be possible that the last instruction is a call without
+    // it being valid to tail call this sequence. We should consider this as
+    // well.
+    else if (FrameID != MachineOutlinerThunk &&
+             FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
+      NumBytesToCreateFrame += Costs.FrameThunk;
+  }
+
+  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
+                                    NumBytesToCreateFrame, FrameID);
+}
+
+bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
+    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
+  const Function &Fn = MF.getFunction();
+
+  // The linker may deduplicate linkonce_odr functions, so they are only
+  // outlined from when the caller opts in.
+  const bool SkipLinkOnceODR =
+      !OutlineFromLinkOnceODRs && Fn.hasLinkOnceODRLinkage();
+
+  // A section marking may mean the program expects all of the code to be in
+  // the named section.
+  // FIXME: Allow outlining from multiple functions with the same section
+  // marking.
+  if (SkipLinkOnceODR || Fn.hasSection())
+    return false;
+
+  // FIXME: Thumb1 outlining is not handled
+  const bool IsThumb1Only =
+      MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction();
+
+  return !IsThumb1Only;
+}
+
+bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+                                              unsigned &Flags) const {
+  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
+         "Suitable Machine Function for outlining must track liveness");
+
+  // Accumulate the register units of every instruction in MBB.
+  LiveRegUnits Units(getRegisterInfo());
+  for (auto It = MBB.rbegin(), End = MBB.rend(); It != End; ++It)
+    Units.accumulate(*It);
+
+  // R12 and CPSR are the registers that are unsafe to keep live across an
+  // outlined call. Record whether they are available inside the block.
+  const bool BodyLeavesR12Free = Units.available(ARM::R12);
+  const bool BodyLeavesCPSRFree = Units.available(ARM::CPSR);
+
+  // Both available (live-outs not counted yet): no need to check them later.
+  if (BodyLeavesR12Free && BodyLeavesCPSRFree)
+    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
+
+  // Now add the live-outs of the block to the set.
+  Units.addLiveOuts(MBB);
+
+  // A register that is available inside the block but is also a live-out of
+  // the block makes outlining from this block unsafe.
+  const bool R12LiveThrough = BodyLeavesR12Free && !Units.available(ARM::R12);
+  const bool CPSRLiveThrough =
+      BodyLeavesCPSRFree && !Units.available(ARM::CPSR);
+  if (R12LiveThrough || CPSRLiveThrough)
+    return false;
+
+  // Flag blocks that contain a call.
+  const auto IsCall = [](MachineInstr &MI) { return MI.isCall(); };
+  if (any_of(MBB, IsCall))
+    Flags |= MachineOutlinerMBBFlags::HasCalls;
+
+  // Flag blocks in which LR is not available (live-outs included).
+  if (!Units.available(ARM::LR))
+    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
+
+  return true;
+}
+
+bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
+                                                 int64_t Fixup,
+                                                 bool Updt) const {
+  int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP);
+
+  if (SPIdx < 0)
+    // No SP operand: nothing needs to be fixed up.
+    return true;
+
+  unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
+
+  // Stack might be involved but addressing mode doesn't handle any offset.
+  // Note: AddrModeT1_[1|2|4] don't operate on SP
+  if (AddrMode == ARMII::AddrMode1        // Arithmetic instructions
+      || AddrMode == ARMII::AddrMode4     // Load/Store Multiple
+      || AddrMode == ARMII::AddrMode6     // Neon Load/Store Multiple
+      || AddrMode == ARMII::AddrModeT2_so // SP can't be used as base register
+      || AddrMode == ARMII::AddrModeT2_pc // PCrel access
+      || AddrMode == ARMII::AddrMode2     // Used by PRE and POST indexed LD/ST
+      || AddrMode == ARMII::AddrModeNone)
+    return false;
+
+  // SP must be operand 1, or operand 2 for AddrModeT2_i8s4; otherwise give up.
+  if (SPIdx != 1) {
+    if (AddrMode != ARMII::AddrModeT2_i8s4)
+      return false;
+    else if (SPIdx != 2)
+      return false;
+  }
+
+  unsigned NumOps = MI->getDesc().getNumOperands();
+  unsigned ImmIdx = NumOps - 3; // NOTE(review): imm assumed 3rd from last.
+
+  const MachineOperand &Offset = MI->getOperand(ImmIdx);
+  assert(Offset.isImm() && "Is not an immediate");
+  int64_t OffVal = Offset.getImm();
+
+  if (OffVal < 0)
+    // Negative offset: the access is below SP, so don't fix it up.
+    return false;
+
+  unsigned NumBits = 0;
+  unsigned Scale = 1;
+
+  switch (AddrMode) {
+  case ARMII::AddrMode3:
+    if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
+      return false;
+    OffVal = ARM_AM::getAM3Offset(OffVal);
+    NumBits = 8;
+    break;
+  case ARMII::AddrMode5:
+    if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
+      return false;
+    OffVal = ARM_AM::getAM5Offset(OffVal);
+    NumBits = 8;
+    Scale = 4;
+    break;
+  case ARMII::AddrMode5FP16:
+    if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
+      return false;
+    OffVal = ARM_AM::getAM5FP16Offset(OffVal);
+    NumBits = 8;
+    Scale = 2;
+    break;
+  case ARMII::AddrModeT2_i8:
+    NumBits = 8;
+    break;
+  case ARMII::AddrModeT2_i8s4:
+  case ARMII::AddrModeT2_ldrex:
+    NumBits = 8;
+    Scale = 4;
+    break;
+  case ARMII::AddrModeT2_i12:
+  case ARMII::AddrMode_i12:
+    NumBits = 12;
+    break;
+  case ARMII::AddrModeT2_i7:
+    NumBits = 7;
+    break;
+  case ARMII::AddrModeT2_i7s2:
+    NumBits = 7;
+    Scale = 2;
+    break;
+  case ARMII::AddrModeT2_i7s4:
+    NumBits = 7;
+    Scale = 4;
+    break;
+  case ARMII::AddrModeT1_s: // SP-relative LD/ST
+    NumBits = 8;
+    Scale = 4;
+    break;
+  default:
+    llvm_unreachable("Unsupported addressing mode!");
+  }
+  // Make sure the offset is encodable for instructions that scale the
+  // immediate.
+  if (((OffVal * Scale + Fixup) & (Scale - 1)) != 0)
+    return false;
+
+  OffVal += Fixup / Scale;
+
+  unsigned Mask = (1 << NumBits) - 1; // Largest encodable immediate.
+
+  if (OffVal <= Mask) {
+    if (Updt)
+      MI->getOperand(ImmIdx).setImm(OffVal);
+    return true;
+  }
+
+  return false;
+}
+
+outliner::InstrType
+ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
+                                   unsigned Flags) const {
+  MachineInstr &MI = *MIT;
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineFunction *MF = MBB->getParent();
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+  // Be conservative with inline ASM
+  if (MI.isInlineAsm())
+    return outliner::InstrType::Invisible;
+
+  // Don't allow debug values to impact outlining type.
+  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
+    return outliner::InstrType::Invisible;
+
+  // At this point, KILL instructions don't really tell us much so we can go
+  // ahead and skip over them.
+  if (MI.isKill())
+    return outliner::InstrType::Invisible;
+
+  // PIC instructions contain labels, outlining them would break offset
+  // computing.
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  case ARM::t2IT:
+  case ARM::tPICADD: // This is used in Thumb1 and Thumb2
+  case ARM::PICADD:
+  case ARM::PICSTR:
+  case ARM::PICSTRB:
+  case ARM::PICSTRH:
+  case ARM::PICLDR:
+  case ARM::PICLDRB:
+  case ARM::PICLDRH:
+  case ARM::PICLDRSB:
+  case ARM::PICLDRSH:
+  case ARM::t2LDRpci_pic:
+  case ARM::t2MOVi16_ga_pcrel:
+  case ARM::t2MOVTi16_ga_pcrel:
+  case ARM::t2MOV_ga_pcrel:
+    return outliner::InstrType::Illegal;
+  default:
+    break;
+  }
+
+  // Is this a terminator for a basic block?
+  if (MI.isTerminator()) {
+    // Don't outline if the branch is not unconditional.
+    if (Opc == ARM::BX_RET || Opc == ARM::tBX_RET || Opc == ARM::MOVPCLR) {
+      if (MI.getOperand(0).getImm() != ARMCC::AL)
+        return outliner::InstrType::Illegal;
+    }
+    if (Opc == ARM::LDMIA_RET) {
+      if (MI.getOperand(2).getImm() != ARMCC::AL)
+        return outliner::InstrType::Illegal;
+    }
+
+    // Is this the end of a function?
+    if (MI.getParent()->succ_empty())
+      return outliner::InstrType::Legal;
+
+    // It's not, so don't outline it.
+    return outliner::InstrType::Illegal;
+  }
+
+  // Make sure none of the operands are un-outlinable.
+  for (const MachineOperand &MOP : MI.operands()) {
+    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
+        MOP.isTargetIndex())
+      return outliner::InstrType::Illegal;
+
+    // If it uses LR then don't touch it.
+    if (MOP.isReg() && !MOP.isImplicit() && (MOP.getReg() == ARM::LR))
+      return outliner::InstrType::Illegal;
+  }
+
+  // If MI is a call we might be able to outline it. We don't want to outline
+  // any calls that rely on the position of items on the stack. When we outline
+  // something containing a call, we have to emit a save and restore of LR in
+  // the outlined function. Currently, this always happens by saving LR to the
+  // stack. Thus, if we outline, say, half the parameters for a function call
+  // plus the call, then we'll break the callee's expectations for the layout
+  // of the stack.
+  //
+  // FIXME: Allow calls to functions which construct a stack frame, as long
+  // as they don't access arguments on the stack.
+  // FIXME: Figure out some way to analyze functions defined in other modules.
+  // We should be able to compute the memory usage based on the IR calling
+  // convention, even if we can't see the definition.
+  if (MI.isCall()) {
+    // Get the function associated with the call. Look at each operand and find
+    // the one that represents the callee and get its name.
+    const Function *Callee = nullptr;
+    for (const MachineOperand &MOP : MI.operands()) {
+      if (MOP.isGlobal()) {
+        Callee = dyn_cast<Function>(MOP.getGlobal());
+        break;
+      }
+    }
+
+    // Never outline calls to mcount.  There isn't any rule that would require
+    // this, but the Linux kernel's "ftrace" feature depends on it.
+    if (Callee && Callee->getName() == "\01_mcount")
+      return outliner::InstrType::Illegal;
+
+    // If we don't know anything about the callee, assume it depends on the
+    // stack layout of the caller. In that case, it's only legal to outline
+    // as a tail-call.  Whitelist the call instructions we know about so we
+    // don't get unexpected results with call pseudo-instructions.
+    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
+    if (MI.getOpcode() == ARM::BL || MI.getOpcode() == ARM::tBL ||
+        MI.getOpcode() == ARM::BLX || MI.getOpcode() == ARM::tBLXr ||
+        MI.getOpcode() == ARM::tBLXi)
+      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
+
+    if (!Callee)
+      return UnknownCallOutlineType;
+
+    // We have a function we have information about. Check it if it's something
+    // can safely outline.
+    MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
+
+    // We don't know what's going on with the callee at all. Don't touch it.
+    if (!CalleeMF)
+      return UnknownCallOutlineType;
+
+    // Check if we know anything about the callee saves on the function. If we
+    // don't, then don't touch it, since that implies that we haven't
+    // computed anything about its stack frame yet.
+    MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
+    if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
+        MFI.getNumObjects() > 0)
+      return UnknownCallOutlineType;
+
+    // At this point, we can say that CalleeMF ought to not pass anything on the
+    // stack. Therefore, we can outline it.
+    return outliner::InstrType::Legal;
+  }
+
+  // Don't outline positions.
+  if (MI.isPosition())
+    return outliner::InstrType::Illegal;
+
+  // Don't touch the link register
+  if (MI.readsRegister(ARM::LR, &getRegisterInfo()) ||
+      MI.modifiesRegister(ARM::LR, &getRegisterInfo()))
+    return outliner::InstrType::Illegal;
+
+  // Does this use the stack?
+  if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
+    // True if there is no chance that any outlined candidate from this range
+    // could require stack fixups. That is, both
+    // * LR is available in the range (No save/restore around call)
+    // * The range doesn't include calls (No save/restore in outlined frame)
+    // are true.
+    // FIXME: This is very restrictive; the flags check the whole block,
+    // not just the bit we will try to outline.
+    bool MightNeedStackFixUp =
+        (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
+                  MachineOutlinerMBBFlags::HasCalls));
+
+    // If this instruction is in a range where it *never* needs to be fixed
+    // up, then we can *always* outline it. This is true even if it's not
+    // possible to fix that instruction up.
+    //
+    // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
+    // use SP. Suppose that I1 sits within a range that definitely doesn't
+    // need stack fixups, while I2 sits in a range that does.
+    //
+    // First, I1 can be outlined as long as we *never* fix up the stack in
+    // any sequence containing it. I1 is already a safe instruction in the
+    // original program, so as long as we don't modify it we're good to go.
+    // So this leaves us with showing that outlining I2 won't break our
+    // program.
+    //
+    // Suppose I1 and I2 belong to equivalent candidate sequences. When we
+    // look at I2, we need to see if it can be fixed up. Suppose I2, (and
+    // thus I1) cannot be fixed up. Then I2 will be assigned an unique
+    // integer label; thus, I2 cannot belong to any candidate sequence (a
+    // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
+    // as well, so we're good. Thus, I1 is always safe to outline.
+    if (!MightNeedStackFixUp)
+      return outliner::InstrType::Legal;
+
+    // Any modification of SP will break our code to save/restore LR.
+    // FIXME: We could handle some instructions which add a constant offset to
+    // SP, with a bit more work.
+    if (MI.modifiesRegister(ARM::SP, TRI))
+      return outliner::InstrType::Illegal;
+
+    // At this point, we have a stack instruction that we might need to fix
+    // up. We'll handle it if it's a load or store.
+    if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
+                                  false))
+      // It's in range, so we can outline it.
+      return outliner::InstrType::Legal;
+
+    // We can't fix it up, so don't outline it.
+    return outliner::InstrType::Illegal;
+  }
+
+  return outliner::InstrType::Legal;
+}
+
+/// Visit every instruction of \p MBB and pass each one that may load or
+/// store to checkAndUpdateStackOffset(), together with the subtarget's stack
+/// alignment and a final argument of true.
+/// NOTE(review): presumably the final `true` asks the helper to rewrite the
+/// offset rather than only check it -- the helper is defined elsewhere;
+/// confirm.
+void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
+  const auto StackAlign = Subtarget.getStackAlignment().value();
+  for (MachineInstr &MI : MBB)
+    if (MI.mayLoadOrStore())
+      checkAndUpdateStackOffset(&MI, StackAlign, true);
+}
+
+/// Insert, before \p It in \p MBB, the instruction that saves LR on the
+/// stack: STR_PRE_IMM (t2STR_PRE for Thumb) defining SP, storing a killed LR
+/// with SP as base and the negated stack alignment as immediate, predicated
+/// on AL.
+void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator &It) const {
+  // The immediate is the negated stack alignment.
+  const int NegStackAlign = -Subtarget.getStackAlignment().value();
+
+  unsigned StoreOpc = ARM::STR_PRE_IMM;
+  if (Subtarget.isThumb())
+    StoreOpc = ARM::t2STR_PRE;
+
+  MachineInstrBuilder Store =
+      BuildMI(MBB, It, DebugLoc(), get(StoreOpc), ARM::SP);
+  Store.addReg(ARM::LR, RegState::Kill);
+  Store.addReg(ARM::SP);
+  Store.addImm(NegStackAlign);
+  Store.add(predOps(ARMCC::AL));
+}
+
+/// Insert, before \p It in \p MBB, the instruction that reloads LR from the
+/// stack: LDR_POST_IMM (t2LDR_POST for Thumb) defining LR and SP, with SP as
+/// base and the stack alignment as immediate, predicated on AL.
+void ARMBaseInstrInfo::restoreLRFromStack(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator &It) const {
+  const bool IsThumb = Subtarget.isThumb();
+  const unsigned LoadOpc = IsThumb ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
+
+  MachineInstrBuilder Load =
+      BuildMI(MBB, It, DebugLoc(), get(LoadOpc), ARM::LR);
+  Load.addReg(ARM::SP, RegState::Define);
+  Load.addReg(ARM::SP);
+  // Only the ARM-mode opcode gets an extra (empty) register operand ahead of
+  // the immediate.
+  if (!IsThumb)
+    Load.addReg(0);
+  Load.addImm(Subtarget.getStackAlignment().value());
+  Load.add(predOps(ARMCC::AL));
+}
+
+/// Complete the body of an outlined function whose code is in \p MBB.
+///
+/// Steps, in order:
+///  1. For a MachineOutlinerThunk frame, replace the trailing call with the
+///     matching tail-jump opcode.
+///  2. If a non-tail call is still present, save LR in front of the body,
+///     emit the CFI describing that save, and restore LR before the last
+///     instruction (tail-call/thunk frames) or at the end of the block.
+///  3. For frames that are neither tail-call nor thunk, append a return.
+///  4. For MachineOutlinerDefault frames, run fixupPostOutline() on \p MBB.
+void ARMBaseInstrInfo::buildOutlinedFrame(
+    MachineBasicBlock &MBB, MachineFunction &MF,
+    const outliner::OutlinedFunction &OF) const {
+  unsigned StackAlignment = Subtarget.getStackAlignment().value();
+  const bool IsThunk = OF.FrameConstructionID == MachineOutlinerThunk;
+  const bool AlreadyReturns =
+      IsThunk || OF.FrameConstructionID == MachineOutlinerTailCall;
+
+  // Thunk outlining: the last instruction is a call; turn it into a
+  // tail-call.
+  if (IsThunk) {
+    MachineInstr *Call = &*--MBB.instr_end();
+    if (Subtarget.isThumb()) {
+      // The callee is read from operand 2 of a Thumb call.
+      const auto &Callee = Call->getOperand(2);
+      if (Callee.isReg()) {
+        BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::tTAILJMPr)).add(Callee);
+      } else {
+        const unsigned JmpOpc =
+            Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND;
+        BuildMI(MBB, MBB.end(), DebugLoc(), get(JmpOpc))
+            .add(Callee)
+            .add(predOps(ARMCC::AL));
+      }
+    } else {
+      // The callee is read from operand 0 of an ARM call.
+      const auto &Callee = Call->getOperand(0);
+      const unsigned JmpOpc = Callee.isReg() ? ARM::TAILJMPr : ARM::TAILJMPd;
+      BuildMI(MBB, MBB.end(), DebugLoc(), get(JmpOpc)).add(Callee);
+    }
+    Call->eraseFromParent();
+  }
+
+  // A call that is not also a return means LR must be preserved around the
+  // outlined body.
+  auto ClobbersLR = [](MachineInstr &MI) {
+    return MI.isCall() && !MI.isReturn();
+  };
+  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), ClobbersLR)) {
+    // LR has to be a live in so that we can save it.
+    MBB.addLiveIn(ARM::LR);
+
+    // Everything up to the restore is inserted in front of the original
+    // first instruction; the restore goes ahead of the final instruction
+    // when the frame already ends in a tail call, otherwise at the very end.
+    MachineBasicBlock::iterator BodyBegin = MBB.begin();
+    MachineBasicBlock::iterator RestorePt =
+        AlreadyReturns ? std::prev(MBB.end()) : MBB.end();
+
+    saveLROnStack(MBB, BodyBegin);
+
+    // The stack is about to change, so the stack accesses in the block are
+    // fixed up here. The Default frame kind gets its fix-up at the end of
+    // this function instead, hence the assert.
+    // NOTE(review): the LR save inserted just above is already part of MBB
+    // when fixupPostOutline() walks it -- confirm the helper leaves that
+    // store untouched.
+    assert(OF.FrameConstructionID != MachineOutlinerDefault &&
+           "Can only fix up stack references once");
+    fixupPostOutline(MBB);
+
+    const unsigned DwarfReg =
+        MF.getSubtarget().getRegisterInfo()->getDwarfRegNum(ARM::LR, true);
+
+    // Registers one CFI directive with the function and emits it, flagged
+    // as frame setup, in front of the body.
+    auto EmitFrameSetupCFI = [&](const MCCFIInstruction &Inst) {
+      int64_t CFIIndex = MF.addFrameInst(Inst);
+      BuildMI(MBB, BodyBegin, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex)
+          .setMIFlags(MachineInstr::FrameSetup);
+    };
+
+    // The stack was moved down ...
+    EmitFrameSetupCFI(
+        MCCFIInstruction::createDefCfaOffset(nullptr, StackAlignment));
+    // ... and the LR we want to find is now higher than before.
+    EmitFrameSetupCFI(
+        MCCFIInstruction::createOffset(nullptr, DwarfReg, StackAlignment));
+
+    // Restore the link register before the terminator of the function.
+    restoreLRFromStack(MBB, RestorePt);
+  }
+
+  // Tail-call and thunk frames already end in a return.
+  if (AlreadyReturns)
+    return;
+
+  // Otherwise insert the return ourselves, using the opcode that matches
+  // the current feature set.
+  BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
+      .add(predOps(ARMCC::AL));
+
+  // Only the Default frame kind still needs its stack accesses fixed up at
+  // this point.
+  if (OF.FrameConstructionID == MachineOutlinerDefault)
+    fixupPostOutline(MBB);
+}
+
+/// Insert into \p MBB, before \p It, the code that transfers control to the
+/// outlined function \p MF on behalf of candidate \p C.
+///
+/// The shape depends on \p C.CallConstructionID:
+///  * MachineOutlinerTailCall: a single tail jump.
+///  * MachineOutlinerNoLRSave / MachineOutlinerThunk: a bare BL/tBL.
+///  * MachineOutlinerRegSave: the call wrapped in copies of LR to and from
+///    the register returned by findRegisterToSaveLRTo().
+///  * anything else: the call wrapped in saveLROnStack() /
+///    restoreLRFromStack().
+///
+/// \returns an iterator to the inserted jump or call. In the first two cases
+/// \p It is set to that same instruction; in the save/restore cases \p It is
+/// stepped back by one so that it refers to the restore.
+MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
+    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+    MachineFunction &MF, const outliner::Candidate &C) const {
+  const bool IsThumb = Subtarget.isThumb();
+  const auto CallKind = C.CallConstructionID;
+
+  // Tail calling: just branch to the outlined function.
+  if (CallKind == MachineOutlinerTailCall) {
+    unsigned JmpOpc = ARM::TAILJMPd;
+    if (IsThumb)
+      JmpOpc = Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND;
+
+    MachineInstrBuilder Jmp = BuildMI(MF, DebugLoc(), get(JmpOpc));
+    Jmp.addGlobalAddress(M.getNamedValue(MF.getName()));
+    if (IsThumb)
+      Jmp.add(predOps(ARMCC::AL));
+    It = MBB.insert(It, Jmp);
+    return It;
+  }
+
+  // Build the call itself; the Thumb opcode takes its predicate operands
+  // ahead of the target.
+  MachineInstrBuilder Call =
+      BuildMI(MF, DebugLoc(), get(IsThumb ? ARM::tBL : ARM::BL));
+  if (IsThumb)
+    Call.add(predOps(ARMCC::AL));
+  Call.addGlobalAddress(M.getNamedValue(MF.getName()));
+
+  // LR does not have to be preserved: the call alone is enough.
+  if (CallKind == MachineOutlinerNoLRSave ||
+      CallKind == MachineOutlinerThunk) {
+    It = MBB.insert(It, Call);
+    return It;
+  }
+
+  MBB.addLiveIn(ARM::LR);
+
+  MachineBasicBlock::iterator CallPt;
+  if (CallKind == MachineOutlinerRegSave) {
+    // Park LR in a spare register around the call.
+    unsigned Reg = findRegisterToSaveLRTo(C);
+    assert(Reg != 0 && "No callee-saved register available?");
+
+    copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
+    CallPt = MBB.insert(It, Call);
+    copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
+  } else {
+    // Default case: spill LR to the stack around the call.
+    saveLROnStack(MBB, It);
+    CallPt = MBB.insert(It, Call);
+    restoreLRFromStack(MBB, It);
+  }
+
+  // Step back onto the restore that was just inserted.
+  --It;
+  return CallPt;
+}
Index: llvm/lib/CodeGen/TargetPassConfig.cpp
===================================================================
--- llvm/lib/CodeGen/TargetPassConfig.cpp
+++ llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -979,8 +979,10 @@
     bool RunOnAllFunctions = (EnableMachineOutliner == AlwaysOutline);
     bool AddOutliner = RunOnAllFunctions ||
                        TM->Options.SupportsDefaultOutlining;
-    if (AddOutliner)
+    if (AddOutliner) {
+      MachineOutlinerEnabled = true;
       addPass(createMachineOutlinerPass(RunOnAllFunctions));
+    }
   }
 
   // Add passes that directly emit MI after all other MI passes.
Index: llvm/lib/CodeGen/MachineOutliner.cpp
===================================================================
--- llvm/lib/CodeGen/MachineOutliner.cpp
+++ llvm/lib/CodeGen/MachineOutliner.cpp
@@ -1155,6 +1155,9 @@
   // Outlined functions shouldn't preserve liveness.
   MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
   MF.getRegInfo().freezeReservedRegs(MF);
+  MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
+  MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs);
+  MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
 
   // If there's a DISubprogram associated with this outlined function, then
   // emit debug info for the outlined function.
Index: llvm/include/llvm/CodeGen/TargetPassConfig.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -132,6 +132,9 @@
   /// callers.
   bool RequireCodeGenSCCOrder = false;
 
+  /// True once the MachineOutliner pass has been added to the pipeline.
+  bool MachineOutlinerEnabled = false;
+
   /// Add the actual instruction selection passes. This does not include
   /// preparation passes on IR.
   bool addCoreISelPasses();
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5992,8 +5992,9 @@
       // We only support -moutline in AArch64 right now. If we're not compiling
       // for AArch64, emit a warning and ignore the flag. Otherwise, add the
       // proper mllvm flags.
-      if (Triple.getArch() != llvm::Triple::aarch64 &&
-          Triple.getArch() != llvm::Triple::aarch64_32) {
+      if (!(Triple.isARM() || Triple.isThumb() ||
+            Triple.getArch() == llvm::Triple::aarch64 ||
+            Triple.getArch() == llvm::Triple::aarch64_32)) {
         D.Diag(diag::warn_drv_moutline_unsupported_opt) << Triple.getArchName();
       } else {
         CmdArgs.push_back("-mllvm");
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to