https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/68993
>From bfd551c181b8325382247eab80544e69212121aa Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Sat, 11 Nov 2023 11:41:48 +0000 Subject: [PATCH 1/6] [AArch64] Refactor allocation of locals and stack realignment Factor out some stack allocation into a separate function. This patch splits out the generic portion of a larger refactoring done as a part of stack clash protection support. --- .../Target/AArch64/AArch64FrameLowering.cpp | 114 +++++++++--------- .../lib/Target/AArch64/AArch64FrameLowering.h | 5 + .../AArch64/framelayout-sve-basepointer.mir | 4 +- .../framelayout-sve-fixed-width-access.mir | 2 +- .../framelayout-sve-scavengingslot.mir | 4 +- llvm/test/CodeGen/AArch64/framelayout-sve.mir | 55 ++++----- ...nging-call-disable-stackslot-scavenging.ll | 2 +- .../AArch64/spill-stack-realignment.mir | 2 +- llvm/test/CodeGen/AArch64/stack-guard-sve.ll | 4 +- .../AArch64/sve-calling-convention-mixed.ll | 4 +- .../CodeGen/AArch64/sve-fixed-length-fp128.ll | 4 +- 11 files changed, 105 insertions(+), 95 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 18e3aa2b0ecec86..5f617e3a176a16e 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -296,6 +296,7 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF, static bool produceCompactUnwindFrame(MachineFunction &MF); static bool needsWinCFI(const MachineFunction &MF); static StackOffset getSVEStackSize(const MachineFunction &MF); +static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB); /// Returns true if a homogeneous prolog or epilog code can be emitted /// for the size optimization. If possible, a frame helper call is injected. 
@@ -688,6 +689,44 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores( emitCalleeSavedRestores(MBB, MBBI, true); } +void AArch64FrameLowering::allocateStackSpace( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + bool NeedsRealignment, StackOffset AllocSize, bool NeedsWinCFI, + bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset) const { + + if (!AllocSize) + return; + + DebugLoc DL; + MachineFunction &MF = *MBB.getParent(); + const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + + Register TargetReg = + NeedsRealignment ? findScratchNonCalleeSaveRegister(&MBB) : AArch64::SP; + // SUB Xd/SP, SP, AllocSize + emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII, + MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI, + EmitCFI, InitialOffset); + + if (NeedsRealignment) { + const int64_t MaxAlign = MFI.getMaxAlign().value(); + const uint64_t AndMask = ~(MaxAlign - 1); + // AND SP, Xd, 0b11111...0000 + BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP) + .addReg(TargetReg, RegState::Kill) + .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)) + .setMIFlags(MachineInstr::FrameSetup); + AFI.setStackRealigned(true); + + // No need for SEH instructions here; if we're realigning the stack, + // we've set a frame pointer and already finished the SEH prologue. 
+ assert(!NeedsWinCFI); + } +} + static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) { switch (Reg.id()) { default: @@ -1774,7 +1813,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, } } - StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {}; + StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize; MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI; // Process the SVE callee-saves to determine what space needs to be @@ -1787,67 +1826,32 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, ++MBBI; CalleeSavesEnd = MBBI; - AllocateBefore = StackOffset::getScalable(CalleeSavedSize); - AllocateAfter = SVEStackSize - AllocateBefore; + SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize); + SVELocalsSize = SVEStackSize - SVECalleeSavesSize; } // Allocate space for the callee saves (if any). - emitFrameOffset( - MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII, - MachineInstr::FrameSetup, false, false, nullptr, - EmitAsyncCFI && !HasFP && AllocateBefore, - StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes)); + StackOffset CFAOffset = + StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes); + allocateStackSpace(MBB, CalleeSavesBegin, false, SVECalleeSavesSize, false, + nullptr, EmitAsyncCFI && !HasFP, CFAOffset); + CFAOffset += SVECalleeSavesSize; if (EmitAsyncCFI) emitCalleeSavedSVELocations(MBB, CalleeSavesEnd); - // Finally allocate remaining SVE stack space. - emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP, - -AllocateAfter, TII, MachineInstr::FrameSetup, false, false, - nullptr, EmitAsyncCFI && !HasFP && AllocateAfter, - AllocateBefore + StackOffset::getFixed( - (int64_t)MFI.getStackSize() - NumBytes)); - - // Allocate space for the rest of the frame. 
- if (NumBytes) { - unsigned scratchSPReg = AArch64::SP; - - if (NeedsRealignment) { - scratchSPReg = findScratchNonCalleeSaveRegister(&MBB); - assert(scratchSPReg != AArch64::NoRegister); - } - - // If we're a leaf function, try using the red zone. - if (!canUseRedZone(MF)) { - // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have - // the correct value here, as NumBytes also includes padding bytes, - // which shouldn't be counted here. - emitFrameOffset( - MBB, MBBI, DL, scratchSPReg, AArch64::SP, - StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup, - false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP, - SVEStackSize + - StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes)); - } - if (NeedsRealignment) { - assert(MFI.getMaxAlign() > Align(1)); - assert(scratchSPReg != AArch64::SP); - - // SUB X9, SP, NumBytes - // -- X9 is temporary register, so shouldn't contain any live data here, - // -- free to use. This is already produced by emitFrameOffset above. - // AND SP, X9, 0b11111...0000 - uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1); - - BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) - .addReg(scratchSPReg, RegState::Kill) - .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)); - AFI->setStackRealigned(true); - - // No need for SEH instructions here; if we're realigning the stack, - // we've set a frame pointer and already finished the SEH prologue. - assert(!NeedsWinCFI); - } + // Allocate space for the rest of the frame including SVE locals. Align the + // stack as necessary. + assert(!(canUseRedZone(MF) && NeedsRealignment) && + "Cannot use redzone with stack realignment"); + if (!canUseRedZone(MF)) { + // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have + // the correct value here, as NumBytes also includes padding bytes, + // which shouldn't be counted here. 
+ allocateStackSpace(MBB, CalleeSavesEnd, NeedsRealignment, + SVELocalsSize + StackOffset::getFixed(NumBytes), + NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP, + CFAOffset); } // If we need a base pointer, set it up here. It's whatever the value of the diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 147b5c181be5e53..f3313f3b53fffe0 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -150,6 +150,11 @@ class AArch64FrameLowering : public TargetFrameLowering { MachineBasicBlock::iterator MBBI) const; void emitCalleeSavedSVERestores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const; + void allocateStackSpace(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + bool NeedsRealignment, StackOffset AllocSize, + bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI, + StackOffset InitialOffset) const; /// Emit target zero call-used regs. void emitZeroCallUsedRegs(BitVector RegsToZero, diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir index 623c0f240be4fd7..8d39b881395cdf6 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir @@ -4,8 +4,8 @@ name: hasBasepointer # CHECK-LABEL: name: hasBasepointer # CHECK: bb.0: -# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: $x19 = ADDXri $sp, 0, 0 # CHECK: STRXui $x0, $x19, 0 tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir index e367a380f8ba9f0..35fd7ca77d5cf3e 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir +++ 
b/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir @@ -7,9 +7,9 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp + ; CHECK-NEXT: sub sp, sp, #2064 ; CHECK-NEXT: addvl sp, sp, #-32 ; CHECK-NEXT: addvl sp, sp, #-28 - ; CHECK-NEXT: sub sp, sp, #2064 ; CHECK-NEXT: ldr x8, [sp, #2048] ; CHECK-NEXT: addvl sp, sp, #31 ; CHECK-NEXT: addvl sp, sp, #29 diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir index d54f67634d02a7b..680f9c335c250c5 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir @@ -4,9 +4,9 @@ name: LateScavengingSlot # CHECK-LABEL: name: LateScavengingSlot # CHECK: bb.0: -# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 8, 12 +# CHECK: $sp = frame-setup SUBXri $sp, 8, 12 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 0 # CHECK-NEXT: $[[SCRATCH]] = ADDVL_XXI $fp, -1 # CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 0 diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir index 7c87587c6dc4e2c..213d7919e4a7270 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir @@ -60,10 +60,10 @@ # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK-NEXT: CFI_INSTRUCTION escape 0x0f, 0x0c, 
0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 32 @@ -77,7 +77,7 @@ # ASM-LABEL: test_allocate_sve: # ASM: .cfi_def_cfa_offset 16 # ASM-NEXT: .cfi_offset w29, -16 -# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG +# ASM: .cfi_def_cfa_offset 32 # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG # ASM: .cfi_def_cfa wsp, 32 # ASM: .cfi_def_cfa_offset 16 @@ -87,7 +87,7 @@ # # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_offset: +32 # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa: reg31 +32 # UNWINDINFO: DW_CFA_def_cfa_offset: +16 @@ -125,10 +125,11 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w20, -8 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w21, -16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 48 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: frame-setup CFI_INSTRUCTION 
escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 + # # CHECK-NEXT: $x20 = IMPLICIT_DEF # CHECK-NEXT: $x21 = IMPLICIT_DEF @@ -149,7 +150,7 @@ body: | # ASM: .cfi_offset w20, -8 # ASM-NEXT: .cfi_offset w21, -16 # ASM-NEXT: .cfi_offset w29, -32 -# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG +# ASM: .cfi_def_cfa_offset 48 # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 16 * VG # # ASM: .cfi_def_cfa wsp, 48 @@ -164,7 +165,7 @@ body: | # UNWINDINFO: DW_CFA_offset: reg20 -8 # UNWINDINFO-NEXT: DW_CFA_offset: reg21 -16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_offset: +48 # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # # UNWINDINFO: DW_CFA_def_cfa: reg31 +48 @@ -205,9 +206,9 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0 -# CHECK-NEXT: $sp = ANDXri killed $[[TMP]] +# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -2 +# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]] # CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16 # CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 @@ -267,9 +268,9 @@ body: | # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset 
$w29, -16 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16 @@ -292,7 +293,7 @@ body: | # ASM-LABEL: test_address_sve: # ASM: .cfi_def_cfa_offset 16 # ASM-NEXT: .cfi_offset w29, -16 -# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG +# ASM: .cfi_def_cfa_offset 32 # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 24 * VG # # ASM: .cfi_def_cfa wsp, 32 @@ -302,7 +303,7 @@ body: | # # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_offset: +32 # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # # UNWINDINFO: DW_CFA_def_cfa: reg31 +32 @@ -353,8 +354,8 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 # CHECK-NEXT: STR_ZXI $z0, $fp, -1 # CHECK-NEXT: STR_ZXI $z1, $fp, -2 @@ -429,9 +430,9 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 # CHECK-NEXT: 
frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK: $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1 # CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4 @@ -448,7 +449,7 @@ body: | # ASM-LABEL: test_stack_arg_sve: # ASM: .cfi_def_cfa_offset 16 # ASM-NEXT: .cfi_offset w29, -16 -# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +# ASM: .cfi_def_cfa_offset 32 # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG # # ASM: .cfi_def_cfa wsp, 32 @@ -458,7 +459,7 @@ body: | # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_offset: +32 # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # # UNWINDINFO: DW_CFA_def_cfa: reg31 +32 @@ -640,8 +641,8 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: $x19 = ADDXri $sp, 0, 0 # CHECK-NEXT: STRXui $xzr, $x19, 0 # CHECK-NEXT: $sp = 
frame-destroy ADDXri $fp, 0, 0 @@ -863,9 +864,9 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 -# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK: $sp = frame-setup SUBXri $sp, 32, 0 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 +# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 @@ -916,7 +917,7 @@ body: | # ASM-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG # ASM-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG # ASM-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG -# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG +# ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 144 * VG # ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 152 * VG # # ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * 
VG @@ -950,7 +951,7 @@ body: | # UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -48, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -56, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -64, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +144, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus @@ -1031,9 +1032,9 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0 -# CHECK-NEXT: $sp = ANDXri killed $[[TMP]] +# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -1 +# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]] # CHECK: $sp = frame-destroy ADDVL_XXI $fp, -18 # CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4 diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll 
b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll index e4cd4d6c05c5ee3..45ca7844b065513 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll @@ -20,8 +20,8 @@ define void @test_no_stackslot_scavenging(float %f) #0 { ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x24, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP diff --git a/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir b/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir index 1b9411d07f433ab..f6fc627ac2d3d87 100644 --- a/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir +++ b/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir @@ -21,7 +21,7 @@ stack: - { id: 1, size: 4, alignment: 4, local-offset: -68 } # CHECK: body: -# CHECK: $sp = ANDXri killed ${{x[0-9]+}}, 7865 +# CHECK: $sp = frame-setup ANDXri killed ${{x[0-9]+}}, 7865 # CHECK: STRSui $s0, $sp, 0 # CHECK: STRSui $s0, $fp, 7 body: | diff --git a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll index 1672a7eb8739779..5acbb22bf1ab5a4 100644 --- a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll +++ b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll @@ -148,9 +148,9 @@ entry: ; CHECK-LABEL: local_stack_alloc: ; CHECK: mov x29, sp -; CHECK: addvl sp, sp, #-2 ; CHECK: sub sp, sp, #16, lsl #12 ; CHECK: sub sp, sp, #16 +; CHECK: addvl sp, sp, #-2 ; Stack guard is placed below the SVE stack area (and above all fixed-width objects) ; CHECK-DAG: add [[STACK_GUARD_SPILL_PART_LOC:x[0-9]+]], sp, #8, lsl #12 @@ -198,9 +198,9 @@ entry: ; CHECK-LABEL: 
local_stack_alloc_strong: ; CHECK: mov x29, sp -; CHECK: addvl sp, sp, #-3 ; CHECK: sub sp, sp, #16, lsl #12 ; CHECK: sub sp, sp, #16 +; CHECK: addvl sp, sp, #-3 ; Stack guard is placed at the top of the SVE stack area ; CHECK-DAG: ldr [[STACK_GUARD:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard] diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll index b7505625cde9773..6738bddb8af3442 100644 --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -56,8 +56,8 @@ define float @foo2(ptr %x0, ptr %x1) nounwind { ; CHECK-LABEL: foo2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: add x8, sp, #16 @@ -699,8 +699,8 @@ define void @verify_all_operands_are_initialised() { ; CHECK-LABEL: verify_all_operands_are_initialised: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll index da7e772461e28bc..9d9d4a64a5d1f58 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll @@ -9,8 +9,8 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: add x8, sp, #48 ; CHECK-NEXT: mov x19, x1 @@ -59,8 +59,8 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: sub sp, sp, #128 +; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ldr q1, [x0, #64] ; CHECK-NEXT: ldr q0, [x0, #80] ; CHECK-NEXT: mov x19, x1 >From 93c4e5f7abceee7c4edd2eaf232a3184833c23de Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Sat, 11 Nov 2023 15:27:53 +0000 Subject: [PATCH 2/6] [CFIFixup] Precommit test ahead of multi-block prologues support --- .../cfi-fixup-multi-block-prologue.mir | 307 ++++++++++++++++++ 1 file changed, 307 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir diff --git a/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir new file mode 100644 index 000000000000000..ddd9a9eaef55efb --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir @@ -0,0 +1,307 @@ +# RUN: llc -run-pass=cfi-fixup %s -o - | FileCheck %s +--- | + source_filename = "cfi-fixup.ll" + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-linux" + + define i32 @f(i32 %x) #0 { + entry: + %p = alloca i8, i32 30000, align 1 + switch i32 %x, label %if.end7 [ + i32 0, label %return + i32 1, label %if.then2 + i32 2, label %if.then5 + ] + + if.then2: ; preds = %entry + %call = tail call i32 @g1(i32 1) + %add = add nsw i32 %call, 1 + br label %return + + if.then5: ; preds = %entry + %call6 = tail call i32 @g0(i32 2) + %sub = sub nsw 
i32 1, %call6 + br label %return + + if.end7: ; preds = %entry + br label %return + + return: ; preds = %if.end7, %if.then5, %if.then2, %entry + %retval.0 = phi i32 [ %add, %if.then2 ], [ %sub, %if.then5 ], [ 0, %if.end7 ], [ 1, %entry ] + ret i32 %retval.0 + } + + declare i32 @g1(i32) + + declare i32 @g0(i32) + + attributes #0 = { uwtable "probe-stack"="inline-asm" } + +... +--- +name: f +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: true +registers: [] +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 30016 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + stackProtector: '' + functionContext: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 30000 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: p, type: default, offset: -30016, size: 30000, alignment: 1, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -30000, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true, + debug-info-variable: 
'', debug-info-expression: '', debug-info-location: '' } +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + hasRedZone: false +body: | + ; CHECK-LABEL: name: f + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $w0, $lr, $fp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 + ; CHECK-NEXT: $x9 = frame-setup SUBXri $sp, 7, 12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w9, 28688 + ; CHECK-NEXT: CFI_INSTRUCTION remember_state + + ; CHECK: bb.1.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1, 12 + ; CHECK-NEXT: $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv + ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0 + ; CHECK-NEXT: frame-setup Bcc 1, %bb.1, implicit killed $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.entry: + ; CHECK-NEXT: successors: %bb.6(0x20000000), %bb.3(0x60000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $wsp + ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1328, 0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 30016 + ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0 + ; CHECK-NEXT: CBZW renamable $w0, %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.entry: + ; CHECK-NEXT: successors: %bb.7(0x2aaaaaab), %bb.4(0x55555555) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.7, implicit killed $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
bb.4.entry: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.8(0x40000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 1, %bb.8, implicit killed $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.if.then2: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: BL @g1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 + ; CHECK-NEXT: RET undef $lr, implicit killed $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: CFI_INSTRUCTION restore_state + ; CHECK-NEXT: CFI_INSTRUCTION remember_state + ; CHECK-NEXT: renamable $w0 = MOVZWi 1, 0 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 + ; CHECK-NEXT: RET undef $lr, implicit killed $w0 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: bb.7.if.then5: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CFI_INSTRUCTION restore_state + ; CHECK-NEXT: CFI_INSTRUCTION remember_state + ; CHECK-NEXT: BL @g0, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: renamable $w8 = MOVZWi 1, 0 + ; CHECK-NEXT: $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 + ; CHECK-NEXT: RET undef $lr, implicit killed $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8.if.end7: + ; CHECK-NEXT: CFI_INSTRUCTION restore_state + ; CHECK-NEXT: $w0 = ORRWrs $wzr, $wzr, 0 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 + ; CHECK-NEXT: RET undef $lr, implicit killed $w0 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $w0, $lr, $fp + + early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1) + 
frame-setup CFI_INSTRUCTION def_cfa_offset 16 + frame-setup CFI_INSTRUCTION offset $w30, -8 + frame-setup CFI_INSTRUCTION offset $w29, -16 + $x9 = frame-setup SUBXri $sp, 7, 12 + frame-setup CFI_INSTRUCTION def_cfa $w9, 28688 + + bb.1.entry: + successors: %bb.2(0x40000000), %bb.1(0x40000000) + liveins: $x9 + + $sp = frame-setup SUBXri $sp, 1, 12 + $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv + frame-setup STRXui $xzr, $sp, 0 + frame-setup Bcc 1, %bb.1, implicit killed $nzcv + + bb.2.entry: + successors: %bb.6(0x20000000), %bb.3(0x60000000) + liveins: $w0 + + frame-setup CFI_INSTRUCTION def_cfa_register $wsp + $sp = frame-setup SUBXri $sp, 1328, 0 + frame-setup CFI_INSTRUCTION def_cfa_offset 30016 + frame-setup STRXui $xzr, $sp, 0 + CBZW renamable $w0, %bb.6 + + bb.3.entry: + successors: %bb.7(0x2aaaaaab), %bb.4(0x55555555) + liveins: $w0 + + dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv + Bcc 0, %bb.7, implicit killed $nzcv + + bb.4.entry: + successors: %bb.5(0x40000000), %bb.8(0x40000000) + liveins: $w0 + + dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv + Bcc 1, %bb.8, implicit killed $nzcv + + bb.5.if.then2: + liveins: $w0 + + BL @g1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0 + $sp = frame-destroy ADDXri $sp, 7, 12 + frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + $sp = frame-destroy ADDXri $sp, 1328, 0 + frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + frame-destroy CFI_INSTRUCTION restore $w30 + frame-destroy CFI_INSTRUCTION restore $w29 + RET undef $lr, implicit killed $w0 + + bb.6: + renamable $w0 = MOVZWi 1, 0 + $sp = frame-destroy ADDXri $sp, 7, 12 + frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + $sp = 
frame-destroy ADDXri $sp, 1328, 0 + frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + frame-destroy CFI_INSTRUCTION restore $w30 + frame-destroy CFI_INSTRUCTION restore $w29 + RET undef $lr, implicit killed $w0 + + bb.7.if.then5: + liveins: $w0 + + BL @g0, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + renamable $w8 = MOVZWi 1, 0 + $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0 + $sp = frame-destroy ADDXri $sp, 7, 12 + frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + $sp = frame-destroy ADDXri $sp, 1328, 0 + frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + frame-destroy CFI_INSTRUCTION restore $w30 + frame-destroy CFI_INSTRUCTION restore $w29 + RET undef $lr, implicit killed $w0 + + bb.8.if.end7: + $w0 = ORRWrs $wzr, $wzr, 0 + $sp = frame-destroy ADDXri $sp, 7, 12 + frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + $sp = frame-destroy ADDXri $sp, 1328, 0 + frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + frame-destroy CFI_INSTRUCTION restore $w30 + frame-destroy CFI_INSTRUCTION restore $w29 + RET undef $lr, implicit killed $w0 + +... >From fd103b19f2cb301f8961c447a6ff5f355b66d5d0 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Mon, 2 Oct 2023 14:46:27 +0100 Subject: [PATCH 3/6] [CFIFixup] Allow function prologues to span more than one basic block The CFIFixup pass assumes a function prologue is contained in a single basic block. 
This assumption is broken with upcoming support for stack probing (`-fstack-clash-protection`) in AArch64 - the emitted probing sequence in a prologue may contain loops, i.e. more than one basic block. The generated CFG is not arbitrary though: * CFI instructions are outside of any loops * for any two CFI instructions of the function prologue one dominates and is post-dominated by the other Thus, for the prologue CFI instructions, if one is executed then all are executed, there is a total order of executions, and the last instruction in that order can be considered the end of the prologue for the purpose of inserting the initial `.cfi_remember_state` directive. That last instruction is found by finding the first block in the post-order traversal which contains prologue CFI instructions. --- llvm/lib/CodeGen/CFIFixup.cpp | 62 ++++++++++++------- .../cfi-fixup-multi-block-prologue.mir | 7 ++- 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp index 837dbd77d07361a..964a8d56511fa1b 100644 --- a/llvm/lib/CodeGen/CFIFixup.cpp +++ b/llvm/lib/CodeGen/CFIFixup.cpp @@ -10,20 +10,25 @@ // This pass inserts the necessary instructions to adjust for the inconsistency // of the call-frame information caused by final machine basic block layout. // The pass relies in constraints LLVM imposes on the placement of -// save/restore points (cf. ShrinkWrap): -// * there is a single basic block, containing the function prologue +// save/restore points (cf. ShrinkWrap) and has certain preconditions about +// placement of CFI instructions: +// * for any two CFI instructions of the function prologue one dominates +// and is post-dominated by the other // * possibly multiple epilogue blocks, where each epilogue block is // complete and self-contained, i.e. CSR restore instructions (and the // corresponding CFI instructions are not split across two or more blocks. 
-// * prologue and epilogue blocks are outside of any loops -// Thus, during execution, at the beginning and at the end of each basic block -// the function can be in one of two states: +// * CFI instructions are not contained in any loops +// Thus, during execution, at the beginning and at the end of each basic block, +// following the prologue, the function can be in one of two states: // - "has a call frame", if the function has executed the prologue, and // has not executed any epilogue // - "does not have a call frame", if the function has not executed the // prologue, or has executed an epilogue // which can be computed by a single RPO traversal. +// The location of the prologue is determined by finding the first block in the +// post-order traversal which contains CFI instructions. + // In order to accommodate backends which do not generate unwind info in // epilogues we compute an additional property "strong no call frame on entry", // which is set for the entry point of the function and for every block @@ -85,10 +90,6 @@ static bool isPrologueCFIInstruction(const MachineInstr &MI) { MI.getFlag(MachineInstr::FrameSetup); } -static bool containsPrologue(const MachineBasicBlock &MBB) { - return llvm::any_of(MBB.instrs(), isPrologueCFIInstruction); -} - static bool containsEpilogue(const MachineBasicBlock &MBB) { return llvm::any_of(llvm::reverse(MBB), [](const auto &MI) { return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION && @@ -96,6 +97,25 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) { }); } +static MachineBasicBlock * +findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) { + MachineBasicBlock *PrologueBlock = nullptr; + for (auto It = po_begin(&MF.front()), End = po_end(&MF.front()); It != End; + ++It) { + MachineBasicBlock *MBB = *It; + llvm::for_each(MBB->instrs(), [&](MachineInstr &MI) { + if (isPrologueCFIInstruction(MI)) { + PrologueBlock = MBB; + PrologueEnd = std::next(MI.getIterator()); + } + }); + if 
(PrologueBlock) + return PrologueBlock; + } + + return nullptr; +} + bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { const TargetFrameLowering &TFL = *MF.getSubtarget().getFrameLowering(); if (!TFL.enableCFIFixup(MF)) @@ -105,6 +125,14 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { if (NumBlocks < 2) return false; + // Find the prologue and the point where we can issue the first + // `.cfi_remember_state`. + + MachineBasicBlock::iterator PrologueEnd; + MachineBasicBlock *PrologueBlock = findPrologueEnd(MF, PrologueEnd); + if (PrologueBlock == nullptr) + return false; + struct BlockFlags { bool Reachable : 1; bool StrongNoFrameOnEntry : 1; @@ -116,21 +144,15 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { BlockInfo[0].StrongNoFrameOnEntry = true; // Compute the presence/absence of frame at each basic block. - MachineBasicBlock *PrologueBlock = nullptr; ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); for (MachineBasicBlock *MBB : RPOT) { BlockFlags &Info = BlockInfo[MBB->getNumber()]; // Set to true if the current block contains the prologue or the epilogue, // respectively. - bool HasPrologue = false; + bool HasPrologue = MBB == PrologueBlock; bool HasEpilogue = false; - if (!PrologueBlock && !Info.HasFrameOnEntry && containsPrologue(*MBB)) { - PrologueBlock = MBB; - HasPrologue = true; - } - if (Info.HasFrameOnEntry || HasPrologue) HasEpilogue = containsEpilogue(*MBB); @@ -149,9 +171,6 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { } } - if (!PrologueBlock) - return false; - // Walk the blocks of the function in "physical" order. // Every block inherits the frame state (as recorded in the unwind tables) // of the previous block. If the intended frame state is different, insert @@ -162,10 +181,7 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { // insert a `.cfi_remember_state`, in the case that the current block needs a // `.cfi_restore_state`. 
MachineBasicBlock *InsertMBB = PrologueBlock; - MachineBasicBlock::iterator InsertPt = PrologueBlock->begin(); - for (MachineInstr &MI : *PrologueBlock) - if (isPrologueCFIInstruction(MI)) - InsertPt = std::next(MI.getIterator()); + MachineBasicBlock::iterator InsertPt = PrologueEnd; assert(InsertPt != PrologueBlock->begin() && "Inconsistent notion of \"prologue block\""); diff --git a/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir index ddd9a9eaef55efb..31fa3832367becc 100644 --- a/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir +++ b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 # RUN: llc -run-pass=cfi-fixup %s -o - | FileCheck %s --- | source_filename = "cfi-fixup.ll" @@ -111,9 +112,8 @@ body: | ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 ; CHECK-NEXT: $x9 = frame-setup SUBXri $sp, 7, 12 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w9, 28688 - ; CHECK-NEXT: CFI_INSTRUCTION remember_state - - ; CHECK: bb.1.entry: + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.entry: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $x9 ; CHECK-NEXT: {{ $}} @@ -129,6 +129,7 @@ body: | ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $wsp ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1328, 0 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 30016 + ; CHECK-NEXT: CFI_INSTRUCTION remember_state ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0 ; CHECK-NEXT: CBZW renamable $w0, %bb.6 ; CHECK-NEXT: {{ $}} >From 8811203236b6fdfff19f305b304261737c1c3217 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Sat, 28 Oct 2023 13:33:28 +0100 Subject: [PATCH 4/6] Reverse iteration within a block when looking for prologue CFI insns --- llvm/lib/CodeGen/CFIFixup.cpp | 16 ++++++---------- 
1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp index 964a8d56511fa1b..40a2a3a142e1758 100644 --- a/llvm/lib/CodeGen/CFIFixup.cpp +++ b/llvm/lib/CodeGen/CFIFixup.cpp @@ -99,20 +99,16 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) { static MachineBasicBlock * findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) { - MachineBasicBlock *PrologueBlock = nullptr; for (auto It = po_begin(&MF.front()), End = po_end(&MF.front()); It != End; ++It) { MachineBasicBlock *MBB = *It; - llvm::for_each(MBB->instrs(), [&](MachineInstr &MI) { - if (isPrologueCFIInstruction(MI)) { - PrologueBlock = MBB; - PrologueEnd = std::next(MI.getIterator()); - } - }); - if (PrologueBlock) - return PrologueBlock; + for (MachineInstr &MI : reverse(MBB->instrs())) { + if (!isPrologueCFIInstruction(MI)) + continue; + PrologueEnd = std::next(MI.getIterator()); + return MBB; + } } - return nullptr; } >From ac04dc7894af4205a4903fcc5d6995d0aff8dfee Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Tue, 7 Nov 2023 14:08:26 +0000 Subject: [PATCH 5/6] Use simple reverse traversal of basic blocks --- llvm/lib/CodeGen/CFIFixup.cpp | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp index 40a2a3a142e1758..61888a42666524b 100644 --- a/llvm/lib/CodeGen/CFIFixup.cpp +++ b/llvm/lib/CodeGen/CFIFixup.cpp @@ -12,12 +12,14 @@ // The pass relies in constraints LLVM imposes on the placement of // save/restore points (cf. ShrinkWrap) and has certain preconditions about // placement of CFI instructions: -// * for any two CFI instructions of the function prologue one dominates -// and is post-dominated by the other -// * possibly multiple epilogue blocks, where each epilogue block is -// complete and self-contained, i.e. 
CSR restore instructions (and the -// corresponding CFI instructions are not split across two or more blocks. -// * CFI instructions are not contained in any loops +// * For any two CFI instructions of the function prologue one dominates +// and is post-dominated by the other. +// * The function possibly contains multiple epilogue blocks, where each +// epilogue block is complete and self-contained, i.e. CSR restore +// instructions (and the corresponding CFI instructions) +// are not split across two or more blocks. +// * CFI instructions are not contained in any loops. + // Thus, during execution, at the beginning and at the end of each basic block, // following the prologue, the function can be in one of two states: // - "has a call frame", if the function has executed the prologue, and @@ -27,7 +29,7 @@ // which can be computed by a single RPO traversal. // The location of the prologue is determined by finding the first block in the -// post-order traversal which contains CFI instructions. +// reverse traversal which contains CFI instructions. // In order to accommodate backends which do not generate unwind info in // epilogues we compute an additional property "strong no call frame on entry", @@ -99,14 +101,16 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) { static MachineBasicBlock * findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) { - for (auto It = po_begin(&MF.front()), End = po_end(&MF.front()); It != End; - ++It) { - MachineBasicBlock *MBB = *It; - for (MachineInstr &MI : reverse(MBB->instrs())) { + // Even though we should theoretically traverse the blocks in post-order, we + // can't encode correctly cases where prologue blocks are not laid out in + // topological order. Then, assuming topological order, we can just traverse + // the function in reverse. 
+ for (MachineBasicBlock &MBB : reverse(MF)) { + for (MachineInstr &MI : reverse(MBB.instrs())) { if (!isPrologueCFIInstruction(MI)) continue; PrologueEnd = std::next(MI.getIterator()); - return MBB; + return &MBB; } } return nullptr; @@ -123,7 +127,6 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { // Find the prologue and the point where we can issue the first // `.cfi_remember_state`. - MachineBasicBlock::iterator PrologueEnd; MachineBasicBlock *PrologueBlock = findPrologueEnd(MF, PrologueEnd); if (PrologueBlock == nullptr) >From fdb47f7bbff0eb395c6bb2c7f469a77d19f53a26 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Wed, 11 Oct 2023 17:22:51 +0100 Subject: [PATCH 6/6] [clang][AArch64] Pass down stack clash protection options to LLVM/Backend --- clang/lib/CodeGen/CodeGenModule.cpp | 12 +++++++++++- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- clang/test/CodeGen/stack-clash-protection.c | 16 ++++++++++++---- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 4c7f516e308ca00..bc496852b86fba5 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1085,6 +1085,16 @@ void CodeGenModule::Release() { "sign-return-address-with-bkey", 1); } + if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) { + auto *InlineAsm = llvm::MDString::get(TheModule.getContext(), "inline-asm"); + if (CodeGenOpts.StackClashProtector) + getModule().addModuleFlag(llvm::Module::Override, "probe-stack", + InlineAsm); + if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096) + getModule().addModuleFlag(llvm::Module::Min, "stack-probe-size", + CodeGenOpts.StackProbeSize); + } + if (!CodeGenOpts.MemoryProfileOutput.empty()) { llvm::LLVMContext &Ctx = TheModule.getContext(); getModule().addModuleFlag( @@ -2296,7 +2306,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, 
if ((!D || !D->hasAttr<NoUwtableAttr>()) && CodeGenOpts.UnwindTables) B.addUWTableAttr(llvm::UWTableKind(CodeGenOpts.UnwindTables)); - if (CodeGenOpts.StackClashProtector) + if (CodeGenOpts.StackClashProtector && !getTarget().getTriple().isAArch64()) B.addAttribute("probe-stack", "inline-asm"); if (!hasUnwindExceptions(LangOpts)) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 3b98c7ae6e6ec66..35133001f95c3f9 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3507,7 +3507,7 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args, return; if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() && - !EffectiveTriple.isPPC64()) + !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64()) return; Args.addOptInFlag(CmdArgs, options::OPT_fstack_clash_protection, diff --git a/clang/test/CodeGen/stack-clash-protection.c b/clang/test/CodeGen/stack-clash-protection.c index 67571f5cdb2c14c..2f502ef453d42f4 100644 --- a/clang/test/CodeGen/stack-clash-protection.c +++ b/clang/test/CodeGen/stack-clash-protection.c @@ -1,10 +1,12 @@ // Check the correct function attributes are generated -// RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s -// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s -// RUN: %clang_cc1 -triple powerpc64le-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s -// RUN: %clang_cc1 -triple powerpc64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s +// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-linux-gnu -O0 -S -emit-llvm -o- %s 
-fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s --check-prefixes CHECK-AARCH64 // CHECK: define{{.*}} void @large_stack() #[[A:.*]] { +// CHECK-AARCH64: define{{.*}} void @large_stack() #[[A:.*]] { void large_stack(void) { volatile int stack[20000], i; for (i = 0; i < sizeof(stack) / sizeof(int); ++i) @@ -12,14 +14,20 @@ void large_stack(void) { } // CHECK: define{{.*}} void @vla({{.*}}) #[[A:.*]] { +// CHECK-AARCH64: define{{.*}} void @vla({{.*}}) #[[A:.*]] { void vla(int n) { volatile int vla[n]; __builtin_memset(&vla[0], 0, 1); } // CHECK: define{{.*}} void @builtin_alloca({{.*}}) #[[A:.*]] { +// CHECK-AARCH64: define{{.*}} void @builtin_alloca({{.*}}) #[[A:.*]] { void builtin_alloca(int n) { volatile void *mem = __builtin_alloca(n); } // CHECK: attributes #[[A]] = {{.*}} "probe-stack"="inline-asm" +// CHECK-AARCH64-NOT: attributes #[[A]] = {{.*}} "probe-stack" + +// CHECK-AARCH64: !{i32 4, !"probe-stack", !"inline-asm"} +// CHECK-AARCH64: !{i32 8, !"stack-probe-size", i32 8192} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits