https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/68993
>From ed580b95157d7f423c5384fa2d51af00f1359a10 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Mon, 2 Oct 2023 14:46:27 +0100 Subject: [PATCH 1/3] [CFIFixup] Allow function prologues to span more than one basic block The CFIFixup pass assumes a function prologue is contained in a single basic block. This assumption is broken with upcoming support for stack probing (`-fstack-clash-protection`) in AArch64 - the emitted probing sequence in a prologue may contain loops, i.e. more than one basic block. The generated CFG is not arbitrary though: * CFI instructions are outside of any loops * for any two CFI instructions of the function prologue one dominates and is post-dominated by the other Thus, for the prologue CFI instructions, if one is executed then all are executed, there is a total order of executions, and the last instruction in that order can be considered the end of the prologue for the purpose of inserting the initial `.cfi_remember_state` directive. That last instruction is found by finding the first block in the post-order traversal which contains prologue CFI instructions. --- llvm/lib/CodeGen/CFIFixup.cpp | 62 ++-- .../cfi-fixup-multi-block-prologue.mir | 308 ++++++++++++++++++ 2 files changed, 347 insertions(+), 23 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp index 837dbd77d07361a..964a8d56511fa1b 100644 --- a/llvm/lib/CodeGen/CFIFixup.cpp +++ b/llvm/lib/CodeGen/CFIFixup.cpp @@ -10,20 +10,25 @@ // This pass inserts the necessary instructions to adjust for the inconsistency // of the call-frame information caused by final machine basic block layout. // The pass relies in constraints LLVM imposes on the placement of -// save/restore points (cf. ShrinkWrap): -// * there is a single basic block, containing the function prologue +// save/restore points (cf. 
ShrinkWrap) and has certain preconditions about +// placement of CFI instructions: +// * for any two CFI instructions of the function prologue one dominates +// and is post-dominated by the other // * possibly multiple epilogue blocks, where each epilogue block is // complete and self-contained, i.e. CSR restore instructions (and the // corresponding CFI instructions are not split across two or more blocks. -// * prologue and epilogue blocks are outside of any loops -// Thus, during execution, at the beginning and at the end of each basic block -// the function can be in one of two states: +// * CFI instructions are not contained in any loops +// Thus, during execution, at the beginning and at the end of each basic block, +// following the prologue, the function can be in one of two states: // - "has a call frame", if the function has executed the prologue, and // has not executed any epilogue // - "does not have a call frame", if the function has not executed the // prologue, or has executed an epilogue // which can be computed by a single RPO traversal. +// The location of the prologue is determined by finding the first block in the +// post-order traversal which contains CFI instructions. 
+ // In order to accommodate backends which do not generate unwind info in // epilogues we compute an additional property "strong no call frame on entry", // which is set for the entry point of the function and for every block @@ -85,10 +90,6 @@ static bool isPrologueCFIInstruction(const MachineInstr &MI) { MI.getFlag(MachineInstr::FrameSetup); } -static bool containsPrologue(const MachineBasicBlock &MBB) { - return llvm::any_of(MBB.instrs(), isPrologueCFIInstruction); -} - static bool containsEpilogue(const MachineBasicBlock &MBB) { return llvm::any_of(llvm::reverse(MBB), [](const auto &MI) { return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION && @@ -96,6 +97,25 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) { }); } +static MachineBasicBlock * +findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) { + MachineBasicBlock *PrologueBlock = nullptr; + for (auto It = po_begin(&MF.front()), End = po_end(&MF.front()); It != End; + ++It) { + MachineBasicBlock *MBB = *It; + llvm::for_each(MBB->instrs(), [&](MachineInstr &MI) { + if (isPrologueCFIInstruction(MI)) { + PrologueBlock = MBB; + PrologueEnd = std::next(MI.getIterator()); + } + }); + if (PrologueBlock) + return PrologueBlock; + } + + return nullptr; +} + bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { const TargetFrameLowering &TFL = *MF.getSubtarget().getFrameLowering(); if (!TFL.enableCFIFixup(MF)) @@ -105,6 +125,14 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { if (NumBlocks < 2) return false; + // Find the prologue and the point where we can issue the first + // `.cfi_remember_state`. 
+ + MachineBasicBlock::iterator PrologueEnd; + MachineBasicBlock *PrologueBlock = findPrologueEnd(MF, PrologueEnd); + if (PrologueBlock == nullptr) + return false; + struct BlockFlags { bool Reachable : 1; bool StrongNoFrameOnEntry : 1; @@ -116,21 +144,15 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { BlockInfo[0].StrongNoFrameOnEntry = true; // Compute the presence/absence of frame at each basic block. - MachineBasicBlock *PrologueBlock = nullptr; ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); for (MachineBasicBlock *MBB : RPOT) { BlockFlags &Info = BlockInfo[MBB->getNumber()]; // Set to true if the current block contains the prologue or the epilogue, // respectively. - bool HasPrologue = false; + bool HasPrologue = MBB == PrologueBlock; bool HasEpilogue = false; - if (!PrologueBlock && !Info.HasFrameOnEntry && containsPrologue(*MBB)) { - PrologueBlock = MBB; - HasPrologue = true; - } - if (Info.HasFrameOnEntry || HasPrologue) HasEpilogue = containsEpilogue(*MBB); @@ -149,9 +171,6 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { } } - if (!PrologueBlock) - return false; - // Walk the blocks of the function in "physical" order. // Every block inherits the frame state (as recorded in the unwind tables) // of the previous block. If the intended frame state is different, insert @@ -162,10 +181,7 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { // insert a `.cfi_remember_state`, in the case that the current block needs a // `.cfi_restore_state`. 
MachineBasicBlock *InsertMBB = PrologueBlock; - MachineBasicBlock::iterator InsertPt = PrologueBlock->begin(); - for (MachineInstr &MI : *PrologueBlock) - if (isPrologueCFIInstruction(MI)) - InsertPt = std::next(MI.getIterator()); + MachineBasicBlock::iterator InsertPt = PrologueEnd; assert(InsertPt != PrologueBlock->begin() && "Inconsistent notion of \"prologue block\""); diff --git a/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir new file mode 100644 index 000000000000000..31fa3832367becc --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir @@ -0,0 +1,308 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -run-pass=cfi-fixup %s -o - | FileCheck %s +--- | + source_filename = "cfi-fixup.ll" + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-linux" + + define i32 @f(i32 %x) #0 { + entry: + %p = alloca i8, i32 30000, align 1 + switch i32 %x, label %if.end7 [ + i32 0, label %return + i32 1, label %if.then2 + i32 2, label %if.then5 + ] + + if.then2: ; preds = %entry + %call = tail call i32 @g1(i32 1) + %add = add nsw i32 %call, 1 + br label %return + + if.then5: ; preds = %entry + %call6 = tail call i32 @g0(i32 2) + %sub = sub nsw i32 1, %call6 + br label %return + + if.end7: ; preds = %entry + br label %return + + return: ; preds = %if.end7, %if.then5, %if.then2, %entry + %retval.0 = phi i32 [ %add, %if.then2 ], [ %sub, %if.then5 ], [ 0, %if.end7 ], [ 1, %entry ] + ret i32 %retval.0 + } + + declare i32 @g1(i32) + + declare i32 @g0(i32) + + attributes #0 = { uwtable "probe-stack"="inline-asm" } + +... 
+--- +name: f +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: true +registers: [] +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 30016 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + stackProtector: '' + functionContext: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 30000 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: p, type: default, offset: -30016, size: 30000, alignment: 1, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -30000, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + hasRedZone: false +body: | + ; CHECK-LABEL: name: f + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $w0, $lr, $fp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = 
frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 + ; CHECK-NEXT: $x9 = frame-setup SUBXri $sp, 7, 12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w9, 28688 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1, 12 + ; CHECK-NEXT: $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv + ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0 + ; CHECK-NEXT: frame-setup Bcc 1, %bb.1, implicit killed $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.entry: + ; CHECK-NEXT: successors: %bb.6(0x20000000), %bb.3(0x60000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $wsp + ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1328, 0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 30016 + ; CHECK-NEXT: CFI_INSTRUCTION remember_state + ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0 + ; CHECK-NEXT: CBZW renamable $w0, %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.entry: + ; CHECK-NEXT: successors: %bb.7(0x2aaaaaab), %bb.4(0x55555555) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.7, implicit killed $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.8(0x40000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 1, %bb.8, implicit killed $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.if.then2: + ; CHECK-NEXT: liveins: $w0 + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: BL @g1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 + ; CHECK-NEXT: RET undef $lr, implicit killed $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: CFI_INSTRUCTION restore_state + ; CHECK-NEXT: CFI_INSTRUCTION remember_state + ; CHECK-NEXT: renamable $w0 = MOVZWi 1, 0 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 + ; CHECK-NEXT: RET undef $lr, implicit killed $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7.if.then5: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CFI_INSTRUCTION restore_state + ; CHECK-NEXT: CFI_INSTRUCTION remember_state + ; CHECK-NEXT: BL @g0, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: renamable $w8 = MOVZWi 1, 0 + 
; CHECK-NEXT: $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 + ; CHECK-NEXT: RET undef $lr, implicit killed $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8.if.end7: + ; CHECK-NEXT: CFI_INSTRUCTION restore_state + ; CHECK-NEXT: $w0 = ORRWrs $wzr, $wzr, 0 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 + ; CHECK-NEXT: RET undef $lr, implicit killed $w0 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $w0, $lr, $fp + + early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1) + frame-setup CFI_INSTRUCTION def_cfa_offset 16 + frame-setup CFI_INSTRUCTION offset $w30, -8 + frame-setup CFI_INSTRUCTION offset $w29, -16 + $x9 = frame-setup SUBXri $sp, 7, 12 + frame-setup CFI_INSTRUCTION def_cfa $w9, 28688 + + bb.1.entry: + successors: %bb.2(0x40000000), %bb.1(0x40000000) + liveins: $x9 + + $sp = frame-setup SUBXri $sp, 1, 12 + $xzr = 
frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv + frame-setup STRXui $xzr, $sp, 0 + frame-setup Bcc 1, %bb.1, implicit killed $nzcv + + bb.2.entry: + successors: %bb.6(0x20000000), %bb.3(0x60000000) + liveins: $w0 + + frame-setup CFI_INSTRUCTION def_cfa_register $wsp + $sp = frame-setup SUBXri $sp, 1328, 0 + frame-setup CFI_INSTRUCTION def_cfa_offset 30016 + frame-setup STRXui $xzr, $sp, 0 + CBZW renamable $w0, %bb.6 + + bb.3.entry: + successors: %bb.7(0x2aaaaaab), %bb.4(0x55555555) + liveins: $w0 + + dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv + Bcc 0, %bb.7, implicit killed $nzcv + + bb.4.entry: + successors: %bb.5(0x40000000), %bb.8(0x40000000) + liveins: $w0 + + dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv + Bcc 1, %bb.8, implicit killed $nzcv + + bb.5.if.then2: + liveins: $w0 + + BL @g1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0 + $sp = frame-destroy ADDXri $sp, 7, 12 + frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + $sp = frame-destroy ADDXri $sp, 1328, 0 + frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + frame-destroy CFI_INSTRUCTION restore $w30 + frame-destroy CFI_INSTRUCTION restore $w29 + RET undef $lr, implicit killed $w0 + + bb.6: + renamable $w0 = MOVZWi 1, 0 + $sp = frame-destroy ADDXri $sp, 7, 12 + frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + $sp = frame-destroy ADDXri $sp, 1328, 0 + frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + frame-destroy CFI_INSTRUCTION restore $w30 + frame-destroy CFI_INSTRUCTION restore $w29 + RET undef $lr, implicit 
killed $w0 + + bb.7.if.then5: + liveins: $w0 + + BL @g0, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + renamable $w8 = MOVZWi 1, 0 + $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0 + $sp = frame-destroy ADDXri $sp, 7, 12 + frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + $sp = frame-destroy ADDXri $sp, 1328, 0 + frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + frame-destroy CFI_INSTRUCTION restore $w30 + frame-destroy CFI_INSTRUCTION restore $w29 + RET undef $lr, implicit killed $w0 + + bb.8.if.end7: + $w0 = ORRWrs $wzr, $wzr, 0 + $sp = frame-destroy ADDXri $sp, 7, 12 + frame-destroy CFI_INSTRUCTION def_cfa_offset 1344 + $sp = frame-destroy ADDXri $sp, 1328, 0 + frame-destroy CFI_INSTRUCTION def_cfa_offset 16 + early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + frame-destroy CFI_INSTRUCTION restore $w30 + frame-destroy CFI_INSTRUCTION restore $w29 + RET undef $lr, implicit killed $w0 + +... 
>From c0c2083d623f56c3929c0101d8b1f1a85d0c888a Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Sat, 28 Oct 2023 13:33:28 +0100 Subject: [PATCH 2/3] Reverse iteration within a block when looking for prologue CFI insns --- llvm/lib/CodeGen/CFIFixup.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp index 964a8d56511fa1b..40a2a3a142e1758 100644 --- a/llvm/lib/CodeGen/CFIFixup.cpp +++ b/llvm/lib/CodeGen/CFIFixup.cpp @@ -99,20 +99,16 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) { static MachineBasicBlock * findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) { - MachineBasicBlock *PrologueBlock = nullptr; for (auto It = po_begin(&MF.front()), End = po_end(&MF.front()); It != End; ++It) { MachineBasicBlock *MBB = *It; - llvm::for_each(MBB->instrs(), [&](MachineInstr &MI) { - if (isPrologueCFIInstruction(MI)) { - PrologueBlock = MBB; - PrologueEnd = std::next(MI.getIterator()); - } - }); - if (PrologueBlock) - return PrologueBlock; + for (MachineInstr &MI : reverse(MBB->instrs())) { + if (!isPrologueCFIInstruction(MI)) + continue; + PrologueEnd = std::next(MI.getIterator()); + return MBB; + } } - return nullptr; } >From 2440b603aa8307364a4897e6782a60c2738c5912 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Wed, 11 Oct 2023 17:22:51 +0100 Subject: [PATCH 3/3] [clang][AArch64] Pass down stack clash protection options to LLVM/Backend --- clang/lib/CodeGen/CodeGenModule.cpp | 12 +++++++++++- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- clang/test/CodeGen/stack-clash-protection.c | 16 ++++++++++++---- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index b1a6683a66bd052..517021cb3c89bf1 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1076,6 
+1076,16 @@ void CodeGenModule::Release() { "sign-return-address-with-bkey", 1); } + if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) { + auto *InlineAsm = llvm::MDString::get(TheModule.getContext(), "inline-asm"); + if (CodeGenOpts.StackClashProtector) + getModule().addModuleFlag(llvm::Module::Override, "probe-stack", + InlineAsm); + if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096) + getModule().addModuleFlag(llvm::Module::Min, "stack-probe-size", + CodeGenOpts.StackProbeSize); + } + if (!CodeGenOpts.MemoryProfileOutput.empty()) { llvm::LLVMContext &Ctx = TheModule.getContext(); getModule().addModuleFlag( @@ -2287,7 +2297,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, if ((!D || !D->hasAttr<NoUwtableAttr>()) && CodeGenOpts.UnwindTables) B.addUWTableAttr(llvm::UWTableKind(CodeGenOpts.UnwindTables)); - if (CodeGenOpts.StackClashProtector) + if (CodeGenOpts.StackClashProtector && !getTarget().getTriple().isAArch64()) B.addAttribute("probe-stack", "inline-asm"); if (!hasUnwindExceptions(LangOpts)) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 43a92adbef64ba8..83a6e679fa19a6d 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3507,7 +3507,7 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args, return; if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() && - !EffectiveTriple.isPPC64()) + !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64()) return; Args.addOptInFlag(CmdArgs, options::OPT_fstack_clash_protection, diff --git a/clang/test/CodeGen/stack-clash-protection.c b/clang/test/CodeGen/stack-clash-protection.c index 67571f5cdb2c14c..2f502ef453d42f4 100644 --- a/clang/test/CodeGen/stack-clash-protection.c +++ b/clang/test/CodeGen/stack-clash-protection.c @@ -1,10 +1,12 @@ // Check the correct function attributes are generated -// RUN: %clang_cc1 -triple 
x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s -// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s -// RUN: %clang_cc1 -triple powerpc64le-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s -// RUN: %clang_cc1 -triple powerpc64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s +// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s --check-prefixes CHECK-AARCH64 // CHECK: define{{.*}} void @large_stack() #[[A:.*]] { +// CHECK-AARCH64: define{{.*}} void @large_stack() #[[A:.*]] { void large_stack(void) { volatile int stack[20000], i; for (i = 0; i < sizeof(stack) / sizeof(int); ++i) @@ -12,14 +14,20 @@ void large_stack(void) { } // CHECK: define{{.*}} void @vla({{.*}}) #[[A:.*]] { +// CHECK-AARCH64: define{{.*}} void @vla({{.*}}) #[[A:.*]] { void vla(int n) { volatile int vla[n]; __builtin_memset(&vla[0], 0, 1); } // CHECK: define{{.*}} void @builtin_alloca({{.*}}) #[[A:.*]] { +// CHECK-AARCH64: define{{.*}} void @builtin_alloca({{.*}}) #[[A:.*]] { void builtin_alloca(int n) { volatile void *mem = __builtin_alloca(n); } // CHECK: attributes #[[A]] = {{.*}} "probe-stack"="inline-asm" +// CHECK-AARCH64-NOT: attributes #[[A]] = {{.*}} "probe-stack" + +// CHECK-AARCH64: !{i32 4, !"probe-stack", !"inline-asm"} +// CHECK-AARCH64: 
!{i32 8, !"stack-probe-size", i32 8192} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits