serge-sans-paille updated this revision to Diff 225049.
serge-sans-paille added a comment.
Get rid of static mapping + update test cases
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D68720/new/
https://reviews.llvm.org/D68720
Files:
clang/docs/ReleaseNotes.rst
clang/include/clang/Basic/CodeGenOptions.def
clang/include/clang/Basic/DiagnosticFrontendKinds.td
clang/include/clang/Basic/TargetInfo.h
clang/include/clang/Driver/CC1Options.td
clang/include/clang/Driver/Options.td
clang/lib/Basic/Targets/X86.h
clang/lib/CodeGen/CGStmt.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/Driver/ToolChains/Clang.cpp
clang/lib/Frontend/CompilerInvocation.cpp
clang/test/CodeGen/stack-clash-protection.c
clang/test/Driver/stack-clash-protection.c
llvm/docs/ReleaseNotes.rst
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/Target/X86/X86CallFrameOptimization.cpp
llvm/lib/Target/X86/X86FrameLowering.cpp
llvm/lib/Target/X86/X86FrameLowering.h
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrCompiler.td
llvm/lib/Target/X86/X86InstrInfo.td
llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll
llvm/test/CodeGen/X86/stack-clash-medium.ll
llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
llvm/test/CodeGen/X86/stack-clash-small.ll
Index: llvm/test/CodeGen/X86/stack-clash-small.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/stack-clash-small.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo() local_unnamed_addr #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $280, %rsp # imm = 0x118
+; CHECK-NEXT: .cfi_def_cfa_offset 288
+; CHECK-NEXT: movl $1, 264(%rsp)
+; CHECK-NEXT: movl -128(%rsp), %eax
+; CHECK-NEXT: addq $280, %rsp # imm = 0x118
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+
+ %a = alloca i32, i64 100, align 16
+ %b = getelementptr inbounds i32, i32* %a, i64 98
+ store volatile i32 1, i32* %b
+ %c = load volatile i32, i32* %a
+ ret i32 %c
+}
+
+attributes #0 = {"probe-stack"="inline-asm"}
Index: llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo(i64 %i) local_unnamed_addr #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
+; CHECK-NEXT: movq $0, (%rsp)
+; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
+; CHECK-NEXT: .cfi_def_cfa_offset 7888
+; CHECK-NEXT: movl $1, -128(%rsp,%rdi,4)
+; CHECK-NEXT: movl -128(%rsp), %eax
+; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+
+ %a = alloca i32, i32 2000, align 16
+ %b = getelementptr inbounds i32, i32* %a, i64 %i
+ store volatile i32 1, i32* %b
+ %c = load volatile i32, i32* %a
+ ret i32 %c
+}
+
+attributes #0 = {"probe-stack"="inline-asm"}
+
Index: llvm/test/CodeGen/X86/stack-clash-medium.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/stack-clash-medium.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo() local_unnamed_addr #0 {
+
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
+; CHECK-NEXT: .cfi_def_cfa_offset 7888
+; CHECK-NEXT: movl $1, 880(%rsp)
+; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
+; CHECK-NEXT: movq $0, (%rsp)
+; CHECK-NEXT: movl -128(%rsp), %eax
+; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+
+ %a = alloca i32, i64 2000, align 16
+ %b = getelementptr inbounds i32, i32* %a, i64 1198
+ store volatile i32 1, i32* %b
+ %c = load volatile i32, i32* %a
+ ret i32 %c
+}
+
+attributes #0 = {"probe-stack"="inline-asm"}
Index: llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo() local_unnamed_addr #0 {
+
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
+; CHECK-NEXT: .cfi_def_cfa_offset 7888
+; CHECK-NEXT: movl $1, 264(%rsp)
+; CHECK-NEXT: movl $1, 880(%rsp)
+; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
+; CHECK-NEXT: movq $0, (%rsp)
+; CHECK-NEXT: movl -128(%rsp), %eax
+; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+
+ %a = alloca i32, i64 2000, align 16
+ %b0 = getelementptr inbounds i32, i32* %a, i64 98
+ %b1 = getelementptr inbounds i32, i32* %a, i64 1198
+ store volatile i32 1, i32* %b0
+ store volatile i32 1, i32* %b1
+ %c = load volatile i32, i32* %a
+ ret i32 %c
+}
+
+attributes #0 = {"probe-stack"="inline-asm"}
Index: llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo(i32 %n) local_unnamed_addr #0 {
+
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: leaq 15(,%rax,4), %rax
+; CHECK-NEXT: andq $-16, %rax
+; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000
+; CHECK-NEXT: jl .LBB0_3
+; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: subq $4096, %rax # imm = 0x1000
+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
+; CHECK-NEXT: movq $0, (%rsp)
+; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000
+; CHECK-NEXT: jge .LBB0_2
+; CHECK-NEXT: .LBB0_3:
+; CHECK-NEXT: subq %rax, %rsp
+; CHECK-NEXT: movq %rsp, %rax
+; CHECK-NEXT: movl $1, 4792(%rax)
+; CHECK-NEXT: movl (%rax), %eax
+; CHECK-NEXT: movq %rbp, %rsp
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-NEXT: retq
+
+ %a = alloca i32, i32 %n, align 16
+ %b = getelementptr inbounds i32, i32* %a, i64 1198
+ store volatile i32 1, i32* %b
+ %c = load volatile i32, i32* %a
+ ret i32 %c
+}
+
+attributes #0 = {"probe-stack"="inline-asm"}
Index: llvm/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -121,6 +121,8 @@
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+def SDT_X86PROBED_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
@@ -292,6 +294,9 @@
def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
[SDNPHasChain]>;
+def X86ProbedAlloca : SDNode<"X86ISD::PROBED_ALLOCA", SDT_X86PROBED_ALLOCA,
+ [SDNPHasChain]>;
+
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
Index: llvm/lib/Target/X86/X86InstrCompiler.td
===================================================================
--- llvm/lib/Target/X86/X86InstrCompiler.td
+++ llvm/lib/Target/X86/X86InstrCompiler.td
@@ -111,6 +111,23 @@
[(set GR64:$dst,
(X86SegAlloca GR64:$size))]>,
Requires<[In64BitMode]>;
+
+// To protect against stack clash, dynamic allocation should perform a memory
+// probe at each page.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+def PROBED_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
+ "# variable sized alloca with probing",
+ [(set GR32:$dst,
+ (X86ProbedAlloca GR32:$size))]>,
+ Requires<[NotLP64]>;
+
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
+def PROBED_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
+ "# variable sized alloca with probing",
+ [(set GR64:$dst,
+ (X86ProbedAlloca GR64:$size))]>,
+ Requires<[In64BitMode]>;
}
// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
Index: llvm/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.h
+++ llvm/lib/Target/X86/X86ISelLowering.h
@@ -537,6 +537,10 @@
// falls back to heap allocation if not.
SEG_ALLOCA,
+ // For allocating stack space when using the stack clash protector.
+ // Allocation is performed in blocks, and each block is probed.
+ PROBED_ALLOCA,
+
// Memory barriers.
MEMBARRIER,
MFENCE,
@@ -1215,6 +1219,8 @@
bool supportSwiftError() const override;
+ bool hasStackProbeSymbol(MachineFunction &MF) const override;
+ bool hasInlineStackProbe(MachineFunction &MF) const override;
StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
unsigned getStackProbeSize(MachineFunction &MF) const;
@@ -1439,6 +1445,9 @@
MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+
MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
MachineBasicBlock *BB) const;
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22144,9 +22144,9 @@
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
- bool EmitStackProbe = !getStackProbeSymbolName(MF).empty();
+ bool EmitStackProbeCall = hasStackProbeSymbol(MF);
bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
- SplitStack || EmitStackProbe;
+ SplitStack || EmitStackProbeCall;
SDLoc dl(Op);
// Get the inputs.
@@ -22170,11 +22170,21 @@
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
- SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
- Chain = SP.getValue(1);
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
unsigned StackAlign = TFI.getStackAlignment();
- Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ if (hasInlineStackProbe(MF)) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
+ Register Vreg = MRI.createVirtualRegister(AddrRegClass);
+ Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
+ Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
+ DAG.getRegister(Vreg, SPTy));
+ } else {
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ }
if (Align > StackAlign)
Result = DAG.getNode(ISD::AND, dl, VT, Result,
DAG.getConstant(-(uint64_t)Align, dl, VT));
@@ -28702,6 +28712,8 @@
case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
case X86ISD::MFENCE: return "X86ISD::MFENCE";
case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
+ case X86ISD::PROBED_ALLOCA:
+ return "X86ISD::PROBED_ALLOCA";
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
case X86ISD::RDSEED: return "X86ISD::RDSEED";
@@ -29959,6 +29971,95 @@
}
MachineBasicBlock *
+X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *MF = BB->getParent();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+
+ const bool Is64Bit = Subtarget.is64Bit();
+ const bool IsLP64 = Subtarget.isTarget64BitLP64();
+
+ const unsigned ProbeSize = getStackProbeSize(*MF);
+
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineBasicBlock *testMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator MBBIter = ++BB->getIterator();
+ MF->insert(MBBIter, testMBB);
+ MF->insert(MBBIter, blockMBB);
+ MF->insert(MBBIter, tailMBB);
+
+ unsigned sizeVReg = MI.getOperand(1).getReg();
+
+ const TargetRegisterClass *SizeRegClass = MRI.getRegClass(sizeVReg);
+
+ unsigned tmpSizeVReg = MRI.createVirtualRegister(SizeRegClass);
+ unsigned tmpSizeVReg2 = MRI.createVirtualRegister(SizeRegClass);
+
+ unsigned physSPReg =
+ IsLP64 || Subtarget.isTargetNaCl64() ? X86::RSP : X86::ESP;
+
+ // Test whether the remaining size to allocate is at least ProbeSize.
+ BuildMI(testMBB, DL, TII->get(X86::PHI), tmpSizeVReg)
+ .addReg(sizeVReg)
+ .addMBB(BB)
+ .addReg(tmpSizeVReg2)
+ .addMBB(blockMBB);
+
+ BuildMI(testMBB, DL, TII->get(IsLP64 ? X86::CMP64ri32 : X86::CMP32ri))
+ .addReg(tmpSizeVReg)
+ .addImm(ProbeSize);
+
+ BuildMI(testMBB, DL, TII->get(X86::JCC_1))
+ .addMBB(tailMBB)
+ .addImm(X86::COND_L);
+ testMBB->addSuccessor(blockMBB);
+ testMBB->addSuccessor(tailMBB);
+
+ // allocate a block and touch it
+
+ BuildMI(blockMBB, DL, TII->get(IsLP64 ? X86::SUB64ri32 : X86::SUB32ri),
+ tmpSizeVReg2)
+ .addReg(tmpSizeVReg)
+ .addImm(ProbeSize);
+
+ BuildMI(blockMBB, DL, TII->get(IsLP64 ? X86::SUB64ri32 : X86::SUB32ri),
+ physSPReg)
+ .addReg(physSPReg)
+ .addImm(ProbeSize);
+
+ const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+ addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
+ .addImm(0);
+
+ BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB);
+ blockMBB->addSuccessor(testMBB);
+
+ // allocate the tail and continue
+ BuildMI(tailMBB, DL, TII->get(IsLP64 ? X86::SUB64rr : X86::SUB32rr),
+ physSPReg)
+ .addReg(physSPReg)
+ .addReg(tmpSizeVReg);
+ BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
+ .addReg(physSPReg);
+
+ tailMBB->splice(tailMBB->end(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ tailMBB->transferSuccessorsAndUpdatePHIs(BB);
+ BB->addSuccessor(testMBB);
+
+ // Delete the original pseudo instruction.
+ MI.eraseFromParent();
+
+ // And we're done.
+ return tailMBB;
+}
+
+MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
@@ -31133,6 +31234,9 @@
case X86::SEG_ALLOCA_32:
case X86::SEG_ALLOCA_64:
return EmitLoweredSegAlloca(MI, BB);
+ case X86::PROBED_ALLOCA_32:
+ case X86::PROBED_ALLOCA_64:
+ return EmitLoweredProbedAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
@@ -45822,10 +45926,29 @@
return Subtarget.is64Bit();
}
+/// Returns true if stack probing through a function call is requested.
+bool X86TargetLowering::hasStackProbeSymbol(MachineFunction &MF) const {
+ return !getStackProbeSymbolName(MF).empty();
+}
+
+/// Returns true if stack probing through inline assembly is requested.
+bool X86TargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
+ // If the function specifically requests inline stack probes, emit them.
+ if (MF.getFunction().hasFnAttribute("probe-stack"))
+ return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+ "inline-asm";
+
+ return false;
+}
+
/// Returns the name of the symbol used to emit stack probes or the empty
/// string if not applicable.
StringRef
X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
+ // Inline stack probes disable the stack probe call.
+ if (hasInlineStackProbe(MF))
+ return "";
+
// If the function specifically requests stack probes, emit them.
if (MF.getFunction().hasFnAttribute("probe-stack"))
return MF.getFunction().getFnAttribute("probe-stack").getValueAsString();
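(Aside, not part of the diff: a minimal IR sketch of the dynamic-alloca path handled above, with an illustrative function name. When the "probe-stack"="inline-asm" attribute is present, the DYNAMIC_STACKALLOC lowering selects the new X86ISD::PROBED_ALLOCA node and EmitLoweredProbedAlloca expands it into the page-by-page probing loop checked by stack-clash-dynamic-alloca.ll.)

target triple = "x86_64-unknown-linux-gnu"

define void @dynamic_probe(i64 %n) "probe-stack"="inline-asm" {
  %p = alloca i8, i64 %n, align 16   ; dynamically-sized: lowered via PROBED_ALLOCA
  store volatile i8 1, i8* %p        ; touch the allocation
  ret void
}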
Index: llvm/lib/Target/X86/X86FrameLowering.h
===================================================================
--- llvm/lib/Target/X86/X86FrameLowering.h
+++ llvm/lib/Target/X86/X86FrameLowering.h
@@ -128,6 +128,10 @@
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const;
+ std::tuple<MachineBasicBlock::iterator, int64_t>
+ findFreeStackProbe(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ int64_t MinOffset, int64_t MaxOffset) const;
+
/// Check that LEA can be used on SP in an epilogue sequence for \p MF.
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const;
Index: llvm/lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86FrameLowering.cpp
+++ llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -17,6 +17,7 @@
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -32,6 +33,12 @@
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>
+#define DEBUG_TYPE "x86-fl"
+
+STATISTIC(NumFrameFreeProbe, "Number of free stack probes used in prologue");
+STATISTIC(NumFrameExtraProbe,
+ "Number of extra stack probes generated in prologue");
+
using namespace llvm;
X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
@@ -343,6 +350,25 @@
}
}
+/// Maps an opcode that performs a memory operation to the index of its
+/// displacement operand, or returns -1 if the opcode is not supported.
+static int OpcodeToSPOperandIndex(unsigned Opcode) {
+ switch (Opcode) {
+ case X86::MOV8mi:
+ case X86::MOV16mi:
+ case X86::MOV32mi:
+ case X86::MOV64mi32:
+ return 3;
+ case X86::MOV8mr:
+ case X86::MOV16mr:
+ case X86::MOV32mr:
+ case X86::MOV64mr:
+ return 3;
+ default:
+ return -1;
+ }
+}
+
MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
@@ -381,16 +407,118 @@
} else {
bool IsSub = Offset < 0;
uint64_t AbsOffset = IsSub ? -Offset : Offset;
- unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
- : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
- MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(AbsOffset);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
+ : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
+ const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+ MachineFunction &MF = *MBB.getParent();
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const X86TargetLowering &TLI = *STI.getTargetLowering();
+ const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
+ if (EmitInlineStackProbe && !InEpilogue) {
+ const uint64_t PageSize = TLI.getStackProbeSize(MF);
+ uint64_t CurrentAbsOffset = 0;
+ // The return address pushed on the stack counts as the initial probe.
+ uint64_t CurrentProbeOffset = 0;
+
+ while (CurrentAbsOffset < AbsOffset) {
+ uint64_t ChunkSize = std::min(AbsOffset - CurrentAbsOffset, PageSize);
+ MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(ChunkSize);
+ CurrentAbsOffset += ChunkSize;
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
+ // We need to touch the stack allocated by MI. Instead of generating an
+ // explicit probe, we try to reuse a stack access already present in the
+ // entry block, starting from MI. findFreeStackProbe returns such a free
+ // probe, if one exists and if continuing the allocation from it does not
+ // conflict with the instructions between MI and that probe.
+
+ const int64_t FreeProbeLowerBound = AbsOffset - CurrentAbsOffset;
+ const int64_t FreeProbeUpperBound = FreeProbeLowerBound + PageSize;
+ int64_t FreeProbeOffset = -1;
+ do {
+ MachineBasicBlock::iterator FreeProbeIterator;
+ std::tie(FreeProbeIterator, FreeProbeOffset) = findFreeStackProbe(
+ MBB, MBBI, FreeProbeLowerBound, FreeProbeUpperBound);
+ if (FreeProbeOffset != -1) {
+ NumFrameFreeProbe++;
+ MachineInstr &FreeProbe = *FreeProbeIterator;
+ if (!FreeProbe.isCall()) {
+ int OffsetIndex = OpcodeToSPOperandIndex(FreeProbe.getOpcode());
+ assert(OffsetIndex >= 0 && "Supported Opcode");
+ FreeProbe.getOperand(OffsetIndex)
+ .setImm(FreeProbe.getOperand(OffsetIndex).getImm() -
+ FreeProbeLowerBound);
+ }
+ CurrentProbeOffset = AbsOffset - FreeProbeOffset;
+ MBBI = std::next(FreeProbeIterator);
+ }
+ } while (FreeProbeOffset != -1);
+
+ while (CurrentAbsOffset - CurrentProbeOffset >= PageSize) {
+ NumFrameExtraProbe++;
+ auto Shift =
+ std::min(CurrentAbsOffset - CurrentProbeOffset, PageSize);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)), StackPtr,
+ false, PageSize - Shift)
+ .addImm(0);
+ CurrentProbeOffset += Shift;
+ }
+ }
+ } else {
+ MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(AbsOffset);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
}
return MI;
}
+/// findFreeStackProbe - Traverses the instructions from @p MBBI to the end of
+/// @p MBB, stopping when the first of the following happens:
+///
+/// - a MachineInstr writing to the stack at an offset statically known to be
+///   in [MinOffset, MaxOffset) is found; returns an iterator to that
+///   instruction and the offset of the probe: (ProbeIterator, ProbeOffset).
+/// - a MachineInstr reading or writing the stack at an offset that is not
+///   statically known is found; returns (MBB.end(), -1).
+/// - none of the above happens; returns (MBB.end(), -1).
+std::tuple<MachineBasicBlock::iterator, int64_t>
+X86FrameLowering::findFreeStackProbe(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int64_t MinOffset,
+ int64_t MaxOffset) const {
+
+ for (; MBBI != MBB.end(); ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ auto OperandIndex = OpcodeToSPOperandIndex(MI.getOpcode());
+ if (OperandIndex != -1) {
+ if (MI.isCall())
+ return {MBBI, MaxOffset};
+ auto &Dst = MI.getOperand(0);
+ if (!Dst.isFI())
+ continue;
+ auto &MOOffset = MI.getOperand(OperandIndex);
+ if (MOOffset.isImm()) {
+ int64_t Offset = MOOffset.getImm();
+ if (MinOffset <= Offset && Offset < MaxOffset) {
+ return {MBBI, Offset};
+ }
+ continue;
+ }
+ // We do not know where this instruction writes; stop searching.
+ break;
+ }
+ if (std::any_of(MI.operands_begin(), MI.operands_end(),
+ [](MachineOperand &MO) { return MO.isFI(); })) {
+ break; // effect on stack pointer not modelled, stopping
+ }
+ }
+ return {MBB.end(), -1};
+}
+
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
bool doMergeWithPrevious) const {
@@ -1015,7 +1143,8 @@
X86FI->setCalleeSavedFrameSize(
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
- bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
+ const bool EmitStackProbeCall =
+ STI.getTargetLowering()->hasStackProbeSymbol(MF);
unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
// Re-align the stack on 64-bit if the x86-interrupt calling convention is
@@ -1033,11 +1162,10 @@
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone). We also check that we don't
// push and pop from the stack.
- if (has128ByteRedZone(MF) &&
- !TRI->needsStackRealignment(MF) &&
+ if (has128ByteRedZone(MF) && !TRI->needsStackRealignment(MF) &&
!MFI.hasVarSizedObjects() && // No dynamic alloca.
!MFI.adjustsStack() && // No calls.
- !UseStackProbe && // No stack probes.
+ !EmitStackProbeCall && // No stack probes.
!MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
@@ -1238,7 +1366,7 @@
uint64_t AlignedNumBytes = NumBytes;
if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
- if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
+ if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
assert(!X86FI->getUsesRedZone() &&
"The Red Zone is not accounted for in stack probes");
Index: llvm/lib/Target/X86/X86CallFrameOptimization.cpp
===================================================================
--- llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -162,14 +162,13 @@
// memory for arguments.
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
- bool UseStackProbe =
- !STI->getTargetLowering()->getStackProbeSymbolName(MF).empty();
+ bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);
unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
for (MachineBasicBlock &BB : MF) {
bool InsideFrameSequence = false;
for (MachineInstr &MI : BB) {
if (MI.getOpcode() == FrameSetupOpcode) {
- if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe)
+ if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)
return false;
if (InsideFrameSequence)
return false;
Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1659,6 +1659,10 @@
/// Returns the name of the symbol used to emit stack probes or the empty
/// string if not applicable.
+ virtual bool hasStackProbeSymbol(MachineFunction &MF) const { return false; }
+
+ virtual bool hasInlineStackProbe(MachineFunction &MF) const { return false; }
+
virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const {
return "";
}
Index: llvm/docs/ReleaseNotes.rst
===================================================================
--- llvm/docs/ReleaseNotes.rst
+++ llvm/docs/ReleaseNotes.rst
@@ -101,6 +101,10 @@
decrease in CPU frequency on these CPUs. This can be re-enabled by passing
-mprefer-vector-width=512 to clang or passing -mattr=-prefer-256-bit to llc.
+* Functions with the probe-stack attribute set to "inline-asm" are now protected
+ against stack clash without the need for a third-party probing function and
+ with limited impact on performance.
+
Changes to the AMDGPU Target
-----------------------------
Index: clang/test/Driver/stack-clash-protection.c
===================================================================
--- /dev/null
+++ clang/test/Driver/stack-clash-protection.c
@@ -0,0 +1,22 @@
+// RUN: %clang -target i386-unknown-linux -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-i386
+// SCP-i386: "-stack-clash-protection"
+
+// RUN: %clang -target x86_64-scei-ps4 -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-x86
+// SCP-x86: "-stack-clash-protection"
+
+// RUN: %clang -target armv7k-apple-watchos2.0 -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SCP-armv7
+// SCP-armv7-NOT: "-stack-clash-protection"
+
+// RUN: %clang -target x86_64-unknown-linux -fstack-clash-protection -c %s 2>&1 | FileCheck %s -check-prefix=SCP-warn
+// SCP-warn: warning: Unable to protect inline asm that clobbers stack pointer against stack clash
+
+int foo(int c) {
+ int r;
+ __asm__("sub %0, %%rsp"
+ :
+ : "rm"(c)
+ : "rsp");
+ __asm__("mov %%rsp, %0"
+ : "=rm"(r)::);
+ return r;
+}
Index: clang/test/CodeGen/stack-clash-protection.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/stack-clash-protection.c
@@ -0,0 +1,40 @@
+// RUN: %clang -S -target x86_64 -o - %s -fstack-clash-protection | FileCheck %s
+// RUN: %clang -target x86_64 -o %t.out %s -fstack-clash-protection && %t.out
+
+#include <alloca.h>
+#include <string.h>
+
+// CHECK-LABEL: @main
+// static alloca instrumentation
+// CHECK: subq $4096
+// CHECK: mov
+// CHECK: subq $4096
+//
+// VLA instrumentation
+// CHECK: subq $4096, %rax
+// CHECK: subq $4096, %rsp
+//
+// dynamic alloca instrumentation (1)
+// CHECK: subq $4096, %rax
+// CHECK: subq $4096, %rsp
+//
+// dynamic alloca instrumentation (2)
+// CHECK: subq $4096, %rax
+// CHECK: subq $4096, %rsp
+//
+int main(int argc, char **argv) {
+ int volatile static_mem[8000];
+ for (size_t i = 0; i < argc * sizeof(static_mem) / sizeof(static_mem[0]); ++i)
+ static_mem[i] = argc * i;
+
+ int vla[argc];
+ memset(&vla[0], 0, argc);
+
+ // also check allocation of 0 size
+ volatile void *mem = __builtin_alloca(argc - 1);
+
+ int volatile *dyn_mem = alloca(sizeof(static_mem) * argc);
+ for (size_t i = 0; i < argc * sizeof(static_mem) / sizeof(static_mem[0]); ++i)
+ dyn_mem[i] = argc * i;
+
+ return static_mem[(7999 * argc) / 2] - dyn_mem[(7999 * argc) / 2] + vla[argc - 1];
+}
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -1213,6 +1213,8 @@
Opts.NoStackArgProbe = Args.hasArg(OPT_mno_stack_arg_probe);
+ Opts.StackClashProtector = Args.hasArg(OPT_stack_clash_protection);
+
if (Arg *A = Args.getLastArg(OPT_fobjc_dispatch_method_EQ)) {
StringRef Name = A->getValue();
unsigned Method = llvm::StringSwitch<unsigned>(Name)
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -2603,6 +2603,29 @@
}
}
+static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
+ ArgStringList &CmdArgs) {
+ const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple();
+
+ for (const Arg *A : Args) {
+ switch (A->getOption().getID()) {
+ default:
+ continue;
+ case options::OPT_fstack_clash_protection: {
+ switch (EffectiveTriple.getArch()) {
+ default:
+ return;
+ case llvm::Triple::ArchType::x86:
+ case llvm::Triple::ArchType::x86_64:
+ break;
+ }
+ A->claim();
+ CmdArgs.push_back("-stack-clash-protection");
+ }
+ }
+ }
+}
+
static void RenderTrivialAutoVarInitOptions(const Driver &D,
const ToolChain &TC,
const ArgList &Args,
@@ -4722,6 +4745,7 @@
CmdArgs.push_back(Args.MakeArgString("-mspeculative-load-hardening"));
RenderSSPOptions(TC, Args, CmdArgs, KernelOrKext);
+ RenderSCPOptions(TC, Args, CmdArgs);
RenderTrivialAutoVarInitOptions(D, TC, Args, CmdArgs);
// Translate -mstackrealign
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -1486,6 +1486,9 @@
if (CodeGenOpts.UnwindTables)
B.addAttribute(llvm::Attribute::UWTable);
+ if (CodeGenOpts.StackClashProtector)
+ B.addAttribute("probe-stack", "inline-asm");
+
if (!hasUnwindExceptions(LangOpts))
B.addAttribute(llvm::Attribute::NoUnwind);
Index: clang/lib/CodeGen/CGStmt.cpp
===================================================================
--- clang/lib/CodeGen/CGStmt.cpp
+++ clang/lib/CodeGen/CGStmt.cpp
@@ -10,14 +10,16 @@
//
//===----------------------------------------------------------------------===//
-#include "CodeGenFunction.h"
#include "CGDebugInfo.h"
+#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/TargetInfo.h"
+#include "clang/Driver/DriverDiagnostic.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
@@ -2229,8 +2231,14 @@
if (Clobber == "memory")
ReadOnly = ReadNone = false;
- else if (Clobber != "cc")
+ else if (Clobber != "cc") {
Clobber = getTarget().getNormalizedGCCRegisterName(Clobber);
+ if (CGM.getCodeGenOpts().StackClashProtector &&
+ Clobber == getTarget().getSPRegName()) {
+ CGM.getDiags().Report(S.getAsmLoc(),
+ diag::warn_fe_stack_clash_protection_inline_asm);
+ }
+ }
if (!Constraints.empty())
Constraints += ',';
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -149,6 +149,8 @@
ArrayRef<TargetInfo::AddlRegName> getGCCAddlRegNames() const override;
+ const char *getSPRegName() const override { return "rsp"; }
+
bool validateCpuSupports(StringRef Name) const override;
bool validateCpuIs(StringRef Name) const override;
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -1706,6 +1706,8 @@
def fsplit_stack : Flag<["-"], "fsplit-stack">, Group<f_Group>;
def fstack_protector_all : Flag<["-"], "fstack-protector-all">, Group<f_Group>,
HelpText<"Enable stack protectors for all functions">;
+def fstack_clash_protection : Flag<["-"], "fstack-clash-protection">, Group<f_Group>,
+ HelpText<"Enable stack clash protection">;
def fstack_protector_strong : Flag<["-"], "fstack-protector-strong">, Group<f_Group>,
HelpText<"Enable stack protectors for some functions vulnerable to stack smashing. "
"Compared to -fstack-protector, this uses a stronger heuristic "
Index: clang/include/clang/Driver/CC1Options.td
===================================================================
--- clang/include/clang/Driver/CC1Options.td
+++ clang/include/clang/Driver/CC1Options.td
@@ -736,6 +736,8 @@
HelpText<"Enable stack protectors">;
def stack_protector_buffer_size : Separate<["-"], "stack-protector-buffer-size">,
HelpText<"Lower bound for a buffer to be considered for stack protection">;
+def stack_clash_protection : Flag<["-"], "stack-clash-protection">,
+ HelpText<"Enable stack clash protection">;
def fvisibility : Separate<["-"], "fvisibility">,
HelpText<"Default type and symbol visibility">;
def ftype_visibility : Separate<["-"], "ftype-visibility">,
Index: clang/include/clang/Basic/TargetInfo.h
===================================================================
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -815,6 +815,8 @@
StringRef getNormalizedGCCRegisterName(StringRef Name,
bool ReturnCanonical = false) const;
+ virtual const char *getSPRegName() const { return nullptr; }
+
/// Extracts a register from the passed constraint (if it is a
/// single-register constraint) and the asm label expression related to a
/// variable in the input or output list of an inline asm statement.
Index: clang/include/clang/Basic/DiagnosticFrontendKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -17,6 +17,9 @@
def err_fe_inline_asm : Error<"%0">, CatInlineAsm;
def warn_fe_inline_asm : Warning<"%0">, CatInlineAsm, InGroup<BackendInlineAsm>;
+def warn_fe_stack_clash_protection_inline_asm : Warning<
+ "Unable to protect inline asm that clobbers stack pointer against stack clash">,
+ CatInlineAsm, InGroup<BackendInlineAsm>;
def note_fe_inline_asm : Note<"%0">, CatInlineAsm;
def note_fe_inline_asm_here : Note<"instantiated into assembly here">;
def err_fe_cannot_link_module : Error<"cannot link module '%0': %1">,
Index: clang/include/clang/Basic/CodeGenOptions.def
===================================================================
--- clang/include/clang/Basic/CodeGenOptions.def
+++ clang/include/clang/Basic/CodeGenOptions.def
@@ -135,6 +135,7 @@
///< enabled.
CODEGENOPT(NoWarn , 1, 0) ///< Set when -Wa,--no-warn is enabled.
CODEGENOPT(EnableSegmentedStacks , 1, 0) ///< Set when -fsplit-stack is enabled.
+CODEGENOPT(StackClashProtector, 1, 0) ///< Set when -fstack-clash-protection is enabled.
CODEGENOPT(NoImplicitFloat , 1, 0) ///< Set when -mno-implicit-float is enabled.
CODEGENOPT(NoInfsFPMath , 1, 0) ///< Assume FP arguments, results not +-Inf.
CODEGENOPT(NoSignedZeros , 1, 0) ///< Allow ignoring the signedness of FP zero
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -70,7 +70,9 @@
New Compiler Flags
------------------
-- ...
+- -fstack-clash-protection now provides protection against the stack clash
+ attack for the x86 architecture by automatically probing each page of the
+ allocated stack.
Deprecated Compiler Flags
-------------------------
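(Aside, not part of the diff: how the two release notes connect, with illustrative identifiers. Per the CodeGenModule.cpp hunk, -fstack-clash-protection (cc1 spelling: -stack-clash-protection) attaches the existing "probe-stack"="inline-asm" attribute to every function definition, which the X86 backend then honors as shown in the new llc tests. Roughly, for a C file compiled with clang -S -emit-llvm -fstack-clash-protection, each definition carries the attribute below; other attributes clang normally emits are omitted here.)

define void @large_frame() #0 {
  %buf = alloca [8000 x i32], align 16
  %first = getelementptr inbounds [8000 x i32], [8000 x i32]* %buf, i64 0, i64 0
  store volatile i32 0, i32* %first
  ret void
}

attributes #0 = { "probe-stack"="inline-asm" }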