llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen Author: None (eddyz87) <details> <summary>Changes</summary> This commit introduces attribute bpf_fastcall to declare BPF functions that do not clobber some of the caller saved registers (R0-R5). The idea is to generate the code complying with generic BPF ABI, but allow a compatible Linux Kernel to remove unnecessary spills and fills of non-scratched registers (given some compiler assistance). For such functions, do register allocation as if caller saved registers are not clobbered, but later wrap the calls with spill and fill patterns that are simple to recognize in kernel. For example for the following C code: #define __bpf_fastcall __attribute__((bpf_fastcall)) void bar(void) __bpf_fastcall; void buz(long i, long j, long k); void foo(long i, long j, long k) { bar(); buz(i, j, k); } First allocate registers as if: foo: call bar # note: no spills for i,j,k (r1,r2,r3) call buz exit And later insert spills/fills during the peephole phase: foo: *(u64 *)(r10 - 8) = r1; # Such call pattern is *(u64 *)(r10 - 16) = r2; # correct when used with *(u64 *)(r10 - 24) = r3; # old kernels. call bar r3 = *(u64 *)(r10 - 24); # But also allows new r2 = *(u64 *)(r10 - 16); # kernels to recognize the r1 = *(u64 *)(r10 - 8); # pattern and remove spills/fills. call buz exit The offsets for generated spills/fills are picked as minimal stack offsets for the function. Allocated stack slots are not used for any other purposes, in order to simplify in-kernel analysis. Corresponding functionality has been merged in Linux Kernel as [this](https://lore.kernel.org/bpf/172179364482.1919.9590705031832457529.git-patchwork-notify@<!-- -->kernel.org/) patch set (the patch assumed that `no_caller_saved_registers` attribute would be used by LLVM, naming does not matter for the Kernel). 
--- Patch is 23.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101228.diff 16 Files Affected: - (modified) clang/include/clang/Basic/Attr.td (+8) - (modified) clang/include/clang/Basic/AttrDocs.td (+19) - (modified) clang/lib/CodeGen/CGCall.cpp (+2) - (added) clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c (+24) - (modified) clang/test/Misc/pragma-attribute-supported-attributes-list.test (+1) - (added) clang/test/Sema/bpf-attr-bpf-fastcall.c (+7) - (modified) llvm/lib/Target/BPF/BPFCallingConv.td (+1) - (modified) llvm/lib/Target/BPF/BPFISelLowering.cpp (+31) - (modified) llvm/lib/Target/BPF/BPFInstrInfo.td (+1-3) - (modified) llvm/lib/Target/BPF/BPFMIPeephole.cpp (+88) - (modified) llvm/lib/Target/BPF/BPFRegisterInfo.cpp (+11) - (modified) llvm/lib/Target/BPF/BPFRegisterInfo.h (+3) - (added) llvm/test/CodeGen/BPF/bpf-fastcall-1.ll (+46) - (added) llvm/test/CodeGen/BPF/bpf-fastcall-2.ll (+68) - (added) llvm/test/CodeGen/BPF/bpf-fastcall-3.ll (+62) - (added) llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll (+110) ``````````diff diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 46d0a66d59c37..437cf3999bc36 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2189,6 +2189,14 @@ def BTFTypeTag : TypeAttr { let LangOpts = [COnly]; } +def BPFFastCall : InheritableAttr, + TargetSpecificAttr<TargetBPF> { + let Spellings = [GCC<"bpf_fastcall">]; + let Subjects = SubjectList<[FunctionLike]>; + let Documentation = [BPFFastCallDocs]; + let SimpleHandler = 1; +} + def WebAssemblyExportName : InheritableAttr, TargetSpecificAttr<TargetWebAssembly> { let Spellings = [Clang<"export_name">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 4b8d520d73893..6f0f659f064d9 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2317,6 +2317,25 @@ section. 
}]; } +def BPFFastCallDocs : Documentation { + let Category = DocCatType; + let Content = [{ +Functions annotated with this attribute are likely to be inlined by BPF JIT. +It is assumed that inlined implementation uses less caller saved registers, +than a regular function. +Specifically, the following registers are likely to be preserved: +- ``R0`` if function return value is ``void``; +- ``R2-R5` if function takes 1 argument; +- ``R3-R5` if function takes 2 arguments; +- ``R4-R5` if function takes 3 arguments; +- ``R5`` if function takes 4 arguments; + +For such functions Clang generates code pattern that allows BPF JIT +to recognize and remove unnecessary spills and fills of the preserved +registers. + }]; +} + def MipsInterruptDocs : Documentation { let Category = DocCatFunction; let Heading = "interrupt (MIPS)"; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 2f3dd5d01fa6c..dbddf49de964b 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2447,6 +2447,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, FuncAttrs.addAttribute(llvm::Attribute::NoCfCheck); if (TargetDecl->hasAttr<LeafAttr>()) FuncAttrs.addAttribute(llvm::Attribute::NoCallback); + if (TargetDecl->hasAttr<BPFFastCallAttr>()) + FuncAttrs.addAttribute("bpf_fastcall"); HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>(); if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) { diff --git a/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c b/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c new file mode 100644 index 0000000000000..bf49c3c9be086 --- /dev/null +++ b/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c @@ -0,0 +1,24 @@ +// REQUIRES: bpf-registered-target +// RUN: %clang_cc1 -triple bpf -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s + +#define __bpf_fastcall __attribute__((bpf_fastcall)) + +void test(void) __bpf_fastcall; +void (*ptr)(void) __bpf_fastcall; + +void foo(void) { + test(); + (*ptr)(); +} + +// CHECK: @ptr = 
global ptr null +// CHECK: define {{.*}} @foo() +// CHECK: entry: +// CHECK: call void @test() #[[test_attr:[0-9]+]] +// CHECK: %[[ptr:.*]] = load ptr, ptr @ptr, align 8 +// CHECK: call void %[[ptr]]() #[[test_attr]] +// CHECK: ret void + +// CHECK: declare void @test() #[[ptr_attr:[0-9]+]] +// CHECK: attributes #1 = { {{.*}}"bpf_fastcall"{{.*}} } +// CHECK: attributes #[[test_attr]] = { {{.*}}"bpf_fastcall"{{.*}} } diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index e082db698ef0c..b8f2b02d758d9 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -22,6 +22,7 @@ // CHECK-NEXT: AssumeAligned (SubjectMatchRule_objc_method, SubjectMatchRule_function) // CHECK-NEXT: Availability ((SubjectMatchRule_record, SubjectMatchRule_enum, SubjectMatchRule_enum_constant, SubjectMatchRule_field, SubjectMatchRule_function, SubjectMatchRule_namespace, SubjectMatchRule_objc_category, SubjectMatchRule_objc_implementation, SubjectMatchRule_objc_interface, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property, SubjectMatchRule_objc_protocol, SubjectMatchRule_record, SubjectMatchRule_type_alias, SubjectMatchRule_variable)) // CHECK-NEXT: AvailableOnlyInDefaultEvalMethod (SubjectMatchRule_type_alias) +// CHECK-NEXT: BPFFastCall (SubjectMatchRule_hasType_functionType) // CHECK-NEXT: BPFPreserveAccessIndex (SubjectMatchRule_record) // CHECK-NEXT: BPFPreserveStaticOffset (SubjectMatchRule_record) // CHECK-NEXT: BTFDeclTag (SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_record, SubjectMatchRule_field, SubjectMatchRule_type_alias) diff --git a/clang/test/Sema/bpf-attr-bpf-fastcall.c b/clang/test/Sema/bpf-attr-bpf-fastcall.c new file mode 100644 index 0000000000000..81a5cf68a8c06 --- /dev/null +++ b/clang/test/Sema/bpf-attr-bpf-fastcall.c @@ -0,0 +1,7 @@ +// REQUIRES: 
bpf-registered-target +// RUN: %clang_cc1 %s -triple bpf -verify + +__attribute__((bpf_fastcall)) int var; // expected-warning {{'bpf_fastcall' attribute only applies to functions and function pointers}} + +__attribute__((bpf_fastcall)) void func(); +__attribute__((bpf_fastcall(1))) void func_invalid(); // expected-error {{'bpf_fastcall' attribute takes no arguments}} diff --git a/llvm/lib/Target/BPF/BPFCallingConv.td b/llvm/lib/Target/BPF/BPFCallingConv.td index ef4ef1930aa8f..a557211437e95 100644 --- a/llvm/lib/Target/BPF/BPFCallingConv.td +++ b/llvm/lib/Target/BPF/BPFCallingConv.td @@ -46,3 +46,4 @@ def CC_BPF32 : CallingConv<[ ]>; def CSR : CalleeSavedRegs<(add R6, R7, R8, R9, R10)>; +def CSR_PreserveAll : CalleeSavedRegs<(add R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10)>; diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index 071fe004806e3..ff23d3b055d0d 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -402,6 +402,21 @@ SDValue BPFTargetLowering::LowerFormalArguments( const size_t BPFTargetLowering::MaxArgs = 5; +static void resetRegMaskBit(const TargetRegisterInfo *TRI, uint32_t *RegMask, + MCRegister Reg) { + for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) + RegMask[SubReg / 32] &= ~(1u << (SubReg % 32)); +} + +static uint32_t *regMaskFromTemplate(const TargetRegisterInfo *TRI, + MachineFunction &MF, + const uint32_t *BaseRegMask) { + uint32_t *RegMask = MF.allocateRegMask(); + unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs()); + memcpy(RegMask, BaseRegMask, sizeof(RegMask[0]) * RegMaskSize); + return RegMask; +} + SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; @@ -513,6 +528,22 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, for (auto &Reg : RegsToPass) Ops.push_back(DAG.getRegister(Reg.first, 
Reg.second.getValueType())); + bool HasFastCall = + (CLI.CB && isa<CallInst>(CLI.CB) && CLI.CB->hasFnAttr("bpf_fastcall")); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (HasFastCall) { + uint32_t *RegMask = regMaskFromTemplate( + TRI, MF, TRI->getCallPreservedMask(MF, CallingConv::PreserveAll)); + for (auto const &RegPair : RegsToPass) + resetRegMaskBit(TRI, RegMask, RegPair.first); + if (!CLI.CB->getType()->isVoidTy()) + resetRegMaskBit(TRI, RegMask, BPF::R0); + Ops.push_back(DAG.getRegisterMask(RegMask)); + } else { + Ops.push_back( + DAG.getRegisterMask(TRI->getCallPreservedMask(MF, CLI.CallConv))); + } + if (InGlue.getNode()) Ops.push_back(InGlue); diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index 55989f5eb6a3c..cf9764ca62123 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -677,9 +677,7 @@ let isBranch = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1 in { } // Jump and link -let isCall=1, hasDelaySlot=0, Uses = [R11], - // Potentially clobbered registers - Defs = [R0, R1, R2, R3, R4, R5] in { +let isCall=1, hasDelaySlot=0, Uses = [R11] in { def JAL : CALL<"call">; def JALX : CALLX<"callx">; } diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp index f0edf706bd8fd..5ada86a8bf1c2 100644 --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -24,6 +24,8 @@ #include "BPFInstrInfo.h" #include "BPFTargetMachine.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -319,6 +321,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass { bool in16BitRange(int Num); bool eliminateRedundantMov(); bool adjustBranch(); + bool insertMissingCallerSavedSpills(); public: @@ 
-333,6 +336,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass { Changed = eliminateRedundantMov(); if (SupportGotol) Changed = adjustBranch() || Changed; + Changed |= insertMissingCallerSavedSpills(); return Changed; } }; @@ -596,6 +600,90 @@ bool BPFMIPreEmitPeephole::adjustBranch() { return Changed; } +static const unsigned CallerSavedRegs[] = {BPF::R0, BPF::R1, BPF::R2, + BPF::R3, BPF::R4, BPF::R5}; + +struct BPFFastCall { + MachineInstr *MI; + unsigned LiveCallerSavedRegs; +}; + +static void collectBPFFastCalls(const TargetRegisterInfo *TRI, + LivePhysRegs &LiveRegs, MachineBasicBlock &BB, + SmallVectorImpl<BPFFastCall> &Calls) { + LiveRegs.init(*TRI); + LiveRegs.addLiveOuts(BB); + Calls.clear(); + for (MachineInstr &MI : llvm::reverse(BB)) { + unsigned LiveCallerSavedRegs; + if (!MI.isCall()) + goto NextInsn; + LiveCallerSavedRegs = 0; + for (MCRegister R : CallerSavedRegs) { + bool DoSpillFill = !MI.definesRegister(R, TRI) && LiveRegs.contains(R); + if (!DoSpillFill) + continue; + LiveCallerSavedRegs |= 1 << R; + } + if (LiveCallerSavedRegs) + Calls.push_back({&MI, LiveCallerSavedRegs}); + NextInsn: + LiveRegs.stepBackward(MI); + } +} + +static int64_t computeMinFixedObjOffset(MachineFrameInfo &MFI, + unsigned SlotSize) { + int64_t MinFixedObjOffset = 0; + // Same logic as in X86FrameLowering::adjustFrameForMsvcCxxEh() + for (int I = MFI.getObjectIndexBegin(); I < MFI.getObjectIndexEnd(); ++I) { + if (MFI.isDeadObjectIndex(I)) + continue; + MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I)); + } + MinFixedObjOffset -= + (SlotSize + MinFixedObjOffset % SlotSize) & (SlotSize - 1); + return MinFixedObjOffset; +} + +bool BPFMIPreEmitPeephole::insertMissingCallerSavedSpills() { + MachineFrameInfo &MFI = MF->getFrameInfo(); + SmallVector<BPFFastCall, 8> Calls; + LivePhysRegs LiveRegs; + const unsigned SlotSize = 8; + int64_t MinFixedObjOffset = computeMinFixedObjOffset(MFI, SlotSize); + bool Changed = false; + for (MachineBasicBlock 
&BB : *MF) { + collectBPFFastCalls(TRI, LiveRegs, BB, Calls); + Changed |= !Calls.empty(); + for (BPFFastCall &Call : Calls) { + int64_t CurOffset = MinFixedObjOffset; + for (MCRegister Reg : CallerSavedRegs) { + if (((1 << Reg) & Call.LiveCallerSavedRegs) == 0) + continue; + // Allocate stack object + CurOffset -= SlotSize; + MFI.CreateFixedSpillStackObject(SlotSize, CurOffset); + // Generate spill + BuildMI(BB, Call.MI->getIterator(), Call.MI->getDebugLoc(), + TII->get(BPF::STD)) + .addReg(Reg) + .addReg(BPF::R10) + .addImm(CurOffset) + .addImm(0); + // Generate fill + BuildMI(BB, ++Call.MI->getIterator(), Call.MI->getDebugLoc(), + TII->get(BPF::LDD)) + .addReg(Reg) + .addReg(BPF::R10) + .addImm(CurOffset) + .addImm(0); + } + } + } + return Changed; +} + } // end default namespace INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole", diff --git a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp index 84af6806abb36..69e1318954a97 100644 --- a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp @@ -40,6 +40,17 @@ BPFRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_SaveList; } +const uint32_t * +BPFRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + switch (CC) { + default: + return CSR_RegMask; + case CallingConv::PreserveAll: + return CSR_PreserveAll_RegMask; + } +} + BitVector BPFRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); markSuperRegs(Reserved, BPF::W10); // [W|R]10 is read only frame pointer diff --git a/llvm/lib/Target/BPF/BPFRegisterInfo.h b/llvm/lib/Target/BPF/BPFRegisterInfo.h index f7dea75ebea6f..db868769a1579 100644 --- a/llvm/lib/Target/BPF/BPFRegisterInfo.h +++ b/llvm/lib/Target/BPF/BPFRegisterInfo.h @@ -26,6 +26,9 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo { const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + 
const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; + BitVector getReservedRegs(const MachineFunction &MF) const override; bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, diff --git a/llvm/test/CodeGen/BPF/bpf-fastcall-1.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-1.ll new file mode 100644 index 0000000000000..fd81314a495ef --- /dev/null +++ b/llvm/test/CodeGen/BPF/bpf-fastcall-1.ll @@ -0,0 +1,46 @@ +; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s + +; Generated from the following C code: +; +; #define __bpf_fastcall __attribute__((bpf_fastcall)) +; +; void bar(void) __bpf_fastcall; +; void buz(long i, long j, long k); +; +; void foo(long i, long j, long k) { +; bar(); +; buz(i, j, k); +; } +; +; Using the following command: +; +; clang --target=bpf -emit-llvm -O2 -S -o - t.c +; +; (unnecessary attrs removed maually) + +; Check that function marked with bpf_fastcall does not clobber R1-R5. + +define dso_local void @foo(i64 noundef %i, i64 noundef %j, i64 noundef %k) { +entry: + tail call void @bar() #1 + tail call void @buz(i64 noundef %i, i64 noundef %j, i64 noundef %k) + ret void +} + +; CHECK: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: *(u64 *)(r10 - 8) = r1 +; CHECK-NEXT: *(u64 *)(r10 - 16) = r2 +; CHECK-NEXT: *(u64 *)(r10 - 24) = r3 +; CHECK-NEXT: call bar +; CHECK-NEXT: r3 = *(u64 *)(r10 - 24) +; CHECK-NEXT: r2 = *(u64 *)(r10 - 16) +; CHECK-NEXT: r1 = *(u64 *)(r10 - 8) +; CHECK-NEXT: call buz +; CHECK-NEXT: exit + +declare dso_local void @bar() #0 +declare dso_local void @buz(i64 noundef, i64 noundef, i64 noundef) + +attributes #0 = { "bpf_fastcall" } +attributes #1 = { nounwind "bpf_fastcall" } diff --git a/llvm/test/CodeGen/BPF/bpf-fastcall-2.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-2.ll new file mode 100644 index 0000000000000..e3e29cdddca8e --- /dev/null +++ b/llvm/test/CodeGen/BPF/bpf-fastcall-2.ll @@ -0,0 +1,68 @@ +; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s + +; Generated from the 
following C code: +; +; #define __bpf_fastcall __attribute__((bpf_fastcall)) +; +; void bar(void) __bpf_fastcall; +; void buz(long i, long j); +; +; void foo(long i, long j, long k, long l) { +; bar(); +; if (k > 42l) +; buz(i, 1); +; else +; buz(1, j); +; } +; +; Using the following command: +; +; clang --target=bpf -emit-llvm -O2 -S -o - t.c +; +; (unnecessary attrs removed maually) + +; Check that function marked with bpf_fastcall does not clobber R1-R5. +; Use R1 in one branch following call and R2 in another branch following call. + +define dso_local void @foo(i64 noundef %i, i64 noundef %j, i64 noundef %k, i64 noundef %l) { +entry: + tail call void @bar() #0 + %cmp = icmp sgt i64 %k, 42 + br i1 %cmp, label %if.then, label %if.else + +if.then: + tail call void @buz(i64 noundef %i, i64 noundef 1) + br label %if.end + +if.else: + tail call void @buz(i64 noundef 1, i64 noundef %j) + br label %if.end + +if.end: + ret void +} + +; CHECK: foo: # @foo +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: *(u64 *)(r10 - 8) = r1 +; CHECK-NEXT: *(u64 *)(r10 - 16) = r2 +; CHECK-NEXT: *(u64 *)(r10 - 24) = r3 +; CHECK-NEXT: call bar +; CHECK-NEXT: r3 = *(u64 *)(r10 - 24) +; CHECK-NEXT: r2 = *(u64 *)(r10 - 16) +; CHECK-NEXT: r1 = *(u64 *)(r10 - 8) +; CHECK-NEXT: r4 = 43 +; CHECK-NEXT: if r4 s> r3 goto [[ELSE:.*]] +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: r2 = 1 +; CHECK-NEXT: goto [[END:.*]] +; CHECK-NEXT: [[ELSE]]: # %if.else +; CHECK-NEXT: r1 = 1 +; CHECK-NEXT: [[END]]: # %if.end +; CHECK-NEXT: call buz +; CHECK-NEXT: exit + +declare dso_local void @bar() #0 +declare dso_local void @buz(i64 noundef, i64 noundef) + +attributes #0 = { "bpf_fastcall" } diff --git a/llvm/test/CodeGen/BPF/bpf-fastcall-3.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-3.ll new file mode 100644 index 0000000000000..81ca4e1ac57bc --- /dev/null +++ b/llvm/test/CodeGen/BPF/bpf-fastcall-3.ll @@ -0,0 +1,62 @@ +; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s + +; Generated from the following C code: +; +; 
#define __bpf_fastcall __attribute__((bpf_fastcall)) +; +; void quux(void *); +; void bar(long) __bpf_fastcall; +; void buz(long i, long j); +; +; void foo(long i, long j) { +; long k; +; bar(i); +; bar(i); +; buz(i, j); +; quux(&k); +; } +; +; Using the following command: +; +; clang --target=bpf -emit-llvm -O2 -S -o - t.c +; +; (unnecessary attrs removed maually) + +; Check that function marked with bpf_fastcall does not clobber R1-R5. +; Check that spills/fills wrapping the call use and reuse lowest stack offsets. + +define dso_local void @foo(i64 noundef %i, i64 noundef %j) { +entry: + %k = alloca i64, align 8 + tail call void @bar(i64 noundef %i) #0 + tail call void @bar(i64 noundef %i) #0 + tail call void @buz(i64 noundef %i, i64 noundef %j) + call void @quux(ptr noundef nonnull %k) + ret void +} + +; CHECK: # %bb.0: +; CHECK-NEXT: r3 = r1 +; CHECK-NEXT: *(u64 *)(r10 - 16) = r2 +; CHECK-NEXT: *(u64 *)(r10 - 24) = r3 +; CHECK-NEXT: call bar +; CHECK-NEXT: r3 = *(u64 *)(r10 - 24) +; CHECK-NEXT: r2 = *(u64 *)(r10 - 16) +; CHECK-NEXT: r1 = r3 +; CHECK-NEXT: *(u64 *)(r10 - 16) = r2 +; CHECK-NEXT: *(u64 *)(r10 - 24) = r3 +; CHECK-NEXT: call bar +; CHECK-NEXT: r3 = *(u64 *)(r10 - 24) +; CHECK-NEXT: r2 = *(u64 *)(r10 - 16) +; CHECK-NEXT: r1 = r3 +; CHECK-NEXT: call buz +; CHECK-NEXT: r1 = r10 +; CHECK-NEXT: r1 += -8 +; CHECK-NEXT: call quux +; CHECK-NEXT: exit + +declare dso_local void @bar(i64 noundef) #0 +declare dso_local void @buz(i64 noundef, i64 noundef) +declare dso_local void @quux(ptr noundef) + +attributes #0 = { "bpf_fastcall" } diff --git a/llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll new file mode 100644 index 0000000000000..857d2f000d1d5 --- /dev/null +++ b/llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll @@ -0,0 +1,110 @@ +; RUN: llc -O2 --march=bpfel \ +; RUN: -print-after=stack-slot-coloring %s \ +; RUN: -o /dev/null 2>&1 | FileCheck %s + +; Generated from the following C code: +; +; #define 
__bpf_fastcall __attribute__((bpf_fastcall)) +; +; void bar1(void) __bpf_fastcall; +; void buz1(long i, long j, long k); +; void foo1(long i, long j, long k) { +; bar1(); +; buz1(i, j, k); +; } +; +; long bar2(void) __bpf_fastcall; +; void buz2(long i, long j, l... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/101228 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits