https://github.com/eddyz87 updated https://github.com/llvm/llvm-project/pull/101228
>From 6a3b5c79d148863f39b84ce3da4e4794829c1c56 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman <eddy...@gmail.com> Date: Wed, 8 May 2024 15:29:47 -0700 Subject: [PATCH 1/6] [BPF] introduce __attribute__((bpf_fastcall)) This commit introduces attribute bpf_fastcall to declare BPF functions that do not clobber some of the caller saved registers (R0-R5). The idea is to generate the code complying with generic BPF ABI, but allow compatible Linux Kernel to remove unnecessary spills and fills of non-scratched registers (given some compiler assistance). For such functions do register allocation as-if caller saved registers are not clobbered, but later wrap the calls with spill and fill patterns that are simple to recognize in kernel. For example for the following C code: #define __bpf_fastcall __attribute__((bpf_fastcall)) void bar(void) __bpf_fastcall; void buz(long i, long j, long k); void foo(long i, long j, long k) { bar(); buz(i, j, k); } First allocate registers as if: foo: call bar # note: no spills for i,j,k (r1,r2,r3) call buz exit And later insert spills fills on the peephole phase: foo: *(u64 *)(r10 - 8) = r1; # Such call pattern is *(u64 *)(r10 - 16) = r2; # correct when used with *(u64 *)(r10 - 24) = r3; # old kernels. call bar r3 = *(u64 *)(r10 - 24); # But also allows new r2 = *(u64 *)(r10 - 16); # kernels to recognize the r1 = *(u64 *)(r10 - 8); # pattern and remove spills/fills. call buz exit The offsets for generated spills/fills are picked as minimal stack offsets for the function. Allocated stack slots are not used for any other purposes, in order to simplify in-kernel analysis. 
--- clang/include/clang/Basic/Attr.td | 8 ++ clang/include/clang/Basic/AttrDocs.td | 19 +++ clang/lib/CodeGen/CGCall.cpp | 2 + clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c | 24 ++++ ...a-attribute-supported-attributes-list.test | 1 + clang/test/Sema/bpf-attr-bpf-fastcall.c | 7 ++ llvm/lib/Target/BPF/BPFCallingConv.td | 1 + llvm/lib/Target/BPF/BPFISelLowering.cpp | 31 +++++ llvm/lib/Target/BPF/BPFInstrInfo.td | 4 +- llvm/lib/Target/BPF/BPFMIPeephole.cpp | 88 ++++++++++++++ llvm/lib/Target/BPF/BPFRegisterInfo.cpp | 11 ++ llvm/lib/Target/BPF/BPFRegisterInfo.h | 3 + llvm/test/CodeGen/BPF/bpf-fastcall-1.ll | 46 ++++++++ llvm/test/CodeGen/BPF/bpf-fastcall-2.ll | 68 +++++++++++ llvm/test/CodeGen/BPF/bpf-fastcall-3.ll | 62 ++++++++++ .../CodeGen/BPF/bpf-fastcall-regmask-1.ll | 110 ++++++++++++++++++ 16 files changed, 482 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c create mode 100644 clang/test/Sema/bpf-attr-bpf-fastcall.c create mode 100644 llvm/test/CodeGen/BPF/bpf-fastcall-1.ll create mode 100644 llvm/test/CodeGen/BPF/bpf-fastcall-2.ll create mode 100644 llvm/test/CodeGen/BPF/bpf-fastcall-3.ll create mode 100644 llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 46d0a66d59c375..437cf3999bc365 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2189,6 +2189,14 @@ def BTFTypeTag : TypeAttr { let LangOpts = [COnly]; } +def BPFFastCall : InheritableAttr, + TargetSpecificAttr<TargetBPF> { + let Spellings = [GCC<"bpf_fastcall">]; + let Subjects = SubjectList<[FunctionLike]>; + let Documentation = [BPFFastCallDocs]; + let SimpleHandler = 1; +} + def WebAssemblyExportName : InheritableAttr, TargetSpecificAttr<TargetWebAssembly> { let Spellings = [Clang<"export_name">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 4b8d520d73893d..6f0f659f064d95 100644 --- 
a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2317,6 +2317,25 @@ section. }]; } +def BPFFastCallDocs : Documentation { + let Category = DocCatType; + let Content = [{ +Functions annotated with this attribute are likely to be inlined by BPF JIT. +It is assumed that inlined implementation uses fewer caller saved registers +than a regular function. +Specifically, the following registers are likely to be preserved: +- ``R0`` if function return value is ``void``; +- ``R2-R5`` if function takes 1 argument; +- ``R3-R5`` if function takes 2 arguments; +- ``R4-R5`` if function takes 3 arguments; +- ``R5`` if function takes 4 arguments; + +For such functions Clang generates code pattern that allows BPF JIT +to recognize and remove unnecessary spills and fills of the preserved +registers. + }]; +} + def MipsInterruptDocs : Documentation { let Category = DocCatFunction; let Heading = "interrupt (MIPS)"; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 2f3dd5d01fa6c9..dbddf49de964bc 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2447,6 +2447,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, FuncAttrs.addAttribute(llvm::Attribute::NoCfCheck); if (TargetDecl->hasAttr<LeafAttr>()) FuncAttrs.addAttribute(llvm::Attribute::NoCallback); + if (TargetDecl->hasAttr<BPFFastCallAttr>()) + FuncAttrs.addAttribute("bpf_fastcall"); HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>(); if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) { diff --git a/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c b/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c new file mode 100644 index 00000000000000..bf49c3c9be0864 --- /dev/null +++ b/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c @@ -0,0 +1,24 @@ +// REQUIRES: bpf-registered-target +// RUN: %clang_cc1 -triple bpf -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s + +#define __bpf_fastcall __attribute__((bpf_fastcall)) + +void 
test(void) __bpf_fastcall; +void (*ptr)(void) __bpf_fastcall; + +void foo(void) { + test(); + (*ptr)(); +} + +// CHECK: @ptr = global ptr null +// CHECK: define {{.*}} @foo() +// CHECK: entry: +// CHECK: call void @test() #[[test_attr:[0-9]+]] +// CHECK: %[[ptr:.*]] = load ptr, ptr @ptr, align 8 +// CHECK: call void %[[ptr]]() #[[test_attr]] +// CHECK: ret void + +// CHECK: declare void @test() #[[ptr_attr:[0-9]+]] +// CHECK: attributes #1 = { {{.*}}"bpf_fastcall"{{.*}} } +// CHECK: attributes #[[test_attr]] = { {{.*}}"bpf_fastcall"{{.*}} } diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index e082db698ef0ce..b8f2b02d758d9e 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -22,6 +22,7 @@ // CHECK-NEXT: AssumeAligned (SubjectMatchRule_objc_method, SubjectMatchRule_function) // CHECK-NEXT: Availability ((SubjectMatchRule_record, SubjectMatchRule_enum, SubjectMatchRule_enum_constant, SubjectMatchRule_field, SubjectMatchRule_function, SubjectMatchRule_namespace, SubjectMatchRule_objc_category, SubjectMatchRule_objc_implementation, SubjectMatchRule_objc_interface, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property, SubjectMatchRule_objc_protocol, SubjectMatchRule_record, SubjectMatchRule_type_alias, SubjectMatchRule_variable)) // CHECK-NEXT: AvailableOnlyInDefaultEvalMethod (SubjectMatchRule_type_alias) +// CHECK-NEXT: BPFFastCall (SubjectMatchRule_hasType_functionType) // CHECK-NEXT: BPFPreserveAccessIndex (SubjectMatchRule_record) // CHECK-NEXT: BPFPreserveStaticOffset (SubjectMatchRule_record) // CHECK-NEXT: BTFDeclTag (SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_record, SubjectMatchRule_field, SubjectMatchRule_type_alias) diff --git a/clang/test/Sema/bpf-attr-bpf-fastcall.c b/clang/test/Sema/bpf-attr-bpf-fastcall.c new file mode 100644 
index 00000000000000..81a5cf68a8c061 --- /dev/null +++ b/clang/test/Sema/bpf-attr-bpf-fastcall.c @@ -0,0 +1,7 @@ +// REQUIRES: bpf-registered-target +// RUN: %clang_cc1 %s -triple bpf -verify + +__attribute__((bpf_fastcall)) int var; // expected-warning {{'bpf_fastcall' attribute only applies to functions and function pointers}} + +__attribute__((bpf_fastcall)) void func(); +__attribute__((bpf_fastcall(1))) void func_invalid(); // expected-error {{'bpf_fastcall' attribute takes no arguments}} diff --git a/llvm/lib/Target/BPF/BPFCallingConv.td b/llvm/lib/Target/BPF/BPFCallingConv.td index ef4ef1930aa8fb..a557211437e95f 100644 --- a/llvm/lib/Target/BPF/BPFCallingConv.td +++ b/llvm/lib/Target/BPF/BPFCallingConv.td @@ -46,3 +46,4 @@ def CC_BPF32 : CallingConv<[ ]>; def CSR : CalleeSavedRegs<(add R6, R7, R8, R9, R10)>; +def CSR_PreserveAll : CalleeSavedRegs<(add R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10)>; diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index 071fe004806e3e..ff23d3b055d0d5 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -402,6 +402,21 @@ SDValue BPFTargetLowering::LowerFormalArguments( const size_t BPFTargetLowering::MaxArgs = 5; +static void resetRegMaskBit(const TargetRegisterInfo *TRI, uint32_t *RegMask, + MCRegister Reg) { + for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) + RegMask[SubReg / 32] &= ~(1u << (SubReg % 32)); +} + +static uint32_t *regMaskFromTemplate(const TargetRegisterInfo *TRI, + MachineFunction &MF, + const uint32_t *BaseRegMask) { + uint32_t *RegMask = MF.allocateRegMask(); + unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs()); + memcpy(RegMask, BaseRegMask, sizeof(RegMask[0]) * RegMaskSize); + return RegMask; +} + SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; @@ -513,6 +528,22 @@ SDValue 
BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, for (auto &Reg : RegsToPass) Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); + bool HasFastCall = + (CLI.CB && isa<CallInst>(CLI.CB) && CLI.CB->hasFnAttr("bpf_fastcall")); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (HasFastCall) { + uint32_t *RegMask = regMaskFromTemplate( + TRI, MF, TRI->getCallPreservedMask(MF, CallingConv::PreserveAll)); + for (auto const &RegPair : RegsToPass) + resetRegMaskBit(TRI, RegMask, RegPair.first); + if (!CLI.CB->getType()->isVoidTy()) + resetRegMaskBit(TRI, RegMask, BPF::R0); + Ops.push_back(DAG.getRegisterMask(RegMask)); + } else { + Ops.push_back( + DAG.getRegisterMask(TRI->getCallPreservedMask(MF, CLI.CallConv))); + } + if (InGlue.getNode()) Ops.push_back(InGlue); diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index 55989f5eb6a3c0..cf9764ca62123d 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -677,9 +677,7 @@ let isBranch = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1 in { } // Jump and link -let isCall=1, hasDelaySlot=0, Uses = [R11], - // Potentially clobbered registers - Defs = [R0, R1, R2, R3, R4, R5] in { +let isCall=1, hasDelaySlot=0, Uses = [R11] in { def JAL : CALL<"call">; def JALX : CALLX<"callx">; } diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp index f0edf706bd8fd7..5ada86a8bf1c29 100644 --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -24,6 +24,8 @@ #include "BPFInstrInfo.h" #include "BPFTargetMachine.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -319,6 +321,7 @@ struct BPFMIPreEmitPeephole : public 
MachineFunctionPass { bool in16BitRange(int Num); bool eliminateRedundantMov(); bool adjustBranch(); + bool insertMissingCallerSavedSpills(); public: @@ -333,6 +336,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass { Changed = eliminateRedundantMov(); if (SupportGotol) Changed = adjustBranch() || Changed; + Changed |= insertMissingCallerSavedSpills(); return Changed; } }; @@ -596,6 +600,90 @@ bool BPFMIPreEmitPeephole::adjustBranch() { return Changed; } +static const unsigned CallerSavedRegs[] = {BPF::R0, BPF::R1, BPF::R2, + BPF::R3, BPF::R4, BPF::R5}; + +struct BPFFastCall { + MachineInstr *MI; + unsigned LiveCallerSavedRegs; +}; + +static void collectBPFFastCalls(const TargetRegisterInfo *TRI, + LivePhysRegs &LiveRegs, MachineBasicBlock &BB, + SmallVectorImpl<BPFFastCall> &Calls) { + LiveRegs.init(*TRI); + LiveRegs.addLiveOuts(BB); + Calls.clear(); + for (MachineInstr &MI : llvm::reverse(BB)) { + unsigned LiveCallerSavedRegs; + if (!MI.isCall()) + goto NextInsn; + LiveCallerSavedRegs = 0; + for (MCRegister R : CallerSavedRegs) { + bool DoSpillFill = !MI.definesRegister(R, TRI) && LiveRegs.contains(R); + if (!DoSpillFill) + continue; + LiveCallerSavedRegs |= 1 << R; + } + if (LiveCallerSavedRegs) + Calls.push_back({&MI, LiveCallerSavedRegs}); + NextInsn: + LiveRegs.stepBackward(MI); + } +} + +static int64_t computeMinFixedObjOffset(MachineFrameInfo &MFI, + unsigned SlotSize) { + int64_t MinFixedObjOffset = 0; + // Same logic as in X86FrameLowering::adjustFrameForMsvcCxxEh() + for (int I = MFI.getObjectIndexBegin(); I < MFI.getObjectIndexEnd(); ++I) { + if (MFI.isDeadObjectIndex(I)) + continue; + MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I)); + } + MinFixedObjOffset -= + (SlotSize + MinFixedObjOffset % SlotSize) & (SlotSize - 1); + return MinFixedObjOffset; +} + +bool BPFMIPreEmitPeephole::insertMissingCallerSavedSpills() { + MachineFrameInfo &MFI = MF->getFrameInfo(); + SmallVector<BPFFastCall, 8> Calls; + LivePhysRegs 
LiveRegs; + const unsigned SlotSize = 8; + int64_t MinFixedObjOffset = computeMinFixedObjOffset(MFI, SlotSize); + bool Changed = false; + for (MachineBasicBlock &BB : *MF) { + collectBPFFastCalls(TRI, LiveRegs, BB, Calls); + Changed |= !Calls.empty(); + for (BPFFastCall &Call : Calls) { + int64_t CurOffset = MinFixedObjOffset; + for (MCRegister Reg : CallerSavedRegs) { + if (((1 << Reg) & Call.LiveCallerSavedRegs) == 0) + continue; + // Allocate stack object + CurOffset -= SlotSize; + MFI.CreateFixedSpillStackObject(SlotSize, CurOffset); + // Generate spill + BuildMI(BB, Call.MI->getIterator(), Call.MI->getDebugLoc(), + TII->get(BPF::STD)) + .addReg(Reg) + .addReg(BPF::R10) + .addImm(CurOffset) + .addImm(0); + // Generate fill + BuildMI(BB, ++Call.MI->getIterator(), Call.MI->getDebugLoc(), + TII->get(BPF::LDD)) + .addReg(Reg) + .addReg(BPF::R10) + .addImm(CurOffset) + .addImm(0); + } + } + } + return Changed; +} + } // end default namespace INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole", diff --git a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp index 84af6806abb36c..69e1318954a973 100644 --- a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp @@ -40,6 +40,17 @@ BPFRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_SaveList; } +const uint32_t * +BPFRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + switch (CC) { + default: + return CSR_RegMask; + case CallingConv::PreserveAll: + return CSR_PreserveAll_RegMask; + } +} + BitVector BPFRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); markSuperRegs(Reserved, BPF::W10); // [W|R]10 is read only frame pointer diff --git a/llvm/lib/Target/BPF/BPFRegisterInfo.h b/llvm/lib/Target/BPF/BPFRegisterInfo.h index f7dea75ebea6f9..db868769a1579a 100644 --- a/llvm/lib/Target/BPF/BPFRegisterInfo.h +++ 
b/llvm/lib/Target/BPF/BPFRegisterInfo.h @@ -26,6 +26,9 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo { const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; + BitVector getReservedRegs(const MachineFunction &MF) const override; bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, diff --git a/llvm/test/CodeGen/BPF/bpf-fastcall-1.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-1.ll new file mode 100644 index 00000000000000..fd81314a495ef8 --- /dev/null +++ b/llvm/test/CodeGen/BPF/bpf-fastcall-1.ll @@ -0,0 +1,46 @@ +; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s + +; Generated from the following C code: +; +; #define __bpf_fastcall __attribute__((bpf_fastcall)) +; +; void bar(void) __bpf_fastcall; +; void buz(long i, long j, long k); +; +; void foo(long i, long j, long k) { +; bar(); +; buz(i, j, k); +; } +; +; Using the following command: +; +; clang --target=bpf -emit-llvm -O2 -S -o - t.c +; +; (unnecessary attrs removed manually) + +; Check that function marked with bpf_fastcall does not clobber R1-R5. 
+ +define dso_local void @foo(i64 noundef %i, i64 noundef %j, i64 noundef %k) { +entry: + tail call void @bar() #1 + tail call void @buz(i64 noundef %i, i64 noundef %j, i64 noundef %k) + ret void +} + +; CHECK: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: *(u64 *)(r10 - 8) = r1 +; CHECK-NEXT: *(u64 *)(r10 - 16) = r2 +; CHECK-NEXT: *(u64 *)(r10 - 24) = r3 +; CHECK-NEXT: call bar +; CHECK-NEXT: r3 = *(u64 *)(r10 - 24) +; CHECK-NEXT: r2 = *(u64 *)(r10 - 16) +; CHECK-NEXT: r1 = *(u64 *)(r10 - 8) +; CHECK-NEXT: call buz +; CHECK-NEXT: exit + +declare dso_local void @bar() #0 +declare dso_local void @buz(i64 noundef, i64 noundef, i64 noundef) + +attributes #0 = { "bpf_fastcall" } +attributes #1 = { nounwind "bpf_fastcall" } diff --git a/llvm/test/CodeGen/BPF/bpf-fastcall-2.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-2.ll new file mode 100644 index 00000000000000..e3e29cdddca8ea --- /dev/null +++ b/llvm/test/CodeGen/BPF/bpf-fastcall-2.ll @@ -0,0 +1,68 @@ +; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s + +; Generated from the following C code: +; +; #define __bpf_fastcall __attribute__((bpf_fastcall)) +; +; void bar(void) __bpf_fastcall; +; void buz(long i, long j); +; +; void foo(long i, long j, long k, long l) { +; bar(); +; if (k > 42l) +; buz(i, 1); +; else +; buz(1, j); +; } +; +; Using the following command: +; +; clang --target=bpf -emit-llvm -O2 -S -o - t.c +; +; (unnecessary attrs removed manually) + +; Check that function marked with bpf_fastcall does not clobber R1-R5. +; Use R1 in one branch following call and R2 in another branch following call. 
+ +define dso_local void @foo(i64 noundef %i, i64 noundef %j, i64 noundef %k, i64 noundef %l) { +entry: + tail call void @bar() #0 + %cmp = icmp sgt i64 %k, 42 + br i1 %cmp, label %if.then, label %if.else + +if.then: + tail call void @buz(i64 noundef %i, i64 noundef 1) + br label %if.end + +if.else: + tail call void @buz(i64 noundef 1, i64 noundef %j) + br label %if.end + +if.end: + ret void +} + +; CHECK: foo: # @foo +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: *(u64 *)(r10 - 8) = r1 +; CHECK-NEXT: *(u64 *)(r10 - 16) = r2 +; CHECK-NEXT: *(u64 *)(r10 - 24) = r3 +; CHECK-NEXT: call bar +; CHECK-NEXT: r3 = *(u64 *)(r10 - 24) +; CHECK-NEXT: r2 = *(u64 *)(r10 - 16) +; CHECK-NEXT: r1 = *(u64 *)(r10 - 8) +; CHECK-NEXT: r4 = 43 +; CHECK-NEXT: if r4 s> r3 goto [[ELSE:.*]] +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: r2 = 1 +; CHECK-NEXT: goto [[END:.*]] +; CHECK-NEXT: [[ELSE]]: # %if.else +; CHECK-NEXT: r1 = 1 +; CHECK-NEXT: [[END]]: # %if.end +; CHECK-NEXT: call buz +; CHECK-NEXT: exit + +declare dso_local void @bar() #0 +declare dso_local void @buz(i64 noundef, i64 noundef) + +attributes #0 = { "bpf_fastcall" } diff --git a/llvm/test/CodeGen/BPF/bpf-fastcall-3.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-3.ll new file mode 100644 index 00000000000000..81ca4e1ac57bc7 --- /dev/null +++ b/llvm/test/CodeGen/BPF/bpf-fastcall-3.ll @@ -0,0 +1,62 @@ +; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s + +; Generated from the following C code: +; +; #define __bpf_fastcall __attribute__((bpf_fastcall)) +; +; void quux(void *); +; void bar(long) __bpf_fastcall; +; void buz(long i, long j); +; +; void foo(long i, long j) { +; long k; +; bar(i); +; bar(i); +; buz(i, j); +; quux(&k); +; } +; +; Using the following command: +; +; clang --target=bpf -emit-llvm -O2 -S -o - t.c +; +; (unnecessary attrs removed manually) + +; Check that function marked with bpf_fastcall does not clobber R1-R5. +; Check that spills/fills wrapping the call use and reuse lowest stack offsets. 
+ +define dso_local void @foo(i64 noundef %i, i64 noundef %j) { +entry: + %k = alloca i64, align 8 + tail call void @bar(i64 noundef %i) #0 + tail call void @bar(i64 noundef %i) #0 + tail call void @buz(i64 noundef %i, i64 noundef %j) + call void @quux(ptr noundef nonnull %k) + ret void +} + +; CHECK: # %bb.0: +; CHECK-NEXT: r3 = r1 +; CHECK-NEXT: *(u64 *)(r10 - 16) = r2 +; CHECK-NEXT: *(u64 *)(r10 - 24) = r3 +; CHECK-NEXT: call bar +; CHECK-NEXT: r3 = *(u64 *)(r10 - 24) +; CHECK-NEXT: r2 = *(u64 *)(r10 - 16) +; CHECK-NEXT: r1 = r3 +; CHECK-NEXT: *(u64 *)(r10 - 16) = r2 +; CHECK-NEXT: *(u64 *)(r10 - 24) = r3 +; CHECK-NEXT: call bar +; CHECK-NEXT: r3 = *(u64 *)(r10 - 24) +; CHECK-NEXT: r2 = *(u64 *)(r10 - 16) +; CHECK-NEXT: r1 = r3 +; CHECK-NEXT: call buz +; CHECK-NEXT: r1 = r10 +; CHECK-NEXT: r1 += -8 +; CHECK-NEXT: call quux +; CHECK-NEXT: exit + +declare dso_local void @bar(i64 noundef) #0 +declare dso_local void @buz(i64 noundef, i64 noundef) +declare dso_local void @quux(ptr noundef) + +attributes #0 = { "bpf_fastcall" } diff --git a/llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll new file mode 100644 index 00000000000000..857d2f000d1d5a --- /dev/null +++ b/llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll @@ -0,0 +1,110 @@ +; RUN: llc -O2 --march=bpfel \ +; RUN: -print-after=stack-slot-coloring %s \ +; RUN: -o /dev/null 2>&1 | FileCheck %s + +; Generated from the following C code: +; +; #define __bpf_fastcall __attribute__((bpf_fastcall)) +; +; void bar1(void) __bpf_fastcall; +; void buz1(long i, long j, long k); +; void foo1(long i, long j, long k) { +; bar1(); +; buz1(i, j, k); +; } +; +; long bar2(void) __bpf_fastcall; +; void buz2(long i, long j, long k); +; void foo2(long i, long j, long k) { +; bar2(); +; buz2(i, j, k); +; } +; +; void bar3(long) __bpf_fastcall; +; void buz3(long i, long j, long k); +; void foo3(long i, long j, long k) { +; bar3(i); +; buz3(i, j, k); +; } +; +; long bar4(long, long) 
__bpf_fastcall; +; void buz4(long i, long j, long k); +; void foo4(long i, long j, long k) { +; bar4(i, j); +; buz4(i, j, k); +; } +; +; Using the following command: +; +; clang --target=bpf -emit-llvm -O2 -S -o - t.c +; +; (unnecessary attrs removed manually) + +; Check regmask for calls to functions marked with bpf_fastcall: +; - void function w/o parameters +; - non-void function w/o parameters +; - void function with parameters +; - non-void function with parameters + +declare dso_local void @bar1() #0 +declare dso_local void @buz1(i64 noundef, i64 noundef, i64 noundef) +define dso_local void @foo1(i64 noundef %i, i64 noundef %j, i64 noundef %k) { +entry: + tail call void @bar1() #1 + tail call void @buz1(i64 noundef %i, i64 noundef %j, i64 noundef %k) + ret void +} + +; CHECK: JAL @bar1, <regmask $r0 $r1 $r2 $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10 +; CHECK-SAME: $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10> +; CHECK-SAME: , implicit $r11, implicit-def $r11 +; CHECK: JAL @buz1, <regmask $r6 $r7 $r8 $r9 $r10 $w6 $w7 $w8 $w9 $w10> +; CHECK-SAME: , implicit $r11, implicit $r1, implicit $r2, implicit $r3, implicit-def $r11 + +declare dso_local i64 @bar2() #0 +declare dso_local void @buz2(i64 noundef, i64 noundef, i64 noundef) +define dso_local void @foo2(i64 noundef %i, i64 noundef %j, i64 noundef %k) { +entry: + tail call i64 @bar2() #1 + tail call void @buz2(i64 noundef %i, i64 noundef %j, i64 noundef %k) + ret void +} + +; CHECK: JAL @bar2, <regmask $r1 $r2 $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10 +; CHECK-SAME: $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10> +; CHECK-SAME: , implicit $r11, implicit-def $r11, implicit-def dead $r0 +; CHECK: JAL @buz2, <regmask $r6 $r7 $r8 $r9 $r10 $w6 $w7 $w8 $w9 $w10> +; CHECK-SAME: , implicit $r11, implicit $r1, implicit $r2, implicit $r3, implicit-def $r11 + +declare dso_local void @bar3(i64) #0 +declare dso_local void @buz3(i64 noundef, i64 noundef, i64 noundef) +define dso_local void @foo3(i64 noundef %i, i64 noundef %j, i64 noundef %k) { 
+entry: + tail call void @bar3(i64 noundef %i) #1 + tail call void @buz3(i64 noundef %i, i64 noundef %j, i64 noundef %k) + ret void +} + +; CHECK: JAL @bar3, <regmask $r0 $r2 $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10 +; CHECK-SAME: $w0 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10> +; CHECK-SAME: , implicit $r11, implicit $r1, implicit-def $r11 +; CHECK: JAL @buz3, <regmask $r6 $r7 $r8 $r9 $r10 $w6 $w7 $w8 $w9 $w10> +; CHECK-SAME: , implicit $r11, implicit $r1, implicit $r2, implicit $r3, implicit-def $r11 + +declare dso_local i64 @bar4(i64 noundef, i64 noundef) #0 +declare dso_local void @buz4(i64 noundef, i64 noundef, i64 noundef) +define dso_local void @foo4(i64 noundef %i, i64 noundef %j, i64 noundef %k) { +entry: + tail call i64 @bar4(i64 noundef %i, i64 noundef %j) #1 + tail call void @buz4(i64 noundef %i, i64 noundef %j, i64 noundef %k) + ret void +} + +; CHECK: JAL @bar4, <regmask $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10 +; CHECK-SAME: $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10> +; CHECK-SAME: , implicit $r11, implicit $r1, implicit $r2, implicit-def $r11, implicit-def dead $r0 +; CHECK: JAL @buz4, <regmask $r6 $r7 $r8 $r9 $r10 $w6 $w7 $w8 $w9 $w10> +; CHECK-SAME: , implicit $r11, implicit $r1, implicit $r2, implicit $r3, implicit-def $r11 + +attributes #0 = { "bpf_fastcall" } +attributes #1 = { nounwind "bpf_fastcall" } >From 94f8b89e7110738ce024fe7035edc2528a2018b7 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman <eddy...@gmail.com> Date: Fri, 2 Aug 2024 12:11:49 -0700 Subject: [PATCH 2/6] styling fix: remove goto in collectBPFFastCalls --- llvm/lib/Target/BPF/BPFMIPeephole.cpp | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp index 5ada86a8bf1c29..58dad71b920552 100644 --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -615,19 +615,17 @@ static void collectBPFFastCalls(const TargetRegisterInfo *TRI, LiveRegs.addLiveOuts(BB); 
Calls.clear(); for (MachineInstr &MI : llvm::reverse(BB)) { - unsigned LiveCallerSavedRegs; - if (!MI.isCall()) - goto NextInsn; - LiveCallerSavedRegs = 0; - for (MCRegister R : CallerSavedRegs) { - bool DoSpillFill = !MI.definesRegister(R, TRI) && LiveRegs.contains(R); - if (!DoSpillFill) - continue; - LiveCallerSavedRegs |= 1 << R; + if (MI.isCall()) { + unsigned LiveCallerSavedRegs = 0; + for (MCRegister R : CallerSavedRegs) { + bool DoSpillFill = !MI.definesRegister(R, TRI) && LiveRegs.contains(R); + if (!DoSpillFill) + continue; + LiveCallerSavedRegs |= 1 << R; + } + if (LiveCallerSavedRegs) + Calls.push_back({&MI, LiveCallerSavedRegs}); } - if (LiveCallerSavedRegs) - Calls.push_back({&MI, LiveCallerSavedRegs}); - NextInsn: LiveRegs.stepBackward(MI); } } >From b89fa14f78e734bc2496821f5425ef7460e4c59c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman <eddy...@gmail.com> Date: Fri, 2 Aug 2024 12:12:22 -0700 Subject: [PATCH 3/6] test fix: use [[names]] for attributes in bpf-attr-bpf-fastcall-1 Also change attribute names to be less confusing. 
--- clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c b/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c index bf49c3c9be0864..fa740d8e44ff51 100644 --- a/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c +++ b/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c @@ -12,13 +12,13 @@ void foo(void) { } // CHECK: @ptr = global ptr null -// CHECK: define {{.*}} @foo() +// CHECK: define {{.*}} void @foo() // CHECK: entry: -// CHECK: call void @test() #[[test_attr:[0-9]+]] +// CHECK: call void @test() #[[call_attr:[0-9]+]] // CHECK: %[[ptr:.*]] = load ptr, ptr @ptr, align 8 -// CHECK: call void %[[ptr]]() #[[test_attr]] +// CHECK: call void %[[ptr]]() #[[call_attr]] // CHECK: ret void -// CHECK: declare void @test() #[[ptr_attr:[0-9]+]] -// CHECK: attributes #1 = { {{.*}}"bpf_fastcall"{{.*}} } -// CHECK: attributes #[[test_attr]] = { {{.*}}"bpf_fastcall"{{.*}} } +// CHECK: declare void @test() #[[func_attr:[0-9]+]] +// CHECK: attributes #[[func_attr]] = { {{.*}}"bpf_fastcall"{{.*}} } +// CHECK: attributes #[[call_attr]] = { "bpf_fastcall" } >From 8f06f5bbeb1ba004e687417aa42558a2c95bf6a6 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman <eddy...@gmail.com> Date: Thu, 15 Aug 2024 12:11:39 -0700 Subject: [PATCH 4/6] use `Clang` syntax instead of `GCC` for bpf_fastcall --- clang/include/clang/Basic/Attr.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 437cf3999bc365..0106fcf6944a92 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2191,7 +2191,7 @@ def BTFTypeTag : TypeAttr { def BPFFastCall : InheritableAttr, TargetSpecificAttr<TargetBPF> { - let Spellings = [GCC<"bpf_fastcall">]; + let Spellings = [Clang<"bpf_fastcall">]; let Subjects = SubjectList<[FunctionLike]>; let Documentation = [BPFFastCallDocs]; let SimpleHandler = 1; >From 
4bf461678b6eb12da78c0ebcb793b120a2918843 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman <eddy...@gmail.com> Date: Thu, 15 Aug 2024 12:28:05 -0700 Subject: [PATCH 5/6] adjust Sema test case to show that bpf_fastcall is not a type attr As discussed in the pull request thread, it is not an error to assign pointer with bpf_fastcall attribute to a pointer w/o such attribute and vice-versa. (Hence, no need to track bpf_fastcall as a part of the type). --- clang/test/Sema/bpf-attr-bpf-fastcall.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/clang/test/Sema/bpf-attr-bpf-fastcall.c b/clang/test/Sema/bpf-attr-bpf-fastcall.c index 81a5cf68a8c061..178b1f50741e87 100644 --- a/clang/test/Sema/bpf-attr-bpf-fastcall.c +++ b/clang/test/Sema/bpf-attr-bpf-fastcall.c @@ -5,3 +5,10 @@ __attribute__((bpf_fastcall)) int var; // expected-warning {{'bpf_fastcall' attr __attribute__((bpf_fastcall)) void func(); __attribute__((bpf_fastcall(1))) void func_invalid(); // expected-error {{'bpf_fastcall' attribute takes no arguments}} + +void (*ptr1)(void) __attribute__((bpf_fastcall)); +void (*ptr2)(void); +void foo(void) { + ptr2 = ptr1; // not an error + ptr1 = ptr2; // not an error +} >From da18a357c5fe6ec8a5430f0ad8e5384c40914bb9 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman <eddy...@gmail.com> Date: Fri, 16 Aug 2024 01:24:52 -0700 Subject: [PATCH 6/6] mark bpf_fastcall as COnly attribute --- clang/include/clang/Basic/Attr.td | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 0106fcf6944a92..901e9cb8a2e0e5 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2194,6 +2194,7 @@ def BPFFastCall : InheritableAttr, let Spellings = [Clang<"bpf_fastcall">]; let Subjects = SubjectList<[FunctionLike]>; let Documentation = [BPFFastCallDocs]; + let LangOpts = [COnly]; let SimpleHandler = 1; } _______________________________________________ cfe-commits mailing list 
cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits