https://github.com/dpaoliello created https://github.com/llvm/llvm-project/pull/81800
Backports two fixes for ARM64EC variadic args:
* <https://github.com/llvm/llvm-project/pull/80595>
* <https://github.com/llvm/llvm-project/pull/79774>

>From 45fbf96034992f7e5e1e6678bfb2988c58a9c0ae Mon Sep 17 00:00:00 2001
From: Billy Laws <blaw...@gmail.com>
Date: Wed, 31 Jan 2024 02:32:15 +0000
Subject: [PATCH 1/2] [AArch64] Fix variadic tail-calls on ARM64EC (#79774)

ARM64EC varargs calls expect that x4 = sp at entry; special handling is
needed to ensure this with tail calls, since the tail-call branch occurs
after the epilogue while the x4 write happens before it.

I tried going through AArch64MachineFrameLowering for this, hoping to
avoid creating the dummy object, but this was the best I could do: the
stack info that it uses isn't populated at this stage, and
CreateFixedObject also explicitly forbids zero-sized objects.
---
 .../Target/AArch64/AArch64ISelLowering.cpp   | 10 ++++-
 llvm/test/CodeGen/AArch64/arm64ec-varargs.ll | 37 +++++++++++++++++++
 llvm/test/CodeGen/AArch64/vararg-tallcall.ll |  8 ++++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e97f5e32201488..957b556edaf312 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8007,11 +8007,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   }
 
   if (IsVarArg && Subtarget->isWindowsArm64EC()) {
+    SDValue ParamPtr = StackPtr;
+    if (IsTailCall) {
+      // Create a dummy object at the top of the stack that can be used to get
+      // the SP after the epilogue
+      int FI = MF.getFrameInfo().CreateFixedObject(1, FPDiff, true);
+      ParamPtr = DAG.getFrameIndex(FI, PtrVT);
+    }
+
     // For vararg calls, the Arm64EC ABI requires values in x4 and x5
     // describing the argument list. x4 contains the address of the
     // first stack parameter. x5 contains the size in bytes of all parameters
     // passed on the stack.
-    RegsToPass.emplace_back(AArch64::X4, StackPtr);
+    RegsToPass.emplace_back(AArch64::X4, ParamPtr);
     RegsToPass.emplace_back(AArch64::X5,
                             DAG.getConstant(NumBytes, DL, MVT::i64));
   }
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
index dc16b3a1a0f270..844fc52ddade63 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
@@ -100,5 +100,42 @@ define void @varargs_many_argscalleer() nounwind {
   ret void
 }
 
+define void @varargs_caller_tail() nounwind {
+; CHECK-LABEL: varargs_caller_tail:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: mov x9, #4617315517961601024 // =0x4014000000000000
+; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT: mov w1, #2 // =0x2
+; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
+; CHECK-NEXT: mov w3, #4 // =0x4
+; CHECK-NEXT: mov w5, #16 // =0x10
+; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: stp x9, x8, [sp]
+; CHECK-NEXT: str xzr, [sp, #16]
+; CHECK-NEXT: .weak_anti_dep varargs_callee
+; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
+; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
+; CHECK-NEXT: bl "#varargs_callee"
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: add x4, sp, #48
+; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT: mov w1, #4 // =0x4
+; CHECK-NEXT: mov w2, #3 // =0x3
+; CHECK-NEXT: mov w3, #2 // =0x2
+; CHECK-NEXT: mov x5, xzr
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: .weak_anti_dep varargs_callee
+; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
+; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
+; CHECK-NEXT: b "#varargs_callee"
+  call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> <double 0.0, double 0.0>)
+  tail call void (double, ...) @varargs_callee(double 1.0, i32 4, i32 3, i32 2)
+  ret void
+}
 
 declare void @llvm.va_start(ptr)
diff --git a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
index 2d6db1642247d7..812837639196e6 100644
--- a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
+++ b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64ec-windows-msvc %s -o - | FileCheck %s --check-prefixes=CHECK-EC
 ; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
 ; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
 
@@ -32,3 +33,10 @@ attributes #1 = { noinline optnone "thunk" }
 ; CHECK: ldr x9, [x9]
 ; CHECK: mov v0.16b, v16.16b
 ; CHECK: br x9
+; CHECK-EC: mov v7.16b, v0.16b
+; CHECK-EC: ldr x9, [x0]
+; CHECK-EC: ldr x11, [x9]
+; CHECK-EC: mov v0.16b, v7.16b
+; CHECK-EC: add x4, sp, #64
+; CHECK-EC: add sp, sp, #64
+; CHECK-EC: br x11

>From dce4933f93b33d2b4fea57aa9e03a559ab0b992c Mon Sep 17 00:00:00 2001
From: Billy Laws <blaw...@gmail.com>
Date: Mon, 5 Feb 2024 17:26:16 +0000
Subject: [PATCH 2/2] [AArch64] Fix generated types for ARM64EC variadic entry
 thunk targets (#80595)

ISel handles filling in x4/x5 when calling variadic functions, as they
don't correspond to the 5th/6th X64 arguments but rather to the end of
the shadow space on the stack and the size in bytes of all stack
parameters (ignored and written as 0 for calls from entry thunks).

Will PR a follow-up with the ISel handling after this is merged.
---
 .../AArch64/AArch64Arm64ECCallLowering.cpp    | 48 +++++++++++--------
 .../CodeGen/AArch64/arm64ec-entry-thunks.ll   |  4 +-
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 11248bb7aef31f..91b4f18c73c935 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -43,6 +43,8 @@ static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks", cl::Hidden,
 
 namespace {
 
+enum class ThunkType { GuestExit, Entry, Exit };
+
 class AArch64Arm64ECCallLowering : public ModulePass {
 public:
   static char ID;
@@ -69,14 +71,14 @@ class AArch64Arm64ECCallLowering : public ModulePass {
   Type *I64Ty;
   Type *VoidTy;
 
-  void getThunkType(FunctionType *FT, AttributeList AttrList, bool EntryThunk,
+  void getThunkType(FunctionType *FT, AttributeList AttrList, ThunkType TT,
                     raw_ostream &Out, FunctionType *&Arm64Ty,
                     FunctionType *&X64Ty);
   void getThunkRetType(FunctionType *FT, AttributeList AttrList,
                        raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy,
                        SmallVectorImpl<Type *> &Arm64ArgTypes,
                        SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr);
-  void getThunkArgTypes(FunctionType *FT, AttributeList AttrList,
+  void getThunkArgTypes(FunctionType *FT, AttributeList AttrList, ThunkType TT,
                         raw_ostream &Out,
                         SmallVectorImpl<Type *> &Arm64ArgTypes,
                         SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr);
@@ -89,10 +91,11 @@ class AArch64Arm64ECCallLowering : public ModulePass {
 
 void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
                                               AttributeList AttrList,
-                                              bool EntryThunk, raw_ostream &Out,
+                                              ThunkType TT, raw_ostream &Out,
                                               FunctionType *&Arm64Ty,
                                               FunctionType *&X64Ty) {
-  Out << (EntryThunk ? "$ientry_thunk$cdecl$" : "$iexit_thunk$cdecl$");
+  Out << (TT == ThunkType::Entry ? "$ientry_thunk$cdecl$"
+                                 : "$iexit_thunk$cdecl$");
 
   Type *Arm64RetTy;
   Type *X64RetTy;
@@ -102,8 +105,8 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
 
   // The first argument to a thunk is the called function, stored in x9.
   // For exit thunks, we pass the called function down to the emulator;
-  // for entry thunks, we just call the Arm64 function directly.
-  if (!EntryThunk)
+  // for entry/guest exit thunks, we just call the Arm64 function directly.
+  if (TT == ThunkType::Exit)
     Arm64ArgTypes.push_back(PtrTy);
 
   X64ArgTypes.push_back(PtrTy);
@@ -111,14 +114,16 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
   getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes,
                   X64ArgTypes, HasSretPtr);
 
-  getThunkArgTypes(FT, AttrList, Out, Arm64ArgTypes, X64ArgTypes, HasSretPtr);
+  getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes,
+                   HasSretPtr);
 
-  Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false);
+  Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes,
+                              TT == ThunkType::Entry && FT->isVarArg());
   X64Ty = FunctionType::get(X64RetTy, X64ArgTypes, false);
 }
 
 void AArch64Arm64ECCallLowering::getThunkArgTypes(
-    FunctionType *FT, AttributeList AttrList, raw_ostream &Out,
+    FunctionType *FT, AttributeList AttrList, ThunkType TT, raw_ostream &Out,
     SmallVectorImpl<Type *> &Arm64ArgTypes,
     SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr) {
 
@@ -151,14 +156,16 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes(
       X64ArgTypes.push_back(I64Ty);
     }
 
-    // x4
-    Arm64ArgTypes.push_back(PtrTy);
-    X64ArgTypes.push_back(PtrTy);
-    // x5
-    Arm64ArgTypes.push_back(I64Ty);
-    // FIXME: x5 isn't actually passed/used by the x64 side; revisit once we
-    // have proper isel for varargs
-    X64ArgTypes.push_back(I64Ty);
+    if (TT != ThunkType::Entry) {
+      // x4
+      Arm64ArgTypes.push_back(PtrTy);
+      X64ArgTypes.push_back(PtrTy);
+      // x5
+      Arm64ArgTypes.push_back(I64Ty);
+      // FIXME: x5 isn't actually passed/used by the x64 side; revisit once we
+      // have proper isel for varargs
+      X64ArgTypes.push_back(I64Ty);
+    }
     return;
   }
 
@@ -339,8 +346,7 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT,
   SmallString<256> ExitThunkName;
   llvm::raw_svector_ostream ExitThunkStream(ExitThunkName);
   FunctionType *Arm64Ty, *X64Ty;
-  getThunkType(FT, Attrs, /*EntryThunk*/ false, ExitThunkStream, Arm64Ty,
-               X64Ty);
+  getThunkType(FT, Attrs, ThunkType::Exit, ExitThunkStream, Arm64Ty, X64Ty);
   if (Function *F = M->getFunction(ExitThunkName))
     return F;
 
@@ -443,7 +449,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
   SmallString<256> EntryThunkName;
   llvm::raw_svector_ostream EntryThunkStream(EntryThunkName);
   FunctionType *Arm64Ty, *X64Ty;
-  getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
+  getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::Entry,
                EntryThunkStream, Arm64Ty, X64Ty);
   if (Function *F = M->getFunction(EntryThunkName))
     return F;
@@ -518,7 +524,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
 Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
   llvm::raw_null_ostream NullThunkName;
   FunctionType *Arm64Ty, *X64Ty;
-  getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
+  getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::GuestExit,
                NullThunkName, Arm64Ty, X64Ty);
   auto MangledName = getArm64ECMangledFunctionName(F->getName().str());
   assert(MangledName && "Can't guest exit to function that's already native");
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 5c56f51e1ca554..0083818def1514 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -147,8 +147,8 @@ define void @has_varargs(...) nounwind {
 ; CHECK-NEXT: add x29, sp, #160
 ; CHECK-NEXT: .seh_add_fp 160
 ; CHECK-NEXT: .seh_endprologue
-; CHECK-NEXT: ldp x8, x5, [x4, #32]
-; CHECK-NEXT: mov x4, x8
+; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: mov x5, xzr
 ; CHECK-NEXT: blr x9
 ; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_ret
 ; CHECK-NEXT: ldr x0, [x8, :lo12:__os_arm64x_dispatch_ret]
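For anyone who wants to try the tail-call scenario addressed by the first patch
outside the test suite, a standalone IR file modeled on the new
varargs_caller_tail test should be enough. This is only a sketch: the file name
is arbitrary and varargs_callee is reduced to an external declaration here.

    ; repro.ll -- run with: llc -mtriple=arm64ec-windows-msvc repro.ll -o -
    declare void @varargs_callee(double, ...)

    define void @varargs_caller_tail() nounwind {
      ; An ordinary varargs call first, so the caller sets up a stack frame.
      call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> <double 0.0, double 0.0>)
      ; Then a varargs tail call. The branch to the callee is emitted after the
      ; epilogue, but x4 is written before it, so x4 has to be computed from the
      ; post-epilogue SP -- that is what the dummy fixed object added in
      ; LowerCall provides.
      tail call void (double, ...) @varargs_callee(double 1.0, i32 4, i32 3, i32 2)
      ret void
    }

With the backport applied, llc should recompute x4 from the post-epilogue stack
pointer right before the tail-call branch (the add x4, sp, #48 / add sp, sp, #48
/ b "#varargs_callee" sequence checked in arm64ec-varargs.ll above), rather than
leaving x4 holding the pre-epilogue value of sp.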