llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Oliver Stannard (ostannard) <details> <summary>Changes</summary> This is a continuation of #<!-- -->102896 by @<!-- -->kiran-isaac, which gets the `[[clang::musttail]]` attribute working for as many cases as possible in the ARM backend. This includes one target-independent LangRef change to the rules for `tail` and `musttail` calls: the callee must not access `byval` arguments from the caller, a case which I think can't work for any target, but wasn't previously documented. With this patch series applied, I'm still aware of these bugs/limitations in musttail for ARM: * Tail calls aren't supported at all for Armv6-M, because it doesn't have a branch with enough range that does not modify LR. * With M-profile PACBTI, there are some indirect calls (ones which have arguments in all of r0-r3) where no register is available to hold the function pointer, so these can't be tail-called. * Some crash bugs which I've raised separate issues for: * #<!-- -->107569 * #<!-- -->109922 * #<!-- -->109929 --- Patch is 35.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/109943.diff 10 Files Affected: - (modified) clang/lib/CodeGen/CGCall.cpp (+1-1) - (added) clang/test/CodeGen/musttail-sret.cpp (+84) - (modified) llvm/docs/LangRef.rst (+4-4) - (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+40-99) - (modified) llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll (+5-11) - (modified) llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll (+5-8) - (modified) llvm/test/CodeGen/ARM/fp-arg-shuffle.ll (+23) - (modified) llvm/test/CodeGen/ARM/fp16-vector-argument.ll (+14-27) - (added) llvm/test/CodeGen/ARM/musttail.ll (+321) - (modified) llvm/test/CodeGen/ARM/struct_byval.ll (-19) ``````````diff diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 4ae981e4013e9c..ecb72c265d7088 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5112,7 +5112,7 @@ RValue CodeGenFunction::EmitCall(const 
CGFunctionInfo &CallInfo, RawAddress SRetAlloca = RawAddress::invalid(); llvm::Value *UnusedReturnSizePtr = nullptr; if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { - if (IsVirtualFunctionPointerThunk && RetAI.isIndirect()) { + if ((IsVirtualFunctionPointerThunk && RetAI.isIndirect()) || IsMustTail) { SRetPtr = makeNaturalAddressForPointer(CurFn->arg_begin() + IRFunctionArgs.getSRetArgNo(), RetTy, CharUnits::fromQuantity(1)); diff --git a/clang/test/CodeGen/musttail-sret.cpp b/clang/test/CodeGen/musttail-sret.cpp new file mode 100644 index 00000000000000..ca67c218cd67f6 --- /dev/null +++ b/clang/test/CodeGen/musttail-sret.cpp @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -triple=arm %s -emit-llvm -O3 -o - | FileCheck %s --check-prefix=CHECK-ARM +// RUN: %clang_cc1 -triple=arm64 %s -emit-llvm -O3 -o - | FileCheck %s --check-prefix=CHECK-ARM64 +// RUN: %clang_cc1 -triple=i686 %s -emit-llvm -O3 -o - | FileCheck %s --check-prefix=CHECK-X86 +// RUN: %clang_cc1 -triple=x86_64 %s -emit-llvm -O3 -o - | FileCheck %s --check-prefix=CHECK-X64 + +// Sret tests +struct Big { + int a, b, c, d, e, f, g, h; +}; + +struct Big F1(signed short P0); + +struct Big F2(signed short P0) { + signed short P1 = 20391; + [[clang::musttail]] return F1(P1); +} + +// CHECK-NOT: alloca +// CHECK-ARM: musttail call arm_aapcscc void @_Z2F1s(ptr dead_on_unwind writable sret(%struct.Big) align 4 %agg.result, i16 noundef signext 20391) +// CHECK-ARM64: musttail call void @_Z2F1s(ptr dead_on_unwind writable sret(%struct.Big) align 4 %agg.result, i16 noundef 20391) +// CHECK-X86: musttail call void @_Z2F1s(ptr dead_on_unwind writable sret(%struct.Big) align 4 %agg.result, i16 noundef signext 20391) +// CHECK-X64: musttail call void @_Z2F1s(ptr dead_on_unwind writable sret(%struct.Big) align 4 %agg.result, i16 noundef signext 20391) + +struct ReallyBig { + int a[100]; +}; + +// Indirect sret tests +// Function pointer for testing indirect musttail call. 
+struct FunctionPointers { + ReallyBig (*F3)(int, int, int, int, float, double); + ReallyBig (*F4)(int, int, int, char, float, double); +}; + +struct ReallyBig F3(int P0, int P1, int P2, int P3, float P4, double P5); +struct ReallyBig F4(int P0, int P1, int P2, char P3, float P4, double P5); + +static struct FunctionPointers FP = {F3, F4}; + +struct ReallyBig F5 (int P0, int P1, int P2, int P3, float P4, double P5) { + [[clang::musttail]] return FP.F3(P0, P1, P2, P3, P4, P5); +} + +// CHECK-NOT: alloca +// CHECK-ARM: musttail call arm_aapcscc void @_Z2F3iiiifd(ptr dead_on_unwind writable sret(%struct.ReallyBig) align 4 %agg.result, i32 noundef %P0, i32 noundef %P1, i32 noundef %P2, i32 noundef %P3, float noundef %P4, double noundef %P5) +// CHECK-ARM64: musttail call void @_Z2F3iiiifd(ptr dead_on_unwind writable sret(%struct.ReallyBig) align 4 %agg.result, i32 noundef %P0, i32 noundef %P1, i32 noundef %P2, i32 noundef %P3, float noundef %P4, double noundef %P5) +// CHECK-X86: musttail call void @_Z2F3iiiifd(ptr dead_on_unwind writable sret(%struct.ReallyBig) align 4 %agg.result, i32 noundef %P0, i32 noundef %P1, i32 noundef %P2, i32 noundef %P3, float noundef %P4, double noundef %P5) +// CHECK-X64: musttail call void @_Z2F3iiiifd(ptr dead_on_unwind writable sret(%struct.ReallyBig) align 4 %agg.result, i32 noundef %P0, i32 noundef %P1, i32 noundef %P2, i32 noundef %P3, float noundef %P4, double noundef %P5) + +struct ReallyBig F6 (int P0, int P1, int P2, char P3, float P4, double P5) { + [[clang::musttail]] return FP.F4(P0, P1, P2, P3, P4, P5); +} + +// Complex and BitInt. Special cases for sret. 
+// CHECK-NOT: alloca +// CHECK-ARM: musttail call arm_aapcscc void @_Z2F4iiicfd(ptr dead_on_unwind writable sret(%struct.ReallyBig) align 4 %agg.result, i32 noundef %P0, i32 noundef %P1, i32 noundef %P2, i8 noundef signext %P3, float noundef %P4, double noundef %P5) +// CHECK-ARM64: musttail call void @_Z2F4iiicfd(ptr dead_on_unwind writable sret(%struct.ReallyBig) align 4 %agg.result, i32 noundef %P0, i32 noundef %P1, i32 noundef %P2, i8 noundef %P3, float noundef %P4, double noundef %P5) +// CHECK-X86: musttail call void @_Z2F4iiicfd(ptr dead_on_unwind writable sret(%struct.ReallyBig) align 4 %agg.result, i32 noundef %P0, i32 noundef %P1, i32 noundef %P2, i8 noundef signext %P3, float noundef %P4, double noundef %P5) +// CHECK-X64: musttail call void @_Z2F4iiicfd(ptr dead_on_unwind writable sret(%struct.ReallyBig) align 4 %agg.result, i32 noundef %P0, i32 noundef %P1, i32 noundef %P2, i8 noundef signext %P3, float noundef %P4, double noundef %P5) + +double _Complex F7(signed short P0); + +double _Complex F8(signed short P0) { + signed short P1 = 20391; + [[clang::musttail]] return F7(P1); +} + +// CHECK-NOT: alloca +// CHECK-ARM: musttail call arm_aapcscc void @_Z2F7s(ptr dead_on_unwind writable sret({ double, double }) align 8 %agg.result, i16 noundef signext 20391) +// CHECK-ARM64: musttail call noundef { double, double } @_Z2F7s(i16 noundef 20391) +// CHECK-X86: musttail call void @_Z2F7s(ptr dead_on_unwind writable sret({ double, double }) align 4 %agg.result, i16 noundef signext 20391) +// CHECK-X64: musttail call noundef { double, double } @_Z2F7s(i16 noundef signext 20391) + +signed _BitInt(100) F9(float P0, float P1, double P2, char P3); + +signed _BitInt(100) F10(float P0, float P1, double P2, char P3) { + [[clang::musttail]] return F9(P0, P1, P2, P3); +} + +// CHECK-NOT: alloca +// CHECK-ARM: musttail call arm_aapcscc void @_Z2F9ffdc(ptr dead_on_unwind writable sret(i128) align 8 %agg.result, float noundef %P0, float noundef %P1, double noundef %P2, i8 
noundef signext %P3) +// CHECK-ARM64: musttail call noundef i100 @_Z2F9ffdc(float noundef %P0, float noundef %P1, double noundef %P2, i8 noundef %P3) +// CHECK-X86: musttail call void @_Z2F9ffdc(ptr dead_on_unwind writable sret(i128) align 4 %agg.result, float noundef %P0, float noundef %P1, double noundef %P2, i8 noundef signext %P3) +// CHECK-X64: musttail call noundef { i64, i64 } @_Z2F9ffdc(float noundef %P0, float noundef %P1, double noundef %P2, i8 noundef signext %P3) \ No newline at end of file diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 91c3e60bb0acb1..441a1998a04606 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -12658,10 +12658,10 @@ This instruction requires several arguments: the return value of the callee is returned to the caller's caller, even if a void return type is in use. - Both markers imply that the callee does not access allocas from the caller. - The ``tail`` marker additionally implies that the callee does not access - varargs from the caller. Calls marked ``musttail`` must obey the following - additional rules: + Both markers imply that the callee does not access allocas or ``byval`` + arguments from the caller. The ``tail`` marker additionally implies that the + callee does not access varargs from the caller. Calls marked ``musttail`` + must obey the following additional rules: - The call must immediately precede a :ref:`ret <i_ret>` instruction, or a pointer bitcast followed by a ret instruction. 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index a03928b618df03..dfb401487e1ded 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2407,8 +2407,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = false; // For both the non-secure calls and the returns from a CMSE entry function, - // the function needs to do some extra work afte r the call, or before the - // return, respectively, thus it cannot end with atail call + // the function needs to do some extra work after the call, or before the + // return, respectively, thus it cannot end with a tail call if (isCmseNSCall || AFI->isCmseNSEntryFunction()) isTailCall = false; @@ -2961,50 +2961,6 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, Size = std::max<int>(Size - Excess, 0); } -/// MatchingStackOffset - Return true if the given stack call argument is -/// already available in the same position (relatively) of the caller's -/// incoming argument stack. -static -bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, - MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, - const TargetInstrInfo *TII) { - unsigned Bytes = Arg.getValueSizeInBits() / 8; - int FI = std::numeric_limits<int>::max(); - if (Arg.getOpcode() == ISD::CopyFromReg) { - Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); - if (!VR.isVirtual()) - return false; - MachineInstr *Def = MRI->getVRegDef(VR); - if (!Def) - return false; - if (!Flags.isByVal()) { - if (!TII->isLoadFromStackSlot(*Def, FI)) - return false; - } else { - return false; - } - } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { - if (Flags.isByVal()) - // ByVal argument is passed in as a pointer but it's now being - // dereferenced. e.g. 
- // define @foo(%struct.X* %A) { - // tail call @bar(%struct.X* byval %A) - // } - return false; - SDValue Ptr = Ld->getBasePtr(); - FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); - if (!FINode) - return false; - FI = FINode->getIndex(); - } else - return false; - - assert(FI != std::numeric_limits<int>::max()); - if (!MFI.isFixedObjectIndex(FI)) - return false; - return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); -} - /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. Note that this function also @@ -3046,8 +3002,10 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( for (const CCValAssign &AL : ArgLocs) if (AL.isRegLoc()) AddressRegisters.erase(AL.getLocReg()); - if (AddressRegisters.empty()) + if (AddressRegisters.empty()) { + LLVM_DEBUG(dbgs() << "false (no reg to hold function pointer)\n"); return false; + } } // Look for obvious safe cases to perform tail call optimization that do not @@ -3056,18 +3014,25 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( // Exception-handling functions need a special set of instructions to indicate // a return to the hardware. Tail-calling another function would probably // break this. - if (CallerF.hasFnAttribute("interrupt")) + if (CallerF.hasFnAttribute("interrupt")) { + LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n"); return false; + } - if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt)) + if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt)) { + LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false") + << " (guaranteed tail-call CC)\n"); return CalleeCC == CallerCC; + } // Also avoid sibcall optimization if either caller or callee uses struct // return semantics. bool isCalleeStructRet = Outs.empty() ? 
false : Outs[0].Flags.isSRet(); bool isCallerStructRet = MF.getFunction().hasStructRetAttr(); - if (isCalleeStructRet || isCallerStructRet) + if (isCalleeStructRet != isCallerStructRet) { + LLVM_DEBUG(dbgs() << "false (struct-ret)\n"); return false; + } // Externally-defined functions with weak linkage should not be // tail-called on ARM when the OS does not support dynamic @@ -3080,8 +3045,10 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( const GlobalValue *GV = G->getGlobal(); const Triple &TT = getTargetMachine().getTargetTriple(); if (GV->hasExternalWeakLinkage() && - (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) + (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) { + LLVM_DEBUG(dbgs() << "false (external weak linkage)\n"); return false; + } } // Check that the call results are passed in the same way. @@ -3090,70 +3057,44 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( getEffectiveCallingConv(CalleeCC, isVarArg), getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins, CCAssignFnForReturn(CalleeCC, isVarArg), - CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) + CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) { + LLVM_DEBUG(dbgs() << "false (incompatible results)\n"); return false; + } // The callee has to preserve all registers the caller needs to preserve. 
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); if (CalleeCC != CallerCC) { const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); - if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) + if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) { + LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n"); return false; + } } - // If Caller's vararg or byval argument has been split between registers and - // stack, do not perform tail call, since part of the argument is in caller's - // local frame. + // If Caller's vararg argument has been split between registers and stack, do + // not perform tail call, since part of the argument is in caller's local + // frame. const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>(); - if (AFI_Caller->getArgRegsSaveSize()) + if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) { + LLVM_DEBUG(dbgs() << "false (arg reg save area)\n"); return false; + } // If the callee takes no arguments then go on to check the results of the // call. - if (!Outs.empty()) { - if (CCInfo.getStackSize()) { - // Check if the arguments are already laid out in the right way as - // the caller's fixed stack objects. - MachineFrameInfo &MFI = MF.getFrameInfo(); - const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); - for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); - i != e; - ++i, ++realArgIdx) { - CCValAssign &VA = ArgLocs[i]; - EVT RegVT = VA.getLocVT(); - SDValue Arg = OutVals[realArgIdx]; - ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; - if (VA.getLocInfo() == CCValAssign::Indirect) - return false; - if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) { - // f64 and vector types are split into multiple registers or - // register/stack-slot combinations. 
The types will not match - // the registers; give up on memory f64 refs until we figure - // out what to do about this. - if (!VA.isRegLoc()) - return false; - if (!ArgLocs[++i].isRegLoc()) - return false; - if (RegVT == MVT::v2f64) { - if (!ArgLocs[++i].isRegLoc()) - return false; - if (!ArgLocs[++i].isRegLoc()) - return false; - } - } else if (!VA.isRegLoc()) { - if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, - MFI, MRI, TII)) - return false; - } - } - } - - const MachineRegisterInfo &MRI = MF.getRegInfo(); - if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) - return false; + const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) { + LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n"); + return false; } + // If the stack arguments for this call do not fit into our own save area then + // the call cannot be made tail. + if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize()) + return false; + + LLVM_DEBUG(dbgs() << "true\n"); return true; } diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll index d8e22f4f5312ae..e186ae3a961502 100644 --- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll +++ b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll @@ -12,17 +12,11 @@ define void @check227( ; arg1 --> SP+188 entry: - -;CHECK: sub sp, sp, #12 -;CHECK: push {r11, lr} -;CHECK: sub sp, sp, #4 -;CHECK: add r0, sp, #12 -;CHECK: stm r0, {r1, r2, r3} -;CHECK: ldr r0, [sp, #212] -;CHECK: bl useInt -;CHECK: add sp, sp, #4 -;CHECK: pop {r11, lr} -;CHECK: add sp, sp, #12 +; CHECK: sub sp, sp, #12 +; CHECK: stm sp, {r1, r2, r3} +; CHECK: ldr r0, [sp, #200] +; CHECK: add sp, sp, #12 +; CHECK: b useInt %0 = ptrtoint ptr %arg1 to i32 tail call void @useInt(i32 %0) diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll 
b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll index 0c5d22984b99e1..efdecce9ae723a 100644 --- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll +++ b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll @@ -7,14 +7,11 @@ define void @foo(ptr byval(%struct4bytes) %p0, ; --> R0 ptr byval(%struct20bytes) %p1 ; --> R1,R2,R3, [SP+0 .. SP+8) ) { -;CHECK: sub sp, sp, #16 -;CHECK: push {r11, lr} -;CHECK: add r12, sp, #8 -;CHECK: stm r12, {r0, r1, r2, r3} -;CHECK: add r0, sp, #12 -;CHECK: bl useInt -;CHECK: pop {r11, lr} -;CHECK: add sp, sp, #16 +;CHECK: sub sp, sp, #16 +;CHECK: stm sp, {r0, r1, r2, r3} +;CHECK: add r0, sp, #4 +;CHECK: add sp, sp, #16 +;CHECK: b useInt %1 = ptrtoint ptr %p1 to i32 tail call void @useInt(i32 %1) diff --git a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll index 4996cc8ecbf022..99c9602eee58bf 100644 --- a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll +++ b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll @@ -1,8 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=arm-eabi -mattr=+neon -float-abi=soft %s -o - | FileCheck %s ; CHECK: function1 ; CHECK-NOT: vmov define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp { +; CHECK-LABEL: function1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r11, lr} +; CHECK-NEXT: vldr d16, [sp, #40] +; CHECK-NEXT: vldr d17, [sp, #32] +; CHECK-NEXT: vmov r12, lr, d16 +; CHECK-NEXT: vldr d16, [sp, #16] +; CHECK-NEXT: vmov r4, r5, d17 +; CHECK-NEXT: vldr d17, [sp, #24] +; CHECK-NEXT: str r3, [sp, #36] +; CHECK-NEXT: str r2, [sp, #32] +; CHECK-NEXT: str r1, [sp, #44] +; CHECK-NEXT: str r0, [sp, #40] +; CHECK-NEXT: vstr d17, [sp, #16] +; CHECK-NEXT: vstr d16, [sp, #24] +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: mov r1, lr +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: pop 
{r4, r5, r11, lr} +; CHECK-NEXT: b function2 entry: %call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind ret double %call diff --git a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll index 6fc56967bc7aa9..65aff46658fd1d 100644 --- a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll +++ b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll @@ -145,26 +145,21 @@ entry: define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x half>, <8 x half>) { ; SOFT-LABEL: many_args_test: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: push {r11, lr} -; SOFT-NEXT: sub sp, sp, #32 -; SOFT-NEXT: add r12, sp, #80 +; SOFT-NEXT: add r12, sp, #40 ; SOFT-NEXT: vld1.64 {d16, d17}, [r12] -; SOFT-NEXT: add r12, sp, #48... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/109943 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits