pratlucas created this revision. Herald added subscribers: llvm-commits, cfe-commits, danielkiss, hiraditya, kristof.beyls. Herald added projects: clang, LLVM. pratlucas added a child revision: D75904: [ARM][CodeGen] Fixing stack alignment of HFA arguments on AArch32 PCS. pratlucas added reviewers: t.p.northover, rnk, olista01, bogner.
Properly complying with AArch64 PCS on the handling of over-aligned HFA arguments when those are placed on the stack. AAPCS64 specifies that the stacked argument address should be rounded up to the Natural Alignment of the argument before the argument is copied to memory. Over alignment information extracted from language attributes on clang was not properly propagated to the backend for those arguments, as it does not map to the backend's base type alignments. As the standard also specifies that, when placed in registers, a single FP register should be allocated for each individual HFA element, type coercion is no suitable for capturing the alignment constraints. This patch fixes the alignment of these arguments by capturing their stack alignment requirements in an IR argument attribute, making sure this information is available for the calling convention lowering stage. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D75903 Files: clang/include/clang/CodeGen/CGFunctionInfo.h clang/lib/CodeGen/CGCall.cpp clang/lib/CodeGen/TargetInfo.cpp clang/test/CodeGen/aarch64-args-hfa.c clang/test/CodeGen/arm64-arguments.c llvm/docs/LangRef.rst llvm/include/llvm/CodeGen/TargetCallingConv.h llvm/include/llvm/CodeGen/TargetLowering.h llvm/include/llvm/IR/Argument.h llvm/include/llvm/IR/Attributes.h llvm/include/llvm/IR/Attributes.td llvm/include/llvm/IR/Function.h llvm/include/llvm/IR/InstrTypes.h llvm/lib/AsmParser/LLParser.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp llvm/lib/IR/Attributes.cpp llvm/lib/IR/Function.cpp llvm/lib/IR/Verifier.cpp llvm/lib/Target/AArch64/AArch64CallingConvention.cpp llvm/test/Bitcode/compatibility.ll llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll
Index: llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -mtriple=arm64-none-eabi | FileCheck %s + +; Over-aligned HFA argument placed on register - one element per register +define double @test_hfa_align_arg_reg([2 x double] alignstack(16) %h.coerce) local_unnamed_addr #0 { +entry: +; CHECK-LABEL: test_hfa_align_arg_reg: +; CHECK-DAG: ret + + %h.coerce.fca.0.extract = extractvalue [2 x double] %h.coerce, 0 + ret double %h.coerce.fca.0.extract +} + +; Call with over-aligned HFA argument placed on register - one element per register +define double @test_hfa_align_call_reg() local_unnamed_addr #0 { +entry: +; CHECK-LABEL: test_hfa_align_call_reg: +; CHECK-DAG: fmov d0, #1.00000000 +; CHECK-DAG: fmov d1, #2.00000000 +; CHECK-DAG: bl test_hfa_align_arg_reg +; CHECK-DAG: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-DAG: ret + + %call = call double @test_hfa_align_arg_reg([2 x double] alignstack(16) [double 1.000000e+00, double 2.000000e+00]) + ret double %call +} + +; Over-aligned HFA argument placed on stack - stack round up to alignment +define double @test_hfa_align_arg_stack(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %f, [2 x double] alignstack(16) %h.coerce) local_unnamed_addr #0 { +entry: +; CHECK-LABEL: test_hfa_align_arg_stack: +; CHECK-DAG: ldr d0, [sp, #16] +; CHECK-DAG: ret + + %h.coerce.fca.0.extract = extractvalue [2 x double] %h.coerce, 0 + ret double %h.coerce.fca.0.extract +} + +; Call with over-aligned HFA argument placed on stack - stack round up to alignment +define double @test_hfa_align_call_stack() local_unnamed_addr #0 { +entry: +; CHECK-LABEL: test_hfa_align_call_stack: +; CHECK-DAG: mov x8, #4611686018427387904 +; CHECK-DAG: mov x9, #4607182418800017408 +; CHECK-DAG: stp x8, x30, [sp, #24] // 8-byte Folded Spill +; CHECK-DAG: str x9, [sp, #16] +; CHECK-DAG: bl test_hfa_align_arg +; CHECK-DAG: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-DAG: add sp, sp, #48 // =48 +; CHECK-DAG: ret + + %call = call double @test_hfa_align_arg_stack(double undef, double undef, double undef, double undef, double undef, double undef, double undef, double undef, float undef, [2 x double] alignstack(16) [double 1.000000e+00, double 2.000000e+00]) + ret double %call +} + Index: llvm/test/Bitcode/compatibility.ll =================================================================== --- llvm/test/Bitcode/compatibility.ll +++ llvm/test/Bitcode/compatibility.ll @@ -548,6 +548,8 @@ ; CHECK: declare void @f.param.dereferenceable(i8* dereferenceable(4)) declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) ; CHECK: declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) +declare void @f.param.stack_align([2 x double] alignstack(16)) +; CHECK: declare void @f.param.stack_align([2 x double] alignstack(16)) ; Functions -- unnamed_addr and local_unnamed_addr declare void @f.unnamed_addr() unnamed_addr Index: llvm/lib/Target/AArch64/AArch64CallingConvention.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64CallingConvention.cpp +++ llvm/lib/Target/AArch64/AArch64CallingConvention.cpp @@ -43,11 +43,16 @@ const Align StackAlign = State.getMachineFunction().getDataLayout().getStackAlignment(); const Align OrigAlign(ArgFlags.getOrigAlign()); - const Align Align = std::min(OrigAlign, StackAlign); + const Align Alignment = std::min(OrigAlign, StackAlign); + + if (ArgFlags.getStackAlign()) { + const Align ArgStackAlign(ArgFlags.getStackAlign()); + State.AllocateStack(0, ArgStackAlign.value()); + } for (auto &It : PendingMembers) { It.convertToMem(State.AllocateStack( - Size, std::max((unsigned)Align.value(), SlotAlign))); + Size, std::max((unsigned)Alignment.value(), SlotAlign))); State.addLoc(It); SlotAlign = 1; } Index: llvm/lib/IR/Verifier.cpp =================================================================== --- llvm/lib/IR/Verifier.cpp +++ llvm/lib/IR/Verifier.cpp @@ -1523,7 +1523,6 @@ case Attribute::NoImplicitFloat: case Attribute::Naked: case Attribute::InlineHint: - case Attribute::StackAlignment: case Attribute::UWTable: case Attribute::NonLazyBind: case Attribute::ReturnsTwice: @@ -1560,7 +1559,8 @@ /// arguments. static bool isFuncOrArgAttr(Attribute::AttrKind Kind) { return Kind == Attribute::ReadOnly || Kind == Attribute::WriteOnly || - Kind == Attribute::ReadNone || Kind == Attribute::NoFree; + Kind == Attribute::ReadNone || Kind == Attribute::NoFree || + Kind == Attribute::StackAlignment; } void Verifier::verifyAttributeTypes(AttributeSet Attrs, bool IsFunction, Index: llvm/lib/IR/Function.cpp =================================================================== --- llvm/lib/IR/Function.cpp +++ llvm/lib/IR/Function.cpp @@ -131,6 +131,10 @@ return getParent()->getParamAlign(getArgNo()); } +MaybeAlign Argument::getParamStackAlign() const { + return getParent()->getParamStackAlign(getArgNo()); +} + Type *Argument::getParamByValType() const { assert(getType()->isPointerTy() && "Only pointers have byval types"); return getParent()->getParamByValType(getArgNo()); Index: llvm/lib/IR/Attributes.cpp =================================================================== --- llvm/lib/IR/Attributes.cpp +++ llvm/lib/IR/Attributes.cpp @@ -1392,6 +1392,10 @@ return getAttributes(ArgNo + FirstArgIndex).getAlignment(); } +MaybeAlign AttributeList::getParamStackAlignment(unsigned ArgNo) const { + return getAttributes(ArgNo + FirstArgIndex).getStackAlignment(); +} + Type *AttributeList::getParamByValType(unsigned Index) const { return getAttributes(Index+FirstArgIndex).getByValType(); } Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -115,6 +115,7 @@ IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf); IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); Alignment = Call->getParamAlignment(ArgIdx); + StackAlignment = Call->getParamStackAlign(ArgIdx); ByValType = nullptr; if (Call->paramHasAttr(ArgIdx, Attribute::ByVal)) ByValType = Call->getParamByValType(ArgIdx); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9315,6 +9315,8 @@ if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); + if (Args[i].StackAlignment) + Flags.setStackAlign(*Args[i].StackAlignment); MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), CLI.CallConv, VT); @@ -9826,6 +9828,8 @@ Flags.setCopyElisionCandidate(); if (Arg.hasAttribute(Attribute::Returned)) Flags.setReturned(); + if (Arg.getParamStackAlign()) + Flags.setStackAlign(*Arg.getParamStackAlign()); MVT RegisterVT = TLI->getRegisterTypeForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); Index: llvm/lib/AsmParser/LLParser.cpp =================================================================== --- llvm/lib/AsmParser/LLParser.cpp +++ llvm/lib/AsmParser/LLParser.cpp @@ -1613,6 +1613,13 @@ B.addAlignmentAttr(Alignment); continue; } + case lltok::kw_alignstack: { + unsigned Alignment; + if (ParseOptionalStackAlignment(Alignment)) + return true; + B.addStackAlignmentAttr(Alignment); + continue; + } case lltok::kw_byval: { Type *Ty; if (ParseByValWithOptionalType(Ty)) @@ -1652,7 +1659,6 @@ case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; case lltok::kw_immarg: B.addAttribute(Attribute::ImmArg); break; - case lltok::kw_alignstack: case lltok::kw_alwaysinline: case lltok::kw_argmemonly: case lltok::kw_builtin: Index: llvm/include/llvm/IR/InstrTypes.h =================================================================== --- llvm/include/llvm/IR/InstrTypes.h +++ llvm/include/llvm/IR/InstrTypes.h @@ -1603,6 +1603,11 @@ return Attrs.getParamAlignment(ArgNo); } + /// Extract the stack alignment for a call or parameter (0=unknown). + MaybeAlign getParamStackAlign(unsigned ArgNo) const { + return Attrs.getParamStackAlignment(ArgNo); + } + /// Extract the byval type for a call or parameter. Type *getParamByValType(unsigned ArgNo) const { Type *Ty = Attrs.getParamByValType(ArgNo); Index: llvm/include/llvm/IR/Function.h =================================================================== --- llvm/include/llvm/IR/Function.h +++ llvm/include/llvm/IR/Function.h @@ -447,6 +447,10 @@ return AttributeSets.getParamAlignment(ArgNo); } + MaybeAlign getParamStackAlign(unsigned ArgNo) const { + return AttributeSets.getParamStackAlignment(ArgNo); + } + /// Extract the byval type for a parameter. Type *getParamByValType(unsigned ArgNo) const { Type *Ty = AttributeSets.getParamByValType(ArgNo); Index: llvm/include/llvm/IR/Attributes.td =================================================================== --- llvm/include/llvm/IR/Attributes.td +++ llvm/include/llvm/IR/Attributes.td @@ -151,8 +151,8 @@ /// Sign extended before/after call. def SExt : EnumAttr<"signext">; -/// Alignment of stack for function (3 bits) stored as log2 of alignment with -/// +1 bias 0 means unaligned (different from alignstack=(1)). +/// Alignment of stack for function or argument (3 bits) stored as log2 of +/// alignment with +1 bias 0 means unaligned (different from alignstack=(1)). def StackAlignment : EnumAttr<"alignstack">; /// Function can be speculated. Index: llvm/include/llvm/IR/Attributes.h =================================================================== --- llvm/include/llvm/IR/Attributes.h +++ llvm/include/llvm/IR/Attributes.h @@ -617,6 +617,9 @@ /// Return the alignment for the specified function parameter. MaybeAlign getParamAlignment(unsigned ArgNo) const; + /// Return the stack alignment for the specified function parameter. + MaybeAlign getParamStackAlignment(unsigned ArgNo) const; + /// Return the byval type for the specified function parameter. Type *getParamByValType(unsigned ArgNo) const; Index: llvm/include/llvm/IR/Argument.h =================================================================== --- llvm/include/llvm/IR/Argument.h +++ llvm/include/llvm/IR/Argument.h @@ -83,6 +83,9 @@ /// If this is a byval or inalloca argument, return its alignment. MaybeAlign getParamAlign() const; + /// Return argument's stack alignment if specified + MaybeAlign getParamStackAlign() const; + /// If this is a byval argument, return its type. Type *getParamByValType() const; Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -278,13 +278,15 @@ bool IsSwiftSelf : 1; bool IsSwiftError : 1; bool IsCFGuardTarget : 1; + MaybeAlign StackAlignment; uint16_t Alignment = 0; Type *ByValType = nullptr; ArgListEntry() : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false), IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false), - IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {} + IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false), + StackAlignment() {} void setAttributes(const CallBase *Call, unsigned ArgIdx); Index: llvm/include/llvm/CodeGen/TargetCallingConv.h =================================================================== --- llvm/include/llvm/CodeGen/TargetCallingConv.h +++ llvm/include/llvm/CodeGen/TargetCallingConv.h @@ -44,6 +44,7 @@ unsigned IsSecArgPass : 1; ///< Second argument unsigned ByValAlign : 4; ///< Log 2 of byval alignment unsigned OrigAlign : 5; ///< Log 2 of original alignment + unsigned StackAlign : 3; ///< Log 2 of stack slot alignment unsigned IsInConsecutiveRegsLast : 1; unsigned IsInConsecutiveRegs : 1; unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate @@ -59,7 +60,7 @@ IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0), IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0), - IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), + StackAlign(0), IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0), PointerAddrSpace(0) { static_assert(sizeof(*this) == 3 * sizeof(unsigned), "flags are too big"); @@ -149,6 +150,15 @@ assert(getOrigAlign() == A.value() && "bitfield overflow"); } + unsigned getStackAlign() const { + MaybeAlign A = decodeMaybeAlign(StackAlign); + return A ? A->value() : 0; + } + void setStackAlign(Align A) { + StackAlign = encode(A); + assert(getStackAlign() == A.value() && "bitfield overflow"); + } + unsigned getByValSize() const { return ByValSize; } void setByValSize(unsigned S) { ByValSize = S; } Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -1216,6 +1216,15 @@ only valid on intrinsic declarations and cannot be applied to a call site or arbitrary function. +``alignstack(<n>)`` + This indicates the alignment that should be considered by the backend when + assigning this parameter to a stack slot during calling convention + lowering. The enforcement of the specified alignment is target-dependent, + as target-specific calling convention rules may override this value. This + attribute serves the purpose of carrying language specific alignment + information that is not mapped to base types in the backend (for example, + over-alignment specification through language attributes). + .. _gc: Garbage Collector Strategy Names Index: clang/test/CodeGen/arm64-arguments.c =================================================================== --- clang/test/CodeGen/arm64-arguments.c +++ clang/test/CodeGen/arm64-arguments.c @@ -196,7 +196,7 @@ typedef __attribute__((neon_vector_type(4))) float float32x4_t; float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) { -// CHECK: define <4 x float> @f35(i32 %i, [4 x float] %s1.coerce, [4 x float] %s2.coerce) +// CHECK: define <4 x float> @f35(i32 %i, [4 x float] alignstack(16) %s1.coerce, [4 x float] alignstack(16) %s2.coerce) // CHECK: %s1 = alloca %struct.s35, align 16 // CHECK: %s2 = alloca %struct.s35, align 16 // CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>* Index: clang/test/CodeGen/aarch64-args-hfa.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aarch64-args-hfa.c @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple aarch64-none-eabi -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple arm64-apple-ios7.0 -target-abi darwinpcs -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm -o - -x c %s | FileCheck %s + +// CHECK: %struct.hfa_align = type { [2 x double] } +typedef struct { + __attribute__((__aligned__(16))) double v[2]; +} hfa_align; + +// CHECK: define double @test_hfa_align_arg([2 x double] alignstack(16) %h.coerce) +double test_hfa_align_arg(hfa_align h) { + return h.v[0]; +} + +// CHECK: define double @test_hfa_align_call() +// CHECK: %call = call double @test_hfa_align_arg([2 x double] alignstack(16) %1) +double test_hfa_align_call() { + hfa_align h1 = {1.0, 2.0}; + return test_hfa_align_arg(h1); +} + Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -5233,8 +5233,12 @@ const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { + bool NeedsStackAlignment = getContext().getTypeAlignInChars(Ty) != + getContext().getTypeAlignInChars(Base); return ABIArgInfo::getDirect( - llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members)); + llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), + /*Offset=*/0, /*Padding=*/nullptr, /*CanBeFlattened=*/true, + NeedsStackAlignment); } // Aggregates <= 16 bytes are passed directly in registers or on the stack. Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -2123,6 +2123,9 @@ Attrs.addAttribute(llvm::Attribute::Nest); else if (AI.getInReg()) Attrs.addAttribute(llvm::Attribute::InReg); + if (AI.hasStackAlign()) + Attrs.addStackAlignmentAttr( + getContext().getTypeAlignInChars(ParamType).getQuantity()); break; case ABIArgInfo::Indirect: { Index: clang/include/clang/CodeGen/CGFunctionInfo.h =================================================================== --- clang/include/clang/CodeGen/CGFunctionInfo.h +++ clang/include/clang/CodeGen/CGFunctionInfo.h @@ -94,6 +94,7 @@ bool SRetAfterThis : 1; // isIndirect() bool InReg : 1; // isDirect() || isExtend() || isIndirect() bool CanBeFlattened: 1; // isDirect() + bool HasStackAlign: 1; // isDirect() bool SignExt : 1; // isExtend() bool canHavePaddingType() const { @@ -118,12 +119,14 @@ static ABIArgInfo getDirect(llvm::Type *T = nullptr, unsigned Offset = 0, llvm::Type *Padding = nullptr, - bool CanBeFlattened = true) { + bool CanBeFlattened = true, + bool HasStackAlign = false) { auto AI = ABIArgInfo(Direct); AI.setCoerceToType(T); AI.setPaddingType(Padding); AI.setDirectOffset(Offset); AI.setCanBeFlattened(CanBeFlattened); + AI.setStackAlign(HasStackAlign); return AI; } static ABIArgInfo getDirectInReg(llvm::Type *T = nullptr) { @@ -413,6 +416,15 @@ CanBeFlattened = Flatten; } + bool hasStackAlign() const { + return HasStackAlign; + } + + void setStackAlign(bool HasAlign) { + assert(isDirect() && "Invalid kind!"); + HasStackAlign = HasAlign; + } + void dump() const; };
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits