https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/116391
>From 90daf9c544bcb776c8a68ad504ba5eda50eafe8a Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell <benjamin.maxw...@arm.com> Date: Fri, 15 Nov 2024 14:35:41 +0000 Subject: [PATCH 1/2] [clang][SME] Ignore flatten for callees mismatched streaming attributes If `__attribute__((flatten))` is used on a function don't inline any callees with incompatible streaming attributes. Without this check, clang may produce incorrect code when `flatten` is used in code with streaming functions. Note: The docs for flatten say it can be ignored when inlining is impossible: "causes calls within the attributed function to be inlined unless it is impossible to do so". --- clang/lib/CodeGen/CGCall.cpp | 11 ++- clang/lib/CodeGen/TargetInfo.h | 9 +++ clang/lib/CodeGen/Targets/AArch64.cpp | 64 +++++++++++++--- .../AArch64/sme-flatten-streaming-attrs.c | 74 +++++++++++++++++++ 4 files changed, 143 insertions(+), 15 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 8f4f5d3ed81601..b8a968fdf4e9eb 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5112,9 +5112,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Some architectures (such as x86-64) have the ABI changed based on // attribute-target/features. Give them a chance to diagnose. - CGM.getTargetCodeGenInfo().checkFunctionCallABI( - CGM, Loc, dyn_cast_or_null<FunctionDecl>(CurCodeDecl), - dyn_cast_or_null<FunctionDecl>(TargetDecl), CallArgs, RetTy); + const FunctionDecl *CallerDecl = dyn_cast_or_null<FunctionDecl>(CurCodeDecl); + const FunctionDecl *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl); + CGM.getTargetCodeGenInfo().checkFunctionCallABI(CGM, Loc, CallerDecl, + CalleeDecl, CallArgs, RetTy); // 1. Set up the arguments. @@ -5705,7 +5706,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // FIXME: should this really take priority over __try, below? if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() && !InNoInlineAttributedStmt && - !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) { + !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>()) && + !CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI( + CallerDecl, CalleeDecl)) { Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline); } diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 373f8b8a80fdb1..23ff476b0e33ce 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -98,6 +98,15 @@ class TargetCodeGenInfo { const CallArgList &Args, QualType ReturnType) const {} + /// Returns true if inlining the function call would produce incorrect code + /// for the current target and should be ignored (even with the always_inline + /// or flatten attributes). + virtual bool + wouldInliningViolateFunctionCallABI(const FunctionDecl *Caller, + const FunctionDecl *Callee) const { + return false; + } + /// Determines the size of struct _Unwind_Exception on this platform, /// in 8-bit units. The Itanium ABI defines this as: /// struct _Unwind_Exception { diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 9320c6ef06efab..a9ea84b6575f92 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -177,6 +177,9 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { const FunctionDecl *Callee, const CallArgList &Args, QualType ReturnType) const override; + bool wouldInliningViolateFunctionCallABI( + const FunctionDecl *Caller, const FunctionDecl *Callee) const override; + private: // Diagnose calls between functions with incompatible Streaming SVE // attributes. @@ -1143,12 +1146,20 @@ void AArch64TargetCodeGenInfo::checkFunctionABI( } } -void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming( - CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller, - const FunctionDecl *Callee) const { - if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>()) - return; +enum class ArmStreamingInlinability : uint8_t { + Ok = 0, + IncompatibleStreamingModes = 1, + MismatchedStreamingCompatibility = 1 << 1, + CalleeHasNewZA = 1 << 2, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/CalleeHasNewZA), +}; +/// Determines if there are any streaming ABI issues with inlining \p Callee +/// into \p Caller. Returns the issues in the ArmStreamingInlinability bit enum +/// (multiple bits can be set). +static ArmStreamingInlinability +GetArmStreamingInlinability(const FunctionDecl *Caller, + const FunctionDecl *Callee) { bool CallerIsStreaming = IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true); bool CalleeIsStreaming = @@ -1156,17 +1167,41 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming( bool CallerIsStreamingCompatible = isStreamingCompatible(Caller); bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee); + ArmStreamingInlinability Inlinability = ArmStreamingInlinability::Ok; + if (!CalleeIsStreamingCompatible && - (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) + (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) { + Inlinability |= ArmStreamingInlinability::MismatchedStreamingCompatibility; + if (CalleeIsStreaming) + Inlinability |= ArmStreamingInlinability::IncompatibleStreamingModes; + } + if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) + if (NewAttr->isNewZA()) + Inlinability |= ArmStreamingInlinability::CalleeHasNewZA; + + return Inlinability; +} + +void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming( + CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller, + const FunctionDecl *Callee) const { + if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>()) + return; + + ArmStreamingInlinability Inlinability = + GetArmStreamingInlinability(Caller, Callee); + + if (bool(Inlinability & + ArmStreamingInlinability::MismatchedStreamingCompatibility)) CGM.getDiags().Report( - CallLoc, CalleeIsStreaming + CallLoc, bool(Inlinability & + ArmStreamingInlinability::IncompatibleStreamingModes) ? diag::err_function_always_inline_attribute_mismatch : diag::warn_function_always_inline_attribute_mismatch) << Caller->getDeclName() << Callee->getDeclName() << "streaming"; - if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) - if (NewAttr->isNewZA()) - CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za) - << Callee->getDeclName(); + if (bool(Inlinability & ArmStreamingInlinability::CalleeHasNewZA)) + CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za) + << Callee->getDeclName(); } // If the target does not have floating-point registers, but we are using a @@ -1200,6 +1235,13 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM, checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args, ReturnType); } +bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI( + const FunctionDecl *Caller, const FunctionDecl *Callee) const { + return Caller && Callee && + GetArmStreamingInlinability(Caller, Callee) != + ArmStreamingInlinability::Ok; +} + void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index, raw_ostream &Out) const { diff --git a/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c b/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c new file mode 100644 index 00000000000000..33ff8f33ca43f3 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c @@ -0,0 +1,74 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -o - | FileCheck %s + +// REQUIRES: aarch64-registered-target + +extern void was_inlined(void); + +#define __flatten __attribute__((flatten)) +void fn(void) { was_inlined(); } +void fn_streaming_compatible(void) __arm_streaming_compatible { was_inlined(); } +void fn_streaming(void) __arm_streaming { was_inlined(); } +__arm_locally_streaming void fn_locally_streaming(void) { was_inlined(); } +__arm_new("za") void fn_streaming_new_za(void) __arm_streaming { was_inlined(); } + +__flatten +void caller(void) { + fn(); + fn_streaming_compatible(); + fn_streaming(); + fn_locally_streaming(); + fn_streaming_new_za(); +} +// CHECK-LABEL: void @caller() +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @fn_streaming +// CHECK-NEXT: call void @fn_locally_streaming +// CHECK-NEXT: call void @fn_streaming_new_za + +__flatten void caller_streaming_compatible(void) __arm_streaming_compatible { + fn(); + fn_streaming_compatible(); + fn_streaming(); + fn_locally_streaming(); + fn_streaming_new_za(); +} +// CHECK-LABEL: void @caller_streaming_compatible() +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @fn +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @fn_streaming +// CHECK-NEXT: call void @fn_locally_streaming +// CHECK-NEXT: call void @fn_streaming_new_za + +__flatten void caller_streaming(void) __arm_streaming { + fn(); + fn_streaming_compatible(); + fn_streaming(); + fn_locally_streaming(); + fn_streaming_new_za(); +} +// CHECK-LABEL: void @caller_streaming() +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @fn +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @fn_streaming_new_za + +__flatten __arm_locally_streaming +void caller_locally_streaming(void) { + fn(); + fn_streaming_compatible(); + fn_streaming(); + fn_locally_streaming(); + fn_streaming_new_za(); +} +// CHECK-LABEL: void @caller_locally_streaming() +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @fn +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @was_inlined +// CHECK-NEXT: call void @fn_streaming_new_za >From cbbe0397324f4e6dec4b6e6a802be37370462e56 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell <benjamin.maxw...@arm.com> Date: Thu, 21 Nov 2024 12:03:03 +0000 Subject: [PATCH 2/2] Add note to `TargetInfo.h` --- clang/lib/CodeGen/TargetInfo.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 23ff476b0e33ce..75d70b0116acd2 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -101,6 +101,15 @@ class TargetCodeGenInfo { /// Returns true if inlining the function call would produce incorrect code /// for the current target and should be ignored (even with the always_inline /// or flatten attributes). + /// + /// Note: This probably should be handled in LLVM. However, the `alwaysinline` + /// attribute currently means the inliner will ignore mismatched attributes + /// (which sometimes can generate invalid code). So, this hook allows targets + /// to avoid adding the `alwaysinline` attributes based on attributes or other + /// target-specific reasons. + /// + /// See previous discussion here: + /// https://discourse.llvm.org/t/rfc-avoid-inlining-alwaysinline-functions-when-they-cannot-be-inlined/79528 virtual bool wouldInliningViolateFunctionCallABI(const FunctionDecl *Caller, const FunctionDecl *Callee) const { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits