llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Haojian Wu (hokein) <details> <summary>Changes</summary> The lifetime bound warning in Clang currently only considers initializations. This patch extends the warning to include assignments. - **NFC refactoring (first commit)**: this moves the existing lifetime checking code from `SemaInit.cpp` to a new location for better code isolation and reuse. - **Support for assignments of built-in pointer types (second commit)**: this is done is by reusing the existing statement-local implementation. Clang now warns if the pointer is assigned to a temporary object that being destoryed at the end of the full assignment expression. With this patch, we will detect more cases under the on-by-default diagnostic `-Wdangling`. I have added a new category for this specific diagnostic so that people can temporarily disable it if their codebase is not yet clean. This is the first step to address #<!-- -->63310, focusing only on pointer types. Support for C++ assignment operators will come in a follow-up patch. --- Patch is 109.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96475.diff 10 Files Affected: - (modified) clang/include/clang/Basic/DiagnosticGroups.td (+3-1) - (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+5) - (modified) clang/lib/Sema/CMakeLists.txt (+1) - (added) clang/lib/Sema/CheckExprLifetime.cpp (+1285) - (added) clang/lib/Sema/CheckExprLifetime.h (+39) - (modified) clang/lib/Sema/SemaExpr.cpp (+4) - (modified) clang/lib/Sema/SemaInit.cpp (+3-1229) - (modified) clang/test/Parser/compound_literal.c (+3-2) - (modified) clang/test/SemaCXX/attr-lifetimebound.cpp (+6) - (modified) clang/test/SemaCXX/warn-dangling-local.cpp (+2) ``````````diff diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 9b37d4bd3205b..e828d0c459651 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -430,6 +430,7 @@ def LogicalOpParentheses: DiagGroup<"logical-op-parentheses">; def LogicalNotParentheses: DiagGroup<"logical-not-parentheses">; def ShiftOpParentheses: DiagGroup<"shift-op-parentheses">; def OverloadedShiftOpParentheses: DiagGroup<"overloaded-shift-op-parentheses">; +def DanglingAssignment: DiagGroup<"dangling-assignment">; def DanglingElse: DiagGroup<"dangling-else">; def DanglingField : DiagGroup<"dangling-field">; def DanglingInitializerList : DiagGroup<"dangling-initializer-list">; @@ -437,7 +438,8 @@ def DanglingGsl : DiagGroup<"dangling-gsl">; def ReturnStackAddress : DiagGroup<"return-stack-address">; // Name of this warning in GCC def : DiagGroup<"return-local-addr", [ReturnStackAddress]>; -def Dangling : DiagGroup<"dangling", [DanglingField, +def Dangling : DiagGroup<"dangling", [DanglingAssignment, + DanglingField, DanglingInitializerList, DanglingGsl, ReturnStackAddress]>; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 25a87078a5709..207529660b37b 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10092,6 +10092,11 @@ def warn_new_dangling_initializer_list : Warning< "the allocated initializer list}0 " "will be destroyed at the end of the full-expression">, InGroup<DanglingInitializerList>; +def warn_dangling_pointer_assignment : Warning< + "object backing the pointer %0 " + "will be destroyed at the end of the full-expression">, + InGroup<DanglingAssignment>; + def warn_unsupported_lifetime_extension : Warning< "lifetime extension of " "%select{temporary|backing array of initializer list}0 created " diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index f152d243d39a5..980a83d4431aa 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -15,6 +15,7 @@ clang_tablegen(OpenCLBuiltins.inc -gen-clang-opencl-builtins add_clang_library(clangSema AnalysisBasedWarnings.cpp + CheckExprLifetime.cpp CodeCompleteConsumer.cpp DeclSpec.cpp DelayedDiagnostic.cpp diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp new file mode 100644 index 0000000000000..73b3fd2d3a138 --- /dev/null +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -0,0 +1,1285 @@ +//===--- CheckExprLifetime.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CheckExprLifetime.h" +#include "clang/AST/Expr.h" +#include "clang/Basic/DiagnosticSema.h" +#include "clang/Sema/Initialization.h" +#include "clang/Sema/Sema.h" +#include "llvm/ADT/PointerIntPair.h" + +namespace clang::sema { +namespace { +enum LifetimeKind { + /// The lifetime of a temporary bound to this entity ends at the end of the + /// full-expression, and that's (probably) fine. + LK_FullExpression, + + /// The lifetime of a temporary bound to this entity is extended to the + /// lifeitme of the entity itself. + LK_Extended, + + /// The lifetime of a temporary bound to this entity probably ends too soon, + /// because the entity is allocated in a new-expression. + LK_New, + + /// The lifetime of a temporary bound to this entity ends too soon, because + /// the entity is a return object. + LK_Return, + + /// The lifetime of a temporary bound to this entity ends too soon, because + /// the entity is the result of a statement expression. + LK_StmtExprResult, + + /// This is a mem-initializer: if it would extend a temporary (other than via + /// a default member initializer), the program is ill-formed. + LK_MemInitializer, +}; +using LifetimeResult = + llvm::PointerIntPair<const InitializedEntity *, 3, LifetimeKind>; +} + +/// Determine the declaration which an initialized entity ultimately refers to, +/// for the purpose of lifetime-extending a temporary bound to a reference in +/// the initialization of \p Entity. +static LifetimeResult getEntityLifetime( + const InitializedEntity *Entity, + const InitializedEntity *InitField = nullptr) { + // C++11 [class.temporary]p5: + switch (Entity->getKind()) { + case InitializedEntity::EK_Variable: + // The temporary [...] persists for the lifetime of the reference + return {Entity, LK_Extended}; + + + case InitializedEntity::EK_Member: + // For subobjects, we look at the complete object. + if (Entity->getParent()) + return getEntityLifetime(Entity->getParent(), Entity); + + // except: + // C++17 [class.base.init]p8: + // A temporary expression bound to a reference member in a + // mem-initializer is ill-formed. + // C++17 [class.base.init]p11: + // A temporary expression bound to a reference member from a + // default member initializer is ill-formed. + // + // The context of p11 and its example suggest that it's only the use of a + // default member initializer from a constructor that makes the program + // ill-formed, not its mere existence, and that it can even be used by + // aggregate initialization. + return {Entity, Entity->isDefaultMemberInitializer() ? LK_Extended + : LK_MemInitializer}; + + case InitializedEntity::EK_Binding: + // Per [dcl.decomp]p3, the binding is treated as a variable of reference + // type. + return {Entity, LK_Extended}; + + case InitializedEntity::EK_Parameter: + case InitializedEntity::EK_Parameter_CF_Audited: + // -- A temporary bound to a reference parameter in a function call + // persists until the completion of the full-expression containing + // the call. + return {nullptr, LK_FullExpression}; + + case InitializedEntity::EK_TemplateParameter: + // FIXME: This will always be ill-formed; should we eagerly diagnose it here? + return {nullptr, LK_FullExpression}; + + case InitializedEntity::EK_Result: + // -- The lifetime of a temporary bound to the returned value in a + // function return statement is not extended; the temporary is + // destroyed at the end of the full-expression in the return statement. + return {nullptr, LK_Return}; + + case InitializedEntity::EK_StmtExprResult: + // FIXME: Should we lifetime-extend through the result of a statement + // expression? + return {nullptr, LK_StmtExprResult}; + + case InitializedEntity::EK_New: + // -- A temporary bound to a reference in a new-initializer persists + // until the completion of the full-expression containing the + // new-initializer. + return {nullptr, LK_New}; + + case InitializedEntity::EK_Temporary: + case InitializedEntity::EK_CompoundLiteralInit: + case InitializedEntity::EK_RelatedResult: + // We don't yet know the storage duration of the surrounding temporary. + // Assume it's got full-expression duration for now, it will patch up our + // storage duration if that's not correct. + return {nullptr, LK_FullExpression}; + + case InitializedEntity::EK_ArrayElement: + // For subobjects, we look at the complete object. + return getEntityLifetime(Entity->getParent(), InitField); + + case InitializedEntity::EK_Base: + // For subobjects, we look at the complete object. + if (Entity->getParent()) + return getEntityLifetime(Entity->getParent(), InitField); + return {InitField, LK_MemInitializer}; + + case InitializedEntity::EK_Delegating: + // We can reach this case for aggregate initialization in a constructor: + // struct A { int &&r; }; + // struct B : A { B() : A{0} {} }; + // In this case, use the outermost field decl as the context. + return {InitField, LK_MemInitializer}; + + case InitializedEntity::EK_BlockElement: + case InitializedEntity::EK_LambdaToBlockConversionBlockElement: + case InitializedEntity::EK_LambdaCapture: + case InitializedEntity::EK_VectorElement: + case InitializedEntity::EK_ComplexElement: + return {nullptr, LK_FullExpression}; + + case InitializedEntity::EK_Exception: + // FIXME: Can we diagnose lifetime problems with exceptions? + return {nullptr, LK_FullExpression}; + + case InitializedEntity::EK_ParenAggInitMember: + // -- A temporary object bound to a reference element of an aggregate of + // class type initialized from a parenthesized expression-list + // [dcl.init, 9.3] persists until the completion of the full-expression + // containing the expression-list. + return {nullptr, LK_FullExpression}; + } + + llvm_unreachable("unknown entity kind"); +} + +namespace { +enum ReferenceKind { + /// Lifetime would be extended by a reference binding to a temporary. + RK_ReferenceBinding, + /// Lifetime would be extended by a std::initializer_list object binding to + /// its backing array. + RK_StdInitializerList, +}; + +/// A temporary or local variable. This will be one of: +/// * A MaterializeTemporaryExpr. +/// * A DeclRefExpr whose declaration is a local. +/// * An AddrLabelExpr. +/// * A BlockExpr for a block with captures. +using Local = Expr*; + +/// Expressions we stepped over when looking for the local state. Any steps +/// that would inhibit lifetime extension or take us out of subexpressions of +/// the initializer are included. +struct IndirectLocalPathEntry { + enum EntryKind { + DefaultInit, + AddressOf, + VarInit, + LValToRVal, + LifetimeBoundCall, + TemporaryCopy, + LambdaCaptureInit, + GslReferenceInit, + GslPointerInit + } Kind; + Expr *E; + union { + const Decl *D = nullptr; + const LambdaCapture *Capture; + }; + IndirectLocalPathEntry() {} + IndirectLocalPathEntry(EntryKind K, Expr *E) : Kind(K), E(E) {} + IndirectLocalPathEntry(EntryKind K, Expr *E, const Decl *D) + : Kind(K), E(E), D(D) {} + IndirectLocalPathEntry(EntryKind K, Expr *E, const LambdaCapture *Capture) + : Kind(K), E(E), Capture(Capture) {} +}; + +using IndirectLocalPath = llvm::SmallVectorImpl<IndirectLocalPathEntry>; + +struct RevertToOldSizeRAII { + IndirectLocalPath &Path; + unsigned OldSize = Path.size(); + RevertToOldSizeRAII(IndirectLocalPath &Path) : Path(Path) {} + ~RevertToOldSizeRAII() { Path.resize(OldSize); } +}; + +using LocalVisitor = llvm::function_ref<bool(IndirectLocalPath &Path, Local L, + ReferenceKind RK)>; +} // namespace + +static bool isVarOnPath(IndirectLocalPath &Path, VarDecl *VD) { + for (auto E : Path) + if (E.Kind == IndirectLocalPathEntry::VarInit && E.D == VD) + return true; + return false; +} + +static bool pathContainsInit(IndirectLocalPath &Path) { + return llvm::any_of(Path, [=](IndirectLocalPathEntry E) { + return E.Kind == IndirectLocalPathEntry::DefaultInit || + E.Kind == IndirectLocalPathEntry::VarInit; + }); +} + +static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path, + Expr *Init, LocalVisitor Visit, + bool RevisitSubinits, + bool EnableLifetimeWarnings); + +static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path, + Expr *Init, ReferenceKind RK, + LocalVisitor Visit, + bool EnableLifetimeWarnings); + +template <typename T> static bool isRecordWithAttr(QualType Type) { + if (auto *RD = Type->getAsCXXRecordDecl()) + return RD->hasAttr<T>(); + return false; +} + +// Decl::isInStdNamespace will return false for iterators in some STL +// implementations due to them being defined in a namespace outside of the std +// namespace. +static bool isInStlNamespace(const Decl *D) { + const DeclContext *DC = D->getDeclContext(); + if (!DC) + return false; + if (const auto *ND = dyn_cast<NamespaceDecl>(DC)) + if (const IdentifierInfo *II = ND->getIdentifier()) { + StringRef Name = II->getName(); + if (Name.size() >= 2 && Name.front() == '_' && + (Name[1] == '_' || isUppercase(Name[1]))) + return true; + } + + return DC->isStdNamespace(); +} + +static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) { + if (auto *Conv = dyn_cast_or_null<CXXConversionDecl>(Callee)) + if (isRecordWithAttr<PointerAttr>(Conv->getConversionType())) + return true; + if (!isInStlNamespace(Callee->getParent())) + return false; + if (!isRecordWithAttr<PointerAttr>( + Callee->getFunctionObjectParameterType()) && + !isRecordWithAttr<OwnerAttr>(Callee->getFunctionObjectParameterType())) + return false; + if (Callee->getReturnType()->isPointerType() || + isRecordWithAttr<PointerAttr>(Callee->getReturnType())) { + if (!Callee->getIdentifier()) + return false; + return llvm::StringSwitch<bool>(Callee->getName()) + .Cases("begin", "rbegin", "cbegin", "crbegin", true) + .Cases("end", "rend", "cend", "crend", true) + .Cases("c_str", "data", "get", true) + // Map and set types. + .Cases("find", "equal_range", "lower_bound", "upper_bound", true) + .Default(false); + } else if (Callee->getReturnType()->isReferenceType()) { + if (!Callee->getIdentifier()) { + auto OO = Callee->getOverloadedOperator(); + return OO == OverloadedOperatorKind::OO_Subscript || + OO == OverloadedOperatorKind::OO_Star; + } + return llvm::StringSwitch<bool>(Callee->getName()) + .Cases("front", "back", "at", "top", "value", true) + .Default(false); + } + return false; +} + +static bool shouldTrackFirstArgument(const FunctionDecl *FD) { + if (!FD->getIdentifier() || FD->getNumParams() != 1) + return false; + const auto *RD = FD->getParamDecl(0)->getType()->getPointeeCXXRecordDecl(); + if (!FD->isInStdNamespace() || !RD || !RD->isInStdNamespace()) + return false; + if (!isRecordWithAttr<PointerAttr>(QualType(RD->getTypeForDecl(), 0)) && + !isRecordWithAttr<OwnerAttr>(QualType(RD->getTypeForDecl(), 0))) + return false; + if (FD->getReturnType()->isPointerType() || + isRecordWithAttr<PointerAttr>(FD->getReturnType())) { + return llvm::StringSwitch<bool>(FD->getName()) + .Cases("begin", "rbegin", "cbegin", "crbegin", true) + .Cases("end", "rend", "cend", "crend", true) + .Case("data", true) + .Default(false); + } else if (FD->getReturnType()->isReferenceType()) { + return llvm::StringSwitch<bool>(FD->getName()) + .Cases("get", "any_cast", true) + .Default(false); + } + return false; +} + +static void handleGslAnnotatedTypes(IndirectLocalPath &Path, Expr *Call, + LocalVisitor Visit) { + auto VisitPointerArg = [&](const Decl *D, Expr *Arg, bool Value) { + // We are not interested in the temporary base objects of gsl Pointers: + // Temp().ptr; // Here ptr might not dangle. + if (isa<MemberExpr>(Arg->IgnoreImpCasts())) + return; + // Once we initialized a value with a reference, it can no longer dangle. + if (!Value) { + for (const IndirectLocalPathEntry &PE : llvm::reverse(Path)) { + if (PE.Kind == IndirectLocalPathEntry::GslReferenceInit) + continue; + if (PE.Kind == IndirectLocalPathEntry::GslPointerInit) + return; + break; + } + } + Path.push_back({Value ? IndirectLocalPathEntry::GslPointerInit + : IndirectLocalPathEntry::GslReferenceInit, + Arg, D}); + if (Arg->isGLValue()) + visitLocalsRetainedByReferenceBinding(Path, Arg, RK_ReferenceBinding, + Visit, + /*EnableLifetimeWarnings=*/true); + else + visitLocalsRetainedByInitializer(Path, Arg, Visit, true, + /*EnableLifetimeWarnings=*/true); + Path.pop_back(); + }; + + if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Call)) { + const auto *MD = cast_or_null<CXXMethodDecl>(MCE->getDirectCallee()); + if (MD && shouldTrackImplicitObjectArg(MD)) + VisitPointerArg(MD, MCE->getImplicitObjectArgument(), + !MD->getReturnType()->isReferenceType()); + return; + } else if (auto *OCE = dyn_cast<CXXOperatorCallExpr>(Call)) { + FunctionDecl *Callee = OCE->getDirectCallee(); + if (Callee && Callee->isCXXInstanceMember() && + shouldTrackImplicitObjectArg(cast<CXXMethodDecl>(Callee))) + VisitPointerArg(Callee, OCE->getArg(0), + !Callee->getReturnType()->isReferenceType()); + return; + } else if (auto *CE = dyn_cast<CallExpr>(Call)) { + FunctionDecl *Callee = CE->getDirectCallee(); + if (Callee && shouldTrackFirstArgument(Callee)) + VisitPointerArg(Callee, CE->getArg(0), + !Callee->getReturnType()->isReferenceType()); + return; + } + + if (auto *CCE = dyn_cast<CXXConstructExpr>(Call)) { + const auto *Ctor = CCE->getConstructor(); + const CXXRecordDecl *RD = Ctor->getParent(); + if (CCE->getNumArgs() > 0 && RD->hasAttr<PointerAttr>()) + VisitPointerArg(Ctor->getParamDecl(0), CCE->getArgs()[0], true); + } +} + +static bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { + const TypeSourceInfo *TSI = FD->getTypeSourceInfo(); + if (!TSI) + return false; + // Don't declare this variable in the second operand of the for-statement; + // GCC miscompiles that by ending its lifetime before evaluating the + // third operand. See gcc.gnu.org/PR86769. + AttributedTypeLoc ATL; + for (TypeLoc TL = TSI->getTypeLoc(); + (ATL = TL.getAsAdjusted<AttributedTypeLoc>()); + TL = ATL.getModifiedLoc()) { + if (ATL.getAttrAs<LifetimeBoundAttr>()) + return true; + } + + // Assume that all assignment operators with a "normal" return type return + // *this, that is, an lvalue reference that is the same type as the implicit + // object parameter (or the LHS for a non-member operator$=). + OverloadedOperatorKind OO = FD->getDeclName().getCXXOverloadedOperator(); + if (OO == OO_Equal || isCompoundAssignmentOperator(OO)) { + QualType RetT = FD->getReturnType(); + if (RetT->isLValueReferenceType()) { + ASTContext &Ctx = FD->getASTContext(); + QualType LHST; + auto *MD = dyn_cast<CXXMethodDecl>(FD); + if (MD && MD->isCXXInstanceMember()) + LHST = Ctx.getLValueReferenceType(MD->getFunctionObjectParameterType()); + else + LHST = MD->getParamDecl(0)->getType(); + if (Ctx.hasSameType(RetT, LHST)) + return true; + } + } + + return false; +} + +static void visitLifetimeBoundArguments(IndirectLocalPath &Path, Expr *Call, + LocalVisitor Visit) { + const FunctionDecl *Callee; + ArrayRef<Expr*> Args; + + if (auto *CE = dyn_cast<CallExpr>(Call)) { + Callee = CE->getDirectCallee(); + Args = llvm::ArrayRef(CE->getArgs(), CE->getNumArgs()); + } else { + auto *CCE = cast<CXXConstructExpr>(Call); + Callee = CCE->getConstructor(); + Args = llvm::ArrayRef(CCE->getArgs(), CCE->getNumArgs()); + } + if (!Callee) + return; + + Expr *ObjectArg = nullptr; + if (isa<CXXOperatorCallExpr>(Call) && Callee->isCXXInstanceMember()) { + ObjectArg = Args[0]; + Args = Args.slice(1); + } else if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Call)) { + ObjectArg = MCE->getImplicitObjectArgument(); + } + + auto VisitLifetimeBoundArg = [&](const Decl *D, Expr *Arg) { + Path.push_back({IndirectLocalPathEntry::LifetimeBoundCall, Arg, D}); + if (Arg->isGLValue()) + visitLocalsRetainedByReferenceBinding(Path, Arg, RK_ReferenceBinding, + ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/96475 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits