https://github.com/usx95 created https://github.com/llvm/llvm-project/pull/142313
None >From c8f62770b164216bd67810a1035996180ec6c3d8 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena <u...@google.com> Date: Sun, 1 Jun 2025 15:44:37 +0000 Subject: [PATCH] Introduce Intra-procedural lifetime analysis in Clang --- .../clang/Analysis/Analyses/LifetimeSafety.h | 13 + clang/lib/Analysis/CMakeLists.txt | 1 + clang/lib/Analysis/LifetimeSafety.cpp | 708 ++++++++++++++++++ clang/lib/Sema/AnalysisBasedWarnings.cpp | 8 + clang/test/Sema/warn-lifetime-safety.cpp | 120 +++ llvm/include/llvm/ADT/ImmutableMap.h | 2 +- 6 files changed, 851 insertions(+), 1 deletion(-) create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety.h create mode 100644 clang/lib/Analysis/LifetimeSafety.cpp create mode 100644 clang/test/Sema/warn-lifetime-safety.cpp diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h new file mode 100644 index 0000000000000..5b33d582f7278 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -0,0 +1,13 @@ +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H +#include "clang/AST/DeclBase.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +namespace clang { + +void runLifetimeAnalysis(const DeclContext &dc, const CFG &cfg, + AnalysisDeclContext &ac); + +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 8cd3990db4c3e..0523d92480cb3 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -21,6 +21,7 @@ add_clang_library(clangAnalysis FixitUtil.cpp IntervalPartition.cpp IssueHash.cpp + LifetimeSafety.cpp LiveVariables.cpp MacroExpansionContext.cpp ObjCNoReturn.cpp diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp new file mode 100644 index 0000000000000..1124badcd9e2c 
--- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -0,0 +1,708 @@ +#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/Support/Debug.h" +#include <vector> + +namespace clang { +namespace { + +constexpr char kLifetimeFacts[] = "LifetimeFacts"; +constexpr char kLifetimeDataflow[] = "LifetimeDataflow"; + +/// TODO: Why do we need this? +struct Point { + const clang::CFGBlock *Block; + // Index into Block->Elements(). + // Value is Block->size() if it's a point *after* the last element / before + // terminator. + unsigned ElementIndex; + + Point(const clang::CFGBlock *B = nullptr, unsigned Idx = 0) + : Block(B), ElementIndex(Idx) {} + + bool operator==(const Point &Other) const { + return Block == Other.Block && ElementIndex == Other.ElementIndex; + } +}; + +struct Path { + llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr; + + enum class Kind : uint8_t { + StackVariableAddress, + HeapAllocationAddress, + FieldAddress, + ArrayElementAddress, + TemporaryObjectAddress, + StaticOrGlobalAddress, + StringLiteralAddress + }; + + Kind PathKind; + + Path(llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> P, + Kind K) + : Ptr(P), PathKind(K) {} + /// TODO: Add accessors or other methods as needed +}; + +using LoanID = uint32_t; +using OriginID = uint32_t; + +struct LoanInfo { + /// TODO: Represent opaque loans. 
+ LoanID ID; + Path SourcePath; + SourceLocation IssueLoc; + + LoanInfo(LoanID id, Path path, SourceLocation loc) + : ID(id), SourcePath(path), IssueLoc(loc) {} +}; + +enum class OriginKind : uint8_t { Variable, ExpressionResult }; + +struct OriginInfo { + OriginID ID; + OriginKind Kind; + /// TODO: Finalise this representation. Maybe + union { + const clang::ValueDecl *Decl; + const clang::Expr *Expression; + }; + OriginInfo(OriginID id, OriginKind kind, const clang::ValueDecl *D) + : ID(id), Kind(kind), Decl(D) {} + OriginInfo(OriginID id, OriginKind kind, const clang::Expr *E) + : ID(id), Kind(kind), Expression(E) {} +}; + +enum class FactKind : uint8_t { Issue, Expire, AssignOrigin, ReturnOfOrigin }; + +class LoanManager { +public: + LoanManager() = default; + + LoanID getNextLoanID() { return NextLoanIDVal++; } + + LoanInfo &addLoanInfo(LoanID id, Path path, SourceLocation loc) { + AllLoans.emplace_back(id, path, loc); + return AllLoans.back(); + } + + const LoanInfo *getLoanInfo(LoanID id) const { + if (id < AllLoans.size()) + return &AllLoans[id]; + return nullptr; + } + llvm::ArrayRef<LoanInfo> getLoanInfos() const { return AllLoans; } + +private: + LoanID NextLoanIDVal = 0; + std::vector<LoanInfo> AllLoans; +}; + +class OriginManager { +public: + OriginManager() = default; + + OriginID getNextOriginID() { return NextOriginIDVal++; } + OriginInfo &addOriginInfo(OriginID id, const clang::ValueDecl *D) { + assert(D != nullptr); + AllOrigins.emplace_back(id, OriginKind::Variable, D); + return AllOrigins.back(); + } + OriginInfo &addOriginInfo(OriginID id, const clang::Expr *E) { + assert(E != nullptr); + AllOrigins.emplace_back(id, OriginKind::ExpressionResult, E); + return AllOrigins.back(); + } + + OriginID getOrCreate(const Expr *E) { + // Canonicalize the expression by looking through parentheses and + // implicit casts. 
This helps ensure that syntactically different but + // semantically equivalent expressions (for our origin tracking purposes) + // map to the same OriginID. + auto It = ExprToOriginID.find(E); + if (It != ExprToOriginID.end()) + return It->second; + + // Reconsider this. We need to decide whether an Origin is being read or + // being assigned to. We consider reads as LValue to RValue casts. + // const Expr *CanonicalExpr = E->IgnoreCasts(); + + if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) { + // Origin of DeclRefExpr is that of the declaration it refers to. + return getOrCreate(DRE->getDecl()); + } + + // It = ExprToOriginID.find(CanonicalExpr); + // if (It != ExprToOriginID.end()) + // return It->second; + OriginID NewID = getNextOriginID(); + addOriginInfo(NewID, E); // Store the canonical expression + ExprToOriginID[E] = NewID; + return NewID; + } + + const OriginInfo *getOriginInfo(OriginID id) const { + if (id < AllOrigins.size()) + return &AllOrigins[id]; + return nullptr; + } + + llvm::ArrayRef<OriginInfo> getOriginInfos() const { return AllOrigins; } + + OriginID getOrCreate(const ValueDecl *D) { + auto It = DeclToOriginID.find(D); + if (It != DeclToOriginID.end()) + return It->second; + OriginID NewID = getNextOriginID(); + addOriginInfo(NewID, D); + DeclToOriginID[D] = NewID; + return NewID; + } + +private: + OriginID NextOriginIDVal = 0; + std::vector<OriginInfo> AllOrigins; + llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID; + llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID; +}; + +class Fact { + FactKind Kind; + +protected: + Point P; + Fact(FactKind K, Point Pt) : Kind(K), P(Pt) {} + +public: + virtual ~Fact() = default; + FactKind getKind() const { return Kind; } + Point getPoint() const { return P; } + + template <typename T> const T *getAs() const { + if (T::classof(this)) + return static_cast<const T *>(this); + return nullptr; + } + + virtual void dump(llvm::raw_ostream &OS) const { + OS << "Fact (Kind: " << 
static_cast<int>(Kind) << ", Point: B" + << P.Block->getBlockID() << ":" << P.ElementIndex << ")\n"; + } +}; + +class IssueFact : public Fact { + LoanID LID; + OriginID OID; + +public: + IssueFact(LoanID LID, OriginID OID, Point Pt) + : Fact(FactKind::Issue, Pt), LID(LID), OID(OID) {} + LoanID getLoanID() const { return LID; } + OriginID getOriginID() const { return OID; } + static bool classof(const Fact *F) { return F->getKind() == FactKind::Issue; } + void dump(llvm::raw_ostream &OS) const override { + OS << "Issue (LoanID: " << getLoanID() << ", OriginID: " << getOriginID() + << ")\n"; + } +}; + +class ExpireFact : public Fact { + LoanID LID; + +public: + ExpireFact(LoanID LID, Point Pt) : Fact(FactKind::Expire, Pt), LID(LID) {} + LoanID getLoanID() const { return LID; } + static bool classof(const Fact *F) { + return F->getKind() == FactKind::Expire; + } + void dump(llvm::raw_ostream &OS) const override { + OS << "Expire (LoanID: " << getLoanID() << ")\n"; + } +}; + +class AssignOriginFact : public Fact { + OriginID OIDDest; + OriginID OIDSrc; + +public: + AssignOriginFact(OriginID OIDDest, OriginID OIDSrc, Point Pt) + : Fact(FactKind::AssignOrigin, Pt), OIDDest(OIDDest), OIDSrc(OIDSrc) {} + OriginID getDestOriginID() const { return OIDDest; } + OriginID getSrcOriginID() const { return OIDSrc; } + static bool classof(const Fact *F) { + return F->getKind() == FactKind::AssignOrigin; + } + void dump(llvm::raw_ostream &OS) const override { + OS << "AssignOrigin (DestID: " << getDestOriginID() + << ", SrcID: " << getSrcOriginID() << ")\n"; + } +}; + +class ReturnOfOriginFact : public Fact { + OriginID OID; + +public: + ReturnOfOriginFact(OriginID OID, Point Pt) + : Fact(FactKind::ReturnOfOrigin, Pt), OID(OID) {} + OriginID getReturnedOriginID() const { return OID; } + static bool classof(const Fact *F) { + return F->getKind() == FactKind::ReturnOfOrigin; + } + void dump(llvm::raw_ostream &OS) const override { + OS << "ReturnOfOrigin (OriginID: " << 
getReturnedOriginID() << ")\n"; + } +}; + +class FactStore { +public: + llvm::ArrayRef<Fact *> getFacts(const CFGBlock *B) const { + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) + return It->second; + return {}; + } + + void addFact(const CFGBlock *B, Fact *NewFact) { + BlockToFactsMap[B].push_back(NewFact); + DEBUG_WITH_TYPE("dev", NewFact->dump(llvm::dbgs())); + } + + template <typename FactType, typename... Args> + FactType *createFact(Args &&...args) { + void *Mem = FactAllocator.Allocate<FactType>(); + return new (Mem) FactType(std::forward<Args>(args)...); + } + + void dump(const CFG &Cfg, AnalysisDeclContext &AC) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Lifetime Analysis Facts:\n"; + llvm::dbgs() << "==========================================\n"; + if (const Decl *D = AC.getDecl()) { + if (const auto *ND = dyn_cast<NamedDecl>(D)) + llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; + } + ForwardDataflowWorklist worklist(Cfg, AC); + for (const CFGBlock *B : Cfg.const_nodes()) + worklist.enqueueBlock(B); + while (const CFGBlock *B = worklist.dequeue()) { + llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) { + for (const Fact *F : It->second) { + llvm::dbgs() << " "; + F->dump(llvm::dbgs()); + } + } + llvm::dbgs() << " End of Block\n"; + } + } + +private: + /// TODO: Consider using BumpVector for performance if BlockToFactsMap becomes + // a bottleneck. For now, std::vector is simpler for managing Fact pointers, + // especially with custom deletion logic. + llvm::DenseMap<const clang::CFGBlock *, std::vector<Fact *>> BlockToFactsMap; + llvm::BumpPtrAllocator FactAllocator; +}; + +class FactsContext { +public: + /// TODO: Rename to Facts, Loans and Origins. 
+ FactStore FMgr; + LoanManager LMgr; + OriginManager OMgr; +}; + +class FactGenerator : public ConstStmtVisitor<FactGenerator> { + using Base = ConstStmtVisitor<FactGenerator>; + +public: + FactGenerator(const CFG &Cfg, FactsContext &DCtx) + : DCtx(DCtx), Cfg(Cfg), CurrentBlock(nullptr) {} + + void generateFacts() { + for (const CFGBlock *Block : Cfg.const_nodes()) { + CurrentBlock = Block; + unsigned ElementIdx = 0; + for (const CFGElement &Element : *Block) { + CurrentPoint = Point(Block, ElementIdx); + DEBUG_WITH_TYPE("dev", Element.dumpToStream(llvm::dbgs())); + if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>()) { + DEBUG_WITH_TYPE("dev", CS->getStmt()->dumpColor()); + Visit(CS->getStmt()); + } else if (std::optional<CFGAutomaticObjDtor> DtorOpt = + Element.getAs<CFGAutomaticObjDtor>()) { + HandleDestructor(*DtorOpt); + } + /// TODO: Handle other CFGElements if necessary for specific facts. + ElementIdx++; + } + + // Handle Terminator. TODO: Why ? + if (const Stmt *Term = Block->getTerminatorStmt()) { + CurrentPoint = Point(Block, ElementIdx); // Point of the terminator + Visit(Term); + } + } + } + + void HandleDestructor(const CFGAutomaticObjDtor &DtorOpt) { + /// TODO: Also handle trivial destructors (e.g., for `int` + // variables) which will never have a CFGAutomaticObjDtor node. + /// TODO: Handle loans to temporaries. + const VarDecl *DestructedVD = DtorOpt.getVarDecl(); + if (!DestructedVD) + return; + // Iterate through all loans to see if any expire. + for (const LoanInfo &Loan : DCtx.LMgr.getLoanInfos()) { + const Path &LoanPath = Loan.SourcePath; + // Check if the loan is for a stack variable and if that variable + // is the one being destructed. 
+ if (LoanPath.PathKind == Path::Kind::StackVariableAddress && + isa<const clang::ValueDecl *>(LoanPath.Ptr)) { + if (cast<const clang::ValueDecl *>(LoanPath.Ptr) == DestructedVD) { + DCtx.FMgr.addFact(CurrentBlock, DCtx.FMgr.createFact<ExpireFact>( + Loan.ID, CurrentPoint)); + } + } + } + } + + void VisitDeclStmt(const DeclStmt *DS) { + for (const Decl *D : DS->decls()) { + if (const auto *VD = dyn_cast<VarDecl>(D)) { + if (HasOrigin(VD->getType())) { + if (const Expr *InitExpr = VD->getInit()) { + OriginID DestOID = DCtx.OMgr.getOrCreate(VD); + OriginID SrcOID = DCtx.OMgr.getOrCreate(InitExpr); + DCtx.FMgr.addFact(CurrentBlock, + DCtx.FMgr.createFact<AssignOriginFact>( + DestOID, SrcOID, CurrentPoint)); + } + } + } + } + } + + void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { + if (!HasOrigin(ICE->getType())) + return; + // An ImplicitCastExpr node itself gets an origin, which flows from the + // origin of its sub-expression (after stripping its own parens/casts). + if (ICE->getCastKind() == CK_LValueToRValue) { + OriginID IceOID = DCtx.OMgr.getOrCreate(ICE); + OriginID SubExprOID = DCtx.OMgr.getOrCreate(ICE->getSubExpr()); + DCtx.FMgr.addFact(CurrentBlock, DCtx.FMgr.createFact<AssignOriginFact>( + IceOID, SubExprOID, CurrentPoint)); + } + } + + void VisitUnaryOperator(const UnaryOperator *UO) { + /// TODO: Explain. + if (UO->getOpcode() == UO_AddrOf) { + const Expr *SubExpr = UO->getSubExpr(); + if (const auto *DRE = dyn_cast<DeclRefExpr>(SubExpr)) { + if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { + // Check if it's a local variable (stack-allocated). + // We might need a more robust check depending on the analysis's + // definition of "local". For now, hasLocalStorage() is a good + // start. 
+ if (VD->hasLocalStorage()) { + OriginID OID = DCtx.OMgr.getOrCreate(UO); + Path AddrOfLocalVarPath(VD, Path::Kind::StackVariableAddress); + LoanID LID = DCtx.LMgr.getNextLoanID(); + DCtx.LMgr.addLoanInfo(LID, AddrOfLocalVarPath, + UO->getOperatorLoc()); + DCtx.FMgr.addFact(CurrentBlock, DCtx.FMgr.createFact<IssueFact>( + LID, OID, CurrentPoint)); + } + } + } + } + } + + void VisitReturnStmt(const ReturnStmt *RS) { + if (const Expr *RetExpr = RS->getRetValue()) { + if (HasOrigin(RetExpr->getType())) { + // Assuming getOrCreateOriginID can handle expressions directly + // or you have a way to map RetExpr to an existing OriginID. + OriginID OID = DCtx.OMgr.getOrCreate(RetExpr); + DCtx.FMgr.addFact( + CurrentBlock, + DCtx.FMgr.createFact<ReturnOfOriginFact>(OID, CurrentPoint)); + } + } + } + + void VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->isAssignmentOp()) { + const Expr *LHSExpr = BO->getLHS(); + const Expr *RHSExpr = BO->getRHS(); + + // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var` + // LHS must be a pointer/reference type that can be an origin. + // RHS must also represent an origin (either another pointer/ref or an + // address-of). + if (const auto *DRE_LHS = dyn_cast<DeclRefExpr>(LHSExpr)) { + if (const auto *VD_LHS = + dyn_cast<ValueDecl>(DRE_LHS->getDecl()->getCanonicalDecl()); + VD_LHS && HasOrigin(VD_LHS->getType())) { + OriginID DestOID = DCtx.OMgr.getOrCreate(VD_LHS); + OriginID SrcOID = DCtx.OMgr.getOrCreate(RHSExpr); + DCtx.FMgr.addFact(CurrentBlock, + DCtx.FMgr.createFact<AssignOriginFact>( + DestOID, SrcOID, CurrentPoint)); + } + } + } + } + +private: + /// TODO: Document. + // Check if a type has an origin. 
+ bool HasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); } + + FactsContext &DCtx; + const CFG &Cfg; + const CFGBlock *CurrentBlock; + Point CurrentPoint; +}; + +// ========================================================================= // +// The Dataflow Lattice +// ========================================================================= // + +// Using LLVM's immutable collections is efficient for dataflow analysis +// as it avoids deep copies during state transitions. +// TODO(opt): Consider using a bitset to represent the set of loans. +// TODO: Consider making this private to Lattice. +using LoanSet = llvm::ImmutableSet<LoanID>; +using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>; + +/// A context object to hold the factories for immutable collections, ensuring +/// that all created states share the same underlying memory management. +struct LifetimeAnalysisContext { + OriginLoanMap::Factory MapFact; + LoanSet::Factory SetFact; + + LoanSet CreateLoan(LoanID LID) { + return SetFact.add(SetFact.getEmptySet(), LID); + } +}; + +/// LifetimeLattice represents the state of our analysis at a given program +/// point. It is an immutable object, and all operations produce a new +/// instance rather than modifying the existing one. +struct LifetimeLattice { + // The map from an origin to the set of loans it contains. + OriginLoanMap Origins = OriginLoanMap(nullptr); + + explicit LifetimeLattice(const OriginLoanMap &S) : Origins(S) {} + LifetimeLattice() = default; + + bool operator==(const LifetimeLattice &Other) const { + return Origins == Other.Origins; + } + bool operator!=(const LifetimeLattice &Other) const { + return !(*this == Other); + } + + LoanSet getLoans(OriginID OID, LifetimeAnalysisContext &Ctx) const { + if (auto *Loans = Origins.lookup(OID)) + return *Loans; + return Ctx.SetFact.getEmptySet(); + } + + /// TODO: UpperCamelCase function names ? 
+ LifetimeLattice merge(const LifetimeLattice &Other, + LifetimeAnalysisContext &Ctx) const { + /// TODO(opt): Prefer merging from a smaller state to a larger state + if (Origins.getHeight() < Other.Origins.getHeight()) + return Other.merge(*this, Ctx); + OriginLoanMap MergedState = Origins; + // For each origin in the other map, union its loan set with ours. + for (const auto &Entry : Other.Origins) { + OriginID OID = Entry.first; + LoanSet OtherLoanSet = Entry.second; + /// TODO(opt): Prefer merging from a smaller set to a larger set. + llvm::ImmutableSet<LoanID> UnionLoanSet = getLoans(OID, Ctx); + for (LoanID LID : OtherLoanSet) + UnionLoanSet = Ctx.SetFact.add(UnionLoanSet, LID); + MergedState = Ctx.MapFact.add(MergedState, OID, UnionLoanSet); + } + return LifetimeLattice(MergedState); + } + + /// Dumps the current state to a stream for debugging purposes. + void dump(llvm::raw_ostream &OS) const { + OS << "LifetimeLattice State:\n"; + if (Origins.isEmpty()) { + OS << " <empty>\n"; + return; + } + for (const auto &Entry : Origins) { + OS << " Origin " << Entry.first << " -> { "; + for (const LoanID &LID : Entry.second) { + OS << "L" << LID << " "; + } + OS << "}\n"; + } + } +}; + +// ========================================================================= // +// The Transfer Function +// ========================================================================= // +class Transferer { + FactStore &AllFacts; + LifetimeAnalysisContext &LAC; + +public: + explicit Transferer(FactStore &F, LifetimeAnalysisContext &LAC) + : AllFacts(F), LAC(LAC) {} + + /// Computes the exit state of a block by applying all its facts sequentially + /// to a given entry state. + /// TODO: We might need to store intermediate states per-fact in the block. 
+ LifetimeLattice TransferBlock(const CFGBlock *Block, + LifetimeLattice EntryState) { + LifetimeLattice BlockState = EntryState; + llvm::ArrayRef<Fact *> Facts = AllFacts.getFacts(Block); + + for (Fact *F : Facts) { + BlockState = TransferFact(BlockState, F); + } + return BlockState; + } + +private: + LifetimeLattice TransferFact(LifetimeLattice In, const Fact *F) { + switch (F->getKind()) { + case FactKind::Issue: + return TransferIssue(In, cast<IssueFact>(F)); + case FactKind::AssignOrigin: + return TransferAssignOrigin(In, cast<AssignOriginFact>(F)); + // Expire and ReturnOfOrigin facts don't modify the Origins. + case FactKind::Expire: + case FactKind::ReturnOfOrigin: + return In; + } + llvm_unreachable("Unknown fact kind"); + } + + /// A new loan is issued to the origin. Old loans are erased. + LifetimeLattice TransferIssue(LifetimeLattice In, const IssueFact *F) { + OriginID OID = F->getOriginID(); + LoanID LID = F->getLoanID(); + DEBUG_WITH_TYPE("transfer", llvm::dbgs() << "Issue Fact: Origin " << OID + << " gets Loan " << LID << "\n"); + return LifetimeLattice( + LAC.MapFact.add(In.Origins, OID, LAC.CreateLoan(LID))); + } + + /// The destination origin's loan set is replaced by the source's. + /// This implicitly "resets" the old loans of the destination. + LifetimeLattice TransferAssignOrigin(LifetimeLattice InState, + const AssignOriginFact *F) { + OriginID DestOID = F->getDestOriginID(); + OriginID SrcOID = F->getSrcOriginID(); + LoanSet SrcLoans = InState.getLoans(SrcOID, LAC); + return LifetimeLattice(LAC.MapFact.add(InState.Origins, DestOID, SrcLoans)); + } +}; +// ========================================================================= // +// The Lifetime Analysis Driver +// ========================================================================= // + +/// TODO: Document. +class LifetimeAnalysis { + const CFG &Cfg; + AnalysisDeclContext &AC; + LifetimeAnalysisContext LAC; + + Transferer Xfer; + + /// TODO: doc. 
+ llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockEntryStates; + llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockExitStates; + +public: + LifetimeAnalysis(const CFG &C, FactStore &FS, AnalysisDeclContext &AC) + : Cfg(C), AC(AC), Xfer(FS, LAC) {} + + void run() { + ForwardDataflowWorklist Worklist(Cfg, AC); + const CFGBlock *Entry = &Cfg.getEntry(); + BlockEntryStates[Entry] = LifetimeLattice{}; + Worklist.enqueueBlock(Entry); + while (const CFGBlock *B = Worklist.dequeue()) { + LifetimeLattice EntryState = getEntryState(B); + LifetimeLattice ExitState = Xfer.TransferBlock(B, EntryState); + BlockExitStates[B] = ExitState; + + for (const CFGBlock *Successor : B->succs()) { + auto SuccIt = BlockEntryStates.find(Successor); + LifetimeLattice OldSuccEntryState = (SuccIt != BlockEntryStates.end()) + ? SuccIt->second + : LifetimeLattice{}; + LifetimeLattice NewSuccEntryState = + OldSuccEntryState.merge(ExitState, LAC); + if (SuccIt == BlockEntryStates.end() || + NewSuccEntryState != OldSuccEntryState) { + BlockEntryStates[Successor] = NewSuccEntryState; + Worklist.enqueueBlock(Successor); + } + } + } + } + + void dump() const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Dataflow results:\n"; + llvm::dbgs() << "==========================================\n"; + const CFGBlock &B = Cfg.getExit(); + getExitState(&B).dump(llvm::dbgs()); + } + + LifetimeLattice getEntryState(const CFGBlock *B) const { + auto It = BlockEntryStates.find(B); + if (It != BlockEntryStates.end()) { + return It->second; + } + return LifetimeLattice{}; + } + + LifetimeLattice getExitState(const CFGBlock *B) const { + auto It = BlockExitStates.find(B); + if (It != BlockExitStates.end()) { + return It->second; + } + return LifetimeLattice{}; + } +}; + +} // anonymous namespace + +void runLifetimeAnalysis(const DeclContext &dc, const CFG &cfg, + AnalysisDeclContext &ac) { + FactsContext DCtx; + FactGenerator FGen(cfg, DCtx); + FGen.generateFacts(); + 
DEBUG_WITH_TYPE(kLifetimeFacts, DCtx.FMgr.dump(cfg, ac)); + + LifetimeAnalysis Analysis(cfg, DCtx.FMgr, ac); + Analysis.run(); + DEBUG_WITH_TYPE(kLifetimeDataflow, Analysis.dump()); +} +} // namespace clang \ No newline at end of file diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index d95844cfed614..dd4bede775a40 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -29,6 +29,7 @@ #include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" #include "clang/Analysis/Analyses/CalledOnceCheck.h" #include "clang/Analysis/Analyses/Consumed.h" +#include "clang/Analysis/Analyses/LifetimeSafety.h" #include "clang/Analysis/Analyses/ReachableCode.h" #include "clang/Analysis/Analyses/ThreadSafety.h" #include "clang/Analysis/Analyses/UninitializedValues.h" @@ -49,6 +50,7 @@ #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" #include <algorithm> #include <deque> #include <iterator> @@ -2842,6 +2844,12 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( } } + DEBUG_WITH_TYPE( + "ExperimentalLifetimeAnalysis", if (S.getLangOpts().CPlusPlus) { + if (CFG *cfg = AC.getCFG()) { + runLifetimeAnalysis(*cast<DeclContext>(D), *cfg, AC); + } + }); // Check for violations of "called once" parameter properties. 
if (S.getLangOpts().ObjC && !S.getLangOpts().CPlusPlus && shouldAnalyzeCalledOnceParameters(Diags, D->getBeginLoc())) { diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp new file mode 100644 index 0000000000000..92c499e143fd1 --- /dev/null +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -0,0 +1,120 @@ +// RUN: %clang_cc1 -mllvm -debug-only=ExperimentalLifetimeAnalysis,LifetimeFacts,LifetimeDataflow -Wreturn-stack-address-cfg %s 2>&1 | FileCheck %s + +struct MyObj { + int id; + ~MyObj() {} // Non-trivial destructor +}; + +// Simple Local Variable Address and Return +// CHECK-LABEL: Function: return_local_addr +// CHECK-NEXT: Block B{{[0-9]+}}: +MyObj* return_local_addr() { + MyObj x {10}; + MyObj* p = &x; +// CHECK: Issue (LoanID: [[L_X:[0-9]+]], OriginID: [[O_ADDR_X:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_X]]) + return p; +// CHECK: AssignOrigin (DestID: [[O_P_RVAL:[0-9]+]], SrcID: [[O_P]]) +// CHECK: ReturnOfOrigin (OriginID: [[O_P_RVAL]]) +// CHECK: Expire (LoanID: [[L_X]]) +} +// CHECK-LABEL: Dataflow results: +// CHECK-DAG: Origin [[O_ADDR_X]] -> { L[[L_X]] } +// CHECK-DAG: Origin [[O_P]] -> { L[[L_X]] } +// CHECK-DAG: Origin [[O_P_RVAL]] -> { L[[L_X]] } + +// Pointer Assignment and Return +// CHECK-LABEL: Function: assign_and_return_local_addr +// CHECK-NEXT: Block B{{[0-9]+}}: +MyObj* assign_and_return_local_addr() { + MyObj y{20}; + MyObj* ptr1 = &y; +// CHECK: Issue (LoanID: [[L_Y:[0-9]+]], OriginID: [[O_ADDR_Y:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_PTR1:[0-9]+]], SrcID: [[O_ADDR_Y]]) + MyObj* ptr2 = ptr1; +// CHECK: AssignOrigin (DestID: [[O_PTR1_RVAL_1:[0-9]+]], SrcID: [[O_PTR1]]) +// CHECK: AssignOrigin (DestID: [[O_PTR2:[0-9]+]], SrcID: [[O_PTR1_RVAL_1]]) + ptr2 = ptr1; +// CHECK: AssignOrigin (DestID: [[O_PTR1_RVAL_2:[0-9]+]], SrcID: [[O_PTR1]]) +// CHECK: AssignOrigin (DestID: [[O_PTR2]], SrcID: [[O_PTR1_RVAL_2]]) + ptr2 = ptr2; // Self assignment. 
+// CHECK: AssignOrigin (DestID: [[O_PTR2_RVAL_1:[0-9]+]], SrcID: [[O_PTR2]]) +// CHECK: AssignOrigin (DestID: [[O_PTR2]], SrcID: [[O_PTR2_RVAL_1]]) + return ptr2; +// CHECK: AssignOrigin (DestID: [[O_PTR2_RVAL_2:[0-9]+]], SrcID: [[O_PTR2]]) +// CHECK: ReturnOfOrigin (OriginID: [[O_PTR2_RVAL_2]]) +// CHECK: Expire (LoanID: [[L_Y]]) +} +// CHECK-LABEL: Dataflow results: +// CHECK-DAG: Origin [[O_ADDR_Y]] -> { L[[L_Y]] } +// CHECK-DAG: Origin [[O_PTR1]] -> { L[[L_Y]] } +// CHECK-DAG: Origin [[O_PTR2]] -> { L[[L_Y]] } +// CHECK-DAG: Origin [[O_PTR1_RVAL_1]] -> { L[[L_Y]] } +// CHECK-DAG: Origin [[O_PTR1_RVAL_2]] -> { L[[L_Y]] } +// CHECK-DAG: Origin [[O_PTR2_RVAL_1]] -> { L[[L_Y]] } +// CHECK-DAG: Origin [[O_PTR2_RVAL_2]] -> { L[[L_Y]] } + + +// Return of Non-Pointer Type +// CHECK-LABEL: Function: return_int_val +// CHECK-NEXT: Block B{{[0-9]+}}: +int return_int_val() { + int x = 10; + return x; +} +// CHECK-NEXT: End of Block +// CHECK-LABEL: Dataflow results: +// CHECK: <empty> + + +// Loan Expiration (Automatic Variable, C++) +// CHECK-LABEL: Function: loan_expires_cpp +// CHECK-NEXT: Block B{{[0-9]+}}: +void loan_expires_cpp() { + MyObj obj{1}; + MyObj* pObj = &obj; +// CHECK: Issue (LoanID: [[L_OBJ:[0-9]+]], OriginID: [[O_ADDR_OBJ:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_POBJ:[0-9]+]], SrcID: [[O_ADDR_OBJ]]) +// CHECK: Expire (LoanID: [[L_OBJ]]) +} +// CHECK-LABEL: Dataflow results: +// CHECK-DAG: Origin [[O_ADDR_OBJ]] -> { L[[L_OBJ]] } +// CHECK-DAG: Origin [[O_POBJ]] -> { L[[L_OBJ]] } + + +// FIXME: No expire for Trivial Destructors +// CHECK-LABEL: Function: loan_expires_trivial +// CHECK-NEXT: Block B{{[0-9]+}}: +void loan_expires_trivial() { + int trivial_obj = 1; + int* pTrivialObj = &trivial_obj; +// CHECK: Issue (LoanID: [[L_TRIVIAL_OBJ:[0-9]+]], OriginID: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_PTOBJ:[0-9]+]], SrcID: [[O_ADDR_TRIVIAL_OBJ]]) +// CHECK-NOT: Expire (LoanID: [[L_TRIVIAL_OBJ]]) +// CHECK-NEXT: End of Block + 
// FIXME: Add check for Expire once trivial destructors are handled for expiration. +} + +// CHECK-LABEL: Dataflow results: +// CHECK-DAG: Origin [[O_ADDR_TRIVIAL_OBJ]] -> { L[[L_TRIVIAL_OBJ]] } +// CHECK-DAG: Origin [[O_PTOBJ]] -> { L[[L_TRIVIAL_OBJ]] } + +// CHECK-LABEL: Function: conditional +void conditional(bool condition) { + int a = 5; + int b = 10; + int* p = nullptr; + + if (condition) + p = &a; + // CHECK: Issue (LoanID: [[L_A:[0-9]+]], OriginID: [[O_ADDR_A:[0-9]+]]) + // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_A]]) + else + p = &b; + // CHECK: Issue (LoanID: [[L_B:[0-9]+]], OriginID: [[O_ADDR_B:[0-9]+]]) + // CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_B]]) +} +// CHECK-LABEL: Dataflow results: +// CHECK-DAG: Origin [[O_ADDR_A]] -> { L[[L_A]] } +// CHECK-DAG: Origin [[O_ADDR_B]] -> { L[[L_B]] } +// CHECK-DAG: Origin [[O_P]] -> { L[[L_B]] L[[L_A]] } diff --git a/llvm/include/llvm/ADT/ImmutableMap.h b/llvm/include/llvm/ADT/ImmutableMap.h index 3d19ca41a5be0..593b557e0db11 100644 --- a/llvm/include/llvm/ADT/ImmutableMap.h +++ b/llvm/include/llvm/ADT/ImmutableMap.h @@ -93,7 +93,7 @@ class ImmutableMap { Factory(const Factory &) = delete; Factory &operator=(const Factory &) = delete; - ImmutableMap getEmptyMap() { return ImmutableMap(F.getEmptyTree()); } + ImmutableMap getEmptyMap() const { return ImmutableMap(F.getEmptyTree()); } [[nodiscard]] ImmutableMap add(ImmutableMap Old, key_type_ref K, data_type_ref D) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits