https://github.com/usx95 created https://github.com/llvm/llvm-project/pull/124133
None >From 22990789b61e9f9d22e88a6b008eb3166fd1cb56 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena <u...@google.com> Date: Thu, 23 Jan 2025 15:47:39 +0000 Subject: [PATCH] [experimental] Detect return-stack-addr using CFG --- .../Analysis/Analyses/DanglingReference.h | 14 + clang/include/clang/Basic/DiagnosticGroups.td | 3 + .../clang/Basic/DiagnosticSemaKinds.td | 8 + clang/lib/Analysis/CMakeLists.txt | 1 + clang/lib/Analysis/DanglingReference.cpp | 351 ++++++++++++++++++ clang/lib/Sema/AnalysisBasedWarnings.cpp | 8 + .../test/Sema/warn-lifetime-analysis-cfg.cpp | 136 +++++++ 7 files changed, 521 insertions(+) create mode 100644 clang/include/clang/Analysis/Analyses/DanglingReference.h create mode 100644 clang/lib/Analysis/DanglingReference.cpp create mode 100644 clang/test/Sema/warn-lifetime-analysis-cfg.cpp diff --git a/clang/include/clang/Analysis/Analyses/DanglingReference.h b/clang/include/clang/Analysis/Analyses/DanglingReference.h new file mode 100644 index 00000000000000..c9f5753eed070e --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/DanglingReference.h @@ -0,0 +1,14 @@ +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_DANGLING_REFERENCE_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_DANGLING_REFERENCE_H +#include "clang/AST/DeclBase.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Sema/Sema.h" + +namespace clang { +void runDanglingReferenceAnalysis(const DeclContext &dc, const CFG &cfg, + AnalysisDeclContext &ac, Sema &S); + +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_DANGLING_REFERENCE_H diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 594e99a19b64d6..eeddd6eb82a301 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -472,6 +472,9 @@ def Dangling : DiagGroup<"dangling", [DanglingAssignment, DanglingInitializerList, DanglingGsl, ReturnStackAddress]>; +def ReturnStackAddressCFG : DiagGroup<"return-stack-address-cfg">; +def DanglingCFG : DiagGroup<"dangling-cfg", [ReturnStackAddressCFG]>; + def DistributedObjectModifiers : DiagGroup<"distributed-object-modifiers">; def DllexportExplicitInstantiationDecl : DiagGroup<"dllexport-explicit-instantiation-decl">; def ExcessInitializers : DiagGroup<"excess-initializers">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 8be4f946dce1cc..846cf6b3d45f8a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10169,6 +10169,14 @@ def err_lifetimebound_implicit_object_parameter_void_return_type : Error< "parameter of a function that returns void; " "did you mean 'lifetime_capture_by(X)'">; +// CFG based lifetime analysis. +def warn_ret_stack_variable_ref_cfg : Warning< + "returning reference to a stack variable">, InGroup<ReturnStackAddressCFG>, DefaultIgnore; +def note_local_variable_declared_here : Note<"reference to this stack variable is returned">; + +def warn_ret_stack_temporary_ref_cfg : Warning< + "returning reference to a temporary object">, InGroup<ReturnStackAddressCFG>, DefaultIgnore; + // CHECK: returning address/reference of stack memory def warn_ret_stack_addr_ref : Warning< "%select{address of|reference to}0 stack memory associated with " diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 7914c12d429ef9..d6ea1e907e7f09 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -16,6 +16,7 @@ add_clang_library(clangAnalysis ConstructionContext.cpp Consumed.cpp CodeInjector.cpp + DanglingReference.cpp Dominators.cpp ExprMutationAnalyzer.cpp IntervalPartition.cpp diff --git a/clang/lib/Analysis/DanglingReference.cpp b/clang/lib/Analysis/DanglingReference.cpp new file mode 100644 index 00000000000000..2602cc597a36b8 --- /dev/null +++ b/clang/lib/Analysis/DanglingReference.cpp @@ -0,0 +1,351 @@ +#include "clang/Analysis/Analyses/DanglingReference.h" +#include "clang/AST/Attrs.inc" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclVisitor.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/CFG.h" +#include "clang/Basic/DiagnosticSema.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/raw_ostream.h" +#include <sstream> + +namespace clang { +namespace { + +template <typename T> static bool isRecordWithAttr(QualType Type) { + auto *RD = Type->getAsCXXRecordDecl(); + if (!RD) + return false; + bool Result = RD->hasAttr<T>(); + + if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) + Result |= CTSD->getSpecializedTemplate()->getTemplatedDecl()->hasAttr<T>(); + + return Result; +} +bool isOwner(const Expr *E) { + return isRecordWithAttr<OwnerAttr>(E->getType()); +} +bool isOwner(const Decl *D) { + return isa<ValueDecl>(D) && + isRecordWithAttr<OwnerAttr>(dyn_cast<ValueDecl>(D)->getType()); +} +bool isPointer(const Expr *E) { + return isRecordWithAttr<PointerAttr>(E->getType()); +} +bool isPointer(const Decl *D) { + return isa<ValueDecl>(D) && + isRecordWithAttr<PointerAttr>(dyn_cast<ValueDecl>(D)->getType()); +} + +struct MemoryLoc { + enum MemoryType { + EMPTY, // Pointer is null. + STACK, // Pointer points to something on stack. + UNKNOWN, // Pointer points to an unknown entity. + } Loc; + // Details of stack location. + const Decl *D = nullptr; + const Expr *MTE = nullptr; + + bool IsEmpty() { return Loc == EMPTY; } + bool IsOnStack() { return Loc == STACK; } + bool IsUnkown() { return Loc == UNKNOWN; } + + const Decl *getDecl() { return D; } + const Expr *getExpr() { return MTE; } + + static MemoryLoc Unknown() { return {UNKNOWN, nullptr, nullptr}; } + static MemoryLoc Empty() { return {EMPTY, nullptr, nullptr}; } + static MemoryLoc VarOnStack(const Decl *D) { return {STACK, D, nullptr}; } + static MemoryLoc Temporary(const Expr *MTE) { return {STACK, nullptr, MTE}; } + + std::string str() { + std::ostringstream os; + switch (Loc) { + case EMPTY: + os << "Empty"; + break; + case UNKNOWN: + os << "Unknown"; + break; + case STACK: + os << "Stack"; + if (auto *VD = dyn_cast_or_null<VarDecl>(D)) + os << " \"" << VD->getName().str() << "\""; + if (MTE) + os << " (temporary)"; + break; + } + return os.str(); + } +}; + +class PointsToTracker : public ConstStmtVisitor<PointsToTracker> { +public: + void Handle(const Stmt *S) { + if (auto *E = dyn_cast<Expr>(S); + E && ExprPointsTo.find(E) != ExprPointsTo.end()) + return; + Visit(S); + } + + void MaybeInitaliseDecl(const Decl *D) { + if (!D) + return; + auto *VD = dyn_cast<VarDecl>(D); + if (!VD) + return; + // Initialise the pointer if we are seeing it for the first time. + if (isPointer(VD)) { + if (DeclPointsTo.find(D) == DeclPointsTo.end()) + UpdatePointer(VD, ResolveExpr(VD->getInit())); + } + if (isOwner(VD)) { + if (StackDecls.find(D) == StackDecls.end() && VD->hasLocalStorage()) + AddToStack(VD); + } + } + + // Merge above and below in VisitVarDecl !! + void VisitDeclStmt(const DeclStmt *DS) { + MaybeInitaliseDecl(DS->getSingleDecl()); + } + + void VisitDeclRefExpr(const DeclRefExpr *DRE) { + SetExprPointer(DRE, DRE->getDecl()); + } + + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE) { + // Ignore MTE of pointer types. + if (isPointer(MTE)) { + Handle(MTE->getSubExpr()); + SetExprPointer(MTE, MTE->getSubExpr()); + } + if (isOwner(MTE)) { + // We have a temporary owner on stack. + AddToStack(MTE); + } + } + + void VisitImplicitCastExpr(const ImplicitCastExpr *E) { + auto *SE = E->IgnoreImpCasts(); + Handle(SE); + SetExprPointer(E, SE); + } + + void VisitExprWithCleanups(const ExprWithCleanups *E) { + // Handle(E->getSubExpr()); + SetExprPointer(E, E->getSubExpr()); + } + + void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { + // Conversion from Owner to a Pointer. + const Expr *ConversionFrom = MCE->IgnoreConversionOperatorSingleStep(); + if (ConversionFrom != MCE) { + if (isOwner(ConversionFrom) && isPointer(MCE)) { + SetExprPointer(MCE, ConversionFrom); + } + } + } + + void VisitCXXConstructExpr(const CXXConstructExpr *CCE) { + if (!isPointer(CCE)) + return; + if (CCE->getNumArgs() == 1) + SetExprPointer(CCE, CCE->getArg(0)); + } + + void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { + if (OCE->isAssignmentOp()) { + assert(OCE->getNumArgs() == 2); + Handle(OCE->getArg(0)); + Handle(OCE->getArg(1)); + HandleAssignment(OCE->getArg(0), OCE->getArg(1)); + } + } + +private: + // Returns the Decl that is aliased by this expression. + const Decl *DeclReferencedBy(const Expr *E) { + if (auto *DRE = dyn_cast<DeclRefExpr>(E)) { + return DRE->getDecl(); + } + return nullptr; + } + + void HandleAssignment(const Expr *A, const Expr *B) { + if (!isPointer(A)) + return; + + if (const Decl *PointerDecl = DeclReferencedBy(A)) + UpdatePointer(PointerDecl, B); + } + + // Update the contents of a Pointer. + void UpdatePointer(const Decl *PointerD, MemoryLoc ML) { + assert(isPointer(PointerD)); + DeclPointsTo[PointerD] = ML; + } + void UpdatePointer(const Decl *PointerD, const Expr *A) { + UpdatePointer(PointerD, ResolveExpr(A)); + } + + void SetExprPointer(const Expr *E, MemoryLoc ML) { + assert(ExprPointsTo.insert({E, ML}).second); + } + void SetExprPointer(const Expr *E, const Expr *PointeeE) { + SetExprPointer(E, ResolveExpr(PointeeE)); + } + void SetExprPointer(const Expr *E, const Decl *D) { + SetExprPointer(E, ResolveDecl(D)); + } + +public: + MemoryLoc ResolveExpr(const Expr *E) { + if (!E) + return MemoryLoc::Empty(); + Handle(E); + auto Res = ExprPointsTo.find(E); + if (Res != ExprPointsTo.end()) { + return Res->getSecond(); + } + return ExprPointsTo[E] = MemoryLoc::Unknown(); + } + + // Returns the memory location pointed to by D. If D is a pointer-type, + // returns the memory pointed to by the pointer. + MemoryLoc ResolveDecl(const Decl *D) { + MaybeInitaliseDecl(D); + if (isPointer(D)) + return ResolvePointer(D); + if (isOwner(D)) + return ResolveOwner(D); + return MemoryLoc::Unknown(); + } + + MemoryLoc ResolvePointer(const Decl *D) { + auto *VD = dyn_cast<VarDecl>(D); + assert(VD); + if (!VD->hasLocalStorage()) + return MemoryLoc::Unknown(); + assert(isPointer(D)); + auto Res = DeclPointsTo.find(D); + assert(Res != DeclPointsTo.end()); + return Res->getSecond(); + } + + MemoryLoc ResolveOwner(const Decl *D) { + assert(isOwner(D)); + if (IsOnStack(D)) + return MemoryLoc::VarOnStack(D); + return MemoryLoc::Unknown(); + } + + void AddToStack(const Decl *D) { + assert(isOwner(D)); + StackDecls.insert(D); + } + void AddToStack(const Expr *E) { + assert(isa<MaterializeTemporaryExpr>(E)); + assert(isOwner(E)); + StackExprs.insert(E); + // Add a self edge. + assert(ExprPointsTo.insert({E, MemoryLoc::Temporary(E)}).second); + } + bool IsOnStack(const Decl *D) { return StackDecls.contains(D); } + bool IsOnStack(const Expr *E) { return StackExprs.contains(E); } + + // ExpressionResolver &ExprResolver; + // Map from an expression of View type to its pointee and Owner type to the + // reference<TODO simplify>. This should follow single assignment because + // Expr* cannot be reassigned in the program. + llvm::DenseMap<const Expr *, MemoryLoc> ExprPointsTo; + // Map from a decl of View type to it pointee. This can be reassigned at + // various points in the program due to transfer functions. + llvm::DenseMap<const Decl *, MemoryLoc> DeclPointsTo; + + // Collection of Expr* and Decl* stored on stack. + llvm::DenseSet<const Expr *> StackExprs; + llvm::DenseSet<const Decl *> StackDecls; +}; +class DanglingReferenceAnalyzer { +public: + DanglingReferenceAnalyzer(const DeclContext &DC, const CFG &cfg, + AnalysisDeclContext &AC, Sema &S) + : DC(DC), cfg(cfg), AC(AC), S(S) {} + void RunAnalysis() { + + // For simplicity in protoytpe, avoid joins and stick to functions without + // branches. + if (!cfg.isLinear()) + return; + cfg.dump(AC.getASTContext().getLangOpts(), true); + for (auto I = cfg.begin(), E = cfg.end(); I != E; ++I) { + for (CFGBlock::const_iterator BI = (*I)->begin(), BE = (*I)->end(); + BI != BE; ++BI) { + if (auto cfgstmt = BI->getAs<CFGStmt>()) { + auto *stmt = cfgstmt->getStmt(); + llvm::errs() << "================================================\n"; + cfgstmt->dump(); + if (auto *E = dyn_cast<Expr>(stmt)) + E->dumpColor(); + PointsTo.Handle(stmt); + if (auto *E = dyn_cast<Expr>(stmt)) { + llvm::errs() << "Points To : " << PointsTo.ResolveExpr(E).str() + << "\n"; + } + if (auto *RS = dyn_cast<ReturnStmt>(stmt)) + HandleReturnStmt(RS); + } + } + } + } + +private: + void HandleReturnStmt(const ReturnStmt *RS) { + // Diagnose possible return of stack variable. + if (!RS) + return; + const Expr *RetExpr = RS->getRetValue(); + if (!RetExpr || !isPointer(RetExpr)) + return; + PointsTo.Handle(RetExpr); + auto RetPointee = PointsTo.ResolveExpr(RetExpr); + // RetExpr->dumpColor(); + // llvm::errs() << "Returning pointer to " << RetPointee.str() << "\n"; + if (RetPointee.IsOnStack()) { + // This points to something on stack!! + if (auto *D = RetPointee.getDecl()) { + S.Diag(RetExpr->getExprLoc(), diag::warn_ret_stack_variable_ref_cfg) + << RetExpr->getExprLoc(); + S.Diag(D->getLocation(), diag::note_local_variable_declared_here) + << D->getLocation(); + } + if (auto *E = RetPointee.getExpr()) + S.Diag(E->getExprLoc(), diag::warn_ret_stack_temporary_ref_cfg) + << E->getExprLoc(); + } + } + + [[maybe_unused]] const DeclContext &DC; + const CFG &cfg; + [[maybe_unused]] AnalysisDeclContext &AC; + Sema &S; + // ExpressionResolver ExprResolver; + PointsToTracker PointsTo; +}; +} // namespace + +void runDanglingReferenceAnalysis(const DeclContext &DC, const CFG &cfg, + AnalysisDeclContext &AC, Sema &S) { + DanglingReferenceAnalyzer DRA(DC, cfg, AC, S); + DRA.RunAnalysis(); +} + +} // namespace clang diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 589869d0186575..721d4bdba3db54 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -29,6 +29,7 @@ #include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" #include "clang/Analysis/Analyses/CalledOnceCheck.h" #include "clang/Analysis/Analyses/Consumed.h" +#include "clang/Analysis/Analyses/DanglingReference.h" #include "clang/Analysis/Analyses/ReachableCode.h" #include "clang/Analysis/Analyses/ThreadSafety.h" #include "clang/Analysis/Analyses/UninitializedValues.h" @@ -2826,6 +2827,13 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( } } + if (S.getLangOpts().CPlusPlus && + !Diags.isIgnored(diag::warn_ret_stack_variable_ref_cfg, + D->getBeginLoc())) { + if (CFG *cfg = AC.getCFG()) { + runDanglingReferenceAnalysis(*cast<DeclContext>(D), *cfg, AC, S); + } + } // Check for violations of "called once" parameter properties. if (S.getLangOpts().ObjC && !S.getLangOpts().CPlusPlus && shouldAnalyzeCalledOnceParameters(Diags, D->getBeginLoc())) { diff --git a/clang/test/Sema/warn-lifetime-analysis-cfg.cpp b/clang/test/Sema/warn-lifetime-analysis-cfg.cpp new file mode 100644 index 00000000000000..e337c75f8344d2 --- /dev/null +++ b/clang/test/Sema/warn-lifetime-analysis-cfg.cpp @@ -0,0 +1,136 @@ +// RUN: %clang_cc1 -Wno-dangling -Wdangling-cfg -verify -std=c++11 %s +#include "Inputs/lifetime-analysis.h" + +std::string_view unknown(std::string_view s); +std::string_view unknown(); +std::string temporary(); + +std::string_view simple_with_ctor() { + std::string s1; // expected-note {{reference to this stack variable is returned}} + std::string_view cs1 = s1; + return cs1; // expected-warning {{returning reference to a stack variable}} +} + +std::string_view simple_with_assignment() { + std::string s1; // expected-note {{reference to this stack variable is returned}} + std::string_view cs1; + cs1 = s1; + return cs1; // expected-warning {{returning reference to a stack variable}} +} + +std::string_view use_unknown() { + std::string s1; + std::string_view cs1 = s1; + cs1 = unknown(); + return cs1; // ok. +} + +std::string_view overwrite_unknown() { + std::string s1; // expected-note {{reference to this stack variable is returned}} + std::string_view cs1 = s1; + cs1 = unknown(); + cs1 = s1; + return cs1; // expected-warning {{returning reference to a stack variable}} +} + +std::string_view overwrite_with_unknown() { + std::string s1; + std::string_view cs1 = s1; + cs1 = s1; + cs1 = unknown(); + return cs1; // Ok. +} + +std::string_view return_unknown() { + std::string s1; + std::string_view cs1 = s1; + return unknown(cs1); +} + +std::string_view multiple_assignments() { + std::string s1; + std::string s2 = s1; // expected-note {{reference to this stack variable is returned}} + std::string_view cs1 = s1; + std::string_view cs2 = s1; + s2 = s1; // Ignore owner transfers. + cs1 = s1; + cs2 = s2; + cs1 = cs2; + cs2 = s1; + return cs1; // expected-warning {{returning reference to a stack variable}} +} + +std::string_view global_view; +std::string_view ignore_global_view() { + std::string s; + global_view = s; // TODO: We can still track writes to static storage! + return global_view; // Ok. +} +std::string global_owner; +std::string_view ignore_global_owner() { + std::string_view sv; + sv = global_owner; + return sv; // Ok. +} + +std::string_view return_temporary() { + return + temporary(); // expected-warning {{returning reference to a temporary object}} +} + +std::string_view store_temporary() { + std::string_view sv = + temporary(); // expected-warning {{returning reference to a temporary object}} + return sv; +} + +std::string_view return_local() { + std::string local; // expected-note {{reference to this stack variable is returned}} + return // expected-warning {{returning reference to a stack variable}} + local; +} + +// Parameters +std::string_view params_owner_and_views( + std::string_view sv, + std::string s) { // expected-note {{reference to this stack variable is returned}} + sv = s; + return sv; // expected-warning {{returning reference to a stack variable}} +} + +std::string_view param_owner(std::string s) { // expected-note {{reference to this stack variable is returned}} + return s; // expected-warning {{returning reference to a stack variable}} +} +std::string_view param_owner_ref(const std::string& s) { + return s; +} + +std::string& get_str_ref(); +std::string_view ref_to_global_str() { + const std::string& s = get_str_ref(); + std::string_view sv = s; + return sv; +} +std::string_view view_to_global_str() { + std::string_view s = get_str_ref(); + std::string_view sv = s; + sv = s; + return sv; +} +std::string_view copy_of_global_str() { + std::string s = get_str_ref(); // expected-note {{reference to this stack variable is returned}} + std::string_view sv = s; + return sv; // expected-warning {{returning reference to a stack variable}} +} + +// TODO: Use lifetimebound in function calls. Use Pointer to Owner of pointer +// std::string_view containerOfString() { +// std::vector<std::string> local; +// return local.at(0); +// } + +// std::string_view containerOfViewMultistmt() { +// std::vector<std::string> local; +// std::string_view sv = local.at(0); +// return sv; +// } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits