llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-analysis Author: None (martinboehme) <details> <summary>Changes</summary> Previously, we were propagating storage locations the other way around, i.e. from initializers to result objects, using `RecordValue::getLoc()`. This gave the wrong behavior in some cases -- see the newly added or fixed tests in this patch. In addition, this patch now unblocks removing the `RecordValue` class entirely, as we no longer need `RecordValue::getLoc()`. With this patch, the test `TransferTest.DifferentReferenceLocInJoin` started to fail because the framework now always uses the same storge location for a `MaterializeTemporaryExpr`, meaning that the code under test no longer set up the desired state where a variable of reference type is mapped to two different storage locations in environments being joined. Rather than trying to modify this test to set up the test condition again, I have chosen to replace the test with an equivalent test in DataflowEnvironmentTest.cpp that sets up the test condition directly; because this test is more direct, it will also be less brittle in the face of future changes. --- Patch is 52.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/87320.diff 6 Files Affected: - (modified) clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h (+44-20) - (modified) clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (+304-98) - (modified) clang/lib/Analysis/FlowSensitive/Transfer.cpp (+73-99) - (modified) clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp (+3-10) - (modified) clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp (+43) - (modified) clang/unittests/Analysis/FlowSensitive/TransferTest.cpp (+115-57) ``````````diff diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index c30bccd06674a4..dc3a9c239552be 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -30,6 +30,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include <memory> #include <type_traits> #include <utility> @@ -330,17 +331,6 @@ class Environment { /// location of the result object to pass in `this`, even though prvalues are /// otherwise not associated with storage locations. /// - /// FIXME: Currently, this simply returns a stable storage location for `E`, - /// but this doesn't do the right thing in scenarios like the following: - /// ``` - /// MyClass c = some_condition()? MyClass(foo) : MyClass(bar); - /// ``` - /// Here, `MyClass(foo)` and `MyClass(bar)` will have two different storage - /// locations, when in fact their storage locations should be the same. - /// Eventually, we want to propagate storage locations from result objects - /// down to the prvalues that initialize them, similar to the way that this is - /// done in Clang's CodeGen. - /// /// Requirements: /// `E` must be a prvalue of record type. RecordStorageLocation & @@ -448,7 +438,13 @@ class Environment { /// Initializes the fields (including synthetic fields) of `Loc` with values, /// unless values of the field type are not supported or we hit one of the /// limits at which we stop producing values. - void initializeFieldsWithValues(RecordStorageLocation &Loc); + /// If `Type` is provided, initializes only those fields that are modeled for + /// `Type`; this is intended for use in cases where `Loc` is a derived type + /// and we only want to initialize the fields of a base type. + void initializeFieldsWithValues(RecordStorageLocation &Loc, QualType Type); + void initializeFieldsWithValues(RecordStorageLocation &Loc) { + initializeFieldsWithValues(Loc, Loc.getType()); + } /// Assigns `Val` as the value of `Loc` in the environment. void setValue(const StorageLocation &Loc, Value &Val); @@ -639,6 +635,9 @@ class Environment { LLVM_DUMP_METHOD void dump(raw_ostream &OS) const; private: + using PrValueToResultObject = + llvm::DenseMap<const Expr *, RecordStorageLocation *>; + // The copy-constructor is for use in fork() only. Environment(const Environment &) = default; @@ -668,8 +667,10 @@ class Environment { /// Initializes the fields (including synthetic fields) of `Loc` with values, /// unless values of the field type are not supported or we hit one of the /// limits at which we stop producing values (controlled by `Visited`, - /// `Depth`, and `CreatedValuesCount`). - void initializeFieldsWithValues(RecordStorageLocation &Loc, + /// `Depth`, and `CreatedValuesCount`). If `Type` is different from + /// `Loc.getType()`, initializes only those fields that are modeled for + /// `Type`. + void initializeFieldsWithValues(RecordStorageLocation &Loc, QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, int &CreatedValuesCount); @@ -688,22 +689,45 @@ class Environment { /// and functions referenced in `FuncDecl`. `FuncDecl` must have a body. void initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl); + static PrValueToResultObject + buildResultObjectMap(DataflowAnalysisContext *DACtx, + const FunctionDecl *FuncDecl, + RecordStorageLocation *ThisPointeeLoc, + RecordStorageLocation *LocForRecordReturnVal); + // `DACtx` is not null and not owned by this object. DataflowAnalysisContext *DACtx; - // FIXME: move the fields `CallStack`, `ReturnVal`, `ReturnLoc` and - // `ThisPointeeLoc` into a separate call-context object, shared between - // environments in the same call. + // FIXME: move the fields `CallStack`, `ResultObjectMap`, `ReturnVal`, + // `ReturnLoc` and `ThisPointeeLoc` into a separate call-context object, + // shared between environments in the same call. // https://github.com/llvm/llvm-project/issues/59005 // `DeclContext` of the block being analysed if provided. std::vector<const DeclContext *> CallStack; - // Value returned by the function (if it has non-reference return type). + // Maps from prvalues of record type to their result objects. Shared between + // all environments for the same function. + // FIXME: It's somewhat unsatisfactory that we have to use a `shared_ptr` + // here, though the cost is acceptable: The overhead of a `shared_ptr` is + // incurred when it is copied, and this happens only relatively rarely (when + // we fork the environment). The need for a `shared_ptr` will go away once we + // introduce a shared call-context object (see above). + std::shared_ptr<PrValueToResultObject> ResultObjectMap; + + // The following three member variables handle various different types of + // return values. + // - If the return type is not a reference and not a record: Value returned + // by the function. Value *ReturnVal = nullptr; - // Storage location of the reference returned by the function (if it has - // reference return type). + // - If the return type is a reference: Storage location of the reference + // returned by the function. StorageLocation *ReturnLoc = nullptr; + // - If the return type is a record or the function being analyzed is a + // constructor: Storage location into which the return value should be + // constructed. + RecordStorageLocation *LocForRecordReturnVal = nullptr; + // The storage location of the `this` pointee. Should only be null if the // function being analyzed is only a function and not a method. RecordStorageLocation *ThisPointeeLoc = nullptr; diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index f729d676dd0de8..67d5c444d52cb6 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -15,6 +15,7 @@ #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Type.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" #include "clang/Analysis/FlowSensitive/Value.h" @@ -353,6 +354,8 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, for (auto *Child : S.children()) if (Child != nullptr) getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); + if (const auto *DefaultArg = dyn_cast<CXXDefaultArgExpr>(&S)) + getFieldsGlobalsAndFuncs(*DefaultArg->getExpr(), Fields, Vars, Funcs); if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S)) getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); @@ -385,6 +388,185 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, } } +namespace { + +// Visitor that builds a map from record prvalues to result objects. +// This traverses the body of the function to be analyzed; for each result +// object that it encounters, it propagates the storage location of the result +// object to all record prvalues that can initialize it. +class ResultObjectVisitor : public RecursiveASTVisitor<ResultObjectVisitor> { +public: + // `ResultObjectMap` will be filled with a map from record prvalues to result + // object. If the function being analyzed returns a record by value, + // `LocForRecordReturnVal` is the location to which this record should be + // written; otherwise, it is null. + explicit ResultObjectVisitor( + llvm::DenseMap<const Expr *, RecordStorageLocation *> &ResultObjectMap, + RecordStorageLocation *LocForRecordReturnVal, + DataflowAnalysisContext &DACtx) + : ResultObjectMap(ResultObjectMap), + LocForRecordReturnVal(LocForRecordReturnVal), DACtx(DACtx) {} + + bool shouldVisitImplicitCode() { return true; } + + bool shouldVisitLambdaBody() const { return false; } + + // Traverse all member and base initializers of `Ctor`. This function is not + // called by `RecursiveASTVisitor`; it should be called manually if we are + // analyzing a constructor. `ThisPointeeLoc` is the storage location that + // `this` points to. + void TraverseConstructorInits(const CXXConstructorDecl *Ctor, + RecordStorageLocation *ThisPointeeLoc) { + assert(ThisPointeeLoc != nullptr); + for (const CXXCtorInitializer *Init : Ctor->inits()) { + Expr *InitExpr = Init->getInit(); + if (FieldDecl *Field = Init->getMember(); + Field != nullptr && Field->getType()->isRecordType()) { + PropagateResultObject(InitExpr, cast<RecordStorageLocation>( + ThisPointeeLoc->getChild(*Field))); + } else if (Init->getBaseClass()) { + PropagateResultObject(InitExpr, ThisPointeeLoc); + } + + // Ensure that any result objects within `InitExpr` (e.g. temporaries) + // are also propagated to the prvalues that initialize them. + TraverseStmt(InitExpr); + + // If this is a `CXXDefaultInitExpr`, also propagate any result objects + // within the default expression. + if (auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(InitExpr)) + TraverseStmt(DefaultInit->getExpr()); + } + } + + bool TraverseBindingDecl(BindingDecl *BD) { + // `RecursiveASTVisitor` doesn't traverse holding variables for + // `BindingDecl`s by itself, so we need to tell it to. + if (VarDecl *HoldingVar = BD->getHoldingVar()) + TraverseDecl(HoldingVar); + return RecursiveASTVisitor<ResultObjectVisitor>::TraverseBindingDecl(BD); + } + + bool VisitVarDecl(VarDecl *VD) { + if (VD->getType()->isRecordType() && VD->hasInit()) + PropagateResultObject( + VD->getInit(), + &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*VD))); + return true; + } + + bool VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *MTE) { + if (MTE->getType()->isRecordType()) + PropagateResultObject( + MTE->getSubExpr(), + &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*MTE))); + return true; + } + + bool VisitReturnStmt(ReturnStmt *Return) { + Expr *RetValue = Return->getRetValue(); + if (RetValue != nullptr && RetValue->getType()->isRecordType() && + RetValue->isPRValue()) + PropagateResultObject(RetValue, LocForRecordReturnVal); + return true; + } + + bool VisitExpr(Expr *E) { + // Clang's AST can have record-type prvalues without a result object -- for + // example as full-expressions contained in a compound statement or as + // arguments of call expressions. We notice this if we get here and a + // storage location has not yet been associated with `E`. In this case, + // treat this as if it was a `MaterializeTemporaryExpr`. + if (E->isPRValue() && E->getType()->isRecordType() && + !ResultObjectMap.contains(E)) + PropagateResultObject( + E, &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*E))); + return true; + } + + // Assigns `Loc` as the result object location of `E`, then propagates the + // location to all lower-level prvalues that initialize the same object as + // `E` (or one of its base classes or member variables). + void PropagateResultObject(Expr *E, RecordStorageLocation *Loc) { + if (!E->isPRValue() || !E->getType()->isRecordType()) { + assert(false); + // Ensure we don't propagate the result object if we hit this in a + // release build. + return; + } + + ResultObjectMap[E] = Loc; + + // The following AST node kinds are "original initializers": They are the + // lowest-level AST node that initializes a given object, and nothing + // below them can initialize the same object (or part of it). + if (isa<CXXConstructExpr>(E) || isa<CallExpr>(E) || isa<LambdaExpr>(E) || + isa<CXXDefaultArgExpr>(E) || isa<CXXDefaultInitExpr>(E) || + isa<CXXStdInitializerListExpr>(E)) { + return; + } + + if (auto *InitList = dyn_cast<InitListExpr>(E)) { + if (!InitList->isSemanticForm()) + return; + if (InitList->isTransparent()) { + for (Expr *Init : InitList->inits()) + PropagateResultObject(Init, Loc); + return; + } + + RecordInitListHelper InitListHelper(InitList); + + for (auto [Base, Init] : InitListHelper.base_inits()) { + assert(Base->getType().getCanonicalType() == + Init->getType().getCanonicalType()); + + // Storage location for the base class is the same as that of the + // derived class because we "flatten" the object hierarchy and put all + // fields in `RecordStorageLocation` of the derived class. + PropagateResultObject(Init, Loc); + } + + for (auto [Field, Init] : InitListHelper.field_inits()) { + // Fields of non-record type are handled in + // `TransferVisitor::VisitInitListExpr()`. + if (!Field->getType()->isRecordType()) + continue; + PropagateResultObject( + Init, cast<RecordStorageLocation>(Loc->getChild(*Field))); + } + return; + } + + if (auto *Op = dyn_cast<BinaryOperator>(E); Op && Op->isCommaOp()) { + PropagateResultObject(Op->getRHS(), Loc); + return; + } + + if (auto *Cond = dyn_cast<AbstractConditionalOperator>(E)) { + PropagateResultObject(Cond->getTrueExpr(), Loc); + PropagateResultObject(Cond->getFalseExpr(), Loc); + return; + } + + // All other expression nodes that propagate a record prvalue should have + // exactly one child. + llvm::SmallVector<Stmt *> Children(E->child_begin(), E->child_end()); + if (Children.size() != 1) + E->dump(); + assert(Children.size() == 1); + for (Stmt *S : Children) + PropagateResultObject(cast<Expr>(S), Loc); + } + +private: + llvm::DenseMap<const Expr *, RecordStorageLocation *> &ResultObjectMap; + RecordStorageLocation *LocForRecordReturnVal; + DataflowAnalysisContext &DACtx; +}; + +} // namespace + Environment::Environment(DataflowAnalysisContext &DACtx) : DACtx(&DACtx), FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} @@ -400,17 +582,23 @@ void Environment::initialize() { if (DeclCtx == nullptr) return; - if (const auto *FuncDecl = dyn_cast<FunctionDecl>(DeclCtx)) { - assert(FuncDecl->doesThisDeclarationHaveABody()); + const auto *FuncDecl = dyn_cast<FunctionDecl>(DeclCtx); + if (FuncDecl == nullptr) + return; + + assert(FuncDecl->doesThisDeclarationHaveABody()); - initFieldsGlobalsAndFuncs(FuncDecl); + initFieldsGlobalsAndFuncs(FuncDecl); - for (const auto *ParamDecl : FuncDecl->parameters()) { - assert(ParamDecl != nullptr); - setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); - } + for (const auto *ParamDecl : FuncDecl->parameters()) { + assert(ParamDecl != nullptr); + setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); } + if (FuncDecl->getReturnType()->isRecordType()) + LocForRecordReturnVal = &cast<RecordStorageLocation>( + createStorageLocation(FuncDecl->getReturnType())); + if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(DeclCtx)) { auto *Parent = MethodDecl->getParent(); assert(Parent != nullptr); @@ -443,6 +631,12 @@ void Environment::initialize() { initializeFieldsWithValues(ThisLoc); } } + + // We do this below the handling of `CXXMethodDecl` above so that we can + // be sure that the storage location for `this` has been set. + ResultObjectMap = std::make_shared<PrValueToResultObject>( + buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(), + LocForRecordReturnVal)); } // FIXME: Add support for resetting globals after function calls to enable @@ -483,13 +677,18 @@ void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { if (getStorageLocation(*D) != nullptr) continue; - setStorageLocation(*D, createObject(*D)); + // We don't run transfer functions on the initializers of global variables, + // so they won't be associated with a value or storage location. We + // therefore intentionally don't pass an initializer to `createObject()`; + // in particular, this ensures that `createObject()` will initialize the + // fields of record-type variables with values. + setStorageLocation(*D, createObject(*D, nullptr)); } for (const FunctionDecl *FD : Funcs) { if (getStorageLocation(*FD) != nullptr) continue; - auto &Loc = createStorageLocation(FD->getType()); + auto &Loc = createStorageLocation(*FD); setStorageLocation(*FD, Loc); } } @@ -518,6 +717,9 @@ Environment Environment::pushCall(const CallExpr *Call) const { } } + if (Call->getType()->isRecordType() && Call->isPRValue()) + Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call); + Env.pushCallInternal(Call->getDirectCallee(), llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); @@ -528,6 +730,7 @@ Environment Environment::pushCall(const CXXConstructExpr *Call) const { Environment Env(*this); Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call); + Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call); Env.pushCallInternal(Call->getConstructor(), llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); @@ -556,6 +759,10 @@ void Environment::pushCallInternal(const FunctionDecl *FuncDecl, const VarDecl *Param = *ParamIt; setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); } + + ResultObjectMap = std::make_shared<PrValueToResultObject>( + buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(), + LocForRecordReturnVal)); } void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { @@ -599,6 +806,9 @@ bool Environment::equivalentTo(const Environment &Other, if (ReturnLoc != Other.ReturnLoc) return false; + if (LocForRecordReturnVal != Other.LocForRecordReturnVal) + return false; + if (ThisPointeeLoc != Other.ThisPointeeLoc) return false; @@ -622,8 +832,10 @@ LatticeJoinEffect Environment::widen(const Environment &PrevEnv, assert(DACtx == PrevEnv.DACtx); assert(ReturnVal == PrevEnv.ReturnVal); assert(ReturnLoc == PrevEnv.ReturnLoc); + assert(LocForRecordReturnVal == PrevEnv.LocForRecordReturnVal); assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); assert(CallStack == PrevEnv.CallStack); + assert(ResultObjectMap == PrevEnv.ResultObjectMap); auto Effect = LatticeJoinEffect::Unchanged; @@ -655,12 +867,16 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB, Environment::ValueModel &Model, ExprJoinBehavior ExprBehavior) { assert(EnvA.DACtx == EnvB.DACtx); + assert(EnvA.LocForRecordReturnVal == EnvB.LocForRecordReturnVal); assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); assert(EnvA.CallStack == EnvB.CallStack); + assert(EnvA.ResultObjectMap == EnvB.ResultObjectMap); Environment JoinedEnv(*EnvA.DACtx); JoinedEnv.CallStack = EnvA.CallStack; + JoinedEnv.ResultObjectMap = EnvA.ResultObjectMap; + JoinedEnv.LocForRecordReturnVal = EnvA.LocForRecordReturnVal; JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { @@ -729,6 +945,12 @@ StorageLocation &Environment::createStorageLocation(const Expr &E) { void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { assert(!DeclToLoc.contains(&D)); + // The only kinds of declarations that may have a "variable" storage location + // are declarations of reference type and `BindingDecl`. For all other + // declaration, the storage location should be the stable storage location + // returned by `createStorageLocation()`. + assert(D.getType()->isReferenceType() || isa<BindingDecl>(D) || + &Loc... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/87320 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits