llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-static-analyzer-1 Author: Balazs Benics (steakhal) <details> <summary>Changes</summary> This is an alternative implementation of the idea present in #<!-- -->109804. In this implementation, the suppression is purely implemented by a `BugReportVisitor`, to avoid coupling the suppression with the engine itself. The idea is to visit the bug-path, and along the way record each time we entered the basic-block that has a comparison in the end that drives the decision of entering or avoiding a loop construct. We also record expressions that caused a state-split. These are basically the expressions that eagerly assume would force a split on - but this approach works even if eager-assumptions are disabled. Once we collected all the data, we go over the loops that we saw, and check if the condition of that loop construct was evaluated N times. This then triggers a heuristic to decide if we should suppress this, e.g. because we "assumed" not to enter the loop, or "assumed" to iterate more than 2 times in the loop. --- This is WIP, because I just scratched the idea, but it seems to work good enough to have some discussion about this, while refining it if I have some time later. --- Full diff: https://github.com/llvm/llvm-project/pull/111258.diff 2 Files Affected: - (modified) clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp (+164-1) - (modified) clang/test/Analysis/out-of-bounds.c (+149-1) ``````````diff diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 3f837564cf47c4..aafd44d037273d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -11,18 +11,28 @@ // //===----------------------------------------------------------------------===// +#include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" +#include "clang/AST/Expr.h" #include "clang/AST/ParentMapContext.h" +#include "clang/AST/Stmt.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/ProgramPoint.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Checkers/Taint.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" -#include "llvm/ADT/SmallString.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include <optional> @@ -32,6 +42,158 @@ using namespace ento; using namespace taint; using llvm::formatv; +namespace { + +class SuppressReportsHavingWeakLoopAssumption : public BugReporterVisitor { +public: + SuppressReportsHavingWeakLoopAssumption(const PathSensitiveBugReport &R) + : OrigCursor(R.getErrorNode()) {} + +private: + friend class BugReporterVisitor; + + // The exploded node we are currently visiting, but in the original exploded + // graph. This graph is not trimmed, unlike the one we usually visit. + const ExplodedNode *OrigCursor; + + llvm::DenseSet<const Stmt *> LoopsSeen; + llvm::DenseMap<const Expr *, unsigned> NumTimesTakenTrueDecision; + llvm::DenseMap<const Expr *, unsigned> NumTimesTakenFalseDecision; + + void Profile(llvm::FoldingSetNodeID &ID) const final { + static const bool Tag = 0; + ID.AddPointer(&Tag); + } + + static const ExplodedNode *selectMatchingPred(const ExplodedNode *OrigSucc, + unsigned SoughtPredId) { + auto MatchesSoughtId = [SoughtPredId](const ExplodedNode *N) { + return N->getID() == SoughtPredId; + }; + auto It = llvm::find_if(OrigSucc->preds(), MatchesSoughtId); + assert(It != OrigSucc->pred_end()); + return *It; + } + + static bool isLoopAndNonNull(const Stmt *S) { + return isa_and_nonnull<ForStmt, WhileStmt, CXXForRangeStmt, DoStmt>(S); + } + + void registerLoopStatements(const ExplodedNode *N) { + if (auto Entrance = N->getLocation().getAs<BlockEntrance>()) { + const Stmt *TermStmt = Entrance->getBlock()->getTerminatorStmt(); + if (isLoopAndNonNull(TermStmt)) + LoopsSeen.insert(TermStmt); + } + } + + void registerOccurrence(llvm::DenseMap<const Expr *, unsigned> &Map, + const Expr *Key) { + if (auto [Place, Inserted] = Map.try_emplace(Key, 1); !Inserted) + ++Place->second; + } + + PathDiagnosticPieceRef VisitNode(const ExplodedNode *Succ, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) final { + OrigCursor = selectMatchingPred(OrigCursor, Succ->getID()); + assert(OrigCursor->getID() == Succ->getID()); + + registerLoopStatements(Succ); + + auto AsPostStmt = Succ->getLocationAs<PostStmt>(); + const Expr *CurrExpr = + AsPostStmt ? dyn_cast<Expr>(AsPostStmt->getStmt()) : nullptr; + + bool IsSplittingTwoWays = OrigCursor->succ_size() == 2; + + if (!CurrExpr || !IsSplittingTwoWays) + return nullptr; + + const ExplodedNode *Next = Succ->getFirstSucc(); + SValBuilder &SVB = Succ->getState()->getStateManager().getSValBuilder(); + auto Query = SVB.evalCast(Succ->getSVal(CurrExpr), SVB.getConditionType(), + CurrExpr->getType()) + .getAs<DefinedOrUnknownSVal>(); + if (!Query) + return nullptr; + + ProgramStateRef TakenTrueBranch = Next->getState()->assume(*Query, true); + registerOccurrence(TakenTrueBranch ? NumTimesTakenTrueDecision + : NumTimesTakenFalseDecision, + CurrExpr); + + return nullptr; + } + + static const Expr *getCond(const Stmt *S) { + assert(isLoopAndNonNull(S)); + switch (S->getStmtClass()) { + case Stmt::StmtClass::ForStmtClass: + return cast<ForStmt>(S)->getCond(); + case Stmt::StmtClass::WhileStmtClass: + return cast<WhileStmt>(S)->getCond(); + case Stmt::StmtClass::CXXForRangeStmtClass: + return cast<CXXForRangeStmt>(S)->getCond(); + case Stmt::StmtClass::DoStmtClass: + return cast<DoStmt>(S)->getCond(); + default: + return nullptr; + } + } + + void trySuppressReport(PathSensitiveBugReport &R, const Stmt *Loop, + const Expr *CondSubExpr) const { + unsigned NumTimesTakenLoopBody = + NumTimesTakenTrueDecision.lookup(CondSubExpr); + unsigned NumTimesNotTakenLoopBody = + NumTimesTakenFalseDecision.lookup(CondSubExpr); + + // Adjust for do-while loops, where the loop body is always taken. + if (isa<DoStmt>(Loop)) + NumTimesTakenLoopBody += 1; + + // Suppress the report if it avoided a loop with an eager assumption. + if (NumTimesTakenLoopBody == 0 && NumTimesNotTakenLoopBody == 1) { + // llvm::errs() << "eagerly decided never taking the loop body\n"; + R.markInvalid("eagerly decided never taking the loop body", CondSubExpr); + return; + } + + // Suppress the report if it iterates with eager assumptions 3 or more + // times. + if (NumTimesTakenLoopBody > 2 && NumTimesNotTakenLoopBody == 0) { + // llvm::errs() << "eagerly taken the loop body at least 2 times\n"; + R.markInvalid("eagerly taken the loop body at least 2 times", + CondSubExpr); + } + } + + void finalizeVisitor(BugReporterContext &BRC, const ExplodedNode *, + PathSensitiveBugReport &R) final { + ASTContext &Ctx = BRC.getASTContext(); + + // Go over all the loops we have seen (either avoided or entered), and check + // if the condition of such loops were eagerly decided. + for (const Stmt *Loop : LoopsSeen) { + if (const Expr *Cond = getCond(Loop)) { + // Let's try all sub-exprs to cover cases that use short-circuiting. + // We need to check if any sub-expression of the condition was eagerly + // decided, because the '&&' and '||' logical operators could + // short-circuit, thus the expression on which we recorded the + // "eager decision" was a sub-expression of the Loop condition. + using namespace ast_matchers; + for (auto Binding : match(findAll(expr().bind("e")), *Cond, Ctx)) { + trySuppressReport(R, Loop, Binding.getNodeAs<Expr>("e")); + if (!R.isValid()) + return; + } + } + } + } +}; +} // namespace + namespace { /// If `E` is a "clean" array subscript expression, return the type of the /// accessed element. If the base of the subscript expression is modified by @@ -722,6 +884,7 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &C, if (Extent) markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug); + BR->addVisitor<SuppressReportsHavingWeakLoopAssumption>(*BR); C.emitReport(std::move(BR)); } diff --git a/clang/test/Analysis/out-of-bounds.c b/clang/test/Analysis/out-of-bounds.c index 1f771c2b3bd138..1b62b2eed4ee84 100644 --- a/clang/test/Analysis/out-of-bounds.c +++ b/clang/test/Analysis/out-of-bounds.c @@ -1,4 +1,9 @@ -// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -verify %s +// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -verify=expected,eagerlyassume %s +// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -analyzer-config eagerly-assume=false -verify %s + +// Note that eagerly-assume=false is tested separately because the +// WeakLoopAssumption suppression heuristic uses different code paths to +// achieve the same result with and without eagerly-assume. void clang_analyzer_eval(int); @@ -194,3 +199,146 @@ char test_comparison_with_extent_symbol(struct incomplete *p) { return ((char *)p)[-1]; // no-warning } +// WeakLoopAssumption suppression +/////////////////////////////////////////////////////////////////////// + +int GlobalArray[100]; +int loop_suppress_after_zero_iterations(unsigned len) { + for (unsigned i = 0; i < len; i++) + if (GlobalArray[i] > 0) + return GlobalArray[i]; + // Previously this would have produced an overflow warning because splitting + // the state on the loop condition introduced an execution path where the + // analyzer thinks that len == 0. + // There are many situations where the programmer knows that an argument is + // positive, but this is not indicated in the source code, so we must avoid + // reporting errors (especially out of bounds errors) on these branches, + // because otherwise we'd get prohibitively many false positives. + return GlobalArray[len - 1]; // no-warning +} + +void loop_report_in_second_iteration(int len) { + int buf[1] = {0}; + for (int i = 0; i < len; i++) { + // When a programmer writes a loop, we may assume that they intended at + // least two iterations. + buf[i] = 1; // expected-warning{{Out of bound access to memory}} + } +} + +void loop_suppress_in_third_iteration(int len) { + int buf[2] = {0}; + for (int i = 0; i < len; i++) { + // We should suppress array bounds errors on the third and later iterations + // of loops, because sometimes programmers write a loop in sitiuations + // where they know that there will be at most two iterations, but the + // analyzer cannot deduce this property. + buf[i] = 1; // no-warning + } +} + +int no_suppression_when_no_assumption_zero_iterations(unsigned len) { + if (len != 0) { + // This 'if' introduces a state split between len == 0 and len != 0. + } + + // On the branch where we assumed that len is zero, this loop will be + // skipped. We (intentionally) don't suppress this execution path becasue + // here the analyzer doesn't assume anything new when it evaluates the loop + // condition. + for (unsigned i = 0; i < len; i++) + if (GlobalArray[i] > 0) + return GlobalArray[i]; + + return GlobalArray[len - 1]; // expected-warning{{Out of bound access to memory}} +} + +void no_suppression_when_no_assumption_third_iteration(int len) { + if (len < 20) { + // This 'if' introduces a state split with len >= 20 on one branch. + } + + int buf[2] = {0}; + for (int i = 0; i < len; i++) { + // As in no_suppression_when_no_assumption_zero_iterations, the suppression + // only activates when the analyzer assumes something new in the loop + // condition. On the branch where `len >= 20` entering the third iteration + // doesn't involve a new assumption, so this warning is not suppressed: + buf[i] = 1; // expected-warning{{Out of bound access to memory}} + } +} + +void loop_suppress_in_third_iteration_logical_and(int len, int flag) { + int buf[2] = {0}; + for (int i = 0; i < len && flag; i++) { + // FIXME: In this case the checker should suppress the warning the same way + // as it's suppressed in loop_suppress_in_third_iteration, but the + // suppression is not activated because the terminator statement associated + // with the loop is just the expression 'flag', while 'i < len' is a + // separate terminator statement that's associated with the + // short-circuiting operator '&&'. + // I have seen a real-world FP that looks like this, but it is much rarer + // than the basic setup. + buf[i] = 1; // no-warning + } +} + +void loop_suppress_in_third_iteration_logical_and_2(int len, int flag) { + int buf[2] = {0}; + for (int i = 0; flag && i < len; i++) { + // If the two operands of '&&' are flipped, the suppression works, because + // then 'flag' is the terminator statement associated with '&&' (which + // determines whether the short-circuiting happens or not) and 'i < len' is + // the terminator statement of the loop itself. + buf[i] = 1; // no-warning + } +} + +void loop_suppress_in_third_iteration_cast(int len) { + int buf[2] = {0}; + for (int i = 0; (unsigned)(i < len); i++) { + // The behavior of this suppression is slightly different under + // eagerly-assume=true (the default) and eagerly-assume=false: + // * When eager assumptions are disabled, it's enough to look for cases + // where we get two non-null states from splitting the state over the + // 'SVal' that represents the full loop condition. + // * When eager assumptions are enabled, we also accept situations when the + // loop condition expression triggers an eager state split and therefore + // we won't see a state split at the "normal" point because it's executed + // on two already separated execution paths. + // However, for the sake of simplicity we don't activate the suppression in + // cases when _a subexpression_ of the loop condition triggers an eager + // assumption. There are already many differences between analysis with and + // without eager assumptions, so it would be pointless to write more + // complicated code to eliminate these rare differences. + buf[i] = 1; // no-warning + } +} + +int coinflip(void); +int do_while_report_after_one_iteration(void) { + int i = 0; + do { + i++; + } while (coinflip()); + // Unlike `loop_suppress_after_zero_iterations`, running just one iteration + // in a do-while is not a corner case that would produce too many false + // positives, so don't suppress bounds errors in these situations. + return GlobalArray[i-2]; // expected-warning{{Out of bound access to memory}} +} + +void do_while_report_in_second_iteration(int len) { + int buf[1] = {0}; + int i = 0; + do { + buf[i] = 1; // expected-warning{{Out of bound access to memory}} + } while (i++ < len); +} + +void do_while_suppress_in_third_iteration(int len) { + int buf[2] = {0}; + int i = 0; + do { + buf[i] = 1; // no-warning + } while (i++ < len); +} `````````` </details> https://github.com/llvm/llvm-project/pull/111258 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits