https://github.com/NagyDonat updated https://github.com/llvm/llvm-project/pull/98621
From 2765bc97d3242d50fd73aedb9e9d38dfdcef814c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com> Date: Fri, 12 Jul 2024 13:57:53 +0200 Subject: [PATCH 1/2] [analyzer] Don't display the offset value in underflows Previously alpha.security.ArrayBoundV2 displayed the (negative) offset value when it reported an underflow, but this produced lots of very similar and redundant reports in certain situations. After this commit the offset won't be printed so the usual deduplication will handle these reports as equivalent (and print only one of them). See https://github.com/llvm/llvm-project/issues/86969 for background. --- .../Checkers/ArrayBoundCheckerV2.cpp | 15 ++++++----- .../test/Analysis/out-of-bounds-diagnostics.c | 26 +++++++++++++++++-- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index f82288f1099e8..f177041abc411 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -374,13 +374,14 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) { static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) { std::string RegName = getRegionName(Region); - SmallString<128> Buf; - llvm::raw_svector_ostream Out(Buf); - Out << "Access of " << RegName << " at negative byte offset"; - if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>()) - Out << ' ' << ConcreteIdx->getValue(); + + // We're not reporting the Offset, because we don't want to spam the user + // with similar reports that differ only in different offset values. + // See https://github.com/llvm/llvm-project/issues/86969 for details. + (void)Offset; + return {formatv("Out of bound access to memory preceding {0}", RegName), - std::string(Buf)}; + formatv("Access of {0} at negative byte offset", RegName)}; } /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if @@ -609,7 +610,7 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { // CHECK UPPER BOUND DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB); if (auto KnownSize = Size.getAs<NonLoc>()) { - // In a situation where both overflow and overflow are possible (but the + // In a situation where both underflow and overflow are possible (but the // index is either tainted or known to be invalid), the logic of this // checker will first assume that the offset is non-negative, and then // (with this additional assumption) it will detect an overflow error. diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c index 92f983d8b1561..10cfe95a79a55 100644 --- a/clang/test/Analysis/out-of-bounds-diagnostics.c +++ b/clang/test/Analysis/out-of-bounds-diagnostics.c @@ -6,7 +6,7 @@ int TenElements[10]; void arrayUnderflow(void) { TenElements[-3] = 5; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} } int underflowWithDeref(void) { @@ -14,7 +14,29 @@ int underflowWithDeref(void) { --p; return *p; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} +} + +int rng(void); +int getIndex(void) { + switch (rng()) { + case 1: return -152; + case 2: return -160; + case 3: return -168; + default: return -172; + } +} + +void gh86959(void) { + // Previously code like this produced many almost-identical bug reports that + // only differed in the byte offset value (which was reported by the checker + // at that time). Verify that now we only see one report. + + // expected-note@+1 {{Entering loop body}} + while (rng()) + TenElements[getIndex()] = 10; + // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} } int scanf(const char *restrict fmt, ...); From d7b71a21368c70da21b2ca6e7d02c7022ec21dde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com> Date: Fri, 12 Jul 2024 21:11:22 +0200 Subject: [PATCH 2/2] Alternative approach: tweak the `Profile()`` method of `BugReport`s This commit re-adds the concrete offset value at the end of the (long) `Description` of the underflow report, but ensures that the `Profile()` method of `PathSensitiveBugReport` only uses the _short_ description (as returned by `getShortDescription()`) instead of the the `Description` field. For the sake of consistency, the same modification is also applied to `BasicBugReport::Profile()`. This modification of `Profile()` is a no-op for most of the checkers, because there are very few checkers that set a separate short description for their bug reports and `getShortDescription()` defaults to returning the long `Description` when the field `ShortDescription` is an empty string (i.e. unspecified). I'd say that it was a bug that the short description (which is arguably _the_ human-readable "hash" of the report) wasn't included in the hash calculations performed by `Profile()`. On the other hand, I think it'll be useful that `Profile()` ignores the long description, because then we'll have a nice place where we can print the "nice to mention, but not enough to create a fundamentally different report" secondary information. (Note that the long description is essentially the "final note on the bug path" and the other notes are also ignored by `Profile()` -- because they are calculated later by the visitors.) --- .../StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp | 13 ++++++------- clang/lib/StaticAnalyzer/Core/BugReporter.cpp | 4 ++-- clang/test/Analysis/out-of-bounds-diagnostics.c | 6 +++--- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index f177041abc411..3f837564cf47c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -373,15 +373,14 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) { } static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) { - std::string RegName = getRegionName(Region); + std::string RegName = getRegionName(Region), OffsetStr = ""; - // We're not reporting the Offset, because we don't want to spam the user - // with similar reports that differ only in different offset values. - // See https://github.com/llvm/llvm-project/issues/86969 for details. - (void)Offset; + if (auto ConcreteOffset = getConcreteValue(Offset)) + OffsetStr = formatv(" {0}", ConcreteOffset); - return {formatv("Out of bound access to memory preceding {0}", RegName), - formatv("Access of {0} at negative byte offset", RegName)}; + return { + formatv("Out of bound access to memory preceding {0}", RegName), + formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)}; } /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp index e2002bfbe594a..d73dc40cf03fb 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp @@ -2198,7 +2198,7 @@ const Decl *PathSensitiveBugReport::getDeclWithIssue() const { void BasicBugReport::Profile(llvm::FoldingSetNodeID& hash) const { hash.AddInteger(static_cast<int>(getKind())); hash.AddPointer(&BT); - hash.AddString(Description); + hash.AddString(getShortDescription()); assert(Location.isValid()); Location.Profile(hash); @@ -2213,7 +2213,7 @@ void BasicBugReport::Profile(llvm::FoldingSetNodeID& hash) const { void PathSensitiveBugReport::Profile(llvm::FoldingSetNodeID &hash) const { hash.AddInteger(static_cast<int>(getKind())); hash.AddPointer(&BT); - hash.AddString(Description); + hash.AddString(getShortDescription()); PathDiagnosticLocation UL = getUniqueingLocation(); if (UL.isValid()) { UL.Profile(hash); diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c index 10cfe95a79a55..ed95e9ad51725 100644 --- a/clang/test/Analysis/out-of-bounds-diagnostics.c +++ b/clang/test/Analysis/out-of-bounds-diagnostics.c @@ -6,7 +6,7 @@ int TenElements[10]; void arrayUnderflow(void) { TenElements[-3] = 5; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}} } int underflowWithDeref(void) { @@ -14,7 +14,7 @@ int underflowWithDeref(void) { --p; return *p; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}} } int rng(void); @@ -36,7 +36,7 @@ void gh86959(void) { while (rng()) TenElements[getIndex()] = 10; // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at negative byte offset}} + // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688}} } int scanf(const char *restrict fmt, ...); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits