https://github.com/NagyDonat created 
https://github.com/llvm/llvm-project/pull/158639

The underflow reports of the checker security.ArrayBound already displayed the
(negative) byte offset of the accessed location, but those numbers were
sometimes a bit hard to decipher, so I'm extending the message to also display
this offset as a multiple of the size of the accessed element (for example,
"negative byte offset -12 = -3 * sizeof(int)").

This logic is currently inactive when the byte offset is not an integer
multiple of the size of the accessed element -- primarily because it would be a
bit cumbersome to report the quotient and the remainder.

This change only affects the messages; the checker will report the same issues 
before and after this commit.

From db0723ca737ec4613d186ff1137c7405c480baf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Mon, 15 Sep 2025 15:48:12 +0200
Subject: [PATCH] [analyzer] Show element count in ArrayBound underflow reports

The underflow reports of checker security.ArrayBound already displayed
the (negative) byte offset of the accessed location; but those numbers
were sometimes a bit hard to decipher, so I'm extending the message to
also display this offset as a multiple of the size of the accessed
element.

This logic is currently inactive when the byte offset is not an integer
multiple of the size of the accessed element -- primarily because it
would be a bit cumbersome to report the division and the remainder.

This change only affects the messages; the checker will report the same
issues before and after this commit.
---
 .../Checkers/ArrayBoundChecker.cpp            | 48 ++++++++++++-------
 .../test/Analysis/ArrayBound/verbose-tests.c  | 36 ++++++++++++--
 2 files changed, 64 insertions(+), 20 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index d35031b5c22df..c3c9eec3ad2fd 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -389,15 +389,26 @@ static std::optional<int64_t> 
getConcreteValue(std::optional<NonLoc> SV) {
 }
 
 static Messages getPrecedesMsgs(const MemSpaceRegion *Space,
-                                const SubRegion *Region, NonLoc Offset) {
-  std::string RegName = getRegionName(Space, Region), OffsetStr = "";
+                                const SubRegion *Region, NonLoc Offset,
+                                QualType ElemType, int64_t ElemSize) {
+  std::string RegName = getRegionName(Space, Region);
 
-  if (auto ConcreteOffset = getConcreteValue(Offset))
+  std::string OffsetStr = "", ElemInfoStr = "";
+  if (std::optional<int64_t> ConcreteOffset = getConcreteValue(Offset)) {
     OffsetStr = formatv(" {0}", ConcreteOffset);
+    if (*ConcreteOffset % ElemSize == 0) {
+      int64_t Count = *ConcreteOffset / ElemSize;
+      if (Count != -1)
+        ElemInfoStr =
+            formatv(" = {0} * sizeof({1})", Count, ElemType.getAsString());
+      else
+        ElemInfoStr = formatv(" = -sizeof({0})", ElemType.getAsString());
+    }
+  }
 
-  return {
-      formatv("Out of bound access to memory preceding {0}", RegName),
-      formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)};
+  return {formatv("Out of bound access to memory preceding {0}", RegName),
+          formatv("Access of {0} at negative byte offset{1}{2}", RegName,
+                  OffsetStr, ElemInfoStr)};
 }
 
 /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
@@ -419,20 +430,15 @@ static bool tryDividePair(std::optional<int64_t> &Val1,
   return true;
 }
 
-static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
+static Messages getExceedsMsgs(const MemSpaceRegion *Space,
                                const SubRegion *Region, NonLoc Offset,
-                               NonLoc Extent, SVal Location,
-                               bool AlsoMentionUnderflow) {
+                               NonLoc Extent, bool AlsoMentionUnderflow,
+                               QualType ElemType, int64_t ElemSize) {
   std::string RegName = getRegionName(Space, Region);
-  const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
-  assert(EReg && "this checker only handles element access");
-  QualType ElemType = EReg->getElementType();
 
   std::optional<int64_t> OffsetN = getConcreteValue(Offset);
   std::optional<int64_t> ExtentN = getConcreteValue(Extent);
 
-  int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity();
-
   bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize);
   const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index";
 
@@ -585,6 +591,13 @@ void ArrayBoundChecker::performCheck(const Expr *E, 
CheckerContext &C) const {
   if (!RawOffset)
     return;
 
+  const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
+  assert(EReg && "this checker only handles element access");
+  QualType ElemType = EReg->getElementType();
+
+  int64_t ElemSize =
+      C.getASTContext().getTypeSizeInChars(ElemType).getQuantity();
+
   auto [Reg, ByteOffset] = *RawOffset;
 
   // The state updates will be reported as a single note tag, which will be
@@ -635,7 +648,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, 
CheckerContext &C) const {
       } else {
         if (!WithinLowerBound) {
           // ...and it cannot be valid (>= 0), so report an error.
-          Messages Msgs = getPrecedesMsgs(Space, Reg, ByteOffset);
+          Messages Msgs =
+              getPrecedesMsgs(Space, Reg, ByteOffset, ElemType, ElemSize);
           reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
           return;
         }
@@ -678,8 +692,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, 
CheckerContext &C) const {
         }
 
         Messages Msgs =
-            getExceedsMsgs(C.getASTContext(), Space, Reg, ByteOffset,
-                           *KnownSize, Location, AlsoMentionUnderflow);
+            getExceedsMsgs(Space, Reg, ByteOffset, *KnownSize,
+                           AlsoMentionUnderflow, ElemType, ElemSize);
         reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
         return;
       }
diff --git a/clang/test/Analysis/ArrayBound/verbose-tests.c 
b/clang/test/Analysis/ArrayBound/verbose-tests.c
index 84d238ed1a2a4..9b6e33dce8a60 100644
--- a/clang/test/Analysis/ArrayBound/verbose-tests.c
+++ b/clang/test/Analysis/ArrayBound/verbose-tests.c
@@ -11,7 +11,7 @@ int TenElements[10];
 void arrayUnderflow(void) {
   TenElements[-3] = 5;
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12 = 
-3 * sizeof(int)}}
 }
 
 int underflowWithDeref(void) {
@@ -19,9 +19,39 @@ int underflowWithDeref(void) {
   --p;
   return *p;
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4 = 
-sizeof(int)}}
 }
 
+char underflowReportedAsChar(void) {
+  // The "= -... * sizeof(type)" part uses the type of the accessed element
+  // (here 'char'), not the type that appears in the declaration of the
+  // original array (which would be 'int').
+  return ((char *)TenElements)[-1];
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -1 = 
-sizeof(char)}}
+}
+
+struct TwoInts {
+  int a, b;
+};
+
+struct TwoInts underflowReportedAsStruct(void) {
+  // Another case where the accessed type is used for reporting the offset.
+  return *(struct TwoInts*)(TenElements - 4);
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -16 = 
-2 * sizeof(struct TwoInts)}}
+}
+
+struct TwoInts underflowOnlyByteOffset(void) {
+  // In this case the negative byte offset is not a multiple of the size of the
+  // accessed element, so the part "= -... * sizeof(type)" is omitted at the
+  // end of the message.
+  return *(struct TwoInts*)(TenElements - 3);
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note-re@-2 {{Access of 'TenElements' at negative byte offset 
-12{{$}}}}
+}
+
+
 int rng(void);
 int getIndex(void) {
   switch (rng()) {
@@ -40,7 +70,7 @@ void gh86959(void) {
   while (rng())
     TenElements[getIndex()] = 10;
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688 = 
-172 * sizeof(int)}}
 }
 
 int scanf(const char *restrict fmt, ...);

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to