https://github.com/balazske created 
https://github.com/llvm/llvm-project/pull/149106

Check for non-presence of terminating zero in simple cases.

From ba535f818b36e5ab758b4148e46816e8f3ee6550 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= <balazs.k...@ericsson.com>
Date: Wed, 2 Jul 2025 11:09:58 +0200
Subject: [PATCH] [clang][analyzer] Improve checker
 'unix.cstring.NotNullTerminated'

Check for non-presence of terminating zero in simple cases.
---
 clang/docs/analyzer/checkers.rst              |  20 ++-
 .../Checkers/CStringChecker.cpp               |  94 ++++++++++-
 .../Analysis/Inputs/system-header-simulator.h |   2 +
 clang/test/Analysis/cstring-notnullterm.c     | 158 ++++++++++++++++++
 4 files changed, 263 insertions(+), 11 deletions(-)
 create mode 100644 clang/test/Analysis/cstring-notnullterm.c

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 26c5028e04955..3d6e0e7d73756 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -2105,10 +2105,11 @@ unix.cstring.NotNullTerminated (C)
 Check for arguments which are not null-terminated strings;
 applies to the ``strlen``, ``strcpy``, ``strcat``, ``strcmp`` family of 
functions.
 
-Only very fundamental cases are detected where the passed memory block is
-absolutely different from a null-terminated string. This checker does not
-find if a memory buffer is passed where the terminating zero character
-is missing.
+The checker can detect if the passed memory block is not a string object at 
all,
+like address of a label or function. Additionally it can detect simple cases
+when the terminating zero is missing, for example if the data was initialized
+as array without terminating zero, or the terminating zero was overwritten by
+an assignment (with a value that is provably not zero).
 
 .. code-block:: c
 
@@ -2121,6 +2122,17 @@ is missing.
    int l = strlen((char *)&&label); // warn
  }
 
+ int test3() {
+   char buf[4] = {1, 2, 3, 4};
+   return strlen(buf); // warn
+ }
+
+ int test4() {
+   char buf[] = "abcd";
+   buf[4] = 'e';
+   return strlen(buf); // warn
+ }
+
 .. _unix-cstring-NullArg:
 
 unix.cstring.NullArg (C)
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 31cb150892a5d..4f14853931c54 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -78,12 +78,9 @@ static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) 
{
                                                     : Ctx.WideCharTy);
 }
 
-class CStringChecker : public Checker< eval::Call,
-                                         check::PreStmt<DeclStmt>,
-                                         check::LiveSymbols,
-                                         check::DeadSymbols,
-                                         check::RegionChanges
-                                         > {
+class CStringChecker
+    : public Checker<eval::Call, check::PreStmt<DeclStmt>, check::LiveSymbols,
+                     check::DeadSymbols, check::RegionChanges> {
   mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
       BT_NotCString, BT_AdditionOverflow, BT_UninitRead;
 
@@ -324,11 +321,14 @@ class CStringChecker : public Checker< eval::Call,
                                SizeArgExpr Size, AnyArgExpr First,
                                AnyArgExpr Second,
                                CharKind CK = CharKind::Regular) const;
+  // Check for presence of terminating zero in a string argument.
+  ProgramStateRef checkNullTerminated(CheckerContext &C, ProgramStateRef State,
+                                      AnyArgExpr Arg, SVal ArgVal) const;
+
   void emitOverlapBug(CheckerContext &C,
                       ProgramStateRef state,
                       const Stmt *First,
                       const Stmt *Second) const;
-
   void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
                       StringRef WarningMsg) const;
   void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
@@ -959,6 +959,66 @@ ProgramStateRef 
CStringChecker::checkAdditionOverflow(CheckerContext &C,
   return state;
 }
 
+ProgramStateRef CStringChecker::checkNullTerminated(CheckerContext &C,
+                                                    ProgramStateRef State,
+                                                    AnyArgExpr Arg,
+                                                    SVal ArgVal) const {
+  if (!State)
+    return nullptr;
+
+  if (!Filter.CheckCStringNotNullTerm)
+    return State;
+
+  SValBuilder &SVB = C.getSValBuilder();
+
+  auto TryGetTypedValueR = [](const MemRegion *R) -> const TypedValueRegion * {
+    if (!R)
+      return nullptr;
+    return R->StripCasts()->getAs<TypedValueRegion>();
+  };
+
+  const TypedValueRegion *StrReg = TryGetTypedValueR(ArgVal.getAsRegion());
+  if (!StrReg)
+    return State;
+  int Offset = 0;
+  if (const auto *ElemReg = StrReg->getAs<ElementRegion>()) {
+    RegionRawOffset ROffset = ElemReg->getAsArrayOffset();
+    StrReg = TryGetTypedValueR(ROffset.getRegion());
+    if (!StrReg)
+      return State;
+    Offset = ROffset.getOffset().getQuantity();
+  }
+
+  DefinedOrUnknownSVal Extent = getDynamicExtent(State, StrReg, SVB);
+  if (Extent.isUnknown())
+    return State;
+  const llvm::APSInt *KnownExtent = SVB.getKnownValue(State, Extent);
+  if (!KnownExtent)
+    return State;
+  MemRegionManager &RegionM = SVB.getRegionManager();
+  int RegionExtent = KnownExtent->getExtValue();
+  if (Offset >= RegionExtent)
+    return State;
+  for (int I = Offset; I < RegionExtent; ++I) {
+    const ElementRegion *ElemR = 
RegionM.getElementRegion(C.getASTContext().CharTy, SVB.makeArrayIndex(I), 
StrReg, C.getASTContext());
+    SVal ElemVal = State->getSValAsScalarOrLoc(ElemR);
+    if (!State->isNonNull(ElemVal).isConstrainedTrue())
+      // We have here a lower bound for the string length.
+      // Try to update the CStringLength value?
+      return State;
+  }
+
+  SmallString<80> Buf;
+  llvm::raw_svector_ostream OS(Buf);
+  assert(CurrentFunctionDescription);
+  OS << "Terminating zero missing from string passed as "
+     << (Arg.ArgumentIndex + 1) << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 
1)
+     << " argument to " << CurrentFunctionDescription;
+
+  emitNotCStringBug(C, State, Arg.Expression, OS.str());
+  return nullptr;
+}
+
 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
                                                 const MemRegion *MR,
                                                 SVal strLength) {
@@ -1718,6 +1778,9 @@ void CStringChecker::evalstrLengthCommon(CheckerContext 
&C,
   SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
   state = checkNonNull(C, state, Arg, ArgVal);
 
+  if (!IsStrnlen)
+    state = checkNullTerminated(C, state, Arg, ArgVal);
+
   if (!state)
     return;
 
@@ -1882,6 +1945,9 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, 
const CallEvent &Call,
   DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
   SVal DstVal = state->getSVal(Dst.Expression, LCtx);
   state = checkNonNull(C, state, Dst, DstVal);
+  // Look for terminating zero.
+  if (!IsBounded || appendK != ConcatFnKind::none)
+    state = checkNullTerminated(C, state, Dst, DstVal);
   if (!state)
     return;
 
@@ -1889,6 +1955,9 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, 
const CallEvent &Call,
   SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
   SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
   state = checkNonNull(C, state, srcExpr, srcVal);
+  // Look for terminating zero.
+  if (!IsBounded)
+    state = checkNullTerminated(C, state, srcExpr, srcVal);
   if (!state)
     return;
 
@@ -2340,6 +2409,9 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, 
const CallEvent &Call,
   AnyArgExpr Left = {Call.getArgExpr(0), 0};
   SVal LeftVal = state->getSVal(Left.Expression, LCtx);
   state = checkNonNull(C, state, Left, LeftVal);
+  // Look for terminating zero.
+  if (!IsBounded)
+    state = checkNullTerminated(C, state, Left, LeftVal);
   if (!state)
     return;
 
@@ -2347,6 +2419,9 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, 
const CallEvent &Call,
   AnyArgExpr Right = {Call.getArgExpr(1), 1};
   SVal RightVal = state->getSVal(Right.Expression, LCtx);
   state = checkNonNull(C, state, Right, RightVal);
+  // Look for terminating zero.
+  if (!IsBounded)
+    state = checkNullTerminated(C, state, Right, RightVal);
   if (!state)
     return;
 
@@ -2478,6 +2553,8 @@ void CStringChecker::evalStrsep(CheckerContext &C,
   // a null string).
   SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
   State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
+  // Look for terminating zero.
+  State = checkNullTerminated(C, State, SearchStrPtr, SearchStrVal);
   if (!State)
     return;
 
@@ -2485,6 +2562,8 @@ void CStringChecker::evalStrsep(CheckerContext &C,
   AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};
   SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
   State = checkNonNull(C, State, DelimStr, DelimStrVal);
+  // Look for terminating zero.
+  State = checkNullTerminated(C, State, DelimStr, DelimStrVal);
   if (!State)
     return;
 
@@ -2675,6 +2754,7 @@ void CStringChecker::evalSprintfCommon(CheckerContext &C, 
const CallEvent &Call,
     // This is an invalid call, let's just ignore it.
     return;
   }
+  // FIXME: Why not check for null pointer (and null-terminated string)?
 
   const auto AllArguments =
       llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
diff --git a/clang/test/Analysis/Inputs/system-header-simulator.h 
b/clang/test/Analysis/Inputs/system-header-simulator.h
index fadc09f65d536..3a0cf75cfb9ff 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator.h
@@ -80,6 +80,8 @@ size_t strlen(const char *);
 
 char *strcpy(char *restrict, const char *restrict);
 char *strncpy(char *restrict dst, const char *restrict src, size_t n);
+char *strcat(char *restrict, const char *restrict);
+char *strncat(char *restrict, const char *restrict, size_t);
 char *strsep(char **restrict stringp, const char *restrict delim);
 void *memcpy(void *restrict dst, const void *restrict src, size_t n);
 void *memset(void *s, int c, size_t n);
diff --git a/clang/test/Analysis/cstring-notnullterm.c 
b/clang/test/Analysis/cstring-notnullterm.c
new file mode 100644
index 0000000000000..8d586444fc835
--- /dev/null
+++ b/clang/test/Analysis/cstring-notnullterm.c
@@ -0,0 +1,158 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=unix.cstring.NotNullTerminated 
-verify %s
+
+#include "Inputs/system-header-simulator.h"
+
+extern void *malloc(size_t);
+
+size_t test_addr_fn() {
+  return strlen((char *)&malloc); // expected-warning{{Argument to string 
length function is the address of the function 'malloc', which is not a 
null-terminated string}}
+}
+
+size_t test_addr_label() {
+lab:
+  return strlen((char *)&&lab); // expected-warning{{Argument to string length 
function is the address of the label 'lab', which is not a null-terminated 
string}}
+}
+
+size_t test_init_compound(int i) {
+  char src1[6] = {1,2,3,4,5,6};
+  char src2[6] = {1,2,3,0,5,6};
+  switch (i) {
+  case 1:
+    return strlen(src1); // expected-warning{{Terminating zero missing from 
string passed as 1st argument to string length function}}
+  case 2:
+    return strlen(src1 + 1); // expected-warning{{Terminating zero missing 
from string}}
+  case 3:
+    return strlen(src2);
+  case 4:
+    return strlen(src2 + 4); // expected-warning{{Terminating zero missing 
from string}}
+  case 5:
+    return strlen(src2 + 3);
+  }
+  src1[5] = 0;
+  return strlen(src1);
+}
+
+size_t test_init_literal(int i) {
+  char src1[] = "abcdef";
+  int l = strlen(src1);
+  src1[6] = '.';
+  src1[3] = 0;
+  switch (i) {
+  case 1:
+    return strlen(src1);
+  case 2:
+    return strlen(src1 + 4); // expected-warning{{Terminating zero missing 
from string}}
+  }
+  return l;
+}
+
+size_t test_init_assign(int i, char a) {
+  char src[6];
+  src[1] = '1';
+  src[2] = '2';
+  src[4] = '4';
+  src[5] = '5';
+
+  switch (i) {
+  case 0:
+    return strlen(src);
+  case 1:
+    return strlen(src + 1);
+  case 2:
+    return strlen(src + 2);
+  case 3:
+    return strlen(src + 3);
+  case 4:
+    return strlen(src + 4); // expected-warning{{Terminating zero missing from 
string}}
+  }
+  src[5] = a;
+  size_t l = strlen(src + 4);
+  src[5] = 0;
+  l += strlen(src + 4);
+  src[5] = '5';
+  return l + strlen(src + 4); // expected-warning{{Terminating zero missing 
from string}}
+}
+
+size_t test_assign1() {
+  char str1[5] = {'0','1','2','3','4'};
+  char str2[5];
+  str2[0] = str1[0];
+  str2[1] = str1[1];
+  str2[4] = str1[4];
+  size_t l = strlen(str2);
+  return l + strlen(str2 + 4); // expected-warning{{Terminating zero missing 
from string}}
+}
+
+size_t test_assign2() {
+  char str1[5] = {1,2,3,4,5};
+  char str2[5];
+  str2[0] = str1[0];
+  str2[4] = str2[0];
+  return strlen(str2 + 4); // expected-warning{{Terminating zero missing from 
string}}
+}
+
+void test_ignore(char *dst) {
+  char str1[5] = {1,2,3,4,5};
+  strncpy(dst, str1, 5);
+  strcpy(dst, str1); // expected-warning{{Terminating zero missing from 
string}}
+}
+
+size_t test_malloc() {
+  char *buf = (char *)malloc(4);
+  if (!buf)
+    return 0;
+  buf[3] = 'a';
+  return strlen(buf);
+}
+
+extern void f_ext(char *);
+char *g_buf = 0;
+
+size_t test_escape1() {
+  char buf[4] = {1,2,3,4};
+  f_ext(buf);
+  return strlen(buf);
+}
+
+size_t test_escape2(char *x) {
+  char buf[4] = {1,2,3,4};
+  g_buf = buf;
+  f_ext(x);
+  return strlen(buf);
+}
+
+size_t test_escape3() {
+  char buf[4] = {1,2,3,4};
+  f_ext(buf + 3);
+  return strlen(buf);
+}
+
+void test_str_fn(int i, char *dst) {
+  char buf[] = {1, 2, 3};
+  switch (i) {
+  case 1:
+    strcpy(buf, "aa"); // expected-warning{{Terminating zero missing from 
string}}
+    break;
+  case 2:
+    strcpy(dst, buf); // expected-warning{{Terminating zero missing from 
string}}
+    break;
+  case 3:
+    strncpy(buf, "aa", 3);
+    break;
+  case 4:
+    strncpy(dst, buf, 3);
+    break;
+  case 5:
+    strcat(buf, "aa"); // expected-warning{{Terminating zero missing from 
string}}
+    break;
+  case 6:
+    strcat(dst, buf); // expected-warning{{Terminating zero missing from 
string}}
+    break;
+  case 7:
+    strncat(buf, "aa", 3); // expected-warning{{Terminating zero missing from 
string}}
+    break;
+  case 8:
+    strncat(dst, buf, 3);
+    break;
+  }
+}

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to