https://github.com/balazske created https://github.com/llvm/llvm-project/pull/149106
Check for non-presence of terminating zero in simple cases. From ba535f818b36e5ab758b4148e46816e8f3ee6550 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= <balazs.k...@ericsson.com> Date: Wed, 2 Jul 2025 11:09:58 +0200 Subject: [PATCH] [clang][analyzer] Improve checker 'unix.cstring.NotNullTerminated' Check for non-presence of terminating zero in simple cases. --- clang/docs/analyzer/checkers.rst | 20 ++- .../Checkers/CStringChecker.cpp | 94 ++++++++++- .../Analysis/Inputs/system-header-simulator.h | 2 + clang/test/Analysis/cstring-notnullterm.c | 158 ++++++++++++++++++ 4 files changed, 263 insertions(+), 11 deletions(-) create mode 100644 clang/test/Analysis/cstring-notnullterm.c diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 26c5028e04955..3d6e0e7d73756 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -2105,10 +2105,11 @@ unix.cstring.NotNullTerminated (C) Check for arguments which are not null-terminated strings; applies to the ``strlen``, ``strcpy``, ``strcat``, ``strcmp`` family of functions. -Only very fundamental cases are detected where the passed memory block is -absolutely different from a null-terminated string. This checker does not -find if a memory buffer is passed where the terminating zero character -is missing. +The checker can detect if the passed memory block is not a string object at all, +like address of a label or function. Additionally it can detect simple cases +when the terminating zero is missing, for example if the data was initialized +as array without terminating zero, or the terminating zero was overwritten by +an assignment (with a value that is provably not zero). .. code-block:: c @@ -2121,6 +2122,17 @@ is missing. int l = strlen((char *)&&label); // warn } + int test3() { + char buf[4] = {1, 2, 3, 4}; + return strlen(buf); // warn + } + + int test4() { + char buf[] = "abcd"; + buf[4] = 'e'; + return strlen(buf); // warn + } + .. _unix-cstring-NullArg: unix.cstring.NullArg (C) diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 31cb150892a5d..4f14853931c54 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -78,12 +78,9 @@ static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) { : Ctx.WideCharTy); } -class CStringChecker : public Checker< eval::Call, - check::PreStmt<DeclStmt>, - check::LiveSymbols, - check::DeadSymbols, - check::RegionChanges - > { +class CStringChecker + : public Checker<eval::Call, check::PreStmt<DeclStmt>, check::LiveSymbols, + check::DeadSymbols, check::RegionChanges> { mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap, BT_NotCString, BT_AdditionOverflow, BT_UninitRead; @@ -324,11 +321,14 @@ class CStringChecker : public Checker< eval::Call, SizeArgExpr Size, AnyArgExpr First, AnyArgExpr Second, CharKind CK = CharKind::Regular) const; + // Check for presence of terminating zero in a string argument. + ProgramStateRef checkNullTerminated(CheckerContext &C, ProgramStateRef State, + AnyArgExpr Arg, SVal ArgVal) const; + void emitOverlapBug(CheckerContext &C, ProgramStateRef state, const Stmt *First, const Stmt *Second) const; - void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, StringRef WarningMsg) const; void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State, @@ -959,6 +959,66 @@ ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, return state; } +ProgramStateRef CStringChecker::checkNullTerminated(CheckerContext &C, + ProgramStateRef State, + AnyArgExpr Arg, + SVal ArgVal) const { + if (!State) + return nullptr; + + if (!Filter.CheckCStringNotNullTerm) + return State; + + SValBuilder &SVB = C.getSValBuilder(); + + auto TryGetTypedValueR = [](const MemRegion *R) -> const TypedValueRegion * { + if (!R) + return nullptr; + return R->StripCasts()->getAs<TypedValueRegion>(); + }; + + const TypedValueRegion *StrReg = TryGetTypedValueR(ArgVal.getAsRegion()); + if (!StrReg) + return State; + int Offset = 0; + if (const auto *ElemReg = StrReg->getAs<ElementRegion>()) { + RegionRawOffset ROffset = ElemReg->getAsArrayOffset(); + StrReg = TryGetTypedValueR(ROffset.getRegion()); + if (!StrReg) + return State; + Offset = ROffset.getOffset().getQuantity(); + } + + DefinedOrUnknownSVal Extent = getDynamicExtent(State, StrReg, SVB); + if (Extent.isUnknown()) + return State; + const llvm::APSInt *KnownExtent = SVB.getKnownValue(State, Extent); + if (!KnownExtent) + return State; + MemRegionManager &RegionM = SVB.getRegionManager(); + int RegionExtent = KnownExtent->getExtValue(); + if (Offset >= RegionExtent) + return State; + for (int I = Offset; I < RegionExtent; ++I) { + const ElementRegion *ElemR = RegionM.getElementRegion(C.getASTContext().CharTy, SVB.makeArrayIndex(I), StrReg, C.getASTContext()); + SVal ElemVal = State->getSValAsScalarOrLoc(ElemR); + if (!State->isNonNull(ElemVal).isConstrainedTrue()) + // We have here a lower bound for the string length. + // Try to update the CStringLength value? + return State; + } + + SmallString<80> Buf; + llvm::raw_svector_ostream OS(Buf); + assert(CurrentFunctionDescription); + OS << "Terminating zero missing from string passed as " + << (Arg.ArgumentIndex + 1) << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) + << " argument to " << CurrentFunctionDescription; + + emitNotCStringBug(C, State, Arg.Expression, OS.str()); + return nullptr; +} + ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, const MemRegion *MR, SVal strLength) { @@ -1718,6 +1778,9 @@ void CStringChecker::evalstrLengthCommon(CheckerContext &C, SVal ArgVal = state->getSVal(Arg.Expression, LCtx); state = checkNonNull(C, state, Arg, ArgVal); + if (!IsStrnlen) + state = checkNullTerminated(C, state, Arg, ArgVal); + if (!state) return; @@ -1882,6 +1945,9 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}}; SVal DstVal = state->getSVal(Dst.Expression, LCtx); state = checkNonNull(C, state, Dst, DstVal); + // Look for terminating zero. + if (!IsBounded || appendK != ConcatFnKind::none) + state = checkNullTerminated(C, state, Dst, DstVal); if (!state) return; @@ -1889,6 +1955,9 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}}; SVal srcVal = state->getSVal(srcExpr.Expression, LCtx); state = checkNonNull(C, state, srcExpr, srcVal); + // Look for terminating zero. + if (!IsBounded) + state = checkNullTerminated(C, state, srcExpr, srcVal); if (!state) return; @@ -2340,6 +2409,9 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call, AnyArgExpr Left = {Call.getArgExpr(0), 0}; SVal LeftVal = state->getSVal(Left.Expression, LCtx); state = checkNonNull(C, state, Left, LeftVal); + // Look for terminating zero. + if (!IsBounded) + state = checkNullTerminated(C, state, Left, LeftVal); if (!state) return; @@ -2347,6 +2419,9 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call, AnyArgExpr Right = {Call.getArgExpr(1), 1}; SVal RightVal = state->getSVal(Right.Expression, LCtx); state = checkNonNull(C, state, Right, RightVal); + // Look for terminating zero. + if (!IsBounded) + state = checkNullTerminated(C, state, Right, RightVal); if (!state) return; @@ -2478,6 +2553,8 @@ void CStringChecker::evalStrsep(CheckerContext &C, // a null string). SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx); State = checkNonNull(C, State, SearchStrPtr, SearchStrVal); + // Look for terminating zero. + State = checkNullTerminated(C, State, SearchStrPtr, SearchStrVal); if (!State) return; @@ -2485,6 +2562,8 @@ void CStringChecker::evalStrsep(CheckerContext &C, AnyArgExpr DelimStr = {Call.getArgExpr(1), 1}; SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx); State = checkNonNull(C, State, DelimStr, DelimStrVal); + // Look for terminating zero. + State = checkNullTerminated(C, State, DelimStr, DelimStrVal); if (!State) return; @@ -2675,6 +2754,7 @@ void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call, // This is an invalid call, let's just ignore it. return; } + // FIXME: Why not check for null pointer (and null-terminated string)? const auto AllArguments = llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs()); diff --git a/clang/test/Analysis/Inputs/system-header-simulator.h b/clang/test/Analysis/Inputs/system-header-simulator.h index fadc09f65d536..3a0cf75cfb9ff 100644 --- a/clang/test/Analysis/Inputs/system-header-simulator.h +++ b/clang/test/Analysis/Inputs/system-header-simulator.h @@ -80,6 +80,8 @@ size_t strlen(const char *); char *strcpy(char *restrict, const char *restrict); char *strncpy(char *restrict dst, const char *restrict src, size_t n); +char *strcat(char *restrict, const char *restrict); +char *strncat(char *restrict, const char *restrict, size_t); char *strsep(char **restrict stringp, const char *restrict delim); void *memcpy(void *restrict dst, const void *restrict src, size_t n); void *memset(void *s, int c, size_t n); diff --git a/clang/test/Analysis/cstring-notnullterm.c b/clang/test/Analysis/cstring-notnullterm.c new file mode 100644 index 0000000000000..8d586444fc835 --- /dev/null +++ b/clang/test/Analysis/cstring-notnullterm.c @@ -0,0 +1,158 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=unix.cstring.NotNullTerminated -verify %s + +#include "Inputs/system-header-simulator.h" + +extern void *malloc(size_t); + +size_t test_addr_fn() { + return strlen((char *)&malloc); // expected-warning{{Argument to string length function is the address of the function 'malloc', which is not a null-terminated string}} +} + +size_t test_addr_label() { +lab: + return strlen((char *)&&lab); // expected-warning{{Argument to string length function is the address of the label 'lab', which is not a null-terminated string}} +} + +size_t test_init_compound(int i) { + char src1[6] = {1,2,3,4,5,6}; + char src2[6] = {1,2,3,0,5,6}; + switch (i) { + case 1: + return strlen(src1); // expected-warning{{Terminating zero missing from string passed as 1st argument to string length function}} + case 2: + return strlen(src1 + 1); // expected-warning{{Terminating zero missing from string}} + case 3: + return strlen(src2); + case 4: + return strlen(src2 + 4); // expected-warning{{Terminating zero missing from string}} + case 5: + return strlen(src2 + 3); + } + src1[5] = 0; + return strlen(src1); +} + +size_t test_init_literal(int i) { + char src1[] = "abcdef"; + int l = strlen(src1); + src1[6] = '.'; + src1[3] = 0; + switch (i) { + case 1: + return strlen(src1); + case 2: + return strlen(src1 + 4); // expected-warning{{Terminating zero missing from string}} + } + return l; +} + +size_t test_init_assign(int i, char a) { + char src[6]; + src[1] = '1'; + src[2] = '2'; + src[4] = '4'; + src[5] = '5'; + + switch (i) { + case 0: + return strlen(src); + case 1: + return strlen(src + 1); + case 2: + return strlen(src + 2); + case 3: + return strlen(src + 3); + case 4: + return strlen(src + 4); // expected-warning{{Terminating zero missing from string}} + } + src[5] = a; + size_t l = strlen(src + 4); + src[5] = 0; + l += strlen(src + 4); + src[5] = '5'; + return l + strlen(src + 4); // expected-warning{{Terminating zero missing from string}} +} + +size_t test_assign1() { + char str1[5] = {'0','1','2','3','4'}; + char str2[5]; + str2[0] = str1[0]; + str2[1] = str1[1]; + str2[4] = str1[4]; + size_t l = strlen(str2); + return l + strlen(str2 + 4); // expected-warning{{Terminating zero missing from string}} +} + +size_t test_assign2() { + char str1[5] = {1,2,3,4,5}; + char str2[5]; + str2[0] = str1[0]; + str2[4] = str2[0]; + return strlen(str2 + 4); // expected-warning{{Terminating zero missing from string}} +} + +void test_ignore(char *dst) { + char str1[5] = {1,2,3,4,5}; + strncpy(dst, str1, 5); + strcpy(dst, str1); // expected-warning{{Terminating zero missing from string}} +} + +size_t test_malloc() { + char *buf = (char *)malloc(4); + if (!buf) + return 0; + buf[3] = 'a'; + return strlen(buf); +} + +extern void f_ext(char *); +char *g_buf = 0; + +size_t test_escape1() { + char buf[4] = {1,2,3,4}; + f_ext(buf); + return strlen(buf); +} + +size_t test_escape2(char *x) { + char buf[4] = {1,2,3,4}; + g_buf = buf; + f_ext(x); + return strlen(buf); +} + +size_t test_escape3() { + char buf[4] = {1,2,3,4}; + f_ext(buf + 3); + return strlen(buf); +} + +void test_str_fn(int i, char *dst) { + char buf[] = {1, 2, 3}; + switch (i) { + case 1: + strcpy(buf, "aa"); // expected-warning{{Terminating zero missing from string}} + break; + case 2: + strcpy(dst, buf); // expected-warning{{Terminating zero missing from string}} + break; + case 3: + strncpy(buf, "aa", 3); + break; + case 4: + strncpy(dst, buf, 3); + break; + case 5: + strcat(buf, "aa"); // expected-warning{{Terminating zero missing from string}} + break; + case 6: + strcat(dst, buf); // expected-warning{{Terminating zero missing from string}} + break; + case 7: + strncat(buf, "aa", 3); // expected-warning{{Terminating zero missing from string}} + break; + case 8: + strncat(dst, buf, 3); + break; + } +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits