https://github.com/BgZun updated https://github.com/llvm/llvm-project/pull/170637
>From 254f9912b84a64d33b9bbf967ab0d50cfefd142b Mon Sep 17 00:00:00 2001 From: Bogdan Zunic <[email protected]> Date: Thu, 4 Dec 2025 02:00:59 -0800 Subject: [PATCH 1/2] [Clang] Added clang diagnostic when snprintf/vsnprintf uses sizeof(dest) for the len parameter --- clang/docs/ReleaseNotes.rst | 3 + .../clang/Basic/DiagnosticSemaKinds.td | 4 + clang/lib/Sema/SemaChecking.cpp | 73 +++++++++++-------- clang/test/SemaCXX/warn-memset-bad-sizeof.cpp | 9 +++ 4 files changed, 58 insertions(+), 31 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 51f07256c5d9f..ee15880ce92ab 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -444,6 +444,9 @@ Improvements to Clang's diagnostics comparison operators when mixed with bitwise operators in enum value initializers. This can be locally disabled by explicitly casting the initializer value. +- Clang now emits ``-Wsizeof-pointer-memaccess`` when snprintf/vsnprintf use the sizeof + the destination buffer(dynamically allocated) in the len parameter(#GH162366) + Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 53aa86a7dabde..a496aee8e0c9e 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -895,6 +895,10 @@ def warn_sizeof_pointer_type_memaccess : Warning< "argument to 'sizeof' in %0 call is the same pointer type %1 as the " "%select{destination|source}2; expected %3 or an explicit length">, InGroup<SizeofPointerMemaccess>; +def warn_sizeof_pointer_dest_type_memacess : Warning< + "argument to 'sizeof' in %0 call is the same expression as the " + "destination; did you mean to put an explicit length?">, + InGroup<SizeofPointerMemaccess>; def warn_strlcpycat_wrong_size : Warning< "size argument in %0 call appears to be size of the source; " "expected the size of the destination">, diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index f4e58de91286b..00e0fc8521fa6 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1139,6 +1139,37 @@ static bool ProcessFormatStringLiteral(const Expr *FormatExpr, return false; } +static const UnaryExprOrTypeTraitExpr *getAsSizeOfExpr(const Expr *E) { + if (const auto *Unary = dyn_cast<UnaryExprOrTypeTraitExpr>(E)) + if (Unary->getKind() == UETT_SizeOf) + return Unary; + return nullptr; +} + +/// If E is a sizeof expression, returns its argument expression, +/// otherwise returns NULL. +static const Expr *getSizeOfExprArg(const Expr *E) { + if (const UnaryExprOrTypeTraitExpr *SizeOf = getAsSizeOfExpr(E)) + if (!SizeOf->isArgumentType()) + return SizeOf->getArgumentExpr()->IgnoreParenImpCasts(); + return nullptr; +} + +/// If E is a sizeof expression, returns its argument type. +static QualType getSizeOfArgType(const Expr *E) { + if (const UnaryExprOrTypeTraitExpr *SizeOf = getAsSizeOfExpr(E)) + return SizeOf->getTypeOfArgument(); + return QualType(); +} + +/// Check if two expressions refer to the same declaration. +static bool referToTheSameDecl(const Expr *E1, const Expr *E2) { + if (const DeclRefExpr *D1 = dyn_cast_or_null<DeclRefExpr>(E1)) + if (const DeclRefExpr *D2 = dyn_cast_or_null<DeclRefExpr>(E2)) + return D1->getDecl() == D2->getDecl(); + return false; +} + void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, CallExpr *TheCall) { if (TheCall->isValueDependent() || TheCall->isTypeDependent() || @@ -1449,6 +1480,17 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, } } DestinationSize = ComputeSizeArgument(0); + const Expr *SizeOfArg = TheCall->getArg(1)->IgnoreParenImpCasts(); + const Expr *Dest = TheCall->getArg(0)->IgnoreParenImpCasts(); + const Expr *SizeOfArgExpr = getSizeOfExprArg(SizeOfArg); + const QualType SizeOfArgType = getSizeOfArgType(SizeOfArg); + const Type *ExprType = SizeOfArgType.getTypePtrOrNull(); + if (ExprType && ExprType->isPointerType() && + referToTheSameDecl(SizeOfArgExpr, Dest)) { + DiagRuntimeBehavior(SizeOfArg->getExprLoc(), Dest, + PDiag(diag::warn_sizeof_pointer_dest_type_memacess) + << FD->getNameInfo().getName()); + } } } @@ -9979,29 +10021,6 @@ static const CXXRecordDecl *getContainedDynamicClass(QualType T, return nullptr; } -static const UnaryExprOrTypeTraitExpr *getAsSizeOfExpr(const Expr *E) { - if (const auto *Unary = dyn_cast<UnaryExprOrTypeTraitExpr>(E)) - if (Unary->getKind() == UETT_SizeOf) - return Unary; - return nullptr; -} - -/// If E is a sizeof expression, returns its argument expression, -/// otherwise returns NULL. -static const Expr *getSizeOfExprArg(const Expr *E) { - if (const UnaryExprOrTypeTraitExpr *SizeOf = getAsSizeOfExpr(E)) - if (!SizeOf->isArgumentType()) - return SizeOf->getArgumentExpr()->IgnoreParenImpCasts(); - return nullptr; -} - -/// If E is a sizeof expression, returns its argument type. -static QualType getSizeOfArgType(const Expr *E) { - if (const UnaryExprOrTypeTraitExpr *SizeOf = getAsSizeOfExpr(E)) - return SizeOf->getTypeOfArgument(); - return QualType(); -} - namespace { struct SearchNonTrivialToInitializeField @@ -10499,14 +10518,6 @@ void Sema::CheckStrlcpycatArguments(const CallExpr *Call, OS.str()); } -/// Check if two expressions refer to the same declaration. -static bool referToTheSameDecl(const Expr *E1, const Expr *E2) { - if (const DeclRefExpr *D1 = dyn_cast_or_null<DeclRefExpr>(E1)) - if (const DeclRefExpr *D2 = dyn_cast_or_null<DeclRefExpr>(E2)) - return D1->getDecl() == D2->getDecl(); - return false; -} - static const Expr *getStrlenExprArg(const Expr *E) { if (const CallExpr *CE = dyn_cast<CallExpr>(E)) { const FunctionDecl *FD = CE->getDirectCallee(); diff --git a/clang/test/SemaCXX/warn-memset-bad-sizeof.cpp b/clang/test/SemaCXX/warn-memset-bad-sizeof.cpp index 0a78caa924ea9..bacb2c167af14 100644 --- a/clang/test/SemaCXX/warn-memset-bad-sizeof.cpp +++ b/clang/test/SemaCXX/warn-memset-bad-sizeof.cpp @@ -188,3 +188,12 @@ void strcpy_and_friends() { strndup(FOO, sizeof(FOO)); // \ // expected-warning {{'strndup' call operates on objects of type 'const char' while the size is based on a different type 'const char *'}} expected-note{{did you mean to provide an explicit length?}} } + +extern "C" int snprintf(char* buffer, __SIZE_TYPE__ buf_size, const char* format, ...); +extern "C" void* malloc(unsigned size); + +void check_prints(){ + char* a = (char*) malloc(20); + const char* b = "Hello World"; + snprintf(a, sizeof(a), "%s", b); // expected-warning{{argument to 'sizeof' in 'snprintf' call is the same expression as the destination; did you mean to put an explicit length?}} +} >From 8340bb3e090d3cfe1800635bb69f5a064d4e48a2 Mon Sep 17 00:00:00 2001 From: Bogdan Zunic <[email protected]> Date: Tue, 9 Dec 2025 02:24:26 -0800 Subject: [PATCH 2/2] Moved diagnostic logic to a separate function --- .../clang/Basic/DiagnosticSemaKinds.td | 4 - clang/include/clang/Sema/Sema.h | 3 + clang/lib/Sema/SemaChecking.cpp | 197 +++++++++--------- clang/test/SemaCXX/warn-memset-bad-sizeof.cpp | 2 +- 4 files changed, 106 insertions(+), 100 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index a496aee8e0c9e..53aa86a7dabde 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -895,10 +895,6 @@ def warn_sizeof_pointer_type_memaccess : Warning< "argument to 'sizeof' in %0 call is the same pointer type %1 as the " "%select{destination|source}2; expected %3 or an explicit length">, InGroup<SizeofPointerMemaccess>; -def warn_sizeof_pointer_dest_type_memacess : Warning< - "argument to 'sizeof' in %0 call is the same expression as the " - "destination; did you mean to put an explicit length?">, - InGroup<SizeofPointerMemaccess>; def warn_strlcpycat_wrong_size : Warning< "size argument in %0 call appears to be size of the source; " "expected the size of the destination">, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index ae500139ee6f7..69c921c8d2d35 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -3043,6 +3043,9 @@ class Sema final : public SemaBase { void CheckMemaccessArguments(const CallExpr *Call, unsigned BId, IdentifierInfo *FnName); + bool CheckSizeOfExpression(const Expr *SizeOfArg, const Expr *Dest, + llvm::FoldingSetNodeID SizeOfArgID, + IdentifierInfo *FnName); // Warn if the user has made the 'size' argument to strlcpy or strlcat // be the size of the source, instead of the destination. void CheckStrlcpycatArguments(const CallExpr *Call, IdentifierInfo *FnName); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 00e0fc8521fa6..51d9cf75cc424 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1139,37 +1139,6 @@ static bool ProcessFormatStringLiteral(const Expr *FormatExpr, return false; } -static const UnaryExprOrTypeTraitExpr *getAsSizeOfExpr(const Expr *E) { - if (const auto *Unary = dyn_cast<UnaryExprOrTypeTraitExpr>(E)) - if (Unary->getKind() == UETT_SizeOf) - return Unary; - return nullptr; -} - -/// If E is a sizeof expression, returns its argument expression, -/// otherwise returns NULL. -static const Expr *getSizeOfExprArg(const Expr *E) { - if (const UnaryExprOrTypeTraitExpr *SizeOf = getAsSizeOfExpr(E)) - if (!SizeOf->isArgumentType()) - return SizeOf->getArgumentExpr()->IgnoreParenImpCasts(); - return nullptr; -} - -/// If E is a sizeof expression, returns its argument type. -static QualType getSizeOfArgType(const Expr *E) { - if (const UnaryExprOrTypeTraitExpr *SizeOf = getAsSizeOfExpr(E)) - return SizeOf->getTypeOfArgument(); - return QualType(); -} - -/// Check if two expressions refer to the same declaration. -static bool referToTheSameDecl(const Expr *E1, const Expr *E2) { - if (const DeclRefExpr *D1 = dyn_cast_or_null<DeclRefExpr>(E1)) - if (const DeclRefExpr *D2 = dyn_cast_or_null<DeclRefExpr>(E2)) - return D1->getDecl() == D2->getDecl(); - return false; -} - void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, CallExpr *TheCall) { if (TheCall->isValueDependent() || TheCall->isTypeDependent() || @@ -1480,17 +1449,11 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, } } DestinationSize = ComputeSizeArgument(0); - const Expr *SizeOfArg = TheCall->getArg(1)->IgnoreParenImpCasts(); + const Expr *LenArg = TheCall->getArg(1)->IgnoreParenImpCasts(); const Expr *Dest = TheCall->getArg(0)->IgnoreParenImpCasts(); - const Expr *SizeOfArgExpr = getSizeOfExprArg(SizeOfArg); - const QualType SizeOfArgType = getSizeOfArgType(SizeOfArg); - const Type *ExprType = SizeOfArgType.getTypePtrOrNull(); - if (ExprType && ExprType->isPointerType() && - referToTheSameDecl(SizeOfArgExpr, Dest)) { - DiagRuntimeBehavior(SizeOfArg->getExprLoc(), Dest, - PDiag(diag::warn_sizeof_pointer_dest_type_memacess) - << FD->getNameInfo().getName()); - } + llvm::FoldingSetNodeID SizeOfArgID; + IdentifierInfo *FnInfo = FD->getIdentifier(); + CheckSizeOfExpression(LenArg, Dest, SizeOfArgID, FnInfo); } } @@ -10021,6 +9984,29 @@ static const CXXRecordDecl *getContainedDynamicClass(QualType T, return nullptr; } +static const UnaryExprOrTypeTraitExpr *getAsSizeOfExpr(const Expr *E) { + if (const auto *Unary = dyn_cast<UnaryExprOrTypeTraitExpr>(E)) + if (Unary->getKind() == UETT_SizeOf) + return Unary; + return nullptr; +} + +/// If E is a sizeof expression, returns its argument expression, +/// otherwise returns NULL. +static const Expr *getSizeOfExprArg(const Expr *E) { + if (const UnaryExprOrTypeTraitExpr *SizeOf = getAsSizeOfExpr(E)) + if (!SizeOf->isArgumentType()) + return SizeOf->getArgumentExpr()->IgnoreParenImpCasts(); + return nullptr; +} + +/// If E is a sizeof expression, returns its argument type. +static QualType getSizeOfArgType(const Expr *E) { + if (const UnaryExprOrTypeTraitExpr *SizeOf = getAsSizeOfExpr(E)) + return SizeOf->getTypeOfArgument(); + return QualType(); +} + namespace { struct SearchNonTrivialToInitializeField @@ -10257,60 +10243,8 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call, // actually comparing the expressions for equality. Because computing the // expression IDs can be expensive, we only do this if the diagnostic is // enabled. - if (SizeOfArg && - !Diags.isIgnored(diag::warn_sizeof_pointer_expr_memaccess, - SizeOfArg->getExprLoc())) { - // We only compute IDs for expressions if the warning is enabled, and - // cache the sizeof arg's ID. - if (SizeOfArgID == llvm::FoldingSetNodeID()) - SizeOfArg->Profile(SizeOfArgID, Context, true); - llvm::FoldingSetNodeID DestID; - Dest->Profile(DestID, Context, true); - if (DestID == SizeOfArgID) { - // TODO: For strncpy() and friends, this could suggest sizeof(dst) - // over sizeof(src) as well. - unsigned ActionIdx = 0; // Default is to suggest dereferencing. - StringRef ReadableName = FnName->getName(); - - if (const UnaryOperator *UnaryOp = dyn_cast<UnaryOperator>(Dest)) - if (UnaryOp->getOpcode() == UO_AddrOf) - ActionIdx = 1; // If its an address-of operator, just remove it. - if (!PointeeTy->isIncompleteType() && - (Context.getTypeSize(PointeeTy) == Context.getCharWidth())) - ActionIdx = 2; // If the pointee's size is sizeof(char), - // suggest an explicit length. - - // If the function is defined as a builtin macro, do not show macro - // expansion. - SourceLocation SL = SizeOfArg->getExprLoc(); - SourceRange DSR = Dest->getSourceRange(); - SourceRange SSR = SizeOfArg->getSourceRange(); - SourceManager &SM = getSourceManager(); - - if (SM.isMacroArgExpansion(SL)) { - ReadableName = Lexer::getImmediateMacroName(SL, SM, LangOpts); - SL = SM.getSpellingLoc(SL); - DSR = SourceRange(SM.getSpellingLoc(DSR.getBegin()), - SM.getSpellingLoc(DSR.getEnd())); - SSR = SourceRange(SM.getSpellingLoc(SSR.getBegin()), - SM.getSpellingLoc(SSR.getEnd())); - } - - DiagRuntimeBehavior(SL, SizeOfArg, - PDiag(diag::warn_sizeof_pointer_expr_memaccess) - << ReadableName - << PointeeTy - << DestTy - << DSR - << SSR); - DiagRuntimeBehavior(SL, SizeOfArg, - PDiag(diag::warn_sizeof_pointer_expr_memaccess_note) - << ActionIdx - << SSR); - - break; - } - } + if (CheckSizeOfExpression(LenExpr, Dest, SizeOfArgID, FnName)) + break; // Also check for cases where the sizeof argument is the exact same // type as the memory argument, and where it points to a user-defined @@ -10413,6 +10347,71 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call, } } +bool Sema::CheckSizeOfExpression(const Expr *LenExpr, const Expr *Dest, + llvm::FoldingSetNodeID SizeOfArgID, + IdentifierInfo *FnName) { + const Expr *SizeOfArg = getSizeOfExprArg(LenExpr); + QualType DestTy = Dest->getType(); + QualType PointeeTy; + if (const PointerType *DestPtrTy = DestTy->getAs<PointerType>()) { + PointeeTy = DestPtrTy->getPointeeType(); + + // Catch "memset(p, 0, sizeof(p))" -- needs to be sizeof(*p). Do this by + // actually comparing the expressions for equality. Because computing the + // expression IDs can be expensive, we only do this if the diagnostic is + // enabled. + if (SizeOfArg && !Diags.isIgnored(diag::warn_sizeof_pointer_expr_memaccess, + SizeOfArg->getExprLoc())) { + // We only compute IDs for expressions if the warning is enabled, and + // cache the sizeof arg's ID. + if (SizeOfArgID == llvm::FoldingSetNodeID()) + SizeOfArg->Profile(SizeOfArgID, Context, true); + llvm::FoldingSetNodeID DestID; + Dest->Profile(DestID, Context, true); + if (DestID == SizeOfArgID) { + // TODO: For strncpy() and friends, this could suggest sizeof(dst) + // over sizeof(src) as well. + unsigned ActionIdx = 0; // Default is to suggest dereferencing. + StringRef ReadableName = FnName->getName(); + + if (const UnaryOperator *UnaryOp = dyn_cast<UnaryOperator>(Dest)) + if (UnaryOp->getOpcode() == UO_AddrOf) + ActionIdx = 1; // If its an address-of operator, just remove it. + if (!PointeeTy->isIncompleteType() && + (Context.getTypeSize(PointeeTy) == Context.getCharWidth())) + ActionIdx = 2; // If the pointee's size is sizeof(char), + // suggest an explicit length. + + // If the function is defined as a builtin macro, do not show macro + // expansion. + SourceLocation SL = SizeOfArg->getExprLoc(); + SourceRange DSR = Dest->getSourceRange(); + SourceRange SSR = SizeOfArg->getSourceRange(); + SourceManager &SM = getSourceManager(); + + if (SM.isMacroArgExpansion(SL)) { + ReadableName = Lexer::getImmediateMacroName(SL, SM, LangOpts); + SL = SM.getSpellingLoc(SL); + DSR = SourceRange(SM.getSpellingLoc(DSR.getBegin()), + SM.getSpellingLoc(DSR.getEnd())); + SSR = SourceRange(SM.getSpellingLoc(SSR.getBegin()), + SM.getSpellingLoc(SSR.getEnd())); + } + + DiagRuntimeBehavior(SL, SizeOfArg, + PDiag(diag::warn_sizeof_pointer_expr_memaccess) + << ReadableName << PointeeTy << DestTy << DSR + << SSR); + DiagRuntimeBehavior(SL, SizeOfArg, + PDiag(diag::warn_sizeof_pointer_expr_memaccess_note) + << ActionIdx << SSR); + return true; + } + } + } + return false; +} + // A little helper routine: ignore addition and subtraction of integer literals. // This intentionally does not ignore all integer constant expressions because // we don't want to remove sizeof(). @@ -10518,6 +10517,14 @@ void Sema::CheckStrlcpycatArguments(const CallExpr *Call, OS.str()); } +/// Check if two expressions refer to the same declaration. +static bool referToTheSameDecl(const Expr *E1, const Expr *E2) { + if (const DeclRefExpr *D1 = dyn_cast_or_null<DeclRefExpr>(E1)) + if (const DeclRefExpr *D2 = dyn_cast_or_null<DeclRefExpr>(E2)) + return D1->getDecl() == D2->getDecl(); + return false; +} + static const Expr *getStrlenExprArg(const Expr *E) { if (const CallExpr *CE = dyn_cast<CallExpr>(E)) { const FunctionDecl *FD = CE->getDirectCallee(); diff --git a/clang/test/SemaCXX/warn-memset-bad-sizeof.cpp b/clang/test/SemaCXX/warn-memset-bad-sizeof.cpp index bacb2c167af14..8b305f3ba2884 100644 --- a/clang/test/SemaCXX/warn-memset-bad-sizeof.cpp +++ b/clang/test/SemaCXX/warn-memset-bad-sizeof.cpp @@ -195,5 +195,5 @@ extern "C" void* malloc(unsigned size); void check_prints(){ char* a = (char*) malloc(20); const char* b = "Hello World"; - snprintf(a, sizeof(a), "%s", b); // expected-warning{{argument to 'sizeof' in 'snprintf' call is the same expression as the destination; did you mean to put an explicit length?}} + snprintf(a, sizeof(a), "%s", b); // expected-warning{{'snprintf' call operates on objects of type 'char' while the size is based on a different type 'char *'}} expected-note {{did you mean to provide an explicit length?}} } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
