https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583
>From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/5] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. (rdar://117182250) --- .../Analysis/Analyses/UnsafeBufferUsage.h | 12 + .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td | 7 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 408 +++++++++++++++++- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 + ...-usage-libc-functions-inline-namespace.cpp | 60 +++ ...arn-unsafe-buffer-usage-libc-functions.cpp | 86 ++++ ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 8 files changed, 586 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e115..3e0fae6db7562d 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. + /// \param PrintfInfo + /// is 0 if the callee function is not a member of the printf family; + /// is 1 if the callee is `sprintf`; + /// is 2 if arguments of the call have `__size_by` relation but are not in a + /// safe pattern; + /// is 3 if string arguments do not guarantee null-termination + /// is 4 if the callee takes va_list + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, + ASTContext &Ctx) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b9..ac01b285ae833b 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 554dbaff2ce0d8..7e1e3686ce6554 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup<UnsafeBufferUsage>, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 introduces unsafe buffer access">, + InGroup<UnsafeBufferUsage>, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" + "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination" + "| do not use va_list that cannot be checked at compile-time for bounds safety}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b6..751fb75f6ed602 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,20 +9,24 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" +#include <functional> #include <memory> #include <optional> #include <queue> @@ -443,6 +447,337 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +namespace libc_fun_disjoint_inner_matchers { +// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match +// disjoint node sets. They all take a `CoreName`, which is the substring of a +// function name after `ignoreLibcPrefixAndSuffix`. They are suppose to be used +// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different +// libc function calls. + +// Matches a function call node such that +// 1. It's name, after stripping off predefined prefix and suffix, is +// `CoreName`; and +// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which +// is a set of libc function names. +// +// Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`. +// And, the notation `CoreName[str/wcs]` means a new name obtained from replace +// string "wcs" with "str" in `CoreName`. +// +// Also note, the set of predefined function names does not include `printf` +// functions, they are checked exclusively with other matchers below. +// Maintaining the invariant that all matchers under +// `libc_fun_disjoint_inner_matchers` are disjoint. +AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) { + static const std::set<StringRef> PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + // This is safe: strlen("hello"). We don't want to be noisy on this case. + auto isSafeStrlen = [&Node](StringRef Name) -> bool { + return Name == "strlen" && Node.getNumArgs() == 1 && + isa<StringLiteral>(Node.getArg(0)->IgnoreParenImpCasts()); + }; + + // Match predefined names: + if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end()) + return !isSafeStrlen(CoreName); + + std::string NameWCS = CoreName.str(); + size_t WcsPos = NameWCS.find("wcs"); + + while (WcsPos != std::string::npos) { + NameWCS[WcsPos++] = 's'; + NameWCS[WcsPos++] = 't'; + NameWCS[WcsPos++] = 'r'; + WcsPos = NameWCS.find("wcs", WcsPos); + } + if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) + return !isSafeStrlen(NameWCS); + // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They + // all should end with "scanf"): + return CoreName.ends_with("scanf"); +} + +// Match a call to one of the `-printf` functions taking `va_list`. We cannot +// check safety for these functions so they should be changed to their +// non-va_list versions. +AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf")) + return false; // neither printf nor scanf + return Name.starts_with("v"); +} + +// Matches a call to one of the `-sprintf` functions (excluding the ones with +// va_list) as they are always unsafe and should be changed to corresponding +// `-snprintf`s. +AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf") || Name.starts_with("v")) + return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + return Prefix == "s"; +} + +// A pointer type expression is known to be null-terminated, if it has the +// form: E.c_str(), for any expression E of `std::string` type. +static bool isNullTermPointer(const Expr *Ptr) { + if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts())) + return true; + if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts())) + return true; + if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) { + const CXXMethodDecl *MD = MCE->getMethodDecl(); + const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); + + if (MD && RD && RD->isInStdNamespace()) + if (MD->getName() == "c_str" && RD->getName() == "basic_string") + return true; + } + return false; +} + +// Matches a call to one of the `-printf" functions (excluding the ones with +// va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but +// fail to guarantee their null-termination. In other words, these calls are +// safe if they use null-termination guaranteed string pointers. +AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf") || Name.starts_with("v")) + return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + + auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool { + return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + }; + + if (Prefix.empty() || + Prefix == "k") // printf: all pointer args should be null-terminated + return any_of(Node.arguments(), AnyUnsafeStrPtr); + if (Prefix == "f" && Node.getNumArgs() > 1) + return any_of(llvm::make_range(Node.arg_begin() + 1, Node.arg_end()), + AnyUnsafeStrPtr); + if (Prefix == "sn" && Node.getNumArgs() > 2) { + return any_of(llvm::make_range(Node.arg_begin() + 2, Node.arg_end()), + AnyUnsafeStrPtr); + } + return false; // A call to a "-printf" falls into another category. +} + +// Matches a call to one of the `snprintf` functions (excluding the ones with +// va_list) such that the first two arguments fail to conform to safe patterns. +// +// For the first two arguments: `ptr` and `size`, they are safe if in the +// following patterns: +// ptr := DRE.data(); +// size:= DRE.size()/DRE.size_bytes() +// And DRE is a hardened container or view. +AST_MATCHER_P(CallExpr, unsafeSizedByInSnprintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf") || Name.starts_with("v")) + return false; // not snprint or use va_list + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + + if (Prefix != "sn") + return false; // not snprint + + static StringRef SizedObjs[] = {"span", "array", "vector", + "basic_string_view", "basic_string"}; + const Expr *CharPtr = (*Node.arg_begin())->IgnoreParenImpCasts(); + const Expr *Size = (*(Node.arg_begin() + 1))->IgnoreParenImpCasts(); + + if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(CharPtr)) + if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) { + auto *DREOfPtr = dyn_cast<DeclRefExpr>( + MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts()); + auto *DREOfSize = dyn_cast<DeclRefExpr>( + MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts()); + + if (!DREOfPtr || !DREOfSize) + return true; // not in safe pattern + if (DREOfPtr->getDecl() != DREOfSize->getDecl()) + return true; // not in safe pattern + if (MCEPtr->getMethodDecl()->getName() != "data") + return true; // not in safe pattern + + if (MCESize->getMethodDecl()->getName() == "size_bytes" || + // Note here the pointer must be a pointer-to-char type unless there + // is explicit casting. If there is explicit casting, this branch + // is unreachable. Thus, at this branch "size" and "size_bytes" are + // equivalent as the pointer is a char pointer: + MCESize->getMethodDecl()->getName() == "size") + for (StringRef SizedObj : SizedObjs) + if (MCEPtr->getRecordDecl()->isInStdNamespace() && + MCEPtr->getRecordDecl()->getCanonicalDecl()->getName() == + SizedObj) + return false; // It is in fact safe + } + return true; // ptr and size are not in safe pattern +} +} // namespace libc_fun_disjoint_inner_matchers + +// Match call to a function whose name may have prefixes like "__builtin_" or +// "__asan_" and suffixes like "_s" or "_chk". This matcher takes an argument, +// which should be applied to the core name---the subtring after stripping off +// prefix and suffix of the function name. +// The application results in an inner matcher that matches the call node with +// respect to the core name of the callee. +AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix, + std::function<internal::Matcher<CallExpr>(StringRef)>, + InnerMatcher) { + // Given a function name, returns its core name `CoreName` according to the + // following grammar. + // + // LibcName := CoreName | CoreName + "_s" + // MatchingName := "__builtin_" + LibcName | + // "__builtin___" + LibcName + "_chk" | + // "__asan_" + LibcName + // + struct NameParser { + StringRef matchName(StringRef FunName, bool isBuiltin) { + // Try to match __builtin_: + if (isBuiltin && FunName.starts_with("__builtin_")) + // Then either it is __builtin_LibcName or __builtin___LibcName_chk or + // no match: + return matchLibcNameOrBuiltinChk( + FunName.drop_front(10 /* truncate "__builtin_" */)); + // Try to match __asan_: + if (FunName.starts_with("__asan_")) + return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */)); + return matchLibcName(FunName); + } + + // Parameter `Name` is the substring after stripping off the prefix + // "__builtin_". + StringRef matchLibcNameOrBuiltinChk(StringRef Name) { + if (Name.starts_with("__") && Name.ends_with("_chk")) + return matchLibcName( + Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); + return matchLibcName(Name); + } + + StringRef matchLibcName(StringRef Name) { + if (Name.ends_with("_s")) + return Name.drop_back(2 /* truncate "_s" */); + return Name; + } + } TheLittleParser; + + const FunctionDecl *FD = Node.getDirectCallee(); + const IdentifierInfo *II; + + if (!FD) + return false; + II = FD->getIdentifier(); + // If this is a special C++ name without IdentifierInfo, it can't be a + // C library function. + if (!II) + return false; + + // Look through 'extern "C"' and anything similar invented in the future. + // In general, C library functions will be in the TU directly. + if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit()) { + // If that's not the case, we also consider "C functions" re-declared in + // `std` namespace. + if (!FD->getDeclContext()->getRedeclContext()->isStdNamespace()) + return false; + } + + // If this function is not externally visible, it is not a C library function. + // Note that we make an exception for inline functions, which may be + // declared in header files without external linkage. + if (!FD->isInlined() && !FD->isExternallyVisible()) + return false; + + StringRef CoreName = + TheLittleParser.matchName(II->getName(), FD->getBuiltinID()); + + return InnerMatcher(CoreName).matches(Node, Finder, Builder); +} } // namespace clang::ast_matchers namespace { @@ -1025,6 +1360,77 @@ class DataInvocationGadget : public WarningGadget { DeclUseList getClaimedVarUseSites() const override { return {}; } }; +class UnsafeLibcFunctionCallGadget : public WarningGadget { + const CallExpr *const Call; + constexpr static const char *const Tag = "UnsafeLibcFunctionCall"; + // Extra tags for additional information: + constexpr static const char *const UnsafeSprintfTag = + "UnsafeLibcFunctionCall_sprintf"; + constexpr static const char *const UnsafeSizedByTag = + "UnsafeLibcFunctionCall_sized_by"; + constexpr static const char *const UnsafeStringTag = + "UnsafeLibcFunctionCall_string"; + constexpr static const char *const UnsafeVaListTag = + "UnsafeLibcFunctionCall_va_list"; + + enum UnsafeKind { + OTHERS = 0, // no specific information, the callee function is unsafe + SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead. + SIZED_BY = 2, // a pair of function arguments have "__sized_by" relation but + // they do not conform to safe patterns + STRING = 3, // an argument is a pointer-to-char-as-string but does not + // guarantee null-termination + VA_LIST = 4, // one of the `-printf`s function that take va_list, which is + // considered unsafe as it is not compile-time check + } WarnedFunKind = OTHERS; + +public: + UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::UnsafeLibcFunctionCall), + Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) { + if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_sprintf")) + WarnedFunKind = SPRINTF; + else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_string")) + WarnedFunKind = STRING; + else if (Result.Nodes.getNodeAs<CallExpr>( + "UnsafeLibcFunctionCall_sized_by")) + WarnedFunKind = SIZED_BY; + else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_va_list")) + WarnedFunKind = VA_LIST; + } + + static Matcher matcher() { + auto anyOfLibcInnerMatcher = [](StringRef S) { + return anyOf( + libc_fun_disjoint_inner_matchers::predefinedUnsafeLibcFunCall(S), + callExpr(libc_fun_disjoint_inner_matchers::unsafeStringInPrintfs(S)) + .bind(UnsafeStringTag), + callExpr( + libc_fun_disjoint_inner_matchers::unsafeSizedByInSnprintfs(S)) + .bind(UnsafeSizedByTag), + callExpr(libc_fun_disjoint_inner_matchers::unsafeSprintfs(S)) + .bind(UnsafeSprintfTag), + callExpr(libc_fun_disjoint_inner_matchers::unsafeVaListPrintfs(S)) + .bind(UnsafeVaListTag)); + }; + + return stmt( + callExpr(ignoreLibcPrefixAndSuffix(anyOfLibcInnerMatcher)).bind(Tag)); + } + + const Stmt *getBaseStmt() const { return Call; } + + SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); } + + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const override { + Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx); + } + + DeclUseList getClaimedVarUseSites() const override { return {}; } +}; + // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue // Context (see `isInUnspecifiedLvalueContext`). // Note here `[]` is the built-in subscript operator. diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 0f604c61fa3af9..53ade2df5c311b 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2292,6 +2292,18 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { } } + void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, + ASTContext &Ctx) override { + // We have checked that there is a direct callee with an identifier name: + StringRef Name = Call->getDirectCallee()->getName(); + + S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call) + << Name << Call->getSourceRange(); + if (PrintfInfo > 0) + S.Diag(Call->getBeginLoc(), diag::note_unsafe_buffer_printf_call) + << PrintfInfo << Call->getSourceRange(); + } + void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) override { diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp new file mode 100644 index 00000000000000..eebbc381a262ff --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN: -verify %s + +namespace std { + inline namespace __1 { + template< class InputIt, class OutputIt > + OutputIt copy( InputIt first, InputIt last, + OutputIt d_first ); + + struct iterator{}; + template<typename T> + struct span { + T * ptr; + T * data(); + unsigned size_bytes(); + unsigned size(); + iterator begin() const noexcept; + iterator end() const noexcept; + }; + + template<typename T> + struct basic_string { + T* p; + T *c_str(); + T *data(); + unsigned size_bytes(); + }; + + typedef basic_string<char> string; + typedef basic_string<wchar_t> wstring; + + // C function under std: + void memcpy(); + void strcpy(); + int snprintf( char* buffer, unsigned buf_size, const char* format, ... ); + } +} + +void f(char * p, char * q, std::span<char> s) { + std::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}} + std::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}} + std::__1::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}} + std::__1::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}} + + /* Test printfs */ + std::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + std::__1::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + std::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn + std::__1::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn +} + +void v(std::string s1) { + std::snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn + std::__1::snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn +} + +void g(char *begin, char *end, char *p, std::span<char> s) { + std::copy(begin, end, p); // no warn + std::copy(s.begin(), s.end(), s.begin()); // no warn +} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp new file mode 100644 index 00000000000000..ea2c5ec82211b2 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN: -verify %s + +typedef struct {} FILE; +void memcpy(); +void __asan_memcpy(); +void strcpy(); +void strcpy_s(); +void wcscpy_s(); +unsigned strlen( const char* str ); +int fprintf( FILE* stream, const char* format, ... ); +int printf( const char* format, ... ); +int sprintf( char* buffer, const char* format, ... ); +int snprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int sscanf_s(const char * buffer, const char * format, ...); +int sscanf(const char * buffer, const char * format, ... ); + +namespace std { + template< class InputIt, class OutputIt > + OutputIt copy( InputIt first, InputIt last, + OutputIt d_first ); + + struct iterator{}; + template<typename T> + struct span { + T * ptr; + T * data(); + unsigned size_bytes(); + unsigned size(); + iterator begin() const noexcept; + iterator end() const noexcept; + }; + + template<typename T> + struct basic_string { + T* p; + T *c_str(); + T *data(); + unsigned size_bytes(); + }; + + typedef basic_string<char> string; + typedef basic_string<wchar_t> wstring; + + // C function under std: + void memcpy(); + void strcpy(); +} + +void f(char * p, char * q, std::span<char> s, std::span<char> s2) { + memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}} + std::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}} + __builtin_memcpy(p, q, 64); // expected-warning{{function __builtin_memcpy introduces unsafe buffer access}} + __builtin___memcpy_chk(p, q, 8, 64); // expected-warning{{function __builtin___memcpy_chk introduces unsafe buffer access}} + __asan_memcpy(); // expected-warning{{function __asan_memcpy introduces unsafe buffer access}} + strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}} + std::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}} + strcpy_s(); // expected-warning{{function strcpy_s introduces unsafe buffer access}} + wcscpy_s(); // expected-warning{{function wcscpy_s introduces unsafe buffer access}} + + + /* Test printfs */ + fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + printf("%s%d", p, *p); // expected-warning{{function printf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + sprintf(q, "%s%d", "hello", *p); // expected-warning{{function sprintf introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}} + snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function vsnprintf introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}} + sscanf(p, "%s%d", "hello", *p); // expected-warning{{function sscanf introduces unsafe buffer access}} + sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function sscanf_s introduces unsafe buffer access}} + fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + printf("%s%d", "hello", *p); // no warn + snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn + strlen("hello");// no warn +} + +void v(std::string s1) { + snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn +} + + +void g(char *begin, char *end, char *p, std::span<char> s) { + std::copy(begin, end, p); // no warn + std::copy(s.begin(), s.end(), s.begin()); // no warn +} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp index 844311c3a51a58..668efe0e178b53 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp @@ -1,8 +1,6 @@ // RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fsafe-buffer-usage-suggestions -verify %s -// expected-no-diagnostics - typedef unsigned __darwin_size_t; typedef __darwin_size_t size_t; #define bzero(s, n) __builtin_bzero(s, n) -void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } +void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function __builtin_bzero introduces unsafe buffer access}} >From e9a5b7a5dd064728c143a8b8a26c8951b427392b Mon Sep 17 00:00:00 2001 From: ziqingluo-90 <ziqing_...@apple.com> Date: Mon, 12 Aug 2024 17:46:48 -0700 Subject: [PATCH 2/5] Reduce false positives in warnings about printfs - Predefined name macros such as `__PRETTY_FUNCTION__` are considered safe. - We need to distinguish between `%s` and `%p` as both accept pointers but the latter is not a buffer operation. This leaves us no choice other than parsing the format string. Fortunately, the building blocks of format parsing have already existed and are quite handy. --- clang/include/clang/AST/FormatString.h | 12 ++++ clang/lib/AST/PrintfFormatString.cpp | 28 ++++++++++ clang/lib/Analysis/UnsafeBufferUsage.cpp | 56 +++++++++++++++---- ...arn-unsafe-buffer-usage-libc-functions.cpp | 11 +++- 4 files changed, 93 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h index a074dd23e2ad4c..cdba2a7abe49d9 100644 --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -783,6 +783,18 @@ bool ParsePrintfString(FormatStringHandler &H, bool ParseFormatStringHasSArg(const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target); +/// Parse C format string and return index (relative to `ArgIndex`) of the first +/// found `s` specifier. Return 0 if not found. +/// \param I The start of the C format string; Updated to the first unparsed +/// position upon return. +/// \param E The end of the C format string; +/// \param ArgIndex The argument index of the last found `s` specifier; Or the +/// argument index of the formatter in initial case. +unsigned ParseFormatStringFirstSArgIndex(const char *&I, const char *E, + unsigned ArgIndex, + const LangOptions &LO, + const TargetInfo &Target); + bool ParseScanfString(FormatStringHandler &H, const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target); diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp index 3c6cd2d0f43417..9992afd402d370 100644 --- a/clang/lib/AST/PrintfFormatString.cpp +++ b/clang/lib/AST/PrintfFormatString.cpp @@ -483,6 +483,34 @@ bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I, return false; } +unsigned clang::analyze_format_string::ParseFormatStringFirstSArgIndex( + const char *&I, const char *E, unsigned ArgIndex, const LangOptions &LO, + const TargetInfo &Target) { + unsigned argIndex = ArgIndex; + + // Keep looking for a %s format specifier until we have exhausted the string. + FormatStringHandler H; + while (I != E) { + const PrintfSpecifierResult &FSR = + ParsePrintfSpecifier(H, I, E, argIndex, LO, Target, false, false); + // Did a fail-stop error of any kind occur when parsing the specifier? + // If so, don't do any more processing. + if (FSR.shouldStop()) + return false; + // Did we exhaust the string or encounter an error that + // we can recover from? + if (!FSR.hasValue()) + continue; + const analyze_printf::PrintfSpecifier &FS = FSR.getValue(); + // Return true if this a %s format specifier. + if (FS.getConversionSpecifier().getKind() == + ConversionSpecifier::Kind::sArg) { + return FS.getPositionalArgIndex(); + } + } + return false; +} + bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers( const char *Begin, const char *End, const LangOptions &LO, const TargetInfo &Target) { diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 751fb75f6ed602..745de657fcd274 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -12,6 +12,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/FormatString.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" @@ -611,6 +612,38 @@ static bool isNullTermPointer(const Expr *Ptr) { return false; } +// Return true iff at least one of following cases holds: +// 1. Format string is a literal and there is an unsafe pointer argument +// corresponding to an `s` specifier; +// 2. Format string is not a literal and there is least an unsafe pointer +// argument (including the formatter argument). +static bool hasUnsafeFormatOrSArg(const Expr *Fmt, unsigned FmtArgIdx, + const CallExpr *Call, ASTContext &Ctx) { + if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) { + StringRef FmtStr = SL->getString(); + auto I = FmtStr.begin(); + auto E = FmtStr.end(); + unsigned ArgIdx = FmtArgIdx; + + do { + ArgIdx = analyze_format_string::ParseFormatStringFirstSArgIndex( + I, E, ArgIdx, Ctx.getLangOpts(), Ctx.getTargetInfo()); + if (ArgIdx && Call->getNumArgs() > ArgIdx && + !isNullTermPointer(Call->getArg(ArgIdx))) + return true; + } while (ArgIdx); + return false; + } + // If format is not a string literal, we cannot analyze the format string. + // In this case, this call is considered unsafe if at least one argument + // (including the format argument) is unsafe pointer. + return llvm::any_of( + llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()), + [](const Expr *Arg) { + return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + }); +} + // Matches a call to one of the `-printf" functions (excluding the ones with // va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but // fail to guarantee their null-termination. In other words, these calls are @@ -626,19 +659,18 @@ AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) { if (Prefix.ends_with("w")) Prefix = Prefix.drop_back(1); - auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool { - return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); - }; - if (Prefix.empty() || Prefix == "k") // printf: all pointer args should be null-terminated - return any_of(Node.arguments(), AnyUnsafeStrPtr); - if (Prefix == "f" && Node.getNumArgs() > 1) - return any_of(llvm::make_range(Node.arg_begin() + 1, Node.arg_end()), - AnyUnsafeStrPtr); - if (Prefix == "sn" && Node.getNumArgs() > 2) { - return any_of(llvm::make_range(Node.arg_begin() + 2, Node.arg_end()), - AnyUnsafeStrPtr); + return hasUnsafeFormatOrSArg(Node.getArg(0), 0, &Node, + Finder->getASTContext()); + if (Prefix == "f") + return hasUnsafeFormatOrSArg(Node.getArg(1), 1, &Node, + Finder->getASTContext()); + if (Prefix == "sn") { + // The first two arguments need to be in safe patterns, which is checked + // by `isSafeSizedby`: + return hasUnsafeFormatOrSArg(Node.getArg(2), 2, &Node, + Finder->getASTContext()); } return false; // A call to a "-printf" falls into another category. } @@ -775,7 +807,7 @@ AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix, StringRef CoreName = TheLittleParser.matchName(II->getName(), FD->getBuiltinID()); - + return InnerMatcher(CoreName).matches(Node, Finder, Builder); } } // namespace clang::ast_matchers diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp index ea2c5ec82211b2..c3d7f8fd05435b 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -70,13 +70,20 @@ void f(char * p, char * q, std::span<char> s, std::span<char> s2) { sscanf(p, "%s%d", "hello", *p); // expected-warning{{function sscanf introduces unsafe buffer access}} sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function sscanf_s introduces unsafe buffer access}} fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn printf("%s%d", "hello", *p); // no warn snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn + snprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn strlen("hello");// no warn } -void v(std::string s1) { - snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn +void v(std::string s1, int *p) { + snprintf(s1.data(), s1.size_bytes(), "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn + snprintf(s1.data(), s1.size_bytes(), s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn + printf("%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn + printf(s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn + fprintf((FILE*)0, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn + fprintf((FILE*)0, s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn } >From fa21260c1ec31eec115125a777a66874df9b38c4 Mon Sep 17 00:00:00 2001 From: ziqingluo-90 <ziqing_...@apple.com> Date: Wed, 14 Aug 2024 17:02:19 -0700 Subject: [PATCH 3/5] Address comments --- clang/include/clang/AST/FormatString.h | 12 - clang/lib/AST/PrintfFormatString.cpp | 28 - clang/lib/Analysis/UnsafeBufferUsage.cpp | 624 ++++++++++-------- clang/lib/Sema/AnalysisBasedWarnings.cpp | 6 +- ...-usage-libc-functions-inline-namespace.cpp | 12 +- ...arn-unsafe-buffer-usage-libc-functions.cpp | 44 +- ...n-unsafe-buffer-usage-test-unreachable.cpp | 2 +- 7 files changed, 369 insertions(+), 359 deletions(-) diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h index cdba2a7abe49d9..a074dd23e2ad4c 100644 --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -783,18 +783,6 @@ bool ParsePrintfString(FormatStringHandler &H, bool ParseFormatStringHasSArg(const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target); -/// Parse C format string and return index (relative to `ArgIndex`) of the first -/// found `s` specifier. Return 0 if not found. -/// \param I The start of the C format string; Updated to the first unparsed -/// position upon return. -/// \param E The end of the C format string; -/// \param ArgIndex The argument index of the last found `s` specifier; Or the -/// argument index of the formatter in initial case. -unsigned ParseFormatStringFirstSArgIndex(const char *&I, const char *E, - unsigned ArgIndex, - const LangOptions &LO, - const TargetInfo &Target); - bool ParseScanfString(FormatStringHandler &H, const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target); diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp index 9992afd402d370..3c6cd2d0f43417 100644 --- a/clang/lib/AST/PrintfFormatString.cpp +++ b/clang/lib/AST/PrintfFormatString.cpp @@ -483,34 +483,6 @@ bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I, return false; } -unsigned clang::analyze_format_string::ParseFormatStringFirstSArgIndex( - const char *&I, const char *E, unsigned ArgIndex, const LangOptions &LO, - const TargetInfo &Target) { - unsigned argIndex = ArgIndex; - - // Keep looking for a %s format specifier until we have exhausted the string. - FormatStringHandler H; - while (I != E) { - const PrintfSpecifierResult &FSR = - ParsePrintfSpecifier(H, I, E, argIndex, LO, Target, false, false); - // Did a fail-stop error of any kind occur when parsing the specifier? - // If so, don't do any more processing. - if (FSR.shouldStop()) - return false; - // Did we exhaust the string or encounter an error that - // we can recover from? - if (!FSR.hasValue()) - continue; - const analyze_printf::PrintfSpecifier &FS = FSR.getValue(); - // Return true if this a %s format specifier. - if (FS.getConversionSpecifier().getKind() == - ConversionSpecifier::Kind::sArg) { - return FS.getPositionalArgIndex(); - } - } - return false; -} - bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers( const char *Begin, const char *End, const LangOptions &LO, const TargetInfo &Target) { diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 745de657fcd274..a1526125edf098 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -16,6 +16,7 @@ #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Basic/SourceLocation.h" @@ -23,11 +24,11 @@ #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" -#include <functional> #include <memory> #include <optional> #include <queue> @@ -448,110 +449,223 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } -namespace libc_fun_disjoint_inner_matchers { -// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match -// disjoint node sets. They all take a `CoreName`, which is the substring of a -// function name after `ignoreLibcPrefixAndSuffix`. They are suppose to be used -// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different -// libc function calls. +AST_MATCHER_P(CallExpr, hasNumArgs, unsigned, Num) { + return Node.getNumArgs() == Num; +} + +namespace libc_func_matchers { +// Under `libc_func_matchers`, define a set of matchers that match unsafe +// functions in libc and unsafe calls to them. + +// A tiny parser to strip off common prefix and suffix of libc function names +// in real code. +// +// Given a function name, `matchName` returns `CoreName` according to the +// following grammar: +// +// LibcName := CoreName | CoreName + "_s" +// MatchingName := "__builtin_" + LibcName | +// "__builtin___" + LibcName + "_chk" | +// "__asan_" + LibcName +// +struct LibcFunNamePrefixSuffixParser { + StringRef matchName(StringRef FunName, bool isBuiltin) { + // Try to match __builtin_: + if (isBuiltin && FunName.starts_with("__builtin_")) + // Then either it is __builtin_LibcName or __builtin___LibcName_chk or + // no match: + return matchLibcNameOrBuiltinChk( + FunName.drop_front(10 /* truncate "__builtin_" */)); + // Try to match __asan_: + if (FunName.starts_with("__asan_")) + return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */)); + return matchLibcName(FunName); + } + + // Parameter `Name` is the substring after stripping off the prefix + // "__builtin_". + StringRef matchLibcNameOrBuiltinChk(StringRef Name) { + if (Name.starts_with("__") && Name.ends_with("_chk")) + return matchLibcName( + Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); + return matchLibcName(Name); + } + + StringRef matchLibcName(StringRef Name) { + if (Name.ends_with("_s")) + return Name.drop_back(2 /* truncate "_s" */); + return Name; + } +}; + +// A pointer type expression is known to be null-terminated, if it has the +// form: E.c_str(), for any expression E of `std::string` type. +static bool isNullTermPointer(const Expr *Ptr) { + if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts())) + return true; + if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts())) + return true; + if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) { + const CXXMethodDecl *MD = MCE->getMethodDecl(); + const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); -// Matches a function call node such that + if (MD && RD && RD->isInStdNamespace()) + if (MD->getName() == "c_str" && RD->getName() == "basic_string") + return true; + } + return false; +} + +// Return true iff at least one of following cases holds: +// 1. Format string is a literal and there is an unsafe pointer argument +// corresponding to an `s` specifier; +// 2. Format string is not a literal and there is least an unsafe pointer +// argument (including the formatter argument). +static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx, + ASTContext &Ctx, bool isKprintf = false) { + class StringFormatStringHandler + : public analyze_format_string::FormatStringHandler { + const CallExpr *Call; + unsigned FmtArgIdx; + + public: + StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx) + : Call(Call), FmtArgIdx(FmtArgIdx) {} + + bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, + const char *startSpecifier, + unsigned specifierLen, + const TargetInfo &Target) override { + if (FS.getConversionSpecifier().getKind() == + analyze_printf::PrintfConversionSpecifier::sArg) { + unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx; + + if (0 < ArgIdx && ArgIdx < Call->getNumArgs()) + if (!isNullTermPointer(Call->getArg(ArgIdx))) + return false; // stop parsing + } + return true; // continue parsing + } + }; + + const Expr *Fmt = Call->getArg(FmtArgIdx); + + if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) { + StringRef FmtStr = SL->getString(); + StringFormatStringHandler Handler(Call, FmtArgIdx); + + return analyze_format_string::ParsePrintfString( + Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(), + Ctx.getTargetInfo(), isKprintf); + } + // If format is not a string literal, we cannot analyze the format string. + // In this case, this call is considered unsafe if at least one argument + // (including the format argument) is unsafe pointer. + return llvm::any_of( + llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()), + [](const Expr *Arg) { + return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + }); +} + +// Matches a FunctionDecl node such that // 1. It's name, after stripping off predefined prefix and suffix, is // `CoreName`; and // 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which // is a set of libc function names. // -// Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`. -// And, the notation `CoreName[str/wcs]` means a new name obtained from replace +// Note: For predefined prefix and suffix, see `LibcFunNamePrefixSuffixParser`. +// The notation `CoreName[str/wcs]` means a new name obtained from replace // string "wcs" with "str" in `CoreName`. -// -// Also note, the set of predefined function names does not include `printf` -// functions, they are checked exclusively with other matchers below. -// Maintaining the invariant that all matchers under -// `libc_fun_disjoint_inner_matchers` are disjoint. -AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) { - static const std::set<StringRef> PredefinedNames{ - // numeric conversion: - "atof", - "atoi", - "atol", - "atoll", - "strtol", - "strtoll", - "strtoul", - "strtoull", - "strtof", - "strtod", - "strtold", - "strtoimax", - "strtoumax", - // "strfromf", "strfromd", "strfroml", // C23? - // string manipulation: - "strcpy", - "strncpy", - "strlcpy", - "strcat", - "strncat", - "strlcat", - "strxfrm", - "strdup", - "strndup", - // string examination: - "strlen", - "strnlen", - "strcmp", - "strncmp", - "stricmp", - "strcasecmp", - "strcoll", - "strchr", - "strrchr", - "strspn", - "strcspn", - "strpbrk", - "strstr", - "strtok", - // "mem-" functions - "memchr", - "wmemchr", - "memcmp", - "wmemcmp", - "memcpy", - "memccpy", - "mempcpy", - "wmemcpy", - "memmove", - "wmemmove", - "memset", - "wmemset", - // IO: - "fread", - "fwrite", - "fgets", - "fgetws", - "gets", - "fputs", - "fputws", - "puts", - // others - "strerror_s", - "strerror_r", - "bcopy", - "bzero", - "bsearch", - "qsort", - }; - // This is safe: strlen("hello"). We don't want to be noisy on this case. - auto isSafeStrlen = [&Node](StringRef Name) -> bool { - return Name == "strlen" && Node.getNumArgs() == 1 && - isa<StringLiteral>(Node.getArg(0)->IgnoreParenImpCasts()); - }; +AST_MATCHER(FunctionDecl, isPredefinedUnsafeLibcFunc) { + static std::unique_ptr<std::set<StringRef>> PredefinedNames = nullptr; + if (!PredefinedNames) + PredefinedNames = + std::make_unique<std::set<StringRef>, std::set<StringRef>>({ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }); + + auto *II = Node.getIdentifier(); + + if (!II) + return false; + + StringRef Name = LibcFunNamePrefixSuffixParser().matchName( + II->getName(), Node.getBuiltinID()); // Match predefined names: - if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end()) - return !isSafeStrlen(CoreName); + if (PredefinedNames->find(Name) != PredefinedNames->end()) + return true; - std::string NameWCS = CoreName.str(); + std::string NameWCS = Name.str(); size_t WcsPos = NameWCS.find("wcs"); while (WcsPos != std::string::npos) { @@ -560,31 +674,44 @@ AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) { NameWCS[WcsPos++] = 'r'; WcsPos = NameWCS.find("wcs", WcsPos); } - if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) - return !isSafeStrlen(NameWCS); + if (PredefinedNames->find(NameWCS) != PredefinedNames->end()) + return true; // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They // all should end with "scanf"): - return CoreName.ends_with("scanf"); + return Name.ends_with("scanf"); } -// Match a call to one of the `-printf` functions taking `va_list`. We cannot +// Match a call to one of the `v*printf` functions taking `va_list`. We cannot // check safety for these functions so they should be changed to their // non-va_list versions. -AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) { - StringRef Name = CoreName; +AST_MATCHER(FunctionDecl, isUnsafeVaListPrintfFunc) { + auto *II = Node.getIdentifier(); + + if (!II) + return false; + + StringRef Name = LibcFunNamePrefixSuffixParser().matchName( + II->getName(), Node.getBuiltinID()); if (!Name.ends_with("printf")) return false; // neither printf nor scanf return Name.starts_with("v"); } -// Matches a call to one of the `-sprintf` functions (excluding the ones with -// va_list) as they are always unsafe and should be changed to corresponding -// `-snprintf`s. -AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) { - StringRef Name = CoreName; +// Matches a call to one of the `sprintf` functions as they are always unsafe +// and should be changed to `snprintf`. +AST_MATCHER(FunctionDecl, isUnsafeSprintfFunc) { + auto *II = Node.getIdentifier(); - if (!Name.ends_with("printf") || Name.starts_with("v")) + if (!II) + return false; + + StringRef Name = LibcFunNamePrefixSuffixParser().matchName( + II->getName(), Node.getBuiltinID()); + + if (!Name.ends_with("printf") || + // Let `isUnsafeVaListPrintfFunc` check for cases with va-list: + Name.starts_with("v")) return false; StringRef Prefix = Name.drop_back(6); @@ -594,62 +721,17 @@ AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) { return Prefix == "s"; } -// A pointer type expression is known to be null-terminated, if it has the -// form: E.c_str(), for any expression E of `std::string` type. -static bool isNullTermPointer(const Expr *Ptr) { - if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts())) - return true; - if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts())) - return true; - if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) { - const CXXMethodDecl *MD = MCE->getMethodDecl(); - const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); - - if (MD && RD && RD->isInStdNamespace()) - if (MD->getName() == "c_str" && RD->getName() == "basic_string") - return true; - } - return false; -} +// Match function declarations of `printf`, `fprintf`, `snprintf` and their wide +// character versions. Calls to these functions can be safe if their arguments +// are carefully made safe. +AST_MATCHER(FunctionDecl, isNormalPrintfFunc) { + auto *II = Node.getIdentifier(); -// Return true iff at least one of following cases holds: -// 1. Format string is a literal and there is an unsafe pointer argument -// corresponding to an `s` specifier; -// 2. Format string is not a literal and there is least an unsafe pointer -// argument (including the formatter argument). -static bool hasUnsafeFormatOrSArg(const Expr *Fmt, unsigned FmtArgIdx, - const CallExpr *Call, ASTContext &Ctx) { - if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) { - StringRef FmtStr = SL->getString(); - auto I = FmtStr.begin(); - auto E = FmtStr.end(); - unsigned ArgIdx = FmtArgIdx; - - do { - ArgIdx = analyze_format_string::ParseFormatStringFirstSArgIndex( - I, E, ArgIdx, Ctx.getLangOpts(), Ctx.getTargetInfo()); - if (ArgIdx && Call->getNumArgs() > ArgIdx && - !isNullTermPointer(Call->getArg(ArgIdx))) - return true; - } while (ArgIdx); + if (!II) return false; - } - // If format is not a string literal, we cannot analyze the format string. - // In this case, this call is considered unsafe if at least one argument - // (including the format argument) is unsafe pointer. - return llvm::any_of( - llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()), - [](const Expr *Arg) { - return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); - }); -} -// Matches a call to one of the `-printf" functions (excluding the ones with -// va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but -// fail to guarantee their null-termination. In other words, these calls are -// safe if they use null-termination guaranteed string pointers. -AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) { - StringRef Name = CoreName; + StringRef Name = LibcFunNamePrefixSuffixParser().matchName( + II->getName(), Node.getBuiltinID()); if (!Name.ends_with("printf") || Name.starts_with("v")) return false; @@ -659,50 +741,76 @@ AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) { if (Prefix.ends_with("w")) Prefix = Prefix.drop_back(1); - if (Prefix.empty() || - Prefix == "k") // printf: all pointer args should be null-terminated - return hasUnsafeFormatOrSArg(Node.getArg(0), 0, &Node, - Finder->getASTContext()); - if (Prefix == "f") - return hasUnsafeFormatOrSArg(Node.getArg(1), 1, &Node, - Finder->getASTContext()); - if (Prefix == "sn") { - // The first two arguments need to be in safe patterns, which is checked - // by `isSafeSizedby`: - return hasUnsafeFormatOrSArg(Node.getArg(2), 2, &Node, - Finder->getASTContext()); + return Prefix.empty() || Prefix == "k" || Prefix == "f" || Prefix == "sn"; +} + +// This matcher requires that it is known that the callee `isNormalPrintf`. +// Then if the format string is a string literal, this matcher matches when at +// least one string argument is unsafe. If the format is not a string literal, +// this matcher matches when at least one pointer type argument is unsafe. +AST_MATCHER(CallExpr, hasUnsafePrintfStringArg) { + // Determine what printf it is: + const Expr *FirstArg = Node.getArg(0); + ASTContext &Ctx = Finder->getASTContext(); + + if (isa<StringLiteral>(FirstArg->IgnoreParenImpCasts())) { + // It is a printf/kprintf. And, the format is a string literal: + bool isKprintf = false; + if (auto *Callee = Node.getDirectCallee()) + if (auto *II = Node.getDirectCallee()->getIdentifier()) + isKprintf = II->getName() == "kprintf"; + return hasUnsafeFormatOrSArg(&Node, 0, Ctx, isKprintf); + } + + QualType PtrTy = FirstArg->getType(); + + assert(PtrTy->isPointerType()); + + QualType PteTy = (cast<PointerType>(PtrTy))->getPointeeType(); + + if (!Ctx.getFILEType().isNull() /* If `FILE *` is not ever in the ASTContext, + there can't be any file pointer then */ + && PteTy.getCanonicalType() == Ctx.getFILEType().getCanonicalType()) { + // It is a fprintf: + return hasUnsafeFormatOrSArg(&Node, 1, Ctx, false); } - return false; // A call to a "-printf" falls into another category. + + const Expr *SecondArg = Node.getArg(1); + + if (SecondArg->getType()->isIntegerType()) { + // It is a snprintf: + return hasUnsafeFormatOrSArg(&Node, 2, Ctx, false); + } + // It is printf but the format string is passed by pointer. The only thing we + // can do is to require all pointers to be null-terminated: + return llvm::any_of(Node.arguments(), [](const Expr *Arg) -> bool { + return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + }); } -// Matches a call to one of the `snprintf` functions (excluding the ones with -// va_list) such that the first two arguments fail to conform to safe patterns. +// This matcher requires that it is known that the callee `isNormalPrintf`. +// Then it matches if the first two arguments of the call is a pointer and an +// integer and they are not in a safe pattern. // // For the first two arguments: `ptr` and `size`, they are safe if in the // following patterns: // ptr := DRE.data(); // size:= DRE.size()/DRE.size_bytes() // And DRE is a hardened container or view. -AST_MATCHER_P(CallExpr, unsafeSizedByInSnprintfs, StringRef, CoreName) { - StringRef Name = CoreName; - - if (!Name.ends_with("printf") || Name.starts_with("v")) - return false; // not snprint or use va_list +AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { + if (Node.getNumArgs() < 3) + return false; // not an snprintf call - StringRef Prefix = Name.drop_back(6); + const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1); - if (Prefix.ends_with("w")) - Prefix = Prefix.drop_back(1); - - if (Prefix != "sn") - return false; // not snprint + if (!Buf->getType()->isPointerType() || !Size->getType()->isIntegerType()) + return false; // not an snprintf call static StringRef SizedObjs[] = {"span", "array", "vector", "basic_string_view", "basic_string"}; - const Expr *CharPtr = (*Node.arg_begin())->IgnoreParenImpCasts(); - const Expr *Size = (*(Node.arg_begin() + 1))->IgnoreParenImpCasts(); - - if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(CharPtr)) + Buf = Buf->IgnoreParenImpCasts(); + Size = Size->IgnoreParenImpCasts(); + if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Buf)) if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) { auto *DREOfPtr = dyn_cast<DeclRefExpr>( MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts()); @@ -730,86 +838,7 @@ AST_MATCHER_P(CallExpr, unsafeSizedByInSnprintfs, StringRef, CoreName) { } return true; // ptr and size are not in safe pattern } -} // namespace libc_fun_disjoint_inner_matchers - -// Match call to a function whose name may have prefixes like "__builtin_" or -// "__asan_" and suffixes like "_s" or "_chk". This matcher takes an argument, -// which should be applied to the core name---the subtring after stripping off -// prefix and suffix of the function name. -// The application results in an inner matcher that matches the call node with -// respect to the core name of the callee. -AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix, - std::function<internal::Matcher<CallExpr>(StringRef)>, - InnerMatcher) { - // Given a function name, returns its core name `CoreName` according to the - // following grammar. - // - // LibcName := CoreName | CoreName + "_s" - // MatchingName := "__builtin_" + LibcName | - // "__builtin___" + LibcName + "_chk" | - // "__asan_" + LibcName - // - struct NameParser { - StringRef matchName(StringRef FunName, bool isBuiltin) { - // Try to match __builtin_: - if (isBuiltin && FunName.starts_with("__builtin_")) - // Then either it is __builtin_LibcName or __builtin___LibcName_chk or - // no match: - return matchLibcNameOrBuiltinChk( - FunName.drop_front(10 /* truncate "__builtin_" */)); - // Try to match __asan_: - if (FunName.starts_with("__asan_")) - return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */)); - return matchLibcName(FunName); - } - - // Parameter `Name` is the substring after stripping off the prefix - // "__builtin_". - StringRef matchLibcNameOrBuiltinChk(StringRef Name) { - if (Name.starts_with("__") && Name.ends_with("_chk")) - return matchLibcName( - Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); - return matchLibcName(Name); - } - - StringRef matchLibcName(StringRef Name) { - if (Name.ends_with("_s")) - return Name.drop_back(2 /* truncate "_s" */); - return Name; - } - } TheLittleParser; - - const FunctionDecl *FD = Node.getDirectCallee(); - const IdentifierInfo *II; - - if (!FD) - return false; - II = FD->getIdentifier(); - // If this is a special C++ name without IdentifierInfo, it can't be a - // C library function. - if (!II) - return false; - - // Look through 'extern "C"' and anything similar invented in the future. - // In general, C library functions will be in the TU directly. - if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit()) { - // If that's not the case, we also consider "C functions" re-declared in - // `std` namespace. - if (!FD->getDeclContext()->getRedeclContext()->isStdNamespace()) - return false; - } - - // If this function is not externally visible, it is not a C library function. - // Note that we make an exception for inline functions, which may be - // declared in header files without external linkage. - if (!FD->isInlined() && !FD->isExternallyVisible()) - return false; - - StringRef CoreName = - TheLittleParser.matchName(II->getName(), FD->getBuiltinID()); - - return InnerMatcher(CoreName).matches(Node, Finder, Builder); -} +} // namespace libc_func_matchers } // namespace clang::ast_matchers namespace { @@ -1420,34 +1449,49 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget { UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result) : WarningGadget(Kind::UnsafeLibcFunctionCall), Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) { - if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_sprintf")) + if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSprintfTag)) WarnedFunKind = SPRINTF; - else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_string")) + else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeStringTag)) WarnedFunKind = STRING; - else if (Result.Nodes.getNodeAs<CallExpr>( - "UnsafeLibcFunctionCall_sized_by")) + else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSizedByTag)) WarnedFunKind = SIZED_BY; - else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_va_list")) + else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeVaListTag)) WarnedFunKind = VA_LIST; } static Matcher matcher() { - auto anyOfLibcInnerMatcher = [](StringRef S) { - return anyOf( - libc_fun_disjoint_inner_matchers::predefinedUnsafeLibcFunCall(S), - callExpr(libc_fun_disjoint_inner_matchers::unsafeStringInPrintfs(S)) - .bind(UnsafeStringTag), - callExpr( - libc_fun_disjoint_inner_matchers::unsafeSizedByInSnprintfs(S)) - .bind(UnsafeSizedByTag), - callExpr(libc_fun_disjoint_inner_matchers::unsafeSprintfs(S)) - .bind(UnsafeSprintfTag), - callExpr(libc_fun_disjoint_inner_matchers::unsafeVaListPrintfs(S)) - .bind(UnsafeVaListTag)); - }; - return stmt( - callExpr(ignoreLibcPrefixAndSuffix(anyOfLibcInnerMatcher)).bind(Tag)); + stmt( + anyOf( + // Match a call to a predefined unsafe libc function (unless the + // call has a sole string literal argument): + callExpr(callee(functionDecl( + libc_func_matchers::isPredefinedUnsafeLibcFunc())), + unless(allOf(hasArgument(0, expr(stringLiteral())), + hasNumArgs(1)))), + // Match a call to one of the `v*printf` functions taking + // va-list, which cannot be checked at compile-time: + callExpr(callee(functionDecl( + libc_func_matchers::isUnsafeVaListPrintfFunc()))) + .bind(UnsafeVaListTag), + // Match a call to a `sprintf` function, which is never safe: + callExpr(callee(functionDecl( + libc_func_matchers::isUnsafeSprintfFunc()))) + .bind(UnsafeSprintfTag), + // Match a call to an `snprintf` function. And first two + // arguments of the call (that describe a buffer) are not in + // safe patterns: + callExpr(callee(functionDecl( + libc_func_matchers::isNormalPrintfFunc())), + libc_func_matchers::hasUnsafeSnprintfBuffer()) + .bind(UnsafeSizedByTag), + // Match a call to a `printf` function, which can be safe if all + // arguments are null-terminated: + callExpr(callee(functionDecl( + libc_func_matchers::isNormalPrintfFunc())), + libc_func_matchers::hasUnsafePrintfStringArg()) + .bind(UnsafeStringTag))) + .bind(Tag)); } const Stmt *getBaseStmt() const { return Call; } diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 53ade2df5c311b..b0f3b4ed61acc4 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2294,11 +2294,9 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, ASTContext &Ctx) override { - // We have checked that there is a direct callee with an identifier name: - StringRef Name = Call->getDirectCallee()->getName(); - S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call) - << Name << Call->getSourceRange(); + << Call->getDirectCallee() // We've checked there is a direct callee + << Call->getSourceRange(); if (PrintfInfo > 0) S.Diag(Call->getBeginLoc(), diag::note_unsafe_buffer_printf_call) << PrintfInfo << Call->getSourceRange(); diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp index eebbc381a262ff..61a316456ace6a 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp @@ -37,14 +37,14 @@ namespace std { } void f(char * p, char * q, std::span<char> s) { - std::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}} - std::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}} - std::__1::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}} - std::__1::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}} + std::memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}} + std::strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}} + std::__1::memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}} + std::__1::strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}} /* Test printfs */ - std::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} - std::__1::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + std::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + std::__1::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} std::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn std::__1::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn } diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp index c3d7f8fd05435b..0eeedff611b588 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -11,7 +11,10 @@ unsigned strlen( const char* str ); int fprintf( FILE* stream, const char* format, ... ); int printf( const char* format, ... ); int sprintf( char* buffer, const char* format, ... ); +int swprintf( char* buffer, const char* format, ... ); int snprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int snwprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int snwprintf_s( char* buffer, unsigned buf_size, const char* format, ... ); int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... ); int sscanf_s(const char * buffer, const char * format, ...); int sscanf(const char * buffer, const char * format, ... ); @@ -49,31 +52,36 @@ namespace std { } void f(char * p, char * q, std::span<char> s, std::span<char> s2) { - memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}} - std::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}} - __builtin_memcpy(p, q, 64); // expected-warning{{function __builtin_memcpy introduces unsafe buffer access}} - __builtin___memcpy_chk(p, q, 8, 64); // expected-warning{{function __builtin___memcpy_chk introduces unsafe buffer access}} - __asan_memcpy(); // expected-warning{{function __asan_memcpy introduces unsafe buffer access}} - strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}} - std::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}} - strcpy_s(); // expected-warning{{function strcpy_s introduces unsafe buffer access}} - wcscpy_s(); // expected-warning{{function wcscpy_s introduces unsafe buffer access}} + memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}} + std::memcpy(); // expected-warning{{function 'memcpy' introduces unsafe buffer access}} + __builtin_memcpy(p, q, 64); // expected-warning{{function '__builtin_memcpy' introduces unsafe buffer access}} + __builtin___memcpy_chk(p, q, 8, 64); // expected-warning{{function '__builtin___memcpy_chk' introduces unsafe buffer access}} + __asan_memcpy(); // expected-warning{{function '__asan_memcpy' introduces unsafe buffer access}} + strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}} + std::strcpy(); // expected-warning{{function 'strcpy' introduces unsafe buffer access}} + strcpy_s(); // expected-warning{{function 'strcpy_s' introduces unsafe buffer access}} + wcscpy_s(); // expected-warning{{function 'wcscpy_s' introduces unsafe buffer access}} /* Test printfs */ - fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} - printf("%s%d", p, *p); // expected-warning{{function printf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} - sprintf(q, "%s%d", "hello", *p); // expected-warning{{function sprintf introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}} - snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} - snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} - vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function vsnprintf introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}} - sscanf(p, "%s%d", "hello", *p); // expected-warning{{function sscanf introduces unsafe buffer access}} - sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function sscanf_s introduces unsafe buffer access}} - fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + printf("%s%d", p, *p); // expected-warning{{function 'printf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + sprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'sprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}} + swprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'swprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}} + snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + snwprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + snwprintf_s(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf_s' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function 'vsnprintf' introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}} + sscanf(p, "%s%d", "hello", *p); // expected-warning{{function 'sscanf' introduces unsafe buffer access}} + sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function 'sscanf_s' introduces unsafe buffer access}} + fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn printf("%s%d", "hello", *p); // no warn snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn snprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn + snwprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn + snwprintf_s(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn strlen("hello");// no warn } diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp index 668efe0e178b53..22a43490551c20 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp @@ -3,4 +3,4 @@ typedef unsigned __darwin_size_t; typedef __darwin_size_t size_t; #define bzero(s, n) __builtin_bzero(s, n) -void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function __builtin_bzero introduces unsafe buffer access}} +void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function '__builtin_bzero' introduces unsafe buffer access}} >From 276dee85324cc2df2a6c6461330609c57b833dc5 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Wed, 28 Aug 2024 17:00:49 -0700 Subject: [PATCH 4/5] Address more comments, mainly for improving diag messsages --- .../Analysis/Analyses/UnsafeBufferUsage.h | 5 +- .../clang/Basic/DiagnosticSemaKinds.td | 4 +- clang/lib/Analysis/UnsafeBufferUsage.cpp | 142 +++++++++++------- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 +- ...arn-unsafe-buffer-usage-libc-functions.cpp | 15 +- 5 files changed, 112 insertions(+), 66 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 3e0fae6db7562d..aa2c01ad10d45d 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -115,8 +115,11 @@ class UnsafeBufferUsageHandler { /// safe pattern; /// is 3 if string arguments do not guarantee null-termination /// is 4 if the callee takes va_list + /// \param UnsafeArg one of the actual arguments that is unsafe, non-null + /// only when `2 <= PrintfInfo <= 3` virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, - ASTContext &Ctx) = 0; + ASTContext &Ctx, + const Expr *UnsafeArg = nullptr) = 0; /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 7e1e3686ce6554..3d8dcfaf3a7132 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12388,8 +12388,8 @@ def warn_unsafe_buffer_libc_call : Warning< InGroup<UnsafeBufferUsage>, DefaultIgnore; def note_unsafe_buffer_printf_call : Note< "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" - "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination" - "| do not use va_list that cannot be checked at compile-time for bounds safety}0">; + "| use 'std::string::c_str' or a string literal as string pointer to guarantee null-termination" + "| do not use va_list, which cannot be checked at compile-time for bounds safety}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index a1526125edf098..7bcc3c05dee98c 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -521,16 +521,22 @@ static bool isNullTermPointer(const Expr *Ptr) { // corresponding to an `s` specifier; // 2. Format string is not a literal and there is least an unsafe pointer // argument (including the formatter argument). -static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx, - ASTContext &Ctx, bool isKprintf = false) { +// +// `UnsafeArg` is the output argument that will be set only if this function +// returns true. +static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, + const unsigned FmtArgIdx, ASTContext &Ctx, + bool isKprintf = false) { class StringFormatStringHandler : public analyze_format_string::FormatStringHandler { const CallExpr *Call; unsigned FmtArgIdx; + const Expr *&UnsafeArg; public: - StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx) - : Call(Call), FmtArgIdx(FmtArgIdx) {} + StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx, + const Expr *&UnsafeArg) + : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {} bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier, @@ -541,8 +547,11 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx, unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx; if (0 < ArgIdx && ArgIdx < Call->getNumArgs()) - if (!isNullTermPointer(Call->getArg(ArgIdx))) - return false; // stop parsing + if (!isNullTermPointer(Call->getArg(ArgIdx))) { + UnsafeArg = Call->getArg(ArgIdx); // output + // returning false stops parsing immediately + return false; + } } return true; // continue parsing } @@ -552,7 +561,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx, if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) { StringRef FmtStr = SL->getString(); - StringFormatStringHandler Handler(Call, FmtArgIdx); + StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg); return analyze_format_string::ParsePrintfString( Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(), @@ -563,8 +572,12 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx, // (including the format argument) is unsafe pointer. return llvm::any_of( llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()), - [](const Expr *Arg) { - return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + [&UnsafeArg](const Expr *Arg) -> bool { + if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg)) { + UnsafeArg = Arg; + return true; + } + return false; }); } @@ -748,7 +761,9 @@ AST_MATCHER(FunctionDecl, isNormalPrintfFunc) { // Then if the format string is a string literal, this matcher matches when at // least one string argument is unsafe. If the format is not a string literal, // this matcher matches when at least one pointer type argument is unsafe. -AST_MATCHER(CallExpr, hasUnsafePrintfStringArg) { +AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, + clang::ast_matchers::internal::Matcher<Expr>, + UnsafeStringArgMatcher) { // Determine what printf it is: const Expr *FirstArg = Node.getArg(0); ASTContext &Ctx = Finder->getASTContext(); @@ -756,10 +771,14 @@ AST_MATCHER(CallExpr, hasUnsafePrintfStringArg) { if (isa<StringLiteral>(FirstArg->IgnoreParenImpCasts())) { // It is a printf/kprintf. And, the format is a string literal: bool isKprintf = false; + const Expr *UnsafeArg; + if (auto *Callee = Node.getDirectCallee()) if (auto *II = Node.getDirectCallee()->getIdentifier()) isKprintf = II->getName() == "kprintf"; - return hasUnsafeFormatOrSArg(&Node, 0, Ctx, isKprintf); + if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 0, Ctx, isKprintf)) + return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder); + return false; } QualType PtrTy = FirstArg->getType(); @@ -772,20 +791,31 @@ AST_MATCHER(CallExpr, hasUnsafePrintfStringArg) { there can't be any file pointer then */ && PteTy.getCanonicalType() == Ctx.getFILEType().getCanonicalType()) { // It is a fprintf: - return hasUnsafeFormatOrSArg(&Node, 1, Ctx, false); + const Expr *UnsafeArg; + + if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 1, Ctx, false)) + return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder); + return false; } const Expr *SecondArg = Node.getArg(1); if (SecondArg->getType()->isIntegerType()) { // It is a snprintf: - return hasUnsafeFormatOrSArg(&Node, 2, Ctx, false); + const Expr *UnsafeArg; + + if (unsigned UnsafeArgIdx = + hasUnsafeFormatOrSArg(&Node, UnsafeArg, 2, Ctx, false)) + return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder); + return false; } // It is printf but the format string is passed by pointer. The only thing we // can do is to require all pointers to be null-terminated: - return llvm::any_of(Node.arguments(), [](const Expr *Arg) -> bool { - return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); - }); + for (auto Arg : Node.arguments()) + if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg)) + if (UnsafeStringArgMatcher.matches(*Arg, Finder, Builder)) + return true; + return false; } // This matcher requires that it is known that the callee `isNormalPrintf`. @@ -1423,6 +1453,7 @@ class DataInvocationGadget : public WarningGadget { class UnsafeLibcFunctionCallGadget : public WarningGadget { const CallExpr *const Call; + const Expr *UnsafeArg = nullptr; constexpr static const char *const Tag = "UnsafeLibcFunctionCall"; // Extra tags for additional information: constexpr static const char *const UnsafeSprintfTag = @@ -1437,8 +1468,8 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget { enum UnsafeKind { OTHERS = 0, // no specific information, the callee function is unsafe SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead. - SIZED_BY = 2, // a pair of function arguments have "__sized_by" relation but - // they do not conform to safe patterns + SIZED_BY = 2, // the first two arguments of `snprintf` function have + // "__sized_by" relation but they do not conform to safe patterns STRING = 3, // an argument is a pointer-to-char-as-string but does not // guarantee null-termination VA_LIST = 4, // one of the `-printf`s function that take va_list, which is @@ -1449,49 +1480,52 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget { UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result) : WarningGadget(Kind::UnsafeLibcFunctionCall), Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) { - if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSprintfTag)) + if (Result.Nodes.getNodeAs<Decl>(UnsafeSprintfTag)) WarnedFunKind = SPRINTF; - else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeStringTag)) + else if (auto *E = Result.Nodes.getNodeAs<Expr>(UnsafeStringTag)) { WarnedFunKind = STRING; - else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSizedByTag)) + UnsafeArg = E; + } else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSizedByTag)) { WarnedFunKind = SIZED_BY; - else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeVaListTag)) + UnsafeArg = Call->getArg(0); + } else if (Result.Nodes.getNodeAs<Decl>(UnsafeVaListTag)) WarnedFunKind = VA_LIST; } static Matcher matcher() { - return stmt( - stmt( - anyOf( - // Match a call to a predefined unsafe libc function (unless the - // call has a sole string literal argument): - callExpr(callee(functionDecl( - libc_func_matchers::isPredefinedUnsafeLibcFunc())), - unless(allOf(hasArgument(0, expr(stringLiteral())), - hasNumArgs(1)))), - // Match a call to one of the `v*printf` functions taking - // va-list, which cannot be checked at compile-time: - callExpr(callee(functionDecl( - libc_func_matchers::isUnsafeVaListPrintfFunc()))) + return stmt(anyOf( + callExpr( + callee(functionDecl(anyOf( + // Match a predefined unsafe libc + // function: + functionDecl(libc_func_matchers::isPredefinedUnsafeLibcFunc()), + // Match a call to one of the `v*printf` functions + // taking va-list, which cannot be checked at + // compile-time: + functionDecl(libc_func_matchers::isUnsafeVaListPrintfFunc()) .bind(UnsafeVaListTag), - // Match a call to a `sprintf` function, which is never safe: - callExpr(callee(functionDecl( - libc_func_matchers::isUnsafeSprintfFunc()))) - .bind(UnsafeSprintfTag), - // Match a call to an `snprintf` function. And first two - // arguments of the call (that describe a buffer) are not in - // safe patterns: - callExpr(callee(functionDecl( - libc_func_matchers::isNormalPrintfFunc())), - libc_func_matchers::hasUnsafeSnprintfBuffer()) - .bind(UnsafeSizedByTag), - // Match a call to a `printf` function, which can be safe if all - // arguments are null-terminated: - callExpr(callee(functionDecl( - libc_func_matchers::isNormalPrintfFunc())), - libc_func_matchers::hasUnsafePrintfStringArg()) - .bind(UnsafeStringTag))) - .bind(Tag)); + // Match a call to a `sprintf` function, which is never + // safe: + functionDecl(libc_func_matchers::isUnsafeSprintfFunc()) + .bind(UnsafeSprintfTag)))), + // (unless the call has a sole string literal argument): + unless( + allOf(hasArgument(0, expr(stringLiteral())), hasNumArgs(1)))), + + // The following two cases require checking against actual + // arguments of the call: + + // Match a call to an `snprintf` function. And first two + // arguments of the call (that describe a buffer) are not in + // safe patterns: + callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())), + libc_func_matchers::hasUnsafeSnprintfBuffer()) + .bind(UnsafeSizedByTag), + // Match a call to a `printf` function, which can be safe if + // all arguments are null-terminated: + callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())), + libc_func_matchers::hasUnsafePrintfStringArg( + expr().bind(UnsafeStringTag))))); } const Stmt *getBaseStmt() const { return Call; } @@ -1501,7 +1535,7 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget { void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, bool IsRelatedToDecl, ASTContext &Ctx) const override { - Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx); + Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx, UnsafeArg); } DeclUseList getClaimedVarUseSites() const override { return {}; } diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index b0f3b4ed61acc4..598f095f541110 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2293,13 +2293,17 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { } void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, - ASTContext &Ctx) override { + ASTContext &Ctx, + const Expr *UnsafeArg = nullptr) override { S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call) << Call->getDirectCallee() // We've checked there is a direct callee << Call->getSourceRange(); - if (PrintfInfo > 0) - S.Diag(Call->getBeginLoc(), diag::note_unsafe_buffer_printf_call) - << PrintfInfo << Call->getSourceRange(); + if (PrintfInfo > 0) { + SourceRange R = + UnsafeArg ? UnsafeArg->getSourceRange() : Call->getSourceRange(); + S.Diag(R.getBegin(), diag::note_unsafe_buffer_printf_call) + << PrintfInfo << R; + } } void handleUnsafeOperationInContainer(const Stmt *Operation, diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp index 0eeedff611b588..c116289ee96a69 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -64,18 +64,23 @@ void f(char * p, char * q, std::span<char> s, std::span<char> s2) { /* Test printfs */ - fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} - printf("%s%d", p, *p); // expected-warning{{function 'printf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or a string literal as string pointer to guarantee null-termination}} + printf("%s%d", // expected-warning{{function 'printf' introduces unsafe buffer access}} + p, // expected-note{{use 'std::string::c_str' or a string literal as string pointer to guarantee null-termination}} note attached to the unsafe argument + *p); sprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'sprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}} swprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'swprintf' introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}} snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} snwprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} - snwprintf_s(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf_s' introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} - vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function 'vsnprintf' introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}} + snwprintf_s( // expected-warning{{function 'snwprintf_s' introduces unsafe buffer access}} + s.data(), // expected-note{{buffer pointer and size may not match}} // note attached to the buffer + s2.size(), + "%s%d", "hello", *p); + vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function 'vsnprintf' introduces unsafe buffer access}} expected-note{{do not use va_list, which cannot be checked at compile-time for bounds safety}} sscanf(p, "%s%d", "hello", *p); // expected-warning{{function 'sscanf' introduces unsafe buffer access}} sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function 'sscanf_s' introduces unsafe buffer access}} - fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function 'fprintf' introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or a string literal as string pointer to guarantee null-termination}} fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn printf("%s%d", "hello", *p); // no warn snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn >From f85a487c4cbb57678081fd95bfe5e2bf691f7226 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Wed, 28 Aug 2024 17:19:40 -0700 Subject: [PATCH 5/5] clean up include list --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 7bcc3c05dee98c..8e62ae4bbda6f1 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,25 +9,19 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" -#include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" -#include "clang/AST/ExprCXX.h" #include "clang/AST/FormatString.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" -#include "clang/AST/Type.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/APSInt.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" #include <memory> #include <optional> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits