https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583
>From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. (rdar://117182250) --- .../Analysis/Analyses/UnsafeBufferUsage.h | 12 + .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td | 7 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 408 +++++++++++++++++- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 + ...-usage-libc-functions-inline-namespace.cpp | 60 +++ ...arn-unsafe-buffer-usage-libc-functions.cpp | 86 ++++ ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 8 files changed, 586 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e115..3e0fae6db7562d 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. 
+ /// \param PrintfInfo + /// is 0 if the callee function is not a member of the printf family; + /// is 1 if the callee is `sprintf`; + /// is 2 if arguments of the call have `__sized_by` relation but are not in a + /// safe pattern; + /// is 3 if string arguments do not guarantee null-termination; + /// is 4 if the callee takes va_list + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, + ASTContext &Ctx) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b9..ac01b285ae833b 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 554dbaff2ce0d8..7e1e3686ce6554 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup<UnsafeBufferUsage>, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 introduces unsafe buffer access">, 
InGroup<UnsafeBufferUsage>, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" + "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination" + "| do not use va_list that cannot be checked at compile-time for bounds safety}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b6..751fb75f6ed602 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,20 +9,24 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" +#include <functional> #include <memory> #include <optional> #include <queue> @@ -443,6 +447,337 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +namespace libc_fun_disjoint_inner_matchers { +// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match +// disjoint node sets. They all take a `CoreName`, which is the substring of a +// function name after `ignoreLibcPrefixAndSuffix`. 
They are supposed to be used +// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different +// libc function calls. + +// Matches a function call node such that +// 1. Its name, after stripping off predefined prefix and suffix, is +// `CoreName`; and +// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which +// is a set of libc function names. +// +// Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`. +// And, the notation `CoreName[str/wcs]` means a new name obtained by replacing +// string "wcs" with "str" in `CoreName`. +// +// Also note, the set of predefined function names does not include `printf` +// functions, they are checked exclusively with other matchers below. +// Maintaining the invariant that all matchers under +// `libc_fun_disjoint_inner_matchers` are disjoint. +AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) { + static const std::set<StringRef> PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + // This is safe: strlen("hello"). We don't want to be noisy on this case. 
+ auto isSafeStrlen = [&Node](StringRef Name) -> bool { + return Name == "strlen" && Node.getNumArgs() == 1 && + isa<StringLiteral>(Node.getArg(0)->IgnoreParenImpCasts()); + }; + + // Match predefined names: + if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end()) + return !isSafeStrlen(CoreName); + + std::string NameWCS = CoreName.str(); + size_t WcsPos = NameWCS.find("wcs"); + + while (WcsPos != std::string::npos) { + NameWCS[WcsPos++] = 's'; + NameWCS[WcsPos++] = 't'; + NameWCS[WcsPos++] = 'r'; + WcsPos = NameWCS.find("wcs", WcsPos); + } + if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) + return !isSafeStrlen(NameWCS); + // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They + // all should end with "scanf"): + return CoreName.ends_with("scanf"); +} + +// Match a call to one of the `-printf` functions taking `va_list`. We cannot +// check safety for these functions so they should be changed to their +// non-va_list versions. +AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf")) + return false; // neither printf nor scanf + return Name.starts_with("v"); +} + +// Matches a call to one of the `-sprintf` functions (excluding the ones with +// va_list) as they are always unsafe and should be changed to corresponding +// `-snprintf`s. +AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf") || Name.starts_with("v")) + return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + return Prefix == "s"; +} + +// A pointer type expression is known to be null-terminated, if it has the +// form: E.c_str(), for any expression E of `std::string` type. 
+static bool isNullTermPointer(const Expr *Ptr) { + if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts())) + return true; + if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts())) + return true; + if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) { + const CXXMethodDecl *MD = MCE->getMethodDecl(); + const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); + + if (MD && RD && RD->isInStdNamespace()) + if (MD->getName() == "c_str" && RD->getName() == "basic_string") + return true; + } + return false; +} + +// Matches a call to one of the `-printf" functions (excluding the ones with +// va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but +// fail to guarantee their null-termination. In other words, these calls are +// safe if they use null-termination guaranteed string pointers. +AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf") || Name.starts_with("v")) + return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + + auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool { + return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + }; + + if (Prefix.empty() || + Prefix == "k") // printf: all pointer args should be null-terminated + return any_of(Node.arguments(), AnyUnsafeStrPtr); + if (Prefix == "f" && Node.getNumArgs() > 1) + return any_of(llvm::make_range(Node.arg_begin() + 1, Node.arg_end()), + AnyUnsafeStrPtr); + if (Prefix == "sn" && Node.getNumArgs() > 2) { + return any_of(llvm::make_range(Node.arg_begin() + 2, Node.arg_end()), + AnyUnsafeStrPtr); + } + return false; // A call to a "-printf" falls into another category. +} + +// Matches a call to one of the `snprintf` functions (excluding the ones with +// va_list) such that the first two arguments fail to conform to safe patterns. 
+// +// For the first two arguments: `ptr` and `size`, they are safe if in the +// following patterns: +// ptr := DRE.data(); +// size:= DRE.size()/DRE.size_bytes() +// And DRE is a hardened container or view. +AST_MATCHER_P(CallExpr, unsafeSizedByInSnprintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf") || Name.starts_with("v")) + return false; // not snprint or use va_list + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + + if (Prefix != "sn") + return false; // not snprint + + static StringRef SizedObjs[] = {"span", "array", "vector", + "basic_string_view", "basic_string"}; + const Expr *CharPtr = (*Node.arg_begin())->IgnoreParenImpCasts(); + const Expr *Size = (*(Node.arg_begin() + 1))->IgnoreParenImpCasts(); + + if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(CharPtr)) + if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) { + auto *DREOfPtr = dyn_cast<DeclRefExpr>( + MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts()); + auto *DREOfSize = dyn_cast<DeclRefExpr>( + MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts()); + + if (!DREOfPtr || !DREOfSize) + return true; // not in safe pattern + if (DREOfPtr->getDecl() != DREOfSize->getDecl()) + return true; // not in safe pattern + if (MCEPtr->getMethodDecl()->getName() != "data") + return true; // not in safe pattern + + if (MCESize->getMethodDecl()->getName() == "size_bytes" || + // Note here the pointer must be a pointer-to-char type unless there + // is explicit casting. If there is explicit casting, this branch + // is unreachable. 
Thus, at this branch "size" and "size_bytes" are + // equivalent as the pointer is a char pointer: + MCESize->getMethodDecl()->getName() == "size") + for (StringRef SizedObj : SizedObjs) + if (MCEPtr->getRecordDecl()->isInStdNamespace() && + MCEPtr->getRecordDecl()->getCanonicalDecl()->getName() == + SizedObj) + return false; // It is in fact safe + } + return true; // ptr and size are not in safe pattern +} +} // namespace libc_fun_disjoint_inner_matchers + +// Match call to a function whose name may have prefixes like "__builtin_" or +// "__asan_" and suffixes like "_s" or "_chk". This matcher takes an argument, +// which should be applied to the core name---the substring after stripping off +// prefix and suffix of the function name. +// The application results in an inner matcher that matches the call node with +// respect to the core name of the callee. +AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix, + std::function<internal::Matcher<CallExpr>(StringRef)>, + InnerMatcher) { + // Given a function name, returns its core name `CoreName` according to the + // following grammar. + // + // LibcName := CoreName | CoreName + "_s" + // MatchingName := "__builtin_" + LibcName | + // "__builtin___" + LibcName + "_chk" | + // "__asan_" + LibcName + // + struct NameParser { + StringRef matchName(StringRef FunName, bool isBuiltin) { + // Try to match __builtin_: + if (isBuiltin && FunName.starts_with("__builtin_")) + // Then either it is __builtin_LibcName or __builtin___LibcName_chk or + // no match: + return matchLibcNameOrBuiltinChk( + FunName.drop_front(10 /* truncate "__builtin_" */)); + // Try to match __asan_: + if (FunName.starts_with("__asan_")) + return matchLibcName(FunName.drop_front(7 /* truncate "__asan_" */)); + return matchLibcName(FunName); + } + + // Parameter `Name` is the substring after stripping off the prefix + // "__builtin_". 
+ StringRef matchLibcNameOrBuiltinChk(StringRef Name) { + if (Name.starts_with("__") && Name.ends_with("_chk")) + return matchLibcName( + Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); + return matchLibcName(Name); + } + + StringRef matchLibcName(StringRef Name) { + if (Name.ends_with("_s")) + return Name.drop_back(2 /* truncate "_s" */); + return Name; + } + } TheLittleParser; + + const FunctionDecl *FD = Node.getDirectCallee(); + const IdentifierInfo *II; + + if (!FD) + return false; + II = FD->getIdentifier(); + // If this is a special C++ name without IdentifierInfo, it can't be a + // C library function. + if (!II) + return false; + + // Look through 'extern "C"' and anything similar invented in the future. + // In general, C library functions will be in the TU directly. + if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit()) { + // If that's not the case, we also consider "C functions" re-declared in + // `std` namespace. + if (!FD->getDeclContext()->getRedeclContext()->isStdNamespace()) + return false; + } + + // If this function is not externally visible, it is not a C library function. + // Note that we make an exception for inline functions, which may be + // declared in header files without external linkage. 
+ if (!FD->isInlined() && !FD->isExternallyVisible()) + return false; + + StringRef CoreName = + TheLittleParser.matchName(II->getName(), FD->getBuiltinID()); + + return InnerMatcher(CoreName).matches(Node, Finder, Builder); +} } // namespace clang::ast_matchers namespace { @@ -1025,6 +1360,77 @@ class DataInvocationGadget : public WarningGadget { DeclUseList getClaimedVarUseSites() const override { return {}; } }; +class UnsafeLibcFunctionCallGadget : public WarningGadget { + const CallExpr *const Call; + constexpr static const char *const Tag = "UnsafeLibcFunctionCall"; + // Extra tags for additional information: + constexpr static const char *const UnsafeSprintfTag = + "UnsafeLibcFunctionCall_sprintf"; + constexpr static const char *const UnsafeSizedByTag = + "UnsafeLibcFunctionCall_sized_by"; + constexpr static const char *const UnsafeStringTag = + "UnsafeLibcFunctionCall_string"; + constexpr static const char *const UnsafeVaListTag = + "UnsafeLibcFunctionCall_va_list"; + + enum UnsafeKind { + OTHERS = 0, // no specific information, the callee function is unsafe + SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead. 
+ SIZED_BY = 2, // a pair of function arguments have "__sized_by" relation but + // they do not conform to safe patterns + STRING = 3, // an argument is a pointer-to-char-as-string but does not + // guarantee null-termination + VA_LIST = 4, // one of the `-printf`s function that take va_list, which is + // considered unsafe as it is not compile-time check + } WarnedFunKind = OTHERS; + +public: + UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::UnsafeLibcFunctionCall), + Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) { + if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_sprintf")) + WarnedFunKind = SPRINTF; + else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_string")) + WarnedFunKind = STRING; + else if (Result.Nodes.getNodeAs<CallExpr>( + "UnsafeLibcFunctionCall_sized_by")) + WarnedFunKind = SIZED_BY; + else if (Result.Nodes.getNodeAs<CallExpr>("UnsafeLibcFunctionCall_va_list")) + WarnedFunKind = VA_LIST; + } + + static Matcher matcher() { + auto anyOfLibcInnerMatcher = [](StringRef S) { + return anyOf( + libc_fun_disjoint_inner_matchers::predefinedUnsafeLibcFunCall(S), + callExpr(libc_fun_disjoint_inner_matchers::unsafeStringInPrintfs(S)) + .bind(UnsafeStringTag), + callExpr( + libc_fun_disjoint_inner_matchers::unsafeSizedByInSnprintfs(S)) + .bind(UnsafeSizedByTag), + callExpr(libc_fun_disjoint_inner_matchers::unsafeSprintfs(S)) + .bind(UnsafeSprintfTag), + callExpr(libc_fun_disjoint_inner_matchers::unsafeVaListPrintfs(S)) + .bind(UnsafeVaListTag)); + }; + + return stmt( + callExpr(ignoreLibcPrefixAndSuffix(anyOfLibcInnerMatcher)).bind(Tag)); + } + + const Stmt *getBaseStmt() const { return Call; } + + SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); } + + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const override { + Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx); + } + + DeclUseList 
getClaimedVarUseSites() const override { return {}; } +}; + // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue // Context (see `isInUnspecifiedLvalueContext`). // Note here `[]` is the built-in subscript operator. diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 0f604c61fa3af9..53ade2df5c311b 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2292,6 +2292,18 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { } } + void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, + ASTContext &Ctx) override { + // We have checked that there is a direct callee with an identifier name: + StringRef Name = Call->getDirectCallee()->getName(); + + S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call) + << Name << Call->getSourceRange(); + if (PrintfInfo > 0) + S.Diag(Call->getBeginLoc(), diag::note_unsafe_buffer_printf_call) + << PrintfInfo << Call->getSourceRange(); + } + void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) override { diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp new file mode 100644 index 00000000000000..eebbc381a262ff --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN: -verify %s + +namespace std { + inline namespace __1 { + template< class InputIt, class OutputIt > + OutputIt copy( InputIt first, InputIt last, + OutputIt d_first ); + + struct iterator{}; + template<typename T> + struct span { + T * ptr; + T * data(); + unsigned size_bytes(); + unsigned size(); + iterator begin() const noexcept; + iterator end() const noexcept; + }; + + template<typename T> + struct basic_string { + 
T* p; + T *c_str(); + T *data(); + unsigned size_bytes(); + }; + + typedef basic_string<char> string; + typedef basic_string<wchar_t> wstring; + + // C function under std: + void memcpy(); + void strcpy(); + int snprintf( char* buffer, unsigned buf_size, const char* format, ... ); + } +} + +void f(char * p, char * q, std::span<char> s) { + std::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}} + std::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}} + std::__1::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}} + std::__1::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}} + + /* Test printfs */ + std::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + std::__1::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + std::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn + std::__1::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn +} + +void v(std::string s1) { + std::snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn + std::__1::snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn +} + +void g(char *begin, char *end, char *p, std::span<char> s) { + std::copy(begin, end, p); // no warn + std::copy(s.begin(), s.end(), s.begin()); // no warn +} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp new file mode 100644 index 00000000000000..ea2c5ec82211b2 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN: -verify %s + +typedef 
struct {} FILE; +void memcpy(); +void __asan_memcpy(); +void strcpy(); +void strcpy_s(); +void wcscpy_s(); +unsigned strlen( const char* str ); +int fprintf( FILE* stream, const char* format, ... ); +int printf( const char* format, ... ); +int sprintf( char* buffer, const char* format, ... ); +int snprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int sscanf_s(const char * buffer, const char * format, ...); +int sscanf(const char * buffer, const char * format, ... ); + +namespace std { + template< class InputIt, class OutputIt > + OutputIt copy( InputIt first, InputIt last, + OutputIt d_first ); + + struct iterator{}; + template<typename T> + struct span { + T * ptr; + T * data(); + unsigned size_bytes(); + unsigned size(); + iterator begin() const noexcept; + iterator end() const noexcept; + }; + + template<typename T> + struct basic_string { + T* p; + T *c_str(); + T *data(); + unsigned size_bytes(); + }; + + typedef basic_string<char> string; + typedef basic_string<wchar_t> wstring; + + // C function under std: + void memcpy(); + void strcpy(); +} + +void f(char * p, char * q, std::span<char> s, std::span<char> s2) { + memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}} + std::memcpy(); // expected-warning{{function memcpy introduces unsafe buffer access}} + __builtin_memcpy(p, q, 64); // expected-warning{{function __builtin_memcpy introduces unsafe buffer access}} + __builtin___memcpy_chk(p, q, 8, 64); // expected-warning{{function __builtin___memcpy_chk introduces unsafe buffer access}} + __asan_memcpy(); // expected-warning{{function __asan_memcpy introduces unsafe buffer access}} + strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}} + std::strcpy(); // expected-warning{{function strcpy introduces unsafe buffer access}} + strcpy_s(); // expected-warning{{function strcpy_s introduces unsafe buffer access}} + 
wcscpy_s(); // expected-warning{{function wcscpy_s introduces unsafe buffer access}} + + + /* Test printfs */ + fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + printf("%s%d", p, *p); // expected-warning{{function printf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + sprintf(q, "%s%d", "hello", *p); // expected-warning{{function sprintf introduces unsafe buffer access}} expected-note{{change to 'snprintf' for explicit bounds checking}} + snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function snprintf introduces unsafe buffer access}} expected-note{{buffer pointer and size may not match}} + vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function vsnprintf introduces unsafe buffer access}} expected-note{{do not use va_list that cannot be checked at compile-time for bounds safety}} + sscanf(p, "%s%d", "hello", *p); // expected-warning{{function sscanf introduces unsafe buffer access}} + sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function sscanf_s introduces unsafe buffer access}} + fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + printf("%s%d", "hello", *p); // no warn + snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn + strlen("hello");// no warn +} + +void v(std::string s1) { + snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn +} + + 
+void g(char *begin, char *end, char *p, std::span<char> s) { + std::copy(begin, end, p); // no warn + std::copy(s.begin(), s.end(), s.begin()); // no warn +} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp index 844311c3a51a58..668efe0e178b53 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp @@ -1,8 +1,6 @@ // RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fsafe-buffer-usage-suggestions -verify %s -// expected-no-diagnostics - typedef unsigned __darwin_size_t; typedef __darwin_size_t size_t; #define bzero(s, n) __builtin_bzero(s, n) -void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } +void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function __builtin_bzero introduces unsafe buffer access}} >From e9a5b7a5dd064728c143a8b8a26c8951b427392b Mon Sep 17 00:00:00 2001 From: ziqingluo-90 <ziqing_...@apple.com> Date: Mon, 12 Aug 2024 17:46:48 -0700 Subject: [PATCH 2/2] Reduce false positives in warnings about printfs - Predefined name macros such as `__PRETTY_FUNCTION__` are considered safe. - We need to distinguish between `%s` and `%p` as both accept pointers but the latter is not a buffer operation. This leaves us no choice other than parsing the format string. Fortunately, the building blocks of format parsing have already existed and are quite handy. 
--- clang/include/clang/AST/FormatString.h | 12 ++++ clang/lib/AST/PrintfFormatString.cpp | 28 ++++++++++ clang/lib/Analysis/UnsafeBufferUsage.cpp | 56 +++++++++++++++---- ...arn-unsafe-buffer-usage-libc-functions.cpp | 11 +++- 4 files changed, 93 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h index a074dd23e2ad4c..cdba2a7abe49d9 100644 --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -783,6 +783,18 @@ bool ParsePrintfString(FormatStringHandler &H, bool ParseFormatStringHasSArg(const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target); +/// Parse C format string and return index (relative to `ArgIndex`) of the first +/// found `s` specifier. Return 0 if not found. +/// \param I The start of the C format string; Updated to the first unparsed +/// position upon return. +/// \param E The end of the C format string; +/// \param ArgIndex The argument index of the last found `s` specifier; Or the +/// argument index of the formatter in initial case. 
+unsigned ParseFormatStringFirstSArgIndex(const char *&I, const char *E, + unsigned ArgIndex, + const LangOptions &LO, + const TargetInfo &Target); + bool ParseScanfString(FormatStringHandler &H, const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target); diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp index 3c6cd2d0f43417..9992afd402d370 100644 --- a/clang/lib/AST/PrintfFormatString.cpp +++ b/clang/lib/AST/PrintfFormatString.cpp @@ -483,6 +483,34 @@ bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I, return false; } +unsigned clang::analyze_format_string::ParseFormatStringFirstSArgIndex( + const char *&I, const char *E, unsigned ArgIndex, const LangOptions &LO, + const TargetInfo &Target) { + unsigned argIndex = ArgIndex; + + // Keep looking for a %s format specifier until we have exhausted the string. + FormatStringHandler H; + while (I != E) { + const PrintfSpecifierResult &FSR = + ParsePrintfSpecifier(H, I, E, argIndex, LO, Target, false, false); + // Did a fail-stop error of any kind occur when parsing the specifier? + // If so, don't do any more processing. + if (FSR.shouldStop()) + return false; + // Did we exhaust the string or encounter an error that + // we can recover from? + if (!FSR.hasValue()) + continue; + const analyze_printf::PrintfSpecifier &FS = FSR.getValue(); + // Return its positional argument index if this is a %s format specifier. 
+ if (FS.getConversionSpecifier().getKind() == + ConversionSpecifier::Kind::sArg) { + return FS.getPositionalArgIndex(); + } + } + return false; +} + bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers( const char *Begin, const char *End, const LangOptions &LO, const TargetInfo &Target) { diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 751fb75f6ed602..745de657fcd274 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -12,6 +12,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/FormatString.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" @@ -611,6 +612,38 @@ static bool isNullTermPointer(const Expr *Ptr) { return false; } +// Return true iff at least one of the following cases holds: +// 1. Format string is a literal and there is an unsafe pointer argument +// corresponding to an `s` specifier; +// 2. Format string is not a literal and there is at least one unsafe pointer +// argument (including the formatter argument). +static bool hasUnsafeFormatOrSArg(const Expr *Fmt, unsigned FmtArgIdx, + const CallExpr *Call, ASTContext &Ctx) { + if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) { + StringRef FmtStr = SL->getString(); + auto I = FmtStr.begin(); + auto E = FmtStr.end(); + unsigned ArgIdx = FmtArgIdx; + + do { + ArgIdx = analyze_format_string::ParseFormatStringFirstSArgIndex( + I, E, ArgIdx, Ctx.getLangOpts(), Ctx.getTargetInfo()); + if (ArgIdx && Call->getNumArgs() > ArgIdx && + !isNullTermPointer(Call->getArg(ArgIdx))) + return true; + } while (ArgIdx); + return false; + } + // If format is not a string literal, we cannot analyze the format string. + // In this case, this call is considered unsafe if at least one argument + // (including the format argument) is an unsafe pointer. 
+ return llvm::any_of( + llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()), + [](const Expr *Arg) { + return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + }); +} + // Matches a call to one of the `-printf" functions (excluding the ones with // va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but // fail to guarantee their null-termination. In other words, these calls are @@ -626,19 +659,18 @@ AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) { if (Prefix.ends_with("w")) Prefix = Prefix.drop_back(1); - auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool { - return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); - }; - if (Prefix.empty() || Prefix == "k") // printf: all pointer args should be null-terminated - return any_of(Node.arguments(), AnyUnsafeStrPtr); - if (Prefix == "f" && Node.getNumArgs() > 1) - return any_of(llvm::make_range(Node.arg_begin() + 1, Node.arg_end()), - AnyUnsafeStrPtr); - if (Prefix == "sn" && Node.getNumArgs() > 2) { - return any_of(llvm::make_range(Node.arg_begin() + 2, Node.arg_end()), - AnyUnsafeStrPtr); + return hasUnsafeFormatOrSArg(Node.getArg(0), 0, &Node, + Finder->getASTContext()); + if (Prefix == "f") + return hasUnsafeFormatOrSArg(Node.getArg(1), 1, &Node, + Finder->getASTContext()); + if (Prefix == "sn") { + // The first two arguments need to be in safe patterns, which is checked + // by `isSafeSizedby`: + return hasUnsafeFormatOrSArg(Node.getArg(2), 2, &Node, + Finder->getASTContext()); } return false; // A call to a "-printf" falls into another category. 
} @@ -775,7 +807,7 @@ AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix, StringRef CoreName = TheLittleParser.matchName(II->getName(), FD->getBuiltinID()); - + return InnerMatcher(CoreName).matches(Node, Finder, Builder); } } // namespace clang::ast_matchers diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp index ea2c5ec82211b2..c3d7f8fd05435b 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -70,13 +70,20 @@ void f(char * p, char * q, std::span<char> s, std::span<char> s2) { sscanf(p, "%s%d", "hello", *p); // expected-warning{{function sscanf introduces unsafe buffer access}} sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function sscanf_s introduces unsafe buffer access}} fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}} + fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn printf("%s%d", "hello", *p); // no warn snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn + snprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn strlen("hello");// no warn } -void v(std::string s1) { - snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn +void v(std::string s1, int *p) { + snprintf(s1.data(), s1.size_bytes(), "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn + snprintf(s1.data(), s1.size_bytes(), s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn + printf("%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn + printf(s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn + fprintf((FILE*)0, "%s%d%s%p%s", 
__PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn + fprintf((FILE*)0, s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits