https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/101583
>From 0ccb5a8fc0855b2dfb948c4bb844e0394b5cedb0 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Thu, 1 Aug 2024 16:36:27 -0700 Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. --- .../Analyses/UnsafeBufferUsageGadgets.def | 1 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 279 +++++++++++++++++- ...arn-unsafe-buffer-usage-libc-functions.cpp | 80 +++++ ...n-unsafe-buffer-usage-test-unreachable.cpp | 4 +- 4 files changed, 360 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b..ac01b285ae833 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) +WARNING_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b..926a78b6f0096 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,20 +9,25 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" +#include <cstddef> #include <memory> #include <optional> #include <queue> @@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) { + static const std::set<StringRef> PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + + // A tiny name parser for unsafe libc function names with additional + // checks for `printf`s: + struct FuncNameMatch { + const CallExpr *const Call; + FuncNameMatch(const CallExpr *Call) : Call(Call) {} + + // For a name `S` in `PredefinedNames` or a member of the printf/scanf + // family, define matching function names with `S` by the grammar below: + // + // CoreName := S | S["wcs"/"str"] + // LibcName := CoreName | CoreName + "_s" + // MatchingName := "__builtin_" + LibcName | + // "__builtin___" + LibcName + "_chk" | + // "__asan_" + LibcName + // + // (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.) + bool matchName(StringRef FunName) { + // Try to match __builtin_: + if (FunName.starts_with("__builtin_")) + // Then either it is __builtin_LibcName or __builtin___LibcName_chk or + // no match: + return matchLibcNameOrBuiltinChk( + FunName.drop_front(10 /* truncate "__builtin_" */)); + // Try to match __asan_: + if (FunName.starts_with("__asan_")) + return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */)); + return matchLibcName(FunName); + } + + // Parameter `Name` is the substring after stripping off the prefix + // "__builtin_". + bool matchLibcNameOrBuiltinChk(StringRef Name) { + if (Name.starts_with("__") && Name.ends_with("_chk")) + return matchLibcName( + Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); + return matchLibcName(Name); + } + + bool matchLibcName(StringRef Name) { + if (Name.ends_with("_s")) + return matchCoreName(Name.drop_back(2 /* truncate "_s" */)); + return matchCoreName(Name); + } + + bool matchCoreName(StringRef Name) { + if (PredefinedNames.find(Name.str()) != PredefinedNames.end()) + return !isSafeStrlen(Name); // match unless it's a safe strlen call + + std::string NameWCS = Name.str(); + size_t WcsPos = NameWCS.find("wcs"); + + while (WcsPos != std::string::npos) { + NameWCS[WcsPos++] = 's'; + NameWCS[WcsPos++] = 't'; + NameWCS[WcsPos++] = 'r'; + WcsPos = NameWCS.find("wcs", WcsPos); + } + if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) + return !isSafeStrlen(NameWCS); + return matchPrintfOrScanfFamily(Name); + } + + bool matchPrintfOrScanfFamily(StringRef Name) { + if (Name.ends_with("scanf")) + return true; // simply warn about scanf functions + if (!Name.ends_with("printf")) + return false; // neither printf nor scanf + if (Name.starts_with("v")) + // cannot check args for va_list, so `vprintf`s are treated as regular + // unsafe libc calls: + return true; + + // Truncate "printf", focus on prefixes. There are different possible + // name prefixes: "k", "f", "s", "sn", "fw", ..., "snw". We strip off the + // 'w' and handle printfs differently by "k", "f", "s", "sn" or no prefix: + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + return isUnsafePrintf(Prefix); + } + + // A pointer type expression is known to be null-terminated, if it has the + // form: E.c_str(), for any expression E of `std::string` type. + static bool isNullTermPointer(const Expr *Ptr) { + if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts())) + return true; + if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) { + const CXXMethodDecl *MD = MCE->getMethodDecl(); + const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); + + if (MD && RD && RD->isInStdNamespace()) + if (MD->getName() == "c_str" && RD->getName() == "basic_string") + return true; + } + return false; + } + + // Check safe patterns for printfs w.r.t their prefixes: + bool isUnsafePrintf(StringRef Prefix /* empty, 'k', 'f', 's', or 'sn' */) { + auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool { + return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + }; + + if (Prefix.empty() || Prefix == "k") // printf: all pointer args should be null-terminated + return llvm::any_of(Call->arguments(), AnyUnsafeStrPtr); + + if (Prefix == "f" && Call->getNumArgs() > 1) { + // fprintf, same as printf but skip the first arg + auto Range = llvm::make_range(Call->arg_begin() + 1, Call->arg_end()); + return llvm::any_of(Range, AnyUnsafeStrPtr); + } + if (Prefix == "sn" && Call->getNumArgs() > 2) { + // The first two arguments need to be in safe patterns, which is checked + // by `isSafeSizedby`: + auto Range = llvm::make_range(Call->arg_begin() + 2, Call->arg_end()); + return !isSafeSizedby(*Call->arg_begin(), *(Call->arg_begin() + 1)) || + llvm::any_of(Range, AnyUnsafeStrPtr); + } + // Note `sprintf`s should be changed to `snprintf`s, so they are treated + // as regular unsafe libc calls: + return true; + } + + // Checks if the two Exprs `SizedByPtr` and `Size` are in the pattern: + // SizedByPtr := DRE.data() + // Size := DRE.size_bytes(), for a same DRE of sized-container/view + // type. + bool isSafeSizedby(const Expr *SizedByPtr, const Expr *Size) { + static StringRef SizedObjs[] = {"span", "array", "vector", + "basic_string_view", "basic_string"}; + if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(SizedByPtr)) + if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) { + if (auto *DREPtr = + dyn_cast<DeclRefExpr>(MCEPtr->getImplicitObjectArgument())) + if (auto *DRESize = + dyn_cast<DeclRefExpr>(MCESize->getImplicitObjectArgument())) + if (DREPtr->getDecl() == + DRESize->getDecl()) // coming out of the same variable + if (MCEPtr->getMethodDecl()->getName() == "data") + if (MCESize->getMethodDecl()->getName() == "size_bytes") + for (StringRef SizedObj : SizedObjs) + if (MCEPtr->getRecordDecl()->isInStdNamespace() && + MCEPtr->getRecordDecl() + ->getCanonicalDecl() + ->getName() == SizedObj) + return true; + } + return false; + } + + // This is safe: strlen("hello"). We don't want to be noisy on this case. + bool isSafeStrlen(StringRef Name) { + return Name == "strlen" && Call->getNumArgs() == 1 && + isa<StringLiteral>(Call->getArg(0)->IgnoreParenImpCasts()); + } + } FuncNameMatch{&Node}; + + const FunctionDecl *FD = Node.getDirectCallee(); + const IdentifierInfo *II; + + if (!FD) + return false; + II = FD->getIdentifier(); + // If this is a special C++ name without IdentifierInfo, it can't be a + // C library function. + if (!II) + return false; + + // Look through 'extern "C"' and anything similar invented in the future. + // If this function is not in TU directly, it is not a C library function. + if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit()) + return false; + + // If this function is not externally visible, it is not a C library function. + // Note that we make an exception for inline functions, which may be + // declared in header files without external linkage. + if (!FD->isInlined() && !FD->isExternallyVisible()) + return false; + return FuncNameMatch.matchName(II->getName()); +} } // namespace clang::ast_matchers namespace { @@ -1025,6 +1284,24 @@ class DataInvocationGadget : public WarningGadget { DeclUseList getClaimedVarUseSites() const override { return {}; } }; +class UnsafeLibcFunctionCallGadget : public WarningGadget { + const CallExpr *const Call; + constexpr static const char *const Tag = "UnsafeLibcFunctionCall"; + +public: + UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::UnsafeLibcFunctionCall), + Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {} + + static Matcher matcher() { + return stmt(callExpr(isUnsafeLibcFunctionCall()).bind(Tag)); + } + + const Stmt *getBaseStmt() const override { return Call; } + + DeclUseList getClaimedVarUseSites() const override { return {}; } +}; + // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue // Context (see `isInUnspecifiedLvalueContext`). // Note here `[]` is the built-in subscript operator. diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp new file mode 100644 index 0000000000000..326ecd6f284e5 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -0,0 +1,80 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN: -verify %s + +typedef struct {} FILE; +void memcpy(); +void __builtin___memcpy_chk(); +void strcpy(); +void strcpy_s(); +void wcscpy_s(); +unsigned strlen( const char* str ); +int fprintf( FILE* stream, const char* format, ... ); +int printf( const char* format, ... ); +int sprintf( char* buffer, const char* format, ... ); +int snprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int sscanf_s(const char * buffer, const char * format, ...); +int sscanf(const char * buffer, const char * format, ... ); + +namespace std { + template< class InputIt, class OutputIt > + OutputIt copy( InputIt first, InputIt last, + OutputIt d_first ); + + struct iterator{}; + template<typename T> + struct span { + T * ptr; + T * data(); + unsigned size_bytes(); + unsigned size(); + iterator begin() const noexcept; + iterator end() const noexcept; + }; + + template<typename T> + struct basic_string { + T* p; + T *c_str(); + T *data(); + unsigned size_bytes(); + }; + + typedef basic_string<char> string; + typedef basic_string<wchar_t> wstring; +} + +void f(char * p, char * q, std::span<char> s) { + memcpy(); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + __builtin_memcpy(p, q, 64); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + __builtin___memcpy_chk(); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + strcpy(); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + strcpy_s(); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + wcscpy_s(); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + + + /* Test printfs */ + + fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + printf("%s%d", p, *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + sprintf(q, "%s%d", "hello", *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + snprintf(s.data(), s.size(), "%s%d", "hello", *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + sscanf(p, "%s%d", "hello", *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} + fprintf((FILE*)p, "%s%d", "hello", *p); // no warn + printf("%s%d", "hello", *p); // no warn + snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn + strlen("hello");// no warn +} + +void v(std::string s1, std::wstring s2) { + snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn +} + + +void g(char *begin, char *end, char *p, std::span<char> s) { + std::copy(begin, end, p); // no warn + std::copy(s.begin(), s.end(), s.begin()); // no warn +} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp index 844311c3a51a5..6a4541c403a48 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp @@ -1,8 +1,6 @@ // RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fsafe-buffer-usage-suggestions -verify %s -// expected-no-diagnostics - typedef unsigned __darwin_size_t; typedef __darwin_size_t size_t; #define bzero(s, n) __builtin_bzero(s, n) -void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } +void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function introduces unsafe buffer manipulation}} >From 2dff88db07d59a2b6e52230abf66ac3d4d723196 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Thu, 1 Aug 2024 16:38:50 -0700 Subject: [PATCH 2/2] [-Wunsafe-buffer-usage] Warn string_view constructions that not guarantee null-termination One could use `string_view` as the safe view over strings in read-only scenarios as it is hardened, preserving bounds info and guaranteeing null-termination with the new warning. If one finds the warning being too restrictive, turn it off with `-Wno-unsafe-buffer-usage-in-string-view`. The rest of the warnings will not assume null-termination guarantee of `string_view` as well then. --- .../Analysis/Analyses/UnsafeBufferUsage.h | 8 +- .../Analyses/UnsafeBufferUsageGadgets.def | 11 ++- clang/include/clang/Basic/DiagnosticGroups.td | 3 +- .../clang/Basic/DiagnosticSemaKinds.td | 3 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 86 ++++++++++++++++--- clang/lib/Sema/AnalysisBasedWarnings.cpp | 17 ++++ ...arn-unsafe-buffer-usage-in-string_view.cpp | 85 ++++++++++++++++++ ...arn-unsafe-buffer-usage-libc-functions.cpp | 22 ++++- 8 files changed, 216 insertions(+), 19 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-in-string_view.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e11..f7c5b887756d9 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -147,10 +147,16 @@ class UnsafeBufferUsageHandler { virtual bool isSafeBufferOptOut(const SourceLocation &Loc) const = 0; /// \return true iff unsafe uses in containers should NOT be reported at - /// `Loc`; false otherwise. + /// `Loc`; false otherwise. Controlled by `-Wunsafe-buffer-usage-in-container` virtual bool ignoreUnsafeBufferInContainer(const SourceLocation &Loc) const = 0; + /// \return true iff unsafe uses in string_views should NOT be reported at + /// `Loc`; false otherwise. Controlled by + /// `-Wunsafe-buffer-usage-in-string-view` + virtual bool + ignoreUnsafeBufferInStringView(const SourceLocation &Loc) const = 0; + virtual std::string getUnsafeBufferUsageAttributeTextAt(SourceLocation Loc, StringRef WSSuffix = "") const = 0; diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index ac01b285ae833..7f3fd24b5cdc7 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -18,6 +18,12 @@ #define WARNING_GADGET(name) GADGET(name) #endif +/// Make `WARNING_LIBC_GADGET` self a subset of WARNING_GADGET so that it can be +/// tuned specially according to whether we warn `StringViewUnsafeConstructor`. +#ifndef WARNING_LIBC_GADGET +#define WARNING_LIBC_GADGET(name) WARNING_GADGET(name) +#endif + /// A `WARNING_GADGET` subset, where the code pattern of each gadget /// corresponds uses of a (possibly hardened) contatiner (e.g., `std::span`). #ifndef WARNING_CONTAINER_GADGET @@ -38,8 +44,9 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) -WARNING_GADGET(UnsafeLibcFunctionCall) +WARNING_LIBC_GADGET(UnsafeLibcFunctionCall) WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` +WARNING_CONTAINER_GADGET(StringViewUnsafeConstructor) // `std::string_view` constructed from std::string guarantees null-termination FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) FIXABLE_GADGET(PointerDereference) @@ -53,5 +60,7 @@ FIXABLE_GADGET(PointerInit) #undef FIXABLE_GADGET #undef WARNING_GADGET +#undef WARNING_LIBC_GADGET #undef WARNING_CONTAINER_GADGET +#undef WARNING_STRINGVIEW_GADGET #undef GADGET diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 19c3f1e043349..68413fab136fe 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1551,7 +1551,8 @@ def HLSLAvailability : DiagGroup<"hlsl-availability">; def ReadOnlyPlacementChecks : DiagGroup<"read-only-types">; // Warnings and fixes to support the "safe buffers" programming model. -def UnsafeBufferUsageInContainer : DiagGroup<"unsafe-buffer-usage-in-container">; +def UnsafeBufferUsageInStringView : DiagGroup<"unsafe-buffer-usage-in-string-view">; +def UnsafeBufferUsageInContainer : DiagGroup<"unsafe-buffer-usage-in-container", [UnsafeBufferUsageInStringView]>; def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInContainer]>; // Warnings and notes related to the function effects system underlying diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 581434d33c5c9..713a3b10b413b 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12388,6 +12388,9 @@ def note_safe_buffer_usage_suggestions_disabled : Note< def warn_unsafe_buffer_usage_in_container : Warning< "the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information">, InGroup<UnsafeBufferUsageInContainer>, DefaultIgnore; +def warn_unsafe_buffer_usage_in_string_view : Warning< + "construct string_view from raw pointers does not guarantee null-termination, construct from std::string instead">, + InGroup<UnsafeBufferUsageInStringView>, DefaultIgnore; #ifndef NDEBUG // Not a user-facing diagnostic. Useful for debugging false negatives in // -fsafe-buffer-usage-suggestions (i.e. lack of -Wunsafe-buffer-usage fixits). diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 926a78b6f0096..bd29296ee2dde 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -247,11 +247,16 @@ AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *, return !Handler->isSafeBufferOptOut(Node.getBeginLoc()); } -AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer, +AST_MATCHER_P(Stmt, ignoreSpanTwoParamConstructor, const UnsafeBufferUsageHandler *, Handler) { return Handler->ignoreUnsafeBufferInContainer(Node.getBeginLoc()); } +AST_MATCHER_P(Stmt, ignoreStringViewUnsafeConstructor, + const UnsafeBufferUsageHandler *, Handler) { + return Handler->ignoreUnsafeBufferInStringView(Node.getBeginLoc()); +} + AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) { return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder); } @@ -448,7 +453,8 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } -AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) { +AST_MATCHER_P(CallExpr, isUnsafeLibcFunctionCall, internal::Matcher<Expr>, + hasUnsafeStringView) { static const std::set<StringRef> PredefinedNames{ // numeric conversion: "atof", @@ -525,7 +531,9 @@ AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) { // checks for `printf`s: struct FuncNameMatch { const CallExpr *const Call; - FuncNameMatch(const CallExpr *Call) : Call(Call) {} + const bool hasUnsafeStringView; + FuncNameMatch(const CallExpr *Call, bool hasUnsafeStringView) + : Call(Call), hasUnsafeStringView(hasUnsafeStringView) {} // For a name `S` in `PredefinedNames` or a member of the printf/scanf // family, define matching function names with `S` by the grammar below: @@ -605,27 +613,35 @@ AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) { // A pointer type expression is known to be null-terminated, if it has the // form: E.c_str(), for any expression E of `std::string` type. - static bool isNullTermPointer(const Expr *Ptr) { + static bool isNullTermPointer(const Expr *Ptr, + bool UnsafeStringView = true) { if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts())) return true; if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) { const CXXMethodDecl *MD = MCE->getMethodDecl(); const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); - if (MD && RD && RD->isInStdNamespace()) + if (MD && RD && RD->isInStdNamespace()) { if (MD->getName() == "c_str" && RD->getName() == "basic_string") return true; + if (!UnsafeStringView && MD->getName() == "data" && + RD->getName() == "basic_string_view") + return true; + } } return false; } // Check safe patterns for printfs w.r.t their prefixes: bool isUnsafePrintf(StringRef Prefix /* empty, 'k', 'f', 's', or 'sn' */) { - auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool { - return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + const bool hasUnsafeStringView = this->hasUnsafeStringView; + auto AnyUnsafeStrPtr = [&hasUnsafeStringView](const Expr *Arg) -> bool { + return Arg->getType()->isPointerType() && + !isNullTermPointer(Arg, hasUnsafeStringView); }; - if (Prefix.empty() || Prefix == "k") // printf: all pointer args should be null-terminated + if (Prefix.empty() || + Prefix == "k") // printf: all pointer args should be null-terminated return llvm::any_of(Call->arguments(), AnyUnsafeStrPtr); if (Prefix == "f" && Call->getNumArgs() > 1) { @@ -677,7 +693,7 @@ AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) { return Name == "strlen" && Call->getNumArgs() == 1 && isa<StringLiteral>(Call->getArg(0)->IgnoreParenImpCasts()); } - } FuncNameMatch{&Node}; + } FuncNameMatch{&Node, hasUnsafeStringView.matches(Node, Finder, Builder)}; const FunctionDecl *FD = Node.getDirectCallee(); const IdentifierInfo *II; @@ -1037,6 +1053,47 @@ class SpanTwoParamConstructorGadget : public WarningGadget { } }; +class StringViewUnsafeConstructorGadget : public WarningGadget { + static constexpr const char *const Tag = "StringViewUnsafeConstructor"; + const CXXConstructExpr *Ctor; + +public: + StringViewUnsafeConstructorGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::StringViewUnsafeConstructor), + Ctor(Result.Nodes.getNodeAs<CXXConstructExpr>(Tag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::StringViewUnsafeConstructor; + } + + // Matches any `basic_string_view` constructor call that is not a copy + // constructor or on a string literal: + static Matcher matcher() { + auto isStringViewDecl = hasDeclaration(cxxConstructorDecl( + hasDeclContext(isInStdNamespace()), hasName("basic_string_view"))); + + return stmt( + cxxConstructExpr( + isStringViewDecl, + unless( + hasArgument(0, expr(ignoringParenImpCasts(stringLiteral())))), + unless(hasDeclaration(cxxConstructorDecl(isCopyConstructor())))) + .bind(Tag)); + } + + const Stmt *getBaseStmt() const override { return Ctor; } + + DeclUseList getClaimedVarUseSites() const override { + // If the constructor call is of the form `std::basic_string_view{ptrVar, + // ...}`, `ptrVar` is considered an unsafe variable. + if (auto *DRE = dyn_cast<DeclRefExpr>(Ctor->getArg(0))) { + if (DRE->getType()->isPointerType() && isa<VarDecl>(DRE->getDecl())) + return {DRE}; + } + return {}; + } +}; + /// A pointer initialization expression of the form: /// \code /// int *p = q; @@ -1293,8 +1350,10 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget { : WarningGadget(Kind::UnsafeLibcFunctionCall), Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {} - static Matcher matcher() { - return stmt(callExpr(isUnsafeLibcFunctionCall()).bind(Tag)); + static Matcher + matcher(ast_matchers::internal::Matcher<Expr> HasUnsafeStringView) { + return stmt( + callExpr(isUnsafeLibcFunctionCall(HasUnsafeStringView)).bind(Tag)); } const Stmt *getBaseStmt() const override { return Call; } @@ -1724,10 +1783,13 @@ findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, #define WARNING_GADGET(x) \ allOf(x ## Gadget::matcher().bind(#x), \ notInSafeBufferOptOut(&Handler)), +#define WARNING_LIBC_GADGET(x) \ + allOf(x ## Gadget::matcher(ignoreStringViewUnsafeConstructor(&Handler)).bind(#x), \ + notInSafeBufferOptOut(&Handler)), #define WARNING_CONTAINER_GADGET(x) \ allOf(x ## Gadget::matcher().bind(#x), \ notInSafeBufferOptOut(&Handler), \ - unless(ignoreUnsafeBufferInContainer(&Handler))), + unless(ignore ## x(&Handler))), #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" // Avoid a hanging comma. unless(stmt()) diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 0f604c61fa3af..e5f38be388a5c 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2256,6 +2256,17 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { Range = UO->getSubExpr()->getSourceRange(); MsgParam = 1; } + } else if (const auto *CtorExpr = dyn_cast<CXXConstructExpr>(Operation)) { + if (CtorExpr->getConstructor()->getCanonicalDecl()->getNameAsString() == + "span") + S.Diag(CtorExpr->getLocation(), + diag::warn_unsafe_buffer_usage_in_container) + << CtorExpr->getSourceRange(); + else // it is warning about "string_view": + S.Diag(CtorExpr->getLocation(), + diag::warn_unsafe_buffer_usage_in_string_view) + << CtorExpr->getSourceRange(); + return; } else { if (isa<CallExpr>(Operation) || isa<CXXConstructExpr>(Operation)) { // note_unsafe_buffer_operation doesn't have this mode yet. @@ -2370,6 +2381,12 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { return S.Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container, Loc); } + bool + ignoreUnsafeBufferInStringView(const SourceLocation &Loc) const override { + return S.Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_string_view, + Loc); + } + // Returns the text representation of clang::unsafe_buffer_usage attribute. // `WSSuffix` holds customized "white-space"s, e.g., newline or whilespace // characters. diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-string_view.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-string_view.cpp new file mode 100644 index 0000000000000..536751446f585 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-string_view.cpp @@ -0,0 +1,85 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -verify %s +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -verify %s -Wno-unsafe-buffer-usage-in-string-view -D_IGNORE_STRING_VIEW + +namespace std { + struct iterator{}; + + template<typename T> + struct basic_string_view { + T* p; + T *data(); + unsigned size_bytes(); + iterator begin(); + iterator end(); + + constexpr basic_string_view( const T* s ){}; + constexpr basic_string_view( const T* s, unsigned count ); + constexpr basic_string_view( const basic_string_view& other ) noexcept = default; + template< class It, class End > + constexpr basic_string_view( It first, End last ) {}; + + }; + + typedef basic_string_view<char> string_view; + typedef basic_string_view<wchar_t> wstring_view; + + template<typename T> + struct basic_string { + T *c_str(); + operator std::basic_string_view<T>() { + } + }; + + typedef basic_string<char> string; + typedef basic_string<wchar_t> wstring; + + template <class T> struct span { + constexpr span(T *, unsigned){} + }; +} + +void foo(std::string_view); +void f(char * p, std::string S, std::string_view V) { + std::string_view SVs[] {S, "S"}; + foo(S); + foo("S"); + foo({S}); + foo({"S"}); + foo(V); +#ifndef _IGNORE_STRING_VIEW + foo(p); // expected-warning{{construct string_view from raw pointers does not guarantee null-termination, construct from std::string instead}} +#else + foo(p); // no warn +#endif +#ifndef _IGNORE_STRING_VIEW + foo({p, 10}); // expected-warning{{construct string_view from raw pointers does not guarantee null-termination, construct from std::string instead}} +#else + foo({p, 10}); // no warn +#endif + + std::string_view SV{S}; + std::string_view SV2{"S"}; + std::string_view SV3 = S; + std::string_view SV4 = "S"; + std::string_view SV5 = std::string_view(S); +#ifndef _IGNORE_STRING_VIEW + std::string_view SV10 = p; // expected-warning{{construct string_view from raw pointers does not guarantee null-termination, construct from std::string instead}} +#else + std::string_view SV10 = p; // no warn +#endif +#ifndef _IGNORE_STRING_VIEW + std::string_view SV11{p}; // expected-warning{{construct string_view from raw pointers does not guarantee null-termination, construct from std::string instead}} +#else + std::string_view SV11{p}; // no warn +#endif +#ifndef _IGNORE_STRING_VIEW + std::string_view SV12{V.begin(), V.end()}; // expected-warning{{construct string_view from raw pointers does not guarantee null-termination, construct from std::string instead}} +#else + std::string_view SV12{V.begin(), V.end()}; // no warn +#endif +} + +void g(int * p) { + // This warning is not affected: + std::span<int> S{p, 10}; // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}} +} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp index 326ecd6f284e5..feaeb23d73127 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ -// RUN: -verify %s +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -verify %s +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -verify %s -Wno-unsafe-buffer-usage-in-string-view -D_IGNORE_STRING_VIEW typedef struct {} FILE; void memcpy(); @@ -42,6 +42,15 @@ namespace std { typedef basic_string<char> string; typedef basic_string<wchar_t> wstring; + + template<typename T> + struct basic_string_view { + T* p; + T *data(); + unsigned size_bytes(); + }; + + typedef basic_string_view<char> string_view; } void f(char * p, char * q, std::span<char> s) { @@ -69,8 +78,13 @@ void f(char * p, char * q, std::span<char> s) { strlen("hello");// no warn } -void v(std::string s1, std::wstring s2) { - snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn +void v(std::string s, std::string_view sv) { + snprintf(s.data(), s.size_bytes(), "%s%d", s.c_str(), 0); // no warn +#ifndef _IGNORE_STRING_VIEW + snprintf(sv.data(), sv.size_bytes(), "%s%d", sv.data(), 0); // no warn +#else + snprintf(sv.data(), sv.size_bytes(), "%s%d", sv.data(), 0); // expected-warning{{function introduces unsafe buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +#endif } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits