https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/101583

>From 0ccb5a8fc0855b2dfb948c4bb844e0394b5cedb0 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziq...@udel.edu>
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.
---
 .../Analyses/UnsafeBufferUsageGadgets.def     |   1 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp      | 279 +++++++++++++++++-
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  80 +++++
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 4 files changed, 360 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b..ac01b285ae833 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)          // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b..926a78b6f0096 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,25 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Casting.h"
+#include <cstddef>
 #include <memory>
 #include <optional>
 #include <queue>
@@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
+  static const std::set<StringRef> PredefinedNames{
+      // numeric conversion:
+      "atof",
+      "atoi",
+      "atol",
+      "atoll",
+      "strtol",
+      "strtoll",
+      "strtoul",
+      "strtoull",
+      "strtof",
+      "strtod",
+      "strtold",
+      "strtoimax",
+      "strtoumax",
+      // "strfromf",  "strfromd", "strfroml", // C23?
+      // string manipulation:
+      "strcpy",
+      "strncpy",
+      "strlcpy",
+      "strcat",
+      "strncat",
+      "strlcat",
+      "strxfrm",
+      "strdup",
+      "strndup",
+      // string examination:
+      "strlen",
+      "strnlen",
+      "strcmp",
+      "strncmp",
+      "stricmp",
+      "strcasecmp",
+      "strcoll",
+      "strchr",
+      "strrchr",
+      "strspn",
+      "strcspn",
+      "strpbrk",
+      "strstr",
+      "strtok",
+      // "mem-" functions
+      "memchr",
+      "wmemchr",
+      "memcmp",
+      "wmemcmp",
+      "memcpy",
+      "memccpy",
+      "mempcpy",
+      "wmemcpy",
+      "memmove",
+      "wmemmove",
+      "memset",
+      "wmemset",
+      // IO:
+      "fread",
+      "fwrite",
+      "fgets",
+      "fgetws",
+      "gets",
+      "fputs",
+      "fputws",
+      "puts",
+      // others
+      "strerror_s",
+      "strerror_r",
+      "bcopy",
+      "bzero",
+      "bsearch",
+      "qsort",
+  };
+
+  // A tiny name parser for unsafe libc function names with additional
+  // checks for `printf`s:
+  struct FuncNameMatch {
+    const CallExpr *const Call;
+    FuncNameMatch(const CallExpr *Call) : Call(Call) {}
+
+    // For a name `S` in `PredefinedNames` or a member of the printf/scanf
+    // family, define matching function names with `S` by the grammar below:
+    //
+    //  CoreName     := S | S["wcs"/"str"]
+    //  LibcName     := CoreName | CoreName + "_s"
+    //  MatchingName := "__builtin_" + LibcName              |
+    //                  "__builtin___" + LibcName + "_chk"   |
+    //                  "__asan_" + LibcName
+    //
+    // (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.)
+    bool matchName(StringRef FunName) {
+      // Try to match __builtin_:
+      if (FunName.starts_with("__builtin_"))
+        // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
+        // no match:
+        return matchLibcNameOrBuiltinChk(
+            FunName.drop_front(10 /* truncate "__builtin_" */));
+      // Try to match __asan_:
+      if (FunName.starts_with("__asan_"))
+        return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" 
*/));
+      return matchLibcName(FunName);
+    }
+
+    // Parameter `Name` is the substring after stripping off the prefix
+    // "__builtin_".
+    bool matchLibcNameOrBuiltinChk(StringRef Name) {
+      if (Name.starts_with("__") && Name.ends_with("_chk"))
+        return matchLibcName(
+            Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
+      return matchLibcName(Name);
+    }
+
+    bool matchLibcName(StringRef Name) {
+      if (Name.ends_with("_s"))
+        return matchCoreName(Name.drop_back(2 /* truncate "_s" */));
+      return matchCoreName(Name);
+    }
+
+    bool matchCoreName(StringRef Name) {
+      if (PredefinedNames.find(Name.str()) != PredefinedNames.end())
+        return !isSafeStrlen(Name); // match unless it's a safe strlen call
+
+      std::string NameWCS = Name.str();
+      size_t WcsPos = NameWCS.find("wcs");
+
+      while (WcsPos != std::string::npos) {
+        NameWCS[WcsPos++] = 's';
+        NameWCS[WcsPos++] = 't';
+        NameWCS[WcsPos++] = 'r';
+        WcsPos = NameWCS.find("wcs", WcsPos);
+      }
+      if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
+        return !isSafeStrlen(NameWCS);
+      return matchPrintfOrScanfFamily(Name);
+    }
+
+    bool matchPrintfOrScanfFamily(StringRef Name) {
+      if (Name.ends_with("scanf"))
+        return true; // simply warn about scanf functions
+      if (!Name.ends_with("printf"))
+        return false; // neither printf nor scanf
+      if (Name.starts_with("v"))
+        // cannot check args for va_list, so `vprintf`s are treated as regular
+        // unsafe libc calls:
+        return true;
+
+      // Truncate "printf", focus on prefixes.  There are different possible
+      // name prefixes: "k", "f", "s", "sn", "fw", ..., "snw".  We strip off 
the
+      // 'w' and handle printfs differently by "k", "f", "s", "sn" or no 
prefix:
+      StringRef Prefix = Name.drop_back(6);
+
+      if (Prefix.ends_with("w"))
+        Prefix = Prefix.drop_back(1);
+      return isUnsafePrintf(Prefix);
+    }
+
+    // A pointer type expression is known to be null-terminated, if it has the
+    // form: E.c_str(), for any expression E of `std::string` type.
+    static bool isNullTermPointer(const Expr *Ptr) {
+      if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
+        return true;
+      if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) 
{
+        const CXXMethodDecl *MD = MCE->getMethodDecl();
+        const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
+
+        if (MD && RD && RD->isInStdNamespace())
+          if (MD->getName() == "c_str" && RD->getName() == "basic_string")
+            return true;
+      }
+      return false;
+    }
+
+    // Check safe patterns for printfs w.r.t their prefixes:
+    bool isUnsafePrintf(StringRef Prefix /* empty, 'k', 'f', 's', or 'sn' */) {
+      auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool {
+        return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+      };
+
+      if (Prefix.empty() || Prefix == "k") // printf: all pointer args should 
be null-terminated
+        return llvm::any_of(Call->arguments(), AnyUnsafeStrPtr);
+
+      if (Prefix == "f" && Call->getNumArgs() > 1) {
+        // fprintf, same as printf but skip the first arg
+        auto Range = llvm::make_range(Call->arg_begin() + 1, Call->arg_end());
+        return llvm::any_of(Range, AnyUnsafeStrPtr);
+      }
+      if (Prefix == "sn" && Call->getNumArgs() > 2) {
+        // The first two arguments need to be in safe patterns, which is 
checked
+        // by `isSafeSizedby`:
+        auto Range = llvm::make_range(Call->arg_begin() + 2, Call->arg_end());
+        return !isSafeSizedby(*Call->arg_begin(), *(Call->arg_begin() + 1)) ||
+               llvm::any_of(Range, AnyUnsafeStrPtr);
+      }
+      // Note `sprintf`s should be changed to `snprintf`s, so they are treated
+      // as regular unsafe libc calls:
+      return true;
+    }
+
+    // Checks if the two Exprs `SizedByPtr` and `Size` are in the pattern:
+    // SizedByPtr := DRE.data()
+    // Size       := DRE.size_bytes(), for a same DRE of sized-container/view
+    // type.
+    bool isSafeSizedby(const Expr *SizedByPtr, const Expr *Size) {
+      static StringRef SizedObjs[] = {"span", "array", "vector",
+                                      "basic_string_view", "basic_string"};
+      if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(SizedByPtr))
+        if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) {
+          if (auto *DREPtr =
+                  dyn_cast<DeclRefExpr>(MCEPtr->getImplicitObjectArgument()))
+            if (auto *DRESize =
+                    
dyn_cast<DeclRefExpr>(MCESize->getImplicitObjectArgument()))
+              if (DREPtr->getDecl() ==
+                  DRESize->getDecl()) // coming out of the same variable
+                if (MCEPtr->getMethodDecl()->getName() == "data")
+                  if (MCESize->getMethodDecl()->getName() == "size_bytes")
+                    for (StringRef SizedObj : SizedObjs)
+                      if (MCEPtr->getRecordDecl()->isInStdNamespace() &&
+                          MCEPtr->getRecordDecl()
+                                  ->getCanonicalDecl()
+                                  ->getName() == SizedObj)
+                        return true;
+        }
+      return false;
+    }
+
+    // This is safe: strlen("hello").  We don't want to be noisy on this case.
+    bool isSafeStrlen(StringRef Name) {
+      return Name == "strlen" && Call->getNumArgs() == 1 &&
+             isa<StringLiteral>(Call->getArg(0)->IgnoreParenImpCasts());
+    }
+  } FuncNameMatch{&Node};
+
+  const FunctionDecl *FD = Node.getDirectCallee();
+  const IdentifierInfo *II;
+
+  if (!FD)
+    return false;
+  II = FD->getIdentifier();
+  // If this is a special C++ name without IdentifierInfo, it can't be a
+  // C library function.
+  if (!II)
+    return false;
+
+  // Look through 'extern "C"' and anything similar invented in the future.
+  // If this function is not in TU directly, it is not a C library function.
+  if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit())
+    return false;
+
+  // If this function is not externally visible, it is not a C library 
function.
+  // Note that we make an exception for inline functions, which may be
+  // declared in header files without external linkage.
+  if (!FD->isInlined() && !FD->isExternallyVisible())
+    return false;
+  return FuncNameMatch.matchName(II->getName());
+}
 } // namespace clang::ast_matchers
 
 namespace {
@@ -1025,6 +1284,24 @@ class DataInvocationGadget : public WarningGadget {
   DeclUseList getClaimedVarUseSites() const override { return {}; }
 };
 
+class UnsafeLibcFunctionCallGadget : public WarningGadget {
+  const CallExpr *const Call;
+  constexpr static const char *const Tag = "UnsafeLibcFunctionCall";
+
+public:
+  UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result)
+      : WarningGadget(Kind::UnsafeLibcFunctionCall),
+        Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {}
+
+  static Matcher matcher() {
+    return stmt(callExpr(isUnsafeLibcFunctionCall()).bind(Tag));
+  }
+
+  const Stmt *getBaseStmt() const override { return Call; }
+
+  DeclUseList getClaimedVarUseSites() const override { return {}; }
+};
+
 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
 // Context (see `isInUnspecifiedLvalueContext`).
 // Note here `[]` is the built-in subscript operator.
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
new file mode 100644
index 0000000000000..326ecd6f284e5
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -0,0 +1,80 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN:            -verify %s
+
+typedef struct {} FILE;
+void memcpy();
+void __builtin___memcpy_chk();
+void strcpy();
+void strcpy_s();
+void wcscpy_s();
+unsigned strlen( const char* str );
+int fprintf( FILE* stream, const char* format, ... );
+int printf( const char* format, ... );
+int sprintf( char* buffer, const char* format, ... );
+int snprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int sscanf_s(const char * buffer, const char * format, ...);
+int sscanf(const char * buffer, const char * format, ... );
+
+namespace std {
+  template< class InputIt, class OutputIt >
+  OutputIt copy( InputIt first, InputIt last,
+                OutputIt d_first );
+
+  struct iterator{};
+  template<typename T>
+  struct span {
+    T * ptr;
+    T * data();
+    unsigned size_bytes();
+    unsigned size();
+    iterator begin() const noexcept;
+    iterator end() const noexcept;
+  };
+
+  template<typename T>
+  struct basic_string {
+    T* p;
+    T *c_str();
+    T *data();
+    unsigned size_bytes();
+  };
+
+  typedef basic_string<char> string;
+  typedef basic_string<wchar_t> wstring;
+}
+
+void f(char * p, char * q, std::span<char> s) {
+  memcpy();                   // expected-warning{{function introduces unsafe 
buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to 
receive code hardening suggestions}}
+  __builtin_memcpy(p, q, 64); // expected-warning{{function introduces unsafe 
buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to 
receive code hardening suggestions}}
+  __builtin___memcpy_chk();  // expected-warning{{function introduces unsafe 
buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to 
receive code hardening suggestions}}
+  strcpy();                   // expected-warning{{function introduces unsafe 
buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to 
receive code hardening suggestions}}
+  strcpy_s();                 // expected-warning{{function introduces unsafe 
buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to 
receive code hardening suggestions}}
+  wcscpy_s();                 // expected-warning{{function introduces unsafe 
buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to 
receive code hardening suggestions}}
+
+
+  /* Test printfs */
+
+  fprintf((FILE*)p, "%s%d", p, *p);  // expected-warning{{function introduces 
unsafe buffer manipulation}} expected-note{{pass 
-fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+  printf("%s%d", p, *p);  // expected-warning{{function introduces unsafe 
buffer manipulation}} expected-note{{pass -fsafe-buffer-usage-suggestions to 
receive code hardening suggestions}}
+  sprintf(q, "%s%d", "hello", *p); // expected-warning{{function introduces 
unsafe buffer manipulation}} expected-note{{pass 
-fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+  snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function 
introduces unsafe buffer manipulation}} expected-note{{pass 
-fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+  snprintf(s.data(), s.size(), "%s%d", "hello", *p); // 
expected-warning{{function introduces unsafe buffer manipulation}} 
expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening 
suggestions}}
+  vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // 
expected-warning{{function introduces unsafe buffer manipulation}} 
expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening 
suggestions}}
+  sscanf(p, "%s%d", "hello", *p);  // expected-warning{{function introduces 
unsafe buffer manipulation}} expected-note{{pass 
-fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+  sscanf_s(p, "%s%d", "hello", *p);  // expected-warning{{function introduces 
unsafe buffer manipulation}} expected-note{{pass 
-fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+  fprintf((FILE*)p, "%s%d", "hello", *p); // no warn
+  printf("%s%d", "hello", *p); // no warn
+  snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
+  strlen("hello");// no warn
+}
+
+void v(std::string s1, std::wstring s2) {
+  snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+}
+
+
+void g(char *begin, char *end, char *p, std::span<char> s) {
+  std::copy(begin, end, p); // no warn
+  std::copy(s.begin(), s.end(), s.begin()); // no warn
+}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
index 844311c3a51a5..6a4541c403a48 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp
@@ -1,8 +1,6 @@
 // RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage 
-fsafe-buffer-usage-suggestions -verify %s
 
-// expected-no-diagnostics
-
 typedef unsigned __darwin_size_t;
 typedef __darwin_size_t size_t;
  #define bzero(s, n) __builtin_bzero(s, n)
-void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); }
+void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // 
expected-warning{{function introduces unsafe buffer manipulation}}

>From 2dff88db07d59a2b6e52230abf66ac3d4d723196 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziq...@udel.edu>
Date: Thu, 1 Aug 2024 16:38:50 -0700
Subject: [PATCH 2/2] [-Wunsafe-buffer-usage] Warn string_view constructions
 that not guarantee null-termination

One could use `string_view` as the safe view over strings in read-only
scenarios as it is hardened, preserving bounds info and guaranteeing
null-termination with the new warning.

If one finds the warning being too restrictive, turn it off with
`-Wno-unsafe-buffer-usage-in-string-view`.  The rest of the warnings
will not assume null-termination guarantee of `string_view` as well
then.
---
 .../Analysis/Analyses/UnsafeBufferUsage.h     |  8 +-
 .../Analyses/UnsafeBufferUsageGadgets.def     | 11 ++-
 clang/include/clang/Basic/DiagnosticGroups.td |  3 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |  3 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp      | 86 ++++++++++++++++---
 clang/lib/Sema/AnalysisBasedWarnings.cpp      | 17 ++++
 ...arn-unsafe-buffer-usage-in-string_view.cpp | 85 ++++++++++++++++++
 ...arn-unsafe-buffer-usage-libc-functions.cpp | 22 ++++-
 8 files changed, 216 insertions(+), 19 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-in-string_view.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e11..f7c5b887756d9 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -147,10 +147,16 @@ class UnsafeBufferUsageHandler {
   virtual bool isSafeBufferOptOut(const SourceLocation &Loc) const = 0;
 
   /// \return true iff unsafe uses in containers should NOT be reported at
-  /// `Loc`; false otherwise.
+  /// `Loc`; false otherwise. Controlled by 
`-Wunsafe-buffer-usage-in-container`
   virtual bool
   ignoreUnsafeBufferInContainer(const SourceLocation &Loc) const = 0;
 
+  /// \return true iff unsafe uses in string_views should NOT be reported at
+  /// `Loc`; false otherwise. Controlled by
+  /// `-Wunsafe-buffer-usage-in-string-view`
+  virtual bool
+  ignoreUnsafeBufferInStringView(const SourceLocation &Loc) const = 0;
+
   virtual std::string
   getUnsafeBufferUsageAttributeTextAt(SourceLocation Loc,
                                       StringRef WSSuffix = "") const = 0;
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index ac01b285ae833..7f3fd24b5cdc7 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -18,6 +18,12 @@
 #define WARNING_GADGET(name) GADGET(name)
 #endif
 
+/// Make `WARNING_LIBC_GADGET` self a subset of WARNING_GADGET so that it can 
be
+/// tuned specially according to whether we warn `StringViewUnsafeConstructor`.
+#ifndef WARNING_LIBC_GADGET
+#define WARNING_LIBC_GADGET(name) WARNING_GADGET(name)
+#endif
+
 /// A `WARNING_GADGET` subset, where the code pattern of each gadget
 /// corresponds uses of a (possibly hardened) contatiner (e.g., `std::span`).
 #ifndef WARNING_CONTAINER_GADGET
@@ -38,8 +44,9 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
-WARNING_GADGET(UnsafeLibcFunctionCall)
+WARNING_LIBC_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
+WARNING_CONTAINER_GADGET(StringViewUnsafeConstructor) // `std::string_view` 
constructed from std::string guarantees null-termination
 FIXABLE_GADGET(ULCArraySubscript)          // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
 FIXABLE_GADGET(PointerDereference)
@@ -53,5 +60,7 @@ FIXABLE_GADGET(PointerInit)
 
 #undef FIXABLE_GADGET
 #undef WARNING_GADGET
+#undef WARNING_LIBC_GADGET
 #undef WARNING_CONTAINER_GADGET
+#undef WARNING_STRINGVIEW_GADGET
 #undef GADGET
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td 
b/clang/include/clang/Basic/DiagnosticGroups.td
index 19c3f1e043349..68413fab136fe 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1551,7 +1551,8 @@ def HLSLAvailability : DiagGroup<"hlsl-availability">;
 def ReadOnlyPlacementChecks : DiagGroup<"read-only-types">;
 
 // Warnings and fixes to support the "safe buffers" programming model.
-def UnsafeBufferUsageInContainer : 
DiagGroup<"unsafe-buffer-usage-in-container">;
+def UnsafeBufferUsageInStringView : 
DiagGroup<"unsafe-buffer-usage-in-string-view">;
+def UnsafeBufferUsageInContainer : 
DiagGroup<"unsafe-buffer-usage-in-container", [UnsafeBufferUsageInStringView]>;
 def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", 
[UnsafeBufferUsageInContainer]>;
 
 // Warnings and notes related to the function effects system underlying
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 581434d33c5c9..713a3b10b413b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12388,6 +12388,9 @@ def note_safe_buffer_usage_suggestions_disabled : Note<
 def warn_unsafe_buffer_usage_in_container : Warning<
   "the two-parameter std::span construction is unsafe as it can introduce 
mismatch between buffer size and the bound information">,
   InGroup<UnsafeBufferUsageInContainer>, DefaultIgnore;
+def warn_unsafe_buffer_usage_in_string_view : Warning<
+  "construct string_view from raw pointers does not guarantee 
null-termination, construct from std::string instead">,
+  InGroup<UnsafeBufferUsageInStringView>, DefaultIgnore;
 #ifndef NDEBUG
 // Not a user-facing diagnostic. Useful for debugging false negatives in
 // -fsafe-buffer-usage-suggestions (i.e. lack of -Wunsafe-buffer-usage fixits).
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 926a78b6f0096..bd29296ee2dde 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -247,11 +247,16 @@ AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const 
UnsafeBufferUsageHandler *,
   return !Handler->isSafeBufferOptOut(Node.getBeginLoc());
 }
 
-AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer,
+AST_MATCHER_P(Stmt, ignoreSpanTwoParamConstructor,
               const UnsafeBufferUsageHandler *, Handler) {
   return Handler->ignoreUnsafeBufferInContainer(Node.getBeginLoc());
 }
 
+AST_MATCHER_P(Stmt, ignoreStringViewUnsafeConstructor,
+              const UnsafeBufferUsageHandler *, Handler) {
+  return Handler->ignoreUnsafeBufferInStringView(Node.getBeginLoc());
+}
+
 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) {
   return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder);
 }
@@ -448,7 +453,8 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
-AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
+AST_MATCHER_P(CallExpr, isUnsafeLibcFunctionCall, internal::Matcher<Expr>,
+              hasUnsafeStringView) {
   static const std::set<StringRef> PredefinedNames{
       // numeric conversion:
       "atof",
@@ -525,7 +531,9 @@ AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
   // checks for `printf`s:
   struct FuncNameMatch {
     const CallExpr *const Call;
-    FuncNameMatch(const CallExpr *Call) : Call(Call) {}
+    const bool hasUnsafeStringView;
+    FuncNameMatch(const CallExpr *Call, bool hasUnsafeStringView)
+        : Call(Call), hasUnsafeStringView(hasUnsafeStringView) {}
 
     // For a name `S` in `PredefinedNames` or a member of the printf/scanf
     // family, define matching function names with `S` by the grammar below:
@@ -605,27 +613,35 @@ AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
 
     // A pointer type expression is known to be null-terminated, if it has the
     // form: E.c_str(), for any expression E of `std::string` type.
-    static bool isNullTermPointer(const Expr *Ptr) {
+    static bool isNullTermPointer(const Expr *Ptr,
+                                  bool UnsafeStringView = true) {
       if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
         return true;
       if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) 
{
         const CXXMethodDecl *MD = MCE->getMethodDecl();
         const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
 
-        if (MD && RD && RD->isInStdNamespace())
+        if (MD && RD && RD->isInStdNamespace()) {
           if (MD->getName() == "c_str" && RD->getName() == "basic_string")
             return true;
+          if (!UnsafeStringView && MD->getName() == "data" &&
+              RD->getName() == "basic_string_view")
+            return true;
+        }
       }
       return false;
     }
 
     // Check safe patterns for printfs w.r.t their prefixes:
     bool isUnsafePrintf(StringRef Prefix /* empty, 'k', 'f', 's', or 'sn' */) {
-      auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool {
-        return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+      const bool hasUnsafeStringView = this->hasUnsafeStringView;
+      auto AnyUnsafeStrPtr = [&hasUnsafeStringView](const Expr *Arg) -> bool {
+        return Arg->getType()->isPointerType() &&
+               !isNullTermPointer(Arg, hasUnsafeStringView);
       };
 
-      if (Prefix.empty() || Prefix == "k") // printf: all pointer args should 
be null-terminated
+      if (Prefix.empty() ||
+          Prefix == "k") // printf: all pointer args should be null-terminated
         return llvm::any_of(Call->arguments(), AnyUnsafeStrPtr);
 
       if (Prefix == "f" && Call->getNumArgs() > 1) {
@@ -677,7 +693,7 @@ AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
       return Name == "strlen" && Call->getNumArgs() == 1 &&
              isa<StringLiteral>(Call->getArg(0)->IgnoreParenImpCasts());
     }
-  } FuncNameMatch{&Node};
+  } FuncNameMatch{&Node, hasUnsafeStringView.matches(Node, Finder, Builder)};
 
   const FunctionDecl *FD = Node.getDirectCallee();
   const IdentifierInfo *II;
@@ -1037,6 +1053,47 @@ class SpanTwoParamConstructorGadget : public 
WarningGadget {
   }
 };
 
+class StringViewUnsafeConstructorGadget : public WarningGadget {
+  static constexpr const char *const Tag = "StringViewUnsafeConstructor";
+  const CXXConstructExpr *Ctor;
+
+public:
+  StringViewUnsafeConstructorGadget(const MatchFinder::MatchResult &Result)
+      : WarningGadget(Kind::StringViewUnsafeConstructor),
+        Ctor(Result.Nodes.getNodeAs<CXXConstructExpr>(Tag)) {}
+
+  static bool classof(const Gadget *G) {
+    return G->getKind() == Kind::StringViewUnsafeConstructor;
+  }
+
+  // Matches any `basic_string_view` constructor call that is not a copy
+  // constructor or on a string literal:
+  static Matcher matcher() {
+    auto isStringViewDecl = hasDeclaration(cxxConstructorDecl(
+        hasDeclContext(isInStdNamespace()), hasName("basic_string_view")));
+
+    return stmt(
+        cxxConstructExpr(
+            isStringViewDecl,
+            unless(
+                hasArgument(0, expr(ignoringParenImpCasts(stringLiteral())))),
+            unless(hasDeclaration(cxxConstructorDecl(isCopyConstructor()))))
+            .bind(Tag));
+  }
+
+  const Stmt *getBaseStmt() const override { return Ctor; }
+
+  DeclUseList getClaimedVarUseSites() const override {
+    // If the constructor call is of the form `std::basic_string_view{ptrVar,
+    // ...}`, `ptrVar` is considered an unsafe variable.
+    if (auto *DRE = dyn_cast<DeclRefExpr>(Ctor->getArg(0))) {
+      if (DRE->getType()->isPointerType() && isa<VarDecl>(DRE->getDecl()))
+        return {DRE};
+    }
+    return {};
+  }
+};
+
 /// A pointer initialization expression of the form:
 ///  \code
 ///  int *p = q;
@@ -1293,8 +1350,10 @@ class UnsafeLibcFunctionCallGadget : public 
WarningGadget {
       : WarningGadget(Kind::UnsafeLibcFunctionCall),
         Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {}
 
-  static Matcher matcher() {
-    return stmt(callExpr(isUnsafeLibcFunctionCall()).bind(Tag));
+  static Matcher
+  matcher(ast_matchers::internal::Matcher<Expr> HasUnsafeStringView) {
+    return stmt(
+        callExpr(isUnsafeLibcFunctionCall(HasUnsafeStringView)).bind(Tag));
   }
 
   const Stmt *getBaseStmt() const override { return Call; }
@@ -1724,10 +1783,13 @@ findGadgets(const Decl *D, const 
UnsafeBufferUsageHandler &Handler,
 #define WARNING_GADGET(x)                                                      
\
           allOf(x ## Gadget::matcher().bind(#x),                               
\
                 notInSafeBufferOptOut(&Handler)),
+#define WARNING_LIBC_GADGET(x)                                                 
         \
+          allOf(x ## 
Gadget::matcher(ignoreStringViewUnsafeConstructor(&Handler)).bind(#x), \
+                notInSafeBufferOptOut(&Handler)),
 #define WARNING_CONTAINER_GADGET(x)                                            
\
           allOf(x ## Gadget::matcher().bind(#x),                               
\
                 notInSafeBufferOptOut(&Handler),                               
\
-                unless(ignoreUnsafeBufferInContainer(&Handler))),
+                unless(ignore ## x(&Handler))),
 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
             // Avoid a hanging comma.
             unless(stmt())
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp 
b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 0f604c61fa3af..e5f38be388a5c 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2256,6 +2256,17 @@ class UnsafeBufferUsageReporter : public 
UnsafeBufferUsageHandler {
         Range = UO->getSubExpr()->getSourceRange();
         MsgParam = 1;
       }
+    } else if (const auto *CtorExpr = dyn_cast<CXXConstructExpr>(Operation)) {
+      if (CtorExpr->getConstructor()->getCanonicalDecl()->getNameAsString() ==
+          "span")
+        S.Diag(CtorExpr->getLocation(),
+               diag::warn_unsafe_buffer_usage_in_container)
+            << CtorExpr->getSourceRange();
+      else // it is warning about "string_view":
+        S.Diag(CtorExpr->getLocation(),
+               diag::warn_unsafe_buffer_usage_in_string_view)
+            << CtorExpr->getSourceRange();
+      return;
     } else {
       if (isa<CallExpr>(Operation) || isa<CXXConstructExpr>(Operation)) {
         // note_unsafe_buffer_operation doesn't have this mode yet.
@@ -2370,6 +2381,12 @@ class UnsafeBufferUsageReporter : public 
UnsafeBufferUsageHandler {
     return S.Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container, Loc);
   }
 
+  bool
+  ignoreUnsafeBufferInStringView(const SourceLocation &Loc) const override {
+    return S.Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_string_view,
+                             Loc);
+  }
+
   // Returns the text representation of clang::unsafe_buffer_usage attribute.
   // `WSSuffix` holds customized "white-space"s, e.g., newline or whilespace
   // characters.
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-string_view.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-string_view.cpp
new file mode 100644
index 0000000000000..536751446f585
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-string_view.cpp
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -verify %s
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -verify %s 
-Wno-unsafe-buffer-usage-in-string-view -D_IGNORE_STRING_VIEW
+
+namespace std {
+  struct iterator{};
+
+  template<typename T>
+  struct basic_string_view {
+    T* p;
+    T *data();
+    unsigned size_bytes();
+    iterator begin();
+    iterator end();
+
+    constexpr basic_string_view( const T* s ){};
+    constexpr basic_string_view( const T* s, unsigned count );
+    constexpr basic_string_view( const basic_string_view& other ) noexcept = 
default;
+    template< class It, class End >
+    constexpr basic_string_view( It first, End last ) {};
+
+  };
+
+  typedef basic_string_view<char> string_view;
+  typedef basic_string_view<wchar_t> wstring_view;
+
+  template<typename T>
+  struct basic_string {
+    T *c_str();
+    operator std::basic_string_view<T>() {
+    }
+  };
+
+  typedef basic_string<char> string;
+  typedef basic_string<wchar_t> wstring;
+
+  template <class T> struct span {
+    constexpr span(T *, unsigned){}
+  };
+}
+
+void foo(std::string_view);
+void f(char * p, std::string S, std::string_view V) {
+  std::string_view SVs[] {S, "S"};
+  foo(S);
+  foo("S");
+  foo({S});
+  foo({"S"});
+  foo(V);
+#ifndef _IGNORE_STRING_VIEW
+  foo(p);       // expected-warning{{construct string_view from raw pointers 
does not guarantee null-termination, construct from std::string instead}}
+#else
+  foo(p);       // no warn
+#endif
+#ifndef _IGNORE_STRING_VIEW
+  foo({p, 10}); // expected-warning{{construct string_view from raw pointers 
does not guarantee null-termination, construct from std::string instead}}
+#else
+  foo({p, 10}); // no warn
+#endif
+
+  std::string_view SV{S};
+  std::string_view SV2{"S"};
+  std::string_view SV3 = S;
+  std::string_view SV4 = "S";
+  std::string_view SV5 = std::string_view(S);
+#ifndef _IGNORE_STRING_VIEW
+  std::string_view SV10 = p; // expected-warning{{construct string_view from 
raw pointers does not guarantee null-termination, construct from std::string 
instead}}
+#else
+  std::string_view SV10 = p; // no warn
+#endif
+#ifndef _IGNORE_STRING_VIEW
+  std::string_view SV11{p};  // expected-warning{{construct string_view from 
raw pointers does not guarantee null-termination, construct from std::string 
instead}}
+#else
+  std::string_view SV11{p};  // no warn
+#endif
+#ifndef _IGNORE_STRING_VIEW
+  std::string_view SV12{V.begin(), V.end()}; // expected-warning{{construct 
string_view from raw pointers does not guarantee null-termination, construct 
from std::string instead}}
+#else
+  std::string_view SV12{V.begin(), V.end()}; // no warn
+#endif
+}
+
+void g(int * p) {
+  // This warning is not affected:
+  std::span<int> S{p, 10}; // expected-warning{{the two-parameter std::span 
construction is unsafe as it can introduce mismatch between buffer size and the 
bound information}}
+}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index 326ecd6f284e5..feaeb23d73127 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
-// RUN:            -verify %s
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -verify %s
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -verify %s 
-Wno-unsafe-buffer-usage-in-string-view -D_IGNORE_STRING_VIEW
 
 typedef struct {} FILE;
 void memcpy();
@@ -42,6 +42,15 @@ namespace std {
 
   typedef basic_string<char> string;
   typedef basic_string<wchar_t> wstring;
+
+  template<typename T>
+  struct basic_string_view {
+    T* p;
+    T *data();
+    unsigned size_bytes();
+  };
+
+  typedef basic_string_view<char> string_view;
 }
 
 void f(char * p, char * q, std::span<char> s) {
@@ -69,8 +78,13 @@ void f(char * p, char * q, std::span<char> s) {
   strlen("hello");// no warn
 }
 
-void v(std::string s1, std::wstring s2) {
-  snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
+void v(std::string s, std::string_view sv) {
+  snprintf(s.data(), s.size_bytes(), "%s%d", s.c_str(), 0); // no warn
+#ifndef _IGNORE_STRING_VIEW
+  snprintf(sv.data(), sv.size_bytes(), "%s%d", sv.data(), 0); // no warn
+#else
+  snprintf(sv.data(), sv.size_bytes(), "%s%d", sv.data(), 0); // 
expected-warning{{function introduces unsafe buffer manipulation}} 
expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening 
suggestions}}
+#endif
 }
 
 

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to