llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: None (apple-fcloutier)

<details>
<summary>Changes</summary>

I [asked on the 
forums](https://discourse.llvm.org/t/should-attribute-format-checking-try-to-const-evaluate-strings/85854/4)
 and people were generally supportive of the idea, so:

Clang's -Wformat checker can see through an inconsistent set of operations. We 
can fall back to the recently-updated constant string evaluation infrastructure 
when Clang's initial evaluation fails for a second chance at figuring out what 
the format string is intended to be. This enables analyzing format strings that 
were built at compile-time with std::string and other constexpr-capable types 
in C++, as long as all pieces are also constexpr-visible, and a number of other 
patterns.

As a side effect, it also enables `tryEvaluateString` on char arrays (rather 
than only char pointers).

Radar-ID: rdar://99940060

---
Full diff: https://github.com/llvm/llvm-project/pull/135864.diff


7 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (+3) 
- (modified) clang/include/clang/AST/Expr.h (+8-1) 
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+2) 
- (modified) clang/lib/AST/ExprConstant.cpp (+32-8) 
- (modified) clang/lib/Sema/SemaChecking.cpp (+94-50) 
- (modified) clang/test/Sema/format-strings.c (+14) 
- (modified) clang/test/SemaCXX/format-strings.cpp (+74) 


``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 77bf3355af9da..05566d66a65d2 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -265,6 +265,9 @@ related warnings within the method body.
   ``format_matches`` accepts an example valid format string as its third
   argument. For more information, see the Clang attributes documentation.
 
+- Format string checking now supports the compile-time evaluation of format
+  strings as a fallback mechanism.
+
 - Introduced a new statement attribute ``[[clang::atomic]]`` that enables
   fine-grained control over atomic code generation on a per-statement basis.
   Supported options include ``[no_]remote_memory``,
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 20f70863a05b3..78eda8bc3c43e 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -791,7 +791,14 @@ class Expr : public ValueStmt {
                                  const Expr *PtrExpression, ASTContext &Ctx,
                                  EvalResult &Status) const;
 
-  /// If the current Expr can be evaluated to a pointer to a null-terminated
+  /// Fill \c Into with the first characters that can be constant-evaluated
+  /// from this \c Expr . When encountering a null character, stop and return
+  /// \c true (the null is not returned in \c Into ). Return \c false if
+  /// evaluation runs off the end of the constant-evaluated string before it
+  /// encounters a null character.
+  bool tryEvaluateString(ASTContext &Ctx, std::string &Into) const;
+
+  /// If the current \c Expr can be evaluated to a pointer to a null-terminated
   /// constant string, return the constant string (without the terminating
   /// null).
   std::optional<std::string> tryEvaluateString(ASTContext &Ctx) const;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 3cb2731488fab..4139ff2737c76 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10170,6 +10170,8 @@ def warn_format_bool_as_character : Warning<
   "using '%0' format specifier, but argument has boolean value">,
   InGroup<Format>;
 def note_format_string_defined : Note<"format string is defined here">;
+def note_format_string_evaluated_to : Note<
+  "format string was constant-evaluated">;
 def note_format_fix_specifier : Note<"did you mean to use '%0'?">;
 def note_printf_c_str: Note<"did you mean to call the %0 method?">;
 def note_format_security_fixit: Note<
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 80ece3c4ed7e1..fec92edf49096 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -17945,15 +17945,36 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, 
ASTContext &Ctx,
 
 static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
                                   EvalInfo &Info, std::string *StringResult) {
-  if (!E->getType()->hasPointerRepresentation() || !E->isPRValue())
+  QualType Ty = E->getType();
+  if (!E->isPRValue())
     return false;
 
   LValue String;
-
-  if (!EvaluatePointer(E, String, Info))
+  QualType CharTy;
+  if (Ty->canDecayToPointerType()) {
+    if (E->isGLValue()) {
+      if (!EvaluateLValue(E, String, Info))
+        return false;
+    } else {
+      APValue &Value = Info.CurrentCall->createTemporary(
+          E, Ty, ScopeKind::FullExpression, String);
+      if (!EvaluateInPlace(Value, Info, String, E))
+        return false;
+    }
+    // The result is a pointer to the first element of the array.
+    auto *AT = Info.Ctx.getAsArrayType(Ty);
+    CharTy = AT->getElementType();
+    if (auto *CAT = dyn_cast<ConstantArrayType>(AT))
+      String.addArray(Info, E, CAT);
+    else
+      String.addUnsizedArray(Info, E, CharTy);
+  } else if (Ty->hasPointerRepresentation()) {
+    if (!EvaluatePointer(E, String, Info))
+      return false;
+    CharTy = Ty->getPointeeType();
+  } else {
     return false;
-
-  QualType CharTy = E->getType()->getPointeeType();
+  }
 
   // Fast path: if it's a string literal, search the string value.
   if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
@@ -17995,13 +18016,16 @@ static bool EvaluateBuiltinStrLen(const Expr *E, 
uint64_t &Result,
   }
 }
 
-std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
+bool Expr::tryEvaluateString(ASTContext &Ctx, std::string &StringResult) const 
{
   Expr::EvalStatus Status;
   EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
   uint64_t Result;
-  std::string StringResult;
+  return EvaluateBuiltinStrLen(this, Result, Info, &StringResult);
+}
 
-  if (EvaluateBuiltinStrLen(this, Result, Info, &StringResult))
+std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
+  std::string StringResult;
+  if (tryEvaluateString(Ctx, StringResult))
     return StringResult;
   return {};
 }
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index bffd0dd461d3d..017be929ca18e 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -98,6 +98,7 @@
 #include "llvm/Support/Locale.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TargetParser/RISCVTargetParser.h"
 #include "llvm/TargetParser/Triple.h"
@@ -5935,8 +5936,14 @@ static void CheckFormatString(
     llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
     bool IgnoreStringsWithoutSpecifiers);
 
-static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
-                                               const Expr *E);
+enum StringLiteralConstEvalResult {
+  SLCER_NotEvaluated,
+  SLCER_NotNullTerminated,
+  SLCER_Evaluated,
+};
+
+static StringLiteralConstEvalResult
+constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL);
 
 // Determine if an expression is a string literal or constant string.
 // If this function returns false on the arguments to a function expecting a
@@ -5968,14 +5975,9 @@ static StringLiteralCheckType checkFormatStringExpr(
 
   switch (E->getStmtClass()) {
   case Stmt::InitListExprClass:
-    // Handle expressions like {"foobar"}.
-    if (const clang::Expr *SLE = maybeConstEvalStringLiteral(S.Context, E)) {
-      return checkFormatStringExpr(
-          S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
-          Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
-          UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
-    }
-    return SLCT_NotALiteral;
+    // try to constant-evaluate the string
+    break;
+
   case Stmt::BinaryConditionalOperatorClass:
   case Stmt::ConditionalOperatorClass: {
     // The expression is a literal if both sub-expressions were, and it was
@@ -6066,10 +6068,9 @@ static StringLiteralCheckType checkFormatStringExpr(
             if (InitList->isStringLiteralInit())
               Init = InitList->getInit(0)->IgnoreParenImpCasts();
           }
-          return checkFormatStringExpr(
-              S, ReferenceFormatString, Init, Args, APK, format_idx,
-              firstDataArg, Type, CallType,
-              /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, Offset);
+          InFunctionCall = false;
+          E = Init;
+          goto tryAgain;
         }
       }
 
@@ -6142,11 +6143,9 @@ static StringLiteralCheckType checkFormatStringExpr(
                 }
                 return SLCT_UncheckedLiteral;
               }
-              return checkFormatStringExpr(
-                  S, ReferenceFormatString, PVFormatMatches->getFormatString(),
-                  Args, APK, format_idx, firstDataArg, Type, CallType,
-                  /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg,
-                  Offset, IgnoreStringsWithoutSpecifiers);
+              E = PVFormatMatches->getFormatString();
+              InFunctionCall = false;
+              goto tryAgain;
             }
           }
 
@@ -6214,20 +6213,13 @@ static StringLiteralCheckType checkFormatStringExpr(
         unsigned BuiltinID = FD->getBuiltinID();
         if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString ||
             BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) {
-          const Expr *Arg = CE->getArg(0);
-          return checkFormatStringExpr(
-              S, ReferenceFormatString, Arg, Args, APK, format_idx,
-              firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
-              UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
+          E = CE->getArg(0);
+          goto tryAgain;
         }
       }
     }
-    if (const Expr *SLE = maybeConstEvalStringLiteral(S.Context, E))
-      return checkFormatStringExpr(
-          S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
-          Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
-          UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
-    return SLCT_NotALiteral;
+    // try to constant-evaluate the string
+    break;
   }
   case Stmt::ObjCMessageExprClass: {
     const auto *ME = cast<ObjCMessageExpr>(E);
@@ -6248,11 +6240,8 @@ static StringLiteralCheckType checkFormatStringExpr(
           IgnoreStringsWithoutSpecifiers = true;
         }
 
-        const Expr *Arg = ME->getArg(FA->getFormatIdx().getASTIndex());
-        return checkFormatStringExpr(
-            S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
-            Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
-            Offset, IgnoreStringsWithoutSpecifiers);
+        E = ME->getArg(FA->getFormatIdx().getASTIndex());
+        goto tryAgain;
       }
     }
 
@@ -6314,7 +6303,8 @@ static StringLiteralCheckType checkFormatStringExpr(
       }
     }
 
-    return SLCT_NotALiteral;
+    // try to constant-evaluate the string
+    break;
   }
   case Stmt::UnaryOperatorClass: {
     const UnaryOperator *UnaOp = cast<UnaryOperator>(E);
@@ -6331,26 +6321,79 @@ static StringLiteralCheckType checkFormatStringExpr(
       }
     }
 
-    return SLCT_NotALiteral;
+    // try to constant-evaluate the string
+    break;
   }
 
   default:
+    // try to constant-evaluate the string
+    break;
+  }
+
+  const StringLiteral *FakeLiteral = nullptr;
+  switch (constEvalStringAsLiteral(S, E, FakeLiteral)) {
+  case SLCER_NotEvaluated:
     return SLCT_NotALiteral;
+
+  case SLCER_NotNullTerminated:
+    S.Diag(Args[format_idx]->getBeginLoc(),
+           diag::warn_printf_format_string_not_null_terminated)
+        << Args[format_idx]->getSourceRange();
+    if (!InFunctionCall)
+      S.Diag(E->getBeginLoc(), diag::note_format_string_defined);
+    // Stop checking, as this might just mean we're missing a chunk of the
+    // format string and there would be other spurious format issues.
+    return SLCT_UncheckedLiteral;
+
+  case SLCER_Evaluated:
+    InFunctionCall = false;
+    E = FakeLiteral;
+    goto tryAgain;
   }
 }
 
-// If this expression can be evaluated at compile-time,
-// check if the result is a StringLiteral and return it
-// otherwise return nullptr
-static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
-                                               const Expr *E) {
+static StringLiteralConstEvalResult
+constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL) {
+  // As a last resort, try to constant-evaluate the format string. If it
+  // evaluates to a string literal in the first place, we can point to that
+  // string literal in source and use that.
   Expr::EvalResult Result;
-  if (E->EvaluateAsRValue(Result, Context) && Result.Val.isLValue()) {
+  if (E->EvaluateAsRValue(Result, S.Context) && Result.Val.isLValue()) {
     const auto *LVE = Result.Val.getLValueBase().dyn_cast<const Expr *>();
-    if (isa_and_nonnull<StringLiteral>(LVE))
-      return LVE;
+    if (auto *BaseSL = dyn_cast_or_null<StringLiteral>(LVE)) {
+      SL = BaseSL;
+      return SLCER_Evaluated;
+    }
   }
-  return nullptr;
+
+  // Otherwise, try to evaluate the expression as a string constant.
+  std::string FormatString;
+  if (!E->tryEvaluateString(S.Context, FormatString)) {
+    return FormatString.empty() ? SLCER_NotEvaluated : SLCER_NotNullTerminated;
+  }
+
+  std::unique_ptr<llvm::MemoryBuffer> MemBuf;
+  {
+    llvm::SmallString<80> EscapedString;
+    {
+      llvm::raw_svector_ostream OS(EscapedString);
+      OS << '"';
+      OS.write_escaped(FormatString);
+      OS << '"';
+    }
+    MemBuf.reset(new llvm::SmallVectorMemoryBuffer(std::move(EscapedString),
+                                                   "<scratch space>", true));
+  }
+
+  // Plop that string into a scratch buffer, create a string literal and then
+  // go with that.
+  auto ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf));
+  SourceLocation Begin = 
S.getSourceManager().getLocForStartOfFile(ScratchFile);
+  QualType SLType = S.Context.getStringLiteralArrayType(S.Context.CharTy,
+                                                        FormatString.length());
+  SL = StringLiteral::Create(S.Context, FormatString,
+                             StringLiteralKind::Ordinary, false, SLType, 
Begin);
+  return SLCER_Evaluated;
 }
 
 StringRef Sema::GetFormatStringTypeName(Sema::FormatStringType FST) {
@@ -6973,10 +7016,11 @@ void CheckFormatHandler::EmitFormatDiagnostic(
     S.Diag(IsStringLocation ? ArgumentExpr->getExprLoc() : Loc, PDiag)
       << ArgumentExpr->getSourceRange();
 
-    const Sema::SemaDiagnosticBuilder &Note =
-      S.Diag(IsStringLocation ? Loc : StringRange.getBegin(),
-             diag::note_format_string_defined);
-
+    SourceLocation DiagLoc = IsStringLocation ? Loc : StringRange.getBegin();
+    unsigned DiagID = S.getSourceManager().isWrittenInScratchSpace(DiagLoc)
+                          ? diag::note_format_string_evaluated_to
+                          : diag::note_format_string_defined;
+    const Sema::SemaDiagnosticBuilder &Note = S.Diag(DiagLoc, DiagID);
     Note << StringRange;
     Note << FixIt;
   }
diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c
index af30ad5d15fe2..a94e0619ce843 100644
--- a/clang/test/Sema/format-strings.c
+++ b/clang/test/Sema/format-strings.c
@@ -3,6 +3,11 @@
 // RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem 
%S/Inputs -triple=x86_64-unknown-fuchsia %s
 // RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem 
%S/Inputs -triple=x86_64-linux-android %s
 
+// expected-note@-5{{format string was constant-evaluated}}
+// ^^^ there will be a <scratch space> SourceLocation caused by the
+// test_consteval_init_array test, that -verify treats as if it showed up at
+// line 1 of this file.
+
 #include <stdarg.h>
 #include <stddef.h>
 #define __need_wint_t
@@ -900,3 +905,12 @@ void test_promotion(void) {
   // pointers
   printf("%s", i); // expected-warning{{format specifies type 'char *' but the 
argument has type 'int'}}
 }
+
+void test_consteval_init_array(void) {
+  const char buf_not_terminated[] = {'%', 55 * 2 + 5, '\n'}; // 
expected-note{{format string is defined here}}
+  printf(buf_not_terminated, "hello"); // expected-warning{{format string is 
not null-terminated}}
+
+  const char buf[] = {'%', 55 * 2 + 5, '\n', 0};
+  printf(buf, "hello"); // no-warning
+  printf(buf, 123); // expected-warning{{format specifies type 'char *' but 
the argument has type 'int'}}
+}
diff --git a/clang/test/SemaCXX/format-strings.cpp 
b/clang/test/SemaCXX/format-strings.cpp
index 48cf23999a94f..7b04ea7d8bc75 100644
--- a/clang/test/SemaCXX/format-strings.cpp
+++ b/clang/test/SemaCXX/format-strings.cpp
@@ -1,6 +1,14 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso 
-Wformat-pedantic -fblocks %s
 // RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso 
-fblocks -std=c++98 %s
 // RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso 
-Wformat-pedantic -fblocks -std=c++11 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso 
-Wformat-pedantic -fblocks -std=c++20 %s
+
+#if __cplusplus >= 202000l
+// expected-note@-6{{format string was constant-evaluated}}
+// ^^^ there will be a <scratch space> SourceLocation caused by the
+// test_constexpr_string test, that -verify treats as if it showed up at
+// line 1 of this file.
+#endif
 
 #include <stdarg.h>
 
@@ -238,3 +246,69 @@ void f(Scoped1 S1, Scoped2 S2) {
 }
 
 #endif
+
+#if __cplusplus >= 202000L
+class my_string {
+  char *data;
+  unsigned size;
+
+public:
+  template<unsigned N>
+  constexpr my_string(const char (&literal)[N]) {
+    data = new char[N+1];
+    for (size = 0; size < N; ++size) {
+      data[size] = literal[size];
+      if (data[size] == 0)
+        break;
+    }
+    data[size] = 0;
+  }
+
+  my_string(const my_string &) = delete;
+
+  constexpr my_string(my_string &&that) {
+    data = that.data;
+    size = that.size;
+    that.data = nullptr;
+    that.size = 0;
+  }
+
+  constexpr ~my_string() {
+    delete[] data;
+  }
+
+  template<unsigned N>
+  constexpr void append(const char (&literal)[N]) {
+    char *cat = new char[size + N + 1];
+    char *tmp = cat;
+    for (unsigned i = 0; i < size; ++i) {
+      *tmp++ = data[i];
+    }
+    for (unsigned i = 0; i < N; ++i) {
+      *tmp = literal[i];
+      if (*tmp == 0)
+        break;
+      ++tmp;
+    }
+    *tmp = 0;
+    delete[] data;
+    size = tmp - cat;
+    data = cat;
+  }
+
+  constexpr const char *c_str() const {
+    return data;
+  }
+};
+
+constexpr my_string const_string() {
+  my_string str("hello %s");
+  str.append(", %d");
+  return str;
+}
+
+void test_constexpr_string() {
+  printf(const_string().c_str(), "hello", 123); // no-warning
+  printf(const_string().c_str(), 123, 456); // expected-warning {{format 
specifies type 'char *' but the argument has type 'int'}}
+}
+#endif

``````````

</details>


https://github.com/llvm/llvm-project/pull/135864
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to