=?utf-8?q?Félix?= Cloutier <fclout...@apple.com>,
=?utf-8?q?Félix?= Cloutier <fclout...@apple.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/135...@github.com>


https://github.com/apple-fcloutier updated 
https://github.com/llvm/llvm-project/pull/135864

>From 7e15ec24e6de96b9828f13f21099446e55831591 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Cloutier?= <fclout...@apple.com>
Date: Tue, 15 Apr 2025 14:28:02 -0700
Subject: [PATCH 1/3] [clang] Constant-evaluate format strings as last resort

Clang's -Wformat checker can see through an inconsistent set of
operations. We can fall back to the recently-updated constant string
evaluation infrastructure when Clang's initial evaluation fails for
a second chance at figuring out what the format string is intended
to be. This enables analyzing format strings that were built at
compile-time with std::string and other constexpr-capable types in
C++, as long as all pieces are also constexpr-visible, and a number
of other patterns.

Radar-ID: rdar://99940060
---
 clang/docs/ReleaseNotes.rst                   |   3 +
 clang/include/clang/AST/Expr.h                |   9 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |   2 +
 clang/lib/AST/ExprConstant.cpp                |  40 ++++-
 clang/lib/Sema/SemaChecking.cpp               | 144 ++++++++++++------
 clang/test/Sema/format-strings.c              |  14 ++
 clang/test/SemaCXX/format-strings.cpp         |  74 +++++++++
 7 files changed, 227 insertions(+), 59 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 77bf3355af9da..05566d66a65d2 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -265,6 +265,9 @@ related warnings within the method body.
   ``format_matches`` accepts an example valid format string as its third
   argument. For more information, see the Clang attributes documentation.
 
+- Format string checking now supports the compile-time evaluation of format
+  strings as a fallback mechanism.
+
 - Introduced a new statement attribute ``[[clang::atomic]]`` that enables
   fine-grained control over atomic code generation on a per-statement basis.
   Supported options include ``[no_]remote_memory``,
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 20f70863a05b3..78eda8bc3c43e 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -791,7 +791,14 @@ class Expr : public ValueStmt {
                                  const Expr *PtrExpression, ASTContext &Ctx,
                                  EvalResult &Status) const;
 
-  /// If the current Expr can be evaluated to a pointer to a null-terminated
+  /// Fill \c Into with the first characters that can be constant-evaluated
+  /// from this \c Expr . When encountering a null character, stop and return
+  /// \c true (the null is not returned in \c Into ). Return \c false if
+  /// evaluation runs off the end of the constant-evaluated string before it
+  /// encounters a null character.
+  bool tryEvaluateString(ASTContext &Ctx, std::string &Into) const;
+
+  /// If the current \c Expr can be evaluated to a pointer to a null-terminated
   /// constant string, return the constant string (without the terminating
   /// null).
   std::optional<std::string> tryEvaluateString(ASTContext &Ctx) const;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 3cb2731488fab..4139ff2737c76 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10170,6 +10170,8 @@ def warn_format_bool_as_character : Warning<
   "using '%0' format specifier, but argument has boolean value">,
   InGroup<Format>;
 def note_format_string_defined : Note<"format string is defined here">;
+def note_format_string_evaluated_to : Note<
+  "format string was constant-evaluated">;
 def note_format_fix_specifier : Note<"did you mean to use '%0'?">;
 def note_printf_c_str: Note<"did you mean to call the %0 method?">;
 def note_format_security_fixit: Note<
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 80ece3c4ed7e1..fec92edf49096 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -17945,15 +17945,36 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, 
ASTContext &Ctx,
 
 static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
                                   EvalInfo &Info, std::string *StringResult) {
-  if (!E->getType()->hasPointerRepresentation() || !E->isPRValue())
+  QualType Ty = E->getType();
+  if (!E->isPRValue())
     return false;
 
   LValue String;
-
-  if (!EvaluatePointer(E, String, Info))
+  QualType CharTy;
+  if (Ty->canDecayToPointerType()) {
+    if (E->isGLValue()) {
+      if (!EvaluateLValue(E, String, Info))
+        return false;
+    } else {
+      APValue &Value = Info.CurrentCall->createTemporary(
+          E, Ty, ScopeKind::FullExpression, String);
+      if (!EvaluateInPlace(Value, Info, String, E))
+        return false;
+    }
+    // The result is a pointer to the first element of the array.
+    auto *AT = Info.Ctx.getAsArrayType(Ty);
+    CharTy = AT->getElementType();
+    if (auto *CAT = dyn_cast<ConstantArrayType>(AT))
+      String.addArray(Info, E, CAT);
+    else
+      String.addUnsizedArray(Info, E, CharTy);
+  } else if (Ty->hasPointerRepresentation()) {
+    if (!EvaluatePointer(E, String, Info))
+      return false;
+    CharTy = Ty->getPointeeType();
+  } else {
     return false;
-
-  QualType CharTy = E->getType()->getPointeeType();
+  }
 
   // Fast path: if it's a string literal, search the string value.
   if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
@@ -17995,13 +18016,16 @@ static bool EvaluateBuiltinStrLen(const Expr *E, 
uint64_t &Result,
   }
 }
 
-std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
+bool Expr::tryEvaluateString(ASTContext &Ctx, std::string &StringResult) const 
{
   Expr::EvalStatus Status;
   EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
   uint64_t Result;
-  std::string StringResult;
+  return EvaluateBuiltinStrLen(this, Result, Info, &StringResult);
+}
 
-  if (EvaluateBuiltinStrLen(this, Result, Info, &StringResult))
+std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
+  std::string StringResult;
+  if (tryEvaluateString(Ctx, StringResult))
     return StringResult;
   return {};
 }
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index bffd0dd461d3d..017be929ca18e 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -98,6 +98,7 @@
 #include "llvm/Support/Locale.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TargetParser/RISCVTargetParser.h"
 #include "llvm/TargetParser/Triple.h"
@@ -5935,8 +5936,14 @@ static void CheckFormatString(
     llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
     bool IgnoreStringsWithoutSpecifiers);
 
-static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
-                                               const Expr *E);
+enum StringLiteralConstEvalResult {
+  SLCER_NotEvaluated,
+  SLCER_NotNullTerminated,
+  SLCER_Evaluated,
+};
+
+static StringLiteralConstEvalResult
+constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL);
 
 // Determine if an expression is a string literal or constant string.
 // If this function returns false on the arguments to a function expecting a
@@ -5968,14 +5975,9 @@ static StringLiteralCheckType checkFormatStringExpr(
 
   switch (E->getStmtClass()) {
   case Stmt::InitListExprClass:
-    // Handle expressions like {"foobar"}.
-    if (const clang::Expr *SLE = maybeConstEvalStringLiteral(S.Context, E)) {
-      return checkFormatStringExpr(
-          S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
-          Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
-          UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
-    }
-    return SLCT_NotALiteral;
+    // try to constant-evaluate the string
+    break;
+
   case Stmt::BinaryConditionalOperatorClass:
   case Stmt::ConditionalOperatorClass: {
     // The expression is a literal if both sub-expressions were, and it was
@@ -6066,10 +6068,9 @@ static StringLiteralCheckType checkFormatStringExpr(
             if (InitList->isStringLiteralInit())
               Init = InitList->getInit(0)->IgnoreParenImpCasts();
           }
-          return checkFormatStringExpr(
-              S, ReferenceFormatString, Init, Args, APK, format_idx,
-              firstDataArg, Type, CallType,
-              /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, Offset);
+          InFunctionCall = false;
+          E = Init;
+          goto tryAgain;
         }
       }
 
@@ -6142,11 +6143,9 @@ static StringLiteralCheckType checkFormatStringExpr(
                 }
                 return SLCT_UncheckedLiteral;
               }
-              return checkFormatStringExpr(
-                  S, ReferenceFormatString, PVFormatMatches->getFormatString(),
-                  Args, APK, format_idx, firstDataArg, Type, CallType,
-                  /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg,
-                  Offset, IgnoreStringsWithoutSpecifiers);
+              E = PVFormatMatches->getFormatString();
+              InFunctionCall = false;
+              goto tryAgain;
             }
           }
 
@@ -6214,20 +6213,13 @@ static StringLiteralCheckType checkFormatStringExpr(
         unsigned BuiltinID = FD->getBuiltinID();
         if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString ||
             BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) {
-          const Expr *Arg = CE->getArg(0);
-          return checkFormatStringExpr(
-              S, ReferenceFormatString, Arg, Args, APK, format_idx,
-              firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
-              UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
+          E = CE->getArg(0);
+          goto tryAgain;
         }
       }
     }
-    if (const Expr *SLE = maybeConstEvalStringLiteral(S.Context, E))
-      return checkFormatStringExpr(
-          S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
-          Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
-          UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
-    return SLCT_NotALiteral;
+    // try to constant-evaluate the string
+    break;
   }
   case Stmt::ObjCMessageExprClass: {
     const auto *ME = cast<ObjCMessageExpr>(E);
@@ -6248,11 +6240,8 @@ static StringLiteralCheckType checkFormatStringExpr(
           IgnoreStringsWithoutSpecifiers = true;
         }
 
-        const Expr *Arg = ME->getArg(FA->getFormatIdx().getASTIndex());
-        return checkFormatStringExpr(
-            S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
-            Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
-            Offset, IgnoreStringsWithoutSpecifiers);
+        E = ME->getArg(FA->getFormatIdx().getASTIndex());
+        goto tryAgain;
       }
     }
 
@@ -6314,7 +6303,8 @@ static StringLiteralCheckType checkFormatStringExpr(
       }
     }
 
-    return SLCT_NotALiteral;
+    // try to constant-evaluate the string
+    break;
   }
   case Stmt::UnaryOperatorClass: {
     const UnaryOperator *UnaOp = cast<UnaryOperator>(E);
@@ -6331,26 +6321,79 @@ static StringLiteralCheckType checkFormatStringExpr(
       }
     }
 
-    return SLCT_NotALiteral;
+    // try to constant-evaluate the string
+    break;
   }
 
   default:
+    // try to constant-evaluate the string
+    break;
+  }
+
+  const StringLiteral *FakeLiteral = nullptr;
+  switch (constEvalStringAsLiteral(S, E, FakeLiteral)) {
+  case SLCER_NotEvaluated:
     return SLCT_NotALiteral;
+
+  case SLCER_NotNullTerminated:
+    S.Diag(Args[format_idx]->getBeginLoc(),
+           diag::warn_printf_format_string_not_null_terminated)
+        << Args[format_idx]->getSourceRange();
+    if (!InFunctionCall)
+      S.Diag(E->getBeginLoc(), diag::note_format_string_defined);
+    // Stop checking, as this might just mean we're missing a chunk of the
+    // format string and there would be other spurious format issues.
+    return SLCT_UncheckedLiteral;
+
+  case SLCER_Evaluated:
+    InFunctionCall = false;
+    E = FakeLiteral;
+    goto tryAgain;
   }
 }
 
-// If this expression can be evaluated at compile-time,
-// check if the result is a StringLiteral and return it
-// otherwise return nullptr
-static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
-                                               const Expr *E) {
+static StringLiteralConstEvalResult
+constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL) {
+  // As a last resort, try to constant-evaluate the format string. If it
+  // evaluates to a string literal in the first place, we can point to that
+  // string literal in source and use that.
   Expr::EvalResult Result;
-  if (E->EvaluateAsRValue(Result, Context) && Result.Val.isLValue()) {
+  if (E->EvaluateAsRValue(Result, S.Context) && Result.Val.isLValue()) {
     const auto *LVE = Result.Val.getLValueBase().dyn_cast<const Expr *>();
-    if (isa_and_nonnull<StringLiteral>(LVE))
-      return LVE;
+    if (auto *BaseSL = dyn_cast_or_null<StringLiteral>(LVE)) {
+      SL = BaseSL;
+      return SLCER_Evaluated;
+    }
   }
-  return nullptr;
+
+  // Otherwise, try to evaluate the expression as a string constant.
+  std::string FormatString;
+  if (!E->tryEvaluateString(S.Context, FormatString)) {
+    return FormatString.empty() ? SLCER_NotEvaluated : SLCER_NotNullTerminated;
+  }
+
+  std::unique_ptr<llvm::MemoryBuffer> MemBuf;
+  {
+    llvm::SmallString<80> EscapedString;
+    {
+      llvm::raw_svector_ostream OS(EscapedString);
+      OS << '"';
+      OS.write_escaped(FormatString);
+      OS << '"';
+    }
+    MemBuf.reset(new llvm::SmallVectorMemoryBuffer(std::move(EscapedString),
+                                                   "<scratch space>", true));
+  }
+
+  // Plop that string into a scratch buffer, create a string literal and then
+  // go with that.
+  auto ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf));
+  SourceLocation Begin = 
S.getSourceManager().getLocForStartOfFile(ScratchFile);
+  QualType SLType = S.Context.getStringLiteralArrayType(S.Context.CharTy,
+                                                        FormatString.length());
+  SL = StringLiteral::Create(S.Context, FormatString,
+                             StringLiteralKind::Ordinary, false, SLType, 
Begin);
+  return SLCER_Evaluated;
 }
 
 StringRef Sema::GetFormatStringTypeName(Sema::FormatStringType FST) {
@@ -6973,10 +7016,11 @@ void CheckFormatHandler::EmitFormatDiagnostic(
     S.Diag(IsStringLocation ? ArgumentExpr->getExprLoc() : Loc, PDiag)
       << ArgumentExpr->getSourceRange();
 
-    const Sema::SemaDiagnosticBuilder &Note =
-      S.Diag(IsStringLocation ? Loc : StringRange.getBegin(),
-             diag::note_format_string_defined);
-
+    SourceLocation DiagLoc = IsStringLocation ? Loc : StringRange.getBegin();
+    unsigned DiagID = S.getSourceManager().isWrittenInScratchSpace(DiagLoc)
+                          ? diag::note_format_string_evaluated_to
+                          : diag::note_format_string_defined;
+    const Sema::SemaDiagnosticBuilder &Note = S.Diag(DiagLoc, DiagID);
     Note << StringRange;
     Note << FixIt;
   }
diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c
index af30ad5d15fe2..a94e0619ce843 100644
--- a/clang/test/Sema/format-strings.c
+++ b/clang/test/Sema/format-strings.c
@@ -3,6 +3,11 @@
 // RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem 
%S/Inputs -triple=x86_64-unknown-fuchsia %s
 // RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem 
%S/Inputs -triple=x86_64-linux-android %s
 
+// expected-note@-5{{format string was constant-evaluated}}
+// ^^^ there will be a <scratch space> SourceLocation caused by the
+// test_consteval_init_array test, that -verify treats as if it showed up at
+// line 1 of this file.
+
 #include <stdarg.h>
 #include <stddef.h>
 #define __need_wint_t
@@ -900,3 +905,12 @@ void test_promotion(void) {
   // pointers
   printf("%s", i); // expected-warning{{format specifies type 'char *' but the 
argument has type 'int'}}
 }
+
+void test_consteval_init_array(void) {
+  const char buf_not_terminated[] = {'%', 55 * 2 + 5, '\n'}; // 
expected-note{{format string is defined here}}
+  printf(buf_not_terminated, "hello"); // expected-warning{{format string is 
not null-terminated}}
+
+  const char buf[] = {'%', 55 * 2 + 5, '\n', 0};
+  printf(buf, "hello"); // no-warning
+  printf(buf, 123); // expected-warning{{format specifies type 'char *' but 
the argument has type 'int'}}
+}
diff --git a/clang/test/SemaCXX/format-strings.cpp 
b/clang/test/SemaCXX/format-strings.cpp
index 48cf23999a94f..7b04ea7d8bc75 100644
--- a/clang/test/SemaCXX/format-strings.cpp
+++ b/clang/test/SemaCXX/format-strings.cpp
@@ -1,6 +1,14 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso 
-Wformat-pedantic -fblocks %s
 // RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso 
-fblocks -std=c++98 %s
 // RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso 
-Wformat-pedantic -fblocks -std=c++11 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso 
-Wformat-pedantic -fblocks -std=c++20 %s
+
+#if __cplusplus >= 202000l
+// expected-note@-6{{format string was constant-evaluated}}
+// ^^^ there will be a <scratch space> SourceLocation caused by the
+// test_constexpr_string test, that -verify treats as if it showed up at
+// line 1 of this file.
+#endif
 
 #include <stdarg.h>
 
@@ -238,3 +246,69 @@ void f(Scoped1 S1, Scoped2 S2) {
 }
 
 #endif
+
+#if __cplusplus >= 202000L
+class my_string {
+  char *data;
+  unsigned size;
+
+public:
+  template<unsigned N>
+  constexpr my_string(const char (&literal)[N]) {
+    data = new char[N+1];
+    for (size = 0; size < N; ++size) {
+      data[size] = literal[size];
+      if (data[size] == 0)
+        break;
+    }
+    data[size] = 0;
+  }
+
+  my_string(const my_string &) = delete;
+
+  constexpr my_string(my_string &&that) {
+    data = that.data;
+    size = that.size;
+    that.data = nullptr;
+    that.size = 0;
+  }
+
+  constexpr ~my_string() {
+    delete[] data;
+  }
+
+  template<unsigned N>
+  constexpr void append(const char (&literal)[N]) {
+    char *cat = new char[size + N + 1];
+    char *tmp = cat;
+    for (unsigned i = 0; i < size; ++i) {
+      *tmp++ = data[i];
+    }
+    for (unsigned i = 0; i < N; ++i) {
+      *tmp = literal[i];
+      if (*tmp == 0)
+        break;
+      ++tmp;
+    }
+    *tmp = 0;
+    delete[] data;
+    size = tmp - cat;
+    data = cat;
+  }
+
+  constexpr const char *c_str() const {
+    return data;
+  }
+};
+
+constexpr my_string const_string() {
+  my_string str("hello %s");
+  str.append(", %d");
+  return str;
+}
+
+void test_constexpr_string() {
+  printf(const_string().c_str(), "hello", 123); // no-warning
+  printf(const_string().c_str(), 123, 456); // expected-warning {{format 
specifies type 'char *' but the argument has type 'int'}}
+}
+#endif

>From 3632a83d3af5ab3958226ebf6e416643d7bfb3f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Cloutier?= <fclout...@apple.com>
Date: Tue, 22 Apr 2025 13:01:40 -0700
Subject: [PATCH 2/3] Create StringEvalResult to hold result of compile-time
 string evaluation

Several users of compile-time string evaluation can meaningfully use the
special case that compile-time string evaluation resolves to a string
literal in source (for instance, to improve diagnostics). This changes
Expr::tryEvaluateString to return a StringEvalResult, which can hold
either a string literal and an offset or a std::string of evaluated
characters.
---
 clang/include/clang/AST/Expr.h                |  30 ++--
 .../clang/Basic/DiagnosticSemaKinds.td        |   2 +-
 clang/lib/AST/ExprConstant.cpp                | 141 +++++++++++++-----
 clang/lib/Analysis/UnsafeBufferUsage.cpp      |   5 +-
 clang/lib/CodeGen/CGBuiltin.cpp               |   5 +-
 clang/lib/Sema/SemaChecking.cpp               |  80 +++++-----
 clang/test/SemaCXX/verbose-trap.cpp           |  10 +-
 7 files changed, 182 insertions(+), 91 deletions(-)

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 78eda8bc3c43e..54f0ac28ac082 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -791,17 +791,25 @@ class Expr : public ValueStmt {
                                  const Expr *PtrExpression, ASTContext &Ctx,
                                  EvalResult &Status) const;
 
-  /// Fill \c Into with the first characters that can be constant-evaluated
-  /// from this \c Expr . When encountering a null character, stop and return
-  /// \c true (the null is not returned in \c Into ). Return \c false if
-  /// evaluation runs off the end of the constant-evaluated string before it
-  /// encounters a null character.
-  bool tryEvaluateString(ASTContext &Ctx, std::string &Into) const;
-
-  /// If the current \c Expr can be evaluated to a pointer to a null-terminated
-  /// constant string, return the constant string (without the terminating
-  /// null).
-  std::optional<std::string> tryEvaluateString(ASTContext &Ctx) const;
+  class StringEvalResult {
+    std::string Storage;
+    const StringLiteral *SL;
+    uint64_t Offset;
+
+  public:
+    StringEvalResult(std::string Contents);
+    StringEvalResult(const StringLiteral *SL, uint64_t Offset);
+
+    llvm::StringRef getString() const;
+    bool getStringLiteral(const StringLiteral *&SL, uint64_t &Offset) const;
+  };
+
+  /// If the current \c Expr can be evaluated to a pointer to a constant 
string,
+  /// return the constant string. The string may not be NUL-terminated. If
+  /// \c NullTerminated is supplied, it is set to whether there is at least one
+  /// NUL character in the string.
+  std::optional<StringEvalResult>
+  tryEvaluateString(ASTContext &Ctx, bool *NullTerminated = nullptr) const;
 
   /// Enumeration used to describe the kind of Null pointer constant
   /// returned from \c isNullPointerConstant().
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 4139ff2737c76..e3b73c149b3d5 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9105,7 +9105,7 @@ def err_expected_callable_argument : Error<
 def note_building_builtin_dump_struct_call : Note<
   "in call to printing function with arguments '(%0)' while dumping struct">;
 def err_builtin_verbose_trap_arg : Error<
-  "argument to __builtin_verbose_trap must %select{be a pointer to a constant 
string|not contain $}0">;
+  "argument to __builtin_verbose_trap must %select{be a pointer to a constant 
NUL-terminated string|not contain $}0">;
 
 def err_atomic_load_store_uses_lib : Error<
   "atomic %select{load|store}0 requires runtime support that is not "
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index fec92edf49096..6c5e8260131bb 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -1922,9 +1922,17 @@ static bool EvaluateComplex(const Expr *E, ComplexValue 
&Res, EvalInfo &Info);
 static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result,
                            EvalInfo &Info);
 static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result);
+static bool EvaluateStringAsLValue(EvalInfo &Info, const Expr *E,
+                                   QualType &CharTy, LValue &String);
+static const StringLiteral *StringLValueIsLiteral(EvalInfo &Info,
+                                                  LValue &String,
+                                                  QualType CharTy,
+                                                  uint64_t &Offset);
+template <typename CharAction>
+static bool IterateStringLValue(EvalInfo &Info, const Expr *E, QualType CharTy,
+                                LValue &String, CharAction &&Action);
 static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
-                                  EvalInfo &Info,
-                                  std::string *StringResult = nullptr);
+                                  EvalInfo &Info);
 
 /// Evaluate an integer or fixed point expression into an APResult.
 static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result,
@@ -17943,14 +17951,12 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, 
ASTContext &Ctx,
   return tryEvaluateBuiltinObjectSize(this, Type, Info, Result);
 }
 
-static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
-                                  EvalInfo &Info, std::string *StringResult) {
+static bool EvaluateStringAsLValue(EvalInfo &Info, const Expr *E,
+                                   QualType &CharTy, LValue &String) {
   QualType Ty = E->getType();
   if (!E->isPRValue())
     return false;
 
-  LValue String;
-  QualType CharTy;
   if (Ty->canDecayToPointerType()) {
     if (E->isGLValue()) {
       if (!EvaluateLValue(E, String, Info))
@@ -17975,8 +17981,13 @@ static bool EvaluateBuiltinStrLen(const Expr *E, 
uint64_t &Result,
   } else {
     return false;
   }
+  return true;
+}
 
-  // Fast path: if it's a string literal, search the string value.
+static const StringLiteral *StringLValueIsLiteral(EvalInfo &Info,
+                                                  LValue &String,
+                                                  QualType CharTy,
+                                                  uint64_t &Offset) {
   if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
           String.getLValueBase().dyn_cast<const Expr *>())) {
     StringRef Str = S->getBytes();
@@ -17985,49 +17996,111 @@ static bool EvaluateBuiltinStrLen(const Expr *E, 
uint64_t &Result,
         S->getCharByteWidth() == 1 &&
         // FIXME: Add fast-path for wchar_t too.
         Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) {
-      Str = Str.substr(Off);
-
-      StringRef::size_type Pos = Str.find(0);
-      if (Pos != StringRef::npos)
-        Str = Str.substr(0, Pos);
-
-      Result = Str.size();
-      if (StringResult)
-        *StringResult = Str;
-      return true;
+      Offset = static_cast<uint64_t>(Off);
+      return S;
     }
-
-    // Fall through to slow path.
   }
+  return nullptr;
+}
 
-  // Slow path: scan the bytes of the string looking for the terminating 0.
-  for (uint64_t Strlen = 0; /**/; ++Strlen) {
+template <typename CharAction>
+static bool IterateStringLValue(EvalInfo &Info, const Expr *E, QualType CharTy,
+                                LValue &String, CharAction &&Action) {
+  while (true) {
     APValue Char;
     if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) ||
         !Char.isInt())
       return false;
-    if (!Char.getInt()) {
-      Result = Strlen;
+    if (!Action(Char.getInt().getExtValue()))
       return true;
-    } else if (StringResult)
-      StringResult->push_back(Char.getInt().getExtValue());
     if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1))
       return false;
   }
 }
 
-bool Expr::tryEvaluateString(ASTContext &Ctx, std::string &StringResult) const 
{
+static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
+                                  EvalInfo &Info) {
+  LValue String;
+  QualType CharTy;
+  if (!EvaluateStringAsLValue(Info, E, CharTy, String))
+    return false;
+
+  // Fast path: if it's a string literal, search the string value.
+  uint64_t Off;
+  if (const auto *S = StringLValueIsLiteral(Info, String, CharTy, Off)) {
+    StringRef Str = S->getBytes().substr(Off);
+
+    StringRef::size_type Pos = Str.find(0);
+    if (Pos != StringRef::npos)
+      Str = Str.substr(0, Pos);
+
+    Result = Str.size();
+    return true;
+  }
+
+  // Slow path: scan the bytes of the string looking for the terminating 0.
+  Result = 0;
+  return IterateStringLValue(Info, E, CharTy, String, [&](int Char) {
+    if (Char) {
+      Result++;
+      return true;
+    } else
+      return false;
+  });
+}
+
+Expr::StringEvalResult::StringEvalResult(const StringLiteral *SL,
+                                         uint64_t Offset)
+    : SL(SL), Offset(Offset) {}
+
+Expr::StringEvalResult::StringEvalResult(std::string Contents)
+    : Storage(std::move(Contents)), SL(nullptr), Offset(0) {}
+
+llvm::StringRef Expr::StringEvalResult::getString() const {
+  return SL ? SL->getBytes().substr(Offset) : Storage;
+}
+
+bool Expr::StringEvalResult::getStringLiteral(const StringLiteral *&SL,
+                                              uint64_t &Offset) const {
+  if (this->SL) {
+    SL = this->SL;
+    Offset = this->Offset;
+    return true;
+  }
+  return false;
+}
+
+std::optional<Expr::StringEvalResult>
+Expr::tryEvaluateString(ASTContext &Ctx, bool *NullTerminated) const {
+  if (NullTerminated)
+    *NullTerminated = false;
+
   Expr::EvalStatus Status;
   EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
-  uint64_t Result;
-  return EvaluateBuiltinStrLen(this, Result, Info, &StringResult);
-}
+  LValue String;
+  QualType CharTy;
+  if (!EvaluateStringAsLValue(Info, this, CharTy, String))
+    return {};
+
+  uint64_t Off;
+  if (const auto *S = StringLValueIsLiteral(Info, String, CharTy, Off)) {
+    if (NullTerminated)
+      *NullTerminated = true;
+    return StringEvalResult(S, Off);
+  }
+
+  std::string Result;
+  bool NTFound = IterateStringLValue(Info, this, CharTy, String, [&](int Char) 
{
+    if (Char) {
+      Result.push_back(Char);
+      return true;
+    } else
+      return false;
+  });
 
-std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
-  std::string StringResult;
-  if (tryEvaluateString(Ctx, StringResult))
-    return StringResult;
-  return {};
+  if (NullTerminated)
+    *NullTerminated = NTFound;
+  return StringEvalResult(Result);
 }
 
 template <typename T>
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index fbe753de9ef1f..10d4c63a1da05 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -725,12 +725,13 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, 
const Expr *&UnsafeArg,
   const Expr *Fmt = Call->getArg(FmtArgIdx);
 
   if (auto *SL = dyn_cast<clang::StringLiteral>(Fmt->IgnoreParenImpCasts())) {
+    std::optional<Expr::StringEvalResult> SER;
     StringRef FmtStr;
 
     if (SL->getCharByteWidth() == 1)
       FmtStr = SL->getString();
-    else if (auto EvaledFmtStr = SL->tryEvaluateString(Ctx))
-      FmtStr = *EvaledFmtStr;
+    else if ((SER = SL->tryEvaluateString(Ctx)))
+      FmtStr = SER->getString();
     else
       goto CHECK_UNSAFE_PTR;
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index fe55dfffc1cbe..d92bb977c7546 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3478,8 +3478,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
     llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
     if (getDebugInfo()) {
       TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
-          TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
-          *E->getArg(1)->tryEvaluateString(getContext()));
+          TrapLocation,
+          E->getArg(0)->tryEvaluateString(getContext())->getString(),
+          E->getArg(1)->tryEvaluateString(getContext())->getString());
     }
     ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
     // Currently no attempt is made to prevent traps from being merged.
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 017be929ca18e..0ca41c9758af9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -180,12 +180,14 @@ static bool checkBuiltinVerboseTrap(CallExpr *Call, Sema 
&S) {
     if (Arg->isValueDependent())
       continue;
 
-    std::optional<std::string> ArgString = Arg->tryEvaluateString(S.Context);
+    // Arguments must be pointers to constant strings, must be NUL-terminated,
+    // and cannot contain '$'.
+    bool HasNulTerminator;
+    auto ArgString = Arg->tryEvaluateString(S.Context, &HasNulTerminator);
     int DiagMsgKind = -1;
-    // Arguments must be pointers to constant strings and cannot use '$'.
-    if (!ArgString.has_value())
+    if (!(ArgString && HasNulTerminator))
       DiagMsgKind = 0;
-    else if (ArgString->find('$') != std::string::npos)
+    else if (ArgString->getString().find('$') != llvm::StringRef::npos)
       DiagMsgKind = 1;
 
     if (DiagMsgKind >= 0) {
@@ -5943,7 +5945,8 @@ enum StringLiteralConstEvalResult {
 };
 
 static StringLiteralConstEvalResult
-constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL);
+constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL,
+                         uint64_t &Offset);
 
 // Determine if an expression is a string literal or constant string.
 // If this function returns false on the arguments to a function expecting a
@@ -6330,8 +6333,9 @@ static StringLiteralCheckType checkFormatStringExpr(
     break;
   }
 
+  uint64_t EvalOffset = 0;
   const StringLiteral *FakeLiteral = nullptr;
-  switch (constEvalStringAsLiteral(S, E, FakeLiteral)) {
+  switch (constEvalStringAsLiteral(S, E, FakeLiteral, EvalOffset)) {
   case SLCER_NotEvaluated:
     return SLCT_NotALiteral;
 
@@ -6348,51 +6352,49 @@ static StringLiteralCheckType checkFormatStringExpr(
   case SLCER_Evaluated:
     InFunctionCall = false;
     E = FakeLiteral;
+    Offset = EvalOffset;
     goto tryAgain;
   }
 }
 
 static StringLiteralConstEvalResult
-constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL) {
-  // As a last resort, try to constant-evaluate the format string. If it
-  // evaluates to a string literal in the first place, we can point to that
-  // string literal in source and use that.
-  Expr::EvalResult Result;
-  if (E->EvaluateAsRValue(Result, S.Context) && Result.Val.isLValue()) {
-    const auto *LVE = Result.Val.getLValueBase().dyn_cast<const Expr *>();
-    if (auto *BaseSL = dyn_cast_or_null<StringLiteral>(LVE)) {
-      SL = BaseSL;
-      return SLCER_Evaluated;
-    }
-  }
-
-  // Otherwise, try to evaluate the expression as a string constant.
-  std::string FormatString;
-  if (!E->tryEvaluateString(S.Context, FormatString)) {
-    return FormatString.empty() ? SLCER_NotEvaluated : SLCER_NotNullTerminated;
-  }
-
+constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL,
+                         uint64_t &Offset) {
+  // As a last resort, try to constant-evaluate the format string.
+  bool HasNul;
+  auto SER = E->tryEvaluateString(S.Context, &HasNul);
+  if (!SER)
+    return SLCER_NotEvaluated;
+  if (!HasNul)
+    return SLCER_NotNullTerminated;
+
+  // If it evaluates to a string literal in the first place, we can point to
+  // that string literal in source and use that.
+  if (SER->getStringLiteral(SL, Offset))
+    return SLCER_Evaluated;
+
+  // Otherwise, lop that string into a scratch buffer, create a string literal
+  // and then go with that.
   std::unique_ptr<llvm::MemoryBuffer> MemBuf;
   {
     llvm::SmallString<80> EscapedString;
     {
       llvm::raw_svector_ostream OS(EscapedString);
       OS << '"';
-      OS.write_escaped(FormatString);
+      OS.write_escaped(SER->getString());
       OS << '"';
     }
     MemBuf.reset(new llvm::SmallVectorMemoryBuffer(std::move(EscapedString),
                                                    "<scratch space>", true));
   }
 
-  // Plop that string into a scratch buffer, create a string literal and then
-  // go with that.
   auto ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf));
   SourceLocation Begin = 
S.getSourceManager().getLocForStartOfFile(ScratchFile);
-  QualType SLType = S.Context.getStringLiteralArrayType(S.Context.CharTy,
-                                                        FormatString.length());
-  SL = StringLiteral::Create(S.Context, FormatString,
+  QualType SLType = S.Context.getStringLiteralArrayType(
+      S.Context.CharTy, SER->getString().size());
+  SL = StringLiteral::Create(S.Context, SER->getString(),
                              StringLiteralKind::Ordinary, false, SLType, 
Begin);
+  Offset = 0;
   return SLCER_Evaluated;
 }
 
@@ -6481,6 +6483,11 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> 
Args,
                                 VariadicCallType CallType, SourceLocation Loc,
                                 SourceRange Range,
                                 llvm::SmallBitVector &CheckedVarArgs) {
+  // As a last resort, Clang attempts to evaluate the format string as a
+  // constant, which is expensive. Before we go down that route, check that
+  // the warnings are at least enabled at Loc, which in the common case points
+  // at the opening parenthesis of the function call.
+
   // CHECK: printf/scanf-like function is called with no format string.
   if (format_idx >= Args.size()) {
     Diag(Loc, diag::warn_missing_format_string) << Range;
@@ -6493,14 +6500,9 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> 
Args,
   //
   // Dynamically generated format strings are difficult to
   // automatically vet at compile time.  Requiring that format strings
-  // are string literals: (1) permits the checking of format strings by
-  // the compiler and thereby (2) can practically remove the source of
-  // many format string exploits.
-
-  // Format string can be either ObjC string (e.g. @"%d") or
-  // C string (e.g. "%d")
-  // ObjC string uses the same format specifiers as C string, so we can use
-  // the same format string checking logic for both ObjC and C strings.
+  // can evaluate to constant strings: (1) permits the checking of format
+  // strings by the compiler and thereby (2) can practically remove the source
+  // of many format string exploits.
   UncoveredArgHandler UncoveredArg;
   StringLiteralCheckType CT = checkFormatStringExpr(
       *this, ReferenceFormatString, OrigFormatExpr, Args, APK, format_idx,
diff --git a/clang/test/SemaCXX/verbose-trap.cpp 
b/clang/test/SemaCXX/verbose-trap.cpp
index 2503f9860d9c3..5562fb453948c 100644
--- a/clang/test/SemaCXX/verbose-trap.cpp
+++ b/clang/test/SemaCXX/verbose-trap.cpp
@@ -15,6 +15,9 @@ char const constMsg3[] = "hello";
 
 template <const char * const category, const char * const reason>
 void f(const char * arg) {
+  const char buf[] = {'a', 'b', 'c'};
+  const char buf_nt1[] = {'a', 'b', 'c', 0};
+  const char buf_nt2[] = {'a', 'b', 0, 'c'};
   __builtin_verbose_trap("cat1", "Arbitrary string literals can be used!");
   __builtin_verbose_trap(" cat1 ", "Argument_must_not_be_null");
   __builtin_verbose_trap("cat" "egory1", "hello" "world");
@@ -24,9 +27,12 @@ void f(const char * arg) {
   __builtin_verbose_trap(); // expected-error {{too few arguments}}
   __builtin_verbose_trap(""); // expected-error {{too few arguments}}
   __builtin_verbose_trap("", "", ""); // expected-error {{too many arguments}}
-  __builtin_verbose_trap("", 0); // expected-error {{argument to 
__builtin_verbose_trap must be a pointer to a constant string}}
+  __builtin_verbose_trap("", 0); // expected-error {{argument to 
__builtin_verbose_trap must be a pointer to a constant NUL-terminated string}}
   __builtin_verbose_trap(1, ""); // expected-error {{cannot initialize a 
parameter of type 'const char *' with an rvalue of type 'int'}}
-  __builtin_verbose_trap(arg, ""); // expected-error {{argument to 
__builtin_verbose_trap must be a pointer to a constant string}}
+  __builtin_verbose_trap(arg, ""); // expected-error {{argument to 
__builtin_verbose_trap must be a pointer to a constant NUL-terminated string}}
+  __builtin_verbose_trap(buf, ""); // expected-error {{argument to 
__builtin_verbose_trap must be a pointer to a constant NUL-terminated string}}
+  __builtin_verbose_trap(buf_nt1, "");
+  __builtin_verbose_trap(buf_nt2, "");
   __builtin_verbose_trap("cat$1", "hel$lo"); // expected-error 2 {{argument to 
__builtin_verbose_trap must not contain $}}
   __builtin_verbose_trap(category, reason);
   __builtin_verbose_trap(u8"cat1", u8"hello");

>From 504cdb609d50414c6825a0ced8afb3a91146d4c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Cloutier?= <fclout...@apple.com>
Date: Tue, 22 Apr 2025 14:12:17 -0700
Subject: [PATCH 3/3] Address naming/documentation-level review feedback

---
 clang/docs/ReleaseNotes.rst                   | 35 +++++++++++++++++--
 .../clang/Basic/DiagnosticSemaKinds.td        |  2 +-
 clang/lib/AST/ExprConstant.cpp                | 11 +++---
 clang/lib/Sema/SemaChecking.cpp               | 25 ++++++++-----
 4 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 05566d66a65d2..8529e5b991cba 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -265,8 +265,39 @@ related warnings within the method body.
   ``format_matches`` accepts an example valid format string as its third
   argument. For more information, see the Clang attributes documentation.
 
-- Format string checking now supports the compile-time evaluation of format
-  strings as a fallback mechanism.
+- Clang can now verify format strings that can be constant-folded even if they
+  do not resolve to a string literal. For instance, all of these can now be
+  verified:
+
+  .. code-block:: c++
+
+    const char format[] = {'h', 'e', 'l', 'l', 'o', ' ', '%', 's', 0};
+    printf(format, "world");
+    // no warning
+    
+    printf(format, 123);
+    // warning: format specifies type 'char *' but the argument has type 'int'
+
+    printf(("%"s + "i"s).c_str(), "world");
+    // warning: format specifies type 'int' but the argument has type 'char *'
+  
+  When the format expression does not evaluate to a string literal, Clang
+  points diagnostics into a pseudo-file called ``<scratch space>`` that 
contains
+  the format string literal as it evaluated, like so:
+
+  .. code-block:: text
+
+    example.c:6:17: warning: format specifies type 'char *' but the argument 
has type 'int' [-Wformat]
+        6 |     printf(format, 123);
+          |            ~~~~~~  ^~~
+    <scratch space>:1:4: note: format string resolved to a constant string
+        1 | "hello %s"
+          |        ^~
+          |        %d
+  
+  This may mean that format strings which were previously unverified (or which
+  triggered ``-Wformat-nonliteral``) are now verified by ``-Wformat`` and its
+  allies.
 
 - Introduced a new statement attribute ``[[clang::atomic]]`` that enables
   fine-grained control over atomic code generation on a per-statement basis.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e3b73c149b3d5..57287bb8e0115 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10171,7 +10171,7 @@ def warn_format_bool_as_character : Warning<
   InGroup<Format>;
 def note_format_string_defined : Note<"format string is defined here">;
 def note_format_string_evaluated_to : Note<
-  "format string was constant-evaluated">;
+  "format string resolved to a constant string">;
 def note_format_fix_specifier : Note<"did you mean to use '%0'?">;
 def note_printf_c_str: Note<"did you mean to call the %0 method?">;
 def note_format_security_fixit: Note<
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 6c5e8260131bb..e5f41a9f9b294 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -17974,14 +17974,17 @@ static bool EvaluateStringAsLValue(EvalInfo &Info, 
const Expr *E,
       String.addArray(Info, E, CAT);
     else
       String.addUnsizedArray(Info, E, CharTy);
-  } else if (Ty->hasPointerRepresentation()) {
+    return true;
+  }
+
+  if (Ty->hasPointerRepresentation()) {
     if (!EvaluatePointer(E, String, Info))
       return false;
     CharTy = Ty->getPointeeType();
-  } else {
-    return false;
+    return true;
   }
-  return true;
+
+  return false;
 }
 
 static const StringLiteral *StringLValueIsLiteral(EvalInfo &Info,
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 0ca41c9758af9..6184fe384fee5 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5938,15 +5938,22 @@ static void CheckFormatString(
     llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
     bool IgnoreStringsWithoutSpecifiers);
 
-enum StringLiteralConstEvalResult {
+enum StringLiteralConstantEvaluationResult {
   SLCER_NotEvaluated,
   SLCER_NotNullTerminated,
   SLCER_Evaluated,
 };
 
-static StringLiteralConstEvalResult
-constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL,
-                         uint64_t &Offset);
+/// Attempt to fold \c E into a constant string that \c checkFormatStringExpr
+/// can use. If \c E folds to a string literal, that string literal will be 
used
+/// for diagnostics. If \c E has a constant string value but it does not fold 
to
+/// a literal (for instance, ("%"s + "i"s).c_str() constant-folds to "%i"), a
+/// <scratch space> pseudo-source file will be allocated, containing a string
+/// literal representation of the constant string, and format diagnostics will
+/// point to it.
+static StringLiteralConstantEvaluationResult
+EvaluateStringAndCreateLiteral(Sema &S, const Expr *E, const StringLiteral 
*&SL,
+                               uint64_t &Offset);
 
 // Determine if an expression is a string literal or constant string.
 // If this function returns false on the arguments to a function expecting a
@@ -6335,7 +6342,7 @@ static StringLiteralCheckType checkFormatStringExpr(
 
   uint64_t EvalOffset = 0;
   const StringLiteral *FakeLiteral = nullptr;
-  switch (constEvalStringAsLiteral(S, E, FakeLiteral, EvalOffset)) {
+  switch (EvaluateStringAndCreateLiteral(S, E, FakeLiteral, EvalOffset)) {
   case SLCER_NotEvaluated:
     return SLCT_NotALiteral;
 
@@ -6357,9 +6364,9 @@ static StringLiteralCheckType checkFormatStringExpr(
   }
 }
 
-static StringLiteralConstEvalResult
-constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL,
-                         uint64_t &Offset) {
+static StringLiteralConstantEvaluationResult
+EvaluateStringAndCreateLiteral(Sema &S, const Expr *E, const StringLiteral 
*&SL,
+                               uint64_t &Offset) {
   // As a last resort, try to constant-evaluate the format string.
   bool HasNul;
   auto SER = E->tryEvaluateString(S.Context, &HasNul);
@@ -6388,7 +6395,7 @@ constEvalStringAsLiteral(Sema &S, const Expr *E, const 
StringLiteral *&SL,
                                                    "<scratch space>", true));
   }
 
-  auto ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf));
+  FileID ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf));
   SourceLocation Begin = 
S.getSourceManager().getLocForStartOfFile(ScratchFile);
   QualType SLType = S.Context.getStringLiteralArrayType(
       S.Context.CharTy, SER->getString().size());

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to