llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: None (apple-fcloutier) <details> <summary>Changes</summary> I [asked on the forums](https://discourse.llvm.org/t/should-attribute-format-checking-try-to-const-evaluate-strings/85854/4) and people were generally supportive of the idea, so: Clang's -Wformat checker can see through an inconsistent set of operations. We can fall back to the recently-updated constant string evaluation infrastructure when Clang's initial evaluation fails for a second chance at figuring out what the format string is intended to be. This enables analyzing format strings that were built at compile-time with std::string and other constexpr-capable types in C++, as long as all pieces are also constexpr-visible, and a number of other patterns. As a side effect, it also enables `tryEvaluateString` on char arrays (rather than only char pointers). Radar-ID: rdar://99940060 --- Full diff: https://github.com/llvm/llvm-project/pull/135864.diff 7 Files Affected: - (modified) clang/docs/ReleaseNotes.rst (+3) - (modified) clang/include/clang/AST/Expr.h (+8-1) - (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+2) - (modified) clang/lib/AST/ExprConstant.cpp (+32-8) - (modified) clang/lib/Sema/SemaChecking.cpp (+94-50) - (modified) clang/test/Sema/format-strings.c (+14) - (modified) clang/test/SemaCXX/format-strings.cpp (+74) ``````````diff diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 77bf3355af9da..05566d66a65d2 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -265,6 +265,9 @@ related warnings within the method body. ``format_matches`` accepts an example valid format string as its third argument. For more information, see the Clang attributes documentation. +- Format string checking now supports the compile-time evaluation of format + strings as a fallback mechanism. + - Introduced a new statement attribute ``[[clang::atomic]]`` that enables fine-grained control over atomic code generation on a per-statement basis. Supported options include ``[no_]remote_memory``, diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 20f70863a05b3..78eda8bc3c43e 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -791,7 +791,14 @@ class Expr : public ValueStmt { const Expr *PtrExpression, ASTContext &Ctx, EvalResult &Status) const; - /// If the current Expr can be evaluated to a pointer to a null-terminated + /// Fill \c Into with the first characters that can be constant-evaluated + /// from this \c Expr . When encountering a null character, stop and return + /// \c true (the null is not returned in \c Into ). Return \c false if + /// evaluation runs off the end of the constant-evaluated string before it + /// encounters a null character. + bool tryEvaluateString(ASTContext &Ctx, std::string &Into) const; + + /// If the current \c Expr can be evaluated to a pointer to a null-terminated /// constant string, return the constant string (without the terminating /// null). std::optional<std::string> tryEvaluateString(ASTContext &Ctx) const; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 3cb2731488fab..4139ff2737c76 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10170,6 +10170,8 @@ def warn_format_bool_as_character : Warning< "using '%0' format specifier, but argument has boolean value">, InGroup<Format>; def note_format_string_defined : Note<"format string is defined here">; +def note_format_string_evaluated_to : Note< + "format string was constant-evaluated">; def note_format_fix_specifier : Note<"did you mean to use '%0'?">; def note_printf_c_str: Note<"did you mean to call the %0 method?">; def note_format_security_fixit: Note< diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 80ece3c4ed7e1..fec92edf49096 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -17945,15 +17945,36 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result, EvalInfo &Info, std::string *StringResult) { - if (!E->getType()->hasPointerRepresentation() || !E->isPRValue()) + QualType Ty = E->getType(); + if (!E->isPRValue()) return false; LValue String; - - if (!EvaluatePointer(E, String, Info)) + QualType CharTy; + if (Ty->canDecayToPointerType()) { + if (E->isGLValue()) { + if (!EvaluateLValue(E, String, Info)) + return false; + } else { + APValue &Value = Info.CurrentCall->createTemporary( + E, Ty, ScopeKind::FullExpression, String); + if (!EvaluateInPlace(Value, Info, String, E)) + return false; + } + // The result is a pointer to the first element of the array. + auto *AT = Info.Ctx.getAsArrayType(Ty); + CharTy = AT->getElementType(); + if (auto *CAT = dyn_cast<ConstantArrayType>(AT)) + String.addArray(Info, E, CAT); + else + String.addUnsizedArray(Info, E, CharTy); + } else if (Ty->hasPointerRepresentation()) { + if (!EvaluatePointer(E, String, Info)) + return false; + CharTy = Ty->getPointeeType(); + } else { return false; - - QualType CharTy = E->getType()->getPointeeType(); + } // Fast path: if it's a string literal, search the string value. if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>( @@ -17995,13 +18016,16 @@ static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result, } } -std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const { +bool Expr::tryEvaluateString(ASTContext &Ctx, std::string &StringResult) const { Expr::EvalStatus Status; EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold); uint64_t Result; - std::string StringResult; + return EvaluateBuiltinStrLen(this, Result, Info, &StringResult); +} - if (EvaluateBuiltinStrLen(this, Result, Info, &StringResult)) +std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const { + std::string StringResult; + if (tryEvaluateString(Ctx, StringResult)) return StringResult; return {}; } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index bffd0dd461d3d..017be929ca18e 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -98,6 +98,7 @@ #include "llvm/Support/Locale.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/RISCVTargetParser.h" #include "llvm/TargetParser/Triple.h" @@ -5935,8 +5936,14 @@ static void CheckFormatString( llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg, bool IgnoreStringsWithoutSpecifiers); -static const Expr *maybeConstEvalStringLiteral(ASTContext &Context, - const Expr *E); +enum StringLiteralConstEvalResult { + SLCER_NotEvaluated, + SLCER_NotNullTerminated, + SLCER_Evaluated, +}; + +static StringLiteralConstEvalResult +constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL); // Determine if an expression is a string literal or constant string. // If this function returns false on the arguments to a function expecting a @@ -5968,14 +5975,9 @@ static StringLiteralCheckType checkFormatStringExpr( switch (E->getStmtClass()) { case Stmt::InitListExprClass: - // Handle expressions like {"foobar"}. - if (const clang::Expr *SLE = maybeConstEvalStringLiteral(S.Context, E)) { - return checkFormatStringExpr( - S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg, - Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs, - UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers); - } - return SLCT_NotALiteral; + // try to constant-evaluate the string + break; + case Stmt::BinaryConditionalOperatorClass: case Stmt::ConditionalOperatorClass: { // The expression is a literal if both sub-expressions were, and it was @@ -6066,10 +6068,9 @@ static StringLiteralCheckType checkFormatStringExpr( if (InitList->isStringLiteralInit()) Init = InitList->getInit(0)->IgnoreParenImpCasts(); } - return checkFormatStringExpr( - S, ReferenceFormatString, Init, Args, APK, format_idx, - firstDataArg, Type, CallType, - /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, Offset); + InFunctionCall = false; + E = Init; + goto tryAgain; } } @@ -6142,11 +6143,9 @@ static StringLiteralCheckType checkFormatStringExpr( } return SLCT_UncheckedLiteral; } - return checkFormatStringExpr( - S, ReferenceFormatString, PVFormatMatches->getFormatString(), - Args, APK, format_idx, firstDataArg, Type, CallType, - /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, - Offset, IgnoreStringsWithoutSpecifiers); + E = PVFormatMatches->getFormatString(); + InFunctionCall = false; + goto tryAgain; } } @@ -6214,20 +6213,13 @@ static StringLiteralCheckType checkFormatStringExpr( unsigned BuiltinID = FD->getBuiltinID(); if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString || BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) { - const Expr *Arg = CE->getArg(0); - return checkFormatStringExpr( - S, ReferenceFormatString, Arg, Args, APK, format_idx, - firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers); + E = CE->getArg(0); + goto tryAgain; } } } - if (const Expr *SLE = maybeConstEvalStringLiteral(S.Context, E)) - return checkFormatStringExpr( - S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg, - Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs, - UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers); - return SLCT_NotALiteral; + // try to constant-evaluate the string + break; } case Stmt::ObjCMessageExprClass: { const auto *ME = cast<ObjCMessageExpr>(E); @@ -6248,11 +6240,8 @@ static StringLiteralCheckType checkFormatStringExpr( IgnoreStringsWithoutSpecifiers = true; } - const Expr *Arg = ME->getArg(FA->getFormatIdx().getASTIndex()); - return checkFormatStringExpr( - S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg, - Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg, - Offset, IgnoreStringsWithoutSpecifiers); + E = ME->getArg(FA->getFormatIdx().getASTIndex()); + goto tryAgain; } } @@ -6314,7 +6303,8 @@ static StringLiteralCheckType checkFormatStringExpr( } } - return SLCT_NotALiteral; + // try to constant-evaluate the string + break; } case Stmt::UnaryOperatorClass: { const UnaryOperator *UnaOp = cast<UnaryOperator>(E); @@ -6331,26 +6321,79 @@ static StringLiteralCheckType checkFormatStringExpr( } } - return SLCT_NotALiteral; + // try to constant-evaluate the string + break; } default: + // try to constant-evaluate the string + break; + } + + const StringLiteral *FakeLiteral = nullptr; + switch (constEvalStringAsLiteral(S, E, FakeLiteral)) { + case SLCER_NotEvaluated: return SLCT_NotALiteral; + + case SLCER_NotNullTerminated: + S.Diag(Args[format_idx]->getBeginLoc(), + diag::warn_printf_format_string_not_null_terminated) + << Args[format_idx]->getSourceRange(); + if (!InFunctionCall) + S.Diag(E->getBeginLoc(), diag::note_format_string_defined); + // Stop checking, as this might just mean we're missing a chunk of the + // format string and there would be other spurious format issues. + return SLCT_UncheckedLiteral; + + case SLCER_Evaluated: + InFunctionCall = false; + E = FakeLiteral; + goto tryAgain; } } -// If this expression can be evaluated at compile-time, -// check if the result is a StringLiteral and return it -// otherwise return nullptr -static const Expr *maybeConstEvalStringLiteral(ASTContext &Context, - const Expr *E) { +static StringLiteralConstEvalResult +constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL) { + // As a last resort, try to constant-evaluate the format string. If it + // evaluates to a string literal in the first place, we can point to that + // string literal in source and use that. Expr::EvalResult Result; - if (E->EvaluateAsRValue(Result, Context) && Result.Val.isLValue()) { + if (E->EvaluateAsRValue(Result, S.Context) && Result.Val.isLValue()) { const auto *LVE = Result.Val.getLValueBase().dyn_cast<const Expr *>(); - if (isa_and_nonnull<StringLiteral>(LVE)) - return LVE; + if (auto *BaseSL = dyn_cast_or_null<StringLiteral>(LVE)) { + SL = BaseSL; + return SLCER_Evaluated; + } } - return nullptr; + + // Otherwise, try to evaluate the expression as a string constant. + std::string FormatString; + if (!E->tryEvaluateString(S.Context, FormatString)) { + return FormatString.empty() ? SLCER_NotEvaluated : SLCER_NotNullTerminated; + } + + std::unique_ptr<llvm::MemoryBuffer> MemBuf; + { + llvm::SmallString<80> EscapedString; + { + llvm::raw_svector_ostream OS(EscapedString); + OS << '"'; + OS.write_escaped(FormatString); + OS << '"'; + } + MemBuf.reset(new llvm::SmallVectorMemoryBuffer(std::move(EscapedString), + "<scratch space>", true)); + } + + // Plop that string into a scratch buffer, create a string literal and then + // go with that. + auto ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf)); + SourceLocation Begin = S.getSourceManager().getLocForStartOfFile(ScratchFile); + QualType SLType = S.Context.getStringLiteralArrayType(S.Context.CharTy, + FormatString.length()); + SL = StringLiteral::Create(S.Context, FormatString, + StringLiteralKind::Ordinary, false, SLType, Begin); + return SLCER_Evaluated; } StringRef Sema::GetFormatStringTypeName(Sema::FormatStringType FST) { @@ -6973,10 +7016,11 @@ void CheckFormatHandler::EmitFormatDiagnostic( S.Diag(IsStringLocation ? ArgumentExpr->getExprLoc() : Loc, PDiag) << ArgumentExpr->getSourceRange(); - const Sema::SemaDiagnosticBuilder &Note = - S.Diag(IsStringLocation ? Loc : StringRange.getBegin(), - diag::note_format_string_defined); - + SourceLocation DiagLoc = IsStringLocation ? Loc : StringRange.getBegin(); + unsigned DiagID = S.getSourceManager().isWrittenInScratchSpace(DiagLoc) + ? diag::note_format_string_evaluated_to + : diag::note_format_string_defined; + const Sema::SemaDiagnosticBuilder &Note = S.Diag(DiagLoc, DiagID); Note << StringRange; Note << FixIt; } diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c index af30ad5d15fe2..a94e0619ce843 100644 --- a/clang/test/Sema/format-strings.c +++ b/clang/test/Sema/format-strings.c @@ -3,6 +3,11 @@ // RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-unknown-fuchsia %s // RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-linux-android %s +// expected-note@-5{{format string was constant-evaluated}} +// ^^^ there will be a <scratch space> SourceLocation caused by the +// test_consteval_init_array test, that -verify treats as if it showed up at +// line 1 of this file. + #include <stdarg.h> #include <stddef.h> #define __need_wint_t @@ -900,3 +905,12 @@ void test_promotion(void) { // pointers printf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}} } + +void test_consteval_init_array(void) { + const char buf_not_terminated[] = {'%', 55 * 2 + 5, '\n'}; // expected-note{{format string is defined here}} + printf(buf_not_terminated, "hello"); // expected-warning{{format string is not null-terminated}} + + const char buf[] = {'%', 55 * 2 + 5, '\n', 0}; + printf(buf, "hello"); // no-warning + printf(buf, 123); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}} +} diff --git a/clang/test/SemaCXX/format-strings.cpp b/clang/test/SemaCXX/format-strings.cpp index 48cf23999a94f..7b04ea7d8bc75 100644 --- a/clang/test/SemaCXX/format-strings.cpp +++ b/clang/test/SemaCXX/format-strings.cpp @@ -1,6 +1,14 @@ // RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -Wformat-pedantic -fblocks %s // RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -fblocks -std=c++98 %s // RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -Wformat-pedantic -fblocks -std=c++11 %s +// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -Wformat-pedantic -fblocks -std=c++20 %s + +#if __cplusplus >= 202000l +// expected-note@-6{{format string was constant-evaluated}} +// ^^^ there will be a <scratch space> SourceLocation caused by the +// test_constexpr_string test, that -verify treats as if it showed up at +// line 1 of this file. +#endif #include <stdarg.h> @@ -238,3 +246,69 @@ void f(Scoped1 S1, Scoped2 S2) { } #endif + +#if __cplusplus >= 202000L +class my_string { + char *data; + unsigned size; + +public: + template<unsigned N> + constexpr my_string(const char (&literal)[N]) { + data = new char[N+1]; + for (size = 0; size < N; ++size) { + data[size] = literal[size]; + if (data[size] == 0) + break; + } + data[size] = 0; + } + + my_string(const my_string &) = delete; + + constexpr my_string(my_string &&that) { + data = that.data; + size = that.size; + that.data = nullptr; + that.size = 0; + } + + constexpr ~my_string() { + delete[] data; + } + + template<unsigned N> + constexpr void append(const char (&literal)[N]) { + char *cat = new char[size + N + 1]; + char *tmp = cat; + for (unsigned i = 0; i < size; ++i) { + *tmp++ = data[i]; + } + for (unsigned i = 0; i < N; ++i) { + *tmp = literal[i]; + if (*tmp == 0) + break; + ++tmp; + } + *tmp = 0; + delete[] data; + size = tmp - cat; + data = cat; + } + + constexpr const char *c_str() const { + return data; + } +}; + +constexpr my_string const_string() { + my_string str("hello %s"); + str.append(", %d"); + return str; +} + +void test_constexpr_string() { + printf(const_string().c_str(), "hello", 123); // no-warning + printf(const_string().c_str(), 123, 456); // expected-warning {{format specifies type 'char *' but the argument has type 'int'}} +} +#endif `````````` </details> https://github.com/llvm/llvm-project/pull/135864 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits