llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-debuginfo @llvm/pr-subscribers-lldb Author: Rahul Joshi (jurahul) <details> <summary>Changes</summary> - Change formatv() to validate that the number of arguments passed matches number of replacement fields in the format string. - When the format string is a literal, this failure can be easily fixed, but when it's dynamically generated, it may not be as straightforward. - So changed existing formatv() to only accept literal strings as a format string and enable argument count validation for that case only. - Add a `formatvv` variant that allows non-literal format strings and disables argument count verification (the additional v will stand for "variable format string"). --- Patch is 39.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105745.diff 15 Files Affected: - (modified) clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp (+1-1) - (modified) clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp (+1-1) - (modified) lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp (+1-1) - (modified) llvm/include/llvm/Support/FormatVariadic.h (+37-17) - (modified) llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp (+1-2) - (modified) llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp (+4-4) - (modified) llvm/lib/Support/FormatVariadic.cpp (+102-70) - (modified) llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp (+2-2) - (modified) llvm/unittests/Support/FormatVariadicTest.cpp (+86-29) - (modified) llvm/utils/TableGen/IntrinsicEmitter.cpp (+2-2) - (modified) mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp (+2-2) - (modified) mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp (+1-1) - (modified) mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp (+6-7) - (modified) mlir/tools/mlir-tblgen/OpFormatGen.cpp (+6-8) - (modified) mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp (+1-2) ``````````diff diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp index 99e11a15c08dc2..1b89951397cfb1 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp @@ -131,7 +131,7 @@ bool PlacementNewChecker::checkPlaceCapacityIsSufficient( "Storage provided to placement new is only {0} bytes, " "whereas the allocated array type requires more space for " "internal needs", - SizeOfPlaceCI->getValue(), SizeOfTargetCI->getValue())); + SizeOfPlaceCI->getValue())); else Msg = std::string(llvm::formatv( "Storage provided to placement new is only {0} bytes, " diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp index 8f4bd17afc8581..60c035612dcd44 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -1401,7 +1401,7 @@ void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call, ErrnoNote = llvm::formatv("After calling '{0}' {1}", FunctionName, ErrnoNote); } else { - CaseNote = llvm::formatv(Case.getNote().str().c_str(), FunctionName); + CaseNote = llvm::formatvv(Case.getNote().str().c_str(), FunctionName); } const SVal RV = Call.getReturnValue(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index feb72f6244a18c..cdb266f70f0e3e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -655,7 +655,7 @@ DWARFUnit::GetDIE(dw_offset_t die_offset) { if (!ContainsDIEOffset(die_offset)) { GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError( - "GetDIE for DIE {0:x16} is outside of its CU {0:x16}", die_offset, + "GetDIE for DIE {0:x16} is outside of its CU {1:x16}", die_offset, GetOffset()); return DWARFDIE(); // Not found } diff --git a/llvm/include/llvm/Support/FormatVariadic.h b/llvm/include/llvm/Support/FormatVariadic.h index 595f2cf559a428..b42e24646b31b5 100644 --- a/llvm/include/llvm/Support/FormatVariadic.h +++ b/llvm/include/llvm/Support/FormatVariadic.h @@ -66,24 +66,24 @@ struct ReplacementItem { class formatv_object_base { protected: StringRef Fmt; + bool Validate; ArrayRef<support::detail::format_adapter *> Adapters; - static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where, - size_t &Align, char &Pad); - - static std::pair<ReplacementItem, StringRef> - splitLiteralAndReplacement(StringRef Fmt); - - formatv_object_base(StringRef Fmt, + formatv_object_base(StringRef Fmt, bool Validate, ArrayRef<support::detail::format_adapter *> Adapters) - : Fmt(Fmt), Adapters(Adapters) {} + : Fmt(Fmt), Validate(Validate), Adapters(Adapters) {} formatv_object_base(formatv_object_base const &rhs) = delete; formatv_object_base(formatv_object_base &&rhs) = default; public: void format(raw_ostream &S) const { - for (auto &R : parseFormatString(Fmt)) { + const auto [Replacements, IsValid] = + parseFormatString(S, Fmt, Adapters.size(), Validate); + if (!IsValid) + return; + + for (const auto &R : Replacements) { if (R.Type == ReplacementType::Empty) continue; if (R.Type == ReplacementType::Literal) { @@ -101,9 +101,13 @@ class formatv_object_base { Align.format(S, R.Options); } } - static SmallVector<ReplacementItem, 2> parseFormatString(StringRef Fmt); - static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec); + // Parse format string and return the array of replacement items. If there is + // an error in the format string, return false for the second member of the + // pair, and print the error message to `S`. + static std::pair<SmallVector<ReplacementItem, 2>, bool> + parseFormatString(raw_ostream &S, StringRef Fmt, size_t NumArgs, + bool Validate); std::string str() const { std::string Result; @@ -149,8 +153,8 @@ template <typename Tuple> class formatv_object : public formatv_object_base { }; public: - formatv_object(StringRef Fmt, Tuple &&Params) - : formatv_object_base(Fmt, ParameterPointers), + formatv_object(StringRef Fmt, bool ValidateNumArgs, Tuple &&Params) + : formatv_object_base(Fmt, ValidateNumArgs, ParameterPointers), Parameters(std::move(Params)) { ParameterPointers = std::apply(create_adapters(), Parameters); } @@ -174,7 +178,7 @@ template <typename Tuple> class formatv_object : public formatv_object_base { // // rep_field ::= "{" index ["," layout] [":" format] "}" // index ::= <non-negative integer> -// layout ::= [[[char]loc]width] +// layout ::= [[[pad]loc]width] // format ::= <any string not containing "{" or "}"> // char ::= <any character except "{" or "}"> // loc ::= "-" | "=" | "+" @@ -187,7 +191,7 @@ template <typename Tuple> class formatv_object : public formatv_object_base { // format - A type-dependent string used to provide additional options to // the formatting operation. Refer to the documentation of the // various individual format providers for per-type options. -// char - The padding character. Defaults to ' ' (space). Only valid if +// pad - The padding character. Defaults to ' ' (space). Only valid if // `loc` is also specified. // loc - Where to print the formatted text within the field. Only valid if // `width` is also specified. @@ -247,6 +251,8 @@ template <typename Tuple> class formatv_object : public formatv_object_base { // assertion. Otherwise, it will try to do something reasonable, but in general // the details of what that is are undefined. // + +// formatv() with validation enabled. template <typename... Ts> inline auto formatv(const char *Fmt, Ts &&...Vals) -> formatv_object<decltype(std::make_tuple( @@ -254,8 +260,22 @@ inline auto formatv(const char *Fmt, Ts &&...Vals) using ParamTuple = decltype(std::make_tuple( support::detail::build_format_adapter(std::forward<Ts>(Vals))...)); return formatv_object<ParamTuple>( - Fmt, std::make_tuple(support::detail::build_format_adapter( - std::forward<Ts>(Vals))...)); + Fmt, /*Validate=*/true, + std::make_tuple( + support::detail::build_format_adapter(std::forward<Ts>(Vals))...)); +} + +// formatvv() perform no argument/format string validation. +template <typename... Ts> +inline auto formatvv(const char *Fmt, Ts &&...Vals) + -> formatv_object<decltype(std::make_tuple( + support::detail::build_format_adapter(std::forward<Ts>(Vals))...))> { + using ParamTuple = decltype(std::make_tuple( + support::detail::build_format_adapter(std::forward<Ts>(Vals))...)); + return formatv_object<ParamTuple>( + Fmt, /*Validate=*/false, + std::make_tuple( + support::detail::build_format_adapter(std::forward<Ts>(Vals))...)); } } // end namespace llvm diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 77e8ece9439cf9..eb2751ab30ac50 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -1518,8 +1518,7 @@ DWARFVerifier::verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI) { error() << formatv("NameIndex @ {0:x}: Indexing multiple compile units " "and abbreviation {1:x} has no DW_IDX_compile_unit " "or DW_IDX_type_unit attribute.\n", - NI.getUnitOffset(), Abbrev.Code, - dwarf::DW_IDX_compile_unit); + NI.getUnitOffset(), Abbrev.Code); }); ++NumErrors; } diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp index 6448adaa0ceb36..49f239b73e8a01 100644 --- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp @@ -348,10 +348,10 @@ void CompileOnDemandLayer::emitPartition( HC = hash_combine(HC, hash_combine_range(GVName.begin(), GVName.end())); } raw_string_ostream(SubModuleName) - << ".submodule." - << formatv(sizeof(size_t) == 8 ? "{0:x16}" : "{0:x8}", - static_cast<size_t>(HC)) - << ".ll"; + << ".submodule." + << formatv(sizeof(size_t) == 8 ? "{0:x16}" : "{0:x8}", + static_cast<size_t>(HC)) + << ".ll"; } // Extract the requested partiton (plus any necessary aliases) and diff --git a/llvm/lib/Support/FormatVariadic.cpp b/llvm/lib/Support/FormatVariadic.cpp index e25d036cdf1e8c..f86c0d05b5d6b3 100644 --- a/llvm/lib/Support/FormatVariadic.cpp +++ b/llvm/lib/Support/FormatVariadic.cpp @@ -6,8 +6,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/FormatVariadic.h" +#include "llvm/ADT/SmallSet.h" #include <cassert> #include <optional> +#include <variant> using namespace llvm; @@ -25,8 +27,8 @@ static std::optional<AlignStyle> translateLocChar(char C) { LLVM_BUILTIN_UNREACHABLE; } -bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where, - size_t &Align, char &Pad) { +static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where, + size_t &Align, char &Pad) { Where = AlignStyle::Right; Align = 0; Pad = ' '; @@ -35,8 +37,7 @@ bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where, if (Spec.size() > 1) { // A maximum of 2 characters at the beginning can be used for something - // other - // than the width. + // other than the width. // If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...] // contains the width. // Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width. @@ -55,8 +56,8 @@ bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where, return !Failed; } -std::optional<ReplacementItem> -formatv_object_base::parseReplacementItem(StringRef Spec) { +static std::variant<ReplacementItem, StringRef> +parseReplacementItem(StringRef Spec) { StringRef RepString = Spec.trim("{}"); // If the replacement sequence does not start with a non-negative integer, @@ -67,14 +68,12 @@ formatv_object_base::parseReplacementItem(StringRef Spec) { StringRef Options; size_t Index = 0; RepString = RepString.trim(); - if (RepString.consumeInteger(0, Index)) { - assert(false && "Invalid replacement sequence index!"); - return ReplacementItem{}; - } + if (RepString.consumeInteger(0, Index)) + return "Invalid replacement sequence index!"; RepString = RepString.trim(); if (RepString.consume_front(",")) { if (!consumeFieldLayout(RepString, Where, Align, Pad)) - assert(false && "Invalid replacement field layout specification!"); + return "Invalid replacement field layout specification!"; } RepString = RepString.trim(); if (RepString.consume_front(":")) { @@ -82,77 +81,110 @@ formatv_object_base::parseReplacementItem(StringRef Spec) { RepString = StringRef(); } RepString = RepString.trim(); - if (!RepString.empty()) { - assert(false && "Unexpected characters found in replacement string!"); - } - + if (!RepString.empty()) + return "Unexpected character found in replacement string!"; return ReplacementItem{Spec, Index, Align, Where, Pad, Options}; } -std::pair<ReplacementItem, StringRef> -formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) { - while (!Fmt.empty()) { - // Everything up until the first brace is a literal. - if (Fmt.front() != '{') { - std::size_t BO = Fmt.find_first_of('{'); - return std::make_pair(ReplacementItem{Fmt.substr(0, BO)}, Fmt.substr(BO)); - } - - StringRef Braces = Fmt.take_while([](char C) { return C == '{'; }); - // If there is more than one brace, then some of them are escaped. Treat - // these as replacements. - if (Braces.size() > 1) { - size_t NumEscapedBraces = Braces.size() / 2; - StringRef Middle = Fmt.take_front(NumEscapedBraces); - StringRef Right = Fmt.drop_front(NumEscapedBraces * 2); - return std::make_pair(ReplacementItem{Middle}, Right); - } - // An unterminated open brace is undefined. Assert to indicate that this is - // undefined and that we consider it an error. When asserts are disabled, - // build a replacement item with an error message. - std::size_t BC = Fmt.find_first_of('}'); - if (BC == StringRef::npos) { - assert( - false && - "Unterminated brace sequence. Escape with {{ for a literal brace."); - return std::make_pair( - ReplacementItem{"Unterminated brace sequence. Escape with {{ for a " - "literal brace."}, - StringRef()); - } - - // Even if there is a closing brace, if there is another open brace before - // this closing brace, treat this portion as literal, and try again with the - // next one. - std::size_t BO2 = Fmt.find_first_of('{', 1); - if (BO2 < BC) - return std::make_pair(ReplacementItem{Fmt.substr(0, BO2)}, - Fmt.substr(BO2)); - - StringRef Spec = Fmt.slice(1, BC); - StringRef Right = Fmt.substr(BC + 1); - - auto RI = parseReplacementItem(Spec); - if (RI) - return std::make_pair(*RI, Right); +static std::variant<std::pair<ReplacementItem, StringRef>, StringRef> +splitLiteralAndReplacement(StringRef Fmt) { + // Everything up until the first brace is a literal. + if (Fmt.front() != '{') { + std::size_t BO = Fmt.find_first_of('{'); + return std::make_pair(ReplacementItem(Fmt.substr(0, BO)), Fmt.substr(BO)); + } - // If there was an error parsing the replacement item, treat it as an - // invalid replacement spec, and just continue. - Fmt = Fmt.drop_front(BC + 1); + StringRef Braces = Fmt.take_while([](char C) { return C == '{'; }); + // If there is more than one brace, then some of them are escaped. Treat + // these as replacements. + if (Braces.size() > 1) { + size_t NumEscapedBraces = Braces.size() / 2; + StringRef Middle = Fmt.take_front(NumEscapedBraces); + StringRef Right = Fmt.drop_front(NumEscapedBraces * 2); + return std::make_pair(ReplacementItem(Middle), Right); } - return std::make_pair(ReplacementItem{Fmt}, StringRef()); + // An unterminated open brace is undefined. Assert to indicate that this is + // undefined and that we consider it an error. When asserts are disabled, + // build a replacement item with an error message. + std::size_t BC = Fmt.find_first_of('}'); + if (BC == StringRef::npos) + return "Unterminated brace sequence. Escape with {{ for a literal brace."; + + // Even if there is a closing brace, if there is another open brace before + // this closing brace, treat this portion as literal, and try again with the + // next one. + std::size_t BO2 = Fmt.find_first_of('{', 1); + if (BO2 < BC) + return std::make_pair(ReplacementItem{Fmt.substr(0, BO2)}, Fmt.substr(BO2)); + + StringRef Spec = Fmt.slice(1, BC); + StringRef Right = Fmt.substr(BC + 1); + + auto RI = parseReplacementItem(Spec); + if (const StringRef *ErrMsg = std::get_if<1>(&RI)) + return *ErrMsg; + + return std::make_pair(std::get<0>(RI), Right); } -SmallVector<ReplacementItem, 2> -formatv_object_base::parseFormatString(StringRef Fmt) { +std::pair<SmallVector<ReplacementItem, 2>, bool> +formatv_object_base::parseFormatString(raw_ostream &S, const StringRef Fmt, + size_t NumArgs, bool Validate) { SmallVector<ReplacementItem, 2> Replacements; ReplacementItem I; - while (!Fmt.empty()) { - std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt); + size_t NumExpectedArgs = 0; + + // Make a copy for pasring as it updates it. + StringRef ParseFmt = Fmt; + while (!ParseFmt.empty()) { + auto RI = splitLiteralAndReplacement(ParseFmt); + if (const StringRef *ErrMsg = std::get_if<1>(&RI)) { + // If there was an error parsing the format string, write the error to the + // stream, and return false as second member of the pair. + errs() << "Invalid format string: " << Fmt << "\n"; + assert(0 && "Invalid format string"); + S << *ErrMsg; + return {{}, false}; + } + std::tie(I, ParseFmt) = std::get<0>(RI); if (I.Type != ReplacementType::Empty) Replacements.push_back(I); + if (I.Type == ReplacementType::Format) + NumExpectedArgs = std::max(NumExpectedArgs, I.Index + 1); } - return Replacements; + if (!Validate) + return {Replacements, true}; + + // Perform additional validation. Verify that the number of arguments matches + // the number of replacement indices and that there are no holes in the + // replacement indexes. + if (NumExpectedArgs != NumArgs) { + errs() << "Expected " << NumExpectedArgs << " Args, but got " << NumArgs + << " for format string '" << Fmt << "'\n"; + assert(0 && "Invalid formatv() call"); + S << "Expected " << NumExpectedArgs << " Args, but got " << NumArgs + << " for format string '" << Fmt << "'\n"; + return {{}, false}; + } + + SmallSet<size_t, 2> Indices; + for (const ReplacementItem &R : Replacements) { + if (R.Type != ReplacementType::Format) + continue; + Indices.insert(R.Index); + } + + if (Indices.size() != NumExpectedArgs) { + errs() << "Invalid format string: Replacement field indices " + "cannot have holes for format string '" + << Fmt << "'\n"; + assert(0 && "Invalid format string"); + S << "Replacement field indices cannot have holes for format string '" + << Fmt << "'\n"; + return {{}, false}; + } + + return {Replacements, true}; } void support::detail::format_adapter::anchor() {} diff --git a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp index ce9d659335b30b..200e9037de3cbe 100644 --- a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp +++ b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp @@ -139,8 +139,8 @@ bool ExplainOutputStyle::explainPdbBlockStatus() { FileOffset, File.pdb().getFileSize()); return false; } - P.formatLine("Block:Offset = {2:X-}:{1:X-4}.", FileOffset, pdbBlockOffset(), - pdbBlockIndex()); + P.formatLine("Block:Offset = {0:X-}:{1:X-4}.", pdbBlockIndex(), + pdbBlockOffset()); bool IsFree = File.pdb().getMsfLayout().FreePageMap[pdbBlockIndex()]; P.formatLine("Address is in block {0} ({1}allocated).", pdbBlockIndex(), diff --git a/llvm/unittests/Support/FormatVariadicTest.cpp b/llvm/unittests/Support/FormatVariadicTest.cpp index a78b25c53d7e43..23399babe7337e 100644 --- a/llvm/unittests/Support/FormatVariadicTest.cpp +++ b/llvm/unittests/Support/FormatVariadicTest.cpp @@ -9,9 +9,11 @@ #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Error.h" #include "llvm/Support/FormatAdapters.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" using namespace llvm; +using ::testing::HasSubstr; // Compile-time tests templates in the detail namespace. namespace { @@ -35,14 +37,38 @@ struct NoFormat {}; static_assert(uses_missing_provider<NoFormat>::value, ""); } +// Helper to parse format string with no validation. +static SmallVector<ReplacementItem, 2> parseFormatString(StringRef Fmt) { + std::string Error; + raw_string_ostream ErrorStream(Error); + auto [Replacements, IsValid] = + formatv_object_base::parseFormatString(ErrorStream, Fmt, 0, false); + ErrorStream.flush(); + assert(IsValid && Error.empty()); + return Replacements; +} + +#if NDEBUG +// Helper for parse format string with errors. +static std::string parseFormatStringError(StringRef Fmt) { + std::string Error; + raw_string_ostream ErrorStream(Error); + auto [... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/105745 _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits