Author: Viktoriia Bakalova Date: 2025-04-11T12:48:18+02:00 New Revision: 9eeafc63d2f06f7aa216ddcf2370b20fbb1d9015
URL: https://github.com/llvm/llvm-project/commit/9eeafc63d2f06f7aa216ddcf2370b20fbb1d9015 DIFF: https://github.com/llvm/llvm-project/commit/9eeafc63d2f06f7aa216ddcf2370b20fbb1d9015.diff LOG: Implement `-dump-minimization-hints` flag. (#133910) This PR implements a CC1 flag `-dump-minimization-hints`. The flag allows specifying a file path to dump ranges of deserialized declarations in `ASTReader`. Example usage: ``` clang -Xclang=-dump-minimization-hints=/tmp/decls -c file.cc -o file.o ``` Example output: ``` // /tmp/decls { "required_ranges": [ { "file": "foo.h", "range": [ { "from": { "line": 26, "column": 1 }, "to": { "line": 27, "column": 77 } } ] }, { "file": "bar.h", "range": [ { "from": { "line": 30, "column": 1 }, "to": { "line": 35, "column": 1 } }, { "from": { "line": 92, "column": 1 }, "to": { "line": 95, "column": 1 } } ] } ] } ``` Specifying the flag creates an instance of `DeserializedDeclsSourceRangePrinter`, which dumps ranges of deserialized declarations to aid debugging and bug minimization (we use it as input to [C-Vise](https://github.com/emaxx-google/cvise/tree/multifile-hints)). Required ranges are computed from source ranges of Decls. `TranslationUnitDecl`, `LinkageSpecDecl` and `NamespaceDecl` are ignored for the sake of this PR. Technical details: * `DeserializedDeclsSourceRangePrinter` implements `ASTConsumer` and `ASTDeserializationListener`, so that an object of `DeserializedDeclsSourceRangePrinter` registers as its own listener. * The `ASTDeserializationListener` interface provides the `DeclRead` callback that we use to collect the deserialized Decls. Printing or otherwise processing them at this point is dangerous, since that could trigger additional deserialization and crash compilation. * The collected Decls are processed in the `HandleTranslationUnit` method of `ASTConsumer`. This is a safe point, since we know that by this point all the Decls needed by the compiler frontend have been deserialized.
* In case our processing causes further deserialization, `DeclRead` from the listener might be called again. However, at that point we don't accept any more Decls for processing. Added: clang/test/Frontend/dump-minimization-hints.cpp Modified: clang/include/clang/Driver/Options.td clang/include/clang/Frontend/FrontendOptions.h clang/lib/Frontend/FrontendAction.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 2242269c30b0f..49c2ff4cb91ce 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -8045,6 +8045,10 @@ def print_dependency_directives_minimized_source : Flag<["-"], "print-dependency-directives-minimized-source">, HelpText<"Print the output of the dependency directives source minimizer">; } +def dump_minimization_hints : Joined<["-"], + "dump-minimization-hints=">, + HelpText<"Dump ranges of deserialized declarations to use as bug minimization hints">, + MarshallingInfoString<FrontendOpts<"DumpMinimizationHintsPath">>; defm emit_llvm_uselists : BoolOption<"", "emit-llvm-uselists", CodeGenOpts<"EmitLLVMUseLists">, DefaultFalse, diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index a9c9849ff52ab..c919a53ae089e 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -530,6 +530,10 @@ class FrontendOptions { /// Output Path for module output file. std::string ModuleOutputPath; + /// Output path to dump ranges of deserialized declarations to use as + /// minimization hints. 
+ std::string DumpMinimizationHintsPath; + public: FrontendOptions() : DisableFree(false), RelocatablePCH(false), ShowHelp(false), diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 2d77f06be7446..bd084aa94fc37 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -13,9 +13,13 @@ #include "clang/Basic/Builtins.h" #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileEntry.h" +#include "clang/Basic/LangOptions.h" #include "clang/Basic/LangStandard.h" #include "clang/Basic/Sarif.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" #include "clang/Basic/Stack.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" @@ -35,6 +39,7 @@ #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/GlobalModuleIndex.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" @@ -49,6 +54,185 @@ LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry) namespace { +/// DeserializedDeclsSourceRangePrinter dumps ranges of deserialized declarations +/// to aid debugging and bug minimization. It implements ASTConsumer and +/// ASTDeserializationListener, so that an object of +/// DeserializedDeclsSourceRangePrinter registers as its own listener. The +/// ASTDeserializationListener interface provides the DeclRead callback that we +/// use to collect the deserialized Decls. Note that printing or otherwise +/// processing them at this point is dangerous, since that could trigger +/// additional deserialization and crash compilation. Therefore, we process the +/// collected Decls in the HandleTranslationUnit method of ASTConsumer.
This is a +/// safe point, since we know that by this point all the Decls needed by the +/// compiler frontend have been deserialized. In case our processing causes +/// further deserialization, DeclRead from the listener might be called again. +/// However, at that point we don't accept any more Decls for processing. +class DeserializedDeclsSourceRangePrinter : public ASTConsumer, + ASTDeserializationListener { +public: + explicit DeserializedDeclsSourceRangePrinter( + SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS) + : ASTDeserializationListener(), SM(SM), OS(std::move(OS)) {} + + ASTDeserializationListener *GetASTDeserializationListener() override { + return this; + } + + void DeclRead(GlobalDeclID ID, const Decl *D) override { + if (!IsCollectingDecls) + return; + if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) || + isa<NamespaceDecl>(D)) { + // These decls cover a lot of nested declarations that might not be used, + // reducing the granularity and making the output less useful. + return; + } + if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext()) { + // We choose to work at namespace level to reduce complexity and the + // number of cases we care about. + return; + } + PendingDecls.push_back(D); + } + + struct Position { + unsigned Line; + unsigned Column; + + bool operator<(const Position &other) const { + if (Line < other.Line) + return true; + if (Line > other.Line) + return false; + return Column < other.Column; + } + + static Position GetBeginSpelling(const SourceManager &SM, + const CharSourceRange &R) { + SourceLocation Begin = R.getBegin(); + return {SM.getSpellingLineNumber(Begin), + SM.getSpellingColumnNumber(Begin)}; + } + + static Position GetEndSpelling(const SourceManager &SM, + const CharSourceRange &Range, + const LangOptions &LangOpts) { + // For token ranges, compute end location for end character of the range. 
+ CharSourceRange R = Lexer::getAsCharRange(Range, SM, LangOpts); + SourceLocation End = R.getEnd(); + // Relex the token past the end location of the last token in the source + // range. If it's a semicolon, advance the location by one token. + Token PossiblySemi; + Lexer::getRawToken(End, PossiblySemi, SM, LangOpts, true); + if (PossiblySemi.is(tok::semi)) + End = End.getLocWithOffset(1); + // Column number of the returned end position is exclusive. + return {SM.getSpellingLineNumber(End), SM.getSpellingColumnNumber(End)}; + } + }; + + struct RequiredRanges { + StringRef Filename; + std::vector<std::pair<Position, Position>> FromTo; + }; + void HandleTranslationUnit(ASTContext &Context) override { + assert(IsCollectingDecls && "HandleTranslationUnit called twice?"); + IsCollectingDecls = false; + + // Merge ranges in each of the files. + struct FileData { + std::vector<std::pair<Position, Position>> FromTo; + OptionalFileEntryRef Ref; + }; + llvm::DenseMap<const FileEntry *, FileData> FileToRanges; + for (const Decl *D : PendingDecls) { + CharSourceRange R = SM.getExpansionRange(D->getSourceRange()); + if (!R.isValid()) + continue; + + auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin())); + if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) { + // Such cases are rare and difficult to handle. + continue; + } + + auto &Data = FileToRanges[F]; + if (!Data.Ref) + Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin())); + Data.FromTo.push_back( + {Position::GetBeginSpelling(SM, R), + Position::GetEndSpelling(SM, R, D->getLangOpts())}); + } + + // To simplify output, merge consecutive and intersecting ranges.
+ std::vector<RequiredRanges> Result; + for (auto &[F, Data] : FileToRanges) { + auto &FromTo = Data.FromTo; + assert(!FromTo.empty()); + + if (!Data.Ref) + continue; + + llvm::sort(FromTo); + + std::vector<std::pair<Position, Position>> MergedRanges; + MergedRanges.push_back(FromTo.front()); + for (auto It = FromTo.begin() + 1; It < FromTo.end(); ++It) { + if (MergedRanges.back().second < It->first) { + MergedRanges.push_back(*It); + continue; + } + if (MergedRanges.back().second < It->second) + MergedRanges.back().second = It->second; + } + Result.push_back({Data.Ref->getName(), MergedRanges}); + } + printJson(Result); + } + +private: + std::vector<const Decl *> PendingDecls; + bool IsCollectingDecls = true; + const SourceManager &SM; + std::unique_ptr<llvm::raw_ostream> OS; + + void printJson(llvm::ArrayRef<RequiredRanges> Result) { + *OS << "{\n"; + *OS << R"( "required_ranges": [)" << "\n"; + for (size_t I = 0; I < Result.size(); ++I) { + auto &F = Result[I].Filename; + auto &MergedRanges = Result[I].FromTo; + *OS << R"( {)" << "\n"; + *OS << R"( "file": ")" << F << "\"," << "\n"; + *OS << R"( "range": [)" << "\n"; + for (size_t J = 0; J < MergedRanges.size(); ++J) { + auto &From = MergedRanges[J].first; + auto &To = MergedRanges[J].second; + *OS << R"( {)" << "\n"; + *OS << R"( "from": {)" << "\n"; + *OS << R"( "line": )" << From.Line << ",\n"; + *OS << R"( "column": )" << From.Column << "\n" + << R"( },)" << "\n"; + *OS << R"( "to": {)" << "\n"; + *OS << R"( "line": )" << To.Line << ",\n"; + *OS << R"( "column": )" << To.Column << "\n" + << R"( })" << "\n"; + *OS << R"( })"; + if (J < MergedRanges.size() - 1) { + *OS << ","; + } + *OS << "\n"; + } + *OS << " ]" << "\n" << " }"; + if (I < Result.size() - 1) + *OS << ","; + *OS << "\n"; + } + *OS << " ]\n"; + *OS << "}\n"; + } +}; + /// Dumps deserialized declarations. 
class DeserializedDeclsDumper : public DelegatingDeserializationListener { public: @@ -121,6 +305,25 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI, if (!Consumer) return nullptr; + std::vector<std::unique_ptr<ASTConsumer>> Consumers; + llvm::StringRef DumpDeserializedDeclarationRangesPath = + CI.getFrontendOpts().DumpMinimizationHintsPath; + if (!DumpDeserializedDeclarationRangesPath.empty()) { + std::error_code ErrorCode; + auto FileStream = std::make_unique<llvm::raw_fd_ostream>( + DumpDeserializedDeclarationRangesPath, ErrorCode, + llvm::sys::fs::OF_None); + if (!ErrorCode) { + Consumers.push_back(std::make_unique<DeserializedDeclsSourceRangePrinter>( + CI.getSourceManager(), std::move(FileStream))); + } else { + llvm::errs() << "Failed to create output file for " + "-dump-minimization-hints flag, file path: " + << DumpDeserializedDeclarationRangesPath + << ", error: " << ErrorCode.message() << "\n"; + } + } + // Validate -add-plugin args. bool FoundAllPlugins = true; for (const std::string &Arg : CI.getFrontendOpts().AddPluginActions) { @@ -138,17 +341,12 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI, if (!FoundAllPlugins) return nullptr; - // If there are no registered plugins we don't need to wrap the consumer - if (FrontendPluginRegistry::begin() == FrontendPluginRegistry::end()) - return Consumer; - // If this is a code completion run, avoid invoking the plugin consumers if (CI.hasCodeCompletionConsumer()) return Consumer; // Collect the list of plugins that go before the main action (in Consumers) // or after it (in AfterConsumers) - std::vector<std::unique_ptr<ASTConsumer>> Consumers; std::vector<std::unique_ptr<ASTConsumer>> AfterConsumers; for (const FrontendPluginRegistry::entry &Plugin : FrontendPluginRegistry::entries()) { @@ -191,6 +389,9 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI, Consumers.push_back(std::move(C)); } + assert(Consumers.size() >= 1 && "should have added the main consumer"); 
+ if (Consumers.size() == 1) + return std::move(Consumers.front()); return std::make_unique<MultiplexConsumer>(std::move(Consumers)); } diff --git a/clang/test/Frontend/dump-minimization-hints.cpp b/clang/test/Frontend/dump-minimization-hints.cpp new file mode 100644 index 0000000000000..484378606da2b --- /dev/null +++ b/clang/test/Frontend/dump-minimization-hints.cpp @@ -0,0 +1,79 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -xc++ -fmodules -fmodule-name=foo -fmodule-map-file=%t/foo.cppmap -emit-module %t/foo.cppmap -o %t/foo.pcm +// RUN: %clang_cc1 -xc++ -fmodules -dump-minimization-hints=%t/decls -fmodule-file=%t/foo.pcm %t/foo.cpp -o %t/foo.o +// RUN: cat %t/decls +// RUN: cat %t/decls | FileCheck -check-prefix=RANGE %s +// RANGE:{ +// RANGE-NEXT: "required_ranges": [ +// RANGE-NEXT: { +// RANGE-NEXT: "file": "{{.+}}foo.h", +// RANGE-NEXT: "range": [ +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 1, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 9, +// RANGE-NEXT: "column": 3 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 11, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 11, +// RANGE-NEXT: "column": 25 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 13, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 15, +// RANGE-NEXT: "column": 2 +// RANGE-NEXT: } +// RANGE-NEXT: } +// RANGE-NEXT: ] +// RANGE-NEXT: } +// RANGE-NEXT: ] +// RANGE-NEXT:} + +//--- foo.cppmap +module foo { + header "foo.h" + export * +} + +//--- foo.h +class MyData { +public: + MyData(int val): value_(val) {} + int getValue() const { + return 5; + } +private: + int value_; +}; + +extern int global_value; + +int multiply(int a, int b) { + return a * b; +} + +//--- foo.cpp 
+#include "foo.h" +int global_value = 5; +int main() { + MyData data(5); + int current_value = data.getValue(); + int doubled_value = multiply(current_value, 2); + int final_result = doubled_value + global_value; +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits