sammccall created this revision.
sammccall added a reviewer: kadircet.
Herald added subscribers: cfe-commits, arphaman, jkorous, MaskRay, ioeric, 
ilya-biryukov, mgorny.

Drop support for MR-via-YAML, it's slow and the merge still doesn't scale.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D52465

Files:
  clangd/CMakeLists.txt
  clangd/index/IndexAction.cpp
  clangd/index/IndexAction.h
  clangd/index/Serialization.h
  clangd/index/SymbolCollector.h
  clangd/indexer/IndexerMain.cpp

Index: clangd/indexer/IndexerMain.cpp
===================================================================
--- clangd/indexer/IndexerMain.cpp
+++ clangd/indexer/IndexerMain.cpp
@@ -15,6 +15,7 @@
 #include "RIFF.h"
 #include "index/CanonicalIncludes.h"
 #include "index/Index.h"
+#include "index/IndexAction.h"
 #include "index/Merge.h"
 #include "index/Serialization.h"
 #include "index/SymbolCollector.h"
@@ -50,163 +51,43 @@
                    "not given, such headers will have relative paths."),
     llvm::cl::init(""));
 
-static llvm::cl::opt<bool> MergeOnTheFly(
-    "merge-on-the-fly",
-    llvm::cl::desc(
-        "Merges symbols for each processed translation unit as soon "
-        "they become available. This results in a smaller memory "
-        "usage and an almost instant reduce stage. Optimal for running as a "
-        "standalone tool, but cannot be used with multi-process executors like "
-        "MapReduce."),
-    llvm::cl::init(true), llvm::cl::Hidden);
-
 enum IndexFormat { YAML, Binary };
 static llvm::cl::opt<IndexFormat> Format(
     "format", llvm::cl::desc("Format of the index to be written"),
     llvm::cl::values(clEnumValN(YAML, "yaml", "human-readable YAML format"),
                      clEnumValN(Binary, "binary", "binary RIFF format")),
     llvm::cl::init(YAML));
 
-/// Responsible for aggregating symbols from each processed file and producing
-/// the final results. All methods in this class must be thread-safe,
-/// 'consumeSymbols' may be called from multiple threads.
-class SymbolsConsumer {
-public:
-  virtual ~SymbolsConsumer() = default;
-
-  /// Consume a SymbolSlab build for a file.
-  virtual void consumeSymbols(SymbolSlab Symbols) = 0;
-  /// Produce a resulting symbol slab, by combining  occurrences of the same
-  /// symbols across translation units.
-  virtual SymbolSlab mergeResults() = 0;
-};
-
-class SymbolIndexActionFactory : public tooling::FrontendActionFactory {
+class IndexActionFactory : public tooling::FrontendActionFactory {
 public:
-  SymbolIndexActionFactory(SymbolsConsumer &Consumer) : Consumer(Consumer) {}
+  IndexActionFactory(IndexFileIn &Result) : Result(Result) {}
 
   clang::FrontendAction *create() override {
-    // Wraps the index action and reports collected symbols to the execution
-    // context at the end of each translation unit.
-    class WrappedIndexAction : public WrapperFrontendAction {
-    public:
-      WrappedIndexAction(std::shared_ptr<SymbolCollector> C,
-                         std::unique_ptr<CanonicalIncludes> Includes,
-                         const index::IndexingOptions &Opts,
-                         SymbolsConsumer &Consumer)
-          : WrapperFrontendAction(
-                index::createIndexingAction(C, Opts, nullptr)),
-            Consumer(Consumer), Collector(C), Includes(std::move(Includes)),
-            PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {}
-
-      std::unique_ptr<ASTConsumer>
-      CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
-        CI.getPreprocessor().addCommentHandler(PragmaHandler.get());
-        return WrapperFrontendAction::CreateASTConsumer(CI, InFile);
-      }
-
-      bool BeginInvocation(CompilerInstance &CI) override {
-        // We want all comments, not just the doxygen ones.
-        CI.getLangOpts().CommentOpts.ParseAllComments = true;
-        return WrapperFrontendAction::BeginInvocation(CI);
-      }
-
-      void EndSourceFileAction() override {
-        WrapperFrontendAction::EndSourceFileAction();
-
-        const auto &CI = getCompilerInstance();
-        if (CI.hasDiagnostics() &&
-            CI.getDiagnostics().hasUncompilableErrorOccurred()) {
-          llvm::errs()
-              << "Found uncompilable errors in the translation unit. Igoring "
-                 "collected symbols...\n";
-          return;
-        }
-
-        Consumer.consumeSymbols(Collector->takeSymbols());
-      }
-
-    private:
-      SymbolsConsumer &Consumer;
-      std::shared_ptr<SymbolCollector> Collector;
-      std::unique_ptr<CanonicalIncludes> Includes;
-      std::unique_ptr<CommentHandler> PragmaHandler;
-    };
-
-    index::IndexingOptions IndexOpts;
-    IndexOpts.SystemSymbolFilter =
-        index::IndexingOptions::SystemSymbolFilterKind::All;
-    IndexOpts.IndexFunctionLocals = false;
-    auto CollectorOpts = SymbolCollector::Options();
-    CollectorOpts.FallbackDir = AssumedHeaderDir;
-    CollectorOpts.CollectIncludePath = true;
-    CollectorOpts.CountReferences = true;
-    CollectorOpts.Origin = SymbolOrigin::Static;
-    auto Includes = llvm::make_unique<CanonicalIncludes>();
-    addSystemHeadersMapping(Includes.get());
-    CollectorOpts.Includes = Includes.get();
-    return new WrappedIndexAction(
-        std::make_shared<SymbolCollector>(std::move(CollectorOpts)),
-        std::move(Includes), IndexOpts, Consumer);
+    SymbolCollector::Options Opts;
+    Opts.FallbackDir = AssumedHeaderDir;
+    return createStaticIndexingAction(
+               Opts,
+               [&](SymbolSlab S) {
+                 // Merge as we go.
+                 std::lock_guard<std::mutex> Lock(SymbolsMu);
+                 for (const auto &Sym : S) {
+                   if (const auto *Existing = Symbols.find(Sym.ID))
+                     Symbols.insert(mergeSymbol(*Existing, Sym));
+                   else
+                     Symbols.insert(Sym);
+                 }
+               })
+        .release();
   }
 
-  SymbolsConsumer &Consumer;
-};
-
-/// Stashes per-file results inside ExecutionContext, merges all of them at the
-/// end. Useful for running on MapReduce infrastructure to avoid keeping symbols
-/// from multiple files in memory.
-class ToolExecutorConsumer : public SymbolsConsumer {
-public:
-  ToolExecutorConsumer(ToolExecutor &Executor) : Executor(Executor) {}
-
-  void consumeSymbols(SymbolSlab Symbols) override {
-    for (const auto &Sym : Symbols)
-      Executor.getExecutionContext()->reportResult(Sym.ID.str(),
-                                                   SymbolToYAML(Sym));
-  }
-
-  SymbolSlab mergeResults() override {
-    SymbolSlab::Builder UniqueSymbols;
-    Executor.getToolResults()->forEachResult(
-        [&](llvm::StringRef Key, llvm::StringRef Value) {
-          llvm::yaml::Input Yin(Value);
-          auto Sym = clang::clangd::SymbolFromYAML(Yin);
-          auto ID = cantFail(clang::clangd::SymbolID::fromStr(Key));
-          if (const auto *Existing = UniqueSymbols.find(ID))
-            UniqueSymbols.insert(mergeSymbol(*Existing, Sym));
-          else
-            UniqueSymbols.insert(Sym);
-        });
-    return std::move(UniqueSymbols).build();
-  }
+  // Awkward: we write the result in the destructor, because the executor
+  // takes ownership so it's the easiest way to get our data back out.
+  ~IndexActionFactory() { Result.Symbols = std::move(Symbols).build(); }
 
 private:
-  ToolExecutor &Executor;
-};
-
-/// Merges symbols for each translation unit as soon as the file is processed.
-/// Optimal choice for standalone tools.
-class OnTheFlyConsumer : public SymbolsConsumer {
-public:
-  void consumeSymbols(SymbolSlab Symbols) override {
-    std::lock_guard<std::mutex> Lock(Mut);
-    for (auto &&Sym : Symbols) {
-      if (const auto *Existing = Result.find(Sym.ID))
-        Result.insert(mergeSymbol(*Existing, Sym));
-      else
-        Result.insert(Sym);
-    }
-  }
-
-  SymbolSlab mergeResults() override {
-    std::lock_guard<std::mutex> Lock(Mut);
-    return std::move(Result).build();
-  }
-
-private:
-  std::mutex Mut;
-  SymbolSlab::Builder Result;
+  IndexFileIn &Result;
+  std::mutex SymbolsMu;
+  SymbolSlab::Builder Symbols;
 };
 
 } // namespace
@@ -217,20 +98,18 @@
   llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
 
   const char *Overview = R"(
-  This is an **experimental** tool to extract symbols from a whole project
-  for clangd (global code completion). It will be changed and deprecated
-  eventually. Don't use it in production code!
+  Creates an index of symbol information etc in a whole project.
+  This is **experimental** and not production-ready!
 
-  Example usage for building index for the whole project using CMake compile
-  commands:
+  Example usage for a project using CMake compile commands:
 
   $ clangd-indexer --executor=all-TUs compile_commands.json > index.yaml
 
   Example usage for file sequence index without flags:
 
   $ clangd-indexer File1.cpp File2.cpp ... FileN.cpp > index.yaml
 
-  Note: only symbols from header files will be collected.
+  Note: only symbols from header files will be indexed.
   )";
 
   auto Executor = clang::tooling::createExecutorFromCommandLineArgs(
@@ -247,36 +126,21 @@
     return 1;
   }
 
-  if (clang::clangd::MergeOnTheFly && !Executor->get()->isSingleProcess()) {
-    llvm::errs()
-        << "Found multi-process executor, forcing the use of intermediate YAML "
-           "serialization instead of the on-the-fly merge.\n";
-    clang::clangd::MergeOnTheFly = false;
-  }
-
-  std::unique_ptr<clang::clangd::SymbolsConsumer> Consumer;
-  if (clang::clangd::MergeOnTheFly)
-    Consumer = llvm::make_unique<clang::clangd::OnTheFlyConsumer>();
-  else
-    Consumer =
-        llvm::make_unique<clang::clangd::ToolExecutorConsumer>(**Executor);
-
   // Map phase: emit symbols found in each translation unit.
+  clang::clangd::IndexFileIn Data;
   auto Err = Executor->get()->execute(
-      llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(*Consumer));
+      llvm::make_unique<clang::clangd::IndexActionFactory>(Data));
   if (Err) {
     llvm::errs() << llvm::toString(std::move(Err)) << "\n";
   }
-  // Reduce phase: combine symbols with the same IDs.
-  auto UniqueSymbols = Consumer->mergeResults();
+
   // Output phase: emit result symbols.
   switch (clang::clangd::Format) {
   case clang::clangd::IndexFormat::YAML:
-    SymbolsToYAML(UniqueSymbols, llvm::outs());
+    SymbolsToYAML(*Data.Symbols, llvm::outs());
     break;
   case clang::clangd::IndexFormat::Binary: {
-    clang::clangd::IndexFileOut Out;
-    Out.Symbols = &UniqueSymbols;
+    clang::clangd::IndexFileOut Out(Data);
     llvm::outs() << Out;
   }
   }
Index: clangd/index/SymbolCollector.h
===================================================================
--- clangd/index/SymbolCollector.h
+++ clangd/index/SymbolCollector.h
@@ -6,6 +6,8 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_COLLECTOR_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_COLLECTOR_H
 
 #include "CanonicalIncludes.h"
 #include "Index.h"
@@ -122,3 +124,4 @@
 
 } // namespace clangd
 } // namespace clang
+#endif
Index: clangd/index/Serialization.h
===================================================================
--- clangd/index/Serialization.h
+++ clangd/index/Serialization.h
@@ -26,21 +26,26 @@
 namespace clang {
 namespace clangd {
 
+// Holds the contents of an index file that was read.
+struct IndexFileIn {
+  llvm::Optional<SymbolSlab> Symbols;
+};
+// Parse an index file. The input must be a RIFF container chunk.
+llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
+
 // Specifies the contents of an index file to be written.
 struct IndexFileOut {
   const SymbolSlab *Symbols;
   // TODO: Support serializing symbol occurrences.
   // TODO: Support serializing Dex posting lists.
+
+  IndexFileOut() = default;
+  IndexFileOut(const IndexFileIn &I)
+      : Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr) {}
 };
 // Serializes an index file. (This is a RIFF container chunk).
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const IndexFileOut &);
 
-// Holds the contents of an index file that was read.
-struct IndexFileIn {
-  llvm::Optional<SymbolSlab> Symbols;
-};
-// Parse an index file. The input must be a RIFF container chunk.
-llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
 
 } // namespace clangd
 } // namespace clang
Index: clangd/index/IndexAction.h
===================================================================
--- /dev/null
+++ clangd/index/IndexAction.h
@@ -0,0 +1,33 @@
+//===--- IndexAction.h - Run the indexer as a frontend action ----*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_ACTION_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_ACTION_H
+#include "SymbolCollector.h"
+#include "clang/Frontend/FrontendActions.h"
+
+namespace clang {
+namespace clangd {
+
+// Creates an action that indexes translation units and delivers the results
+// for SymbolsCallback (each slab corresponds to one TU).
+//
+// Only a subset of SymbolCollector::Options are respected:
+//   - include paths are always collected, and canonicalized appropriately
+//   - references are always counted
+//   - (all) references ane provided if RefsCallback is non-null
+//   - the symbol origin is always Static
+std::unique_ptr<FrontendAction>
+createStaticIndexingAction(SymbolCollector::Options Opts,
+                           std::function<void(SymbolSlab)> SymbolsCallback);
+
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clangd/index/IndexAction.cpp
===================================================================
--- /dev/null
+++ clangd/index/IndexAction.cpp
@@ -0,0 +1,76 @@
+#include "IndexAction.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Index/IndexDataConsumer.h"
+#include "clang/Index/IndexingAction.h"
+#include "clang/Tooling/Tooling.h"
+namespace clang {
+namespace clangd {
+namespace {
+
+// Wraps the index action and reports index data after each translation unit.
+class IndexAction : public WrapperFrontendAction {
+public:
+  IndexAction(std::shared_ptr<SymbolCollector> C,
+              std::unique_ptr<CanonicalIncludes> Includes,
+              const index::IndexingOptions &Opts,
+              std::function<void(SymbolSlab)> &SymbolsCallback)
+      : WrapperFrontendAction(index::createIndexingAction(C, Opts, nullptr)),
+        SymbolsCallback(SymbolsCallback), Collector(C),
+        Includes(std::move(Includes)),
+        PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {}
+
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+                                                 StringRef InFile) override {
+    CI.getPreprocessor().addCommentHandler(PragmaHandler.get());
+    return WrapperFrontendAction::CreateASTConsumer(CI, InFile);
+  }
+
+  bool BeginInvocation(CompilerInstance &CI) override {
+    // We want all comments, not just the doxygen ones.
+    CI.getLangOpts().CommentOpts.ParseAllComments = true;
+    return WrapperFrontendAction::BeginInvocation(CI);
+  }
+
+  void EndSourceFileAction() override {
+    WrapperFrontendAction::EndSourceFileAction();
+
+    const auto &CI = getCompilerInstance();
+    if (CI.hasDiagnostics() &&
+        CI.getDiagnostics().hasUncompilableErrorOccurred()) {
+      llvm::errs()
+          << "Found uncompilable errors in the translation unit. Igoring "
+             "collected symbols...\n";
+      return;
+    }
+    SymbolsCallback(Collector->takeSymbols());
+  }
+
+private:
+  std::function<void(SymbolSlab)> SymbolsCallback;
+  std::shared_ptr<SymbolCollector> Collector;
+  std::unique_ptr<CanonicalIncludes> Includes;
+  std::unique_ptr<CommentHandler> PragmaHandler;
+};
+
+} // namespace
+
+std::unique_ptr<FrontendAction>
+createStaticIndexingAction(SymbolCollector::Options Opts,
+                           std::function<void(SymbolSlab)> SymbolsCallback) {
+  index::IndexingOptions IndexOpts;
+  IndexOpts.SystemSymbolFilter =
+      index::IndexingOptions::SystemSymbolFilterKind::All;
+  IndexOpts.IndexFunctionLocals = false;
+  Opts.CollectIncludePath = true;
+  Opts.CountReferences = true;
+  Opts.Origin = SymbolOrigin::Static;
+  auto Includes = llvm::make_unique<CanonicalIncludes>();
+  addSystemHeadersMapping(Includes.get());
+  Opts.Includes = Includes.get();
+  return llvm::make_unique<IndexAction>(
+      std::make_shared<SymbolCollector>(std::move(Opts)), std::move(Includes),
+      IndexOpts, SymbolsCallback);
+};
+
+} // namespace clangd
+} // namespace clang
Index: clangd/CMakeLists.txt
===================================================================
--- clangd/CMakeLists.txt
+++ clangd/CMakeLists.txt
@@ -40,6 +40,7 @@
   index/CanonicalIncludes.cpp
   index/FileIndex.cpp
   index/Index.cpp
+  index/IndexAction.cpp
   index/MemIndex.cpp
   index/Merge.cpp
   index/Serialization.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to