hokein created this revision.
hokein added a reviewer: sammccall.
Herald added subscribers: ilya-biryukov, mgorny, klimek.

This tool is used to generate global symbols for clangd (global code
completion).
The output format is YAML, which is intended only for **experimentation**.

TEST: used the tool to generate global symbols for LLVM (~72MB).


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D41491

Files:
  clangd/CMakeLists.txt
  clangd/global-symbol-builder/CMakeLists.txt
  clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
  clangd/global-symbol-builder/run-global-symbol-builder.py
  clangd/index/SymbolCollector.cpp
  clangd/index/SymbolCollector.h

Index: clangd/index/SymbolCollector.h
===================================================================
--- clangd/index/SymbolCollector.h
+++ clangd/index/SymbolCollector.h
@@ -10,6 +10,7 @@
 #include "Index.h"
 #include "clang/Index/IndexDataConsumer.h"
 #include "clang/Index/IndexSymbol.h"
+#include "llvm/ADT/SmallPtrSet.h"
 
 namespace clang {
 namespace clangd {
@@ -21,8 +22,24 @@
 // changed.
 class SymbolCollector : public index::IndexDataConsumer {
 public:
+  // Callback to get AST/Symbol information when collecting symbols.
+  class SymbolCallback {
+  public:
+    virtual ~SymbolCallback() = default;
+    // Called when SymbolCollector finishes collecting all symbols.
+    // Note: Symbols is an immutable object.
+    virtual void onFinish(const ASTContext &ASTCtx, const SymbolSlab &Symbols) {
+    }
+  };
+
   SymbolCollector() = default;
 
+  void addSymbolCallback(SymbolCallback* Callback) {
+    AllCallbacks.insert(Callback);
+  }
+
+  void initialize(ASTContext &Ctx) override;
+
   bool
   handleDeclOccurence(const Decl *D, index::SymbolRoleSet Roles,
                       ArrayRef<index::SymbolRelation> Relations, FileID FID,
@@ -36,6 +53,11 @@
 private:
   // All Symbols collected from the AST.
   SymbolSlab Symbols;
+
+  ASTContext *ASTCtx = nullptr; // Set by initialize(); read by finish().
+
+  // All symbol callbacks.
+  llvm::SmallPtrSet<SymbolCallback *, 16> AllCallbacks;
 };
 
 } // namespace clangd
Index: clangd/index/SymbolCollector.cpp
===================================================================
--- clangd/index/SymbolCollector.cpp
+++ clangd/index/SymbolCollector.cpp
@@ -107,7 +107,15 @@
   return true;
 }
 
-void SymbolCollector::finish() { Symbols.freeze(); }
+void SymbolCollector::initialize(ASTContext &Ctx) {
+  ASTCtx = &Ctx;
+}
+
+void SymbolCollector::finish() {
+  Symbols.freeze();
+  for (auto *CB : AllCallbacks)
+    CB->onFinish(*ASTCtx, Symbols);
+}
 
 } // namespace clangd
 } // namespace clang
Index: clangd/global-symbol-builder/run-global-symbol-builder.py
===================================================================
--- /dev/null
+++ clangd/global-symbol-builder/run-global-symbol-builder.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+#
+#=- run-global-symbol-builder.py -------------------------------*- python  -*-=#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+"""
+Parallel global-symbol-builder runner
+=====================================
+
+Runs global-symbol-builder over all files in a compilation database.
+
+Example invocations.
+- Run global-symbol-builder on all files in the compilation database.
+    run-global-symbol-builder.py -p <build-path>
+
+Compilation database setup:
+http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html
+"""
+
+import argparse
+import json
+import multiprocessing
+import os
+import Queue
+import shutil
+import subprocess
+import sys
+import tempfile
+import threading
+
+
+def find_compilation_database(path):
+  """Adjusts the directory until a compilation database is found."""
+  result = './'
+  while not os.path.isfile(os.path.join(result, path)):
+    if os.path.realpath(result) == '/':
+      print 'Error: could not find compilation database.'
+      sys.exit(1)
+    result += '../'
+  return os.path.realpath(result)
+
+
+def MergeSymbols(directory, args):
+  """Merge all symbol files (yaml) in a given directory into a single file."""
+  invocation = [args.binary, '-merge-dir='+directory, args.saving_path]
+  subprocess.call(invocation)
+  print 'Merge is finished. Saving results in ' + args.saving_path
+
+
+def run_global_symbol_builder(args, tmpdir, build_path, queue):
+  """Takes filenames out of queue and runs global-symbol-builder on them."""
+  while True:
+    name = queue.get()
+    invocation = [args.binary, name, '-output-dir='+tmpdir, '-p='+build_path]
+    sys.stdout.write(' '.join(invocation) + '\n')
+    subprocess.call(invocation)
+    queue.task_done()
+
+
+def main():
+  """Runs the builder on all database files, then merges the results."""
+  parser = argparse.ArgumentParser(description='Runs global-symbol-builder over'
+                                   ' all files in a compilation database.')
+  parser.add_argument('-binary', metavar='PATH',
+                      default='./bin/global-symbol-builder',
+                      help='path to global-symbol-builder binary')
+  parser.add_argument('-j', type=int, default=0,
+                      help='number of instances to be run in parallel.')
+  parser.add_argument('-p', dest='build_path',
+                      help='path used to read a compilation database.')
+  parser.add_argument('-saving-path', default='./clangd-global-symbol.yaml',
+                      help='path of saving the merged results')
+  parser.add_argument('-tmp-path', default='/tmp',
+                      help='path of saving the temporary results')
+  args = parser.parse_args()
+
+  db_path = 'compile_commands.json'
+
+  if args.build_path is not None:
+    build_path = args.build_path
+  else:
+    build_path = find_compilation_database(db_path)
+
+  # Load the database and extract all files; close the file once parsed.
+  with open(os.path.join(build_path, db_path)) as db_file:
+    database = json.load(db_file)
+  files = [entry['file'] for entry in database]
+
+  max_task = args.j
+  if max_task == 0:
+    max_task = multiprocessing.cpu_count()
+
+  try:
+    # Spin up a bunch of launching threads.
+    queue = Queue.Queue(max_task)
+    for _ in range(max_task):
+      t = threading.Thread(target=run_global_symbol_builder,
+                           args=(args, args.tmp_path, build_path, queue))
+      t.daemon = True
+      t.start()
+
+    # Fill the queue with files.
+    for name in files:
+      queue.put(name)
+
+    # Wait for all threads to be done.
+    queue.join()
+
+    MergeSymbols(args.tmp_path, args)
+
+  except KeyboardInterrupt:
+    # This is a sad hack. Unfortunately subprocess goes
+    # bonkers with ctrl-c and we start forking merrily.
+    print '\nCtrl-C detected, goodbye.'
+    os.kill(0, 9)
+
+
+if __name__ == '__main__':
+  main()
Index: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
===================================================================
--- /dev/null
+++ clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
@@ -0,0 +1,192 @@
+//===--- GlobalSymbolBuilderMain.cpp -----------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// GlobalSymbolBuilder is a tool to generate YAML-format symbols across the
+// whole project. This tool is **experimental** only. Don't use it in
+// production code.
+//
+//===---------------------------------------------------------------------===//
+
+#include "index/Index.h"
+#include "index/SymbolCollector.h"
+#include "index/SymbolYAML.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Index/IndexingAction.h"
+#include "clang/Index/IndexDataConsumer.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Execution.h"
+#include "clang/Tooling/StandaloneExecution.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ThreadPool.h"
+
+using namespace llvm;
+using clang::clangd::SymbolSlab;
+
+namespace clang {
+namespace clangd {
+
+// Reports the YAML-serialized symbols to the execution context, keyed by file.
+class YAMLSymbolCallback : public SymbolCollector::SymbolCallback {
+public:
+  YAMLSymbolCallback(tooling::ExecutionContext& Context) : Context(Context) {}
+  void onFinish(const ASTContext &ASTCtx, const SymbolSlab &Symbols) override {
+    auto FID = ASTCtx.getSourceManager().getMainFileID();
+    const auto *Entry = ASTCtx.getSourceManager().getFileEntryForID(FID);
+    if (!Entry) // No file entry for the main file ID: no key to report under.
+      return;
+    Context.reportResult(Entry->tryGetRealPathName(), SymbolToYAML(Symbols));
+  }
+
+private:
+  tooling::ExecutionContext& Context;
+};
+
+class SymbolIndexActionFactory : public tooling::FrontendActionFactory {
+ public:
+   SymbolIndexActionFactory(tooling::ExecutionContext &Context)
+       : Context(Context) {}
+
+   clang::FrontendAction *create() override {
+     index::IndexingOptions IndexOpts;
+     IndexOpts.SystemSymbolFilter =
+         index::IndexingOptions::SystemSymbolFilterKind::All;
+     IndexOpts.IndexFunctionLocals = false;
+     Callback = llvm::make_unique<YAMLSymbolCallback>(Context);
+     Collector = std::make_shared<SymbolCollector>();
+     Collector->addSymbolCallback(Callback.get());
+     return index::createIndexingAction(Collector, IndexOpts, nullptr)
+         .release();
+  }
+
+  tooling::ExecutionContext &Context;
+  std::shared_ptr<SymbolCollector> Collector;
+  std::unique_ptr<YAMLSymbolCallback> Callback;
+};
+
+} // namespace clangd
+} // namespace clang
+
+static cl::OptionCategory IndexSourceCategory("index-source-builder options");
+
+static cl::opt<std::string> OutputDir("output-dir", cl::desc(R"(
+The output directory for saving the results.)"),
+                                      cl::init("."),
+                                      cl::cat(IndexSourceCategory));
+
+static cl::opt<std::string> MergeDir("merge-dir", cl::desc(R"(
+The directory for merging symbols.)"),
+                                     cl::init(""),
+                                     cl::cat(IndexSourceCategory));
+
+bool WriteFile(llvm::StringRef OutputFile, const SymbolSlab& Symbols) {
+  std::error_code EC;
+  llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::F_None);
+  if (EC) {
+    llvm::errs() << "Can't open '" << OutputFile << "': " << EC.message()
+                 << '\n';
+    return false;
+  }
+  OS << clang::clangd::SymbolToYAML(Symbols);
+  return true;
+}
+
+// Merges all symbol files in MergeDir into OutputFile, deduplicating by ID.
+bool Merge(llvm::StringRef MergeDir, llvm::StringRef OutputFile) {
+  std::error_code EC;
+  SymbolSlab Result;
+  std::mutex SymbolMutex;
+  auto AddSymbols = [&](const SymbolSlab& NewSymbols) {
+    // Synchronize set accesses.
+    std::unique_lock<std::mutex> LockGuard(SymbolMutex);
+    for (const auto &Symbol : NewSymbols) {
+      if (Result.find(Symbol.second.ID) == Result.end())
+        Result.insert(Symbol.second);
+    }
+  };
+
+  // Load all symbol files in MergeDir.
+  {
+    llvm::ThreadPool Pool;
+    for (llvm::sys::fs::directory_iterator Dir(MergeDir, EC), DirEnd;
+         Dir != DirEnd && !EC; Dir.increment(EC)) {
+      // Parse YAML files in parallel.
+      Pool.async(
+          [&AddSymbols](std::string Path) {
+            auto Buffer = llvm::MemoryBuffer::getFile(Path);
+            if (!Buffer) {
+              llvm::errs() << "Can't open " << Path << "\n";
+              return;
+            }
+            auto Symbols =
+                clang::clangd::SymbolFromYAML(Buffer.get()->getBuffer());
+            // FIXME: Merge without creating such a heavy contention point.
+            AddSymbols(Symbols);
+          },
+          Dir->path());
+    }
+  }
+  // Report failure if the merged symbols could not be written.
+  return WriteFile(OutputFile, Result);
+}
+
+
+int main(int argc, const char **argv) {
+  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
+  auto Executor = clang::tooling::createExecutorFromCommandLineArgs(
+      argc, argv, IndexSourceCategory,
+      "This is an **experimental** tool to generate YAML-format "
+      "project-wide symbols for clangd (global code completion). It would be "
+      "changed and deprecated eventually. Don't use it in production code!");
+
+  if (!Executor) {
+    llvm::errs() << llvm::toString(Executor.takeError()) << "\n";
+    return 1;
+  }
+
+  if (!MergeDir.empty()) {
+    // FIXME: createExecutorFromCommandLineArgs will print "Error while trying
+    // to load a compilation database" for the `merge` mode, we don't want this
+    // warning during merging.
+    llvm::outs() << "Merging symbols now\n";
+    // FIXME: use a safer way to do downcast -- ToolExecutor doesn't support
+    // LLVM-style RTTI yet. StandaloneToolExecutor is the expected executor of
+    // global-symbol-builder.
+    auto *STE =
+        static_cast<clang::tooling::StandaloneToolExecutor *>(Executor->get());
+    assert(!STE->getSourcePaths().empty());
+    return Merge(MergeDir, STE->getSourcePaths()[0]) ? 0 : 1;
+  }
+
+  std::unique_ptr<clang::tooling::FrontendActionFactory> T(
+      new clang::clangd::SymbolIndexActionFactory(
+          *Executor->get()->getExecutionContext()));
+  auto Err = Executor->get()->execute(std::move(T));
+  if (Err) {
+    llvm::errs() << llvm::toString(std::move(Err)) << "\n";
+    return 1;
+  }
+
+  Executor->get()->getToolResults()->forEachResult(
+      [](llvm::StringRef Key, llvm::StringRef Value) {
+        int FD;
+        SmallString<128> ResultPath;
+        // FIXME: the error code is ignored; FD is invalid if creation fails.
+        llvm::sys::fs::createUniqueFile(
+            OutputDir + "/" + llvm::sys::path::filename(Key) + "-%%%%%%.yaml",
+            FD, ResultPath);
+        llvm::raw_fd_ostream OS(FD, true);
+        OS << Value;
+      });
+  return 0;
+}
Index: clangd/global-symbol-builder/CMakeLists.txt
===================================================================
--- /dev/null
+++ clangd/global-symbol-builder/CMakeLists.txt
@@ -0,0 +1,19 @@
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../)
+
+set(LLVM_LINK_COMPONENTS
+    Support
+    )
+
+add_clang_executable(global-symbol-builder
+  GlobalSymbolBuilderMain.cpp
+  )
+
+target_link_libraries(global-symbol-builder
+  PRIVATE
+  clangAST
+  clangIndex
+  clangDaemon
+  clangBasic
+  clangFrontend
+  clangTooling
+)
Index: clangd/CMakeLists.txt
===================================================================
--- clangd/CMakeLists.txt
+++ clangd/CMakeLists.txt
@@ -47,3 +47,4 @@
   add_subdirectory(fuzzer)
 endif()
 add_subdirectory(tool)
+add_subdirectory(global-symbol-builder)
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to