sammccall created this revision.
sammccall added a reviewer: ilya-biryukov.
Herald added subscribers: cfe-commits, mgorny, klimek.

Not enabled because we need a threadsafe way to change VFS working directories.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D41911

Files:
  clangd/CMakeLists.txt
  clangd/Compiler.cpp
  clangd/IncludeScanner.cpp
  clangd/IncludeScanner.h
  unittests/clangd/CMakeLists.txt
  unittests/clangd/IncludeScannerTests.cpp

Index: unittests/clangd/IncludeScannerTests.cpp
===================================================================
--- /dev/null
+++ unittests/clangd/IncludeScannerTests.cpp
@@ -0,0 +1,61 @@
+//===-- IncludeScannerTests.cpp  ----------------------*- C++ -*-----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IncludeScanner.h"
+#include "TestFS.h"
+#include "llvm/ADT/StringMap.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "Logger.h"
+
+namespace clang {
+namespace tooling {
+void PrintTo(const llvm::Optional<CompileCommand> &S, std::ostream *OS) {
+  if (!S) // Placeholder for an empty Optional.
+    *OS << "<none>";
+  else // Otherwise the command-line arguments joined by single spaces.
+    *OS << llvm::join(S->CommandLine, " ");
+}
+} // namespace tooling
+namespace clangd {
+namespace {
+
+MATCHER_P2(HasCmd, Args, Filename, "") {
+  return arg && // Must hold a value before its fields are compared.
+         arg->CommandLine == Args &&
+         arg->Filename == Filename;
+}
+
+testing::Matcher<llvm::Optional<tooling::CompileCommand>>
+Cmd(std::vector<std::string> Args) { // Args must be non-empty: back() is read.
+  return HasCmd(Args, Args.back()); // The last argument doubles as the filename.
+}
+
+TEST(IncludeScanner, FindsFiles) {
+  // Copy the paths into std::strings: a StringRef bound to a value returned
+  // by getVirtualTestFilePath (declared in TestFS.h, not shown) could dangle.
+  const auto Own = [](llvm::StringRef P) { return P.str(); };
+  std::string One = Own(getVirtualTestFilePath("one.cc")),
+              Two = Own(getVirtualTestFilePath("dir/two.h")),
+              Three = Own(getVirtualTestFilePath("dir/three.h"));
+  llvm::StringMap<std::string> Files;
+  Files[One] = R"cpp(#include "dir/two.h")cpp";
+  Files[Two] = R"cpp(#include "three.h")cpp";
+  Files[Three] = "";
+  auto FS = buildTestFS(Files);
+  IncludeScanner Scanner;
+  Scanner.enqueue({{".", One, {"clang", "-DX", One}, ""}}, FS);
+  Scanner.wait(); // Both headers are reached transitively from one.cc.
+  EXPECT_THAT(Scanner.lookup(Two), Cmd({"clang", "-DX", "-x", "c++", Two}));
+  EXPECT_THAT(Scanner.lookup(Three), Cmd({"clang", "-DX", "-x", "c++", Three}));
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: unittests/clangd/CMakeLists.txt
===================================================================
--- unittests/clangd/CMakeLists.txt
+++ unittests/clangd/CMakeLists.txt
@@ -16,6 +16,7 @@
   ContextTests.cpp
   FileIndexTests.cpp
   FuzzyMatchTests.cpp
+  IncludeScannerTests.cpp
   IndexTests.cpp
   JSONExprTests.cpp
   TestFS.cpp
Index: clangd/IncludeScanner.h
===================================================================
--- /dev/null
+++ clangd/IncludeScanner.h
@@ -0,0 +1,94 @@
+//===--- IncludeScanner.h - Infer compile commands for headers --*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+// Typical compilation databases don't list commands for headers. But headers
+// can be compiled with the same flags as the files that include them.
+// So when we find a database, we run the preprocessor over each command it
+// lists to find the #included files that command is valid for.
+//===---------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INCLUDESCANNER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INCLUDESCANNER_H
+#include "Context.h"
+#include "Path.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
+#include <condition_variable>
+#include <deque>
+#include <memory>
+#include <mutex>
+#include <thread>
+#include <vector>
+
+namespace clang {
+class PCHContainerOperations;
+namespace vfs {
+class FileSystem;
+}
+namespace tooling {
+struct CompileCommand;
+} // namespace tooling
+namespace clangd {
+
+// IncludeScanner runs a background thread that scans files for which compile
+// commands are known, recording the headers for which that command is valid.
+//
+// It supports lookup by header name. As scanning doesn't block, it's always
+// possible we won't have scanned the right compile command yet.
+// This means the compilation database should also have heuristics for headers.
+//
+// This class is threadsafe.
+class IncludeScanner {
+public:
+  IncludeScanner();
+
+  // The destructor may wait for the current file to finish scanning.
+  ~IncludeScanner();
+
+  // If we've scanned some file that #includes Header, return the inferred
+  // compile command. This will have the same flags, but the filename replaced.
+  llvm::Optional<tooling::CompileCommand> lookup(PathRef Header) const;
+
+  // Adds compile commands to scan. Files we've already scanned will be ignored.
+  void enqueue(std::vector<tooling::CompileCommand> Cmds,
+               llvm::IntrusiveRefCntPtr<vfs::FileSystem> VFS);
+
+  // Blocks until the scanner is idle. Mostly useful for tests.
+  void wait() const;
+
+private:
+  struct QueueEntry;
+
+  // Worker thread body: read commands off the queue and process them.
+  void consumeQueue();
+  // Scan a compile command and record included headers. Runs on worker thread.
+  void process(QueueEntry Cmd);
+  // Checks Cmd's filename is its last argument; may rewrite Cmd.CommandLine.
+  bool canonicalize(tooling::CompileCommand &Cmd);
+
+  // Commands we've detected that are available for lookup.
+  mutable std::mutex CommandsMu;
+  llvm::StringMap<tooling::CompileCommand> Commands;
+
+  // The BFS state is locked by a mutex so other threads can enqueue.
+  mutable std::mutex QueueMu;
+  std::deque<QueueEntry> Queue;
+  // CompileCommand.Filenames we've processed, so we can skip duplicates.
+  llvm::StringSet<llvm::BumpPtrAllocator> SeenFiles;
+  bool Done = false; // Indicates the queue is closing down.
+  mutable std::condition_variable QueueCV; // Notified on enqueue/drain/shutdown.
+
+  std::shared_ptr<PCHContainerOperations> PCHContainerOps;
+  Context Background;
+  // Declared last: the thread starts on construction and uses the state above.
+  std::thread Worker;
+};
+
+} // namespace clangd
+} // namespace clang
+#endif
Index: clangd/IncludeScanner.cpp
===================================================================
--- /dev/null
+++ clangd/IncludeScanner.cpp
@@ -0,0 +1,197 @@
+//===--- IncludeScanner.cpp - Infer compile commands for headers *- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+#include "IncludeScanner.h"
+#include "Compiler.h"
+#include "Logger.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Frontend/PCHContainerOperations.h"
+#include "clang/Tooling/ArgumentsAdjusters.h"
+#include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+namespace clang {
+namespace clangd {
+using namespace llvm;
+using tooling::CompileCommand;
+
+struct IncludeScanner::QueueEntry : public CompileCommand {
+  QueueEntry(const CompileCommand &Cmd, IntrusiveRefCntPtr<vfs::FileSystem> VFS)
+      : CompileCommand(Cmd), VFS(std::move(VFS)) {}
+  IntrusiveRefCntPtr<vfs::FileSystem> VFS; // FS the command is scanned on.
+};
+
+IncludeScanner::IncludeScanner()
+    : PCHContainerOps(std::make_shared<PCHContainerOperations>()),
+      Worker([this] { consumeQueue(); }) {} // Thread runs until Done is set.
+
+IncludeScanner::~IncludeScanner() {
+  {
+    std::lock_guard<std::mutex> Lock(QueueMu);
+    Done = true; // Read by consumeQueue() and wait() under QueueMu.
+  }
+  // Wake up the worker (and any wait() callers) blocked on the condition.
+  QueueCV.notify_all();
+  // Don't destroy members until the thread exits.
+  Worker.join();
+}
+
+Optional<CompileCommand> IncludeScanner::lookup(PathRef File) const {
+  std::lock_guard<std::mutex> Guard(CommandsMu); // Commands is shared state.
+  const auto Found = Commands.find(File);
+  if (Found != Commands.end())
+    return Found->getValue(); // Copied out while the lock is still held.
+  return None;
+}
+
+// Checks that a compile command ends with its own filename (compared by
+// basename), so the last argument can later be replaced by a header path.
+bool IncludeScanner::canonicalize(CompileCommand &C) {
+  const auto Base = [](StringRef P) { return sys::path::filename(P); };
+  if (C.CommandLine.empty() || Base(C.Filename) != Base(C.CommandLine.back())) {
+    log(Background, "Compile command not canonical [" + C.Filename + "] " +
+                        join(C.CommandLine, " "));
+    return false;
+  }
+  // Accepted commands are rewritten by the strip-output arguments adjuster.
+  const auto StripOutput = tooling::getClangStripOutputAdjuster();
+  C.CommandLine = StripOutput(C.CommandLine, C.Filename);
+  return true;
+}
+
+void IncludeScanner::enqueue(std::vector<CompileCommand> Cmds,
+                             IntrusiveRefCntPtr<vfs::FileSystem> VFS) {
+  std::lock_guard<std::mutex> Guard(QueueMu);
+  const bool StartedIdle = Queue.empty();
+  for (const CompileCommand &Candidate : Cmds) {
+    QueueEntry Entry(Candidate, VFS);
+    if (!canonicalize(Entry) || !SeenFiles.insert(Entry.Filename).second)
+      continue; // Not canonical, or this file was already queued.
+    Queue.push_back(std::move(Entry));
+  }
+  if (StartedIdle && !Queue.empty()) // Only an empty->non-empty change wakes.
+    QueueCV.notify_all();
+}
+
+void IncludeScanner::wait() const {
+  std::unique_lock<std::mutex> Lock(QueueMu);
+  QueueCV.wait(Lock, [&] { return Done || Queue.empty(); }); // Idle or closing.
+}
+
+void IncludeScanner::consumeQueue() {
+  while (true) {
+    std::unique_lock<std::mutex> Lock(QueueMu);
+    QueueCV.wait(Lock, [&] { return Done || !Queue.empty(); });
+    if (Done) // Shutting down: any remaining entries are left unprocessed.
+      return;
+    auto &Entry = Queue.front();
+    Lock.unlock(); // Scan without QueueMu; process() itself calls enqueue().
+    process(std::move(Entry));
+    Lock.lock();
+    Queue.pop_front(); // Popped only after scanning, so wait() sees us busy.
+    if (Queue.empty()) {
+      log(Background, "Finished scanning headers");
+      QueueCV.notify_all(); // Wakes threads blocked in wait().
+    }
+  }
+}
+
+// Preprocesses Cmd, then records and enqueues a command per included header.
+void IncludeScanner::process(QueueEntry Cmd) {
+  class RecordHeaders : public PPCallbacks { // Collects real paths of includes.
+  public:
+    RecordHeaders(std::vector<std::string> &Headers) : Headers(Headers) {}
+    void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
+                            StringRef FileName, bool IsAngled,
+                            CharSourceRange FilenameRange,
+                            const FileEntry *File, StringRef SearchPath,
+                            StringRef RelativePath,
+                            const Module *Imported) override {
+      if (File != nullptr && !File->tryGetRealPathName().empty())
+        Headers.push_back(File->tryGetRealPathName());
+    }
+  private:
+    std::vector<std::string> &Headers;
+  };
+
+  std::vector<const char *> Argv;
+  for (const auto &S : Cmd.CommandLine)
+    Argv.push_back(S.c_str());
+  IgnoringDiagConsumer IgnoreDiags;
+  // XXX the VFS working directory is global state, this is unsafe.
+  Cmd.VFS->setCurrentWorkingDirectory(Cmd.Directory);
+  auto CI = clang::createInvocationFromCommandLine(
+      Argv,
+      CompilerInstance::createDiagnostics(new DiagnosticOptions(), &IgnoreDiags,
+                                          false),
+      Cmd.VFS);
+  if (!CI)
+    return;
+  CI->getFrontendOpts().DisableFree = false;
+  CI->getPreprocessorOpts().SingleFileParseMode = true;
+  auto Clang = prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr,
+                                       /*Buffer=*/nullptr, PCHContainerOps,
+                                       Cmd.VFS, IgnoreDiags);
+  PreprocessOnlyAction Action;
+  if (!Clang || Clang->getFrontendOpts().Inputs.empty() ||
+      !Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0]))
+    return; // Also covers a null Clang, should prepareCompilerInstance fail.
+  std::vector<std::string> Headers;
+  Clang->getPreprocessor().addPPCallbacks(
+      make_unique<RecordHeaders>(Headers));
+  const bool Executed = Action.Execute();
+  Action.EndSourceFile(); // Pairs with the successful BeginSourceFile above.
+  if (!Executed)
+    return;
+  // Now we turn Cmd into a template that we can use for each included header.
+  Cmd.CommandLine.pop_back(); // Remove the filename.
+  // `clang foo.cpp` builds as C++, but `clang foo.h` builds as C.
+  // When reusing flags, we add an explicit `-x <lang>` to override the extension.
+  // TODO: copying CompilerInvocation would avoid this, and is more robust.
+  if (std::find(Cmd.CommandLine.begin(), Cmd.CommandLine.end(), "-x") ==
+      Cmd.CommandLine.end()) {
+    Cmd.CommandLine.push_back("-x");
+    switch (Clang->getFrontendOpts().Inputs[0].getKind().getLanguage()) {
+      case InputKind::C:
+        Cmd.CommandLine.push_back("c");
+        break;
+      case InputKind::CXX:
+        Cmd.CommandLine.push_back("c++");
+        break;
+      case InputKind::ObjC:
+        Cmd.CommandLine.push_back("objective-c");
+        break;
+      case InputKind::ObjCXX:
+        Cmd.CommandLine.push_back("objective-c++");
+        break;
+      default: // No spelling for this language: drop the dangling "-x" again.
+        Cmd.CommandLine.pop_back();
+        break;
+    }
+  }
+  std::lock_guard<std::mutex> Lock(CommandsMu); // Held through enqueue() below.
+  std::vector<CompileCommand> NextCommands;
+  for (const auto &S : Headers) {
+    auto R = Commands.try_emplace(S, Cmd);
+    if (!R.second)
+      continue;  // Already seen this header.
+    auto &NewCmd = R.first->getValue();
+    NewCmd.Filename = S;
+    NewCmd.CommandLine.push_back(S); // The header takes the filename slot.
+    NextCommands.push_back(NewCmd);
+  }
+  if (!NextCommands.empty()) // Newly found headers are scanned in turn.
+    enqueue(std::move(NextCommands), std::move(Cmd.VFS));
+}
+
+} // namespace clangd
+} // namespace clang
Index: clangd/Compiler.cpp
===================================================================
--- clangd/Compiler.cpp
+++ clangd/Compiler.cpp
@@ -29,17 +29,18 @@
                         IntrusiveRefCntPtr<vfs::FileSystem> VFS,
                         DiagnosticConsumer &DiagsClient) {
   assert(VFS && "VFS is null");
-  assert(!CI->getPreprocessorOpts().RetainRemappedFileBuffers &&
-         "Setting RetainRemappedFileBuffers to true will cause a memory leak "
-         "of ContentsBuffer");
-
-  // NOTE: we use Buffer.get() when adding remapped files, so we have to make
-  // sure it will be released if no error is emitted.
-  if (Preamble) {
-    Preamble->AddImplicitPreamble(*CI, VFS, Buffer.get());
-  } else {
-    CI->getPreprocessorOpts().addRemappedFile(
-        CI->getFrontendOpts().Inputs[0].getFile(), Buffer.get());
+  if (Buffer != nullptr) {
+    assert(!CI->getPreprocessorOpts().RetainRemappedFileBuffers &&
+           "Setting RetainRemappedFileBuffers to true will cause a memory leak "
+           "of Buffer");
+    // NOTE: we use Buffer.get() when adding remapped files, so we have to make
+    // sure it will be released if no error is emitted.
+    if (Preamble) {
+      Preamble->AddImplicitPreamble(*CI, VFS, Buffer.get());
+    } else {
+      CI->getPreprocessorOpts().addRemappedFile(
+          CI->getFrontendOpts().Inputs[0].getFile(), Buffer.get());
+    }
   }
 
   auto Clang = llvm::make_unique<CompilerInstance>(PCHs);
@@ -58,7 +59,8 @@
 
   // RemappedFileBuffers will handle the lifetime of the Buffer pointer,
   // release it.
-  Buffer.release();
+  if (Buffer != nullptr)
+    Buffer.release();
   return Clang;
 }
 
Index: clangd/CMakeLists.txt
===================================================================
--- clangd/CMakeLists.txt
+++ clangd/CMakeLists.txt
@@ -14,6 +14,7 @@
   DraftStore.cpp
   FuzzyMatch.cpp
   GlobalCompilationDatabase.cpp
+  IncludeScanner.cpp
   JSONExpr.cpp
   JSONRPCDispatcher.cpp
   Logger.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to