kbobyrev updated this revision to Diff 366832.
kbobyrev added a comment.

[clangd] IncludeCleaner: Mark used headers.

Follow-up on D105426 <https://reviews.llvm.org/D105426>.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105426/new/

https://reviews.llvm.org/D105426

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/Headers.cpp
  clang-tools-extra/clangd/Headers.h
  clang-tools-extra/clangd/IncludeCleaner.cpp
  clang-tools-extra/clangd/IncludeCleaner.h
  clang-tools-extra/clangd/ParsedAST.cpp
  clang-tools-extra/clangd/ParsedAST.h
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp

Index: clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
@@ -0,0 +1,136 @@
+//===--- IncludeCleanerTests.cpp --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Annotations.h"
+#include "IncludeCleaner.h"
+#include "TestTU.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+TEST(IncludeCleaner, ReferencedLocations) {
+  struct TestCase {
+    std::string HeaderCode;
+    std::string MainCode;
+  };
+  TestCase Cases[] = {
+      // DeclRefExpr: calling a function references its declaration.
+      {
+          "int ^x();",
+          "int y = x();",
+      },
+      // RecordDecl: naming a class references its (forward) declaration.
+      {
+          "class ^X;",
+          "X *y;",
+      },
+      // TypedefType and UsingDecls
+      {
+          "using ^Integer = int;",
+          "Integer x;",
+      },
+      {
+          "namespace ns { struct ^X; struct ^X {}; }",
+          "using ns::X;",
+      },
+      { // A using-directive alone references nothing in the namespace.
+          "namespace ns { struct X; struct X {}; }",
+          "using namespace ns;",
+      },
+      {
+          "struct ^A {}; using B = A; using ^C = B;",
+          "C a;",
+      },
+      {
+          "typedef bool ^Y; template <typename T> struct ^X {};",
+          "X<Y> x;",
+      },
+      {
+          "struct Foo; struct ^Foo{}; typedef Foo ^Bar;",
+          "Bar b;",
+      },
+      // MemberExpr: the member, its class and the callee are all referenced.
+      {
+          "struct ^X{int ^a;}; X ^foo();",
+          "int y = foo().a;",
+      },
+      // Expr (type is traversed)
+      {
+          "class ^X{}; X ^foo();",
+          "auto bar() { return foo(); }",
+      },
+      // Redecls: every redeclaration of a referenced symbol is reported.
+      {
+          "class ^X; class ^X{}; class ^X;",
+          "X *y;",
+      },
+      // Constructor
+      {
+          "struct ^X { ^X(int) {} int ^foo(); };",
+          "auto x = X(42); auto y = x.foo();",
+      },
+      // Static function
+      {
+          "struct ^X { static bool ^foo(); }; bool X::^foo() {}",
+          "auto b = X::foo();",
+      },
+      // TemplateRecordDecl
+      {
+          "template <typename> class ^X;",
+          "X<int> *y;",
+      },
+      // Type name not spelled out in code
+      {
+          "class ^X{}; X ^getX();",
+          "auto x = getX();",
+      },
+      // Enums: only the enumerators actually used are referenced.
+      {
+          "enum ^Color { ^Red = 42, Green = 9000};",
+          "int MyColor = Red;",
+      },
+      {
+          "struct ^X { enum ^Language { ^CXX = 42, Python = 9000}; };",
+          "int Lang = X::CXX;",
+      },
+      {
+          // When a type is resolved via a using declaration, the
+          // UsingShadowDecl is not referenced in the AST.
+          // Compare to TypedefType, or DeclRefExpr::getFoundDecl().
+          //                                 ^
+          "namespace ns { class ^X; }; using ns::X;",
+          "X *y;",
+      }};
+  for (const TestCase &T : Cases) {
+    TestTU TU;
+    TU.Code = T.MainCode;
+    Annotations Header(T.HeaderCode);
+    TU.HeaderCode = Header.code().str();
+    auto AST = TU.build();
+
+    std::vector<Position> Points;
+    for (const auto &Loc : findReferencedLocations(AST)) {
+      if (AST.getSourceManager().getBufferName(Loc).endswith(
+              TU.HeaderFilename)) {
+        Points.push_back(offsetToPosition(
+            TU.HeaderCode, AST.getSourceManager().getFileOffset(Loc)));
+      }
+    }
+    llvm::sort(Points); // The DenseSet above iterates in unspecified order.
+
+    EXPECT_EQ(Points, Header.points()) << T.HeaderCode << "\n---\n"
+                                       << T.MainCode;
+  }
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/unittests/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/unittests/CMakeLists.txt
+++ clang-tools-extra/clangd/unittests/CMakeLists.txt
@@ -58,6 +58,7 @@
   HeadersTests.cpp
   HeaderSourceSwitchTests.cpp
   HoverTests.cpp
+  IncludeCleanerTests.cpp
   IndexActionTests.cpp
   IndexTests.cpp
   InlayHintTests.cpp
Index: clang-tools-extra/clangd/ParsedAST.h
===================================================================
--- clang-tools-extra/clangd/ParsedAST.h
+++ clang-tools-extra/clangd/ParsedAST.h
@@ -116,6 +116,8 @@
     return Resolver.get();
   }
 
+  void computeUsedIncludes();
+
 private:
   ParsedAST(llvm::StringRef Version,
             std::shared_ptr<const PreambleData> Preamble,
Index: clang-tools-extra/clangd/ParsedAST.cpp
===================================================================
--- clang-tools-extra/clangd/ParsedAST.cpp
+++ clang-tools-extra/clangd/ParsedAST.cpp
@@ -18,6 +18,7 @@
 #include "Features.h"
 #include "Headers.h"
 #include "HeuristicResolver.h"
+#include "IncludeCleaner.h"
 #include "IncludeFixer.h"
 #include "Preamble.h"
 #include "SourceCode.h"
@@ -601,5 +602,36 @@
     return llvm::None;
   return llvm::StringRef(Preamble->Version);
 }
+
+/// Computes which includes are used by the symbols referenced in this AST.
+///
+/// Referenced locations are mapped to the names of the files containing them
+/// and passed to Includes.markUsed() together with directlyReferencedFiles.
+void ParsedAST::computeUsedIncludes() {
+  const auto &SM = getSourceManager();
+  // The main file is not guaranteed to have a FileEntry; don't dereference a
+  // null pointer in that case.
+  const FileEntry *MainFE = SM.getFileEntryForID(SM.getMainFileID());
+  if (!MainFE) {
+    elog("IncludeCleaner: missing FileEntry for the main file");
+    return;
+  }
+
+  auto Refs = findReferencedLocations(*this);
+  auto ReferencedFileIDs = findReferencedFiles(Refs, SM);
+  std::vector<llvm::StringRef> ReferencedFilenames;
+  ReferencedFilenames.reserve(ReferencedFileIDs.size());
+  for (FileID FID : ReferencedFileIDs) {
+    const FileEntry *FE = SM.getFileEntryForID(FID);
+    if (!FE) {
+      elog("Missing FE for {0}", SM.getComposedLoc(FID, 0).printToString(SM));
+      continue;
+    }
+    ReferencedFilenames.push_back(FE->getName());
+  }
+  Includes.markUsed(MainFE->getName(), ReferencedFilenames,
+                    directlyReferencedFiles);
+}
+
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/IncludeCleaner.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/IncludeCleaner.h
@@ -0,0 +1,68 @@
+//===--- IncludeCleaner.h - Unused/Missing Headers Analysis -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Include Cleaner is clangd functionality for providing diagnostics for misuse
+/// of transitive headers and unused includes. It is inspired by
+/// Include-What-You-Use tool (https://include-what-you-use.org/). Our goal is
+/// to provide useful warnings in most popular scenarios but not 1:1 exact
+/// feature compatibility.
+///
+/// FIXME(kirillbobyrev): Add support for IWYU pragmas.
+/// FIXME(kirillbobyrev): Add support for standard library headers.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INCLUDE_CLEANER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INCLUDE_CLEANER_H
+
+#include "Headers.h"
+#include "ParsedAST.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/DenseSet.h"
+
+namespace clang {
+namespace clangd {
+
+using ReferencedLocations = llvm::DenseSet<SourceLocation>;
+/// Finds locations of all symbols used in the main file.
+///
+/// Uses RecursiveASTVisitor to go through main file AST and computes all the
+/// locations used symbols are coming from. Returned locations may be macro
+/// expansions, and are not resolved to their spelling/expansion location. These
+/// locations are later used to determine which headers should be marked as
+/// "used" and "directly used".
+///
+/// We use this to compute unused headers, so we:
+///
+/// - cover the whole file in a single traversal for efficiency
+/// - don't attempt to describe where symbols were referenced from in
+///   ambiguous cases (e.g. implicitly used symbols, multiple declarations)
+/// - err on the side of reporting all possible locations
+ReferencedLocations findReferencedLocations(ParsedAST &AST);
+
+/// Retrieves IDs of all files containing SourceLocations from \p Locs. Those
+/// locations could be within macro expansions and are not resolved to their
+/// spelling locations.
+llvm::DenseSet<FileID> findReferencedFiles(const ReferencedLocations &Locs,
+                                           const SourceManager &SM);
+
+inline llvm::DenseMap<unsigned, bool>
+directlyReferencedFiles(const IncludeStructure::AbstractIncludeGraph &Graph,
+                        const llvm::DenseSet<unsigned> &Referenced,
+                        unsigned EntryPoint) {
+  llvm::DenseMap<unsigned, bool> Decisions; // direct include -> is referenced
+  for (const unsigned Child : Graph.lookup(EntryPoint)) // empty if unknown
+    Decisions.try_emplace(Child, Referenced.count(Child) != 0);
+  return Decisions;
+}
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INCLUDE_CLEANER_H
Index: clang-tools-extra/clangd/IncludeCleaner.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/IncludeCleaner.cpp
@@ -0,0 +1,160 @@
+//===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "IncludeCleaner.h"
+#include "support/Logger.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/SourceLocation.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+/// Crawler traverses the AST and feeds in the locations of (sometimes
+/// implicitly) used symbols into \p Result.
+class ReferencedLocationCrawler
+    : public RecursiveASTVisitor<ReferencedLocationCrawler> {
+public:
+  explicit ReferencedLocationCrawler(ReferencedLocations &Result)
+      : Result(Result) {}
+
+  bool VisitDeclRefExpr(DeclRefExpr *DRE) {
+    add(DRE->getDecl());
+    add(DRE->getFoundDecl());
+    return true;
+  }
+
+  bool VisitMemberExpr(MemberExpr *ME) {
+    add(ME->getMemberDecl());
+    add(ME->getFoundDecl().getDecl());
+    return true;
+  }
+
+  bool VisitTagType(TagType *TT) {
+    add(TT->getDecl());
+    return true;
+  }
+
+  bool VisitCXXConstructExpr(CXXConstructExpr *CCE) {
+    add(CCE->getConstructor());
+    return true;
+  }
+
+  bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) {
+    if (isNew(TST)) {
+      add(TST->getTemplateName().getAsTemplateDecl()); // Primary template.
+      add(TST->getAsCXXRecordDecl());                  // Specialization
+    }
+    return true;
+  }
+
+  bool VisitTypedefType(TypedefType *TT) {
+    add(TT->getDecl());
+    return true;
+  }
+
+  // Consider types of any subexpression used, even if the type is not named.
+  // This is helpful in getFoo().bar(), where Foo must be complete.
+  // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to
+  // consider types "used" when they are not directly spelled in code.
+  bool VisitExpr(Expr *E) {
+    TraverseType(E->getType());
+    return true;
+  }
+
+  // Traverses each distinct type only once; qualifiers are ignored.
+  bool TraverseType(QualType T) {
+    if (isNew(T.getTypePtrOrNull()))
+      Base::TraverseType(T);
+    return true;
+  }
+
+  bool VisitUsingDecl(UsingDecl *D) {
+    for (const auto *Shadow : D->shadows())
+      add(Shadow->getTargetDecl());
+    return true;
+  }
+
+private:
+  using Base = RecursiveASTVisitor<ReferencedLocationCrawler>;
+
+  // Adds every redeclaration of \p D, once per canonical declaration.
+  void add(const Decl *D) {
+    if (!D || !isNew(D->getCanonicalDecl()))
+      return;
+    for (const Decl *Redecl : D->redecls())
+      Result.insert(Redecl->getLocation());
+  }
+
+  // True only the first time a given non-null pointer is passed in.
+  bool isNew(const void *P) { return P && Visited.insert(P).second; }
+
+  ReferencedLocations &Result;
+  llvm::DenseSet<const void *> Visited;
+};
+
+// Given a set of referenced FileIDs, determines all the potentially-referenced
+// files and macros by traversing expansion/spelling locations of macro IDs.
+// This is used to map the referenced SourceLocations onto real files.
+struct ReferencedFiles {
+  explicit ReferencedFiles(const SourceManager &SM) : SM(SM) {}
+  llvm::DenseSet<FileID> Files;  // FileIDs reached through file locations.
+  llvm::DenseSet<FileID> Macros; // Macro FileIDs that were already processed.
+  const SourceManager &SM;
+
+  void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); }
+
+  void add(FileID FID, SourceLocation Loc) {
+    if (FID.isInvalid())
+      return;
+    assert(SM.isInFileID(Loc, FID)); // Callers must pass the FileID of Loc.
+    if (Loc.isFileID()) {
+      Files.insert(FID);
+      return;
+    }
+    // Don't process the same macro FID twice.
+    if (!Macros.insert(FID).second)
+      return;
+    const auto &Exp = SM.getSLocEntry(FID).getExpansion();
+    add(Exp.getSpellingLoc());
+    add(Exp.getExpansionLocStart());
+    add(Exp.getExpansionLocEnd());
+  }
+};
+
+} // namespace
+
+ReferencedLocations findReferencedLocations(ParsedAST &AST) {
+  ReferencedLocations Locations;
+  // The crawler writes into Locations while it walks the AST.
+  ReferencedLocationCrawler(Locations).TraverseAST(AST.getASTContext());
+  // FIXME(kirillbobyrev): Handle macros.
+  return Locations;
+}
+
+llvm::DenseSet<FileID>
+findReferencedFiles(const llvm::DenseSet<SourceLocation> &Locs,
+                    const SourceManager &SM) {
+  // Sorting places locations that share a FileID next to each other.
+  std::vector<SourceLocation> Ordered(Locs.begin(), Locs.end());
+  llvm::sort(Ordered);
+  ReferencedFiles Collected(SM);
+  for (auto Cur = Ordered.begin(), End = Ordered.end(); Cur != End;) {
+    const FileID FID = SM.getFileID(*Cur);
+    Collected.add(FID, *Cur);
+    // Skip the remaining locations of this FileID cheaply, which avoids
+    // repeating the Loc->File lookup for every one of them.
+    ++Cur;
+    while (Cur != End && SM.isInFileID(*Cur, FID))
+      ++Cur;
+  }
+  return std::move(Collected.Files);
+}
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/Headers.h
===================================================================
--- clang-tools-extra/clangd/Headers.h
+++ clang-tools-extra/clangd/Headers.h
@@ -58,6 +58,7 @@
   unsigned HashOffset = 0; // Byte offset from start of file to #.
   int HashLine = 0;        // Line number containing the directive, 0-indexed.
   SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
+  bool Used = false; // Contains symbols used in the main file.
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion &);
 bool operator==(const Inclusion &LHS, const Inclusion &RHS);
@@ -129,6 +130,22 @@
                      llvm::StringRef IncludedName,
                      llvm::StringRef IncludedRealName);
 
+  // Classifying the main-file includes as "used" or "unused" is subtle
+  // (consider transitive includes), so we inject the algorithm.
+
+  // Maps including files (from) to included files (to).
+  using AbstractIncludeGraph = llvm::DenseMap<unsigned, SmallVector<unsigned>>;
+  // Decides usage for each file included by EntryPoint based on the set of
+  // files that contain some referenced symbol.
+  using UsedFunc = llvm::DenseMap<unsigned, bool>(
+      const AbstractIncludeGraph &, llvm::DenseSet<unsigned> Referenced,
+      unsigned EntryPoint);
+  // Produce decisions for all files included from \p EntryPoint (usually the
+  // main file).
+  void markUsed(llvm::StringRef EntryPoint,
+                llvm::ArrayRef<StringRef> ReferencedFiles,
+                llvm::function_ref<UsedFunc>);
+
 private:
   // Identifying files in a way that persists from preamble build to subsequent
   // builds is surprisingly hard. FileID is unavailable in InclusionDirective(),
Index: clang-tools-extra/clangd/Headers.cpp
===================================================================
--- clang-tools-extra/clangd/Headers.cpp
+++ clang-tools-extra/clangd/Headers.cpp
@@ -204,6 +204,58 @@
   return Result;
 }
 
+/// Updates the Used flag of entries in MainFileIncludes whose resolved path
+/// matches a file included directly by \p EntryPoint.
+///
+/// \p ReferencedFiles are names of files that contain referenced symbols;
+/// names missing from the include graph are ignored. \p Algorithm produces the
+/// used/unused decision for each file. Entries without a decision keep their
+/// current Used value.
+void IncludeStructure::markUsed(llvm::StringRef EntryPoint,
+                                llvm::ArrayRef<StringRef> ReferencedFiles,
+                                llvm::function_ref<UsedFunc> Algorithm) {
+  auto Root = NameToIndex.find(EntryPoint);
+  if (Root == NameToIndex.end()) {
+    elog("IncludeCleaner: EntryPoint {0} not found in include graph",
+         EntryPoint);
+    return;
+  }
+
+  llvm::DenseSet<unsigned> Referenced;
+  Referenced.reserve(ReferencedFiles.size());
+  for (llvm::StringRef RefName : ReferencedFiles) {
+    dlog("{0} is REFERENCED", RefName);
+    auto It = NameToIndex.find(RefName);
+    if (It != NameToIndex.end())
+      Referenced.insert(It->second);
+  }
+
+  // An entry point without recorded children has nothing to classify. Return
+  // instead of asserting so that release builds never dereference end().
+  auto RootChildren = IncludeChildren.find(Root->second);
+  if (RootChildren == IncludeChildren.end())
+    return;
+  auto Decisions =
+      Algorithm(IncludeChildren, std::move(Referenced), Root->second);
+  // Map the real path of each direct include back to its file index.
+  llvm::DenseMap</*RealPath*/ StringRef, /*Index*/ unsigned> IncludeIndex;
+  for (const auto Index : RootChildren->second) {
+    if (!RealPathNames[Index].empty())
+      IncludeIndex[RealPathNames[Index]] = Index;
+  }
+  for (auto &MFI : MainFileIncludes) {
+    // FIXME: Skip includes that are not self-contained.
+    auto It = IncludeIndex.find(MFI.Resolved);
+    if (It == IncludeIndex.end())
+      continue;
+    auto DIt = Decisions.find(It->second);
+    if (DIt == Decisions.end())
+      continue;
+    MFI.Used = DIt->second;
+    dlog("{0} is {1}", MFI.Written, MFI.Used ? "USED" : "UNUSED");
+  }
+}
+
 void IncludeInserter::addExisting(const Inclusion &Inc) {
   IncludedHeaders.insert(Inc.Written);
   if (!Inc.Resolved.empty())
Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -83,6 +83,7 @@
   HeaderSourceSwitch.cpp
   HeuristicResolver.cpp
   Hover.cpp
+  IncludeCleaner.cpp
   IncludeFixer.cpp
   InlayHints.cpp
   JSONTransport.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to