rsmith created this revision.
rsmith added a reviewer: aaron.ballman.
Herald added a subscriber: mgrang.
Herald added a project: All.
rsmith requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

If Clang runs out of source location address space, produce a more helpful 
diagnostic than "ran out of source locations" or "translation unit is too 
large". In addition to that diagnostic, also describe the current source 
location address space usage, listing the header files that are contributing 
most to that as well as how many times they were textually entered. The intent 
is to make it easier to determine if the problem is some kind of 
misconfiguration (for example, a header isn't properly include-guarded and gets 
textually entered a lot, or is entered in many AST files), problematic input 
(for example, a preprocessor metaprogram uses a huge amount of source location 
space), or a death by a thousand cuts due to the source program just plain 
being too large.

Also included is a debug pragma to produce the usage report, both to make this 
more readily testable and to provide visibility into source location address 
space usage when debugging clang.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D137751

Files:
  clang/include/clang/Basic/DiagnosticCommonKinds.td
  clang/include/clang/Basic/DiagnosticLexKinds.td
  clang/include/clang/Basic/SourceManager.h
  clang/lib/Basic/SourceManager.cpp
  clang/lib/Lex/Pragma.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/test/Misc/Inputs/include.h
  clang/test/Misc/sloc-usage.cpp

Index: clang/test/Misc/sloc-usage.cpp
===================================================================
--- /dev/null
+++ clang/test/Misc/sloc-usage.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#include "Inputs/include.h"
+#include "Inputs/include.h"
+
+#define FOO(x) x + x
+int k = FOO(FOO(123));
+bool b = EQUALS(k, k);
+
+#pragma clang __debug sloc_usage // expected-remark {{address space usage}}
+// expected-note@* {{(0% of available space)}}
+// (this file)     expected-note-re@1 {{file entered 1 time using {{.*}}B of space plus 51B for macro expansions}}
+// (included file) expected-note-re@Inputs/include.h:1 {{file entered 2 times using {{.*}}B of space{{$}}}}
+// (builtins file) expected-note@* {{file entered}}
Index: clang/test/Misc/Inputs/include.h
===================================================================
--- clang/test/Misc/Inputs/include.h
+++ clang/test/Misc/Inputs/include.h
@@ -1,3 +1,8 @@
 #define EQUALS(a,b) a == b
 
+// It's important for sloc-usage.cpp that this file does not have proper
+// include guards.
+#ifndef FOO_DEFINED
+#define FOO_DEFINED
 int foo(int x) { return x; }
+#endif
Index: clang/lib/Serialization/ASTReader.cpp
===================================================================
--- clang/lib/Serialization/ASTReader.cpp
+++ clang/lib/Serialization/ASTReader.cpp
@@ -3401,9 +3401,14 @@
       std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) =
           SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries,
                                               SLocSpaceSize);
-      if (!F.SLocEntryBaseID)
+      if (!F.SLocEntryBaseID) {
+        if (!Diags.isDiagnosticInFlight()) {
+          Diags.Report(SourceLocation(), diag::remark_sloc_usage);
+          SourceMgr.noteSLocAddressSpaceUsage(Diags);
+        }
         return llvm::createStringError(std::errc::invalid_argument,
                                        "ran out of source locations");
+      }
       // Make our entry in the range map. BaseID is negative and growing, so
       // we invert it. Because we invert it, though, we need the other end of
       // the range.
Index: clang/lib/Lex/Pragma.cpp
===================================================================
--- clang/lib/Lex/Pragma.cpp
+++ clang/lib/Lex/Pragma.cpp
@@ -1043,7 +1043,7 @@
     Token Tok;
     PP.LexUnexpandedToken(Tok);
     if (Tok.isNot(tok::identifier)) {
-      PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid);
+      PP.Diag(Tok, diag::warn_pragma_debug_missing_command);
       return;
     }
     IdentifierInfo *II = Tok.getIdentifierInfo();
@@ -1181,6 +1181,22 @@
         PP.Diag(Tok, diag::warn_pragma_debug_unexpected_command)
           << DumpII->getName();
       }
+    } else if (II->isStr("sloc_usage")) {
+      // An optional integer literal argument specifies the number of files to
+      // specifically report information about.
+      uint64_t MaxNotes = (uint64_t)-1;
+      Token ArgToken;
+      PP.Lex(ArgToken);
+      if (ArgToken.isNot(tok::eod)) {
+        if (ArgToken.isNot(tok::numeric_constant) ||
+            !PP.parseSimpleIntegerLiteral(ArgToken, MaxNotes)) {
+          PP.Diag(ArgToken, diag::warn_pragma_debug_unexpected_argument);
+        }
+      }
+
+      PP.Diag(Tok, diag::remark_sloc_usage);
+      PP.getSourceManager().noteSLocAddressSpaceUsage(PP.getDiagnostics(),
+                                                      MaxNotes);
     } else {
       PP.Diag(Tok, diag::warn_pragma_debug_unexpected_command)
         << II->getName();
Index: clang/lib/Basic/SourceManager.cpp
===================================================================
--- clang/lib/Basic/SourceManager.cpp
+++ clang/lib/Basic/SourceManager.cpp
@@ -615,6 +615,7 @@
   if (!(NextLocalOffset + FileSize + 1 > NextLocalOffset &&
         NextLocalOffset + FileSize + 1 <= CurrentLoadedOffset)) {
     Diag.Report(IncludePos, diag::err_include_too_large);
+    noteSLocAddressSpaceUsage(Diag);
     return FileID();
   }
   LocalSLocEntryTable.push_back(
@@ -671,6 +672,7 @@
     return SourceLocation::getMacroLoc(LoadedOffset);
   }
   LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset, Info));
+  // TODO: Produce a proper diagnostic for this case.
   assert(NextLocalOffset + Length + 1 > NextLocalOffset &&
          NextLocalOffset + Length + 1 <= CurrentLoadedOffset &&
          "Ran out of source locations!");
@@ -2230,6 +2232,90 @@
   }
 }
 
+void SourceManager::noteSLocAddressSpaceUsage(DiagnosticsEngine &Diag,
+                                              unsigned MaxNotes) const {
+  struct Info {
+    // Where to attach this file's note: the first file start location seen.
+    SourceLocation Loc;
+    // Number of FileIDs seen that are this file itself (times it was entered).
+    unsigned Inclusions = 0;
+    // Address space used by the file itself, summed over all of its entries.
+    uint64_t DirectSize = 0;
+    // Total size usage from the file and its macro expansions.
+    uint64_t TotalSize = 0;
+  };
+  using UsageMap = llvm::DenseMap<const FileEntry*, Info>;
+
+  UsageMap Usage;
+  uint64_t CountedSize = 0;
+  for (int IDIndex = -(int)LoadedSLocEntryTable.size() - 1;
+       IDIndex < (int)LocalSLocEntryTable.size(); ++IDIndex) {
+    if (IDIndex == -1) // NOTE(review): -1 presumably maps to no entry; confirm
+      continue;
+    FileID ID = FileID::get(IDIndex);
+
+    // The +1 here is because getFileIDSize doesn't include the extra byte for
+    // the one-past-the-end location.
+    unsigned Size = getFileIDSize(ID) + 1;
+
+    // Find the file that used this address space, either directly or by
+    // macro expansion.
+    SourceLocation FileStart = getFileLoc(getComposedLoc(ID, 0));
+    FileID FileLocID = getFileID(FileStart);
+    const FileEntry *Entry = getFileEntryForID(FileLocID);
+
+    Info &EntryInfo = Usage[Entry];
+    if (EntryInfo.Loc.isInvalid())
+      EntryInfo.Loc = FileStart;
+    if (ID == FileLocID) { // This entry is the file itself: direct usage.
+      ++EntryInfo.Inclusions;
+      EntryInfo.DirectSize += Size;
+    }
+    EntryInfo.TotalSize += Size;
+    CountedSize += Size;
+  }
+
+  // Order the (at most MaxNotes) largest entries by total size, descending.
+  std::vector<UsageMap::iterator> SortedUsage;
+  SortedUsage.reserve(Usage.size());
+  for (auto It = Usage.begin(); It != Usage.end(); ++It)
+    SortedUsage.push_back(It);
+  auto Cmp = [](UsageMap::iterator A, UsageMap::iterator B) {
+    return A->second.TotalSize > B->second.TotalSize;
+  };
+  auto SortedEnd = SortedUsage.end();
+  if (SortedUsage.size() > MaxNotes) { // Only the top MaxNotes get sorted.
+    SortedEnd = SortedUsage.begin() + MaxNotes;
+    std::nth_element(SortedUsage.begin(), SortedEnd, SortedUsage.end(), Cmp);
+  }
+  std::sort(SortedUsage.begin(), SortedEnd, Cmp);
+
+  // Produce note on sloc address space usage total.
+  uint64_t LocalUsage = NextLocalOffset;
+  uint64_t LoadedUsage = MaxLoadedOffset - CurrentLoadedOffset;
+  int UsagePercent = static_cast<int>(100.0 * double(LocalUsage + LoadedUsage) /
+                                      MaxLoadedOffset);
+  Diag.Report(SourceLocation(), diag::note_total_sloc_usage)
+    << LocalUsage << LoadedUsage << (LocalUsage + LoadedUsage) << UsagePercent;
+
+  // Produce notes on sloc address space usage for each file with a high usage.
+  uint64_t ReportedSize = 0;
+  for (UsageMap::iterator It :
+       llvm::make_range(SortedUsage.begin(), SortedEnd)) {
+    Info FileInfo = It->second;
+    Diag.Report(FileInfo.Loc, diag::note_file_sloc_usage)
+        << FileInfo.Inclusions << FileInfo.DirectSize
+        << (FileInfo.TotalSize - FileInfo.DirectSize);
+    ReportedSize += FileInfo.TotalSize;
+  }
+
+  // Describe any remaining usage not reported in the per-file usage.
+  if (ReportedSize != CountedSize) {
+    Diag.Report(SourceLocation(), diag::note_file_misc_sloc_usage)
+        << (SortedUsage.end() - SortedEnd) << CountedSize - ReportedSize;
+  }
+}
+
 ExternalSLocEntrySource::~ExternalSLocEntrySource() = default;
 
 /// Return the amount of memory used by memory buffers, breaking down
Index: clang/include/clang/Basic/SourceManager.h
===================================================================
--- clang/include/clang/Basic/SourceManager.h
+++ clang/include/clang/Basic/SourceManager.h
@@ -710,7 +710,7 @@
   /// not have been loaded, so that value would be unknown.
   SourceLocation::UIntTy CurrentLoadedOffset;
 
-  /// The highest possible offset is 2^32-1 (2^63-1 for 64-bit source
+  /// The highest possible offset is 2^31-1 (2^63-1 for 64-bit source
   /// locations), so CurrentLoadedOffset starts at 2^31 (2^63 resp.).
   static const SourceLocation::UIntTy MaxLoadedOffset =
       1ULL << (8 * sizeof(SourceLocation::UIntTy) - 1);
@@ -1691,6 +1691,10 @@
 
   void dump() const;
 
+  /// Produce notes describing the current source location address space usage.
+  void noteSLocAddressSpaceUsage(DiagnosticsEngine &Diag,
+                                 unsigned MaxNotes = 50) const;
+
   /// Get the number of local SLocEntries we have.
   unsigned local_sloc_entry_size() const { return LocalSLocEntryTable.size(); }
 
Index: clang/include/clang/Basic/DiagnosticLexKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticLexKinds.td
+++ clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -648,10 +648,14 @@
    ExtWarn<"unknown warning group '%0', ignored">,
    InGroup<UnknownWarningOption>;
 // - #pragma __debug
+def warn_pragma_debug_missing_command : Warning<
+  "missing debug command">, InGroup<IgnoredPragmas>;
 def warn_pragma_debug_unexpected_command : Warning<
   "unexpected debug command '%0'">, InGroup<IgnoredPragmas>;
 def warn_pragma_debug_missing_argument : Warning<
   "missing argument to debug command '%0'">, InGroup<IgnoredPragmas>;
+def warn_pragma_debug_unexpected_argument : Warning<
+  "unexpected argument to debug command">, InGroup<IgnoredPragmas>;
 def warn_pragma_debug_unknown_module : Warning<
   "unknown module '%0'">, InGroup<IgnoredPragmas>;
 // #pragma module
Index: clang/include/clang/Basic/DiagnosticCommonKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -345,6 +345,17 @@
   "unable to rename temporary '%0' to output file '%1': '%2'">;
 def err_unable_to_make_temp : Error<
   "unable to make temporary file: %0">;
+def remark_sloc_usage : Remark<
+  "source manager location address space usage:">,
+  InGroup<DiagGroup<"sloc-usage">>, DefaultRemark, ShowInSystemHeader;
+def note_total_sloc_usage : Note<
+  "%0B in local locations, %1B in locations loaded from AST files, for a total "
+  "of %2B (%3%% of available space)">;
+def note_file_sloc_usage : Note<
+  "file entered %0 time%s0 using %1B of space"
+  "%plural{0:|: plus %2B for macro expansions}2">;
+def note_file_misc_sloc_usage : Note<
+  "%0 additional file%s0 entered using a total of %1B of space">;
 
 // Modules
 def err_module_format_unhandled : Error<
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D137751: ... Richard Smith - zygoloid via Phabricator via cfe-commits

Reply via email to