This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG0d9b91524ea4: [Preprocessor] Reduce the memory overhead of 
`#define` directives (authored by arphaman).
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

Files:
  clang/include/clang/Lex/MacroInfo.h
  clang/lib/Lex/MacroInfo.cpp
  clang/lib/Lex/PPDirectives.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/unittests/Lex/CMakeLists.txt
  clang/unittests/Lex/PPMemoryAllocationsTest.cpp

Index: clang/unittests/Lex/PPMemoryAllocationsTest.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Lex/PPMemoryAllocationsTest.cpp
@@ -0,0 +1,97 @@
+//===- unittests/Lex/PPMemoryAllocationsTest.cpp - ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--------------------------------------------------------------===//
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "gtest/gtest.h"
+
+using namespace clang;
+
+namespace {
+
+class PPMemoryAllocationsTest : public ::testing::Test {
+protected:
+  PPMemoryAllocationsTest()
+      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
+        Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
+        SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
+    TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
+    Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
+  }
+
+  FileSystemOptions FileMgrOpts;
+  FileManager FileMgr;
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
+  DiagnosticsEngine Diags;
+  SourceManager SourceMgr;
+  LangOptions LangOpts;
+  std::shared_ptr<TargetOptions> TargetOpts;
+  IntrusiveRefCntPtr<TargetInfo> Target;
+};
+
+TEST_F(PPMemoryAllocationsTest, PPMacroDefinesAllocations) {
+  std::string Source;
+  size_t NumMacros = 1000000;
+  {
+    llvm::raw_string_ostream SourceOS(Source);
+
+    // Create a combination of 1 or 3 token macros.
+    for (size_t I = 0; I < NumMacros; ++I) {
+      SourceOS << "#define MACRO_ID_" << I << " ";
+      if ((I % 2) == 0)
+        SourceOS << "(" << I << ")";
+      else
+        SourceOS << I;
+      SourceOS << "\n";
+    }
+  }
+
+  std::unique_ptr<llvm::MemoryBuffer> Buf =
+      llvm::MemoryBuffer::getMemBuffer(Source);
+  SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
+
+  TrivialModuleLoader ModLoader;
+  HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
+                          Diags, LangOpts, Target.get());
+  Preprocessor PP(std::make_shared<PreprocessorOptions>(), Diags, LangOpts,
+                  SourceMgr, HeaderInfo, ModLoader,
+                  /*IILookup =*/nullptr,
+                  /*OwnsHeaderSearch =*/false);
+  PP.Initialize(*Target);
+  PP.EnterMainSourceFile();
+
+  while (1) {
+    Token tok;
+    PP.Lex(tok);
+    if (tok.is(tok::eof))
+      break;
+  }
+
+  size_t NumAllocated = PP.getPreprocessorAllocator().getBytesAllocated();
+  float BytesPerDefine = float(NumAllocated) / float(NumMacros);
+  llvm::errs() << "Num preprocessor allocations for " << NumMacros
+               << " #define: " << NumAllocated << "\n";
+  llvm::errs() << "Bytes per #define: " << BytesPerDefine << "\n";
+  // On arm64-apple-macos, we get around 120 bytes per define.
+  // Assume a reasonable upper bound based on that number that we don't want
+  // to exceed when storing information about a macro #define with 1 or 3
+  // tokens.
+  EXPECT_LT(BytesPerDefine, 130.0f);
+}
+
+} // anonymous namespace
Index: clang/unittests/Lex/CMakeLists.txt
===================================================================
--- clang/unittests/Lex/CMakeLists.txt
+++ clang/unittests/Lex/CMakeLists.txt
@@ -9,6 +9,7 @@
   LexerTest.cpp
   PPCallbacksTest.cpp
   PPConditionalDirectiveRecordTest.cpp
+  PPMemoryAllocationsTest.cpp
   )
 
 clang_target_link_libraries(LexTests
Index: clang/lib/Serialization/ASTWriter.cpp
===================================================================
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -2431,6 +2431,7 @@
     AddSourceLocation(MI->getDefinitionEndLoc(), Record);
     Record.push_back(MI->isUsed());
     Record.push_back(MI->isUsedForHeaderGuard());
+    Record.push_back(MI->getNumTokens());
     unsigned Code;
     if (MI->isObjectLike()) {
       Code = PP_MACRO_OBJECT_LIKE;
Index: clang/lib/Serialization/ASTReader.cpp
===================================================================
--- clang/lib/Serialization/ASTReader.cpp
+++ clang/lib/Serialization/ASTReader.cpp
@@ -1694,6 +1694,7 @@
   RecordData Record;
   SmallVector<IdentifierInfo*, 16> MacroParams;
   MacroInfo *Macro = nullptr;
+  llvm::MutableArrayRef<Token> MacroTokens;
 
   while (true) {
     // Advance to the next record, but if we get to the end of the block, don't
@@ -1748,7 +1749,8 @@
       MI->setDefinitionEndLoc(ReadSourceLocation(F, Record, NextIndex));
       MI->setIsUsed(Record[NextIndex++]);
       MI->setUsedForHeaderGuard(Record[NextIndex++]);
-
+      MacroTokens = MI->allocateTokens(Record[NextIndex++],
+                                       PP.getPreprocessorAllocator());
       if (RecType == PP_MACRO_FUNCTION_LIKE) {
         // Decode function-like macro info.
         bool isC99VarArgs = Record[NextIndex++];
@@ -1793,10 +1795,14 @@
       // If we see a TOKEN before a PP_MACRO_*, then the file is
       // erroneous, just pretend we didn't see this.
       if (!Macro) break;
+      if (MacroTokens.empty()) {
+        Error("unexpected number of macro tokens for a macro in AST file");
+        return Macro;
+      }
 
       unsigned Idx = 0;
-      Token Tok = ReadToken(F, Record, Idx);
-      Macro->AddTokenToBody(Tok);
+      MacroTokens[0] = ReadToken(F, Record, Idx);
+      MacroTokens = MacroTokens.drop_front();
       break;
     }
     }
Index: clang/lib/Lex/PPDirectives.cpp
===================================================================
--- clang/lib/Lex/PPDirectives.cpp
+++ clang/lib/Lex/PPDirectives.cpp
@@ -2711,12 +2711,14 @@
   if (!Tok.is(tok::eod))
     LastTok = Tok;
 
+  SmallVector<Token, 16> Tokens;
+
   // Read the rest of the macro body.
   if (MI->isObjectLike()) {
     // Object-like macros are very simple, just read their body.
     while (Tok.isNot(tok::eod)) {
       LastTok = Tok;
-      MI->AddTokenToBody(Tok);
+      Tokens.push_back(Tok);
       // Get the next token of the macro.
       LexUnexpandedToken(Tok);
     }
@@ -2731,7 +2733,7 @@
       LastTok = Tok;
 
       if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
-        MI->AddTokenToBody(Tok);
+        Tokens.push_back(Tok);
 
         if (VAOCtx.isVAOptToken(Tok)) {
           // If we're already within a VAOPT, emit an error.
@@ -2745,7 +2747,7 @@
             Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
             return nullptr;
           }
-          MI->AddTokenToBody(Tok);
+          Tokens.push_back(Tok);
           VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
           LexUnexpandedToken(Tok);
           if (Tok.is(tok::hashhash)) {
@@ -2756,10 +2758,10 @@
         } else if (VAOCtx.isInVAOpt()) {
           if (Tok.is(tok::r_paren)) {
             if (VAOCtx.sawClosingParen()) {
-              const unsigned NumTokens = MI->getNumTokens();
-              assert(NumTokens >= 3 && "Must have seen at least __VA_OPT__( "
-                                       "and a subsequent tok::r_paren");
-              if (MI->getReplacementToken(NumTokens - 2).is(tok::hashhash)) {
+              assert(Tokens.size() >= 3 &&
+                     "Must have seen at least __VA_OPT__( "
+                     "and a subsequent tok::r_paren");
+              if (Tokens[Tokens.size() - 2].is(tok::hashhash)) {
                 Diag(Tok, diag::err_vaopt_paste_at_end);
                 return nullptr;
               }
@@ -2778,7 +2780,7 @@
       // things.
       if (getLangOpts().TraditionalCPP) {
         Tok.setKind(tok::unknown);
-        MI->AddTokenToBody(Tok);
+        Tokens.push_back(Tok);
 
         // Get the next token of the macro.
         LexUnexpandedToken(Tok);
@@ -2794,17 +2796,16 @@
         LexUnexpandedToken(Tok);
 
         if (Tok.is(tok::eod)) {
-          MI->AddTokenToBody(LastTok);
+          Tokens.push_back(LastTok);
           break;
         }
 
-        unsigned NumTokens = MI->getNumTokens();
-        if (NumTokens && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
-            MI->getReplacementToken(NumTokens-1).is(tok::comma))
+        if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
+            Tokens[Tokens.size() - 1].is(tok::comma))
           MI->setHasCommaPasting();
 
         // Things look ok, add the '##' token to the macro.
-        MI->AddTokenToBody(LastTok);
+        Tokens.push_back(LastTok);
         continue;
       }
 
@@ -2823,7 +2824,7 @@
         // confused.
         if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
           LastTok.setKind(tok::unknown);
-          MI->AddTokenToBody(LastTok);
+          Tokens.push_back(LastTok);
           continue;
         } else {
           Diag(Tok, diag::err_pp_stringize_not_parameter)
@@ -2833,13 +2834,13 @@
       }
 
       // Things look ok, add the '#' and param name tokens to the macro.
-      MI->AddTokenToBody(LastTok);
+      Tokens.push_back(LastTok);
 
       // If the token following '#' is VAOPT, let the next iteration handle it
       // and check it for correctness, otherwise add the token and prime the
       // loop with the next one.
       if (!VAOCtx.isVAOptToken(Tok)) {
-        MI->AddTokenToBody(Tok);
+        Tokens.push_back(Tok);
         LastTok = Tok;
 
         // Get the next token of the macro.
@@ -2855,6 +2856,8 @@
     }
   }
   MI->setDefinitionEndLoc(LastTok.getLocation());
+
+  MI->setTokens(Tokens, BP);
   return MI;
 }
 /// HandleDefineDirective - Implements \#define.  This consumes the entire macro
@@ -3007,7 +3010,7 @@
     Tok.startToken();
     Tok.setKind(tok::kw__Static_assert);
     Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert"));
-    MI->AddTokenToBody(Tok);
+    MI->setTokens({Tok}, BP);
     (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI);
   }
 }
Index: clang/lib/Lex/MacroInfo.cpp
===================================================================
--- clang/lib/Lex/MacroInfo.cpp
+++ clang/lib/Lex/MacroInfo.cpp
@@ -28,6 +28,25 @@
 
 using namespace clang;
 
+namespace {
+
+// MacroInfo is expected to take 40 bytes on platforms with an 8 byte pointer
+// and 4 byte SourceLocation.
+template <int> class MacroInfoSizeChecker {
+public:
+  constexpr static bool AsExpected = true;
+};
+template <> class MacroInfoSizeChecker<8> {
+public:
+  constexpr static bool AsExpected =
+      sizeof(MacroInfo) == (32 + sizeof(SourceLocation) * 2);
+};
+
+static_assert(MacroInfoSizeChecker<sizeof(void *)>::AsExpected,
+              "Unexpected size of MacroInfo");
+
+} // end namespace
+
 MacroInfo::MacroInfo(SourceLocation DefLoc)
     : Location(DefLoc), IsDefinitionLengthCached(false), IsFunctionLike(false),
       IsC99Varargs(false), IsGNUVarargs(false), IsBuiltinMacro(false),
@@ -39,6 +58,7 @@
   assert(!IsDefinitionLengthCached);
   IsDefinitionLengthCached = true;
 
+  ArrayRef<Token> ReplacementTokens = tokens();
   if (ReplacementTokens.empty())
     return (DefinitionLength = 0);
 
@@ -76,7 +96,7 @@
   bool Lexically = !Syntactically;
 
   // Check # tokens in replacement, number of args, and various flags all match.
-  if (ReplacementTokens.size() != Other.ReplacementTokens.size() ||
+  if (getNumTokens() != Other.getNumTokens() ||
       getNumParams() != Other.getNumParams() ||
       isFunctionLike() != Other.isFunctionLike() ||
       isC99Varargs() != Other.isC99Varargs() ||
@@ -92,7 +112,7 @@
   }
 
   // Check all the tokens.
-  for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) {
+  for (unsigned i = 0; i != NumReplacementTokens; ++i) {
     const Token &A = ReplacementTokens[i];
     const Token &B = Other.ReplacementTokens[i];
     if (A.getKind() != B.getKind())
@@ -157,7 +177,7 @@
   }
 
   bool First = true;
-  for (const Token &Tok : ReplacementTokens) {
+  for (const Token &Tok : tokens()) {
     // Leading space is semantically meaningful in a macro definition,
     // so preserve it in the dump output.
     if (First || Tok.hasLeadingSpace())
Index: clang/include/clang/Lex/MacroInfo.h
===================================================================
--- clang/include/clang/Lex/MacroInfo.h
+++ clang/include/clang/Lex/MacroInfo.h
@@ -54,11 +54,14 @@
   /// macro, this includes the \c __VA_ARGS__ identifier on the list.
   IdentifierInfo **ParameterList = nullptr;
 
+  /// This is the list of tokens that the macro is defined to.
+  const Token *ReplacementTokens = nullptr;
+
   /// \see ParameterList
   unsigned NumParameters = 0;
 
-  /// This is the list of tokens that the macro is defined to.
-  SmallVector<Token, 8> ReplacementTokens;
+  /// \see ReplacementTokens
+  unsigned NumReplacementTokens = 0;
 
   /// Length in characters of the macro definition.
   mutable unsigned DefinitionLength;
@@ -230,26 +233,47 @@
   bool isWarnIfUnused() const { return IsWarnIfUnused; }
 
   /// Return the number of tokens that this macro expands to.
-  unsigned getNumTokens() const { return ReplacementTokens.size(); }
+  unsigned getNumTokens() const { return NumReplacementTokens; }
 
   const Token &getReplacementToken(unsigned Tok) const {
-    assert(Tok < ReplacementTokens.size() && "Invalid token #");
+    assert(Tok < NumReplacementTokens && "Invalid token #");
     return ReplacementTokens[Tok];
   }
 
-  using tokens_iterator = SmallVectorImpl<Token>::const_iterator;
+  using const_tokens_iterator = const Token *;
 
-  tokens_iterator tokens_begin() const { return ReplacementTokens.begin(); }
-  tokens_iterator tokens_end() const { return ReplacementTokens.end(); }
-  bool tokens_empty() const { return ReplacementTokens.empty(); }
-  ArrayRef<Token> tokens() const { return ReplacementTokens; }
+  const_tokens_iterator tokens_begin() const { return ReplacementTokens; }
+  const_tokens_iterator tokens_end() const {
+    return ReplacementTokens + NumReplacementTokens;
+  }
+  bool tokens_empty() const { return NumReplacementTokens == 0; }
+  ArrayRef<Token> tokens() const {
+    return llvm::makeArrayRef(ReplacementTokens, NumReplacementTokens);
+  }
 
-  /// Add the specified token to the replacement text for the macro.
-  void AddTokenToBody(const Token &Tok) {
+  llvm::MutableArrayRef<Token>
+  allocateTokens(unsigned NumTokens, llvm::BumpPtrAllocator &PPAllocator) {
+    assert(ReplacementTokens == nullptr && NumReplacementTokens == 0 &&
+           "Token list already allocated!");
+    NumReplacementTokens = NumTokens;
+    Token *NewReplacementTokens = PPAllocator.Allocate<Token>(NumTokens);
+    ReplacementTokens = NewReplacementTokens;
+    return llvm::makeMutableArrayRef(NewReplacementTokens, NumTokens);
+  }
+
+  void setTokens(ArrayRef<Token> Tokens, llvm::BumpPtrAllocator &PPAllocator) {
     assert(
         !IsDefinitionLengthCached &&
         "Changing replacement tokens after definition length got calculated");
-    ReplacementTokens.push_back(Tok);
+    assert(ReplacementTokens == nullptr && NumReplacementTokens == 0 &&
+           "Token list already set!");
+    if (Tokens.empty())
+      return;
+
+    NumReplacementTokens = Tokens.size();
+    Token *NewReplacementTokens = PPAllocator.Allocate<Token>(Tokens.size());
+    std::copy(Tokens.begin(), Tokens.end(), NewReplacementTokens);
+    ReplacementTokens = NewReplacementTokens;
   }
 
   /// Return true if this macro is enabled.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to