cameron314 updated this revision to Diff 115104.
cameron314 edited the summary of this revision.
cameron314 added a comment.

Alright, I've changed the patch so that the preamble takes into account the BOM 
presence and is invalidated when it changes. This automatically fixes all 
clients of `PrecompiledPreamble`, and ensures that all `SourceLocation`s are 
always consistent when using a PCH generated from the preamble.

I think this should do the trick!


https://reviews.llvm.org/D37491

Files:
  include/clang/Frontend/PrecompiledPreamble.h
  include/clang/Lex/Lexer.h
  include/clang/Lex/PreprocessorOptions.h
  lib/Frontend/FrontendActions.cpp
  lib/Frontend/PrecompiledPreamble.cpp
  lib/Lex/Lexer.cpp
  lib/Lex/Preprocessor.cpp
  unittests/Frontend/PCHPreambleTest.cpp

Index: unittests/Frontend/PCHPreambleTest.cpp
===================================================================
--- unittests/Frontend/PCHPreambleTest.cpp
+++ unittests/Frontend/PCHPreambleTest.cpp
@@ -153,4 +153,48 @@
   ASSERT_EQ(initialCounts[2], GetFileReadCount(Header2));
 }
 
+TEST_F(PCHPreambleTest, ParseWithBom) {
+  std::string Header = "//./header.h";
+  std::string Main = "//./main.cpp";
+  AddFile(Header, "int random() { return 4; }");
+  AddFile(Main,
+    "\xef\xbb\xbf"
+    "#include \"//./header.h\"\n"
+    "int main() { return random() -2; }");
+
+  std::unique_ptr<ASTUnit> AST(ParseAST(Main));
+  ASSERT_TRUE(AST.get());
+  ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+  unsigned HeaderReadCount = GetFileReadCount(Header);
+
+  ASSERT_TRUE(ReparseAST(AST));
+  ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+  
+  // Check preamble PCH was really reused
+  ASSERT_EQ(HeaderReadCount, GetFileReadCount(Header));
+
+  // Remove BOM
+  RemapFile(Main,
+    "#include \"//./header.h\"\n"
+    "int main() { return random() -2; }");
+
+  ASSERT_TRUE(ReparseAST(AST));
+  ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+  ASSERT_LE(HeaderReadCount, GetFileReadCount(Header));
+  HeaderReadCount = GetFileReadCount(Header);
+
+  // Add BOM back
+  RemapFile(Main,
+    "\xef\xbb\xbf"
+    "#include \"//./header.h\"\n"
+    "int main() { return random() -2; }");
+
+  ASSERT_TRUE(ReparseAST(AST));
+  ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+  ASSERT_LE(HeaderReadCount, GetFileReadCount(Header));
+}
+
 } // anonymous namespace
Index: lib/Lex/Preprocessor.cpp
===================================================================
--- lib/Lex/Preprocessor.cpp
+++ lib/Lex/Preprocessor.cpp
@@ -516,9 +516,9 @@
     // If we've been asked to skip bytes in the main file (e.g., as part of a
     // precompiled preamble), do so now.
     if (SkipMainFilePreamble.first > 0)
-      CurLexer->SkipBytes(SkipMainFilePreamble.first, 
-                          SkipMainFilePreamble.second);
-    
+      CurLexer->SetByteOffset(SkipMainFilePreamble.first,
+                              SkipMainFilePreamble.second);
+
     // Tell the header info that the main file was entered.  If the file is later
     // #imported, it won't be re-entered.
     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
Index: lib/Lex/Lexer.cpp
===================================================================
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -552,9 +552,9 @@
 
 } // end anonymous namespace
 
-std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
-                                                 const LangOptions &LangOpts,
-                                                 unsigned MaxLines) {
+PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
+                                      const LangOptions &LangOpts,
+                                      unsigned MaxLines) {
   // Create a lexer starting at the beginning of the file. Note that we use a
   // "fake" file source location at offset 1 so that the lexer will track our
   // position within the file.
@@ -688,7 +688,8 @@
   else
     End = TheTok.getLocation();
 
-  return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(),
+  return PreambleBounds(StartLoc.getRawEncoding() - FileLoc.getRawEncoding(),
+                        End.getRawEncoding() - StartLoc.getRawEncoding(),
                         TheTok.isAtStartOfLine());
 }
 
@@ -1394,9 +1395,9 @@
 // Helper methods for lexing.
 //===----------------------------------------------------------------------===//
 
-/// \brief Routine that indiscriminately skips bytes in the source file.
-void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) {
-  BufferPtr += Bytes;
+/// \brief Routine that indiscriminately sets the offset into the source file.
+void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) {
+  BufferPtr = BufferStart + Offset;
   if (BufferPtr > BufferEnd)
     BufferPtr = BufferEnd;
   // FIXME: What exactly does the StartOfLine bit mean?  There are two
Index: lib/Frontend/PrecompiledPreamble.cpp
===================================================================
--- lib/Frontend/PrecompiledPreamble.cpp
+++ lib/Frontend/PrecompiledPreamble.cpp
@@ -195,8 +195,7 @@
 PreambleBounds clang::ComputePreambleBounds(const LangOptions &LangOpts,
                                             llvm::MemoryBuffer *Buffer,
                                             unsigned MaxLines) {
-  auto Pre = Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines);
-  return PreambleBounds(Pre.first, Pre.second);
+  return Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines);
 }
 
 llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build(
@@ -224,9 +223,9 @@
 
   // Save the preamble text for later; we'll need to compare against it for
   // subsequent reparses.
-  std::vector<char> PreambleBytes(MainFileBuffer->getBufferStart(),
-                                  MainFileBuffer->getBufferStart() +
-                                      Bounds.Size);
+  auto PreambleStart = MainFileBuffer->getBufferStart() + Bounds.StartOffset;
+  std::vector<char> PreambleBytes(PreambleStart,
+                                  PreambleStart + Bounds.Size);
   bool PreambleEndsAtStartOfLine = Bounds.PreambleEndsAtStartOfLine;
 
   // Tell the compiler invocation to generate a temporary precompiled header.
@@ -289,8 +288,9 @@
 
   // Remap the main source file to the preamble buffer.
   StringRef MainFilePath = FrontendOpts.Inputs[0].getFile();
+  auto EndOffset = Bounds.StartOffset + Bounds.Size;
   auto PreambleInputBuffer = llvm::MemoryBuffer::getMemBufferCopy(
-      MainFileBuffer->getBuffer().slice(0, Bounds.Size), MainFilePath);
+      MainFileBuffer->getBuffer().slice(0, EndOffset), MainFilePath);
   if (PreprocessorOpts.RetainRemappedFileBuffers) {
     // MainFileBuffer will be deleted by unique_ptr after leaving the method.
     PreprocessorOpts.addRemappedFile(MainFilePath, PreambleInputBuffer.get());
@@ -338,20 +338,20 @@
 
   return PrecompiledPreamble(
       std::move(*PreamblePCHFile), std::move(PreambleBytes),
-      PreambleEndsAtStartOfLine, std::move(FilesInPreamble));
+      Bounds, std::move(FilesInPreamble));
 }
 
 PreambleBounds PrecompiledPreamble::getBounds() const {
-  return PreambleBounds(PreambleBytes.size(), PreambleEndsAtStartOfLine);
+  return BuildBounds;
 }
 
 bool PrecompiledPreamble::CanReuse(const CompilerInvocation &Invocation,
                                    const llvm::MemoryBuffer *MainFileBuffer,
                                    PreambleBounds Bounds,
                                    vfs::FileSystem *VFS) const {
 
   assert(
-      Bounds.Size <= MainFileBuffer->getBufferSize() &&
+      Bounds.StartOffset + Bounds.Size <= MainFileBuffer->getBufferSize() &&
       "Buffer is too large. Bounds were calculated from a different buffer?");
 
   auto PreambleInvocation = std::make_shared<CompilerInvocation>(Invocation);
@@ -366,8 +366,11 @@
   // the main-file buffer within the precompiled preamble to fit the
   // new main file.
   if (PreambleBytes.size() != Bounds.Size ||
-      PreambleEndsAtStartOfLine != Bounds.PreambleEndsAtStartOfLine ||
-      memcmp(PreambleBytes.data(), MainFileBuffer->getBufferStart(),
+      BuildBounds.StartOffset != Bounds.StartOffset ||
+      BuildBounds.PreambleEndsAtStartOfLine !=
+          Bounds.PreambleEndsAtStartOfLine ||
+      memcmp(PreambleBytes.data(),
+             MainFileBuffer->getBufferStart() + Bounds.StartOffset,
              Bounds.Size) != 0)
     return false;
   // The preamble has not changed. We may be able to re-use the precompiled
@@ -430,8 +433,10 @@
   auto &PreprocessorOpts = CI.getPreprocessorOpts();
 
   // Configure ImpicitPCHInclude.
-  PreprocessorOpts.PrecompiledPreambleBytes.first = PreambleBytes.size();
-  PreprocessorOpts.PrecompiledPreambleBytes.second = PreambleEndsAtStartOfLine;
+  PreprocessorOpts.PrecompiledPreambleBytes.first = BuildBounds.StartOffset +
+      BuildBounds.Size;
+  PreprocessorOpts.PrecompiledPreambleBytes.second =
+      BuildBounds.PreambleEndsAtStartOfLine;
   PreprocessorOpts.ImplicitPCHInclude = PCHFile.getFilePath();
   PreprocessorOpts.DisablePCHValidation = true;
 
@@ -442,11 +447,11 @@
 
 PrecompiledPreamble::PrecompiledPreamble(
     TempPCHFile PCHFile, std::vector<char> PreambleBytes,
-    bool PreambleEndsAtStartOfLine,
+    PreambleBounds BuildBounds,
     llvm::StringMap<PreambleFileHash> FilesInPreamble)
     : PCHFile(std::move(PCHFile)), FilesInPreamble(FilesInPreamble),
       PreambleBytes(std::move(PreambleBytes)),
-      PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
+      BuildBounds(BuildBounds) {}
 
 llvm::ErrorOr<PrecompiledPreamble::TempPCHFile>
 PrecompiledPreamble::TempPCHFile::CreateNewPreamblePCHFile() {
Index: lib/Frontend/FrontendActions.cpp
===================================================================
--- lib/Frontend/FrontendActions.cpp
+++ lib/Frontend/FrontendActions.cpp
@@ -590,8 +590,9 @@
   CompilerInstance &CI = getCompilerInstance();
   auto Buffer = CI.getFileManager().getBufferForFile(getCurrentFile());
   if (Buffer) {
-    unsigned Preamble =
-        Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).first;
+    PreambleBounds Bounds =
+        Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts());
+    unsigned Preamble = Bounds.StartOffset + Bounds.Size;
     llvm::outs().write((*Buffer)->getBufferStart(), Preamble);
   }
 }
Index: include/clang/Lex/PreprocessorOptions.h
===================================================================
--- include/clang/Lex/PreprocessorOptions.h
+++ include/clang/Lex/PreprocessorOptions.h
@@ -160,7 +160,7 @@
                           DisablePCHValidation(false),
                           AllowPCHWithCompilerErrors(false),
                           DumpDeserializedPCHDecls(false),
-                          PrecompiledPreambleBytes(0, true),
+                          PrecompiledPreambleBytes(0, false),
                           GeneratePreamble(false),
                           RemappedFilesKeepOriginalName(true),
                           RetainRemappedFileBuffers(false),
@@ -195,7 +195,7 @@
     LexEditorPlaceholders = true;
     RetainRemappedFileBuffers = true;
     PrecompiledPreambleBytes.first = 0;
-    PrecompiledPreambleBytes.second = 0;
+    PrecompiledPreambleBytes.second = false;
   }
 };
 
Index: include/clang/Lex/Lexer.h
===================================================================
--- include/clang/Lex/Lexer.h
+++ include/clang/Lex/Lexer.h
@@ -39,6 +39,26 @@
   CMK_Perforce
 };
 
+/// Describes the bounds (start, size) of the preamble and a flag required by
+/// PreprocessorOptions::PrecompiledPreambleBytes.
+/// The preamble does not include the BOM, if any.
+struct PreambleBounds {
+  PreambleBounds(unsigned Offset, unsigned Size, bool PreambleEndsAtStartOfLine)
+    : StartOffset(Offset), Size(Size),
+      PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
+
+  /// \brief Start offset of the preamble, in bytes. The is always zero unless
+  /// a BOM is present at the start of the file.
+  unsigned StartOffset;
+  /// \brief Size of the preamble in bytes.
+  unsigned Size;
+  /// \brief Whether the preamble ends at the start of a new line.
+  ///
+  /// Used to inform the lexer as to whether it's starting at the beginning of
+  /// a line after skipping the preamble.
+  bool PreambleEndsAtStartOfLine;
+};
+
 /// Lexer - This provides a simple interface that turns a text buffer into a
 /// stream of tokens.  This provides no support for file reading or buffering,
 /// or buffering/seeking of tokens, only forward lexing is supported.  It relies
@@ -442,12 +462,12 @@
   /// \param MaxLines If non-zero, restrict the length of the preamble
   /// to fewer than this number of lines.
   ///
-  /// \returns The offset into the file where the preamble ends and the rest
-  /// of the file begins along with a boolean value indicating whether 
-  /// the preamble ends at the beginning of a new line.
-  static std::pair<unsigned, bool> ComputePreamble(StringRef Buffer,
-                                                   const LangOptions &LangOpts,
-                                                   unsigned MaxLines = 0);
+  /// \returns The offset into the file where the preamble starts, its size,
+  /// and a boolean value indicating whether the preamble ends at the
+  /// beginning of a new line.
+  static PreambleBounds ComputePreamble(StringRef Buffer,
+                                        const LangOptions &LangOpts,
+                                        unsigned MaxLines = 0);
 
   /// \brief Checks that the given token is the first token that occurs after
   /// the given location (this excludes comments and whitespace). Returns the
@@ -618,7 +638,7 @@
   //===--------------------------------------------------------------------===//
   // Other lexer functions.
 
-  void SkipBytes(unsigned Bytes, bool StartOfLine);
+  void SetByteOffset(unsigned Offset, bool StartOfLine);
 
   void PropagateLineStartLeadingSpaceInfo(Token &Result);
 
Index: include/clang/Frontend/PrecompiledPreamble.h
===================================================================
--- include/clang/Frontend/PrecompiledPreamble.h
+++ include/clang/Frontend/PrecompiledPreamble.h
@@ -36,21 +36,6 @@
 class DeclGroupRef;
 class PCHContainerOperations;
 
-/// A size of the preamble and a flag required by
-/// PreprocessorOptions::PrecompiledPreambleBytes.
-struct PreambleBounds {
-  PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
-      : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
-
-  /// \brief Size of the preamble in bytes.
-  unsigned Size;
-  /// \brief Whether the preamble ends at the start of a new line.
-  ///
-  /// Used to inform the lexer as to whether it's starting at the beginning of
-  /// a line after skipping the preamble.
-  bool PreambleEndsAtStartOfLine;
-};
-
 /// \brief Runs lexer to compute suggested preamble bounds.
 PreambleBounds ComputePreambleBounds(const LangOptions &LangOpts,
                                      llvm::MemoryBuffer *Buffer,
@@ -113,7 +98,7 @@
 
 private:
   PrecompiledPreamble(TempPCHFile PCHFile, std::vector<char> PreambleBytes,
-                      bool PreambleEndsAtStartOfLine,
+                      PreambleBounds BuildBounds,
                       llvm::StringMap<PreambleFileHash> FilesInPreamble);
 
   /// A temp file that would be deleted on destructor call. If destructor is not
@@ -195,8 +180,8 @@
   /// contains first PreambleBounds::Size bytes. Used to compare if the relevant
   /// part of the file has not changed, so that preamble can be reused.
   std::vector<char> PreambleBytes;
-  /// See PreambleBounds::PreambleEndsAtStartOfLine
-  bool PreambleEndsAtStartOfLine;
+  /// The preamble bounds used during preamble construction.
+  PreambleBounds BuildBounds;
 };
 
 /// A set of callbacks to gather useful information while building a preamble.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to