alexfh created this revision.
alexfh added a reviewer: krasimir.
Herald added a project: clang.

Before this patch clang-format crashed when trying to issue a diagnostic about 
an unsupported BOM. See the bug report here: 
https://bugs.llvm.org/show_bug.cgi?id=26032


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D60868

Files:
  clang/lib/Basic/SourceManager.cpp
  clang/lib/Format/Format.cpp
  clang/lib/Format/FormatTokenLexer.cpp
  clang/lib/Format/FormatTokenLexer.h
  clang/lib/Format/SortJavaScriptImports.cpp
  clang/lib/Format/TokenAnalyzer.cpp
  clang/lib/Format/TokenAnalyzer.h
  clang/test/Format/invalid-bom.cpp
  clang/tools/clang-format/ClangFormat.cpp

Index: clang/tools/clang-format/ClangFormat.cpp
===================================================================
--- clang/tools/clang-format/ClangFormat.cpp
+++ clang/tools/clang-format/ClangFormat.cpp
@@ -271,6 +271,7 @@
 
   if (SortIncludes.getNumOccurrences() != 0)
     FormatStyle->SortIncludes = SortIncludes;
+
   unsigned CursorPosition = Cursor;
   Replacements Replaces = sortIncludes(*FormatStyle, Code->getBuffer(), Ranges,
                                        AssumedFileName, &CursorPosition);
@@ -299,13 +300,13 @@
 
     outputReplacementsXML(Replaces);
     outs() << "</replacements>\n";
-  } else {
+  } else if (!Replaces.empty()) {
     IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
         new llvm::vfs::InMemoryFileSystem);
     FileManager Files(FileSystemOptions(), InMemoryFileSystem);
     DiagnosticsEngine Diagnostics(
         IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
-        new DiagnosticOptions);
+        new DiagnosticOptions, new IgnoringDiagConsumer);
     SourceManager Sources(Diagnostics, Files);
     FileID ID = createInMemoryFile(AssumedFileName, Code.get(), Sources, Files,
                                    InMemoryFileSystem.get());
@@ -326,6 +327,9 @@
       }
       Rewrite.getEditBuffer(ID).write(outs());
     }
+  } else if (!Inplace) { // Nothing changed, but we still need to dump the input
+                         // as is.
+    outs() << Code->getBuffer();
   }
   return false;
 }
Index: clang/test/Format/invalid-bom.cpp
===================================================================
--- /dev/null
+++ clang/test/Format/invalid-bom.cpp
@@ -0,0 +1,3 @@
+// REQUIRES: shell
+// RUN: echo -en "\xff\xfeinput with invalid BOM" | clang-format - >%t.out
+// RUN: grep "input with invalid BOM" %t.out
Index: clang/lib/Format/TokenAnalyzer.h
===================================================================
--- clang/lib/Format/TokenAnalyzer.h
+++ clang/lib/Format/TokenAnalyzer.h
@@ -35,19 +35,18 @@
 
 class Environment {
 public:
-  // This sets up an virtual file system with file \p FileName containing the
-  // fragment \p Code. Assumes that \p Code starts at \p FirstStartColumn,
-  // that the next lines of \p Code should start at \p NextStartColumn, and
-  // that \p Code should end at \p LastStartColumn if it ends in newline.
-  // See also the documentation of clang::format::internal::reformat.
-  Environment(StringRef Code, StringRef FileName,
-              ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn = 0,
-              unsigned NextStartColumn = 0, unsigned LastStartColumn = 0);
-
-  FileID getFileID() const { return ID; }
+  static std::unique_ptr<Environment> Create(StringRef Code, StringRef FileName,
+                                             ArrayRef<tooling::Range> Ranges,
+                                             unsigned FirstStartColumn = 0,
+                                             unsigned NextStartColumn = 0,
+                                             unsigned LastStartColumn = 0);
 
   const SourceManager &getSourceManager() const { return SM; }
 
+  StringRef getBufferData() const { return BufferData; }
+  SourceLocation getStartLoc() const { return SM.getLocForStartOfFile(ID); }
+  SourceLocation getEndLoc() const { return SM.getLocForEndOfFile(ID); }
+
   ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }
 
   // Returns the column at which the fragment of code managed by this
@@ -63,6 +62,16 @@
   unsigned getLastStartColumn() const { return LastStartColumn; }
 
 private:
+  // This sets up an virtual file system with file \p FileName containing the
+  // fragment \p Code. Assumes that \p Code starts at \p FirstStartColumn,
+  // that the next lines of \p Code should start at \p NextStartColumn, and
+  // that \p Code should end at \p LastStartColumn if it ends in newline.
+  // See also the documentation of clang::format::internal::reformat.
+  Environment(StringRef Code, StringRef FileName,
+              ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
+              unsigned NextStartColumn, unsigned LastStartColumn,
+              bool *InvalidBuffer);
+
   // This is only set if constructed from string.
   std::unique_ptr<SourceManagerForFile> VirtualSM;
 
@@ -75,6 +84,7 @@
   unsigned FirstStartColumn;
   unsigned NextStartColumn;
   unsigned LastStartColumn;
+  StringRef BufferData;
 };
 
 class TokenAnalyzer : public UnwrappedLineConsumer {
Index: clang/lib/Format/TokenAnalyzer.cpp
===================================================================
--- clang/lib/Format/TokenAnalyzer.cpp
+++ clang/lib/Format/TokenAnalyzer.cpp
@@ -33,13 +33,29 @@
 namespace clang {
 namespace format {
 
+std::unique_ptr<Environment>
+Environment::Create(StringRef Code, StringRef FileName,
+                    ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
+                    unsigned NextStartColumn, unsigned LastStartColumn) {
+  bool InvalidBuffer = true;
+  std::unique_ptr<Environment> Env(
+      new Environment(Code, FileName, Ranges, FirstStartColumn, NextStartColumn,
+                      LastStartColumn, &InvalidBuffer));
+  if (InvalidBuffer)
+    return nullptr;
+  return Env;
+}
+
 Environment::Environment(StringRef Code, StringRef FileName,
                          ArrayRef<tooling::Range> Ranges,
                          unsigned FirstStartColumn, unsigned NextStartColumn,
-                         unsigned LastStartColumn)
+                         unsigned LastStartColumn, bool *InvalidBuffer)
     : VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()),
       ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn),
       NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) {
+  BufferData = SM.getBufferData(ID, InvalidBuffer);
+  if (*InvalidBuffer)
+    return;
   SourceLocation StartOfFile = SM.getLocForStartOfFile(ID);
   for (const tooling::Range &Range : Ranges) {
     SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
@@ -52,8 +68,7 @@
     : Style(Style), Env(Env),
       AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
       UnwrappedLines(1),
-      Encoding(encoding::detectEncoding(
-          Env.getSourceManager().getBufferData(Env.getFileID()))) {
+      Encoding(encoding::detectEncoding(Env.getBufferData())) {
   LLVM_DEBUG(
       llvm::dbgs() << "File encoding: "
                    << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown")
@@ -64,7 +79,8 @@
 
 std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() {
   tooling::Replacements Result;
-  FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(),
+  FormatTokenLexer Tokens(Env.getSourceManager(), Env.getStartLoc(),
+                          Env.getEndLoc(), Env.getBufferData(),
                           Env.getFirstStartColumn(), Style, Encoding);
 
   UnwrappedLineParser Parser(Style, Tokens.getKeywords(),
Index: clang/lib/Format/SortJavaScriptImports.cpp
===================================================================
--- clang/lib/Format/SortJavaScriptImports.cpp
+++ clang/lib/Format/SortJavaScriptImports.cpp
@@ -119,8 +119,7 @@
 class JavaScriptImportSorter : public TokenAnalyzer {
 public:
   JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style)
-      : TokenAnalyzer(Env, Style),
-        FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {}
+      : TokenAnalyzer(Env, Style), FileContents(Env.getBufferData()) {}
 
   std::pair<tooling::Replacements, unsigned>
   analyze(TokenAnnotator &Annotator,
@@ -443,10 +442,11 @@
                                             StringRef Code,
                                             ArrayRef<tooling::Range> Ranges,
                                             StringRef FileName) {
-  // FIXME: Cursor support.
-  return JavaScriptImportSorter(Environment(Code, FileName, Ranges), Style)
-      .process()
-      .first;
+  if (auto Env = Environment::Create(Code, FileName, Ranges)) {
+    // FIXME: Cursor support.
+    return JavaScriptImportSorter(*Env, Style).process().first;
+  }
+  return {};
 }
 
 } // end namespace format
Index: clang/lib/Format/FormatTokenLexer.h
===================================================================
--- clang/lib/Format/FormatTokenLexer.h
+++ clang/lib/Format/FormatTokenLexer.h
@@ -36,7 +36,8 @@
 
 class FormatTokenLexer {
 public:
-  FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
+  FormatTokenLexer(const SourceManager &SourceMgr, SourceLocation StartLoc,
+                   SourceLocation EndLoc, StringRef BufferData, unsigned Column,
                    const FormatStyle &Style, encoding::Encoding Encoding);
 
   ArrayRef<FormatToken *> lex();
@@ -95,7 +96,9 @@
   unsigned TrailingWhitespace;
   std::unique_ptr<Lexer> Lex;
   const SourceManager &SourceMgr;
-  FileID ID;
+  SourceLocation StartLoc;
+  SourceLocation EndLoc;
+  StringRef BufferData;
   const FormatStyle &Style;
   IdentifierTable IdentTable;
   AdditionalKeywords Keywords;
Index: clang/lib/Format/FormatTokenLexer.cpp
===================================================================
--- clang/lib/Format/FormatTokenLexer.cpp
+++ clang/lib/Format/FormatTokenLexer.cpp
@@ -22,18 +22,19 @@
 namespace clang {
 namespace format {
 
-FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
+FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr,
+                                   SourceLocation StartLoc,
+                                   SourceLocation EndLoc, StringRef BufferData,
                                    unsigned Column, const FormatStyle &Style,
                                    encoding::Encoding Encoding)
     : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
-      Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
-      Style(Style), IdentTable(getFormattingLangOpts(Style)),
-      Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
-      FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
+      Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr),
+      StartLoc(StartLoc), EndLoc(EndLoc), BufferData(BufferData), Style(Style),
+      IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
+      Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
+      MacroBlockBeginRegex(Style.MacroBlockBegin),
       MacroBlockEndRegex(Style.MacroBlockEnd) {
-  Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
-                      getFormattingLangOpts(Style)));
-  Lex->SetKeepWhitespaceMode(true);
+  resetLexer(0);
 
   for (const std::string &ForEachMacro : Style.ForEachMacros)
     Macros.insert({&IdentTable.get(ForEachMacro), TT_ForEachMacro});
@@ -458,7 +459,7 @@
 
   SourceLocation loc = Offset < Lex->getBuffer().end()
                            ? Lex->getSourceLocation(Offset + 1)
-                           : SourceMgr.getLocForEndOfFile(ID);
+                           : EndLoc;
   resetLexer(SourceMgr.getFileOffset(loc));
 }
 
@@ -479,7 +480,7 @@
   HashToken->TokenText = Lex->getBuffer().substr(From, Len);
   SourceLocation Loc = To < Lex->getBuffer().size()
                            ? Lex->getSourceLocation(CommentBegin + Len)
-                           : SourceMgr.getLocForEndOfFile(ID);
+                           : EndLoc;
   resetLexer(SourceMgr.getFileOffset(Loc));
 }
 
@@ -842,10 +843,9 @@
 }
 
 void FormatTokenLexer::resetLexer(unsigned Offset) {
-  StringRef Buffer = SourceMgr.getBufferData(ID);
-  Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
-                      getFormattingLangOpts(Style), Buffer.begin(),
-                      Buffer.begin() + Offset, Buffer.end()));
+  Lex = llvm::make_unique<Lexer>(StartLoc, getFormattingLangOpts(Style),
+                                 BufferData.begin(),
+                                 BufferData.begin() + Offset, BufferData.end());
   Lex->SetKeepWhitespaceMode(true);
   TrailingWhitespace = 0;
 }
Index: clang/lib/Format/Format.cpp
===================================================================
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -1254,9 +1254,8 @@
     }
     Annotator.setCommentLineLevels(AnnotatedLines);
 
-    WhitespaceManager Whitespaces(
-        Env.getSourceManager(), Style,
-        inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID())));
+    WhitespaceManager Whitespaces(Env.getSourceManager(), Style,
+                                  inputUsesCRLF(Env.getBufferData()));
     ContinuationIndenter Indenter(Style, Tokens.getKeywords(),
                                   Env.getSourceManager(), Whitespaces, Encoding,
                                   BinPackInconclusiveFunctions);
@@ -2284,9 +2283,10 @@
     return Formatter(Env, Expanded, Status).process();
   });
 
-  auto Env =
-      llvm::make_unique<Environment>(Code, FileName, Ranges, FirstStartColumn,
-                                     NextStartColumn, LastStartColumn);
+  auto Env = Environment::Create(Code, FileName, Ranges, FirstStartColumn,
+                                 NextStartColumn, LastStartColumn);
+  if (!Env)
+    return {{}, 0};
   llvm::Optional<std::string> CurrentCode = None;
   tooling::Replacements Fixes;
   unsigned Penalty = 0;
@@ -2299,10 +2299,12 @@
       Penalty += PassFixes.second;
       if (I + 1 < E) {
         CurrentCode = std::move(*NewCode);
-        Env = llvm::make_unique<Environment>(
+        Env = Environment::Create(
             *CurrentCode, FileName,
             tooling::calculateRangesAfterReplacements(Fixes, Ranges),
             FirstStartColumn, NextStartColumn, LastStartColumn);
+        if (!Env)
+          break;
       }
     }
   }
@@ -2327,9 +2329,9 @@
                               StringRef FileName) {
   // cleanups only apply to C++ (they mostly concern ctor commas etc.)
   if (Style.Language != FormatStyle::LK_Cpp)
-    return tooling::Replacements();
-  return Cleaner(Environment(Code, FileName, Ranges), Style).process().first;
-}
+    return {};
+  if (auto Env = Environment::Create(Code, FileName, Ranges))
+    return Cleaner(*Env, Style).process().first; return {}; }
 
 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
                                ArrayRef<tooling::Range> Ranges,
@@ -2345,18 +2347,18 @@
                                               StringRef Code,
                                               ArrayRef<tooling::Range> Ranges,
                                               StringRef FileName) {
-  return NamespaceEndCommentsFixer(Environment(Code, FileName, Ranges), Style)
-      .process()
-      .first;
+  if (auto Env = Environment::Create(Code, FileName, Ranges))
+    return NamespaceEndCommentsFixer(*Env, Style).process().first;
+  return {};
 }
 
 tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
                                             StringRef Code,
                                             ArrayRef<tooling::Range> Ranges,
                                             StringRef FileName) {
-  return UsingDeclarationsSorter(Environment(Code, FileName, Ranges), Style)
-      .process()
-      .first;
+  if (auto Env = Environment::Create(Code, FileName, Ranges))
+    return UsingDeclarationsSorter(*Env, Style).process().first;
+  return {};
 }
 
 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
@@ -2417,11 +2419,13 @@
     // of the code to see if it contains Objective-C.
     if (Extension.empty() || Extension == ".h") {
       auto NonEmptyFileName = FileName.empty() ? "guess.h" : FileName;
-      Environment Env(Code, NonEmptyFileName, /*Ranges=*/{});
-      ObjCHeaderStyleGuesser Guesser(Env, getLLVMStyle());
-      Guesser.process();
-      if (Guesser.isObjC())
-        return FormatStyle::LK_ObjC;
+      if (auto Env =
+              Environment::Create(Code, NonEmptyFileName, /*Ranges=*/{})) {
+        ObjCHeaderStyleGuesser Guesser(*Env, getLLVMStyle());
+        Guesser.process();
+        if (Guesser.isObjC())
+          return FormatStyle::LK_ObjC;
+      }
     }
   }
   return GuessedLanguage;
Index: clang/lib/Basic/SourceManager.cpp
===================================================================
--- clang/lib/Basic/SourceManager.cpp
+++ clang/lib/Basic/SourceManager.cpp
@@ -2232,7 +2232,7 @@
   // by `Environment` due to the same reason above.
   Diagnostics = llvm::make_unique<DiagnosticsEngine>(
       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
-      new DiagnosticOptions);
+      new DiagnosticOptions, new IgnoringDiagConsumer);
   SourceMgr = llvm::make_unique<SourceManager>(*Diagnostics, *FileMgr);
   FileID ID = SourceMgr->createFileID(FileMgr->getFile(FileName),
                                       SourceLocation(), clang::SrcMgr::C_User);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to