Re: [PATCH] D18551: Added Fixer implementation and fix() interface in clang-format for removing redundant code.

Eric Liu via cfe-commits Wed, 06 Apr 2016 12:11:40 -0700

ioeric updated this revision to Diff 52832.
ioeric marked 4 inline comments as done.
ioeric added a comment.


- Change implementation of fixer to iterate by lines. TODO: refactor Formatter 
and Fixer to reduce duplication.


http://reviews.llvm.org/D18551

Files:
  include/clang/Format/Format.h
  lib/Format/Format.cpp
  lib/Format/FormatToken.h
  lib/Format/TokenAnnotator.cpp
  lib/Format/TokenAnnotator.h
  unittests/Format/CMakeLists.txt
  unittests/Format/FixTest.cpp
  unittests/Format/FormatTest.cpp

Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -11274,6 +11274,44 @@
                           Code, formatReplacements(Code, Replaces, Style)));
 }
 
+TEST_F(ReplacementTest, FixCodeAfterReplacements) {
+  std::string Code = "class A {\n"
+                     "  A() : X(0), Y(0) {     }\n"
+                     "};";
+  std::string Expected = "class A {\n"
+                         "  A() : X(0) {}\n"
+                         "};";
+  FileID ID = Context.createInMemoryFile("format.cpp", Code);
+  tooling::Replacements Replaces;
+  Replaces.insert(tooling::Replacement(
+      Context.Sources, Context.getLocation(ID, 2, 15), 4, ""));
+
+  format::FormatStyle Style = format::getLLVMStyle();
+  auto FinalReplaces =
+      formatReplacements(Code, fixReplacements(Code, Replaces, Style), Style);
+  EXPECT_EQ(Expected, applyAllReplacements(Code, FinalReplaces));
+}
+
+TEST_F(ReplacementTest, DoNotReformatCodeNotAffectedByReplacements) {
+  std::string Code = "class A {\n"
+                     "  A() : X(0), Y(0) {}\n"
+                     "  f( int    x)    = 0;"
+                     "};";
+  std::string Expected = "class A {\n"
+                         "  A() : X(0) {}\n"
+                         "  f( int    x)    = 0;"
+                         "};";
+  FileID ID = Context.createInMemoryFile("format.cpp", Code);
+  tooling::Replacements Replaces;
+  Replaces.insert(tooling::Replacement(Context.Sources,
+                                       Context.getLocation(ID, 2, 15), 4, ""));
+
+  format::FormatStyle Style = format::getLLVMStyle();
+  auto FinalReplaces =
+      formatReplacements(Code, fixReplacements(Code, Replaces, Style), Style);
+  EXPECT_EQ(Expected, applyAllReplacements(Code, FinalReplaces));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
Index: unittests/Format/FixTest.cpp
===================================================================
--- /dev/null
+++ unittests/Format/FixTest.cpp
@@ -0,0 +1,195 @@
+//===- unittest/Format/FixTest.cpp - Code fixing unit tests ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Format/Format.h"
+
+#include "clang/Tooling/Core/Replacement.h"
+
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace format {
+namespace {
+
+class FixTest : public ::testing::Test {
+protected:
+  std::string fix(llvm::StringRef Code,
+                  const std::vector<tooling::Range> &Ranges,
+                  const FormatStyle &Style = getLLVMStyle()) {
+    tooling::Replacements Replaces = format::fix(Style, Code, Ranges);
+
+    std::string Result = applyAllReplacements(Code, Replaces);
+    EXPECT_NE("", Result);
+    return Result;
+  }
+};
+
+TEST_F(FixTest, CtorInitializationSimpleRedundantComma) {
+  std::string Code = "class A {\nA() : , {} };";
+  std::string Expected = "class A {\nA()  {} };";
+  std::vector<tooling::Range> Ranges;
+  Ranges.push_back(tooling::Range(17, 0));
+  Ranges.push_back(tooling::Range(19, 0));
+  std::string Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+
+  Code = "class A {\nA() : x(1), {} };";
+  Expected = "class A {\nA() : x(1) {} };";
+  Ranges.clear();
+  Ranges.push_back(tooling::Range(23, 0));
+  Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(FixTest, CtorInitializationBracesInParens) {
+  std::string Code = "class A {\nA() : x({1}),, {} };";
+  std::string Expected = "class A {\nA() : x({1}) {} };";
+  std::vector<tooling::Range> Ranges;
+  Ranges.push_back(tooling::Range(24, 0));
+  Ranges.push_back(tooling::Range(26, 0));
+  std::string Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(FixTest, RedundantCommaNotInAffectedRanges) {
+  std::string Code =
+      "class A {\nA() : x({1}), /* comment */, { int x = 0; } };";
+  std::string Expected =
+      "class A {\nA() : x({1}), /* comment */, { int x = 0; } };";
+  // Set the affected range to be "int x = 0", which does not intercept the
+  // constructor initialization list.
+  std::vector<tooling::Range> Ranges(1, tooling::Range(42, 9));
+  std::string Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+
+  Code = "class A {\nA() : x(1), {} };";
+  Expected = "class A {\nA() : x(1), {} };";
+  // No range. Fixer should do nothing.
+  Ranges.clear();
+  Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(FixTest, CtorInitializationCommentAroundCommas) {
+  // Remove redundant commas and comment between them.
+  std::string Code = "class A {\nA() : x({1}), /* comment */, {} };";
+  std::string Expected = "class A {\nA() : x({1}) {} };";
+  std::vector<tooling::Range> Ranges;
+  Ranges.push_back(tooling::Range(25, 0));
+  Ranges.push_back(tooling::Range(40, 0));
+  std::string Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+
+  // Remove trailing comma and comment.
+  Code = "class A {\nA() : x({1}), // comment\n{} };";
+  Expected = "class A {\nA() : x({1})\n{} };";
+  Ranges = std::vector<tooling::Range>(1, tooling::Range(25, 0));
+  Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+
+  // Remove trailing comma, but leave the comment.
+  Code = "class A {\nA() : x({1}), // comment\n , y(1),{} };";
+  Expected = "class A {\nA() : x({1}), // comment\n  y(1){} };";
+  Ranges = std::vector<tooling::Range>(1, tooling::Range(38, 0));
+  Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+
+  // Remove trailing comma and the comment before it.
+  Code = "class A {\nA() : x({1}), \n/* comment */, y(1),{} };";
+  Expected = "class A {\nA() : x({1}), \n y(1){} };";
+  Ranges = std::vector<tooling::Range>(1, tooling::Range(40, 0));
+  Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+
+  // Remove trailing comma and the comment after it.
+  Code = "class A {\nA() : , // comment\n y(1),{} };";
+  Expected = "class A {\nA() : \n y(1){} };";
+  Ranges = std::vector<tooling::Range>(1, tooling::Range(17, 0));
+  Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(FixTest, SkipImbalancedParentheses) {
+  std::string Code = "class A {\nA() : x((),, {} };";
+  std::string Expected = "class A {\nA() : x((),, {} };";
+  std::vector<tooling::Range> Ranges(1, tooling::Range(0, Code.size()));
+  std::string Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(FixTest, DeleteEmptyNamespaces) {
+  std::string Code = "namespace A {\n"
+                     "namespace B {\n"
+                     "} // namespace B\n"
+                     "} // namespace A\n\n"
+                     "namespace C {\n"
+                     "namespace D { int i; }\n"
+                     "inline namespace E { namespace { } }\n"
+                     "}";
+  std::string Expected = "\n\nnamespace C {\n"
+                         "namespace D { int i; }\n\n"
+                         "}";
+  std::vector<tooling::Range> Ranges;
+  Ranges.push_back(tooling::Range(28, 0));
+  Ranges.push_back(tooling::Range(91, 6));
+  Ranges.push_back(tooling::Range(132, 0));
+  std::string Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(FixTest, NamespaceWithSyntaxError) {
+  std::string Code = "namespace A {\n"
+                     "namespace B {\n" // missing r_brace
+                     "} // namespace A\n\n"
+                     "namespace C {\n"
+                     "namespace D int i; }\n"
+                     "inline namespace E { namespace { } }\n"
+                     "}";
+  std::string Expected = "namespace A {\n"
+                         "\n\nnamespace C {\n"
+                         "namespace D int i; }\n\n"
+                         "}";
+  std::vector<tooling::Range> Ranges(1, tooling::Range(0, Code.size()));
+  std::string Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(FixTest, EmptyNamespaceNotAffected) {
+  std::string Code = "namespace A {\n\n"
+                     "namespace {\n\n}}";
+  // Even though the namespaces are empty, but the inner most empty namespace
+  // block is not affected by the changed ranges.
+  std::string Expected = "namespace A {\n\n"
+                         "namespace {\n\n}}";
+  // Set the changed range to be the second "\n".
+  std::vector<tooling::Range> Ranges(1, tooling::Range(14, 0));
+  std::string Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(FixTest, CtorInitializerInNamespace) {
+  std::string Code = "namespace A {\n"
+                     "namespace B {\n" // missing r_brace
+                     "} // namespace A\n\n"
+                     "namespace C {\n"
+                     "class A { A() : x(0),, {} };\n"
+                     "inline namespace E { namespace { } }\n"
+                     "}";
+  std::string Expected = "namespace A {\n"
+                         "\n\nnamespace C {\n"
+                         "class A { A() : x(0) {} };\n\n"
+                         "}";
+  std::vector<tooling::Range> Ranges(1, tooling::Range(0, Code.size()));
+  std::string Result = fix(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+} // end namespace
+} // end namespace format
+} // end namespace clang
Index: unittests/Format/CMakeLists.txt
===================================================================
--- unittests/Format/CMakeLists.txt
+++ unittests/Format/CMakeLists.txt
@@ -3,6 +3,7 @@
   )
 
 add_clang_unittest(FormatTests
+  FixTest.cpp
   FormatTest.cpp
   FormatTestJava.cpp
   FormatTestJS.cpp
Index: lib/Format/TokenAnnotator.h
===================================================================
--- lib/Format/TokenAnnotator.h
+++ lib/Format/TokenAnnotator.h
@@ -42,8 +42,9 @@
       : First(Line.Tokens.front().Tok), Level(Line.Level),
         InPPDirective(Line.InPPDirective),
         MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
-        IsMultiVariableDeclStmt(false), Affected(false),
-        LeadingEmptyLinesAffected(false), ChildrenAffected(false) {
+        IsMultiVariableDeclStmt(false), IsConstructorInitializer(false),
+        Affected(false), LeadingEmptyLinesAffected(false),
+        ChildrenAffected(false) {
     assert(!Line.Tokens.empty());
 
     // Calculate Next and Previous for all tokens. Note that we must overwrite
@@ -106,6 +107,7 @@
   bool MustBeDeclaration;
   bool MightBeFunctionDecl;
   bool IsMultiVariableDeclStmt;
+  bool IsConstructorInitializer;
 
   /// \c True if this line should be formatted, i.e. intersects directly or
   /// indirectly with one of the input ranges.
@@ -118,6 +120,9 @@
   /// \c True if a one of this line's children intersects with an input range.
   bool ChildrenAffected;
 
+  /// \c True if the entire line is has been deleted by fixer.
+  bool Deleted = false;
+
 private:
   // Disallow copying.
   AnnotatedLine(const AnnotatedLine &) = delete;
Index: lib/Format/TokenAnnotator.cpp
===================================================================
--- lib/Format/TokenAnnotator.cpp
+++ lib/Format/TokenAnnotator.cpp
@@ -508,10 +508,12 @@
         Tok->Type = TT_BitFieldColon;
       } else if (Contexts.size() == 1 &&
                  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
-        if (Tok->Previous->isOneOf(tok::r_paren, tok::kw_noexcept))
+        if (Tok->Previous->isOneOf(tok::r_paren, tok::kw_noexcept)) {
           Tok->Type = TT_CtorInitializerColon;
-        else
+          Line.IsConstructorInitializer = true;
+        } else {
           Tok->Type = TT_InheritanceColon;
+        }
       } else if (Tok->Previous->is(tok::identifier) && Tok->Next &&
                  Tok->Next->isOneOf(tok::r_paren, tok::comma)) {
         // This handles a special macro in ObjC code where selectors including
Index: lib/Format/FormatToken.h
===================================================================
--- lib/Format/FormatToken.h
+++ lib/Format/FormatToken.h
@@ -279,6 +279,9 @@
   /// changes.
   bool Finalized = false;
 
+  // \brief Fixer will set this to true if the token is going to be deleted.
+  bool Deleted = false;
+
   bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
   bool is(TokenType TT) const { return Type == TT; }
   bool is(const IdentifierInfo *II) const {
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -1443,14 +1443,436 @@
   }
 }
 
+class RangeManager {
+public:
+  RangeManager(SourceManager &SourceMgr,
+               const SmallVector<CharSourceRange, 8> &Ranges)
+      : SourceMgr(SourceMgr), Ranges(Ranges) {}
+
+  // Determines which lines are affected by the SourceRanges given as input.
+  // Returns \c true if at least one line between I and E or one of their
+  // children is affected.
+  bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
+                            SmallVectorImpl<AnnotatedLine *>::iterator E) {
+    bool SomeLineAffected = false;
+    const AnnotatedLine *PreviousLine = nullptr;
+    while (I != E) {
+      AnnotatedLine *Line = *I;
+      Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
+
+      // If a line is part of a preprocessor directive, it needs to be formatted
+      // if any token within the directive is affected.
+      if (Line->InPPDirective) {
+        FormatToken *Last = Line->Last;
+        SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
+        while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
+          Last = (*PPEnd)->Last;
+          ++PPEnd;
+        }
+
+        if (affectsTokenRange(*Line->First, *Last,
+                              /*IncludeLeadingNewlines=*/false)) {
+          SomeLineAffected = true;
+          markAllAsAffected(I, PPEnd);
+        }
+        I = PPEnd;
+        continue;
+      }
+
+      if (nonPPLineAffected(Line, PreviousLine))
+        SomeLineAffected = true;
+
+      PreviousLine = Line;
+      ++I;
+    }
+    return SomeLineAffected;
+  }
+
+  // Returns true if 'Range' intersects with one of the input ranges.
+  bool affectsCharSourceRange(const CharSourceRange &Range) {
+    for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
+                                                          E = Ranges.end();
+         I != E; ++I) {
+      if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
+          !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
+        return true;
+    }
+    return false;
+  }
+
+private:
+  // Returns true if the range from 'First' to 'Last' intersects with one of the
+  // input ranges.
+  bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
+                         bool IncludeLeadingNewlines) {
+    SourceLocation Start = First.WhitespaceRange.getBegin();
+    if (!IncludeLeadingNewlines)
+      Start = Start.getLocWithOffset(First.LastNewlineOffset);
+    SourceLocation End = Last.getStartOfNonWhitespace();
+    End = End.getLocWithOffset(Last.TokenText.size());
+    CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
+    return affectsCharSourceRange(Range);
+  }
+
+  // Returns true if one of the input ranges intersect the leading empty lines
+  // before 'Tok'.
+  bool affectsLeadingEmptyLines(const FormatToken &Tok) {
+    CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
+        Tok.WhitespaceRange.getBegin(),
+        Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
+    return affectsCharSourceRange(EmptyLineRange);
+  }
+
+  // Marks all lines between I and E as well as all their children as affected.
+  void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
+                         SmallVectorImpl<AnnotatedLine *>::iterator E) {
+    while (I != E) {
+      (*I)->Affected = true;
+      markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
+      ++I;
+    }
+  }
+
+  // Determines whether 'Line' is affected by the SourceRanges given as input.
+  // Returns \c true if line or one if its children is affected.
+  bool nonPPLineAffected(AnnotatedLine *Line,
+                         const AnnotatedLine *PreviousLine) {
+    bool SomeLineAffected = false;
+    Line->ChildrenAffected =
+        computeAffectedLines(Line->Children.begin(), Line->Children.end());
+    if (Line->ChildrenAffected)
+      SomeLineAffected = true;
+
+    // Stores whether one of the line's tokens is directly affected.
+    bool SomeTokenAffected = false;
+    // Stores whether we need to look at the leading newlines of the next token
+    // in order to determine whether it was affected.
+    bool IncludeLeadingNewlines = false;
+
+    // Stores whether the first child line of any of this line's tokens is
+    // affected.
+    bool SomeFirstChildAffected = false;
+
+    for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
+      // Determine whether 'Tok' was affected.
+      if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
+        SomeTokenAffected = true;
+
+      // Determine whether the first child of 'Tok' was affected.
+      if (!Tok->Children.empty() && Tok->Children.front()->Affected)
+        SomeFirstChildAffected = true;
+
+      IncludeLeadingNewlines = Tok->Children.empty();
+    }
+
+    // Was this line moved, i.e. has it previously been on the same line as an
+    // affected line?
+    bool LineMoved = PreviousLine && PreviousLine->Affected &&
+                     Line->First->NewlinesBefore == 0;
+
+    bool IsContinuedComment =
+        Line->First->is(tok::comment) && Line->First->Next == nullptr &&
+        Line->First->NewlinesBefore < 2 && PreviousLine &&
+        PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
+
+    if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
+        IsContinuedComment) {
+      Line->Affected = true;
+      SomeLineAffected = true;
+    }
+    return SomeLineAffected;
+  }
+
+  SourceManager &SourceMgr;
+  const SmallVector<CharSourceRange, 8> Ranges;
+};
+
+class Fixer : public UnwrappedLineConsumer {
+public:
+  Fixer(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
+        ArrayRef<CharSourceRange> Ranges)
+      : Style(Style), ID(ID), SourceMgr(SourceMgr),
+        RangeMgr(SourceMgr,
+                 SmallVector<CharSourceRange, 8>(Ranges.begin(), Ranges.end())),
+        UnwrappedLines(1),
+        Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))),
+        DeletedTokens(FormatTokenLess(SourceMgr)) {}
+
+  tooling::Replacements fix() {
+    tooling::Replacements Result;
+    FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
+
+    UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
+                               *this);
+    Parser.parse();
+    assert(UnwrappedLines.rbegin()->empty());
+
+    TokenAnnotator Annotator(Style, Tokens.getKeywords());
+    for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
+         ++Run) {
+      DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
+      for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
+        AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
+        Annotator.annotate(*AnnotatedLines.back());
+      }
+      // FIXME: in the current implementation the granularity of affected range
+      // is an annotated line. However, this is not sufficient. Furthermore,
+      // redundant code introduced by replacements does not necessarily
+      // intercept with ranges of replacements that result in the redundancy.
+      // To determine if some redundant code is actually introduced by
+      // replacements(e.g. deletions), we need to come up with a more
+      // sophisticated way of computing affected ranges.
+      RangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+                                    AnnotatedLines.end());
+
+      tooling::Replacements RunResult = runFix();
+      DEBUG({
+        llvm::dbgs() << "Replacements for run " << Run << ":\n";
+        for (tooling::Replacements::iterator I = RunResult.begin(),
+                                             E = RunResult.end();
+             I != E; ++I) {
+          llvm::dbgs() << I->toString() << "\n";
+        }
+      });
+      for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i)
+        delete AnnotatedLines[i];
+      Result.insert(RunResult.begin(), RunResult.end());
+    }
+    return Result;
+  }
+
+private:
+  void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
+    assert(!UnwrappedLines.empty());
+    UnwrappedLines.back().push_back(TheLine);
+  }
+
+  void finishRun() override {
+    UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
+  }
+
+  tooling::Replacements runFix() {
+    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+      auto &Line = *AnnotatedLines[i];
+      if (Line.Deleted)
+        continue;
+
+      if (Line.startsWith(tok::kw_namespace) ||
+          Line.startsWith(tok::kw_inline, tok::kw_namespace))
+        checkEmptyNamespace(i);
+
+      if (Line.IsConstructorInitializer)
+        checkConstructorInitList(Line);
+    }
+
+    return generateFixes();
+  }
+
+  bool containsOnlyComments(const AnnotatedLine &Line) {
+    bool CommentsOnly = true;
+    FormatToken *Tok = Line.First;
+    while (CommentsOnly && Tok) {
+      CommentsOnly = Tok->is(tok::comment);
+      Tok = Tok->Next;
+    }
+    return CommentsOnly;
+  }
+
+  // Returns the index of the last line of the namespace if the namespace is
+  // empty.
+  // Otherwise, returns -1.
+  int checkEmptyNamespace(unsigned CurrentLine) {
+    if (!AnnotatedLines[CurrentLine]->Last->is(tok::l_brace))
+      return -1; // FIXME: error handling.
+    unsigned InitLine = CurrentLine, End = AnnotatedLines.size();
+    bool IsEmpty = true;
+    while (IsEmpty && (++CurrentLine < End)) {
+      if (AnnotatedLines[CurrentLine]->startsWith(tok::kw_namespace) ||
+          AnnotatedLines[CurrentLine]->startsWith(tok::kw_inline,
+                                                  tok::kw_namespace)) {
+        int NewLine = checkEmptyNamespace(CurrentLine);
+        if (NewLine == -1) {
+          IsEmpty = false;
+          continue;
+        }
+        CurrentLine = NewLine;
+      } else if (AnnotatedLines[CurrentLine]->startsWith(tok::comment) &&
+                 containsOnlyComments(*AnnotatedLines[CurrentLine]))
+        continue;
+      else if (AnnotatedLines[CurrentLine]->startsWith(tok::r_brace))
+        break;
+      else
+        IsEmpty = false;
+    }
+
+    if (!IsEmpty || CurrentLine >= End)
+      return -1;
+
+    // Check if the empty namespace is actually affected by changed ranges.
+    if (!RangeMgr.affectsCharSourceRange(CharSourceRange::getCharRange(
+            AnnotatedLines[InitLine]->First->Tok.getLocation(),
+            AnnotatedLines[CurrentLine]->Last->Tok.getEndLoc())))
+      return -1;
+
+    for (unsigned i = InitLine; i <= CurrentLine; ++i) {
+      AnnotatedLines[i]->Deleted = true;
+      FormatToken *Tok = AnnotatedLines[i]->First;
+      while (Tok) {
+        deleteToken(Tok);
+        Tok = Tok->Next;
+      }
+    }
+
+    return CurrentLine;
+  }
+
+  void checkConstructorInitList(AnnotatedLine &Line) {
+    if (!Line.Affected || Line.Deleted)
+      return;
+
+    FormatToken *Tok = Line.First;
+    while (Tok->Type != TT_CtorInitializerColon) {
+      Tok = Tok->Next;
+    }
+    if (!Tok)
+      return;
+    assert(Tok->is(tok::colon) && Tok->Type == TT_CtorInitializerColon);
+    FormatToken *CtorColonTok = Tok;
+    FormatToken *LastTokenNotDeleted = Tok;
+    Tok = Tok->Next;
+    // This vector stores comments between the last token not deleted and the
+    // current token.
+    SmallVector<FormatToken *, 1> Comments;
+    bool IsListEmpty = true;
+    while (Tok) {
+      switch (Tok->Tok.getKind()) {
+      case tok::comma:
+        if (LastTokenNotDeleted->isOneOf(tok::comma, tok::colon)) {
+          deleteToken(Tok);
+          // If there is a new line before the deleted comma, the comment may
+          // belong to the previous token.
+          if (!Tok->HasUnescapedNewline)
+            for (auto *Comment : Comments)
+              deleteToken(Comment);
+        }
+        break;
+      case tok::l_paren:
+        // We need to skip a pair of parentheses here because it is possible
+        // that "({ ... })" appears in the initialization list, and we do not
+        // want to return when we get the "{" in the parentheses.
+        if (!Tok->MatchingParen)
+          return; // FIXME: error handling.
+        Tok = Tok->MatchingParen;
+        LastTokenNotDeleted = Tok->Previous;
+        break;
+      case tok::l_brace:
+        if (LastTokenNotDeleted->is(tok::comma)) {
+          deleteToken(LastTokenNotDeleted);
+          for (auto *Comment : Comments)
+            deleteToken(Comment);
+        }
+        break;
+      case tok::comment:
+        // If the last deleted token is followed by a comment "//...", then we
+        // delete the comment as well.
+        if (Tok->Previous && Tok->Previous->Deleted &&
+            Tok->TokenText.startswith("//"))
+          deleteToken(Tok);
+        else
+          Comments.push_back(Tok);
+        break;
+      default:
+        IsListEmpty = false;
+        break;
+      }
+      if (Tok->isNot(tok::comment)) {
+        Comments.clear();
+      }
+      if (!Tok->Deleted && Tok->isNot(tok::comment))
+        LastTokenNotDeleted = Tok;
+      Tok = Tok->Next;
+    }
+
+    if (IsListEmpty)
+      deleteToken(CtorColonTok);
+  }
+
+  // Delete the given token.
+  inline void deleteToken(FormatToken *Tok) {
+    Tok->Deleted = true;
+    DeletedTokens.insert(Tok);
+  }
+
+  tooling::Replacements generateFixes() {
+    tooling::Replacements Fixes;
+    std::vector<FormatToken *> Tokens;
+    std::copy(DeletedTokens.begin(), DeletedTokens.end(),
+              std::back_inserter(Tokens));
+
+    // Link all tokens.
+    for (unsigned Idx = 0, End = AnnotatedLines.size() - 1; Idx < End;
+         ++Idx) {
+      AnnotatedLines[Idx]->Last->Next = AnnotatedLines[Idx + 1]->First;
+    }
+
+    // Merge multiple continuous token deletions into one big deletion.
+    unsigned Idx = 0;
+    while (Idx < Tokens.size()) {
+      unsigned St = Idx, End = Idx;
+      while ((End + 1) < Tokens.size() &&
+             Tokens[End]->Next == Tokens[End + 1]) {
+        End++;
+      }
+      auto SR = CharSourceRange::getCharRange(Tokens[St]->Tok.getLocation(),
+                                              Tokens[End]->Tok.getEndLoc());
+      Fixes.insert(tooling::Replacement(SourceMgr, SR, ""));
+      Idx = End + 1;
+    }
+
+    // Unlink all tokens.
+    for (unsigned Idx = 0, End = AnnotatedLines.size()-1; Idx < End; ++Idx) {
+      AnnotatedLines[Idx]->Last->Next = nullptr;
+    }
+
+    return Fixes;
+  }
+
+  // Class for less-than inequality comparason for the set `RedundantTokens`.
+  // We store tokens in the order they appear in the translation unit so that
+  // we do not need to sort them in `generateFixes()`.
+  struct FormatTokenLess {
+    FormatTokenLess(SourceManager &SM) : SM(SM) {}
+
+    bool operator()(const FormatToken *LHS, const FormatToken *RHS) {
+      return SM.isBeforeInTranslationUnit(LHS->Tok.getLocation(),
+                                          RHS->Tok.getLocation());
+    }
+
+    SourceManager &SM;
+  };
+
+  FormatStyle Style;
+  FileID ID;
+  SourceManager &SourceMgr;
+  // RangeMgr stores the ranges to be fixed.
+  RangeManager RangeMgr;
+  SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
+  encoding::Encoding Encoding;
+  SmallVector<AnnotatedLine *, 16> AnnotatedLines;
+  // Tokens to be deleted.
+  std::set<FormatToken *, FormatTokenLess> DeletedTokens;
+};
+
 class Formatter : public UnwrappedLineConsumer {
 public:
   Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
             ArrayRef<CharSourceRange> Ranges)
       : Style(Style), ID(ID), SourceMgr(SourceMgr),
         Whitespaces(SourceMgr, Style,
                     inputUsesCRLF(SourceMgr.getBufferData(ID))),
-        Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
+        RangeMgr(SourceMgr,
+                 SmallVector<CharSourceRange, 8>(Ranges.begin(), Ranges.end())),
+        UnwrappedLines(1),
         Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
     DEBUG(llvm::dbgs() << "File encoding: "
                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
@@ -1503,7 +1925,9 @@
       Annotator.annotate(*AnnotatedLines[i]);
     }
     deriveLocalStyle(AnnotatedLines);
-    computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
+
+    RangeMgr.computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
+
     if (Style.Language == FormatStyle::LK_JavaScript &&
         Style.JavaScriptQuotes != FormatStyle::JSQS_Leave)
       requoteJSStringLiteral(AnnotatedLines, Result);
@@ -1523,45 +1947,6 @@
   }
 
 private:
-  // Determines which lines are affected by the SourceRanges given as input.
-  // Returns \c true if at least one line between I and E or one of their
-  // children is affected.
-  bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
-                            SmallVectorImpl<AnnotatedLine *>::iterator E) {
-    bool SomeLineAffected = false;
-    const AnnotatedLine *PreviousLine = nullptr;
-    while (I != E) {
-      AnnotatedLine *Line = *I;
-      Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
-
-      // If a line is part of a preprocessor directive, it needs to be formatted
-      // if any token within the directive is affected.
-      if (Line->InPPDirective) {
-        FormatToken *Last = Line->Last;
-        SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
-        while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
-          Last = (*PPEnd)->Last;
-          ++PPEnd;
-        }
-
-        if (affectsTokenRange(*Line->First, *Last,
-                              /*IncludeLeadingNewlines=*/false)) {
-          SomeLineAffected = true;
-          markAllAsAffected(I, PPEnd);
-        }
-        I = PPEnd;
-        continue;
-      }
-
-      if (nonPPLineAffected(Line, PreviousLine))
-        SomeLineAffected = true;
-
-      PreviousLine = Line;
-      ++I;
-    }
-    return SomeLineAffected;
-  }
- 
   // If the last token is a double/single-quoted string literal, generates a
   // replacement with a single/double quoted string literal, re-escaping the
   // contents in the process.
@@ -1638,101 +2023,6 @@
     }
   }
 
-
-  // Determines whether 'Line' is affected by the SourceRanges given as input.
-  // Returns \c true if line or one if its children is affected.
-  bool nonPPLineAffected(AnnotatedLine *Line,
-                         const AnnotatedLine *PreviousLine) {
-    bool SomeLineAffected = false;
-    Line->ChildrenAffected =
-        computeAffectedLines(Line->Children.begin(), Line->Children.end());
-    if (Line->ChildrenAffected)
-      SomeLineAffected = true;
-
-    // Stores whether one of the line's tokens is directly affected.
-    bool SomeTokenAffected = false;
-    // Stores whether we need to look at the leading newlines of the next token
-    // in order to determine whether it was affected.
-    bool IncludeLeadingNewlines = false;
-
-    // Stores whether the first child line of any of this line's tokens is
-    // affected.
-    bool SomeFirstChildAffected = false;
-
-    for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
-      // Determine whether 'Tok' was affected.
-      if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
-        SomeTokenAffected = true;
-
-      // Determine whether the first child of 'Tok' was affected.
-      if (!Tok->Children.empty() && Tok->Children.front()->Affected)
-        SomeFirstChildAffected = true;
-
-      IncludeLeadingNewlines = Tok->Children.empty();
-    }
-
-    // Was this line moved, i.e. has it previously been on the same line as an
-    // affected line?
-    bool LineMoved = PreviousLine && PreviousLine->Affected &&
-                     Line->First->NewlinesBefore == 0;
-
-    bool IsContinuedComment =
-        Line->First->is(tok::comment) && Line->First->Next == nullptr &&
-        Line->First->NewlinesBefore < 2 && PreviousLine &&
-        PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
-
-    if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
-        IsContinuedComment) {
-      Line->Affected = true;
-      SomeLineAffected = true;
-    }
-    return SomeLineAffected;
-  }
-
-  // Marks all lines between I and E as well as all their children as affected.
-  void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
-                         SmallVectorImpl<AnnotatedLine *>::iterator E) {
-    while (I != E) {
-      (*I)->Affected = true;
-      markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
-      ++I;
-    }
-  }
-
-  // Returns true if the range from 'First' to 'Last' intersects with one of the
-  // input ranges.
-  bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
-                         bool IncludeLeadingNewlines) {
-    SourceLocation Start = First.WhitespaceRange.getBegin();
-    if (!IncludeLeadingNewlines)
-      Start = Start.getLocWithOffset(First.LastNewlineOffset);
-    SourceLocation End = Last.getStartOfNonWhitespace();
-    End = End.getLocWithOffset(Last.TokenText.size());
-    CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
-    return affectsCharSourceRange(Range);
-  }
-
-  // Returns true if one of the input ranges intersect the leading empty lines
-  // before 'Tok'.
-  bool affectsLeadingEmptyLines(const FormatToken &Tok) {
-    CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
-        Tok.WhitespaceRange.getBegin(),
-        Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
-    return affectsCharSourceRange(EmptyLineRange);
-  }
-
-  // Returns true if 'Range' intersects with one of the input ranges.
-  bool affectsCharSourceRange(const CharSourceRange &Range) {
-    for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
-                                                          E = Ranges.end();
-         I != E; ++I) {
-      if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
-          !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
-        return true;
-    }
-    return false;
-  }
-
   static bool inputUsesCRLF(StringRef Text) {
     return Text.count('\r') * 2 > Text.count('\n');
   }
@@ -1817,7 +2107,8 @@
   FileID ID;
   SourceManager &SourceMgr;
   WhitespaceManager Whitespaces;
-  SmallVector<CharSourceRange, 8> Ranges;
+  // RangeMgr stores ranges to be fixed.
+  RangeManager RangeMgr;
   SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
 
   encoding::Encoding Encoding;
@@ -1990,23 +2281,45 @@
   return Replaces;
 }
 
-tooling::Replacements formatReplacements(StringRef Code,
-                                         const tooling::Replacements &Replaces,
-                                         const FormatStyle &Style) {
+typedef std::function<tooling::Replacements(
+    const FormatStyle &, StringRef, std::vector<tooling::Range>, StringRef)>
+    ReplacementsProcessFunctionType;
+
+static tooling::Replacements
+processReplacements(StringRef Code, const tooling::Replacements &Replaces,
+                    ReplacementsProcessFunctionType ProcessFunc,
+                    const FormatStyle &Style) {
   if (Replaces.empty())
     return tooling::Replacements();
 
   std::string NewCode = applyAllReplacements(Code, Replaces);
   std::vector<tooling::Range> ChangedRanges =
       tooling::calculateChangedRanges(Replaces);
   StringRef FileName = Replaces.begin()->getFilePath();
+
   tooling::Replacements FormatReplaces =
-      reformat(Style, NewCode, ChangedRanges, FileName);
+      ProcessFunc(Style, NewCode, ChangedRanges, FileName);
 
-  tooling::Replacements MergedReplacements =
-      mergeReplacements(Replaces, FormatReplaces);
+  return mergeReplacements(Replaces, FormatReplaces);
+}
+
+tooling::Replacements formatReplacements(StringRef Code,
+                                         const tooling::Replacements &Replaces,
+                                         const FormatStyle &Style) {
+  // We need to use lambda function here since `reformat` has default parameter
+  // `IncompleteFormat`.
+  auto Reformat = [](const FormatStyle &Style, StringRef Code,
+                     std::vector<tooling::Range> Ranges,
+                     StringRef FileName) -> tooling::Replacements {
+    return reformat(Style, Code, Ranges, FileName);
+  };
+  return processReplacements(Code, Replaces, Reformat, Style);
+}
 
-  return MergedReplacements;
+tooling::Replacements fixReplacements(StringRef Code,
+                                      const tooling::Replacements &Replaces,
+                                      const FormatStyle &Style) {
+  return processReplacements(Code, Replaces, fix, Style);
 }
 
 tooling::Replacements reformat(const FormatStyle &Style,
@@ -2048,6 +2361,35 @@
   return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat);
 }
 
+tooling::Replacements fix(const FormatStyle &Style, StringRef Code,
+                          ArrayRef<tooling::Range> Ranges, StringRef FileName) {
+  if (Style.DisableFormat)
+    return tooling::Replacements();
+
+  IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
+      new vfs::InMemoryFileSystem);
+  FileManager Files(FileSystemOptions(), InMemoryFileSystem);
+  DiagnosticsEngine Diagnostics(
+      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
+      new DiagnosticOptions);
+  SourceManager SourceMgr(Diagnostics, Files);
+  InMemoryFileSystem->addFile(
+      FileName, 0, llvm::MemoryBuffer::getMemBuffer(
+                       Code, FileName, /*RequiresNullTerminator=*/false));
+  FileID ID = SourceMgr.createFileID(Files.getFile(FileName), SourceLocation(),
+                                     clang::SrcMgr::C_User);
+  SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
+  std::vector<CharSourceRange> CharRanges;
+  for (const tooling::Range &Range : Ranges) {
+    SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
+    SourceLocation End = Start.getLocWithOffset(Range.getLength());
+    CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
+  }
+
+  Fixer Fix(Style, SourceMgr, ID, CharRanges);
+  return Fix.fix();
+}
+
 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
   LangOptions LangOpts;
   LangOpts.CPlusPlus = 1;
Index: include/clang/Format/Format.h
===================================================================
--- include/clang/Format/Format.h
+++ include/clang/Format/Format.h
@@ -766,6 +766,12 @@
                                          const tooling::Replacements &Replaces,
                                          const FormatStyle &Style);
 
+/// \brief Returns the replacements corresponding to applying and fixing
+/// \p Replaces.
+tooling::Replacements fixReplacements(StringRef Code,
+                                      const tooling::Replacements &Replaces,
+                                      const FormatStyle &Style);
+
 /// \brief Reformats the given \p Ranges in the file \p ID.
 ///
 /// Each range is extended on either end to its next bigger logic unit, i.e.
@@ -791,6 +797,13 @@
                                StringRef FileName = "<stdin>",
                                bool *IncompleteFormat = nullptr);
 
+/// \brief Fix any erroneous/redundant code in the given \p Ranges in \p Code.
+///
+/// Returns the ``Replacements`` that fix all \p Ranges in \p Code.
+tooling::Replacements fix(const FormatStyle &Style, StringRef Code,
+                          ArrayRef<tooling::Range> Ranges,
+                          StringRef FileName = "<stdin>");
+
 /// \brief Returns the ``LangOpts`` that the formatter expects you to set.
 ///
 /// \param Style determines specific settings for lexing mode.

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Re: [PATCH] D18551: Added Fixer implementation and fix() interface in clang-format for removing redundant code.

Reply via email to