ioeric created this revision.
ioeric added a reviewer: ilya-biryukov.
Herald added subscribers: cfe-commits, klimek.
The class will be moved into libToolingCore as followup.
The new behaviors in this patch:
- New #include is inserted in the right position in a #include block to
preserver sorted #includes. This is best effort - only works when the
block is already sorted.
- When inserting multiple #includes to the end of a file which doesn't
end with a "\n" character, a "\n" will be prepended to each #include.
This is a special and rare case that was previously handled. This is now
relaxed to avoid complexity as it's rare in practice.
Repository:
rC Clang
https://reviews.llvm.org/D46180
Files:
lib/Format/Format.cpp
unittests/Format/CleanupTest.cpp
Index: unittests/Format/CleanupTest.cpp
===================================================================
--- unittests/Format/CleanupTest.cpp
+++ unittests/Format/CleanupTest.cpp
@@ -471,19 +471,77 @@
EXPECT_EQ(Expected, apply(Code, Replaces));
}
+TEST_F(CleanUpReplacementsTest, InsertIntoBlockSorted) {
+ std::string Code = "#include \"x/fix.h\"\n"
+ "#include \"a.h\"\n"
+ "#include \"c.h\"\n"
+ "#include <memory>\n";
+ std::string Expected = "#include \"x/fix.h\"\n"
+ "#include \"a.h\"\n"
+ "#include \"b.h\"\n"
+ "#include \"c.h\"\n"
+ "#include <memory>\n";
+ tooling::Replacements Replaces =
+ toReplacements({createInsertion("#include \"b.h\"")});
+ EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, InsertIntoFirstBlockOfSameKind) {
+ std::string Code = "#include \"x/fix.h\"\n"
+ "#include \"c.h\"\n"
+ "#include \"e.h\"\n"
+ "#include \"f.h\"\n"
+ "#include <memory>\n"
+ "#include <vector>\n"
+ "#include \"m.h\"\n"
+ "#include \"n.h\"\n";
+ std::string Expected = "#include \"x/fix.h\"\n"
+ "#include \"c.h\"\n"
+ "#include \"d.h\"\n"
+ "#include \"e.h\"\n"
+ "#include \"f.h\"\n"
+ "#include <memory>\n"
+ "#include <vector>\n"
+ "#include \"m.h\"\n"
+ "#include \"n.h\"\n";
+ tooling::Replacements Replaces =
+ toReplacements({createInsertion("#include \"d.h\"")});
+ EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+TEST_F(CleanUpReplacementsTest, InsertIntoSystemBlockSorted) {
+ std::string Code = "#include \"x/fix.h\"\n"
+ "#include \"a.h\"\n"
+ "#include \"c.h\"\n"
+ "#include <a>\n"
+ "#include <z>\n";
+ std::string Expected = "#include \"x/fix.h\"\n"
+ "#include \"a.h\"\n"
+ "#include \"c.h\"\n"
+ "#include <a>\n"
+ "#include <vector>\n"
+ "#include <z>\n";
+ tooling::Replacements Replaces =
+ toReplacements({createInsertion("#include <vector>")});
+ EXPECT_EQ(Expected, apply(Code, Replaces));
+}
+
+
TEST_F(CleanUpReplacementsTest, InsertMultipleIncludesLLVMStyle) {
std::string Code = "#include \"x/fix.h\"\n"
"#include \"a.h\"\n"
"#include \"b.h\"\n"
+ "#include \"z.h\"\n"
"#include \"clang/Format/Format.h\"\n"
"#include <memory>\n";
std::string Expected = "#include \"x/fix.h\"\n"
"#include \"a.h\"\n"
"#include \"b.h\"\n"
"#include \"new/new.h\"\n"
+ "#include \"z.h\"\n"
"#include \"clang/Format/Format.h\"\n"
- "#include <memory>\n"
- "#include <list>\n";
+ "#include <list>\n"
+ "#include <memory>\n";
tooling::Replacements Replaces =
toReplacements({createInsertion("#include <list>"),
createInsertion("#include \"new/new.h\"")});
@@ -517,12 +575,12 @@
"#include \"z/b.h\"\n";
std::string Expected = "#include \"x/fix.h\"\n"
"\n"
- "#include <vector>\n"
"#include <list>\n"
+ "#include <vector>\n"
"\n"
+ "#include \"x/x.h\"\n"
"#include \"y/a.h\"\n"
- "#include \"z/b.h\"\n"
- "#include \"x/x.h\"\n";
+ "#include \"z/b.h\"\n";
tooling::Replacements Replaces =
toReplacements({createInsertion("#include <list>"),
createInsertion("#include \"x/x.h\"")});
@@ -776,8 +834,10 @@
TEST_F(CleanUpReplacementsTest, NoNewLineAtTheEndOfCodeMultipleInsertions) {
std::string Code = "#include <map>";
+ // FIXME: a better behavior is to only append on newline to Code, but this
+ // case should be rare in practice.
std::string Expected =
- "#include <map>\n#include <string>\n#include <vector>\n";
+ "#include <map>\n#include <string>\n\n#include <vector>\n";
tooling::Replacements Replaces =
toReplacements({createInsertion("#include <string>"),
createInsertion("#include <vector>")});
@@ -795,14 +855,11 @@
EXPECT_EQ(Expected, apply(Code, Replaces));
}
-TEST_F(CleanUpReplacementsTest, AddIncludesWithDifferentForms) {
+TEST_F(CleanUpReplacementsTest, NoAddSameIncludesWithDifferentForms) {
std::string Code = "#include \"a.h\"\n"
"#include <vector>\n";
- // FIXME: this might not be the best behavior.
std::string Expected = "#include \"a.h\"\n"
- "#include \"vector\"\n"
- "#include <vector>\n"
- "#include <a.h>\n";
+ "#include <vector>\n";
tooling::Replacements Replaces =
toReplacements({createInsertion("#include \"vector\""),
createInsertion("#include <a.h>")});
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -41,6 +41,7 @@
#include <algorithm>
#include <memory>
#include <string>
+#include <unordered_map>
#define DEBUG_TYPE "format-formatter"
@@ -1687,7 +1688,7 @@
// Returns the priority of the category which \p IncludeName belongs to.
// If \p CheckMainHeader is true and \p IncludeName is a main header, returns
// 0. Otherwise, returns the priority of the matching category or INT_MAX.
- int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) {
+ int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) const {
int Ret = INT_MAX;
for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
if (CategoryRegexs[i].match(IncludeName)) {
@@ -1720,7 +1721,7 @@
bool IsMainFile;
StringRef FileName;
StringRef FileStem;
- SmallVector<llvm::Regex, 4> CategoryRegexs;
+ mutable SmallVector<llvm::Regex, 4> CategoryRegexs;
};
const char IncludeRegexPattern[] =
@@ -1984,103 +1985,117 @@
});
}
-bool isDeletedHeader(llvm::StringRef HeaderName,
- const std::set<llvm::StringRef> &HeadersToDelete) {
- return HeadersToDelete.count(HeaderName) ||
- HeadersToDelete.count(HeaderName.trim("\"<>"));
-}
+/// Generates replacements for inserting or deleting #include headers in a file.
+class HeaderIncludes {
+public:
+ HeaderIncludes(llvm::StringRef FileName, llvm::StringRef Code,
+ const FormatStyle &Style);
+
+ /// Inserts an #include directive of \p IncludeName into the code. If \p
+ /// IncludeName is quoted e.g. <...> or "...", it will be #included as is;
+ /// by default, it is quoted with "".
+ ///
+ /// When searching for points to insert new header, this ignores #include's
+ /// after the #include block(s) in the beginning of a file to avoid inserting
+ /// headers into code sections where new #include's should not be added by
+ /// default. These code sections include:
+ /// - raw string literals (containing #include).
+ /// - #if blocks.
+ /// - Special #include's among declarations (e.g. functions).
+ ///
+ /// Returns a replacement that inserts the new header into a suitable #include
+ /// block (#include order is preserved with best effort e.g. if they are
+ /// already sorted; caller should consider sorting the #includes with
+ /// clang-format after insertions). If \p IncludeName already exists, this
+ /// returns None.
+ llvm::Optional<tooling::Replacement> insert(llvm::StringRef IncludeName) const;
+
+ /// Removes all existing #includes of \p Header. If \p Header is not quoted
+ /// (e.g. without <> or ""), #include of \p Header with both quotations will
+ /// be removed.
+ tooling::Replacements remove(llvm::StringRef Header) const;
-// FIXME: insert empty lines between newly created blocks.
-tooling::Replacements
-fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
- const FormatStyle &Style) {
- if (!Style.isCpp())
- return Replaces;
+private:
+ struct Include {
+ Include(StringRef Name, tooling::Range R) : Name(Name), R(R) {}
+
+ // An include header quoted with either <> or "".
+ std::string Name;
+ // The range of the whole line of include directive including any eading
+ // whitespaces and trailing comment.
+ tooling::Range R;
+ };
- tooling::Replacements HeaderInsertions;
- std::set<llvm::StringRef> HeadersToDelete;
- tooling::Replacements Result;
- for (const auto &R : Replaces) {
- if (isHeaderInsertion(R)) {
- // Replacements from \p Replaces must be conflict-free already, so we can
- // simply consume the error.
- llvm::consumeError(HeaderInsertions.add(R));
- } else if (isHeaderDeletion(R)) {
- HeadersToDelete.insert(R.getReplacementText());
- } else if (R.getOffset() == UINT_MAX) {
- llvm::errs() << "Insertions other than header #include insertion are "
- "not supported! "
- << R.getReplacementText() << "\n";
- } else {
- llvm::consumeError(Result.add(R));
- }
- }
- if (HeaderInsertions.empty() && HeadersToDelete.empty())
- return Replaces;
+ void addExistingInclude(Include IncludeToAdd, unsigned NextLineOffset);
- llvm::Regex IncludeRegex(IncludeRegexPattern);
- llvm::Regex DefineRegex(R"(^[\t\ ]*#[\t\ ]*define[\t\ ]*[^\\]*$)");
- SmallVector<StringRef, 4> Matches;
+ StringRef FileName;
+ StringRef Code;
- StringRef FileName = Replaces.begin()->getFilePath();
- IncludeCategoryManager Categories(Style, FileName);
+ // Map from include name (quotation trimmed) to a list of existing includes (in case
+ // there are more than one) with the name in the current file.
+ // <x> and "x" will be treated as the same header when deleting #includes.
+ llvm::StringMap<llvm::SmallVector<Include, 1>> ExistingIncludes;
+
+ std::unordered_map<int, llvm::SmallVector<const Include *, 8>>
+ IncludesByPriority;
+ int FirstIncludeOffset;
+ // All new headers should be inserted after this offset.
+ unsigned MinInsertOffset;
+ // Code with the first MinInsertOffset characters trimmed.
+ StringRef TrimmedCode;
+ // Max insertion offset in the original code.
+ unsigned MaxInsertOffset;
+ IncludeCategoryManager Categories;
// Record the offset of the end of the last include in each category.
- std::map<int, int> CategoryEndOffsets;
+ std::unordered_map<int, int> CategoryEndOffsets;
+
// All possible priorities.
- // Add 0 for main header and INT_MAX for headers that are not in any category.
- std::set<int> Priorities = {0, INT_MAX};
+ std::set<int> Priorities;
+
+ // Matches a whole #include directive.
+ llvm::Regex IncludeRegex;
+};
+
+HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
+ const FormatStyle &Style)
+ : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
+ MinInsertOffset(
+ getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
+ TrimmedCode(Code.drop_front(MinInsertOffset)),
+ MaxInsertOffset(MinInsertOffset + getMaxHeaderInsertionOffset(
+ FileName, TrimmedCode, Style)),
+ Categories(Style, FileName),
+ IncludeRegex(llvm::Regex(IncludeRegexPattern)) {
+ // Add 0 for main header and INT_MAX for headers that are not in any
+ // category.
+ Priorities = {0, INT_MAX};
for (const auto &Category : Style.IncludeCategories)
Priorities.insert(Category.Priority);
- int FirstIncludeOffset = -1;
- // All new headers should be inserted after this offset.
- unsigned MinInsertOffset =
- getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style);
- StringRef TrimmedCode = Code.drop_front(MinInsertOffset);
- // Max insertion offset in the original code.
- unsigned MaxInsertOffset =
- MinInsertOffset +
- getMaxHeaderInsertionOffset(FileName, TrimmedCode, Style);
SmallVector<StringRef, 32> Lines;
TrimmedCode.split(Lines, '\n');
+
unsigned Offset = MinInsertOffset;
unsigned NextLineOffset;
- std::set<StringRef> ExistingIncludes;
+ SmallVector<StringRef, 4> Matches;
for (auto Line : Lines) {
NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
if (IncludeRegex.match(Line, &Matches)) {
- // The header name with quotes or angle brackets.
- StringRef IncludeName = Matches[2];
- ExistingIncludes.insert(IncludeName);
- // Only record the offset of current #include if we can insert after it.
- if (Offset <= MaxInsertOffset) {
- int Category = Categories.getIncludePriority(
- IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
- CategoryEndOffsets[Category] = NextLineOffset;
- if (FirstIncludeOffset < 0)
- FirstIncludeOffset = Offset;
- }
- if (isDeletedHeader(IncludeName, HeadersToDelete)) {
- // If this is the last line without trailing newline, we need to make
- // sure we don't delete across the file boundary.
- unsigned Length = std::min(Line.size() + 1, Code.size() - Offset);
- llvm::Error Err =
- Result.add(tooling::Replacement(FileName, Offset, Length, ""));
- if (Err) {
- // Ignore the deletion on conflict.
- llvm::errs() << "Failed to add header deletion replacement for "
- << IncludeName << ": " << llvm::toString(std::move(Err))
- << "\n";
- }
- }
+ // If this is the last line without trailing newline, we need to make
+ // sure we don't delete across the file boundary.
+ addExistingInclude(
+ Include(Matches[2],
+ tooling::Range(
+ Offset, std::min(Line.size() + 1, Code.size() - Offset))),
+ NextLineOffset);
}
Offset = NextLineOffset;
}
// Populate CategoryEndOfssets:
// - Ensure that CategoryEndOffset[Highest] is always populated.
- // - If CategoryEndOffset[Priority] isn't set, use the next higher value that
- // is set, up to CategoryEndOffset[Highest].
+ // - If CategoryEndOffset[Priority] isn't set, use the next higher value
+ // that is set, up to CategoryEndOffset[Highest].
auto Highest = Priorities.begin();
if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) {
if (FirstIncludeOffset >= 0)
@@ -2095,39 +2110,156 @@
for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
+}
+
+inline StringRef trimInclude(StringRef IncludeName) {
+ return IncludeName.trim("\"<>");
+}
- bool NeedNewLineAtEnd = !Code.empty() && Code.back() != '\n';
+// \p Offset: the start of the line following this include directive.
+void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
+ unsigned NextLineOffset) {
+ auto Iter =
+ ExistingIncludes.try_emplace(trimInclude(IncludeToAdd.Name)).first;
+ Iter->second.push_back(std::move(IncludeToAdd));
+ auto &CurInclude = Iter->second.back();
+ // The header name with quotes or angle brackets.
+ // Only record the offset of current #include if we can insert after it.
+ if (CurInclude.R.getOffset() <= MaxInsertOffset) {
+ int Priority = Categories.getIncludePriority(
+ CurInclude.Name, /*CheckMainHeader=*/FirstIncludeOffset < 0);
+ CategoryEndOffsets[Priority] = NextLineOffset;
+ IncludesByPriority[Priority].push_back(&CurInclude);
+ if (FirstIncludeOffset < 0)
+ FirstIncludeOffset = CurInclude.R.getOffset();
+ }
+}
+
+llvm::Optional<tooling::Replacement>
+HeaderIncludes::insert(llvm::StringRef IncludeName) const {
+ std::string TrimmedName = trimInclude(IncludeName);
+ // If a <header> ("header") already exists in code, "header" (<header>) with
+ // different quotation will not be inserted.
+ if (ExistingIncludes.find(TrimmedName) != ExistingIncludes.end())
+ return llvm::None;
+ std::string Quoted =
+ (IncludeName.startswith("<") || IncludeName.startswith("\""))
+ ? IncludeName.str()
+ : ("\"" + IncludeName + "\"").str();
+ StringRef QuotedName = Quoted;
+ int Priority = Categories.getIncludePriority(
+ QuotedName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
+ auto CatOffset = CategoryEndOffsets.find(Priority);
+ assert(CatOffset != CategoryEndOffsets.end());
+ unsigned InsertOffset = CatOffset->second; // Fall back offset
+ auto Iter = IncludesByPriority.find(Priority);
+ if (Iter != IncludesByPriority.end()) {
+ for (const auto *Inc : Iter->second) {
+ if (QuotedName < Inc->Name) {
+ InsertOffset = Inc->R.getOffset();
+ break;
+ }
+ }
+ }
+ assert(InsertOffset <= Code.size());
+ std::string NewInclude = ("#include " + QuotedName + "\n").str();
+ // When inserting headers at end of the code, also append '\n' to the code
+ // if it does not end with '\n'.
+ // FIXME: when inserting multiple #includes at the end of code, only one
+ // newline should be added.
+ if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n'))
+ NewInclude = "\n" + NewInclude;
+ return tooling::Replacement(FileName, InsertOffset, 0, NewInclude);
+}
+
+tooling::Replacements
+HeaderIncludes::remove(llvm::StringRef IncludeName) const {
+ StringRef TrimmedName = trimInclude(IncludeName);
+ tooling::Replacements Result;
+ auto Iter = ExistingIncludes.find(TrimmedName);
+ if (Iter == ExistingIncludes.end())
+ return Result;
+ for (const auto &Inc : Iter->second) {
+ assert(trimInclude(Inc.Name) == TrimmedName);
+ // If IncludeName is not quoted, then both quotations are deleted.
+ if (IncludeName != TrimmedName &&
+ IncludeName != Inc.Name) // Different quotations, skip.
+ continue;
+ llvm::Error Err = Result.add(tooling::Replacement(
+ FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
+ if (Err) {
+ auto ErrMsg = "Unexpected conflicts in #include deletions: " +
+ llvm::toString(std::move(Err));
+ llvm_unreachable(ErrMsg.c_str());
+ }
+ }
+ return Result;
+}
+
+// FIXME: insert empty lines between newly created blocks.
+tooling::Replacements
+fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
+ const FormatStyle &Style) {
+ if (!Style.isCpp())
+ return Replaces;
+
+ tooling::Replacements HeaderInsertions;
+ std::set<llvm::StringRef> HeadersToDelete;
+ tooling::Replacements Result;
+ for (const auto &R : Replaces) {
+ if (isHeaderInsertion(R)) {
+ // Replacements from \p Replaces must be conflict-free already, so we can
+ // simply consume the error.
+ llvm::consumeError(HeaderInsertions.add(R));
+ } else if (isHeaderDeletion(R)) {
+ HeadersToDelete.insert(R.getReplacementText());
+ } else if (R.getOffset() == UINT_MAX) {
+ llvm::errs() << "Insertions other than header #include insertion are "
+ "not supported! "
+ << R.getReplacementText() << "\n";
+ } else {
+ llvm::consumeError(Result.add(R));
+ }
+ }
+ if (HeaderInsertions.empty() && HeadersToDelete.empty())
+ return Replaces;
+
+
+ StringRef FileName = Replaces.begin()->getFilePath();
+ HeaderIncludes Includes(FileName, Code, Style);
+
+ for (const auto &Header : HeadersToDelete) {
+ tooling::Replacements Replaces = Includes.remove(Header);
+ for (const auto &R : Replaces) {
+ auto Err = Result.add(R);
+ if (Err) {
+ // Ignore the deletion on conflict.
+ llvm::errs() << "Failed to add header deletion replacement for "
+ << Header << ": " << llvm::toString(std::move(Err))
+ << "\n";
+ }
+ }
+ }
+
+ llvm::Regex IncludeRegex = llvm::Regex(IncludeRegexPattern);
+ llvm::SmallVector<StringRef, 4> Matches;
for (const auto &R : HeaderInsertions) {
auto IncludeDirective = R.getReplacementText();
bool Matched = IncludeRegex.match(IncludeDirective, &Matches);
assert(Matched && "Header insertion replacement must have replacement text "
"'#include ...'");
(void)Matched;
auto IncludeName = Matches[2];
- if (ExistingIncludes.find(IncludeName) != ExistingIncludes.end()) {
- DEBUG(llvm::dbgs() << "Skip adding existing include : " << IncludeName
- << "\n");
- continue;
- }
- int Category =
- Categories.getIncludePriority(IncludeName, /*CheckMainHeader=*/true);
- Offset = CategoryEndOffsets[Category];
- std::string NewInclude = !IncludeDirective.endswith("\n")
- ? (IncludeDirective + "\n").str()
- : IncludeDirective.str();
- // When inserting headers at end of the code, also append '\n' to the code
- // if it does not end with '\n'.
- if (NeedNewLineAtEnd && Offset == Code.size()) {
- NewInclude = "\n" + NewInclude;
- NeedNewLineAtEnd = false;
- }
- auto NewReplace = tooling::Replacement(FileName, Offset, 0, NewInclude);
- auto Err = Result.add(NewReplace);
- if (Err) {
- llvm::consumeError(std::move(Err));
- unsigned NewOffset = Result.getShiftedCodePosition(Offset);
- NewReplace = tooling::Replacement(FileName, NewOffset, 0, NewInclude);
- Result = Result.merge(tooling::Replacements(NewReplace));
+ auto Replace = Includes.insert(IncludeName);
+ if (Replace) {
+ auto Err = Result.add(*Replace);
+ if (Err) {
+ llvm::consumeError(std::move(Err));
+ unsigned NewOffset = Result.getShiftedCodePosition(Replace->getOffset());
+ auto Shifted = tooling::Replacement(FileName, NewOffset, 0,
+ Replace->getReplacementText());
+ Result = Result.merge(tooling::Replacements(Shifted));
+ }
}
}
return Result;
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits