kwk created this revision.
kwk added a reviewer: MyDeveloperDay.
Herald added a project: All.
kwk requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

The foundation for this patch was done in:
https://reviews.llvm.org/D121370. I wanted to "rescue" some pieces from
it.

Unlike D121370 <https://reviews.llvm.org/D121370>, this patch doesn't modify 
the regular expression itself.
It simply provides three functions to have a cleaner and more
transparent way how the include name is processed from an `#include`
line.

Here's how I've tested this patch:

  $ cd llvm-project
  $ mkdir build && cd build
  $ cmake ../llvm/ -DCMAKE_BUILD_TYPE=Release -DCLANG_BUILD_TOOLS=On 
-DLLVM_ENABLE_PROJECTS=clang -DCLANG_INCLUDE_TESTS=ON -DLLVM_BUILD_UTILS=ON -G 
Ninja
  $ ninja clangFormat
  $ ninja FormatTests
  $ ./tools/clang/unittests/Format/FormatTests --gtest_filter=SortIncludesTest*

And if that worked I doubled checked that nothing else broke by running
all format checks:

  $ ./tools/clang/unittests/Format/FormatTests


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D134733

Files:
  clang/include/clang/Tooling/Inclusions/HeaderIncludes.h
  clang/lib/Format/Format.cpp
  clang/lib/Tooling/Inclusions/HeaderIncludes.cpp

Index: clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
===================================================================
--- clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
+++ clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
@@ -169,13 +169,6 @@
       });
 }
 
-inline StringRef trimInclude(StringRef IncludeName) {
-  return IncludeName.trim("\"<>");
-}
-
-const char IncludeRegexPattern[] =
-    R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
-
 // The filename of Path excluding extension.
 // Used to match implementation with headers, this differs from sys::path::stem:
 //  - in names with multiple dots (foo.cu.cc) it terminates at the *first*
@@ -274,8 +267,7 @@
       MaxInsertOffset(MinInsertOffset +
                       getMaxHeaderInsertionOffset(
                           FileName, Code.drop_front(MinInsertOffset), Style)),
-      Categories(Style, FileName),
-      IncludeRegex(llvm::Regex(IncludeRegexPattern)) {
+      Categories(Style, FileName), IncludeRegex(getCppIncludeRegex()) {
   // Add 0 for main header and INT_MAX for headers that are not in any
   // category.
   Priorities = {0, INT_MAX};
@@ -290,10 +282,11 @@
   for (auto Line : Lines) {
     NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
     if (IncludeRegex.match(Line, &Matches)) {
+      StringRef IncludeName = tooling::getIncludeNameFromMatches(Matches);
       // If this is the last line without trailing newline, we need to make
       // sure we don't delete across the file boundary.
       addExistingInclude(
-          Include(Matches[2],
+          Include(IncludeName,
                   tooling::Range(
                       Offset, std::min(Line.size() + 1, Code.size() - Offset))),
           NextLineOffset);
@@ -403,5 +396,24 @@
   return Result;
 }
 
+llvm::Regex getCppIncludeRegex() {
+  static const char CppIncludeRegexPattern[] =
+      R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
+  return llvm::Regex(CppIncludeRegexPattern);
+}
+
+llvm::StringRef getIncludeNameFromMatches(
+    const llvm::SmallVectorImpl<llvm::StringRef> &Matches) {
+  if (Matches.size() >= 3) {
+    return Matches[2];
+  }
+  llvm_unreachable("Matches is too small");
+  return llvm::StringRef();
+}
+
+llvm::StringRef trimInclude(llvm::StringRef IncludeName) {
+  return IncludeName.trim("\"<>;");
+}
+
 } // namespace tooling
 } // namespace clang
Index: clang/lib/Format/Format.cpp
===================================================================
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -44,6 +44,7 @@
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/YAMLTraits.h"
 #include <algorithm>
+#include <limits>
 #include <memory>
 #include <mutex>
 #include <string>
@@ -2769,13 +2770,6 @@
   }
 }
 
-namespace {
-
-const char CppIncludeRegexPattern[] =
-    R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
-
-} // anonymous namespace
-
 tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
                                       ArrayRef<tooling::Range> Ranges,
                                       StringRef FileName,
@@ -2785,7 +2779,7 @@
                       .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
                       .Default(0);
   unsigned SearchFrom = 0;
-  llvm::Regex IncludeRegex(CppIncludeRegexPattern);
+  llvm::Regex IncludeRegex(tooling::getCppIncludeRegex());
   SmallVector<StringRef, 4> Matches;
   SmallVector<IncludeDirective, 16> IncludesInBlock;
 
@@ -2843,7 +2837,8 @@
     bool MergeWithNextLine = Trimmed.endswith("\\");
     if (!FormattingOff && !MergeWithNextLine) {
       if (IncludeRegex.match(Line, &Matches)) {
-        StringRef IncludeName = Matches[2];
+        StringRef IncludeName = tooling::getIncludeNameFromMatches(Matches);
+
         if (Line.contains("/*") && !Line.contains("*/")) {
           // #include with a start of a block comment, but without the end.
           // Need to keep all the lines until the end of the comment together.
@@ -3120,8 +3115,7 @@
 
 inline bool isHeaderInsertion(const tooling::Replacement &Replace) {
   return Replace.getOffset() == UINT_MAX && Replace.getLength() == 0 &&
-         llvm::Regex(CppIncludeRegexPattern)
-             .match(Replace.getReplacementText());
+         tooling::getCppIncludeRegex().match(Replace.getReplacementText());
 }
 
 inline bool isHeaderDeletion(const tooling::Replacement &Replace) {
@@ -3129,7 +3123,7 @@
 }
 
 // FIXME: insert empty lines between newly created blocks.
-tooling::Replacements
+static tooling::Replacements
 fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
                         const FormatStyle &Style) {
   if (!Style.isCpp())
@@ -3161,7 +3155,7 @@
 
   for (const auto &Header : HeadersToDelete) {
     tooling::Replacements Replaces =
-        Includes.remove(Header.trim("\"<>"), Header.startswith("<"));
+        Includes.remove(tooling::trimInclude(Header), Header.startswith("<"));
     for (const auto &R : Replaces) {
       auto Err = Result.add(R);
       if (Err) {
@@ -3173,7 +3167,7 @@
     }
   }
 
-  llvm::Regex IncludeRegex = llvm::Regex(CppIncludeRegexPattern);
+  llvm::Regex IncludeRegex = tooling::getCppIncludeRegex();
   llvm::SmallVector<StringRef, 4> Matches;
   for (const auto &R : HeaderInsertions) {
     auto IncludeDirective = R.getReplacementText();
@@ -3181,9 +3175,9 @@
     assert(Matched && "Header insertion replacement must have replacement text "
                       "'#include ...'");
     (void)Matched;
-    auto IncludeName = Matches[2];
-    auto Replace =
-        Includes.insert(IncludeName.trim("\"<>"), IncludeName.startswith("<"));
+    StringRef IncludeName = tooling::getIncludeNameFromMatches(Matches);
+    auto Replace = Includes.insert(tooling::trimInclude(IncludeName),
+                                   IncludeName.startswith("<"));
     if (Replace) {
       auto Err = Result.add(*Replace);
       if (Err) {
Index: clang/include/clang/Tooling/Inclusions/HeaderIncludes.h
===================================================================
--- clang/include/clang/Tooling/Inclusions/HeaderIncludes.h
+++ clang/include/clang/Tooling/Inclusions/HeaderIncludes.h
@@ -129,6 +129,22 @@
   llvm::Regex IncludeRegex;
 };
 
+/// \returns a regex that can match various styles of C++ includes.
+/// For example:
+/// \code
+/// #include <foo.h>
+/// #include "bar.h"
+/// \endcode
+llvm::Regex getCppIncludeRegex();
+
+/// \returns the last match in the list of matches that is not empty.
+llvm::StringRef getIncludeNameFromMatches(
+    const llvm::SmallVectorImpl<llvm::StringRef> &Matches);
+
+/// \returns the given include name and removes the following symbols from the
+/// beginning and ending of the include name: " > < ;
+llvm::StringRef trimInclude(llvm::StringRef IncludeName);
+
 } // namespace tooling
 } // namespace clang
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to