Author: Martin Probst Date: 2021-04-30T14:18:52+02:00 New Revision: b2780cd744eaad6f5c7f39165054cf7000a1ff07
URL: https://github.com/llvm/llvm-project/commit/b2780cd744eaad6f5c7f39165054cf7000a1ff07 DIFF: https://github.com/llvm/llvm-project/commit/b2780cd744eaad6f5c7f39165054cf7000a1ff07.diff LOG: clang-format: [JS] handle "off" in imports Previously, the JavaScript import sorter would ignore `// clang-format off` and `on` comments. This change fixes that. It tracks whether formatting is enabled for a stretch of imports, and then only sorts and merges the imports where formatting is enabled, in individual chunks. This means that there's no meaningful total order when module references are mixed with blocks that have formatting disabled. The alternative approach would have been to sort all imports that have formatting enabled in one group. However that raises the question where to insert the formatting-off block, which can also impact symbol visibility (in particular for exports). In practice, sorting in chunks probably isn't a big problem. This change also simplifies the general algorithm: instead of tracking indices separately and sorting them, it just sorts the vector of module references. And instead of attempting to do fine grained tracking of whether the code changed order, it just prints out the module references text, and compares that to the previous text. Given that source files typically have dozens, but not even hundreds of imports, the performance impact seems negligible. Differential Revision: https://reviews.llvm.org/D101515 Added: Modified: clang/lib/Format/SortJavaScriptImports.cpp clang/unittests/Format/SortImportsTestJS.cpp Removed: ################################################################################ diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp index ca83f1926f6ce..4b88ece02a109 100644 --- a/clang/lib/Format/SortJavaScriptImports.cpp +++ b/clang/lib/Format/SortJavaScriptImports.cpp @@ -19,6 +19,7 @@ #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Format/Format.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -69,6 +70,7 @@ struct JsImportedSymbol { // This struct represents both exports and imports to build up the information // required for sorting module references. struct JsModuleReference { + bool FormattingOff = false; bool IsExport = false; // Module references are sorted into these categories, in order. enum ReferenceCategory { @@ -146,39 +148,31 @@ class JavaScriptImportSorter : public TokenAnalyzer { if (References.empty()) return {Result, 0}; - SmallVector<unsigned, 16> Indices; - for (unsigned i = 0, e = References.size(); i != e; ++i) - Indices.push_back(i); - llvm::stable_sort(Indices, [&](unsigned LHSI, unsigned RHSI) { - return References[LHSI] < References[RHSI]; - }); - bool ReferencesInOrder = llvm::is_sorted(Indices); + // The text range of all parsed imports, to be replaced later. + SourceRange InsertionPoint = References[0].Range; + InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd()); - mergeModuleReferences(References, Indices); + References = sortModuleReferences(References); std::string ReferencesText; - bool SymbolsInOrder = true; - for (unsigned i = 0, e = Indices.size(); i != e; ++i) { - JsModuleReference Reference = References[Indices[i]]; - if (appendReference(ReferencesText, Reference)) - SymbolsInOrder = false; - if (i + 1 < e) { + for (unsigned I = 0, E = References.size(); I != E; ++I) { + JsModuleReference Reference = References[I]; + appendReference(ReferencesText, Reference); + if (I + 1 < E) { // Insert breaks between imports and exports. ReferencesText += "\n"; // Separate imports groups with two line breaks, but keep all exports // in a single group. if (!Reference.IsExport && - (Reference.IsExport != References[Indices[i + 1]].IsExport || - Reference.Category != References[Indices[i + 1]].Category)) + (Reference.IsExport != References[I + 1].IsExport || + Reference.Category != References[I + 1].Category)) ReferencesText += "\n"; } } - if (ReferencesInOrder && SymbolsInOrder) + llvm::StringRef PreviousText = getSourceText(InsertionPoint); + if (ReferencesText == PreviousText) return {Result, 0}; - SourceRange InsertionPoint = References[0].Range; - InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd()); - // The loop above might collapse previously existing line breaks between // import blocks, and thus shrink the file. SortIncludes must not shrink // overall source length as there is currently no re-calculation of ranges @@ -186,17 +180,19 @@ class JavaScriptImportSorter : public TokenAnalyzer { // This loop just backfills trailing spaces after the imports, which are // harmless and will be stripped by the subsequent formatting pass. // FIXME: A better long term fix is to re-calculate Ranges after sorting. - unsigned PreviousSize = getSourceText(InsertionPoint).size(); + unsigned PreviousSize = PreviousText.size(); while (ReferencesText.size() < PreviousSize) { ReferencesText += " "; } // Separate references from the main code body of the file. - if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2) + if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2 && + !(FirstNonImportLine->First->is(tok::comment) && + FirstNonImportLine->First->TokenText.trim() == "// clang-format on")) ReferencesText += "\n"; LLVM_DEBUG(llvm::dbgs() << "Replacing imports:\n" - << getSourceText(InsertionPoint) << "\nwith:\n" + << PreviousText << "\nwith:\n" << ReferencesText << "\n"); auto Err = Result.add(tooling::Replacement( Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint), @@ -248,6 +244,38 @@ class JavaScriptImportSorter : public TokenAnalyzer { SM.getFileOffset(End) - SM.getFileOffset(Begin)); } + // Sorts the given module references. + // Imports can have formatting disabled (FormattingOff), so the code below + // skips runs of "no-formatting" module references, and sorts/merges the + // references that have formatting enabled in individual chunks. + SmallVector<JsModuleReference, 16> + sortModuleReferences(const SmallVector<JsModuleReference, 16> &References) { + // Sort module references. + // Imports can have formatting disabled (FormattingOff), so the code below + // skips runs of "no-formatting" module references, and sorts other + // references per group. + const auto *Start = References.begin(); + SmallVector<JsModuleReference, 16> ReferencesSorted; + while (Start != References.end()) { + while (Start != References.end() && Start->FormattingOff) { + // Skip over all imports w/ disabled formatting. + ReferencesSorted.push_back(*Start); + Start++; + } + SmallVector<JsModuleReference, 16> SortChunk; + while (Start != References.end() && !Start->FormattingOff) { + // Skip over all imports w/ disabled formatting. + SortChunk.push_back(*Start); + Start++; + } + llvm::stable_sort(SortChunk); + mergeModuleReferences(SortChunk); + ReferencesSorted.insert(ReferencesSorted.end(), SortChunk.begin(), + SortChunk.end()); + } + return ReferencesSorted; + } + // Merge module references. // After sorting, find all references that import named symbols from the // same URL and merge their names. E.g. @@ -255,16 +283,14 @@ class JavaScriptImportSorter : public TokenAnalyzer { // import {Y} from 'a'; // should be rewritten to: // import {X, Y} from 'a'; - // Note: this modifies the passed in ``Indices`` vector (by removing no longer - // needed references), but not ``References``. - // ``JsModuleReference``s that get merged have the ``SymbolsMerged`` flag - // flipped to true. - void mergeModuleReferences(SmallVector<JsModuleReference, 16> &References, - SmallVector<unsigned, 16> &Indices) { - JsModuleReference *PreviousReference = &References[Indices[0]]; - auto *It = std::next(Indices.begin()); - while (It != std::end(Indices)) { - JsModuleReference *Reference = &References[*It]; + // Note: this modifies the passed in ``References`` vector (by removing no + // longer needed references). + void mergeModuleReferences(SmallVector<JsModuleReference, 16> &References) { + if (References.empty()) + return; + JsModuleReference *PreviousReference = References.begin(); + auto *Reference = std::next(References.begin()); + while (Reference != References.end()) { // Skip: // import 'foo'; // import * as foo from 'foo'; on either previous or this. @@ -278,20 +304,19 @@ class JavaScriptImportSorter : public TokenAnalyzer { !Reference->DefaultImport.empty() || Reference->Symbols.empty() || PreviousReference->URL != Reference->URL) { PreviousReference = Reference; - ++It; + ++Reference; continue; } // Merge symbols from identical imports. PreviousReference->Symbols.append(Reference->Symbols); PreviousReference->SymbolsMerged = true; // Remove the merged import. - It = Indices.erase(It); + Reference = References.erase(Reference); } } - // Appends ``Reference`` to ``Buffer``, returning true if text within the - // ``Reference`` changed (e.g. symbol order). - bool appendReference(std::string &Buffer, JsModuleReference &Reference) { + // Appends ``Reference`` to ``Buffer``. + void appendReference(std::string &Buffer, JsModuleReference &Reference) { // Sort the individual symbols within the import. // E.g. `import {b, a} from 'x';` -> `import {a, b} from 'x';` SmallVector<JsImportedSymbol, 1> Symbols = Reference.Symbols; @@ -303,7 +328,7 @@ class JavaScriptImportSorter : public TokenAnalyzer { // Symbols didn't change, just emit the entire module reference. StringRef ReferenceStmt = getSourceText(Reference.Range); Buffer += ReferenceStmt; - return false; + return; } // Stitch together the module reference start... Buffer += getSourceText(Reference.Range.getBegin(), Reference.SymbolsStart); @@ -315,7 +340,6 @@ class JavaScriptImportSorter : public TokenAnalyzer { } // ... followed by the module reference end. Buffer += getSourceText(Reference.SymbolsEnd, Reference.Range.getEnd()); - return true; } // Parses module references in the given lines. Returns the module references, @@ -328,9 +352,30 @@ class JavaScriptImportSorter : public TokenAnalyzer { SourceLocation Start; AnnotatedLine *FirstNonImportLine = nullptr; bool AnyImportAffected = false; + bool FormattingOff = false; for (auto *Line : AnnotatedLines) { Current = Line->First; LineEnd = Line->Last; + // clang-format comments toggle formatting on/off. + // This is tracked in FormattingOff here and on JsModuleReference. + while (Current && Current->is(tok::comment)) { + StringRef CommentText = Current->TokenText.trim(); + if (CommentText == "// clang-format off") { + FormattingOff = true; + } else if (CommentText == "// clang-format on") { + FormattingOff = false; + // Special case: consider a trailing "clang-format on" line to be part + // of the module reference, so that it gets moved around together with + // it (as opposed to the next module reference, which might get sorted + // around). + if (!References.empty()) { + References.back().Range.setEnd(Current->Tok.getEndLoc()); + Start = Current->Tok.getEndLoc().getLocWithOffset(1); + } + } + // Handle all clang-format comments on a line, e.g. for an empty block. + Current = Current->Next; + } skipComments(); if (Start.isInvalid() || References.empty()) // After the first file level comment, consider line comments to be part @@ -343,6 +388,7 @@ class JavaScriptImportSorter : public TokenAnalyzer { continue; } JsModuleReference Reference; + Reference.FormattingOff = FormattingOff; Reference.Range.setBegin(Start); if (!parseModuleReference(Keywords, Reference)) { if (!FirstNonImportLine) @@ -354,13 +400,14 @@ class JavaScriptImportSorter : public TokenAnalyzer { Reference.Range.setEnd(LineEnd->Tok.getEndLoc()); LLVM_DEBUG({ llvm::dbgs() << "JsModuleReference: {" - << "is_export: " << Reference.IsExport + << "formatting_off: " << Reference.FormattingOff + << ", is_export: " << Reference.IsExport << ", cat: " << Reference.Category << ", url: " << Reference.URL << ", prefix: " << Reference.Prefix; - for (size_t i = 0; i < Reference.Symbols.size(); ++i) - llvm::dbgs() << ", " << Reference.Symbols[i].Symbol << " as " - << Reference.Symbols[i].Alias; + for (size_t I = 0; I < Reference.Symbols.size(); ++I) + llvm::dbgs() << ", " << Reference.Symbols[I].Symbol << " as " + << Reference.Symbols[I].Alias; llvm::dbgs() << ", text: " << getSourceText(Reference.Range); llvm::dbgs() << "}\n"; }); diff --git a/clang/unittests/Format/SortImportsTestJS.cpp b/clang/unittests/Format/SortImportsTestJS.cpp index 7e7669c0ab51d..031dadaaa7a22 100644 --- a/clang/unittests/Format/SortImportsTestJS.cpp +++ b/clang/unittests/Format/SortImportsTestJS.cpp @@ -358,7 +358,8 @@ TEST_F(SortImportsTestJS, MergeImports) { // do not merge imports and exports verifySort("import {A} from 'foo';\n" - "export {B} from 'foo';", + "\n" + "export {B} from 'foo';\n", "import {A} from 'foo';\n" "export {B} from 'foo';"); // do merge exports @@ -373,6 +374,63 @@ TEST_F(SortImportsTestJS, MergeImports) { "import './a';\n"); } +TEST_F(SortImportsTestJS, RespectsClangFormatOff) { + verifySort("// clang-format off\n" + "import {B} from './b';\n" + "import {A} from './a';\n" + "// clang-format on\n", + "// clang-format off\n" + "import {B} from './b';\n" + "import {A} from './a';\n" + "// clang-format on\n"); + + verifySort("import {A} from './sorted1_a';\n" + "import {B} from './sorted1_b';\n" + "// clang-format off\n" + "import {B} from './unsorted_b';\n" + "import {A} from './unsorted_a';\n" + "// clang-format on\n" + "import {A} from './sorted2_a';\n" + "import {B} from './sorted2_b';\n", + "import {B} from './sorted1_b';\n" + "import {A} from './sorted1_a';\n" + "// clang-format off\n" + "import {B} from './unsorted_b';\n" + "import {A} from './unsorted_a';\n" + "// clang-format on\n" + "import {B} from './sorted2_b';\n" + "import {A} from './sorted2_a';\n"); + + // Boundary cases + verifySort("// clang-format on\n", "// clang-format on\n"); + verifySort("// clang-format off\n", "// clang-format off\n"); + verifySort("// clang-format on\n" + "// clang-format off\n", + "// clang-format on\n" + "// clang-format off\n"); + verifySort("// clang-format off\n" + "// clang-format on\n" + "import {A} from './a';\n" + "import {B} from './b';\n", + "// clang-format off\n" + "// clang-format on\n" + "import {B} from './b';\n" + "import {A} from './a';\n"); + // section ends with comment + verifySort("// clang-format on\n" + "import {A} from './a';\n" + "import {B} from './b';\n" + "import {C} from './c';\n" + "\n" // inserted empty line is working as intended: splits imports + // section from main code body + "// clang-format off\n", + "// clang-format on\n" + "import {C} from './c';\n" + "import {B} from './b';\n" + "import {A} from './a';\n" + "// clang-format off\n"); +} + } // end namespace } // end namespace format } // end namespace clang _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits