hokein updated this revision to Diff 232101. hokein marked 3 inline comments as done. hokein added a comment.
address comments - re-define the concept of a near miss - add metric for evaluate how good a near miss is Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D70594/new/ https://reviews.llvm.org/D70594 Files: clang-tools-extra/clangd/refactor/Rename.cpp clang-tools-extra/clangd/refactor/Rename.h clang-tools-extra/clangd/unittests/RenameTests.cpp
Index: clang-tools-extra/clangd/unittests/RenameTests.cpp =================================================================== --- clang-tools-extra/clangd/unittests/RenameTests.cpp +++ clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -702,6 +702,222 @@ expectedResult(Code, expectedResult(T, "abc"))); } +TEST(RangePatchingHeuristic, MappedRange) { + // Ranges in IndexedCode indicate the indexed occurrences; + // ranges in DraftCode indicate the expected mapped result, empty indicates + // we expect no matched result found. + struct { + llvm::StringRef IndexedCode; + llvm::StringRef DraftCode; + } Tests[] = { + { + // no lexed occurrences. + R"cpp(int [[x]] = 0)cpp", + R"cpp()cpp", + }, + { + // both line and column are changed, not a near miss. + R"cpp( + int [[x]] = 0; + )cpp", + R"cpp( + + int x = 0; + )cpp", + }, + { + // subset. + R"cpp( + int [[x]] = 0; + )cpp", + R"cpp( + int [[x]] = 0; + {int x = 0; } + )cpp", + }, + { + // column shift + R"cpp(int [[x]] = 0; void foo(int x);)cpp", + R"cpp(double [[x]] = 0; void foo(double x);)cpp", + }, + { + // insert a line + R"cpp( + int [[x]] = 0; + void foo(int x); + )cpp", + R"cpp( + // new line insert; + int [[x]] = 0; + void foo(int x); + )cpp", + }, + { + // insert a line, x refers to different symbols + R"cpp( + void foo(int [[x]]) { + [[x]] = 2; + { int x = 0; } + [[x]] = 3; + int y = [[x]]; + { int x = 0; } + } + )cpp", + R"cpp( + // insert a new line + void foo(int [[x]]) { + [[x]] = 2; + { int x = 0; } + [[x]] = 3; + int y = [[x]]; + { int x = 0; } + } + )cpp", + }, + { + // non-distinct result (have two best results), not a near miss. + R"cpp( + int [[x]](int a); + int a = [[x]](1); + int b = [[x]](2); + )cpp", + R"cpp( + int x(); + int x(int a); + int a = x(1); + int b = x(2); + )cpp", + }, + }; + LangOptions LangOpts; + LangOpts.CPlusPlus = true; + for (const auto &T : Tests) { + auto IndexedOccurrences = Annotations(T.IndexedCode).ranges(); + Annotations Draft(T.DraftCode); + auto ActualRanges = + patchRanges(Draft.code(), "x", IndexedOccurrences, LangOpts); + if (!Draft.ranges().empty()) { + EXPECT_TRUE((bool)ActualRanges) << "patchRange returned an error: " + << llvm::toString(ActualRanges.takeError()); + EXPECT_THAT(Draft.ranges(), + testing::UnorderedElementsAreArray(*ActualRanges)) + << T.DraftCode; + } else { + EXPECT_FALSE(ActualRanges) + << "expected an error: " << T.DraftCode; + llvm::consumeError(ActualRanges.takeError()); + } + } +} + +TEST(RangePatchingHeuristic, EditCost) { + struct { + llvm::StringRef RangeCode; + size_t ExpectedCost; + } Tests[] = { + { + R"( + $idx[[]]$lex[[]] // diff: 0 + )", + 0, + }, + { + // line offset shift. + R"( + $idx[[]] + $lex[[]] // line diff: +1 + $idx[[]] + $lex[[]] // line diff: +1 + $idx[[]] + $lex[[]] // line diff: +1 + + $idx[[]] + + $lex[[]] // line diff: +2 + )", + 1 + 1 + }, + { + R"( + $idx[[]] + $lex[[]] // line diff: +1 + $idx[[]] + + $lex[[]] // line diff: +2 + $idx[[]] + + + $lex[[]] // line diff: +3 + )", + 1 + 1 + 1 + }, + { + R"( + $idx[[]] + + + $lex[[]] // line diff: +3 + $idx[[]] + + $lex[[]] // line diff: +2 + $idx[[]] + $lex[[]] // line diff: +1 + )", + 3 + 1 + 1 + }, + { + R"( + $idx[[]] + $lex[[]] // line diff: +1 + $lex[[]] // line diff: -2 + + $idx[[]] + $idx[[]] + + + $lex[[]] // line diff: +3 + )", + 1 + 3 + 5 + }, + // line & column diff. + { + R"( + $idx[[]] $lex[[]] // column diff: +1 + $idx[[]]$lex[[]] // diff: 0 + )", + 1 + }, + { + R"( + $idx[[]] + $lex[[]] // diff: +1 + $idx[[]] $lex[[]] // column diff: +1 + $idx[[]]$lex[[]] // diff: 0 + )", + 1 + 1 + 1 + }, + // column offset shift. + { + R"( + $idx[[]] $lex[[]] // column diff: +1 + )", + 1 + }, + { + R"( + // column diffs: +1, +2, +3 + $idx[[]] $lex[[]] $idx[[]] $lex[[]] $idx[[]] $lex[[]] + )", + 1 + 1 + 1, + }, + }; + for (const auto &T : Tests) { + Annotations C(T.RangeCode); + EXPECT_EQ(editCost(C.ranges("idx"), C.ranges("lex")), T.ExpectedCost) + << T.RangeCode; + } +} + } // namespace } // namespace clangd } // namespace clang Index: clang-tools-extra/clangd/refactor/Rename.h =================================================================== --- clang-tools-extra/clangd/refactor/Rename.h +++ clang-tools-extra/clangd/refactor/Rename.h @@ -12,6 +12,7 @@ #include "Path.h" #include "Protocol.h" #include "SourceCode.h" +#include "clang/Basic/LangOptions.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/Support/Error.h" @@ -55,6 +56,52 @@ std::vector<Range> Occurrences, llvm::StringRef NewName); +/// Gets "authoritative" occurrences to perform the rename. +/// +/// Index is not always up-to-update. Applying stale indexed occurrences leads +/// to bad rename result, so this API helps to mitigate the stale-index issue. +/// It determines the indexed occurrences are good enough, and heuristically +/// repairs the indexed occurrences if necessary. +/// +/// Details: +/// - lex the draft code to get all rename candidates, this yields a superset +// of candidates. +/// - apply range patching heuristics to generate "authoritative" occurrences, +/// cases we consider: +/// (a) index returns a subset of candidates, we use the indexed results. +/// - fully equal, we are sure the index is up-to-date +/// - proper subset, index is correct in most cases? there may be false +/// positives (e.g. candidates got appended), but rename is still safe +/// (b) index returns non-candidate results, we use the best near miss. +/// +/// The API assumes that IndexedOccurrences contains only named occcurrences ( +/// each occurrence has the same length). +llvm::Expected<std::vector<Range>> patchRanges(llvm::StringRef DraftCode, + llvm::StringRef Identifier, + std::vector<Range> Indexed, + const LangOptions &LangOpts); + +/// Evaluates how good the mapping result is. 0 indicates a perfect match. +/// +/// It uses the sum of the implied edit sizes between successive diffs, only +/// simple edits are considered: +/// - insert/remove a line (change line offset) +/// - insert/remove a character on an existing line (change the column offset) +/// +/// Example I, total result is 1 + 1 = 2. +/// diff[0]: line + 1 <- insert a line before edit 0. +/// diff[1]: line + 1 +/// diff[2]: line + 1 +/// diff[3]: line + 2 <- insert a line before edits 2 and 3. +/// +/// Example II, total result is 1 + 1 + 1 = 3. +/// diff[0]: line + 1 <- insert a line before edit 0. +/// diff[1]: column + 1 <- remove a line between edits 0 and 1, and insert a +/// character on edit 1. +/// +/// REQUIRED: Indexed and Mapped are sorted, and have the same size. +size_t editCost(ArrayRef<Range> Indexed, ArrayRef<Range> Mapped); + } // namespace clangd } // namespace clang Index: clang-tools-extra/clangd/refactor/Rename.cpp =================================================================== --- clang-tools-extra/clangd/refactor/Rename.cpp +++ clang-tools-extra/clangd/refactor/Rename.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" +#include <algorithm> namespace clang { namespace clangd { @@ -355,9 +356,19 @@ elog("Fail to read file content: {0}", AffectedFileCode.takeError()); continue; } - auto RenameEdit = - buildRenameEdit(FilePath, *AffectedFileCode, - std::move(FileAndOccurrences.second), NewName); + auto RenameCandidates = + patchRanges(*AffectedFileCode, RenameDecl.getNameAsString(), + std::move(FileAndOccurrences.second), + RenameDecl.getASTContext().getLangOpts()); + if (!RenameCandidates) { + return llvm::make_error<llvm::StringError>( + llvm::formatv("Index results don't match the content of file {0} " + "(the index may be stale), details: {0}", + FilePath, llvm::toString(RenameCandidates.takeError())), + llvm::inconvertibleErrorCode()); + } + auto RenameEdit = buildRenameEdit(FilePath, *AffectedFileCode, + *RenameCandidates, NewName); if (!RenameEdit) { return llvm::make_error<llvm::StringError>( llvm::formatv("fail to build rename edit for file {0}: {1}", FilePath, @@ -370,6 +381,142 @@ return Results; } +bool isLineOrColumnEqual(const Position &LHS, const Position &RHS) { + return LHS.line == RHS.line || LHS.character == RHS.character; +} + +// Describe a single line-based/column edit diff. +struct Diff { + enum Type { + Line, + Column, + }; + Type DType; + int Offset = 0; + const Range *IndexedRange = nullptr; + const Range *LexedRange = nullptr; +}; +Diff computeDiff(const Range &LHS, const Range &RHS) { + assert(isLineOrColumnEqual(LHS.start, RHS.start)); + if (LHS.start.line != RHS.start.line) + return Diff{Diff::Line, LHS.start.line - RHS.start.line, &LHS, &RHS}; + return Diff{Diff::Column, LHS.start.character - RHS.start.character, &LHS, + &RHS}; +} + +// Calculate the quality of the give Batch. +size_t batchEditSize(ArrayRef<Diff> Batch) { + assert(!Batch.empty()); + size_t TotalCost = abs(Batch.front().Offset); + for (size_t I = 1; I < Batch.size(); ++I) + TotalCost += abs(Batch[I].Offset - Batch[I - 1].Offset); + return TotalCost; +} + +// Performs a DFS to enumerate all possible near-miss matches. +// It finds the locations where the indexed occurrences are now spelled in +// Lexed occurrences, a near miss is defined as: +// - a near miss maps all of the **name** occurrences from the index onto a +// *subset* of lexed occurrences (we allow a single name refers to more +// than one symbol) +// - all indexed occurrences must be mapped, and Result must be distinct and +// preseve order (only support detecting simple edits to ensure a +// robust mapping) +// - each indexed -> lexed occurrences mapping correspondence may change the +// *line* or *column*, but not both (increases chance of a robust mapping) +class NearMissFinder { +public: + NearMissFinder(ArrayRef<Range> Indexed, ArrayRef<Range> Lexed) + : Indexed(Indexed), Lexed(Lexed) { + assert(Indexed.size() <= Lexed.size()); + MatchedIndex.resize(Indexed.size()); + } + + using MatchedCallback = llvm::function_ref<void(std::vector<Range> Matched)>; + void find(MatchedCallback CB) { + MatchedCB = CB; + enumerate(0, 0, 0); + } + +private: + void enumerate(size_t NextMatched, size_t NextLexed, size_t IterCount) { + if (IterCount > KMaxIterations) { + vlog("Reach out the maximum iteration limit, stopped."); + return; + } + + if (NextMatched >= Indexed.size()) { + // find a match, emit it. + std::vector<Range> Mapped; + for (int Pos : MatchedIndex) + Mapped.push_back(Lexed[Pos]); + return MatchedCB(std::move(Mapped)); + } + + for (size_t I = NextLexed; I < Lexed.size(); ++I) { + if (Visited.count(I)) + continue; + if (isLineOrColumnEqual(Indexed[NextMatched].start, Lexed[I].start)) { + Visited.insert(I); + MatchedIndex[NextMatched] = I; + enumerate(NextMatched + 1, I + 1, IterCount + 1); + Visited.erase(I); // reset the state. + } + } + } + static constexpr size_t KMaxIterations = 10000; + ArrayRef<Range> Indexed; // indexed occurrences. + ArrayRef<Range> Lexed; // lexed occurrences + MatchedCallback MatchedCB; + + // Corresponding to the Indexed, values are the indices into Lexed array. + std::vector<int> MatchedIndex; + // Whether the elements of Lexed array visited. + llvm::DenseSet<int> Visited; +}; + +/// Returns the best near miss for the Indexed occurrences. +/// REQUIRED: Indexed and Lexed are sorted. +llvm::Expected<std::vector<Range>> getMappedRanges(ArrayRef<Range> Indexed, + ArrayRef<Range> Lexed) { + assert(!Indexed.empty()); + assert(std::is_sorted(Indexed.begin(), Indexed.end())); + assert(std::is_sorted(Lexed.begin(), Lexed.end())); + + if (Indexed.size() > Lexed.size()) + return llvm::make_error<llvm::StringError>( + llvm::inconvertibleErrorCode(), + llvm::formatv("The number of lexed occurrences is less than indexed " + "occurrences")); + // Fast check for the special subset case. + if (std::includes(Indexed.begin(), Indexed.end(), Lexed.begin(), Lexed.end())) + return std::vector<Range>{Indexed.begin(), Indexed.end()}; + + std::vector<Range> Best; + size_t BestCost = std::numeric_limits<size_t>::max(); + bool HasMultiple = 0; + NearMissFinder FindNearMiss(Indexed, Lexed); + FindNearMiss.find([&](std::vector<Range> Mapped) { + size_t MCost = editCost(Indexed, Mapped); + if (MCost < BestCost) { + BestCost = MCost; + Best = std::move(Mapped); + HasMultiple = false; // reset + return; + } + if (MCost == BestCost) + HasMultiple = true; + }); + if (HasMultiple) + return llvm::make_error<llvm::StringError>( + llvm::inconvertibleErrorCode(), + llvm::formatv("The best near miss is not distinct")); + if (Best.empty()) + return llvm::make_error<llvm::StringError>( + llvm::inconvertibleErrorCode(), + llvm::formatv("Didn't found a near miss")); + return Best; +} } // namespace llvm::Expected<FileEdits> rename(const RenameInputs &RInputs) { @@ -505,5 +652,49 @@ return Edit(InitialCode, std::move(RenameEdit)); } +llvm::Expected<std::vector<Range>> patchRanges(llvm::StringRef DraftCode, + llvm::StringRef Identifier, + std::vector<Range> Indexed, + const LangOptions &LangOpts) { + assert(!Indexed.empty()); + std::vector<Range> Lexed = + collectIdentifierRanges(Identifier, DraftCode, LangOpts); + llvm::sort(Indexed); + llvm::sort(Lexed); + return getMappedRanges(Indexed, Lexed); +} + +size_t editCost(ArrayRef<Range> Indexed, ArrayRef<Range> Mapped) { + assert(Indexed.size() == Mapped.size()); + assert(std::is_sorted(Indexed.begin(), Indexed.end())); + assert(std::is_sorted(Mapped.begin(), Mapped.end())); + + std::vector<Diff> Diffs; + for (size_t I = 0; I < Indexed.size(); ++I) + Diffs.push_back(computeDiff(Indexed[I], Mapped[I])); + auto DiffsRef = ArrayRef<Diff>(Diffs); + size_t SumCost = 0; + while (!DiffsRef.empty()) { + auto DType = DiffsRef.front().DType; + + // Group all successive diffs: all line diffs, or all column diffs on a + // same line. + auto Batch = DiffsRef.take_while([&](const Diff &D) { + if (DType == Diff::Line) + return D.DType == Diff::Line; + return DiffsRef.front().IndexedRange->start.line == + D.IndexedRange->start.line; + }); + assert(Batch.size()); + SumCost += batchEditSize(Batch); + DiffsRef = DiffsRef.drop_front(Batch.size()); + + if (!DiffsRef.empty() && DType == Diff::Line) + // pay back the cost for the next column-based batch. + SumCost += abs(Batch.back().Offset); + } + return SumCost; +} + } // namespace clangd } // namespace clang
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits