Author: ibiryukov Date: Fri Mar 15 07:00:49 2019 New Revision: 356261 URL: http://llvm.org/viewvc/llvm-project?rev=356261&view=rev Log: [clangd] Tune the fuzzy-matching algorithm
Summary: To reduce the gap between prefix and initialism matches. The motivation is producing better scoring in one particular example, but the change does not seem to cause large regressions in other cases. The example is matching 'up' against 'unique_ptr' and 'upper_bound'. Before the change, we had: - "[u]nique_[p]tr" with a score of 0.3, - "[up]per_bound" with a score of 1.0. A 3x difference meant that symbol quality signals were almost always ignored and 'upper_bound' was always ranked higher. However, intuitively, the match scores should be very close for the two. After the change we have the following scores: - "[u]nique_[p]tr" with a score of 0.75, - "[up]per_bound" with a score of 1.0. Reviewers: ioeric Reviewed By: ioeric Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D59300 Modified: clang-tools-extra/trunk/clangd/FuzzyMatch.cpp clang-tools-extra/trunk/unittests/clangd/FuzzyMatchTests.cpp Modified: clang-tools-extra/trunk/clangd/FuzzyMatch.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/FuzzyMatch.cpp?rev=356261&r1=356260&r2=356261&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/FuzzyMatch.cpp (original) +++ clang-tools-extra/trunk/clangd/FuzzyMatch.cpp Fri Mar 15 07:00:49 2019 @@ -71,7 +71,7 @@ static char lower(char C) { return C >= // Score field is 15 bits wide, min value is -2^14, we use half of that. static constexpr int AwfulScore = -(1 << 13); static bool isAwful(int S) { return S < AwfulScore / 2; } -static constexpr int PerfectBonus = 3; // Perfect per-pattern-char score. +static constexpr int PerfectBonus = 4; // Perfect per-pattern-char score. 
FuzzyMatcher::FuzzyMatcher(llvm::StringRef Pattern) : PatN(std::min<int>(MaxPat, Pattern.size())), @@ -267,24 +267,31 @@ bool FuzzyMatcher::allowMatch(int P, int } int FuzzyMatcher::skipPenalty(int W, Action Last) const { - int S = 0; + if (W == 0) // Skipping the first character. + return 3; if (WordRole[W] == Head) // Skipping a segment. - S += 1; - if (Last == Match) // Non-consecutive match. - S += 2; // We'd rather skip a segment than split our match. - return S; + return 1; // We want to keep this lower than a consecutive match bonus. + // Instead of penalizing non-consecutive matches, we give a bonus to a + // consecutive match in matchBonus. This produces a better score distribution + // than penalties in case of small patterns, e.g. 'up' for 'unique_ptr'. + return 0; } int FuzzyMatcher::matchBonus(int P, int W, Action Last) const { assert(LowPat[P] == LowWord[W]); int S = 1; - // Bonus: pattern so far is a (case-insensitive) prefix of the word. - if (P == W) // We can't skip pattern characters, so we must have matched all. - ++S; + bool IsPatSingleCase = + (PatTypeSet == 1 << Lower) || (PatTypeSet == 1 << Upper); // Bonus: case matches, or a Head in the pattern aligns with one in the word. - if ((Pat[P] == Word[W] && ((PatTypeSet & 1 << Upper) || P == W)) || - (PatRole[P] == Head && WordRole[W] == Head)) + // Single-case patterns lack segmentation signals and we assume any character + // can be a head of a segment. + if (Pat[P] == Word[W] || + (WordRole[W] == Head && (IsPatSingleCase || PatRole[P] == Head))) ++S; + // Bonus: a consecutive match. First character match also gets a bonus to + // ensure prefix final match score normalizes to 1.0. + if (W == 0 || Last == Match) + S += 2; // Penalty: matching inside a segment (and previous char wasn't matched). 
if (WordRole[W] == Tail && P && Last == Miss) S -= 3; Modified: clang-tools-extra/trunk/unittests/clangd/FuzzyMatchTests.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/FuzzyMatchTests.cpp?rev=356261&r1=356260&r2=356261&view=diff ============================================================================== --- clang-tools-extra/trunk/unittests/clangd/FuzzyMatchTests.cpp (original) +++ clang-tools-extra/trunk/unittests/clangd/FuzzyMatchTests.cpp Fri Mar 15 07:00:49 2019 @@ -9,6 +9,7 @@ #include "FuzzyMatch.h" #include "llvm/ADT/StringExtras.h" +#include "gmock/gmock-matchers.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -247,6 +248,8 @@ TEST(FuzzyMatch, Ranking) { EXPECT_THAT("foo", ranks("[foo]", "[Foo]")); EXPECT_THAT("onMes", ranks("[onMes]sage", "[onmes]sage", "[on]This[M]ega[Es]capes")); + EXPECT_THAT("onmes", + ranks("[onmes]sage", "[onMes]sage", "[on]This[M]ega[Es]capes")); EXPECT_THAT("CC", ranks("[C]amel[C]ase", "[c]amel[C]ase")); EXPECT_THAT("cC", ranks("[c]amel[C]ase", "[C]amel[C]ase")); EXPECT_THAT("p", ranks("[p]", "[p]arse", "[p]osix", "[p]afdsa", "[p]ath")); @@ -270,12 +273,18 @@ TEST(FuzzyMatch, Ranking) { // Verify some bounds so we know scores fall in the right range. // Testing exact scores is fragile, so we prefer Ranking tests. TEST(FuzzyMatch, Scoring) { - EXPECT_THAT("abs", matches("[a]w[B]xYz[S]", 0.f)); + EXPECT_THAT("abs", matches("[a]w[B]xYz[S]", 7.f / 12.f)); EXPECT_THAT("abs", matches("[abs]l", 1.f)); EXPECT_THAT("abs", matches("[abs]", 2.f)); EXPECT_THAT("Abs", matches("[abs]", 2.f)); } +TEST(FuzzyMatch, InitialismAndPrefix) { + // We want these scores to be roughly the same. + EXPECT_THAT("up", matches("[u]nique_[p]tr", 3.f / 4.f)); + EXPECT_THAT("up", matches("[up]per_bound", 1.f)); +} + // Returns pretty-printed segmentation of Text. // e.g. 
std::basic_string --> +-- +---- +----- std::string segment(llvm::StringRef Text) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits