ilya-biryukov created this revision. ilya-biryukov added a reviewer: ioeric. Herald added subscribers: kadircet, arphaman, jkorous, MaskRay. Herald added a project: clang.
It now gives less preference to prefix matches and does not penalize skipping segments. The motivation is producing better scoring in one particular example, but the change does not seem to cause large regressions in other cases. The examples is matching 'up' against 'unique_ptr' and 'upper_bound'. Before the change, we had: - "[u]nique_[p]tr" with a score of 0.3. - "[up]per_bound" with a score of 1.0. A 3x difference meant that symbol quality signals were almost always ignored and 'upper_bound' was always ranked higher. However, intuitively, the match scores should be very close for the two. After the change both items match with a score of 1.0. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D59300 Files: clang-tools-extra/clangd/FuzzyMatch.cpp clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp
Index: clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp =================================================================== --- clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp +++ clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp @@ -9,6 +9,7 @@ #include "FuzzyMatch.h" #include "llvm/ADT/StringExtras.h" +#include "gmock/gmock-matchers.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -16,6 +17,7 @@ namespace clangd { namespace { using testing::Not; +using testing::AllOf; struct ExpectedMatch { // Annotations are optional, and will not be asserted if absent. @@ -170,7 +172,10 @@ EXPECT_THAT("zzg", matches("[zzG]roup")); EXPECT_THAT("g", matches("zz[G]roup")); - EXPECT_THAT("aaaa", matches("_a_[aaaa]")); // Prefer consecutive. + EXPECT_THAT("AAAA", matches("_a_[aaaa]")); // Prefer consecutive. + // All-lowercase patterns are special, they treat consecutive matches and + // head character matches the same, as the pattern lacks segmentation signals. + EXPECT_THAT("aaaa", matches("_[a]_[aaa]a")); // These would ideally match, but would need special segmentation rules. EXPECT_THAT("printf", Not(matches("s[printf]"))); EXPECT_THAT("str", Not(matches("o[str]eam"))); @@ -246,7 +251,7 @@ ranks("[cons]ole", "[Cons]ole", "ArrayBuffer[Cons]tructor")); EXPECT_THAT("foo", ranks("[foo]", "[Foo]")); EXPECT_THAT("onMes", - ranks("[onMes]sage", "[onmes]sage", "[on]This[M]ega[Es]capes")); + ranks("[onMes]sage", "[on]This[M]ega[Es]capes", "[onmes]sage")); EXPECT_THAT("CC", ranks("[C]amel[C]ase", "[c]amel[C]ase")); EXPECT_THAT("cC", ranks("[c]amel[C]ase", "[C]amel[C]ase")); EXPECT_THAT("p", ranks("[p]", "[p]arse", "[p]osix", "[p]afdsa", "[p]ath")); @@ -270,12 +275,18 @@ // Verify some bounds so we know scores fall in the right range. // Testing exact scores is fragile, so we prefer Ranking tests. TEST(FuzzyMatch, Scoring) { - EXPECT_THAT("abs", matches("[a]w[B]xYz[S]", 0.f)); + EXPECT_THAT("abs", matches("[a]w[B]xYz[S]", 1.f)); EXPECT_THAT("abs", matches("[abs]l", 1.f)); EXPECT_THAT("abs", matches("[abs]", 2.f)); EXPECT_THAT("Abs", matches("[abs]", 2.f)); } +TEST(FuzzyMatch, PrefixAndHead) { + // We want these scores to be roughly the same. + EXPECT_THAT("up", matches("[u]nique_[p]tr", 1.f)); + EXPECT_THAT("up", matches("[up]per_bound", 1.f)); +} + // Returns pretty-printed segmentation of Text. // e.g. std::basic_string --> +-- +---- +----- std::string segment(llvm::StringRef Text) { Index: clang-tools-extra/clangd/FuzzyMatch.cpp =================================================================== --- clang-tools-extra/clangd/FuzzyMatch.cpp +++ clang-tools-extra/clangd/FuzzyMatch.cpp @@ -71,7 +71,7 @@ // Score field is 15 bits wide, min value is -2^14, we use half of that. static constexpr int AwfulScore = -(1 << 13); static bool isAwful(int S) { return S < AwfulScore / 2; } -static constexpr int PerfectBonus = 3; // Perfect per-pattern-char score. +static constexpr int PerfectBonus = 2; // Perfect per-pattern-char score. FuzzyMatcher::FuzzyMatcher(llvm::StringRef Pattern) : PatN(std::min<int>(MaxPat, Pattern.size())), @@ -268,32 +268,25 @@ int FuzzyMatcher::skipPenalty(int W, Action Last) const { int S = 0; - if (WordRole[W] == Head) // Skipping a segment. - S += 1; - if (Last == Match) // Non-consecutive match. - S += 2; // We'd rather skip a segment than split our match. + if (W == 0) // Skipping the first character. + S += 2; return S; } int FuzzyMatcher::matchBonus(int P, int W, Action Last) const { assert(LowPat[P] == LowWord[W]); int S = 1; - // Bonus: pattern so far is a (case-insensitive) prefix of the word. - if (P == W) // We can't skip pattern characters, so we must have matched all. - ++S; - // Bonus: case matches, or a Head in the pattern aligns with one in the word. - if ((Pat[P] == Word[W] && ((PatTypeSet & 1 << Upper) || P == W)) || - (PatRole[P] == Head && WordRole[W] == Head)) + // Bonus: matching a new segment or a consecutive match. + if ((WordRole[W] == Head && + ((PatTypeSet == 1 << Lower) || PatRole[P] == Head)) || + (WordRole[W] == PatRole[P] && Last == Match)) ++S; + // Penalty: in mixed-case pattern we want the roles to match. + if ((PatTypeSet != 1 << Lower) && PatRole[P] != WordRole[W]) + --S; // Penalty: matching inside a segment (and previous char wasn't matched). if (WordRole[W] == Tail && P && Last == Miss) - S -= 3; - // Penalty: a Head in the pattern matches in the middle of a word segment. - if (PatRole[P] == Head && WordRole[W] == Tail) - --S; - // Penalty: matching the first pattern character in the middle of a segment. - if (P == 0 && WordRole[W] == Tail) - S -= 4; + S -= 2; assert(S <= PerfectBonus); return S; }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits