kbobyrev created this revision. kbobyrev added a reviewer: sammccall. Herald added subscribers: usaxena95, arphaman. kbobyrev requested review of this revision. Herald added subscribers: cfe-commits, MaskRay, ilya-biryukov. Herald added a project: clang-tools-extra.
These are the trigrams for queries right now: - "va" -> {Trigram("va")} - "va_" -> {} (empty) This is suboptimal since the resulting query will discard the query information and return all symbols, some of which will be later be scored expensively (fuzzy matching score). This is related to https://github.com/clangd/clangd/issues/39 but does not fix it. Accidentally, because of that incorrect behavior, when user types "tok::va" there are no results (the issue is that `tok::kw___builtin_va_arg` does not have "va" token) but when "tok::va_" is typed, expected result (`tok::kw___builtin_va_arg`) shows up by accident. This is because the dex query transformer will only lookup symbols within the `tok::` namespace. There won't be many, so the returned results will contain symbol we need; this symbol will be filtered out by the expensive checks and that will be displayed in the editor. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D113995 Files: clang-tools-extra/clangd/index/dex/Trigram.cpp clang-tools-extra/clangd/unittests/DexTests.cpp Index: clang-tools-extra/clangd/unittests/DexTests.cpp =================================================================== --- clang-tools-extra/clangd/unittests/DexTests.cpp +++ clang-tools-extra/clangd/unittests/DexTests.cpp @@ -404,6 +404,9 @@ EXPECT_THAT(identifierTrigramTokens("IsOK"), trigramsAre({"i", "is", "io", "iso", "iok", "sok"})); + EXPECT_THAT(identifierTrigramTokens("_pb"), trigramsAre({"_", "_p"})); + EXPECT_THAT(identifierTrigramTokens("__pb"), trigramsAre({"_", "__", "_p"})); + EXPECT_THAT( identifierTrigramTokens("abc_defGhij__klm"), trigramsAre({"a", "ab", "ad", "abc", "abd", "ade", "adg", "bcd", @@ -422,6 +425,14 @@ EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"})); EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({})); + EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m_"})); + + EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"})); + EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"})); + EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"})); + EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"_p"})); + EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"_p"})); + EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"})); EXPECT_THAT(generateQueryTrigrams("clangd"), @@ -545,6 +556,18 @@ Req.Query = "ttf"; EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("OneTwoThreeFour")); EXPECT_FALSE(Incomplete) << "3-char string is not a short query"; + + I = Dex::build(generateSymbols({"tok::kw_builtin_va_arg", "bar::whatever"}), + RefSlab(), RelationSlab()); + + Req.Query = "kw_"; + EXPECT_THAT(match(*I, Req, &Incomplete), + ElementsAre("tok::kw_builtin_va_arg")); + EXPECT_FALSE(Incomplete) << "kw_ is enough to match the whole symbol"; + Req.Scopes = {"tok::"}; + EXPECT_THAT(match(*I, Req, &Incomplete), + ElementsAre("tok::kw_builtin_va_arg")); + EXPECT_FALSE(Incomplete) << "kw_ is enough to match the whole symbol"; } TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) { Index: clang-tools-extra/clangd/index/dex/Trigram.cpp =================================================================== --- clang-tools-extra/clangd/index/dex/Trigram.cpp +++ clang-tools-extra/clangd/index/dex/Trigram.cpp @@ -101,17 +101,42 @@ std::vector<Token> generateQueryTrigrams(llvm::StringRef Query) { if (Query.empty()) return {}; - std::string LowercaseQuery = Query.lower(); - if (Query.size() < 3) // short-query trigrams only - return {Token(Token::Kind::Trigram, LowercaseQuery)}; // Apply fuzzy matching text segmentation. std::vector<CharRole> Roles(Query.size()); calculateRoles(Query, llvm::makeMutableArrayRef(Roles.data(), Query.size())); + std::string LowercaseQuery = Query.lower(); + + if (LowercaseQuery.size() < 3) // short-query trigrams only. + return {Token(Token::Kind::Trigram, LowercaseQuery)}; + + unsigned ValidSymbols = + llvm::count_if(Roles, [](CharRole R) { return R == Head || R == Tail; }); + // If the query does not have any alphanumeric symbols, don't restrict the + // result to the names. + if (ValidSymbols == 0) + return {}; + // + if (ValidSymbols < 3) { + std::string Letters = + Roles.front() == Separator ? std::string(1, Query.front()) : ""; + for (unsigned I = 0; I < LowercaseQuery.size(); ++I) { + if (Roles[I] == Head || Roles[I] == Tail) { + Letters += LowercaseQuery[I]; + // Similar to the identifier trigram generation, stop here for the + // queries starting with the separator, i.e. "_va" will only output + // "_v" here, identifier trigram generator will output "_" and "_v" + if (Roles.front() == Separator) + break; + } + } + return {Token(Token::Kind::Trigram, Letters)}; + } + llvm::DenseSet<Token> UniqueTrigrams; std::string Chars; - for (unsigned I = 0; I < Query.size(); ++I) { + for (unsigned I = 0; I < LowercaseQuery.size(); ++I) { if (Roles[I] != Head && Roles[I] != Tail) continue; // Skip delimiters. Chars.push_back(LowercaseQuery[I]);
Index: clang-tools-extra/clangd/unittests/DexTests.cpp =================================================================== --- clang-tools-extra/clangd/unittests/DexTests.cpp +++ clang-tools-extra/clangd/unittests/DexTests.cpp @@ -404,6 +404,9 @@ EXPECT_THAT(identifierTrigramTokens("IsOK"), trigramsAre({"i", "is", "io", "iso", "iok", "sok"})); + EXPECT_THAT(identifierTrigramTokens("_pb"), trigramsAre({"_", "_p"})); + EXPECT_THAT(identifierTrigramTokens("__pb"), trigramsAre({"_", "__", "_p"})); + EXPECT_THAT( identifierTrigramTokens("abc_defGhij__klm"), trigramsAre({"a", "ab", "ad", "abc", "abd", "ade", "adg", "bcd", @@ -422,6 +425,14 @@ EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"})); EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({})); + EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m_"})); + + EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"})); + EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"})); + EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"})); + EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"_p"})); + EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"_p"})); + EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"})); EXPECT_THAT(generateQueryTrigrams("clangd"), @@ -545,6 +556,18 @@ Req.Query = "ttf"; EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("OneTwoThreeFour")); EXPECT_FALSE(Incomplete) << "3-char string is not a short query"; + + I = Dex::build(generateSymbols({"tok::kw_builtin_va_arg", "bar::whatever"}), + RefSlab(), RelationSlab()); + + Req.Query = "kw_"; + EXPECT_THAT(match(*I, Req, &Incomplete), + ElementsAre("tok::kw_builtin_va_arg")); + EXPECT_FALSE(Incomplete) << "kw_ is enough to match the whole symbol"; + Req.Scopes = {"tok::"}; + EXPECT_THAT(match(*I, Req, &Incomplete), + ElementsAre("tok::kw_builtin_va_arg")); + EXPECT_FALSE(Incomplete) << "kw_ is enough to match the whole symbol"; } TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) { Index: clang-tools-extra/clangd/index/dex/Trigram.cpp =================================================================== --- clang-tools-extra/clangd/index/dex/Trigram.cpp +++ clang-tools-extra/clangd/index/dex/Trigram.cpp @@ -101,17 +101,42 @@ std::vector<Token> generateQueryTrigrams(llvm::StringRef Query) { if (Query.empty()) return {}; - std::string LowercaseQuery = Query.lower(); - if (Query.size() < 3) // short-query trigrams only - return {Token(Token::Kind::Trigram, LowercaseQuery)}; // Apply fuzzy matching text segmentation. std::vector<CharRole> Roles(Query.size()); calculateRoles(Query, llvm::makeMutableArrayRef(Roles.data(), Query.size())); + std::string LowercaseQuery = Query.lower(); + + if (LowercaseQuery.size() < 3) // short-query trigrams only. + return {Token(Token::Kind::Trigram, LowercaseQuery)}; + + unsigned ValidSymbols = + llvm::count_if(Roles, [](CharRole R) { return R == Head || R == Tail; }); + // If the query does not have any alphanumeric symbols, don't restrict the + // result to the names. + if (ValidSymbols == 0) + return {}; + // + if (ValidSymbols < 3) { + std::string Letters = + Roles.front() == Separator ? std::string(1, Query.front()) : ""; + for (unsigned I = 0; I < LowercaseQuery.size(); ++I) { + if (Roles[I] == Head || Roles[I] == Tail) { + Letters += LowercaseQuery[I]; + // Similar to the identifier trigram generation, stop here for the + // queries starting with the separator, i.e. "_va" will only output + // "_v" here, identifier trigram generator will output "_" and "_v" + if (Roles.front() == Separator) + break; + } + } + return {Token(Token::Kind::Trigram, Letters)}; + } + llvm::DenseSet<Token> UniqueTrigrams; std::string Chars; - for (unsigned I = 0; I < Query.size(); ++I) { + for (unsigned I = 0; I < LowercaseQuery.size(); ++I) { if (Roles[I] != Head && Roles[I] != Tail) continue; // Skip delimiters. Chars.push_back(LowercaseQuery[I]);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits