usaxena95 created this revision. Herald added subscribers: cfe-commits, kadircet, arphaman, mgrang. Herald added a project: clang. usaxena95 requested review of this revision. Herald added subscribers: MaskRay, ilya-biryukov.
By default clangd scores a code completion item using heuristic model. Scoring can be done by Decision Forest by passing --ranking_model=df to clangd. TODO: - Explain and backup the heuristic for combining NameMatch and DF prediction. - Maybe add tests for sanity. - Add end-to-end latency metrics. - Add extra debug logging for the DF model. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D88281 Files: clang-tools-extra/clangd/CodeComplete.cpp clang-tools-extra/clangd/CodeComplete.h clang-tools-extra/clangd/tool/ClangdMain.cpp clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp =================================================================== --- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -26,6 +26,8 @@ #include "clang/Tooling/CompilationDatabase.h" #include "llvm/Support/Error.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Testing/Support/Annotations.h" #include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" @@ -33,6 +35,7 @@ #include <condition_variable> #include <functional> #include <mutex> +#include <sstream> #include <vector> namespace clang { @@ -648,6 +651,76 @@ Has("bar", CompletionItemKind::Function))); } +/* +TEST(CompletionTest, DO_NOT_SUBMIT_SpeedTestForReviewOnly) { + + auto randFunction = []() { + std::stringstream ss; + ss << "void "; + for (int i = 0; i < 10; ++i) + ss << char(rand() % 3 == 0 ? 'a' + rand() % 3 : 'A' + rand() % 3); + ss << "();\n"; + return ss.str(); + }; + int Trials = 1; + std::vector<Symbol> IndexedResults; + IndexedResults.push_back(cls({"ns::ABCDabc"})); + IndexedResults.push_back(cls({"ns::abcABC"})); + + std::string code = R"cpp( + namespace ns { + void abc(); + )cpp"; + for (int i = 0; i < 50; ++i) + code += randFunction(); + code += "namepsace ns2 {\n"; + for (int i = 0; i < 50; ++i) + code += randFunction(); + code += "} //namespace ns2\n"; + code += R"cpp(} // namespace ns + void f() { abc^ } + )cpp"; + llvm::errs() << code << "\n"; + + CodeCompleteOptions Opts; + Opts.AllScopes = true; + + llvm::Timer DF("DF", "Decision Forest"); + llvm::Timer Heuristic("Heuristic", "Heuristic ranking."); + + Opts.RankingModel = CodeCompleteOptions::DecisionForest; + DF.startTimer(); + auto DFCompletions = completions(code, IndexedResults, Opts); + for (int i = 0; i < Trials; ++i) + completions(code, IndexedResults, Opts); + DF.stopTimer(); + + Opts.RankingModel = CodeCompleteOptions::Heuristic; + Heuristic.startTimer(); + auto HeuristicCompletions = completions(code, IndexedResults, Opts); + for (int i = 0; i < Trials; ++i) + completions(code, IndexedResults, Opts); + Heuristic.stopTimer(); + + for (const auto &M : {DFCompletions, HeuristicCompletions}) { + auto S = M.Completions; + std::sort(S.begin(), S.end(), + [&](const CodeCompletion &a, const CodeCompletion &b) { + return a.Score.Total > b.Score.Total || + (a.Score.Total == b.Score.Total && a.Name < b.Name); + }); + int r = 0; + for (const auto &C : S) { + llvm::errs() << r++ << ".\t" << C.Scope << " " << C.Name << " " + << C.Score.Total << " / " << C.Score.ExcludingName + << "\t\tNameMatch = " + << C.Score.Total / C.Score.ExcludingName << "\n"; + } + llvm::errs() << "\n\n"; + } + EXPECT_LT(int(DFCompletions.Completions.size()), 0LL); +} +*/ TEST(CompletionTest, SemaIndexMerge) { auto Results = completions( R"cpp( Index: clang-tools-extra/clangd/tool/ClangdMain.cpp =================================================================== --- clang-tools-extra/clangd/tool/ClangdMain.cpp +++ clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -167,6 +167,18 @@ Hidden, }; +opt<CodeCompleteOptions::CodeCompletionRankingModel> RankingModel{ + "ranking-model", + cat(Features), + desc("Model to use to rank code-completion items"), + values(clEnumValN(CodeCompleteOptions::Heuristic, "heuristic", + "Use hueristics to rank code completion items"), + clEnumValN(CodeCompleteOptions::DecisionForest, "df", + "Use Decision Forest model to rank completion items")), + init(CodeCompleteOptions().RankingModel), + Hidden, +}; + // FIXME: also support "plain" style where signatures are always omitted. enum CompletionStyleFlag { Detailed, Bundled }; opt<CompletionStyleFlag> CompletionStyle{ @@ -739,6 +751,7 @@ CCOpts.EnableFunctionArgSnippets = EnableFunctionArgSnippets; CCOpts.AllScopes = AllScopesCompletion; CCOpts.RunParser = CodeCompletionParse; + CCOpts.RankingModel = RankingModel; RealThreadsafeFS TFS; std::vector<std::unique_ptr<config::Provider>> ProviderStack; Index: clang-tools-extra/clangd/CodeComplete.h =================================================================== --- clang-tools-extra/clangd/CodeComplete.h +++ clang-tools-extra/clangd/CodeComplete.h @@ -147,6 +147,12 @@ std::function<void(const CodeCompletion &, const SymbolQualitySignals &, const SymbolRelevanceSignals &, float Score)> RecordCCResult; + + /// Model to use for ranking code completion candidates. + enum CodeCompletionRankingModel { + Heuristic, + DecisionForest, + } RankingModel = Heuristic; }; // Semi-structured representation of a code-complete suggestion for our C++ API. Index: clang-tools-extra/clangd/CodeComplete.cpp =================================================================== --- clang-tools-extra/clangd/CodeComplete.cpp +++ clang-tools-extra/clangd/CodeComplete.cpp @@ -21,6 +21,7 @@ #include "AST.h" #include "CodeCompletionStrings.h" #include "Compiler.h" +#include "CompletionModel.h" #include "Diagnostics.h" #include "ExpectedTypes.h" #include "FileDistance.h" @@ -1625,6 +1626,73 @@ return Filter->match(C.Name); } + CodeCompletion::Scores + evaluateHeuristic(const SymbolQualitySignals &Quality, + const SymbolRelevanceSignals &Relevance) { + CodeCompletion::Scores Scores; + Scores.Quality = Quality.evaluate(); + Scores.Relevance = Relevance.evaluate(); + Scores.Total = evaluateSymbolAndRelevance(Scores.Quality, Scores.Relevance); + // NameMatch is in fact a multiplier on total score, so rescoring is sound. + Scores.ExcludingName = Relevance.NameMatch + ? Scores.Total / Relevance.NameMatch + : Scores.Quality; + return Scores; + } + + CodeCompletion::Scores + evaluateDecisionForest(const SymbolQualitySignals &Quality, + const SymbolRelevanceSignals &Relevance) { + Example E; + E.setIsDeprecated(Quality.Deprecated); + E.setIsReservedName(Quality.ReservedName); + E.setIsImplementationDetail(Quality.ImplementationDetail); + E.setNumReferences(Quality.References); + E.setSymbolCategory(Quality.Category); + + SymbolRelevanceSignals::DerivedSignals Derived = + Relevance.calculateDerivedSignals(); + // FIXME(usx): Move FilterLength to RelevanceSignals. + E.setFilterLength(HeuristicPrefix.Name.size()); + E.setIsNameInContext(Derived.NameMatchesContext); + E.setIsForbidden(Relevance.Forbidden); + E.setIsInBaseClass(Relevance.InBaseClass); + E.setFileProximityDistance(Derived.FileProximityDistance); + E.setSemaFileProximityScore(Relevance.SemaFileProximityScore); + E.setSymbolScopeDistance(Derived.ScopeProximityDistance); + E.setSemaSaysInScope(Relevance.SemaSaysInScope); + E.setScope(Relevance.Scope); + E.setContextKind(Relevance.Context); + E.setIsInstanceMember(Relevance.IsInstanceMember); + E.setHadContextType(Relevance.HadContextType); + E.setHadSymbolType(Relevance.HadSymbolType); + E.setTypeMatchesPreferred(Relevance.TypeMatchesPreferred); + + CodeCompletion::Scores Scores; + Scores.ExcludingName = exp(Evaluate(E)); + if (Relevance.NeedsFixIts) + Scores.ExcludingName *= 0.5; + Scores.Total = Relevance.NameMatch * Scores.ExcludingName; + // DO NOT SUBMIT: Remove before submitting. + log("{0} = {1} * {2}, {3}\t{4}", Scores.Total, Scores.ExcludingName, + Relevance.NameMatch, Relevance.SymbolScope, Relevance.Name); + + return Scores; + } + + CodeCompletion::Scores + evaluateCompletion(const SymbolQualitySignals &Quality, + const SymbolRelevanceSignals &Relevance) { + using RM = CodeCompleteOptions::CodeCompletionRankingModel; + switch (Opts.RankingModel) { + case RM::Heuristic: + return evaluateHeuristic(Quality, Relevance); + case RM::DecisionForest: + return evaluateDecisionForest(Quality, Relevance); + } + llvm_unreachable("Unhandled CodeCompletion ranking model."); + } + // Scores a candidate and adds it to the TopN structure. void addCandidate(TopN<ScoredBundle, ScoredBundleGreater> &Candidates, CompletionCandidate::Bundle Bundle) { @@ -1680,15 +1748,7 @@ } } - CodeCompletion::Scores Scores; - Scores.Quality = Quality.evaluate(); - Scores.Relevance = Relevance.evaluate(); - Scores.Total = evaluateSymbolAndRelevance(Scores.Quality, Scores.Relevance); - // NameMatch is in fact a multiplier on total score, so rescoring is sound. - Scores.ExcludingName = Relevance.NameMatch - ? Scores.Total / Relevance.NameMatch - : Scores.Quality; - + CodeCompletion::Scores Scores = evaluateCompletion(Quality, Relevance); if (Opts.RecordCCResult) Opts.RecordCCResult(toCodeCompletion(Bundle), Quality, Relevance, Scores.Total);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits