sammccall created this revision.
sammccall added a reviewer: ioeric.
Herald added subscribers: cfe-commits, mgrang, ilya-biryukov, klimek.

- we match on USR, and do a field-by-field merge if both have results
- scoring is post-merge, with both sets of information available (for now, sema 
priority is used if available, static score for index results)
- limit is applied to the complete result set (previously index ignored limit)
- CompletionItem is only produces for the returned results
- If the user doesn't type a scope, we send the scopeless query to the index 
for global completion.

This needs tests for the new functionality, but I thought you could take a look
while I work on that. It passes the existing tests, and works in VSCode.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D42181

Files:
  clangd/CodeComplete.cpp
  test/clangd/authority-less-uri.test
  unittests/clangd/CodeCompleteTests.cpp

Index: unittests/clangd/CodeCompleteTests.cpp
===================================================================
--- unittests/clangd/CodeCompleteTests.cpp
+++ unittests/clangd/CodeCompleteTests.cpp
@@ -497,6 +497,7 @@
       Sym.Scope = QName.substr(0, Pos);
     }
     Sym.CompletionPlainInsertText = Sym.Name;
+    Sym.CompletionLabel = Sym.Name;
     Sym.SymInfo.Kind = Pair.second;
     Slab.insert(Sym);
   }
@@ -528,8 +529,8 @@
       void f() { ::ns::^ }
   )cpp",
                              Opts);
-  EXPECT_THAT(Results.items, Contains(Labeled("[I]XYZ")));
-  EXPECT_THAT(Results.items, Contains(Labeled("[I]foo")));
+  EXPECT_THAT(Results.items, Contains(Labeled("XYZ")));
+  EXPECT_THAT(Results.items, Contains(Labeled("foo")));
 }
 
 TEST(CompletionTest, SimpleIndexBased) {
Index: test/clangd/authority-less-uri.test
===================================================================
--- test/clangd/authority-less-uri.test
+++ test/clangd/authority-less-uri.test
@@ -25,7 +25,7 @@
 # CHECK-NEXT:        "insertTextFormat": 1,
 # CHECK-NEXT:        "kind": 7,
 # CHECK-NEXT:        "label": "fake::",
-# CHECK-NEXT:        "sortText": "{{.*}}fake"
+# CHECK-NEXT:        "sortText": "{{.*}}fake::"
 #      CHECK:    ]
 # CHECK-NEXT:  }
 Content-Length: 173
@@ -43,7 +43,7 @@
 # CHECK-NEXT:        "insertTextFormat": 1,
 # CHECK-NEXT:        "kind": 7,
 # CHECK-NEXT:        "label": "fake::",
-# CHECK-NEXT:        "sortText": "{{.*}}fake"
+# CHECK-NEXT:        "sortText": "{{.*}}fake::"
 #      CHECK:    ]
 # CHECK-NEXT:  }
 Content-Length: 44
Index: clangd/CodeComplete.cpp
===================================================================
--- clangd/CodeComplete.cpp
+++ clangd/CodeComplete.cpp
@@ -22,6 +22,7 @@
 #include "index/Index.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/FrontendActions.h"
+#include "clang/Index/USRGeneration.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang/Sema/Sema.h"
 #include "llvm/Support/Format.h"
@@ -185,48 +186,53 @@
   return Result;
 }
 
-/// A scored code completion result.
-/// It may be promoted to a CompletionItem if it's among the top-ranked results.
-///
-/// We score candidates by multiplying the symbolScore ("quality" of the result)
-/// with the filterScore (how well it matched the query).
-/// This is sensitive to the distribution of both component scores!
-struct CompletionCandidate {
-  CompletionCandidate(CodeCompletionResult &Result, float FilterScore)
-      : Result(&Result) {
-    Scores.symbolScore = score(Result);  // Higher is better.
-    Scores.filterScore = FilterScore;    // 0-1, higher is better.
-    Scores.finalScore = Scores.symbolScore * Scores.filterScore;
-  }
+// Produces an integer that sorts in the same order as F.
+// That is: a < b <==> encodeFloat(a) < encodeFloat(b).
+uint32_t encodeFloat(float F) {
+  static_assert(std::numeric_limits<float>::is_iec559, "");
+  static_assert(sizeof(float) == sizeof(uint32_t), "");
+  constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
+
+  // Get the bits of the float. Endianness is the same as for integers.
+  uint32_t U;
+  memcpy(&U, &F, sizeof(float));
+  // IEEE 754 floats compare like sign-magnitude integers.
+  if (U & TopBit)    // Negative float.
+    return 0 - U;    // Map onto the low half of integers, order reversed.
+  return U + TopBit; // Positive floats map onto the high half of integers.
+}
 
-  CodeCompletionResult *Result;
-  CompletionItemScores Scores;
+// Returns a string that sorts in the same order as (-Score, Name), for LSP.
+std::string sortText(float Score, llvm::StringRef Name) {
+  // We convert -Score to an integer, and hex-encode for readability.
+  // Example: [0.5, "foo"] -> "41000000foo"
+  std::string S;
+  llvm::raw_string_ostream OS(S);
+  write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
+            /*Width=*/2 * sizeof(Score));
+  OS << Name;
+  OS.flush();
+  return S;
+}
 
-  // Comparison reflects rank: better candidates are smaller.
-  bool operator<(const CompletionCandidate &C) const {
-    if (Scores.finalScore != C.Scores.finalScore)
-      return Scores.finalScore > C.Scores.finalScore;
-    return *Result < *C.Result;
-  }
+/// A code completion result, in clang-native form.
+/// It may be promoted to a CompletionItem if it's among the top-ranked results.
+struct CompletionCandidate {
+  llvm::StringRef Name; // Used for filtering and sorting.
+  // We may have a result from Sema, from the index, or both.
+  const CodeCompletionResult *SemaResult = nullptr;
+  const Symbol *IndexResult = nullptr;
 
-  // Returns a string that sorts in the same order as operator<, for LSP.
-  // Conceptually, this is [-Score, Name]. We convert -Score to an integer, and
-  // hex-encode it for readability. Example: [0.5, "foo"] -> "41000000foo"
-  std::string sortText() const {
-    std::string S, NameStorage;
-    llvm::raw_string_ostream OS(S);
-    write_hex(OS, encodeFloat(-Scores.finalScore), llvm::HexPrintStyle::Lower,
-              /*Width=*/2 * sizeof(Scores.finalScore));
-    OS << Result->getOrderedName(NameStorage);
-    return OS.str();
-  }
+  // Computes the "symbol quality" score for this completion. Higher is better.
+  float score() const {
+    // For now we just use the Sema priority, mapping it onto a 0-1 interval.
+    if (!SemaResult) // FIXME(sammccall): better scoring for index results.
+      return 0.3;    // fixed mediocre score for index-only results.
 
-private:
-  static float score(const CodeCompletionResult &Result) {
     // Priority 80 is a really bad score.
-    float Score = 1 - std::min<float>(80, Result.Priority) / 80;
+    float Score = 1 - std::min<float>(80, SemaResult->Priority) / 80;
 
-    switch (static_cast<CXAvailabilityKind>(Result.Availability)) {
+    switch (static_cast<CXAvailabilityKind>(SemaResult->Availability)) {
     case CXAvailability_Available:
       // No penalty.
       break;
@@ -241,23 +247,65 @@
     return Score;
   }
 
-  // Produces an integer that sorts in the same order as F.
-  // That is: a < b <==> encodeFloat(a) < encodeFloat(b).
-  static uint32_t encodeFloat(float F) {
-    static_assert(std::numeric_limits<float>::is_iec559, "");
-    static_assert(sizeof(float) == sizeof(uint32_t), "");
-    constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
-
-    // Get the bits of the float. Endianness is the same as for integers.
-    uint32_t U;
-    memcpy(&U, &F, sizeof(float));
-    // IEEE 754 floats compare like sign-magnitude integers.
-    if (U & TopBit)    // Negative float.
-      return 0 - U;    // Map onto the low half of integers, order reversed.
-    return U + TopBit; // Positive floats map onto the high half of integers.
+  // Builds an LSP completion item.
+  // If SemaResult is non-null, CCS is non-null and describes it.
+  CompletionItem build(CodeCompletionString *CCS,
+                       const CodeCompleteOptions &Opts) const {
+    CompletionItem I;
+    if (SemaResult) {
+      I.kind =
+          toCompletionItemKind(SemaResult->Kind, SemaResult->CursorKind);
+      getLabelAndInsertText(*CCS, &I.label, &I.insertText, Opts.EnableSnippets);
+      I.filterText = getFilterText(*CCS);
+      I.insertTextFormat = Opts.EnableSnippets ? InsertTextFormat::Snippet
+                                               : InsertTextFormat::PlainText;
+      I.documentation = getDocumentation(*CCS);
+      I.detail = getDetail(*CCS);
+    }
+    if (IndexResult) {
+      if (I.kind == CompletionItemKind::Missing)
+        I.kind = toCompletionItemKind(IndexResult->SymInfo.Kind);
+      // FIXME: reintroduce a way to show the index source for debugging.
+      if (I.label.empty())
+        I.label = IndexResult->CompletionLabel;
+      if (I.filterText.empty())
+        I.filterText = IndexResult->Name;
+
+      // FIXME(ioeric): support inserting/replacing scope qualifiers.
+      // FIXME(ioeric): support snippets.
+      if (I.insertText.empty()) {
+        I.insertText = IndexResult->CompletionPlainInsertText;
+        I.insertTextFormat = InsertTextFormat::PlainText;
+      }
+
+      if (auto *D = IndexResult->Detail) {
+        if (I.documentation.empty())
+          I.documentation = D->Documentation;
+        if (I.detail.empty())
+          I.detail = D->CompletionDetail;
+      }
+    }
+    return I;
   }
 };
 
+// Determine the symbol ID for a Sema code completion result, if possible.
+llvm::Optional<SymbolID> getSymbolID(const CodeCompletionResult &R) {
+  switch (R.Kind) {
+    case CodeCompletionResult::RK_Declaration:
+    case CodeCompletionResult::RK_Pattern: {
+      llvm::SmallString<128> USR;
+      if (/*Ignore=*/ clang::index::generateUSRForDecl(R.Declaration, USR))
+        return None;
+      return SymbolID(USR);
+    }
+    case CodeCompletionResult::RK_Macro:
+      // FIXME: Macros do have USRs, but the CCR doesn't contain enough info.
+    case CodeCompletionResult::RK_Keyword:
+      return None;
+  }
+}
+
 /// \brief Information about the scope specifier in the qualified-id code
 /// completion (e.g. "ns::ab?").
 struct SpecifiedScope {
@@ -268,133 +316,135 @@
   // context), this will be set to the fully qualfied name of the corresponding
   // context.
   std::string Resolved;
-};
 
-/// \brief Information from sema about (parital) symbol names to be completed.
-/// For example, for completion "ns::ab^", this stores the scope specifier
-/// "ns::" and the completion filter text "ab".
-struct NameToComplete {
-  // The partial identifier being completed, without qualifier.
-  std::string Filter;
-
-  /// This is set if the completion is for qualified IDs, e.g. "abc::x^".
-  llvm::Optional<SpecifiedScope> SSInfo;
+  llvm::StringRef forIndex() {
+    llvm::StringRef Chosen = Resolved.empty() ? Written : Resolved;
+    return Chosen.trim(':');
+  }
 };
 
-SpecifiedScope extraCompletionScope(Sema &S, const CXXScopeSpec &SS);
-
-class CompletionItemsCollector : public CodeCompleteConsumer {
-public:
-  CompletionItemsCollector(const CodeCompleteOptions &CodeCompleteOpts,
-                           CompletionList &Items, NameToComplete &CompletedName)
-      : CodeCompleteConsumer(CodeCompleteOpts.getClangCompleteOpts(),
+// The CompletionRecorder captures Sema code-complete output, including context.
+// It filters out ignored results (and therefore does fuzzy-matching of names).
+// It doesn't do scoring or conversion to CompletionItem yet, as we want to
+// merge with index results first.
+struct CompletionRecorder : public CodeCompleteConsumer {
+  CompletionRecorder(const CodeCompleteOptions &Opts)
+      : CodeCompleteConsumer(Opts.getClangCompleteOpts(),
                              /*OutputIsBinary=*/false),
-        ClangdOpts(CodeCompleteOpts), Items(Items),
-        Allocator(std::make_shared<clang::GlobalCodeCompletionAllocator>()),
-        CCTUInfo(Allocator), CompletedName(CompletedName),
-        EnableSnippets(CodeCompleteOpts.EnableSnippets) {}
-
-  void ProcessCodeCompleteResults(Sema &S, CodeCompletionContext Context,
-                                  CodeCompletionResult *Results,
+        CCContext(CodeCompletionContext::CCC_Other),
+        Opts(Opts),
+        CCAllocator(std::make_shared<GlobalCodeCompletionAllocator>()),
+        CCTUInfo(CCAllocator) {}
+  std::vector<CodeCompletionResult> Results;
+  CodeCompletionContext CCContext;
+  Sema *Sema = nullptr; // The Sema that created the results.
+
+  void ProcessCodeCompleteResults(class Sema &S, CodeCompletionContext Context,
+                                  CodeCompletionResult *InResults,
                                   unsigned NumResults) override final {
-    FuzzyMatcher Filter(S.getPreprocessor().getCodeCompletionFilter());
-    if (auto SS = Context.getCXXScopeSpecifier())
-      CompletedName.SSInfo = extraCompletionScope(S, **SS);
+    // Record the completion context.
+    assert((!Sema || Sema == &S) && "Called with different Semas");
+    Sema = &S;
+    CCContext = Context;
 
-    CompletedName.Filter = S.getPreprocessor().getCodeCompletionFilter();
-    std::priority_queue<CompletionCandidate> Candidates;
+    // Retain the results we might want.
     for (unsigned I = 0; I < NumResults; ++I) {
-      auto &Result = Results[I];
-      // We drop hidden items, as they cannot be found by the lookup after
-      // inserting the corresponding completion item and only produce noise and
-      // duplicates in the completion list. However, there is one exception. If
-      // Result has a Qualifier which is non-informative, we can refer to an
-      // item by adding that qualifier, so we don't filter out this item.
+      auto &Result = InResults[I];
+      // Drop hidden items which cannot be found by lookup after completion.
+      // Exception: some items can be named by using a qualifier.
       if (Result.Hidden && (!Result.Qualifier || Result.QualifierIsInformative))
         continue;
-      if (!ClangdOpts.IncludeIneligibleResults &&
+      if (!Opts.IncludeIneligibleResults &&
           (Result.Availability == CXAvailability_NotAvailable ||
            Result.Availability == CXAvailability_NotAccessible))
         continue;
-      auto FilterScore = fuzzyMatch(S, Context, Filter, Result);
-      if (!FilterScore)
-        continue;
-      Candidates.emplace(Result, *FilterScore);
-      if (ClangdOpts.Limit && Candidates.size() > ClangdOpts.Limit) {
-        Candidates.pop();
-        Items.isIncomplete = true;
-      }
+      Results.push_back(Result);
     }
-    while (!Candidates.empty()) {
-      auto &Candidate = Candidates.top();
-      const auto *CCS = Candidate.Result->CreateCodeCompletionString(
-          S, Context, *Allocator, CCTUInfo,
-          CodeCompleteOpts.IncludeBriefComments);
-      assert(CCS && "Expected the CodeCompletionString to be non-null");
-      Items.items.push_back(ProcessCodeCompleteResult(Candidate, *CCS));
-      Candidates.pop();
-    }
-    std::reverse(Items.items.begin(), Items.items.end());
   }
 
-  GlobalCodeCompletionAllocator &getAllocator() override { return *Allocator; }
-
+  CodeCompletionAllocator &getAllocator() override { return *CCAllocator; }
   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
 
-private:
-  llvm::Optional<float> fuzzyMatch(Sema &S, const CodeCompletionContext &CCCtx,
-                                   FuzzyMatcher &Filter,
-                                   CodeCompletionResult Result) {
+  // Returns the filtering/sorting name for Result, which must be from Results.
+  // Returned string is owned by this recorder (or the AST).
+  llvm::StringRef getName(const CodeCompletionResult &Result) {
     switch (Result.Kind) {
     case CodeCompletionResult::RK_Declaration:
       if (auto *ID = Result.Declaration->getIdentifier())
-        return Filter.match(ID->getName());
+        return ID->getName();
       break;
     case CodeCompletionResult::RK_Keyword:
-      return Filter.match(Result.Keyword);
+      return Result.Keyword;
     case CodeCompletionResult::RK_Macro:
-      return Filter.match(Result.Macro->getName());
+      return Result.Macro->getName();
     case CodeCompletionResult::RK_Pattern:
-      return Filter.match(Result.Pattern->getTypedText());
+      return Result.Pattern->getTypedText();
     }
-    auto *CCS = Result.CreateCodeCompletionString(
-        S, CCCtx, *Allocator, CCTUInfo, /*IncludeBriefComments=*/false);
-    return Filter.match(CCS->getTypedText());
+    auto *CCS = codeCompletionString(Result, /*IncludeBriefComments=*/false);
+    return CCS->getTypedText();
   }
 
-  CompletionItem
-  ProcessCodeCompleteResult(const CompletionCandidate &Candidate,
-                            const CodeCompletionString &CCS) const {
-
-    // Adjust this to InsertTextFormat::Snippet iff we encounter a
-    // CK_Placeholder chunk in SnippetCompletionItemsCollector.
-    CompletionItem Item;
+  // Build a CodeCompletion string for R, which must be from Results.
+  // The CCS will be owned by this recorder.
+  CodeCompletionString *codeCompletionString(const CodeCompletionResult &R,
+                                             bool IncludeBriefComments) {
+    // CodeCompletionResult doesn't seem to be const-correct. We own it, anyway.
+    return const_cast<CodeCompletionResult &>(R).CreateCodeCompletionString(
+        *Sema, CCContext, *CCAllocator, CCTUInfo, IncludeBriefComments);
+  }
 
-    Item.documentation = getDocumentation(CCS);
-    Item.sortText = Candidate.sortText();
-    Item.scoreInfo = Candidate.Scores;
+private:
+  CodeCompleteOptions Opts;
+  std::shared_ptr<GlobalCodeCompletionAllocator> CCAllocator;
+  CodeCompletionTUInfo CCTUInfo;
+};
 
-    Item.detail = getDetail(CCS);
-    Item.filterText = getFilterText(CCS);
-    getLabelAndInsertText(CCS, &Item.label, &Item.insertText, EnableSnippets);
+// Tracks a bounded number of candidates with the best scores.
+class TopN {
+public:
+  using value_type = std::pair<CompletionCandidate, CompletionItemScores>;
+
+  TopN(size_t N) : N(N) {}
+
+  // Adds a candidate to the set, and drops one if needed to get back under N.
+  void push(value_type &&V) {
+    if (Heap.size() >= N) {
+      More = true;
+      if (N == 0 || !greater(V, Heap.front()))
+        return;
+      std::pop_heap(Heap.begin(), Heap.end(), greater);
+      Heap.back() = std::move(V);
+      std::push_heap(Heap.begin(), Heap.end(), greater);
+    } else {
+      Heap.push_back(std::move(V));
+      std::push_heap(Heap.begin(), Heap.end(), greater);
+    }
+    assert(Heap.size() <= N);
+    assert(std::is_heap(Heap.begin(), Heap.end(), greater));
+  }
 
-    Item.insertTextFormat = EnableSnippets ? InsertTextFormat::Snippet
-                                           : InsertTextFormat::PlainText;
+  // Returns candidates from best to worst.
+  std::vector<value_type> items() && {
+    std::sort_heap(Heap.begin(), Heap.end(), greater);
+    assert(Heap.size() <= N);
+    return std::move(Heap);
+  }
 
-    // Fill in the kind field of the CompletionItem.
-    Item.kind = toCompletionItemKind(Candidate.Result->Kind,
-                                     Candidate.Result->CursorKind);
+  // Returns true if we had more than N candidates, and dropped some.
+  bool more() const { return More; }
 
-    return Item;
+private:
+  static bool greater(const value_type &L, const value_type &R) {
+    if (L.second.finalScore != R.second.finalScore)
+      return L.second.finalScore > R.second.finalScore;
+    return L.first.Name < R.first.Name; // Earlier name is better.
   }
 
-  CodeCompleteOptions ClangdOpts;
-  CompletionList &Items;
-  std::shared_ptr<clang::GlobalCodeCompletionAllocator> Allocator;
-  CodeCompletionTUInfo CCTUInfo;
-  NameToComplete &CompletedName;
-  bool EnableSnippets;
-}; // CompletionItemsCollector
+  const size_t N;
+  bool More = false;
+  std::vector<value_type> Heap; // Min-heap, comparator is greater().
+};
+
 
 class SignatureHelpCollector final : public CodeCompleteConsumer {
 
@@ -491,14 +541,16 @@
 
 }; // SignatureHelpCollector
 
-bool invokeCodeComplete(const Context &Ctx,
-                        std::unique_ptr<CodeCompleteConsumer> Consumer,
-                        const clang::CodeCompleteOptions &Options,
-                        PathRef FileName,
-                        const tooling::CompileCommand &Command,
-                        PrecompiledPreamble const *Preamble, StringRef Contents,
-                        Position Pos, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
-                        std::shared_ptr<PCHContainerOperations> PCHs) {
+// Invokes Sema code completion on a file.
+// Callback will be invoked once completion is done, but before cleaning up.
+bool semaCodeComplete(const Context &Ctx,
+                      std::unique_ptr<CodeCompleteConsumer> Consumer,
+                      const clang::CodeCompleteOptions &Options,
+                      PathRef FileName, const tooling::CompileCommand &Command,
+                      PrecompiledPreamble const *Preamble, StringRef Contents,
+                      Position Pos, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
+                      std::shared_ptr<PCHContainerOperations> PCHs,
+                      llvm::function_ref<void()> Callback = nullptr) {
   std::vector<const char *> ArgStrs;
   for (const auto &S : Command.CommandLine)
     ArgStrs.push_back(S.c_str());
@@ -551,69 +603,14 @@
     return false;
   }
 
+  if (Callback)
+    Callback();
   Action.EndSourceFile();
 
   return true;
 }
 
-CompletionItem indexCompletionItem(const Symbol &Sym, llvm::StringRef Filter,
-                                   const SpecifiedScope &SSInfo,
-                                   llvm::StringRef DebuggingLabel = "") {
-  CompletionItem Item;
-  Item.kind = toCompletionItemKind(Sym.SymInfo.Kind);
-  // Add DebuggingLabel to the completion results if DebuggingLabel is not
-  // empty.
-  //
-  // For symbols from static index, there are prefix "[G]" in the
-  // results (which is used for debugging purpose).
-  // So completion list will be like:
-  //   clang::symbol_from_dynamic_index
-  //   [G]clang::symbol_from_static_index
-  //
-  // FIXME: Find out a better way to show the index source.
-  if (!DebuggingLabel.empty()) {
-    llvm::raw_string_ostream Label(Item.label);
-    Label << llvm::format("[%s]%s", DebuggingLabel.str().c_str(),
-                          Sym.Name.str().c_str());
-  } else {
-    Item.label = Sym.Name;
-  }
-  // FIXME(ioeric): support inserting/replacing scope qualifiers.
-
-  // FIXME(ioeric): support snippets.
-  Item.insertText = Sym.CompletionPlainInsertText;
-  Item.insertTextFormat = InsertTextFormat::PlainText;
-  Item.filterText = Sym.Name;
-
-  // FIXME(ioeric): sort symbols appropriately.
-  Item.sortText = "";
-
-  if (Sym.Detail) {
-    Item.documentation = Sym.Detail->Documentation;
-    Item.detail = Sym.Detail->CompletionDetail;
-  }
-
-  return Item;
-}
-
-void completeWithIndex(const Context &Ctx, const SymbolIndex &Index,
-                       llvm::StringRef Code, const SpecifiedScope &SSInfo,
-                       llvm::StringRef Filter, CompletionList *Items,
-                       llvm::StringRef DebuggingLabel = "") {
-  FuzzyFindRequest Req;
-  Req.Query = Filter;
-  // FIXME(ioeric): add more possible scopes based on using namespaces and
-  // containing namespaces.
-  StringRef Scope = SSInfo.Resolved.empty() ? SSInfo.Written : SSInfo.Resolved;
-  Req.Scopes = {Scope.trim(':').str()};
-
-  Items->isIncomplete |= !Index.fuzzyFind(Ctx, Req, [&](const Symbol &Sym) {
-    Items->items.push_back(
-        indexCompletionItem(Sym, Filter, SSInfo, DebuggingLabel));
-  });
-}
-
-SpecifiedScope extraCompletionScope(Sema &S, const CXXScopeSpec &SS) {
+SpecifiedScope getSpecifiedScope(Sema &S, const CXXScopeSpec &SS) {
   SpecifiedScope Info;
   auto &SM = S.getSourceManager();
   auto SpecifierRange = SS.getRange();
@@ -650,28 +647,160 @@
   return Result;
 }
 
+// Runs Sema-based (AST) and Index-based completion, returns merged results.
+//
+// There are a few tricky considerations:
+//   - the AST provides information needed for the index query (e.g. which
+//     namespaces to search in). So Sema must start first.
+//   - we only want to return the top results (Opts.Limit).
+//     Building CompletionItems for everything else is wasteful, so we want to
+//     preserve the "native" format until we're done with scoring.
+//   - the data underlying Sema completion items is owned by the AST and various
+//     other arenas, which must stay alive for us to build CompletionItems.
+//   - we may get duplicate results from Sema and the Index, we need to merge.
+//
+// So we start Sema completion first, but defer its cleanup until we're done.
+// We use the Sema context information to query the index.
+// Then we merge the two result sets, producing items that are Sema/Index/Both.
+// These items are scored, and the top N are synthesized into the LSP response.
+// Finally, we can clean up the data structures created by Sema completion.
+//
+// Main collaborators are:
+//   - semaCodeComplete sets up the compiler machinery to run code completion.
+//   - CompletionRecorder captures Sema completion results, including context.
+//   - SymbolIndex (Opts.Index) provides index completion results as Symbols
+//   - CompletionCandidates are the result of merging Sema and Index results.
+//     Each candidate points to an underlying CodeCompletionResult (Sema), a
+//     Symbol (Index), or both. It computes the result quality score.
+//     CompletionCandidate also does conversion to CompletionItem (at the end).
+//   - FuzzyMatcher scores how the candidate matches the partial identifier.
+//     This score is combined with the result quality score for the final score.
+//   - TopN determines the results with the best score.
 CompletionList codeComplete(const Context &Ctx, PathRef FileName,
                             const tooling::CompileCommand &Command,
                             PrecompiledPreamble const *Preamble,
                             StringRef Contents, Position Pos,
                             IntrusiveRefCntPtr<vfs::FileSystem> VFS,
                             std::shared_ptr<PCHContainerOperations> PCHs,
                             CodeCompleteOptions Opts) {
-  CompletionList Results;
-  NameToComplete CompletedName;
-  auto Consumer =
-      llvm::make_unique<CompletionItemsCollector>(Opts, Results, CompletedName);
-  invokeCodeComplete(Ctx, std::move(Consumer), Opts.getClangCompleteOpts(),
-                     FileName, Command, Preamble, Contents, Pos, std::move(VFS),
-                     std::move(PCHs));
-
-  // Got scope specifier (ns::f^) for code completion from sema, try to query
-  // global symbols from indexes.
-  // FIXME: merge with Sema results, and respect limits.
-  if (CompletedName.SSInfo && Opts.Index)
-    completeWithIndex(Ctx, *Opts.Index, Contents, *CompletedName.SSInfo,
-                      CompletedName.Filter, &Results, /*DebuggingLabel=*/"I");
-  return Results;
+  // We only keep the best N results at any time. We keep their "native" data
+  // structures, and only serialize the winners to LSP at the end.
+  TopN Candidates(Opts.Limit ? Opts.Limit : std::numeric_limits<size_t>::max());
+  CompletionRecorder *Recorder;         // Forward declare for AddCandidate.
+  llvm::Optional<FuzzyMatcher> Filter;  // Initialized later, after Sema.
+  int NSema = 0, NIndex = 0, NBoth = 0; // Counters for logging.
+  // AddCandidate computes scores, and adds a candidate to the TopN structure.
+  auto AddCandidate = [&](const CodeCompletionResult *SemaResult,
+                          const Symbol *IndexResult) {
+    CompletionCandidate C;
+    C.SemaResult = SemaResult;
+    C.IndexResult = IndexResult;
+    C.Name = IndexResult ? IndexResult->Name : Recorder->getName(*SemaResult);
+
+    CompletionItemScores Scores;
+    if (auto FuzzyScore = Filter->match(C.Name))
+      Scores.filterScore = *FuzzyScore;
+    else
+      return;
+    Scores.symbolScore = C.score();
+    // We score candidates by multiplying symbolScore ("quality" of the result)
+    // with filterScore (how well it matched the query).
+    // This is sensitive to the distribution of both component scores!
+    Scores.finalScore = Scores.filterScore * Scores.symbolScore;
+
+    NSema += bool(SemaResult);
+    NIndex += bool(IndexResult);
+    NBoth += SemaResult && IndexResult;
+    Candidates.push({C, Scores});
+  };
+
+  // We run Sema code completion first. It builds an AST and calculates:
+  //   - completion results based on the AST. These are saved for merging.
+  //   - the partial identifier and namespaces to look in. We need these for
+  //     the index query.
+  auto RecorderOwner = llvm::make_unique<CompletionRecorder>(Opts);
+  Recorder = RecorderOwner.get(); // Valid until code completion finishes.
+  CompletionList Output;
+  semaCodeComplete(Ctx, std::move(RecorderOwner), Opts.getClangCompleteOpts(),
+                   FileName, Command, Preamble, Contents, Pos, std::move(VFS),
+                   std::move(PCHs), [&] {
+    if (!Recorder->Sema) // If Sema didn't trigger completion, don't query the
+      return;            // index either - we don't have the necessary context.
+    llvm::StringRef FilterText =
+        Recorder->Sema->getPreprocessor().getCodeCompletionFilter();
+    Filter = FuzzyMatcher(FilterText);
+
+    // Now we have the needed context to query the index.
+    // FIXME: we shouldn't query the index if the scope is e.g. class members.
+    // FIXME: in addition to querying for extra/overlapping symbols, we should
+    //        explicitly request symbols corresponding to Sema results.
+    //        We can use their signals even if the index can't suggest them.
+    // We must copy index results to preserve them, but there are at most Limit.
+    SymbolSlab IndexResults;
+    if (auto *Idx = Opts.Index) {
+      SymbolSlab::Builder ResultsBuilder;
+      // Build the query.
+      FuzzyFindRequest Req;
+      Req.Query = FilterText;
+      // If the user typed a scope, e.g. a::b::xxx(), restrict to that scope.
+      // FIXME(ioeric): add scopes based on using directives and enclosing ns.
+      if (auto SS = Recorder->CCContext.getCXXScopeSpecifier())
+        Req.Scopes = {getSpecifiedScope(*Recorder->Sema, **SS).forIndex()};
+      // Run the query against the index.
+      Output.isIncomplete |= !Idx->fuzzyFind(
+          Ctx, Req, [&](const Symbol &Sym) { ResultsBuilder.insert(Sym); });
+      IndexResults = std::move(ResultsBuilder).build();
+    }
+
+    // Now we merge Sema and Index results into CompletionCandidates.
+    llvm::DenseSet<const Symbol*> UsedIndexResults;
+    auto CorrespondingIndexResult =
+        [&](const CodeCompletionResult &SemaResult) -> const Symbol * {
+      if (auto SymID = getSymbolID(SemaResult)) {
+        auto I = IndexResults.find(*SymID);
+        if (I != IndexResults.end()) {
+          UsedIndexResults.insert(&*I);
+          return &*I;
+        }
+      }
+      return nullptr;
+    };
+    // Emit all Sema results, merging them with Index results if possible.
+    for (auto &SemaResult : Recorder->Results)
+      AddCandidate(&SemaResult, CorrespondingIndexResult(SemaResult));
+    // Now emit any Index-only results.
+    for (const auto &IndexResult : IndexResults) {
+      if (UsedIndexResults.count(&IndexResult))
+        continue;
+      AddCandidate(/*SemaResult=*/nullptr, &IndexResult);
+    }
+
+    // Now we have all the winning candidates. Convert them to LSP structs.
+    Output.isIncomplete |= Candidates.more();
+    for (auto &Candidate : std::move(Candidates).items()) {
+      // For Sema results, we build the CCS here, where we have the arenas.
+      auto *CCS =
+          Candidate.first.SemaResult
+              ? Recorder->codeCompletionString(*Candidate.first.SemaResult,
+                                              Opts.IncludeBriefComments)
+              : nullptr;
+      Output.items.push_back(Candidate.first.build(CCS, Opts));
+      auto &R = Output.items.back();
+      R.scoreInfo = Candidate.second;
+      R.sortText = sortText(R.scoreInfo->finalScore, R.label);
+    }
+    // We're done with the Sema results, so allow the AST to be cleaned up.
+  });
+
+  log(Ctx,
+      llvm::formatv("Code completion: {0} results from Sema, {1} from Index, "
+                    "{2} matched, {3} returned{4}.",
+                    NSema, NIndex, NBoth, Output.items.size(),
+                    Output.isIncomplete ? " (incomplete)" : ""));
+  assert(!Opts.Limit || Output.items.size() <= Opts.Limit);
+  // Don't assert(!isIncomplete || !Limit && Output.items.size() == Opts.Limit)
+  // Indexes may choose to impose their own limits even if we don't have one.
+  return Output;
 }
 
 SignatureHelp signatureHelp(const Context &Ctx, PathRef FileName,
@@ -686,10 +815,10 @@
   Options.IncludeMacros = false;
   Options.IncludeCodePatterns = false;
   Options.IncludeBriefComments = true;
-  invokeCodeComplete(Ctx,
-                     llvm::make_unique<SignatureHelpCollector>(Options, Result),
-                     Options, FileName, Command, Preamble, Contents, Pos,
-                     std::move(VFS), std::move(PCHs));
+  semaCodeComplete(Ctx,
+                   llvm::make_unique<SignatureHelpCollector>(Options, Result),
+                   Options, FileName, Command, Preamble, Contents, Pos,
+                   std::move(VFS), std::move(PCHs));
   return Result;
 }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to