kbobyrev updated this revision to Diff 165471.
kbobyrev marked 3 inline comments as done.
kbobyrev edited the summary of this revision.
kbobyrev added a comment.

Fixed the bug with incorrect assumption of `Children` being sorted (instead of 
being a heap). This caused a huge overhead (~30%). When I iterate through all 
children in `consume()` (like before) it's also worse (~18%).

Seems like this optimization is not worth (yet?). As soon as we get more 
proximity paths (and hence more OR iterator children) that might make sense.


https://reviews.llvm.org/D52083

Files:
  clang-tools-extra/clangd/index/dex/Iterator.cpp
  clang-tools-extra/unittests/clangd/DexTests.cpp

Index: clang-tools-extra/unittests/clangd/DexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexTests.cpp
@@ -255,20 +255,20 @@
 }
 
 TEST(DexIterators, StringRepresentation) {
-  const PostingList L0({4, 7, 8, 20, 42, 100});
+  const PostingList L0({0, 1, 8, 20, 42, 100});
   const PostingList L1({1, 3, 5, 8, 9});
   const PostingList L2({1, 5, 7, 9});
-  const PostingList L3({0, 5});
+  const PostingList L3({0, 1});
   const PostingList L4({0, 1, 5});
   const PostingList L5({});
 
-  EXPECT_EQ(llvm::to_string(*(L0.iterator())), "[4]");
+  EXPECT_EQ(llvm::to_string(*(L0.iterator())), "[0]");
 
-  auto Nested =
-      createAnd(createAnd(L1.iterator(), L2.iterator()),
-                createOr(L3.iterator(), L4.iterator(), L5.iterator()));
+  auto Nested = createAnd(
+      createAnd(L1.iterator(), L2.iterator(), L4.iterator(), L0.iterator()),
+      createOr(L3.iterator(), L5.iterator()));
 
-  EXPECT_EQ(llvm::to_string(*Nested), "(& (| [5] [1] [END]) (& [1] [1]))");
+  EXPECT_EQ(llvm::to_string(*Nested), "(& (| [1] [END]) (& [1] [1] [1] [1]))");
 }
 
 TEST(DexIterators, Limit) {
Index: clang-tools-extra/clangd/index/dex/Iterator.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/Iterator.cpp
+++ clang-tools-extra/clangd/index/dex/Iterator.cpp
@@ -125,69 +125,88 @@
   bool ReachedEnd = false;
 };
 
+/// Comparator treats END as the largest DocID and ensures that peek() call is
+/// valid.
+bool Greater(const std::unique_ptr<Iterator> &LHS,
+             const std::unique_ptr<Iterator> &RHS) {
+  if (LHS->reachedEnd())
+    return true;
+  if (RHS->reachedEnd())
+    return false;
+  return LHS->peek() > RHS->peek();
+};
+
 /// Implements Iterator over the union of other iterators.
 ///
 /// OrIterator iterates through all items which can be pointed to by at least
 /// one child. To preserve the sorted order, this iterator always advances the
 /// child with smallest Child->peek() value. OrIterator becomes exhausted as
 /// soon as all of its children are exhausted.
+///
+/// Invariant: Children are always stored in a min-heap which puts iterators
+/// with the smallest DocID to the front.
 class OrIterator : public Iterator {
 public:
   explicit OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
       : Children(std::move(AllChildren)) {
     assert(!Children.empty() && "OR iterator should have at least one child.");
+    // Initialize invariant.
+    std::make_heap(begin(Children), end(Children), Greater);
   }
 
-  /// Returns true if all children are exhausted.
-  bool reachedEnd() const override {
-    return std::all_of(begin(Children), end(Children),
-                       [](const std::unique_ptr<Iterator> &Child) {
-                         return Child->reachedEnd();
-                       });
-  }
+  /// Returns true if all children are exhausted. Due to the invariant, if the
+  /// first child has reached the end, all of them have reached the end.
+  bool reachedEnd() const override { return Children.front()->reachedEnd(); }
 
   /// Moves each child pointing to the smallest DocID to the next item.
   void advance() override {
     assert(!reachedEnd() && "OR iterator can't advance() at the end.");
     const auto SmallestID = peek();
-    for (const auto &Child : Children)
-      if (!Child->reachedEnd() && Child->peek() == SmallestID)
-        Child->advance();
+    // The first element always contains Child with the smallest DocID.
+    while (!Children.front()->reachedEnd() &&
+           Children.front()->peek() == SmallestID) {
+      Children.front()->advance();
+      // Restore invariant.
+      std::pop_heap(begin(Children), end(Children), Greater);
+      std::push_heap(begin(Children), end(Children), Greater);
+    }
   }
 
   /// Advances each child to the next existing element with DocumentID >= ID.
   void advanceTo(DocID ID) override {
     assert(!reachedEnd() && "OR iterator can't advanceTo() at the end.");
     for (const auto &Child : Children)
       if (!Child->reachedEnd())
         Child->advanceTo(ID);
+    // Restore invariant.
+    std::make_heap(begin(Children), end(Children), Greater);
   }
 
   /// Returns the element under cursor of the child with smallest Child->peek()
   /// value.
   DocID peek() const override {
     assert(!reachedEnd() && "OR iterator can't peek() at the end.");
-    DocID Result = std::numeric_limits<DocID>::max();
-
-    for (const auto &Child : Children)
-      if (!Child->reachedEnd())
-        Result = std::min(Result, Child->peek());
-
-    return Result;
+    return Children.front()->peek();
   }
 
   // Returns the maximum boosting score among all Children when iterator is not
   // exhausted and points to the given ID, DEFAULT_BOOST_SCORE otherwise.
   float consume() override {
     assert(!reachedEnd() && "OR iterator can't consume() at the end.");
     const DocID ID = peek();
-    return std::accumulate(
-        begin(Children), end(Children), DEFAULT_BOOST_SCORE,
-        [&](float Boost, const std::unique_ptr<Iterator> &Child) {
-          return (!Child->reachedEnd() && Child->peek() == ID)
-                     ? std::max(Boost, Child->consume())
-                     : Boost;
-        });
+    float Boost = DEFAULT_BOOST_SCORE;
+    // Call consume() on all children pointing to ID and temporally pop them out
+    // of heap to proceed to the next valid item.
+    size_t Popped = 0;
+    while (!Children.front()->reachedEnd() && Children.front()->peek() == ID &&
+           Popped < Children.size()) {
+      Boost = std::max(Children.front()->consume(), Boost);
+      std::pop_heap(begin(Children), end(Children) - Popped++, Greater);
+    }
+    // Restore invariant.
+    while (Popped > 0)
+      std::push_heap(begin(Children), end(Children) - --Popped, Greater);
+    return Boost;
   }
 
   size_t estimateSize() const override {
@@ -210,7 +229,6 @@
     return OS;
   }
 
-  // FIXME(kbobyrev): Would storing Children in min-heap be faster?
   std::vector<std::unique_ptr<Iterator>> Children;
 };
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to