kbobyrev created this revision.
kbobyrev added reviewers: ioeric, sammccall, ilya-biryukov.
kbobyrev added a project: clang-tools-extra.
Herald added subscribers: kadircet, arphaman, jkorous, MaskRay.

Use min-heap invariant for OR iterator's children. This helps to avoid 
iterating through all children in `reachedEnd()`, `peek()` and allows 
early-stopping in `consume()` and `advance()`.

This optimization results in ~8-9% of performance boost on benchmark with ~4300 
real queries to LLVM static index.

benchmark's `compare.py` output:

  benchmark                    Time             CPU      Time Old      Time New 
      CPU Old       CPU New
  
---------------------------------------------------------------------------------------------------------
  DexQueries                -0.0706         -0.0706    4719149241    4385985920 
   4719100956    4385946455


https://reviews.llvm.org/D52083

Files:
  clang-tools-extra/clangd/index/dex/Iterator.cpp
  clang-tools-extra/unittests/clangd/DexTests.cpp

Index: clang-tools-extra/unittests/clangd/DexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexTests.cpp
@@ -255,20 +255,20 @@
 }
 
 TEST(DexIterators, StringRepresentation) {
-  const PostingList L0({4, 7, 8, 20, 42, 100});
+  const PostingList L0({0, 1, 8, 20, 42, 100});
   const PostingList L1({1, 3, 5, 8, 9});
   const PostingList L2({1, 5, 7, 9});
-  const PostingList L3({0, 5});
+  const PostingList L3({0, 1});
   const PostingList L4({0, 1, 5});
   const PostingList L5({});
 
-  EXPECT_EQ(llvm::to_string(*(L0.iterator())), "[4]");
+  EXPECT_EQ(llvm::to_string(*(L0.iterator())), "[0]");
 
-  auto Nested =
-      createAnd(createAnd(L1.iterator(), L2.iterator()),
-                createOr(L3.iterator(), L4.iterator(), L5.iterator()));
+  auto Nested = createAnd(
+      createAnd(L1.iterator(), L2.iterator(), L4.iterator(), L0.iterator()),
+      createOr(L3.iterator(), L5.iterator()));
 
-  EXPECT_EQ(llvm::to_string(*Nested), "(& (| [5] [1] [END]) (& [1] [1]))");
+  EXPECT_EQ(llvm::to_string(*Nested), "(& (| [1] [END]) (& [1] [1] [1] [1]))");
 }
 
 TEST(DexIterators, Limit) {
Index: clang-tools-extra/clangd/index/dex/Iterator.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/Iterator.cpp
+++ clang-tools-extra/clangd/index/dex/Iterator.cpp
@@ -125,69 +125,80 @@
   bool ReachedEnd = false;
 };
 
+// Return LHS > RHS.
+auto Compare = [](const std::unique_ptr<Iterator> &LHS,
+                  const std::unique_ptr<Iterator> &RHS) {
+  if (LHS->reachedEnd())
+    return true;
+  if (RHS->reachedEnd())
+    return false;
+  return LHS->peek() > RHS->peek();
+};
+
 /// Implements Iterator over the union of other iterators.
 ///
 /// OrIterator iterates through all items which can be pointed to by at least
 /// one child. To preserve the sorted order, this iterator always advances the
 /// child with smallest Child->peek() value. OrIterator becomes exhausted as
 /// soon as all of its children are exhausted.
+///
+/// Invariant: Children are always stored in a min-heap which puts iterators
+/// with the smallest DocID to the front.
 class OrIterator : public Iterator {
 public:
   explicit OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
       : Children(std::move(AllChildren)) {
     assert(!Children.empty() && "OR iterator should have at least one child.");
+    // Initialize invariant.
+    std::make_heap(begin(Children), end(Children), Compare);
   }
 
-  /// Returns true if all children are exhausted.
-  bool reachedEnd() const override {
-    return std::all_of(begin(Children), end(Children),
-                       [](const std::unique_ptr<Iterator> &Child) {
-                         return Child->reachedEnd();
-                       });
-  }
+  /// Returns true if all children are exhausted. Due to the invariant, if the
+  /// first child has reached the end, all of them have reached the end.
+  bool reachedEnd() const override { return Children.front()->reachedEnd(); }
 
   /// Moves each child pointing to the smallest DocID to the next item.
   void advance() override {
     assert(!reachedEnd() && "OR iterator can't advance() at the end.");
     const auto SmallestID = peek();
-    for (const auto &Child : Children)
-      if (!Child->reachedEnd() && Child->peek() == SmallestID)
-        Child->advance();
+    // The first element always contains Child with the smallest DocID.
+    while (!Children.front()->reachedEnd() &&
+           Children.front()->peek() == SmallestID) {
+      Children.front()->advance();
+      // Restore invariant.
+      std::pop_heap(begin(Children), end(Children), Compare);
+      std::push_heap(begin(Children), end(Children), Compare);
+    }
   }
 
   /// Advances each child to the next existing element with DocumentID >= ID.
   void advanceTo(DocID ID) override {
     assert(!reachedEnd() && "OR iterator can't advanceTo() at the end.");
     for (const auto &Child : Children)
-      if (!Child->reachedEnd())
-        Child->advanceTo(ID);
+      Child->advanceTo(ID);
+    // Restore invariant.
+    std::make_heap(begin(Children), end(Children), Compare);
   }
 
   /// Returns the element under cursor of the child with smallest Child->peek()
   /// value.
   DocID peek() const override {
     assert(!reachedEnd() && "OR iterator can't peek() at the end.");
-    DocID Result = std::numeric_limits<DocID>::max();
-
-    for (const auto &Child : Children)
-      if (!Child->reachedEnd())
-        Result = std::min(Result, Child->peek());
-
-    return Result;
+    return Children.front()->peek();
   }
 
   // Returns the maximum boosting score among all Children when iterator is not
   // exhausted and points to the given ID, DEFAULT_BOOST_SCORE otherwise.
   float consume() override {
     assert(!reachedEnd() && "OR iterator can't consume() at the end.");
     const DocID ID = peek();
-    return std::accumulate(
-        begin(Children), end(Children), DEFAULT_BOOST_SCORE,
-        [&](float Boost, const std::unique_ptr<Iterator> &Child) {
-          return (!Child->reachedEnd() && Child->peek() == ID)
-                     ? std::max(Boost, Child->consume())
-                     : Boost;
-        });
+    float Boost = DEFAULT_BOOST_SCORE;
+    for (const auto &Child : Children) {
+      if (Child->peek() != ID)
+        break;
+      Boost = std::max(Child->consume(), Boost);
+    }
+    return Boost;
   }
 
   size_t estimateSize() const override {
@@ -210,7 +221,6 @@
     return OS;
   }
 
-  // FIXME(kbobyrev): Would storing Children in min-heap be faster?
   std::vector<std::unique_ptr<Iterator>> Children;
 };
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to