cpoerschke commented on code in PR #2248:
URL: https://github.com/apache/solr/pull/2248#discussion_r1565662583
##########
solr/core/src/java/org/apache/solr/search/MultiThreadedSearcher.java:
##########
@@ -0,0 +1,415 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.concurrent.ExecutionException;
+import java.util.function.Supplier;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.CollectorManager;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.SimpleCollector;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopDocsCollector;
+import org.apache.lucene.search.TopFieldDocs;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.solr.search.join.GraphQuery;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class MultiThreadedSearcher {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  final SolrIndexSearcher searcher;
+
+  public MultiThreadedSearcher(SolrIndexSearcher searcher) {
+    this.searcher = searcher;
+  }
+
+  SearchResult searchCollectorManagers(
+      int len,
+      QueryCommand cmd,
+      Query query,
+      boolean needTopDocs,
+      boolean needMaxScore,
+      boolean needDocSet)
+      throws IOException {
+    Collection<CollectorManager<Collector, Object>> collectors = new ArrayList<>();
+
+    int firstCollectorsSize = 0;
+
+    final int firstTopDocsCollectorIndex;
+    if (needTopDocs) {
+      firstTopDocsCollectorIndex = firstCollectorsSize;
+      firstCollectorsSize++;
+    } else {
+      firstTopDocsCollectorIndex = -1;
+    }
+
+    final int firstMaxScoreCollectorIndex;
+    if (needMaxScore) {
+      firstMaxScoreCollectorIndex = firstCollectorsSize;
+      firstCollectorsSize++;
+    } else {
+      firstMaxScoreCollectorIndex = -1;
+    }
+
+    Collector[] firstCollectors = new Collector[firstCollectorsSize];
+
+    if (needTopDocs) {
+
+      collectors.add(new TopDocsCM(len, cmd, firstCollectors, firstTopDocsCollectorIndex));
+    }
+    if (needMaxScore) {
+      collectors.add(new MaxScoreCM(firstCollectors, firstMaxScoreCollectorIndex));
+    }
+    if (needDocSet) {
+      int maxDoc = searcher.getRawReader().maxDoc();
+      log.error("raw read max={}", searcher.getRawReader().maxDoc());
+
+      collectors.add(new DocSetCM(maxDoc));
+    }
+
+    @SuppressWarnings({"unchecked", "rawtypes"})
+    CollectorManager<Collector, Object>[] colls =
+        collectors.toArray(new CollectorManager[0]);
+    SolrMultiCollectorManager manager = new SolrMultiCollectorManager(colls);
+    Object[] ret;
+    try {
+      ret = searcher.search(query, manager);
+    } catch (Exception ex) {
+      if (ex instanceof RuntimeException
+          && ex.getCause() != null
+          && ex.getCause() instanceof ExecutionException
+          && ex.getCause().getCause() != null
+          && ex.getCause().getCause() instanceof RuntimeException) {
+        throw (RuntimeException) ex.getCause().getCause();
+      } else {
+        throw ex;
+      }
+    }
+
+    ScoreMode scoreMode = SolrMultiCollectorManager.scoreMode(firstCollectors);
+
+    return new SearchResult(scoreMode, ret);
+  }
+
+  static boolean allowMT(DelegatingCollector postFilter, QueryCommand cmd, Query query) {
+    if (postFilter != null || cmd.getSegmentTerminateEarly() || cmd.getTimeAllowed() > 0) {
+      return false;
+    } else {
+      MTCollectorQueryCheck allowMT = new MTCollectorQueryCheck();
+      query.visit(allowMT);
+      return allowMT.allowed();
+    }
+  }
+
+  /**
+   * A {@link QueryVisitor} that recurses through the query tree, determining if all queries support
+   * multi-threaded collecting.
+   */
+  private static class MTCollectorQueryCheck extends QueryVisitor {
+
+    private QueryVisitor subVisitor = this;
+
+    private boolean allowMt(Query query) {
+      if (query instanceof RankQuery || query instanceof GraphQuery || query instanceof JoinQuery) {
+        return false;
+      }
+      return true;
+    }
+
+    @Override
+    public void consumeTerms(Query query, Term... terms) {
+      if (!allowMt(query)) {
+        subVisitor = EMPTY_VISITOR;
+      }
+    }
+
+    @Override
+    public void consumeTermsMatching(
+        Query query, String field, Supplier<ByteRunAutomaton> automaton) {
+      if (!allowMt(query)) {
+        subVisitor = EMPTY_VISITOR;
+      } else {
+        super.consumeTermsMatching(query, field, automaton);
+      }
+    }
+
+    @Override
+    public void visitLeaf(Query query) {
+      if (!allowMt(query)) {
+        subVisitor = EMPTY_VISITOR;
+      }
+    }
+
+    @Override
+    public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
+      return subVisitor;
+    }
+
+    public boolean allowed() {
+      return subVisitor != EMPTY_VISITOR;
+    }
+  }
+
+  static class MaxScoreResult {
+    final float maxScore;
+
+    public MaxScoreResult(float maxScore) {
+      this.maxScore = maxScore;
+    }
+  }
+
+  static class FixedBitSetCollector extends SimpleCollector {
+    private final LinkedList<FixedBitSet> bitSets = new LinkedList<>();
+    private final LinkedList<Integer> skipWords = new LinkedList<>();
+    private final LinkedList<Integer> skipBits = new LinkedList<>();
+
+    FixedBitSetCollector() {}
+
+    @Override
+    protected void doSetNextReader(LeafReaderContext context) throws IOException {
+      this.bitSets.add(null); // lazy allocate when collecting document(s)
+      this.skipWords.add(context.docBase / 64);
+      this.skipBits.add(context.docBase % 64);
+    }
+
+    @Override
+    public void collect(int doc) throws IOException {
+      FixedBitSet bitSet = this.bitSets.getLast();
+      final int idx = this.skipBits.getLast() + doc;
+
+      final int numWords = FixedBitSet.bits2words(idx + 1); // +1 to ensure minimum 1 word
+
+      if (bitSet == null) {

Review Comment:
I'm thinking lazy allocation makes sense since for some segments maybe nothing will ever be collected ...

... however `+1` here for segment bitset sizing seems too basic ...

... didn't (yet) find a number-of-documents-in-a-segment value ... `maxDoc / numberOfLeaves` might be a way to initially approximate, but once that's exceeded the question of how to upsize arises again ...

... thoughts?
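For illustration only, a minimal sketch of the estimate-then-grow sizing idea floated above: start each segment's bitset from a `maxDoc / numberOfLeaves` estimate and enlarge it on demand once the estimate is exceeded. The class and method names (`SegmentBitSetSizingSketch`, `initialEstimate`, `collect`) are hypothetical and not part of the PR; the sketch assumes Lucene's `FixedBitSet.ensureCapacity` is an acceptable way to upsize.

```java
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.FixedBitSet;

/** Hypothetical sketch, not the PR's code: estimate-then-grow sizing for per-segment bitsets. */
class SegmentBitSetSizingSketch {

  /** Initial per-segment estimate: the top-level maxDoc spread evenly across the leaves. */
  static FixedBitSet initialEstimate(IndexReader topLevelReader) {
    final int maxDoc = topLevelReader.maxDoc();
    final int numberOfLeaves = Math.max(1, topLevelReader.leaves().size());
    return new FixedBitSet(Math.max(1, maxDoc / numberOfLeaves));
  }

  /**
   * Upsize on demand: ensureCapacity returns the same bitset when bit index segmentLocalDoc is
   * already addressable, otherwise a grown bitset that preserves the bits set so far.
   */
  static FixedBitSet collect(FixedBitSet bitSet, int segmentLocalDoc) {
    bitSet = FixedBitSet.ensureCapacity(bitSet, segmentLocalDoc);
    bitSet.set(segmentLocalDoc);
    return bitSet;
  }
}
```

Whether this estimate-then-grow approach beats simply sizing each segment's bitset up front is exactly the open question raised in the comment above.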