cpoerschke commented on code in PR #2248:
URL: https://github.com/apache/solr/pull/2248#discussion_r1565662583
##########
solr/core/src/java/org/apache/solr/search/MultiThreadedSearcher.java:
##########
@@ -0,0 +1,415 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.concurrent.ExecutionException;
+import java.util.function.Supplier;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.CollectorManager;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.SimpleCollector;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopDocsCollector;
+import org.apache.lucene.search.TopFieldDocs;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.solr.search.join.GraphQuery;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class MultiThreadedSearcher {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  final SolrIndexSearcher searcher;
+
+  public MultiThreadedSearcher(SolrIndexSearcher searcher) {
+    this.searcher = searcher;
+  }
+
+  SearchResult searchCollectorManagers(
+      int len,
+      QueryCommand cmd,
+      Query query,
+      boolean needTopDocs,
+      boolean needMaxScore,
+      boolean needDocSet)
+      throws IOException {
+    Collection<CollectorManager<Collector, Object>> collectors = new ArrayList<>();
+
+    int firstCollectorsSize = 0;
+
+    final int firstTopDocsCollectorIndex;
+    if (needTopDocs) {
+      firstTopDocsCollectorIndex = firstCollectorsSize;
+      firstCollectorsSize++;
+    } else {
+      firstTopDocsCollectorIndex = -1;
+    }
+
+    final int firstMaxScoreCollectorIndex;
+    if (needMaxScore) {
+      firstMaxScoreCollectorIndex = firstCollectorsSize;
+      firstCollectorsSize++;
+    } else {
+      firstMaxScoreCollectorIndex = -1;
+    }
+
+    Collector[] firstCollectors = new Collector[firstCollectorsSize];
+
+    if (needTopDocs) {
+
+      collectors.add(new TopDocsCM(len, cmd, firstCollectors, firstTopDocsCollectorIndex));
+    }
+    if (needMaxScore) {
+      collectors.add(new MaxScoreCM(firstCollectors, firstMaxScoreCollectorIndex));
+    }
+    if (needDocSet) {
+      int maxDoc = searcher.getRawReader().maxDoc();
+      log.error("raw read max={}", searcher.getRawReader().maxDoc());
+
+      collectors.add(new DocSetCM(maxDoc));
+    }
+
+    @SuppressWarnings({"unchecked", "rawtypes"})
+    CollectorManager<Collector, Object>[] colls =
+        collectors.toArray(new CollectorManager[0]);
+    SolrMultiCollectorManager manager = new SolrMultiCollectorManager(colls);
+    Object[] ret;
+    try {
+      ret = searcher.search(query, manager);
+    } catch (Exception ex) {
+      if (ex instanceof RuntimeException
+          && ex.getCause() != null
+          && ex.getCause() instanceof ExecutionException
+          && ex.getCause().getCause() != null
+          && ex.getCause().getCause() instanceof RuntimeException) {
+        throw (RuntimeException) ex.getCause().getCause();
+      } else {
+        throw ex;
+      }
+    }
+
+    ScoreMode scoreMode = SolrMultiCollectorManager.scoreMode(firstCollectors);
+
+    return new SearchResult(scoreMode, ret);
+  }
+
+  static boolean allowMT(DelegatingCollector postFilter, QueryCommand cmd, Query query) {
+    if (postFilter != null || cmd.getSegmentTerminateEarly() || cmd.getTimeAllowed() > 0) {
+      return false;
+    } else {
+      MTCollectorQueryCheck allowMT = new MTCollectorQueryCheck();
+      query.visit(allowMT);
+      return allowMT.allowed();
+    }
+  }
+
+  /**
+   * A {@link QueryVisitor} that recurses through the query tree, determining if all queries support
+   * multi-threaded collecting.
+   */
+  private static class MTCollectorQueryCheck extends QueryVisitor {
+
+    private QueryVisitor subVisitor = this;
+
+    private boolean allowMt(Query query) {
+      if (query instanceof RankQuery || query instanceof GraphQuery || query instanceof JoinQuery) {
+        return false;
+      }
+      return true;
+    }
+
+    @Override
+    public void consumeTerms(Query query, Term... terms) {
+      if (!allowMt(query)) {
+        subVisitor = EMPTY_VISITOR;
+      }
+    }
+
+    @Override
+    public void consumeTermsMatching(
+        Query query, String field, Supplier<ByteRunAutomaton> automaton) {
+      if (!allowMt(query)) {
+        subVisitor = EMPTY_VISITOR;
+      } else {
+        super.consumeTermsMatching(query, field, automaton);
+      }
+    }
+
+    @Override
+    public void visitLeaf(Query query) {
+      if (!allowMt(query)) {
+        subVisitor = EMPTY_VISITOR;
+      }
+    }
+
+    @Override
+    public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
+      return subVisitor;
+    }
+
+    public boolean allowed() {
+      return subVisitor != EMPTY_VISITOR;
+    }
+  }
+
+  static class MaxScoreResult {
+    final float maxScore;
+
+    public MaxScoreResult(float maxScore) {
+      this.maxScore = maxScore;
+    }
+  }
+
+  static class FixedBitSetCollector extends SimpleCollector {
+    private final LinkedList<FixedBitSet> bitSets = new LinkedList<>();
+    private final LinkedList<Integer> skipWords = new LinkedList<>();
+    private final LinkedList<Integer> skipBits = new LinkedList<>();
+
+    FixedBitSetCollector() {}
+
+    @Override
+    protected void doSetNextReader(LeafReaderContext context) throws IOException {
+      this.bitSets.add(null); // lazy allocate when collecting document(s)
+      this.skipWords.add(context.docBase / 64);
+      this.skipBits.add(context.docBase % 64);
+    }
+
+    @Override
+    public void collect(int doc) throws IOException {
+      FixedBitSet bitSet = this.bitSets.getLast();
+      final int idx = this.skipBits.getLast() + doc;
+
+      final int numWords = FixedBitSet.bits2words(idx + 1); // +1 to ensure minimum 1 word
+
+      if (bitSet == null) {

Review Comment:
I'm thinking lazy allocation makes sense since for some segments maybe nothing will ever be collected ...

... however `+1` here for segment bitset sizing seems too basic ...

... didn't (yet) find a number-of-documents-in-a-segment value ... `maxDoc / numberOfLeaves` might be a way to initially approximate, but once that's exceeded the question of how to upsize arises again ...

... thoughts?
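For illustration only, a minimal sketch of the estimate-then-grow sizing idea floated above: start each segment's bitset from a `maxDoc / numberOfLeaves` estimate and enlarge it on demand once the estimate is exceeded. The class and method names (`SegmentBitSetSizingSketch`, `initialEstimate`, `collect`) are hypothetical and not part of the PR; the sketch assumes Lucene's `FixedBitSet.ensureCapacity` is an acceptable way to upsize.

```java
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.FixedBitSet;

/** Hypothetical sketch, not the PR's code: estimate-then-grow sizing for per-segment bitsets. */
class SegmentBitSetSizingSketch {

  /** Initial per-segment estimate: the top-level maxDoc spread evenly across the leaves. */
  static FixedBitSet initialEstimate(IndexReader topLevelReader) {
    final int maxDoc = topLevelReader.maxDoc();
    final int numberOfLeaves = Math.max(1, topLevelReader.leaves().size());
    return new FixedBitSet(Math.max(1, maxDoc / numberOfLeaves));
  }

  /**
   * Upsize on demand: ensureCapacity returns the same bitset when bit index segmentLocalDoc is
   * already addressable, otherwise a grown bitset that preserves the bits set so far.
   */
  static FixedBitSet collect(FixedBitSet bitSet, int segmentLocalDoc) {
    bitSet = FixedBitSet.ensureCapacity(bitSet, segmentLocalDoc);
    bitSet.set(segmentLocalDoc);
    return bitSet;
  }
}
```

Whether this estimate-then-grow approach beats simply sizing each segment's bitset up front is exactly the open question raised in the comment above.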