dsmiley commented on code in PR #3418: URL: https://github.com/apache/solr/pull/3418#discussion_r2286779679
########## solr/core/src/java/org/apache/solr/handler/component/CombinedQueryComponent.java: ########## @@ -0,0 +1,590 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.component; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.CombinerParams; +import org.apache.solr.common.params.CursorMarkParams; +import org.apache.solr.common.params.GroupParams; +import org.apache.solr.common.params.ShardParams; +import 
org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.SolrCore; +import org.apache.solr.response.BasicResultContext; +import org.apache.solr.response.ResultContext; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.DocListAndSet; +import org.apache.solr.search.QueryResult; +import org.apache.solr.search.SolrReturnFields; +import org.apache.solr.search.SortSpec; +import org.apache.solr.search.combine.QueryAndResponseCombiner; +import org.apache.solr.search.combine.ReciprocalRankFusion; +import org.apache.solr.util.SolrResponseUtil; +import org.apache.solr.util.plugin.SolrCoreAware; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The CombinedQueryComponent class extends QueryComponent and provides support for executing + * multiple queries and combining their results. + */ +public class CombinedQueryComponent extends QueryComponent implements SolrCoreAware { + + public static final String COMPONENT_NAME = "combined_query"; + protected NamedList<?> initParams; + private Map<String, QueryAndResponseCombiner> combiners = new ConcurrentHashMap<>(); + private int maxCombinerQueries; + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + @Override + public void init(NamedList<?> args) { + super.init(args); + this.initParams = args; + this.maxCombinerQueries = CombinerParams.DEFAULT_MAX_COMBINER_QUERIES; + } + + @Override + public void inform(SolrCore core) { + if (initParams != null && initParams.size() > 0) { + log.info("Initializing CombinedQueryComponent"); + NamedList<?> all = (NamedList<?>) initParams.get("combiners"); Review Comment: please avoid indexed access to a NamedList unless you really need it. Use forEach. 
########## solr/core/src/java/org/apache/solr/handler/component/CombinedQueryComponent.java: ########## @@ -0,0 +1,590 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.component; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.CombinerParams; +import org.apache.solr.common.params.CursorMarkParams; +import org.apache.solr.common.params.GroupParams; +import org.apache.solr.common.params.ShardParams; +import 
org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.SolrCore; +import org.apache.solr.response.BasicResultContext; +import org.apache.solr.response.ResultContext; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.DocListAndSet; +import org.apache.solr.search.QueryResult; +import org.apache.solr.search.SolrReturnFields; +import org.apache.solr.search.SortSpec; +import org.apache.solr.search.combine.QueryAndResponseCombiner; +import org.apache.solr.search.combine.ReciprocalRankFusion; +import org.apache.solr.util.SolrResponseUtil; +import org.apache.solr.util.plugin.SolrCoreAware; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The CombinedQueryComponent class extends QueryComponent and provides support for executing + * multiple queries and combining their results. + */ +public class CombinedQueryComponent extends QueryComponent implements SolrCoreAware { + + public static final String COMPONENT_NAME = "combined_query"; + protected NamedList<?> initParams; + private Map<String, QueryAndResponseCombiner> combiners = new ConcurrentHashMap<>(); Review Comment: why concurrent? ########## solr/core/src/java/org/apache/solr/search/combine/ReciprocalRankFusion.java: ########## @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search.combine; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.StringJoiner; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TotalHits; +import org.apache.solr.common.params.CombinerParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.handler.component.ShardDoc; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocList; +import org.apache.solr.search.DocSlice; +import org.apache.solr.search.QueryResult; +import org.apache.solr.search.SolrDocumentFetcher; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SortedIntDocSet; + +/** + * The ReciprocalRankFusion class implements a query and response combiner that uses the Reciprocal + * Rank Fusion (RRF) algorithm to combine multiple ranked lists into a single ranked list. 
+ */ +public class ReciprocalRankFusion extends QueryAndResponseCombiner { + + private int k; + + public int getK() { + return k; + } + + public ReciprocalRankFusion() { + this.k = CombinerParams.COMBINER_RRF_K_DEFAULT; + } + + @Override + public void init(NamedList<?> args) { + Object kParam = args.get("k"); + if (kParam != null) { + this.k = Integer.parseInt(kParam.toString()); + } + } + + @Override + public QueryResult combine(List<QueryResult> rankedLists, SolrParams solrParams) { + int kVal = solrParams.getInt(CombinerParams.COMBINER_RRF_K, this.k); + List<DocList> docLists = getDocListsFromQueryResults(rankedLists); + QueryResult combinedResult = new QueryResult(); + combineResults(combinedResult, docLists, false, kVal); + return combinedResult; + } + + private static List<DocList> getDocListsFromQueryResults(List<QueryResult> rankedLists) { + List<DocList> docLists = new ArrayList<>(rankedLists.size()); + for (QueryResult rankedList : rankedLists) { + docLists.add(rankedList.getDocList()); + } + return docLists; + } + + @Override + public List<ShardDoc> combine(Map<String, List<ShardDoc>> shardDocMap, SolrParams solrParams) { + int kVal = solrParams.getInt(CombinerParams.COMBINER_RRF_K, this.k); + HashMap<String, Float> docIdToScore = new HashMap<>(); + Map<String, ShardDoc> docIdToShardDoc = new HashMap<>(); + List<ShardDoc> finalShardDocList = new ArrayList<>(); + for (Map.Entry<String, List<ShardDoc>> shardDocEntry : shardDocMap.entrySet()) { + List<ShardDoc> shardDocList = shardDocEntry.getValue(); + int ranking = 1; + while (ranking <= shardDocList.size()) { + String docId = shardDocList.get(ranking - 1).id.toString(); + docIdToShardDoc.put(docId, shardDocList.get(ranking - 1)); + float rrfScore = 1f / (kVal + ranking); + docIdToScore.compute(docId, (id, score) -> (score == null) ? 
rrfScore : score + rrfScore); + ranking++; + } + } + List<Map.Entry<String, Float>> sortedByScoreDescending = + docIdToScore.entrySet().stream() + .sorted(Collections.reverseOrder(Map.Entry.comparingByValue())) + .toList(); + for (Map.Entry<String, Float> scoredDoc : sortedByScoreDescending) { + String docId = scoredDoc.getKey(); + Float score = scoredDoc.getValue(); + ShardDoc shardDoc = docIdToShardDoc.get(docId); + shardDoc.score = score; + finalShardDocList.add(shardDoc); + } + return finalShardDocList; + } + + private Map<Integer, Integer[]> combineResults( + QueryResult combinedRankedList, + List<DocList> rankedLists, + boolean saveRankPositionsForExplain, + int kVal) { + Map<Integer, Integer[]> docIdToRanks = null; + HashMap<Integer, Float> docIdToScore = new HashMap<>(); + long totalMatches = 0; + for (DocList rankedList : rankedLists) { + DocIterator docs = rankedList.iterator(); + totalMatches = Math.max(totalMatches, rankedList.matches()); + int ranking = 1; + while (docs.hasNext()) { + int docId = docs.nextDoc(); + float rrfScore = 1f / (kVal + ranking); + docIdToScore.compute(docId, (id, score) -> (score == null) ? 
rrfScore : score + rrfScore); + ranking++; + } + } + List<Map.Entry<Integer, Float>> sortedByScoreDescending = + docIdToScore.entrySet().stream() + .sorted(Collections.reverseOrder(Map.Entry.comparingByValue())) + .toList(); + + int combinedResultsLength = docIdToScore.size(); + int[] combinedResultsDocIds = new int[combinedResultsLength]; + float[] combinedResultScores = new float[combinedResultsLength]; + + int i = 0; + for (Map.Entry<Integer, Float> scoredDoc : sortedByScoreDescending) { + combinedResultsDocIds[i] = scoredDoc.getKey(); + combinedResultScores[i] = scoredDoc.getValue(); + i++; + } + + if (saveRankPositionsForExplain) { + docIdToRanks = getRanks(rankedLists, combinedResultsDocIds); + } + + DocSlice combinedResultSlice = + new DocSlice( + 0, + combinedResultsLength, + combinedResultsDocIds, + combinedResultScores, + Math.max(combinedResultsLength, totalMatches), + combinedResultScores.length > 0 ? combinedResultScores[0] : 0, + TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO); + combinedRankedList.setDocList(combinedResultSlice); + SortedIntDocSet docSet = new SortedIntDocSet(combinedResultsDocIds, combinedResultsLength); Review Comment: I'm surprised here; perhaps I'm missing the bigger context. Solr DocSet is used to hold *all* doc IDs that match a query, ignoring rows or start params. It's only for a non-distrib request (distrib=false), as the coordinator isn't going to get *all* doc IDs. My limited understanding of CombinedQueryComponent is that it runs at the coordinator. In such a place, it's not reasonable for it to create a DocSet. ########## solr/solr-ref-guide/modules/query-guide/pages/json-combined-query-dsl.adoc: ########## @@ -0,0 +1,113 @@ += JSON Combined Query DSL +:tabs-sync-option: +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +The Combined Query feature aims to execute multiple queries of multiple kinds across multiple shards of a collection and combine their results based on an algorithm (like Reciprocal Rank Fusion). +It extends the JSON Query DSL, ultimately enabling Hybrid Search. + +[NOTE] +==== +This feature is currently unsupported for non-distributed query, grouping and Cursors. Review Comment: Doesn't support non-distributed?! (single-core)? Really? ########## solr/solrj/src/java/org/apache/solr/common/params/CombinerParams.java: ########## @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.common.params; + +/** + * The CombinerParams class provides constants for configuration parameters related to the combiner. + * It defines keys for various properties used in the combiner configuration. + */ +public class CombinerParams { + + private CombinerParams() {} + + public static final String COMBINER = "combiner"; + public static final String COMBINER_ALGORITHM = COMBINER + ".algorithm"; + public static final String COMBINER_QUERY = COMBINER + ".query"; + public static final String RECIPROCAL_RANK_FUSION = "rrf"; + public static final String COMBINER_RRF_K = COMBINER + "." + RECIPROCAL_RANK_FUSION + ".k"; + public static final String DEFAULT_COMBINER = RECIPROCAL_RANK_FUSION; + public static final int COMBINER_RRF_K_DEFAULT = 60; Review Comment: This one ends with DEFAULT but two others start with DEFAULT. Let's be consistent. ########## solr/core/src/java/org/apache/solr/search/combine/ReciprocalRankFusion.java: ########## @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.search.combine; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.StringJoiner; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TotalHits; +import org.apache.solr.common.params.CombinerParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.handler.component.ShardDoc; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocList; +import org.apache.solr.search.DocSlice; +import org.apache.solr.search.QueryResult; +import org.apache.solr.search.SolrDocumentFetcher; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SortedIntDocSet; + +/** + * The ReciprocalRankFusion class implements a query and response combiner that uses the Reciprocal Review Comment: nit: javadoc style guide prescribes you skip these initial words here as it's boilerplate noise. Remove "The ReciprocalRankFusion class implements " ########## solr/solrj/src/java/org/apache/solr/common/params/CombinerParams.java: ########## @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.common.params; + +/** + * The CombinerParams class provides constants for configuration parameters related to the combiner. Review Comment: nit: again, skip leading boilerplate words (repetition of class name) and get straight to the point ########## solr/core/src/test-files/solr/collection1/conf/solrconfig-combined-query.xml: ########## @@ -0,0 +1,555 @@ +<?xml version="1.0" ?> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<!-- This is a "kitchen sink" config file that tests can use. + When writting a new test, feel free to add *new* items (plugins, + config options, etc...) as long as they don't break any existing + tests. if you need to test something esoteric please add a new + "solrconfig-your-esoteric-purpose.xml" config file. 
+ + Note in particular that this test is used by MinimalSchemaTest so + Anything added to this file needs to work correctly even if there + is no uniqueKey or defaultSearch Field. + --> + +<config> + + <!-- Used to specify an alternate directory to hold all index data. + It defaults to "index" if not present, and should probably + not be changed if replication is in use. --> + <dataDir>${solr.data.dir:}</dataDir> + + <!-- The DirectoryFactory to use for indexes. + solr.StandardDirectoryFactory, the default, is filesystem based. + solr.RAMDirectoryFactory is memory based and not persistent. --> + <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.MockDirectoryFactory}"> + <double name="maxWriteMBPerSecDefault">1000000</double> + <double name="maxWriteMBPerSecFlush">2000000</double> + <double name="maxWriteMBPerSecMerge">3000000</double> + <double name="maxWriteMBPerSecRead">4000000</double> + </directoryFactory> + + <schemaFactory class="ClassicIndexSchemaFactory"/> + + <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion> + + <statsCache class="${solr.statsCache:}"/> + + <xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> + + <updateHandler class="${solr.updateHandler:solr.DirectUpdateHandler2}"> + + <autoCommit> + <maxTime>${solr.autoCommit.maxTime:-1}</maxTime> + </autoCommit> + + <!-- autocommit pending docs if certain criteria are met + <autoCommit> + <maxDocs>10000</maxDocs> + <maxTime>3600000</maxTime> + </autoCommit> + --> + + <updateLog enable="${enable.update.log:true}"> + <str name="dir">${solr.ulog.dir:}</str> + </updateLog> + + <commitWithin> + <softCommit>${solr.commitwithin.softcommit:true}</softCommit> + </commitWithin> + + </updateHandler> + + <query> + <!-- Maximum number of clauses in a boolean query... can affect + range or wildcard queries that expand to big boolean + queries. An exception is thrown if exceeded. 
+ --> + <maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses> + + <!-- Minimum acceptable prefix-size for prefix-based queries. + + Prefix-based queries consume memory in proportion to the number of terms in the index + that start with that prefix. Short prefixes tend to match many many more indexed-terms + and consume more memory as a result, sometimes causing stability issues on the node. + + This setting allows administrators to require that prefixes meet or exceed a specified + minimum length requirement. Prefix queries that don't meet this requirement return an + error to users. The limit may be overridden on a per-query basis by specifying a + 'minPrefixQueryTermLength' local-param value. + + The flag value of '-1' can be used to disable enforcement of this limit. + --> + <minPrefixQueryTermLength>${solr.query.minPrefixLength:-1}</minPrefixQueryTermLength> + + <!-- Cache specification for Filters or DocSets - unordered set of *all* documents + that match a particular query. + --> + <filterCache + size="512" + initialSize="512" + autowarmCount="2" + async="${solr.filterCache.async:false}"/> + + <queryResultCache + size="512" + initialSize="512" + autowarmCount="2"/> + + <documentCache + size="512" + initialSize="512" + autowarmCount="0"/> + + <cache name="perSegFilter" + class="solr.CaffeineCache" + size="10" + initialSize="0" + autowarmCount="10" /> + + <!-- If true, stored fields that are not requested will be loaded lazily. 
+ --> + <enableLazyFieldLoading>true</enableLazyFieldLoading> + + <!-- + + <cache name="myUserCache" + size="4096" + initialSize="1024" + autowarmCount="1024" + regenerator="MyRegenerator" + /> + --> + + <!-- + <useFilterForSortedQuery>true</useFilterForSortedQuery> + --> + + <queryResultWindowSize>10</queryResultWindowSize> + + <!-- boolToFilterOptimizer converts boolean clauses with zero boost + into cached filters if the number of docs selected by the clause exceeds + the threshold (represented as a fraction of the total index) + --> + <boolTofilterOptimizer enabled="false" cacheSize="32" threshold=".05"/> Review Comment: that thing is ancient ########## solr/core/src/java/org/apache/solr/search/combine/QueryAndResponseCombiner.java: ########## @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.search.combine; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.Query; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.handler.component.ShardDoc; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.search.QueryResult; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.plugin.NamedListInitializedPlugin; + +/** + * The QueryAndResponseCombiner class is an abstract base class for combining query results and + * shard documents. It provides a framework for different algorithms to be implemented for merging + * ranked lists and shard documents. + */ +public abstract class QueryAndResponseCombiner implements NamedListInitializedPlugin { + /** + * Combines multiple ranked lists into a single QueryResult. + * + * @param rankedLists a list of ranked lists to be combined + * @param solrParams params to be used when provided at query time + * @return a new QueryResult containing the combined results + * @throws IllegalArgumentException if the input list is empty + */ + public abstract QueryResult combine(List<QueryResult> rankedLists, SolrParams solrParams); + + /** + * Combines shard documents based on the provided map. + * + * @param shardDocMap a map where keys represent shard IDs and values are lists of ShardDocs for + * each shard + * @param solrParams params to be used when provided at query time + * @return a combined list of ShardDocs from all shards + */ + public abstract List<ShardDoc> combine( + Map<String, List<ShardDoc>> shardDocMap, SolrParams solrParams); + + /** + * Retrieves a list of explanations for the given queries and results. 
+ * + * @param queryKeys the keys associated with the queries + * @param queries the list of queries for which explanations are requested + * @param queryResult the list of QueryResult corresponding to each query + * @param searcher the SolrIndexSearcher used to perform the search + * @param schema the IndexSchema used to interpret the search results + * @param solrParams params to be used when provided at query time + * @return a list of explanations for the given queries and results + * @throws IOException if an I/O error occurs during the explanation retrieval process + */ + public abstract NamedList<Explanation> getExplanations( + String[] queryKeys, + List<Query> queries, + List<QueryResult> queryResult, + SolrIndexSearcher searcher, + IndexSchema schema, + SolrParams solrParams) + throws IOException; + + /** + * Retrieves an implementation of the QueryAndResponseCombiner based on the specified algorithm. + * + * @param algorithm the combiner algorithm + * @param combiners The already initialised map of QueryAndResponseCombiner + * @return an instance of QueryAndResponseCombiner corresponding to the specified algorithm. + * @throws SolrException if an unknown combiner algorithm is specified. + */ + public static QueryAndResponseCombiner getImplementation( + String algorithm, Map<String, QueryAndResponseCombiner> combiners) { + if (combiners.containsKey(algorithm)) { Review Comment: nit: we prefer to avoid double-lookup. Just call "get" and check if null. ########## solr/core/src/java/org/apache/solr/handler/component/CombinedQueryComponent.java: ########## @@ -0,0 +1,590 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.component; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.CombinerParams; +import org.apache.solr.common.params.CursorMarkParams; +import org.apache.solr.common.params.GroupParams; +import org.apache.solr.common.params.ShardParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.SolrCore; +import org.apache.solr.response.BasicResultContext; +import org.apache.solr.response.ResultContext; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.DocListAndSet; +import 
org.apache.solr.search.QueryResult; +import org.apache.solr.search.SolrReturnFields; +import org.apache.solr.search.SortSpec; +import org.apache.solr.search.combine.QueryAndResponseCombiner; +import org.apache.solr.search.combine.ReciprocalRankFusion; +import org.apache.solr.util.SolrResponseUtil; +import org.apache.solr.util.plugin.SolrCoreAware; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The CombinedQueryComponent class extends QueryComponent and provides support for executing + * multiple queries and combining their results. + */ +public class CombinedQueryComponent extends QueryComponent implements SolrCoreAware { + + public static final String COMPONENT_NAME = "combined_query"; + protected NamedList<?> initParams; + private Map<String, QueryAndResponseCombiner> combiners = new ConcurrentHashMap<>(); + private int maxCombinerQueries; + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); Review Comment: nit: always statics above non-statics ########## solr/core/src/java/org/apache/solr/search/combine/ReciprocalRankFusion.java: ########## @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.search.combine; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.StringJoiner; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TotalHits; +import org.apache.solr.common.params.CombinerParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.handler.component.ShardDoc; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocList; +import org.apache.solr.search.DocSlice; +import org.apache.solr.search.QueryResult; +import org.apache.solr.search.SolrDocumentFetcher; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SortedIntDocSet; + +/** + * The ReciprocalRankFusion class implements a query and response combiner that uses the Reciprocal + * Rank Fusion (RRF) algorithm to combine multiple ranked lists into a single ranked list. + */ +public class ReciprocalRankFusion extends QueryAndResponseCombiner { + + private int k; + + public int getK() { Review Comment: very weird to see methods above the constructor. Please don't do that. ########## solr/core/src/java/org/apache/solr/handler/component/CombinedQueryComponent.java: ########## @@ -0,0 +1,590 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.component; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.CombinerParams; +import org.apache.solr.common.params.CursorMarkParams; +import org.apache.solr.common.params.GroupParams; +import org.apache.solr.common.params.ShardParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.SolrCore; +import org.apache.solr.response.BasicResultContext; +import org.apache.solr.response.ResultContext; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.DocListAndSet; +import 
org.apache.solr.search.QueryResult; +import org.apache.solr.search.SolrReturnFields; +import org.apache.solr.search.SortSpec; +import org.apache.solr.search.combine.QueryAndResponseCombiner; +import org.apache.solr.search.combine.ReciprocalRankFusion; +import org.apache.solr.util.SolrResponseUtil; +import org.apache.solr.util.plugin.SolrCoreAware; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The CombinedQueryComponent class extends QueryComponent and provides support for executing + * multiple queries and combining their results. + */ +public class CombinedQueryComponent extends QueryComponent implements SolrCoreAware { + + public static final String COMPONENT_NAME = "combined_query"; + protected NamedList<?> initParams; + private Map<String, QueryAndResponseCombiner> combiners = new ConcurrentHashMap<>(); + private int maxCombinerQueries; + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + @Override + public void init(NamedList<?> args) { + super.init(args); + this.initParams = args; + this.maxCombinerQueries = CombinerParams.DEFAULT_MAX_COMBINER_QUERIES; + } + + @Override + public void inform(SolrCore core) { + if (initParams != null && initParams.size() > 0) { + log.info("Initializing CombinedQueryComponent"); + NamedList<?> all = (NamedList<?>) initParams.get("combiners"); + for (int i = 0; i < all.size(); i++) { + String name = all.getName(i); + NamedList<?> combinerConfig = (NamedList<?>) all.getVal(i); + String className = (String) combinerConfig.get("class"); + QueryAndResponseCombiner combiner = + core.getResourceLoader().newInstance(className, QueryAndResponseCombiner.class); + combiner.init(combinerConfig); + combiners.compute( + name, + (k, existingCombiner) -> { + if (existingCombiner == null) { + return combiner; + } + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "Found more than one combiner with same name"); + }); + } + Object maxQueries = 
initParams.get("maxCombinerQueries"); + if (maxQueries != null) { + this.maxCombinerQueries = Integer.parseInt(maxQueries.toString()); + } + } + combiners.computeIfAbsent( + CombinerParams.RECIPROCAL_RANK_FUSION, + key -> { + ReciprocalRankFusion reciprocalRankFusion = new ReciprocalRankFusion(); + reciprocalRankFusion.init(initParams); + return reciprocalRankFusion; + }); + } + + /** + * Overrides the prepare method to handle combined queries. + * + * @param rb the ResponseBuilder to prepare + * @throws IOException if an I/O error occurs during preparation + */ + @Override + public void prepare(ResponseBuilder rb) throws IOException { + if (rb instanceof CombinedQueryResponseBuilder crb) { + SolrParams params = crb.req.getParams(); + if (params.get(CursorMarkParams.CURSOR_MARK_PARAM) != null + || params.getBool(GroupParams.GROUP, false)) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, "Unsupported functionality for Combined Queries."); + } + String[] queriesToCombineKeys = params.getParams(CombinerParams.COMBINER_QUERY); + if (queriesToCombineKeys.length > maxCombinerQueries) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "Too many queries to combine: limit is " + maxCombinerQueries); + } + for (String queryKey : queriesToCombineKeys) { + final var unparsedQuery = params.get(queryKey); + ResponseBuilder rbNew = new ResponseBuilder(rb.req, new SolrQueryResponse(), rb.components); + rbNew.setQueryString(unparsedQuery); + super.prepare(rbNew); + crb.responseBuilders.add(rbNew); + } + } + super.prepare(rb); + } + + /** + * Overrides the process method to handle CombinedQueryResponseBuilder instances. This method + * processes the responses from multiple shards, combines them using the specified + * QueryAndResponseCombiner strategy, and sets the appropriate results and metadata in the + * CombinedQueryResponseBuilder. 
+ * + * @param rb the ResponseBuilder object to process + * @throws IOException if an I/O error occurs during processing + */ + @Override + public void process(ResponseBuilder rb) throws IOException { + if (rb instanceof CombinedQueryResponseBuilder crb) { + boolean partialResults = false; + boolean segmentTerminatedEarly = false; + boolean setMaxHitsTerminatedEarly = false; + List<QueryResult> queryResults = new ArrayList<>(); + for (ResponseBuilder rbNow : crb.responseBuilders) { + // Just a placeholder for future implementation for Cursors + rbNow.setCursorMark(crb.getCursorMark()); + super.process(rbNow); + DocListAndSet docListAndSet = rbNow.getResults(); + QueryResult queryResult = new QueryResult(); + queryResult.setDocListAndSet(docListAndSet); + queryResults.add(queryResult); + partialResults |= queryResult.isPartialResults(); + if (queryResult.getSegmentTerminatedEarly() != null) { + segmentTerminatedEarly |= queryResult.getSegmentTerminatedEarly(); + } + if (queryResult.getMaxHitsTerminatedEarly() != null) { + setMaxHitsTerminatedEarly |= queryResult.getMaxHitsTerminatedEarly(); + } + } + String algorithm = + rb.req + .getParams() + .get(CombinerParams.COMBINER_ALGORITHM, CombinerParams.DEFAULT_COMBINER); + QueryAndResponseCombiner combinerStrategy = + QueryAndResponseCombiner.getImplementation(algorithm, combiners); + QueryResult combinedQueryResult = combinerStrategy.combine(queryResults, rb.req.getParams()); + combinedQueryResult.setPartialResults(partialResults); + combinedQueryResult.setSegmentTerminatedEarly(segmentTerminatedEarly); + combinedQueryResult.setMaxHitsTerminatedEarly(setMaxHitsTerminatedEarly); + crb.setResult(combinedQueryResult); + if (rb.isDebug()) { + String[] queryKeys = rb.req.getParams().getParams(CombinerParams.COMBINER_QUERY); + List<Query> queries = crb.responseBuilders.stream().map(ResponseBuilder::getQuery).toList(); + NamedList<Explanation> explanations = + combinerStrategy.getExplanations( + queryKeys, + queries, + 
queryResults, + rb.req.getSearcher(), + rb.req.getSchema(), + rb.req.getParams()); + rb.addDebugInfo("combinerExplanations", explanations); + } + ResultContext ctx = new BasicResultContext(crb); + crb.rsp.addResponse(ctx); + crb.rsp + .getToLog() Review Comment: nit: please use the convenience method addToLog(...) ########## solr/core/src/java/org/apache/solr/handler/component/CombinedQueryComponent.java: ########## @@ -0,0 +1,590 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.handler.component; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.CombinerParams; +import org.apache.solr.common.params.CursorMarkParams; +import org.apache.solr.common.params.GroupParams; +import org.apache.solr.common.params.ShardParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.SolrCore; +import org.apache.solr.response.BasicResultContext; +import org.apache.solr.response.ResultContext; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.DocListAndSet; +import org.apache.solr.search.QueryResult; +import org.apache.solr.search.SolrReturnFields; +import org.apache.solr.search.SortSpec; +import org.apache.solr.search.combine.QueryAndResponseCombiner; +import org.apache.solr.search.combine.ReciprocalRankFusion; +import org.apache.solr.util.SolrResponseUtil; +import org.apache.solr.util.plugin.SolrCoreAware; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The 
CombinedQueryComponent class extends QueryComponent and provides support for executing + * multiple queries and combining their results. + */ +public class CombinedQueryComponent extends QueryComponent implements SolrCoreAware { + + public static final String COMPONENT_NAME = "combined_query"; + protected NamedList<?> initParams; + private Map<String, QueryAndResponseCombiner> combiners = new ConcurrentHashMap<>(); + private int maxCombinerQueries; + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + @Override + public void init(NamedList<?> args) { + super.init(args); + this.initParams = args; + this.maxCombinerQueries = CombinerParams.DEFAULT_MAX_COMBINER_QUERIES; + } + + @Override + public void inform(SolrCore core) { + if (initParams != null && initParams.size() > 0) { + log.info("Initializing CombinedQueryComponent"); Review Comment: It's common to work on a new component and log at info but that's pretty noisy. Imagine if every search component did that! ########## solr/core/src/test-files/solr/collection1/conf/solrconfig-combined-query.xml: ########## @@ -0,0 +1,555 @@ +<?xml version="1.0" ?> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> + +<!-- This is a "kitchen sink" config file that tests can use. + When writing a new test, feel free to add *new* items (plugins, + config options, etc...) as long as they don't break any existing + tests. If you need to test something esoteric please add a new + "solrconfig-your-esoteric-purpose.xml" config file. + + Note in particular that this test is used by MinimalSchemaTest so + anything added to this file needs to work correctly even if there + is no uniqueKey or defaultSearch Field. + --> + +<config> Review Comment: We may need a test config file, but these things are a burden for the Solr project to maintain over time, especially when they are kitchen-sink configs like this one. It's better to start with a "minimal" config and only add what you need. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
