kotman12 commented on code in PR #2382: URL: https://github.com/apache/solr/pull/2382#discussion_r1589698538
########## solr/modules/monitor/src/java/org/apache/solr/monitor/search/ReverseSearchComponent.java: ########## @@ -0,0 +1,192 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one or more + * * contributor license agreements. See the NOTICE file distributed with + * * this work for additional information regarding copyright ownership. + * * The ASF licenses this file to You under the Apache License, Version 2.0 + * * (the "License"); you may not use this file except in compliance with + * * the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package org.apache.solr.monitor.search; + +import static org.apache.solr.monitor.MonitorConstants.DOCUMENT_BATCH_KEY; +import static org.apache.solr.monitor.MonitorConstants.MONITOR_DOCUMENTS_KEY; +import static org.apache.solr.monitor.MonitorConstants.MONITOR_OUTPUT_KEY; +import static org.apache.solr.monitor.MonitorConstants.QUERY_MATCH_TYPE_KEY; +import static org.apache.solr.monitor.MonitorConstants.REVERSE_SEARCH_PARAM_NAME; +import static org.apache.solr.monitor.MonitorConstants.SOLR_MONITOR_CACHE_NAME; +import static org.apache.solr.monitor.MonitorConstants.WRITE_TO_DOC_LIST_KEY; +import static org.apache.solr.monitor.search.QueryMatchResponseCodec.mergeResponses; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ExecutorService; +import java.util.stream.Collectors; +import org.apache.lucene.document.Document; +import 
org.apache.lucene.monitor.DocumentBatchVisitor; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.NamedThreadFactory; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.ExecutorUtil; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.CloseHook; +import org.apache.solr.core.SolrCore; +import org.apache.solr.handler.component.ResponseBuilder; +import org.apache.solr.handler.component.SearchComponent; +import org.apache.solr.handler.component.ShardRequest; +import org.apache.solr.handler.loader.JsonLoader; +import org.apache.solr.monitor.SolrMonitorQueryDecoder; +import org.apache.solr.monitor.cache.MonitorQueryCache; +import org.apache.solr.monitor.cache.SharedMonitorCache; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.QParser; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.update.DocumentBuilder; +import org.apache.solr.util.plugin.SolrCoreAware; + +public class ReverseSearchComponent extends SearchComponent implements SolrCoreAware { + + private static final String MATCHER_THREAD_COUNT_KEY = "threadCount"; + + private SolrMatcherSinkFactory solrMatcherSinkFactory = new SolrMatcherSinkFactory(); + + private ExecutorService executor; + + @Override + public void init(NamedList<?> args) { + super.init(args); + Object threadCount = args.get(MATCHER_THREAD_COUNT_KEY); + if (threadCount instanceof Integer) { + executor = + ExecutorUtil.newMDCAwareFixedThreadPool( + (Integer) threadCount, new NamedThreadFactory("monitor-matcher")); + solrMatcherSinkFactory = new SolrMatcherSinkFactory(executor); + } + } + + @Override + public void prepare(ResponseBuilder rb) { + if (skipReverseSearch(rb)) { + return; + } + var req = rb.req; + var documentBatch = documentBatch(req); + req.getContext().put(DOCUMENT_BATCH_KEY, documentBatch); + var 
writeToDocListRaw = req.getParams().get(WRITE_TO_DOC_LIST_KEY, "false"); + boolean writeToDocList = Boolean.parseBoolean(writeToDocListRaw); Review Comment: I applied the change, but I am unresolving this for further discussion. This flag may seem a bit strange without context. Basically, the proposal has two ways of generating responses. The first is just a regular Solr document list. The more I think about it, the more I am convinced that this is all that we _really_ need. However, if you run the tests you'll see there is a second/optional/overlapping response section, which I've called `monitor.{ .. }`, that maps to output generated by lucene-monitor's candidate matchers. This could include things like offsets of matches. So far I let the client decide which response sections they want. The main advantage of _not_ writing to a doc list and just having a `monitor.{}` response is that you can more easily parallelize the expensive post-filter. That is because I don't think there is any _neat_ way to have an async callback that invokes a leaf collector to tell it "oh by the way this doc ID matches": that collector might already be closed, and it would probably be hacky to re-open it. Having said that, the leaf-parallelization PR https://github.com/apache/solr/pull/2248 that you are working on is very interesting for this use case because I think it would totally remove this advantage. And at that point you could _always_ just send the doc list. Monitor-specific output like highlights could be adapted to conform to the same structure as "traditional" Solr highlights. There might still be some use for `monitor.{}` in the response, but more in a supporting role than as the main payload (as is currently done). Just as @janhoy suggested, I really need to document this.
I was putting documentation off until someone commented on the viability of this solution as a whole, but I think it is hard to comment on the viability of the solution without docs, so I can't escape it 😃 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org