Re: [PR] Draft - PoC execute FQs in parallel [solr]

via GitHub Thu, 29 Feb 2024 11:31:32 -0800


tuannh982 commented on code in PR #2322:
URL: https://github.com/apache/solr/pull/2322#discussion_r1507851176



##########
solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java:
##########
@@ -1166,6 +1166,181 @@ public static class ProcessedFilter {
    * resolved against the filter cache, and populate it.
    */
   public ProcessedFilter getProcessedFilter(List<Query> queries) throws 
IOException {
+    ProcessedFilter pf = new ProcessedFilter();
+    if (queries == null || queries.isEmpty()) {
+      return pf;
+    }
+
+    int length = queries.size();
+
+    // We combine all the filter queries that come from the filter cache into 
"answer".
+    // This might become pf.answer but not if there are any non-cached filters
+    DocSet[] answer0 = new DocSet[]{null};
+    DocSet answer = null;
+    boolean[] neg = new boolean[length];
+    DocSet[] sets = new DocSet[length];
+    List<ExtendedQuery> notCached = null;
+    List<PostFilter> postFilters = null;
+    AtomicReference<IOException> ioException = new AtomicReference<>(null);
+    List<CompletableFuture<Void>> futures = new ArrayList<>(length);
+
+    int end = 0; // size of "sets" and "neg"; parallel arrays
+    AtomicInteger numDocSets = new AtomicInteger(0);
+
+    for (Query q : queries) {
+      if (q instanceof ExtendedQuery) {
+        ExtendedQuery eq = (ExtendedQuery) q;
+        if (!eq.getCache()) {
+          if (eq.getCost() >= 100 && eq instanceof PostFilter) {
+            if (postFilters == null) postFilters = new ArrayList<>(length - 
end);
+            postFilters.add((PostFilter) q);
+          } else {
+            if (notCached == null) notCached = new ArrayList<>(length - end);
+            notCached.add((ExtendedQuery) q);
+          }
+          continue;
+        }
+      }
+
+      if (filterCache == null) {
+        // there is no cache: don't pull bitsets
+        if (notCached == null) notCached = new ArrayList<>(length - end);
+        WrappedQuery uncached = new WrappedQuery(q);
+        uncached.setCache(false);
+        notCached.add(uncached);
+        continue;
+      }
+
+      final int index = end;
+      CompletableFuture<Void> future = CompletableFuture.supplyAsync(() -> {
+        Query posQuery = QueryUtils.getAbs(q);
+        if (ioException.get() != null) {
+          return null;
+        }
+        DocSet docSet = null;
+        try {
+          docSet = getPositiveDocSet(posQuery);
+        } catch (IOException e) {
+          ioException.compareAndSet(null, e);
+          return null;
+        }
+        // Negative query if absolute value different from original
+        if (Objects.equals(q, posQuery)) {
+          synchronized (answer0) {
+            // keep track of the smallest positive set; use "answer" for this.
+            if (answer0[0] == null) {
+              answer0[0] = docSet;
+              return null;
+            }
+            // note: assume that size() is cached.  It generally comes from 
the cache, so should be.
+            if (docSet.size() < answer0[0].size()) {
+              // swap answer & docSet so that answer is smallest
+              DocSet tmp = answer0[0];
+              answer0[0] = docSet;
+              docSet = tmp;
+            }
+          }
+          neg[index] = false;
+        } else {
+          neg[index] = true;
+        }
+        sets[index] = docSet;
+        numDocSets.incrementAndGet();
+        return null;
+      });
+      futures.add(future);
+      end++;
+    } // end of queries
+
+    // await futures
+    CompletableFuture<Void> future = 
CompletableFuture.allOf(futures.toArray(new CompletableFuture<?>[0]));
+    try {
+      future.get(); // TODO add time limit?
+      answer = answer0[0];
+    } catch (InterruptedException | ExecutionException e) {
+      throw new RuntimeException(e);
+    }
+    if (ioException.get() != null) {
+      throw ioException.get();
+    }
+
+    if (numDocSets.get() > 0) {
+      // Are all of our normal cached filters negative?
+      if (answer == null) {
+        answer = getLiveDocSet();
+      }
+
+      // This optimizes for the case where we have more than 2 filters and 
instead
+      // of copying the bitsets we make one mutable bitset. We should only do 
this
+      // for BitDocSet since it clones the backing bitset for andNot and 
intersection.
+      if (numDocSets.get() > 1 && answer instanceof BitDocSet) {
+        answer = MutableBitDocSet.fromBitDocSet((BitDocSet) answer);
+      }
+
+      // do negative queries first to shrink set size
+      for (int i = 0; i < end; i++) {
+        if (neg[i] && sets[i] != null) answer = answer.andNot(sets[i]);
+      }
+
+      for (int i = 0; i < end; i++) {
+        if (!neg[i] && sets[i] != null) answer = answer.intersection(sets[i]);
+      }
+
+      // Make sure to keep answer as an immutable DocSet if we made it mutable
+      answer = MutableBitDocSet.unwrapIfMutable(answer);
+    }
+
+    // ignore "answer" if it simply matches all docs
+    if (answer != null && answer.size() == numDocs()) {
+      answer = null;
+    }
+
+    // answer is done.
+
+    // If no notCached nor postFilters, we can return now.
+    if (notCached == null && postFilters == null) {
+      // "answer" is the only part of the filter, so set it.
+      if (answer != null) {
+        pf.answer = answer;
+        pf.filter = answer.makeQuery();
+      }
+      return pf;
+    }
+    // pf.answer will remain null ...  (our local "answer" var is not the 
complete answer)
+
+    // Set pf.filter based on combining "answer" and "notCached"
+    if (notCached == null) {
+      if (answer != null) {
+        pf.filter = answer.makeQuery();
+      }
+    } else {
+      notCached.sort(sortByCost); // pointless?
+      final BooleanQuery.Builder builder = new BooleanQuery.Builder();
+      if (answer != null) {
+        builder.add(answer.makeQuery(), Occur.FILTER);
+      }
+      for (ExtendedQuery eq : notCached) {
+        Query q = eq.getCostAppliedQuery();
+        builder.add(q, Occur.FILTER);
+      }
+      pf.filter = builder.build();
+    }
+
+    // Set pf.postFilter
+    if (postFilters != null) {
+      postFilters.sort(sortByCost);
+      for (int i = postFilters.size() - 1; i >= 0; i--) {
+        DelegatingCollector prev = pf.postFilter;
+        pf.postFilter = postFilters.get(i).getFilterCollector(this);
+        if (prev != null) pf.postFilter.setDelegate(prev);
+      }
+    }
+
+    return pf;
+  }
+
+  // the old method
+  public ProcessedFilter getProcessedFilter_bk(List<Query> queries) throws 
IOException {

Review Comment:
   This is the old code; I'm just keeping it here for later benchmarking.



##########
solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java:
##########
@@ -1389,6 +1564,9 @@ private DocSet getResult(DocsEnumState deState, int 
largestPossible) throws IOEx
 
   // query must be positive
   protected DocSet getDocSetNC(Query query, DocSet filter) throws IOException {
+    try {
+      Thread.sleep(20);

Review Comment:
   Instead of indexing millions of documents, I'm just simulating the slowness here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org

Re: [PR] Draft - PoC execute FQs in parallel [solr]

Reply via email to