Github user romseygeek commented on a diff in the pull request:
https://github.com/apache/lucene-solr/pull/345#discussion_r179679922
--- Diff:
lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
---
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.PriorityQueue;
+
+/**
+ * A {@link MatchesIterator} that combines matches from a set of sub-iterators
+ *
+ * Matches are sorted by their start positions, and then by their end positions, so that
+ * prefixes sort first. Matches may overlap.
+ */
+public class DisjunctionMatchesIterator implements MatchesIterator {
+
+ /**
+ * Create a {@link DisjunctionMatchesIterator} over a list of terms
+ *
+ * Only terms that have at least one match in the given document will be included
+ */
+ public static DisjunctionMatchesIterator fromTerms(LeafReaderContext context, int doc, String field, List<Term> terms) throws IOException {
+ return fromTermsEnum(context, doc, field, asBytesRefIterator(terms));
+ }
+
+ private static BytesRefIterator asBytesRefIterator(List<Term> terms) {
+ return new BytesRefIterator() {
+ int i = 0;
+ @Override
+ public BytesRef next() {
+ if (i >= terms.size())
+ return null;
+ return terms.get(i++).bytes();
+ }
+ };
+ }
+
+ /**
+ * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
+ *
+ * Only terms that have at least one match in the given document will be included
+ */
+ public static DisjunctionMatchesIterator fromTermsEnum(LeafReaderContext context, int doc, String field, BytesRefIterator terms) throws IOException {
+ List<MatchesIterator> mis = new ArrayList<>();
+ Terms t = context.reader().terms(field);
+ if (t == null)
+ return null;
+ TermsEnum te = t.iterator();
+ PostingsEnum reuse = null;
+ for (BytesRef term = terms.next(); term != null; term = terms.next()) {
+ if (te.seekExact(term)) {
+ PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
+ if (pe.advance(doc) == doc) {
+ // TODO do we want to use the copied term here, or instead create a label that associates all of the TMIs with a single term?
+ mis.add(new TermMatchesIterator(BytesRef.deepCopyOf(term), pe));
+ reuse = null;
+ }
+ else {
+ reuse = pe;
+ }
+ }
+ }
+ if (mis.size() == 0)
+ return null;
+ return new DisjunctionMatchesIterator(mis);
+ }
+
+ static MatchesIterator fromSubIterators(List<MatchesIterator> mis) throws IOException {
+ if (mis.size() == 0)
+ return null;
+ if (mis.size() == 1)
+ return mis.get(0);
+ return new DisjunctionMatchesIterator(mis);
--- End diff --
I've moved the notion of a match with no terms up a level into the Matches
object, so this is dealt with in Matches.fromSubMatches() now.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]