Github user jimczi commented on a diff in the pull request:
https://github.com/apache/lucene-solr/pull/317#discussion_r165372379
--- Diff:
lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
---
@@ -124,34 +123,122 @@ public PostingsEnum getPostingsEnum() {
@Override
public boolean nextPosition() throws IOException {
- if (posCounter < postingsEnum.freq()) {
- posCounter++;
+ if (posCounter > 0) {
+ posCounter--;
postingsEnum.nextPosition(); // note: we don't need to save the
position
return true;
} else {
return false;
}
}
+ @Override
+ public BytesRef getTerm() throws IOException {
+ return term;
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return postingsEnum.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return postingsEnum.endOffset();
+ }
+
@Override
public int freq() throws IOException {
- return postingsEnum.freq();
+ return freq;
+ }
+ }
+
+ public static final OffsetsEnum EMPTY = new OffsetsEnum() {
+ @Override
+ public boolean nextPosition() throws IOException {
+ return false;
}
@Override
public BytesRef getTerm() throws IOException {
- return term;
+ throw new UnsupportedOperationException();
}
@Override
public int startOffset() throws IOException {
- return postingsEnum.startOffset();
+ throw new UnsupportedOperationException();
}
@Override
public int endOffset() throws IOException {
- return postingsEnum.endOffset();
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return 0;
+ }
+
+ };
+
+ public static class MultiOffsetsEnum extends OffsetsEnum {
+
+ private final PriorityQueue<OffsetsEnum> queue;
+ private boolean started = false;
+
+ public MultiOffsetsEnum(List<OffsetsEnum> inner) throws IOException {
+ this.queue = new PriorityQueue<>();
+ for (OffsetsEnum oe : inner) {
+ if (oe.nextPosition())
+ this.queue.add(oe);
+ }
+ }
+
+ @Override
+ public boolean nextPosition() throws IOException {
+ if (started == false) {
+ started = true;
+ return this.queue.size() > 0;
+ }
+ if (this.queue.size() > 0) {
+ OffsetsEnum top = this.queue.poll();
+ if (top.nextPosition()) {
+ this.queue.add(top);
+ return true;
+ }
+ else {
+ top.close();
+ }
+ return this.queue.size() > 0;
+ }
+ return false;
+ }
+
+ @Override
+ public BytesRef getTerm() throws IOException {
+ return this.queue.peek().getTerm();
}
+ @Override
+ public int startOffset() throws IOException {
+ return this.queue.peek().startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return this.queue.peek().endOffset();
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return this.queue.peek().freq();
+ }
+
+ @Override
+ public void close() throws IOException {
+ // most child enums will have been closed in .nextPosition()
+ // here all remaining non-exhausted enums are closed
+ IOUtils.close(queue);
+ }
--- End diff --
It would be great to have a way to mark an `OffsetsEnum` with an id so that we
know which enum matches when `MultiOffsetsEnum` is used. Maybe a `getId` in
`OffsetsEnum` is enough? This way we could link multi-term queries with the
same id and use this information for scoring.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]