All, I'm trying to find all spans in a given String via stored offsets in Lucene 5.3.1. I wanted to use the Highlighter with a NullFragmenter, but that highlights only the matching terms rather than the full Spans (is this related to LUCENE-6796?).
My current code iterates through the spans, stores the span positions in a list, and gathers the character offsets via a SpanCollector into a Map<Integer, OffsetAttribute>. Is there a simpler way? My code looks something like this: String s = "the quick brown fox jumped over the lazy dog"; String field = "f"; Analyzer analyzer = new StandardAnalyzer(); SpanQuery spanQuery = new SpanNearQuery( new SpanQuery[] { new SpanTermQuery(new Term(field, "fox")), new SpanTermQuery(new Term(field, "quick")) }, 3, false ); MemoryIndex index = new MemoryIndex(true); index.addField(field, s, analyzer); index.freeze(); IndexSearcher searcher = index.createSearcher(); IndexReader reader = searcher.getIndexReader(); spanQuery = (SpanQuery) spanQuery.rewrite(reader); SpanWeight weight = (SpanWeight) searcher.createWeight(spanQuery, false); Spans spans = weight.getSpans(reader.leaves().get(0), SpanWeight.Postings.OFFSETS); if (spans == null) { //do something with full string return; } OffsetSpanCollector offsetSpanCollector = new OffsetSpanCollector(); List<OffsetAttribute> spanPositions = new ArrayList<>(); while (spans.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { OffsetAttributeImpl offsetAttribute = new OffsetAttributeImpl(); offsetAttribute.setOffset(spans.startPosition(), spans.endPosition()-1); spanPositions.add(offsetAttribute); spans.collect(offsetSpanCollector); } } Map<Integer, OffsetAttribute> charOffsets = offsetSpanCollector.getOffsets(); //now iterate through the list of spanPositions and grab the character offsets for the start and end tokens of each //span from the charOffsets ...
private class OffsetSpanCollector implements SpanCollector { Map<Integer, Offset> charOffsets = new HashMap<>(); @Override public void collectLeaf(PostingsEnum postingsEnum, int i, Term term) throws IOException { OffsetAttributeImpl offsetAttribute = new OffsetAttributeImpl(); offsetAttribute.setOffset(postingsEnum.startOffset(), postingsEnum.endOffset()); charOffsets.put(i, offsetAttribute); } @Override public void reset() { //don't think I need to do anything with this? } public Map<Integer, OffsetAttribute> getOffsets() { return charOffsets; } }