Here is an example that is failing. However, just running dir(SpanScorer) with your console commands shows that it has no extra attributes other than the base Java object ones, and it is using the spans version of the code.
# ====================================================================
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
# ====================================================================

from unittest import TestCase, main
from lucene import *


class TestFormatter(PythonFormatter):
    """Formatter that wraps scoring terms in <b> tags.

    Every highlighted term is also reported back to the owning test case
    through its countHighlightTerm() method.
    """

    def __init__(self, testCase):
        super(TestFormatter, self).__init__()
        self.testCase = testCase

    def highlightTerm(self, originalText, group):
        """Return originalText, bolded when its token group has a score > 0."""
        if group.getTotalScore() <= 0:
            return originalText

        self.testCase.countHighlightTerm(originalText)
        return "<b>" + originalText + "</b>"


class HighlighterTestCase(TestCase):
    """
    Unit tests ported from Java Lucene.
    2004 by Yura Smolsky ;)
    """

    FIELD_NAME = "contents"

    # Two documents.  The first is built from four adjacent literals that
    # Python concatenates implicitly; the comma before the last literal is
    # what keeps the "typo" sentence a separate document.
    texts = [
        ("A wicked problem is one for which each attempt to create a solution changes the understanding of the problem. Wicked problems cannot be solved in a traditional linear fashion, because the problem definition evolves as new possible solutions are considered and/or implemented."
         "Wicked problems always occur in a social context -- the wickedness of the problem reflects the diversity among the stakeholders in the problem."
         "From http://cognexus.org/id42.htm"
         "Most projects in organizations -- and virtually all technology-related projects these days -- are about wicked problems. Indeed, it is the social complexity of these problems, not their technical complexity, that overwhelms most current problem solving and project management approaches."),
        "This text has a typo in referring to whicked problems",
    ]

    def __init__(self, *args):
        super(HighlighterTestCase, self).__init__(*args)

        self.foundList = []
        self.parser = QueryParser(self.FIELD_NAME, StandardAnalyzer())

    def testSimpleHighlighter(self):
        """Highlight every hit for "wicked" with a SpanScorer and print it.

        There are no assertions: the test only checks that highlighting
        runs without raising, and prints the fragments and matched terms.
        """
        self.doSearching("wicked")
        formatter = TestFormatter(self)
        # Previously referenced without being defined (NameError).  The
        # value 2 is an assumption -- adjust to the number of fragments wanted.
        maxNumFragmentsRequired = 2

        for i in range(self.hits.length()):
            self.foundList = []
            text = self.hits.doc(i).get(self.FIELD_NAME)
            tokenStream = CachingTokenFilter(
                self.analyzer.tokenStream(self.FIELD_NAME, StringReader(text)))
            scorer = SpanScorer(self.query, self.FIELD_NAME, tokenStream)
            highlighter = Highlighter(formatter, scorer)

            # The scorer has already read the cached stream; rewind it and
            # hand the *same* caching filter to the highlighter.  Passing
            # the underlying (already consumed) stream yields no tokens.
            tokenStream.reset()
            result = highlighter.getBestFragments(tokenStream, text,
                                                  maxNumFragmentsRequired,
                                                  "...")
            print("\t%s" % result)
            print("Found: %s" % (self.foundList,))

    def doSearching(self, queryString):
        """Parse and rewrite queryString, then store the hits on self.hits."""
        # Kept on self so that tearDown() can close it; the hits are read
        # lazily and need the searcher to stay open during the test.
        self.searcher = IndexSearcher(self.ramDir)
        self.query = self.parser.parse(queryString)

        # for any multi-term queries to work (prefix, wildcard, range,
        # fuzzy etc) you must use a rewritten query!
        self.query = self.query.rewrite(self.reader)

        print("Searching for: %s" % self.query.toString(self.FIELD_NAME))
        self.hits = self.searcher.search(self.query)
        self.numHighlights = 0

    def countHighlightTerm(self, found):
        """Record one highlighted term (called by TestFormatter)."""
        self.foundList.append(found)
        self.numHighlights += 1  # update stats used in assertions

    def setUp(self):
        """Index every entry of self.texts into a fresh RAMDirectory."""
        self.searcher = None
        self.analyzer = StandardAnalyzer()
        self.ramDir = RAMDirectory()
        writer = IndexWriter(self.ramDir, self.analyzer, True)
        for text in self.texts:
            self.addDoc(writer, text)

        writer.optimize()
        writer.close()
        self.reader = IndexReader.open(self.ramDir)
        self.numHighlights = 0

    def tearDown(self):
        """Release the searcher and reader opened for the test."""
        if self.searcher is not None:
            self.searcher.close()
            self.searcher = None
        self.reader.close()

    def addDoc(self, writer, text):
        """Add text to writer as a stored, tokenized FIELD_NAME field."""
        d = Document()
        f = Field(self.FIELD_NAME, text,
                  Field.Store.YES, Field.Index.TOKENIZED,
                  Field.TermVector.YES)
        d.add(f)
        writer.addDocument(d)


if __name__ == "__main__":
    import sys
    import lucene

    lucene.initVM(lucene.CLASSPATH)
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        # unittest's main() ends by raising SystemExit, so the bare except
        # is what keeps this loop running (presumably for leak hunting).
        while True:
            try:
                main()
            except:
                pass
    else:
        main()