Hello Lucene-User, Has anyone tried to do highlighting with HTML pages?
I am trying to do this using demo example by Keld H. Hansen article "Unweaving a Tangled Web HTMLParser and Lucene" but I am getting "null" value for text at line #47 Any Idea? 1 package org.apache.lucene.search.highlight; 2 3 import java.io.StringReader; 4 5 import org.apache.lucene.analysis.Analyzer; 6 import org.apache.lucene.analysis.TokenStream; 7 import org.apache.lucene.analysis.standard.StandardAnalyzer; 8 import org.apache.lucene.queryParser.QueryParser; 9 import org.apache.lucene.search.Hits; 10 import org.apache.lucene.search.IndexSearcher; 11 import org.apache.lucene.search.Query; 12 import org.apache.lucene.search.highlight.Formatter; 13 import org.apache.lucene.search.highlight.Highlighter; 14 import org.apache.lucene.search.highlight.QueryScorer; 15 import org.apache.lucene.search.highlight.SimpleFragmenter; 16 17 public class Searcher { 18 19 static Query query; 20 static Hits hits; 21 22 private static final String FIELD_NAME = "contents"; 23 private static final String indexDir = "/opt/dynamo/prod/hww-doc/hww/help/index"; 24 25 private static Analyzer analyzer = new StandardAnalyzer(); 26 27 public static void main(String[] args) throws Exception { 28 29 IndexSearcher is = new IndexSearcher(indexDir); 30 String searchCriteria = "scholarly"; 31 query = QueryParser.parse(searchCriteria, "contents", analyzer); 32 33 hits = is.search(query); 34 System.out.println("found in: " + query +"\nhits-length:" +hits.length()); 35 36 doStandardHighlights(); 37 38 is.close(); 39 } 40 41 static void doStandardHighlights() throws Exception { 42 Highlighter highlighter = new Highlighter(new MyBolder(), new QueryScorer(query)); 43 System.out.println("Highlighter: " + highlighter +"\nhits-length:" +hits.length()); 44 highlighter.setTextFragmenter(new SimpleFragmenter(20)); 45 for (int i = 0; i < hits.length(); i++) { 46 System.out.println("URL " + (i + 1) + ": " + hits.doc(i).getField("path").stringValue()); 47 String text = hits.doc(i).get("FIELD_NAME"); 48 int 
maxNumFragmentsRequired = 2; 49 String fragmentSeparator = "..."; 50 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); 51 52 String result = 53 highlighter.getBestFragments( 54 tokenStream, 55 text, 56 maxNumFragmentsRequired, 57 fragmentSeparator); 58 System.out.println("\tfound in: " + result); 59 } 60 } 61 62 private static class MyBolder implements Formatter { 63 public String highlightTerm(String originalText , TokenGroup group) 64 { 65 if(group.getTotalScore()<=0) 66 { 67 return originalText; 68 } 69 return "<b>" + originalText + "</b>"; 70 } 71 } 72 73 } Yagnesh N. Shah Senior Technology Engineer CS Dept., 4th Floor H. W. Wilson 950 University Avenue, Bronx NY 10452 (718) 588 8400 x2721 http://www.hwwilson.com --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]