Errors while running LIA code.
Hi I am trying to run a code from Lucene In Action, but it generate some errors.There is one one warning at compilation time and the errors generate at run time. Given below the code and errors. Kindly give me some clue. thanks... *_Code:_* ///package lia.handlingtypes.xml; import lia.handlingtypes.framework.DocumentHandler; import lia.handlingtypes.framework.DocumentHandlerException; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.SAXException; import org.xml.sax.Attributes; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import javax.xml.parsers.ParserConfigurationException; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.FileInputStream; import java.util.Iterator; import java.util.HashMap; public class SAXXMLHandler extends DefaultHandler implements DocumentHandler { /** A buffer for each XML element */ private StringBuffer elementBuffer = new StringBuffer(); private HashMap attributeMap; private Document doc; public Document getDocument(InputStream is) throws DocumentHandlerException { SAXParserFactory spf = SAXParserFactory.newInstance(); try { SAXParser parser = spf.newSAXParser(); parser.parse(is, this); } catch (IOException e) { throw new DocumentHandlerException( "Cannot parse XML document", e); } catch (ParserConfigurationException e) { throw new DocumentHandlerException( "Cannot parse XML document", e); } catch (SAXException e) { throw new DocumentHandlerException( "Cannot parse XML document", e); } return doc; } public void startDocument() { doc = new Document(); } public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { elementBuffer.setLength(0); attributeMap.clear(); if (atts.getLength() > 0) { attributeMap = new HashMap(); for (int i = 0; i < atts.getLength(); i++) { attributeMap.put(atts.getQName(i), atts.getValue(i)); } } } 
public void characters(char[] text, int start, int length) { elementBuffer.append(text, start, length); } public void endElement(String uri, String localName, String qName) throws SAXException { if (qName.equals("address-book")) { return; } else if (qName.equals("contact")) { Iterator iter = attributeMap.keySet().iterator(); while (iter.hasNext()) { String attName = (String) iter.next(); String attValue = (String) attributeMap.get(attName); doc.add(new Field(attName, attValue,Field.Store.YES,Field.Index.TOKENIZED)); } } else { doc.add(new Field(qName, elementBuffer.toString(),Field.Store.YES,Field.Index.TOKENIZED)); } } public static void main(String args[]) throws Exception { SAXXMLHandler handler = new SAXXMLHandler(); //File file = new File ("d:\\addressbook.xml"); Document doc = handler.getDocument(new FileInputStream(new File(args[0]))); //Document doc = handler.getDocument(new FileInputStream(file)); System.out.println(doc); } } / _*Errors: *_/D:\>java SAXXMLHandler d:\addressbook.xml Exception in thread "main" java.lang.NullPointerException at SAXXMLHandler.startElement(SAXXMLHandler.java:66) at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.startEle ment(Unknown Source) at com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDValidator.startElem ent(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp l.scanStartElement(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$Conten tDriver.scanRootElementHook(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp l$FragmentContentDriver.next(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$Prolog Driver.next(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(U nknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp l.scanDocument(Unknown Source) at 
com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(U nknown Source) at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(U nknown Source) at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown So urce) at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Un known Source) at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.p arse(Unknown Source) at javax.xml.parsers.SAXParser.parse(Unknown Source) at javax.xml.parsers.SAXParser.parse(Unknown Source) at SAXXMLHandler.getDocument(SAXXMLHandler.java:39) at SAXXMLHandler.main(SAXXMLHandler.java:102) /_* *_ -
Re: Errors while running LIA code.
See this thread for one suggestion: http://www.gossamer-threads.com/lists/lucene/java-user/55465 Mike "Liaqat Ali" <[EMAIL PROTECTED]> wrote: > Hi > > I am trying to run a code from Lucene In Action, but it generate some > errors.There is one one warning at compilation time and the errors > generate at run time. Given below the code and errors. Kindly give me > some clue. thanks... > > *_Code:_* > > ///package lia.handlingtypes.xml; > import lia.handlingtypes.framework.DocumentHandler; > import lia.handlingtypes.framework.DocumentHandlerException; > import org.xml.sax.helpers.DefaultHandler; > import org.xml.sax.SAXException; > import org.xml.sax.Attributes; > import javax.xml.parsers.SAXParser; > import javax.xml.parsers.SAXParserFactory; > import javax.xml.parsers.ParserConfigurationException; > import org.apache.lucene.document.Document; > import org.apache.lucene.document.Field; > import java.io.File; > import java.io.IOException; > import java.io.InputStream; > import java.io.FileInputStream; > import java.util.Iterator; > import java.util.HashMap; > > public class SAXXMLHandler > extends DefaultHandler implements DocumentHandler { > > /** A buffer for each XML element */ > private StringBuffer elementBuffer = new StringBuffer(); > private HashMap attributeMap; > > private Document doc; > > public Document getDocument(InputStream is) > throws DocumentHandlerException { > > SAXParserFactory spf = SAXParserFactory.newInstance(); > try { > SAXParser parser = spf.newSAXParser(); > parser.parse(is, this); > } > catch (IOException e) { > throw new DocumentHandlerException( > "Cannot parse XML document", e); > } > catch (ParserConfigurationException e) { > throw new DocumentHandlerException( > "Cannot parse XML document", e); > } > catch (SAXException e) { > throw new DocumentHandlerException( > "Cannot parse XML document", e); > } > > return doc; > } > > public void startDocument() { > doc = new Document(); > } > > public void startElement(String uri, String 
localName, > String qName, Attributes atts) > throws SAXException { > > elementBuffer.setLength(0); > attributeMap.clear(); > if (atts.getLength() > 0) { > attributeMap = new HashMap(); > for (int i = 0; i < atts.getLength(); i++) { > attributeMap.put(atts.getQName(i), atts.getValue(i)); > } > } > } > > public void characters(char[] text, int start, int length) { > elementBuffer.append(text, start, length); > } > > public void endElement(String uri, String localName, String qName) > throws SAXException { > if (qName.equals("address-book")) { > return; > } > else if (qName.equals("contact")) { > Iterator iter = attributeMap.keySet().iterator(); > while (iter.hasNext()) { > String attName = (String) iter.next(); > String attValue = (String) attributeMap.get(attName); > doc.add(new Field(attName, > attValue,Field.Store.YES,Field.Index.TOKENIZED)); > } > } > else { > doc.add(new Field(qName, > elementBuffer.toString(),Field.Store.YES,Field.Index.TOKENIZED)); > } > } > > public static void main(String args[]) throws Exception { > SAXXMLHandler handler = new SAXXMLHandler(); > > //File file = new File ("d:\\addressbook.xml"); > > Document doc = handler.getDocument(new FileInputStream(new > File(args[0]))); > > //Document doc = handler.getDocument(new FileInputStream(file)); > > System.out.println(doc); > } > } > / > > _*Errors: > > *_/D:\>java SAXXMLHandler d:\addressbook.xml > > Exception in thread "main" java.lang.NullPointerException > at SAXXMLHandler.startElement(SAXXMLHandler.java:66) > at > com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.startEle > ment(Unknown Source) > at > com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDValidator.startElem > ent(Unknown Source) > at > com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp > l.scanStartElement(Unknown Source) > at > com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$Conten > tDriver.scanRootElementHook(Unknown Source) > at > 
com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp > l$FragmentContentDriver.next(Unknown Source) > at > com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$Prolog > Driver.next(Unknown Source) > at > com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(U > nknown Source) > at > com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp > l.scanDocument(Unknown Source) > at > com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(U > nknown Source) > at > com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(U > nknown Source) > at
Re: Errors while running LIA code.
Michael McCandless wrote: See this thread for one suggestion: http://www.gossamer-threads.com/lists/lucene/java-user/55465 Mike "Liaqat Ali" <[EMAIL PROTECTED]> wrote: Hi I am trying to run a code from Lucene In Action, but it generate some errors.There is one one warning at compilation time and the errors generate at run time. Given below the code and errors. Kindly give me some clue. thanks... *_Code:_* ///package lia.handlingtypes.xml; import lia.handlingtypes.framework.DocumentHandler; import lia.handlingtypes.framework.DocumentHandlerException; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.SAXException; import org.xml.sax.Attributes; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import javax.xml.parsers.ParserConfigurationException; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.FileInputStream; import java.util.Iterator; import java.util.HashMap; public class SAXXMLHandler extends DefaultHandler implements DocumentHandler { /** A buffer for each XML element */ private StringBuffer elementBuffer = new StringBuffer(); private HashMap attributeMap; private Document doc; public Document getDocument(InputStream is) throws DocumentHandlerException { SAXParserFactory spf = SAXParserFactory.newInstance(); try { SAXParser parser = spf.newSAXParser(); parser.parse(is, this); } catch (IOException e) { throw new DocumentHandlerException( "Cannot parse XML document", e); } catch (ParserConfigurationException e) { throw new DocumentHandlerException( "Cannot parse XML document", e); } catch (SAXException e) { throw new DocumentHandlerException( "Cannot parse XML document", e); } return doc; } public void startDocument() { doc = new Document(); } public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { elementBuffer.setLength(0); 
attributeMap.clear(); if (atts.getLength() > 0) { attributeMap = new HashMap(); for (int i = 0; i < atts.getLength(); i++) { attributeMap.put(atts.getQName(i), atts.getValue(i)); } } } public void characters(char[] text, int start, int length) { elementBuffer.append(text, start, length); } public void endElement(String uri, String localName, String qName) throws SAXException { if (qName.equals("address-book")) { return; } else if (qName.equals("contact")) { Iterator iter = attributeMap.keySet().iterator(); while (iter.hasNext()) { String attName = (String) iter.next(); String attValue = (String) attributeMap.get(attName); doc.add(new Field(attName, attValue,Field.Store.YES,Field.Index.TOKENIZED)); } } else { doc.add(new Field(qName, elementBuffer.toString(),Field.Store.YES,Field.Index.TOKENIZED)); } } public static void main(String args[]) throws Exception { SAXXMLHandler handler = new SAXXMLHandler(); //File file = new File ("d:\\addressbook.xml"); Document doc = handler.getDocument(new FileInputStream(new File(args[0]))); //Document doc = handler.getDocument(new FileInputStream(file)); System.out.println(doc); } } / _*Errors: *_/D:\>java SAXXMLHandler d:\addressbook.xml Exception in thread "main" java.lang.NullPointerException at SAXXMLHandler.startElement(SAXXMLHandler.java:66) at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.startEle ment(Unknown Source) at com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDValidator.startElem ent(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp l.scanStartElement(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$Conten tDriver.scanRootElementHook(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp l$FragmentContentDriver.next(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$Prolog Driver.next(Unknown Source) at 
com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(U nknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp l.scanDocument(Unknown Source) at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(U nknown Source) at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(U nknown Source) at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown So urce) at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Un known Source) at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.p arse
Re: Errors while running LIA code.
Wow, sure enough there is a bug in LIA's SAXXMLHandler! After all these years! We did not have it registered to run by default in the examples - it uses the Digester implementation instead of SAX. Mike's suggested fix works fine for me, changing the attributeMap declaration to be this: private HashMap attributeMap = new HashMap(); Here's how I ran this: 1) Downloaded http://www.ehatchersolutions.com/downloads/LuceneInAction.zip - sorry, lucenebook.com is broken at the moment :( 2) Unzipped it, ran "ant" to build the base indexes. Ran "ant test" to verify all was working fine. 3) Ran "ant ExtensionHandler" to run the handling test, and put in "src/lia/handlingtypes/data/addressbook.xml". That works because the XML handler is set to Digester. If you change src/lia/handlingtypes/framework/handler.properties to have xml = lia.handlingtypes.xml.SAXXMLHandler instead, it'll fail until you add the above "new HashMap()" to the mix. Erik p.s. Otis!! :) On Dec 6, 2007, at 5:06 AM, Liaqat Ali wrote: Michael McCandless wrote: See this thread for one suggestion: http://www.gossamer-threads.com/lists/lucene/java-user/55465 Mike "Liaqat Ali" <[EMAIL PROTECTED]> wrote: Hi I am trying to run a code from Lucene In Action, but it generate some errors.There is one one warning at compilation time and the errors generate at run time. Given below the code and errors. Kindly give me some clue. thanks... 
*_Code:_* ///package lia.handlingtypes.xml; import lia.handlingtypes.framework.DocumentHandler; import lia.handlingtypes.framework.DocumentHandlerException; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.SAXException; import org.xml.sax.Attributes; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import javax.xml.parsers.ParserConfigurationException; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.FileInputStream; import java.util.Iterator; import java.util.HashMap; public class SAXXMLHandler extends DefaultHandler implements DocumentHandler { /** A buffer for each XML element */ private StringBuffer elementBuffer = new StringBuffer(); private HashMap attributeMap; private Document doc; public Document getDocument(InputStream is) throws DocumentHandlerException { SAXParserFactory spf = SAXParserFactory.newInstance(); try { SAXParser parser = spf.newSAXParser(); parser.parse(is, this); } catch (IOException e) { throw new DocumentHandlerException( "Cannot parse XML document", e); } catch (ParserConfigurationException e) { throw new DocumentHandlerException( "Cannot parse XML document", e); } catch (SAXException e) { throw new DocumentHandlerException( "Cannot parse XML document", e); } return doc; } public void startDocument() { doc = new Document(); } public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { elementBuffer.setLength(0); attributeMap.clear(); if (atts.getLength() > 0) { attributeMap = new HashMap(); for (int i = 0; i < atts.getLength(); i++) { attributeMap.put(atts.getQName(i), atts.getValue(i)); } } } public void characters(char[] text, int start, int length) { elementBuffer.append(text, start, length); } public void endElement(String uri, String localName, String qName) throws SAXException { if (qName.equals("address-book")) { 
return; } else if (qName.equals("contact")) { Iterator iter = attributeMap.keySet().iterator(); while (iter.hasNext()) { String attName = (String) iter.next(); String attValue = (String) attributeMap.get(attName); doc.add(new Field(attName, attValue,Field.Store.YES,Field.Index.TOKENIZED)); } } else { doc.add(new Field(qName, elementBuffer.toString (),Field.Store.YES,Field.Index.TOKENIZED)); } } public static void main(String args[]) throws Exception { SAXXMLHandler handler = new SAXXMLHandler(); //File file = new File ("d:\\addressbook.xml"); Document doc = handler.getDocument(new FileInputStream(new File(args[0]))); //Document doc = handler.getDocument(new FileInputStream(file)); System.out.println(doc); } } / _*Errors: *_/D:\>java SAXXMLHandler d:\addressbook.xml Exception in thread "main" java.lang.NullPointerException at SAXXMLHandler.startElement(SAXXMLHandler.java:66) at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.startEl e ment(Unknown Source) at com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDValidator.startEle m ent(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerIm p
Re: Errors while running LIA code.
>1) Downloaded http://www.ehatchersolutions.com/downloads/LuceneInAction.zip - sorry, lucenebook.com is broken at the moment :( This one works too - http://www.manning.com/hatcher2/ --> Downloads --> Source Code - To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
best practices for reloading an index for a searcher
I did some searching on the lucene site and wiki, but didn't quite find what I was looking for in regards to a basic approach to how and when to reload index data. I have a long running process that will be continually indexing and concurrently searching the same index and I'm looking for a basic approach to reloading the data for the searcher. Perhaps I should verify that the data does need to be reloaded. Does a manual reload of a changing index need to be performed for a searcher? I'd appreciate any links to documents, source or some quick pseudo code. Thanks. -Nathan -- CONFIDENTIALITY NOTICE This message and any included attachments are from Cerner Corporation and are intended only for the addressee. The information contained in this message is confidential and may constitute inside or non-public information under international, federal, or state securities laws. Unauthorized forwarding, printing, copying, distribution, or use of such information is strictly prohibited and may be unlawful. If you are not the addressee, please promptly delete this message and notify the sender of the delivery error by e-mail or you may call Cerner's corporate offices in Kansas City, Missouri, U.S.A at (+1) (816)221-1024. - To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
Re: best practices for reloading an index for a searcher
If by reload you mean closing and opening the reader, then yes. You need to do this in order to see the changes since the *last* time you opened the reader. Think of it as the reader taking a snapshot of the index and using that for its lifetime. Be aware that opening a reader (and running the first searches) is an expensive operation and shouldn't be done unnecessarily. I guess it all depends upon your requirements for how soon changes are available for search. As for guidelines, to quote the famous wise ones, "it depends". The real question is how quickly your users require seeing recent changes. One technique for handling these updates is to periodically open a new reader *without* closing the old one. Fire a few warm-up queries at the new searcher, then close the old one and start using the new one. There are some coordination issues here. Also be aware that this will chew up some disk space. Hope this helps Erick On Dec 6, 2007 12:43 PM, Beyer,Nathan <[EMAIL PROTECTED]> wrote: > I did some searching on the lucene site and wiki, but didn't quite find > what I was looking for in regards to a basic approach to how and when to > reload index data. I have a long running process that will be > continually indexing and concurrently searching the same index and I'm > looking for a basic approach to reloading the data for the searcher. > Perhaps I should verify that the data does need to be reloaded. Does a > manual reload of a changing index need to be performed for a searcher? > > I'd appreciate any links to documents, source or some quick pseudo code. > > Thanks. > > -Nathan > > -- > CONFIDENTIALITY NOTICE This message and any included attachments are from > Cerner Corporation and are intended only for the addressee. The information > contained in this message is confidential and may constitute inside or > non-public information under international, federal, or state securities > laws. 
Unauthorized forwarding, printing, copying, distribution, or use of > such information is strictly prohibited and may be unlawful. If you are not > the addressee, please promptly delete this message and notify the sender of > the delivery error by e-mail or you may call Cerner's corporate offices in > Kansas City, Missouri, U.S.A at (+1) (816)221-1024. > > - > To unsubscribe, e-mail: [EMAIL PROTECTED] > For additional commands, e-mail: [EMAIL PROTECTED] > >
Re: Boost One Term Query
Thanks for the response Hoss. The score I receive is from the Explanation object. The score stays the same regardless of how I boost the single term. The score of the query: apple Is the same as the score of the query: apple^3 I am surprised by the result of the test. Would you expect "apple" and "apple^3" to receive the same score? Thanks hossman wrote: > > > first off: if you are looking at the score from the "Hits" class, bear in > mind they are "pseudo-normalized" and don't mean much. > > second: a "query" doesn't have a score, a document has a score relative to > a query ... scores can't be compared between different queries. > > third: there is a "queryNorm" that comes into play, it's designed to keep > scores "manageable" you can read more about it (and how to change it if you > want) in the scoring documentation. you should also look at the > "Explanation" info for each query/doc to make sure you understand what's > going on. > > > > : For example: > : apple > : > : Has the same score as: > : apple^3 > : > : But repeating the term will up the score > : apple apple apple > : > : I expected the score to go up when boosting a one term query. Is that a > : wrong expectation? > > > > -Hoss > > > - > To unsubscribe, e-mail: [EMAIL PROTECTED] > For additional commands, e-mail: [EMAIL PROTECTED] > > > -- View this message in context: http://www.nabble.com/Boost-One-Term-Query-tf4900128.html#a14199255 Sent from the Lucene - Java Users mailing list archive at Nabble.com. - To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
RE: best practices for reloading an index for a searcher
That does help, thank you. Does closing a reader, writer or searcher close the underlying Directory? I've been operating under the assumption that it does not and that I should be sharing the Directory instance as much as possible. -Nathan -Original Message- From: Erick Erickson [mailto:[EMAIL PROTECTED] Sent: Thursday, December 06, 2007 12:10 PM To: java-user@lucene.apache.org Subject: Re: best practices for reloading an index for a searcher If by reload you mean closing and opening the reader, then yes. You need to do this in order to see the changes since the *last* time you opened the reader. Think of it as the reader taking a snapshot of the index and using that for its lifetime. Be aware that opening a reader (and running the first searches) is an expensive operation and shouldn't be done unnecessarily. I guess it all depends upon your requirements for how soon changes are available for search. As for guidelines, to quote the famous wise ones it depends. The real question is how quickly your users require seeing recent changes. One technique for handling these updates is to periodically open a new reader *without* closing the old one. Fire a few warm-up queries at the new searcher, then close the old one and start using the new one. There are some coordination issues here. Also be aware that this will chew up some disk space Hope this helps Erick On Dec 6, 2007 12:43 PM, Beyer,Nathan <[EMAIL PROTECTED]> wrote: > I did some searching on the lucene site and wiki, but didn't quite find > what I was looking for in regards to a basic approach to how and when to > reload index data. I have a long running process that will be > continually indexing and concurrently searching the same index and I'm > looking for a basic approach to reloading the data for the searcher. > Perhaps I should verify that the data does need to be reloaded. Does a > manual reload of a changing index need to be performed for a searcher? 
> > I'd appreciate any links to documents, source or some quick pseudo code. > > Thanks. > > -Nathan > -- CONFIDENTIALITY NOTICE This message and any included attachments are from Cerner Corporation and are intended only for the addressee. The information contained in this message is confidential and may constitute inside or non-public information under international, federal, or state securities laws. Unauthorized forwarding, printing, copying, distribution, or use of such information is strictly prohibited and may be unlawful. If you are not the addressee, please promptly delete this message and notify the sender of the delivery error by e-mail or you may call Cerner's corporate offices in Kansas City, Missouri, U.S.A at (+1) (816)221-1024. - To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
Re: Boost One Term Query
On Dec 6, 2007 2:31 PM, java_user_ <[EMAIL PROTECTED]> wrote: > Thanks for the response Hoss. > > The score I receive is from the Explaination object. The score stays the > same regardless of how I boost the single term. > > The score of the query: > apple > > Is the same as the score of the query: > apple^3 This boosts apple 3 times in relation to the other query clauses. If there are no other query clauses, it's a bit meaningless. > I am surprised by the result of the test. Would you expect "apple" and > "apple^3" to receive the same score? Lucene does some "weighting" of the query that causes this to happen. class Query { [...] /** Expert: Constructs and initializes a Weight for a top-level query. */ public Weight weight(Searcher searcher) throws IOException { Query query = searcher.rewrite(this); Weight weight = query.createWeight(searcher); float sum = weight.sumOfSquaredWeights(); float norm = getSimilarity(searcher).queryNorm(sum); weight.normalize(norm); return weight; } Are you simply curious about this, or is it causing you a problem somehow? -Yonik - To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
Re: Boost One Term Query
I was hoping to boost the entire query to give the query more weight compared to other queries. Instead of boosting my entire query, I may just multiply the resulting score by the weight (or something like that). Yonik Seeley wrote: > > On Dec 6, 2007 2:31 PM, java_user_ <[EMAIL PROTECTED]> wrote: >> Thanks for the response Hoss. >> >> The score I receive is from the Explaination object. The score stays the >> same regardless of how I boost the single term. >> >> The score of the query: >> apple >> >> Is the same as the score of the query: >> apple^3 > > This boosts apple 3 times in relation to the other query clauses. If > there are no other query clauses, it's a bit meaningless. > >> I am surprised by the result of the test. Would you expect "apple" and >> "apple^3" to receive the same score? > > Lucene does some "weighting" of the query that causes this to happen. > > class Query { [...] > /** Expert: Constructs and initializes a Weight for a top-level query. > */ > public Weight weight(Searcher searcher) > throws IOException { > Query query = searcher.rewrite(this); > Weight weight = query.createWeight(searcher); > float sum = weight.sumOfSquaredWeights(); > float norm = getSimilarity(searcher).queryNorm(sum); > weight.normalize(norm); > return weight; > } > > Are you simply curious about this, or is it causing you a problem somehow? > > -Yonik > > - > To unsubscribe, e-mail: [EMAIL PROTECTED] > For additional commands, e-mail: [EMAIL PROTECTED] > > > -- View this message in context: http://www.nabble.com/Boost-One-Term-Query-tf4900128.html#a14200211 Sent from the Lucene - Java Users mailing list archive at Nabble.com. - To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
Conversion from Lucene 1.4.3 API to 2.2.0 API.
Hi, With Lucene 1.4.3, we had used this static factory method on Field. What is its equivalent in Lucene 2.2.0? /** Constructs a String-valued Field that is tokenized and indexed, and is stored in the index, for return with hits. The tokens are generated from the reader */ public static final Field Text(String name, String value, Reader reader) { Field aField = Text(name, value); aField.readerValue = reader; return aField; } Thanks -- View this message in context: http://www.nabble.com/Conversion-from-Lucene-1.4.3-API-to-2.2.0-API.-tf4958346.html#a14200220 Sent from the Lucene - Java Users mailing list archive at Nabble.com. - To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
Custom SynonymMap
Is there a way to add synonyms to a SynonymMap? The HashMap that holds all the words is private, so subclassing SynonymMap will not work. Has anyone added their own custom vocabulary? -- View this message in context: http://www.nabble.com/Custom-SynonymMap-tf4958585.html#a14201041 Sent from the Lucene - Java Users mailing list archive at Nabble.com. - To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
RE: Indexing XML documents (Urdu)
Hi Liaqat, I'd rather keep the email-thread on the lucene user list. The code I used is below, the thing to do is be careful when reading UTF-8 text so you don't garble it. import org.xml.sax.*; import org.xml.sax.helpers.DefaultHandler; import org.apache.lucene.document.*; import org.apache.lucene.index.*; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.search.*; import javax.xml.parsers.*; import java.io.*; public class testNonEnglishXML { public static void main(String[] args){ if(args.length < 3) { System.out.println("Usage: " ); System.exit(-1); } testNonEnglishXML idx = new testNonEnglishXML(); try { idx.index(args[0], args[1]); idx.search(args[1], args[2]); } catch(Exception e) { e.printStackTrace(); } } public void index(String filePath, String indexPath) throws IOException, UnsupportedEncodingException, ParserConfigurationException, SAXException { Document luceneDoc = new Document(); luceneDoc.add(new Field("name", filePath, Field.Store.YES, Field.Index.UN_TOKENIZED)); StringBuffer sb = new StringBuffer(1024); String line; BufferedReader reader = new BufferedReader( new InputStreamReader(new FileInputStream(new File(filePath)), "UTF-8") ); while( (line = reader.readLine()) != null) { sb.append(line); } luceneDoc.add(new Field("contents", sb.toString(), Field.Store.NO, Field.Index.TOKENIZED)); TestParser parser = new TestParser(luceneDoc); SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser(); saxParser.parse(new FileInputStream(new File(filePath)), parser); IndexWriter writer = new IndexWriter(indexPath, new SimpleAnalyzer(), true); writer.addDocument(luceneDoc); writer.optimize(); writer.close(); } public void search(String indexPath, String queryFilePath) throws IOException { BufferedReader reader = new BufferedReader( new InputStreamReader(new FileInputStream(new File(queryFilePath)), "UTF-8") ); String queryString = new String( reader.readLine() ); reader.close(); 
IndexSearcher searcher = new IndexSearcher(FSDirectory.getDirectory(indexPath)); PhraseQuery query = new PhraseQuery(); query.add(new Term("contents", queryString)); Hits hits = searcher.search(query); if(hits.length()> 0) System.out.println("found " + hits.doc(0).getField("name")); } public class TestParser extends DefaultHandler { public TestParser(Document doc) { luceneDoc = doc; } public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { if(qName.equalsIgnoreCase("title")) buffer = new String(); } public void characters(char[] ch, int start, int length) throws SAXException { buffer += new String(ch, start, length); } public void endElement(String namespaceURI, String localName, String qName) throws SAXException { if(qName.equalsIgnoreCase("title")) luceneDoc.add(new Field(qName, buffer, Field.Store.YES, Field.Index.UN_TOKENIZED)); } public void endDocument() {} public void startDocument() {} public void error(SAXParseException e) {} public void fatalError(SAXParseException e) {} public void ignorableWhitespace(char[] ch, int start, int length) {} private Document luceneDoc = null; private String buffer = null; } } -Original Message- From: Liaqat Ali [mailto:[EMAIL PROTECTED] Sent: Thursday, December 06, 2007 1:42 AM To: Seneviratne, Yasoja Subject: Indexing XML documents (Urdu) Hello, I read your reply. I need some more help in this regard. As you saw the text (Urdu language). The whole XML file is a collection of 200 documents. I am at initial level as SAX is concerned. Can you kindly provide me the code how to extract textual information of each document plus its Doc number and title or some guidence, because my whole project has stuck because of this... Looking for your response Thanks.. Liaqat - To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
Re: Boost One Term Query
I don't believe you can compare scores across queries in any meaningful way. This sounds a lot like you're trying to solve some problem and have decided that boosting and comparing scores across queries is the answer. in other words, the XY problem. Perhaps if you explained what you're trying to accomplish someone could suggest an alternative... Best Erick On Dec 6, 2007 3:12 PM, java_user_ <[EMAIL PROTECTED]> wrote: > > I was hoping to boost the entire query to give the query more weight > compared > to other queries. > > Instead of boosting my entire query, I may just multiply the resulting > score > by the weight (or something like that). > > > > Yonik Seeley wrote: > > > > On Dec 6, 2007 2:31 PM, java_user_ <[EMAIL PROTECTED]> wrote: > >> Thanks for the response Hoss. > >> > >> The score I receive is from the Explaination object. The score stays > the > >> same regardless of how I boost the single term. > >> > >> The score of the query: > >> apple > >> > >> Is the same as the score of the query: > >> apple^3 > > > > This boosts apple 3 times in relation to the other query clauses. If > > there are no other query clauses, it's a bit meaningless. > > > >> I am surprised by the result of the test. Would you expect "apple" and > >> "apple^3" to receive the same score? > > > > Lucene does some "weighting" of the query that causes this to happen. > > > > class Query { [...] > > /** Expert: Constructs and initializes a Weight for a top-level query. > > */ > > public Weight weight(Searcher searcher) > > throws IOException { > > Query query = searcher.rewrite(this); > > Weight weight = query.createWeight(searcher); > > float sum = weight.sumOfSquaredWeights(); > > float norm = getSimilarity(searcher).queryNorm(sum); > > weight.normalize(norm); > > return weight; > > } > > > > Are you simply curious about this, or is it causing you a problem > somehow? 
> > > > -Yonik > > > > - > > To unsubscribe, e-mail: [EMAIL PROTECTED] > > For additional commands, e-mail: [EMAIL PROTECTED] > > > > > > > > -- > View this message in context: > http://www.nabble.com/Boost-One-Term-Query-tf4900128.html#a14200211 > Sent from the Lucene - Java Users mailing list archive at Nabble.com. > > > - > To unsubscribe, e-mail: [EMAIL PROTECTED] > For additional commands, e-mail: [EMAIL PROTECTED] > >
Fwd: Can I search in realtime?
Hi, this is my first time using the Lucene mailing list. My question is: when I add a document with IndexWriter, is it searchable by an IndexSearcher instance that was created before the document was flushed to the index? If Lucene cannot do this, are there any suggestions for solving this problem? Regards, cooc
Re: Error running Lucene in Action code
Thanks very much for your reply. I commented out the line: attributeMap.clear(); in the startElement() method, and the code ran! Thanks for your prompt reply, and my apologies for the delay in responding. All the best. Fayyaz Michael McCandless-2 wrote: > > > I think you need to initialize attributeMap, eg add " = new HashMap()" in > the declaration? > > Mike > > "syedfa" <[EMAIL PROTECTED]> wrote: >> >> Dear Fellow Java & Lucene developers: >> >> I am a Java developer learning lucene and I am currently going through >> the >> book Lucene in Action. At present, I am trying to run the sample code >> for >> indexing an xml document using sax. My code has been slightly updated >> for >> Lucene version 2.2: >> >> /* >> * To change this template, choose Tools | Templates >> * and open the template in the editor. >> */ >> >> package lucenexml; >> >> >> import java.io.File; >> import java.io.FileInputStream; >> import java.io.InputStream; >> import java.io.IOException; >> import java.util.HashMap; >> import java.util.Iterator; >> >> import org.xml.sax.helpers.DefaultHandler; >> import org.xml.sax.SAXException; >> import org.xml.sax.Attributes; >> >> import javax.xml.parsers.SAXParser; >> import javax.xml.parsers.SAXParserFactory; >> import javax.xml.parsers.ParserConfigurationException; >> >> >> import org.apache.lucene.document.Document; >> import org.apache.lucene.document.Field; >> >> /** >> * >> * @author fayyaz >> */ >> public class SAXXMLHandler extends DefaultHandler implements >> DocumentHandler{ >> >> private StringBuffer elementBuffer=new StringBuffer(); >> private HashMap attributeMap; >> >> private Document doc; >> /** >> * @param args the command line arguments >> */ >> public static void main(String[] args) throws Exception { >> // TODO code application logic here >> SAXXMLHandler handler=new SAXXMLHandler(); >> Document doc=handler.getDocument(new FileInputStream(new >> File(args[0]))); >> System.out.println(doc); >> } >> >> public Document 
getDocument(InputStream is) throws >> DocumentHandlerException{ >> >> SAXParserFactory spf=SAXParserFactory.newInstance(); >> >> try{ >> SAXParser parser=spf.newSAXParser(); >> parser.parse(is, this); >> >> } >> catch (IOException e){ >> throw new DocumentHandlerException("Cannot parse XML >> document", >> e); >> } >> catch (ParserConfigurationException e) { >> throw new DocumentHandlerException("Cannot parse XML >> document", >> e); >> } >> catch (SAXException e){ >> throw new DocumentHandlerException("Cannot parse XML >> document", >> e); >> } >> return doc; >> } >> >> public void startDocument(){ >> doc=new Document(); >> } >> >> public void startElement(String uri, String localName, String qName, >> Attributes atts) throws SAXException{ >> >> elementBuffer.setLength(0); >> attributeMap.clear(); >> if(atts.getLength()>0){ >> attributeMap=new HashMap(); >> for(int i=0; i> attributeMap.put(atts.getQName(i), atts.getValue(i)); >> } >> } >> } >> >> public void characters(char[] text, int start, int length){ >> elementBuffer.append(text, start, length); >> } >> >> public void endElement(String uri, String localName, String qName) >> throws SAXException{ >> if(qName.equals("address-book")){ >> return; >> } >> >> else if(qName.equals("contact")){ >> Iterator iter=attributeMap.keySet().iterator(); >> while(iter.hasNext()){ >> String attName=(String)iter.next(); >> String attValue=(String)attributeMap.get(attName); >> doc.add(new Field(qName, elementBuffer.toString(), >> Field.Store.YES,Field.Index.NO)); >> } >> } >> } >> } >> >> >> However, although the code compiles, I get the following runtime error >> when >> I pass the file addressbook.xml (which is used in the book) as a runtime >> argument: >> >> Exception in thread "main" java.lang.NullPointerException >> at lucenexml.SAXXMLHandler.startElement(SAXXMLHandler.java:81) >> at >> org.apache.xerces.parsers.AbstractSAXParser.startElement(Unknown >> Source) >> at >> 
org.apache.xerces.impl.dtd.XMLDTDValidator.startElement(Unknown >> Source) >> at >> org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanStartElement(Unknown >> Source) >> at >> org.apache.xerces.impl.XMLDocumentScannerImpl$ContentDispatcher.scanRootElementHook(Unknown >> Source) >> at >> org.apache.xerces.impl.XMLDocumentFragmentScannerImpl$FragmentContentDispatcher.dispa