Errors while running LIA code.

2007-12-06 Thread Liaqat Ali

Hi

I am trying to run a code from Lucene In Action, but it generate some 
errors.There is one one warning at compilation time and the errors 
generate at run time. Given below the code and errors. Kindly give me 
some clue. thanks...


*_Code:_*

///package lia.handlingtypes.xml;
import lia.handlingtypes.framework.DocumentHandler;
import lia.handlingtypes.framework.DocumentHandlerException;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.SAXException;
import org.xml.sax.Attributes;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.FileInputStream;
import java.util.Iterator;
import java.util.HashMap;

public class SAXXMLHandler
 extends DefaultHandler implements DocumentHandler {

 /** A buffer for each XML element */
 private StringBuffer elementBuffer = new StringBuffer();
 private HashMap attributeMap;

 private Document doc;

 public Document getDocument(InputStream is)
   throws DocumentHandlerException {

   SAXParserFactory spf = SAXParserFactory.newInstance();
   try {
 SAXParser parser = spf.newSAXParser();
 parser.parse(is, this);
   }
   catch (IOException e) {
 throw new DocumentHandlerException(
   "Cannot parse XML document", e);
   }
   catch (ParserConfigurationException e) {
 throw new DocumentHandlerException(
   "Cannot parse XML document", e);
   }
   catch (SAXException e) {
 throw new DocumentHandlerException(
   "Cannot parse XML document", e);
   }

   return doc;
 }

 public void startDocument() {
   doc = new Document();
 }

 public void startElement(String uri, String localName,
   String qName, Attributes atts)
   throws SAXException {

   elementBuffer.setLength(0);
   attributeMap.clear();
   if (atts.getLength() > 0) {
 attributeMap = new HashMap();
 for (int i = 0; i < atts.getLength(); i++) {
   attributeMap.put(atts.getQName(i), atts.getValue(i));
 }
   }
 }

 public void characters(char[] text, int start, int length) {
   elementBuffer.append(text, start, length);
 }

 public void endElement(String uri, String localName, String qName)
   throws SAXException {
   if (qName.equals("address-book")) {
 return;
   }
   else if (qName.equals("contact")) {
 Iterator iter = attributeMap.keySet().iterator();
 while (iter.hasNext()) {
   String attName = (String) iter.next();
   String attValue = (String) attributeMap.get(attName);
   doc.add(new Field(attName, 
attValue,Field.Store.YES,Field.Index.TOKENIZED));

 }
   }
   else {
 doc.add(new Field(qName, 
elementBuffer.toString(),Field.Store.YES,Field.Index.TOKENIZED));

   }
 }

 public static void main(String args[]) throws Exception {
   SAXXMLHandler handler = new SAXXMLHandler();

   //File file = new File ("d:\\addressbook.xml");

   Document doc = handler.getDocument(new FileInputStream(new 
File(args[0])));


   //Document doc = handler.getDocument(new FileInputStream(file));

   System.out.println(doc);
 }
}
/

_*Errors:

*_/D:\>java SAXXMLHandler d:\addressbook.xml

Exception in thread "main" java.lang.NullPointerException
   at SAXXMLHandler.startElement(SAXXMLHandler.java:66)
   at 
com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.startEle

ment(Unknown Source)
   at 
com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDValidator.startElem

ent(Unknown Source)
   at 
com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp

l.scanStartElement(Unknown Source)
   at 
com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$Conten

tDriver.scanRootElementHook(Unknown Source)
   at 
com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp

l$FragmentContentDriver.next(Unknown Source)
   at 
com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$Prolog

Driver.next(Unknown Source)
   at 
com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(U

nknown Source)
   at 
com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp

l.scanDocument(Unknown Source)
   at 
com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(U

nknown Source)
   at 
com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(U

nknown Source)
   at 
com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown So

urce)
   at 
com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Un

known Source)
   at 
com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.p

arse(Unknown Source)
   at javax.xml.parsers.SAXParser.parse(Unknown Source)
   at javax.xml.parsers.SAXParser.parse(Unknown Source)
   at SAXXMLHandler.getDocument(SAXXMLHandler.java:39)
   at SAXXMLHandler.main(SAXXMLHandler.java:102)
/_*
*_

-

Re: Errors while running LIA code.

2007-12-06 Thread Michael McCandless

See this thread for one suggestion:

http://www.gossamer-threads.com/lists/lucene/java-user/55465

Mike

"Liaqat Ali" <[EMAIL PROTECTED]> wrote:
> Hi
> 
> I am trying to run a code from Lucene In Action, but it generate some 
> errors.There is one one warning at compilation time and the errors 
> generate at run time. Given below the code and errors. Kindly give me 
> some clue. thanks...
> 
> *_Code:_*
> 
> ///package lia.handlingtypes.xml;
> import lia.handlingtypes.framework.DocumentHandler;
> import lia.handlingtypes.framework.DocumentHandlerException;
> import org.xml.sax.helpers.DefaultHandler;
> import org.xml.sax.SAXException;
> import org.xml.sax.Attributes;
> import javax.xml.parsers.SAXParser;
> import javax.xml.parsers.SAXParserFactory;
> import javax.xml.parsers.ParserConfigurationException;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.Field;
> import java.io.File;
> import java.io.IOException;
> import java.io.InputStream;
> import java.io.FileInputStream;
> import java.util.Iterator;
> import java.util.HashMap;
> 
> public class SAXXMLHandler
>   extends DefaultHandler implements DocumentHandler {
> 
>   /** A buffer for each XML element */
>   private StringBuffer elementBuffer = new StringBuffer();
>   private HashMap attributeMap;
> 
>   private Document doc;
> 
>   public Document getDocument(InputStream is)
> throws DocumentHandlerException {
> 
> SAXParserFactory spf = SAXParserFactory.newInstance();
> try {
>   SAXParser parser = spf.newSAXParser();
>   parser.parse(is, this);
> }
> catch (IOException e) {
>   throw new DocumentHandlerException(
> "Cannot parse XML document", e);
> }
> catch (ParserConfigurationException e) {
>   throw new DocumentHandlerException(
> "Cannot parse XML document", e);
> }
> catch (SAXException e) {
>   throw new DocumentHandlerException(
> "Cannot parse XML document", e);
> }
> 
> return doc;
>   }
> 
>   public void startDocument() {
> doc = new Document();
>   }
> 
>   public void startElement(String uri, String localName,
> String qName, Attributes atts)
> throws SAXException {
> 
> elementBuffer.setLength(0);
> attributeMap.clear();
> if (atts.getLength() > 0) {
>   attributeMap = new HashMap();
>   for (int i = 0; i < atts.getLength(); i++) {
> attributeMap.put(atts.getQName(i), atts.getValue(i));
>   }
> }
>   }
> 
>   public void characters(char[] text, int start, int length) {
> elementBuffer.append(text, start, length);
>   }
> 
>   public void endElement(String uri, String localName, String qName)
> throws SAXException {
> if (qName.equals("address-book")) {
>   return;
> }
> else if (qName.equals("contact")) {
>   Iterator iter = attributeMap.keySet().iterator();
>   while (iter.hasNext()) {
> String attName = (String) iter.next();
> String attValue = (String) attributeMap.get(attName);
> doc.add(new Field(attName, 
> attValue,Field.Store.YES,Field.Index.TOKENIZED));
>   }
> }
> else {
>   doc.add(new Field(qName, 
> elementBuffer.toString(),Field.Store.YES,Field.Index.TOKENIZED));
> }
>   }
> 
>   public static void main(String args[]) throws Exception {
> SAXXMLHandler handler = new SAXXMLHandler();
> 
> //File file = new File ("d:\\addressbook.xml");
> 
> Document doc = handler.getDocument(new FileInputStream(new 
> File(args[0])));
> 
> //Document doc = handler.getDocument(new FileInputStream(file));
> 
> System.out.println(doc);
>   }
> }
> /
> 
> _*Errors:
> 
> *_/D:\>java SAXXMLHandler d:\addressbook.xml
> 
> Exception in thread "main" java.lang.NullPointerException
> at SAXXMLHandler.startElement(SAXXMLHandler.java:66)
> at 
> com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.startEle
> ment(Unknown Source)
> at 
> com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDValidator.startElem
> ent(Unknown Source)
> at 
> com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp
> l.scanStartElement(Unknown Source)
> at 
> com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$Conten
> tDriver.scanRootElementHook(Unknown Source)
> at 
> com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp
> l$FragmentContentDriver.next(Unknown Source)
> at 
> com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$Prolog
> Driver.next(Unknown Source)
> at 
> com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(U
> nknown Source)
> at 
> com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp
> l.scanDocument(Unknown Source)
> at 
> com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(U
> nknown Source)
> at 
> com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(U
> nknown Source)
> at

Re: Errors while running LIA code.

2007-12-06 Thread Liaqat Ali

Michael McCandless wrote:

See this thread for one suggestion:

http://www.gossamer-threads.com/lists/lucene/java-user/55465

Mike

"Liaqat Ali" <[EMAIL PROTECTED]> wrote:
  

Hi

I am trying to run a code from Lucene In Action, but it generate some 
errors.There is one one warning at compilation time and the errors 
generate at run time. Given below the code and errors. Kindly give me 
some clue. thanks...


*_Code:_*

///package lia.handlingtypes.xml;
import lia.handlingtypes.framework.DocumentHandler;
import lia.handlingtypes.framework.DocumentHandlerException;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.SAXException;
import org.xml.sax.Attributes;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.FileInputStream;
import java.util.Iterator;
import java.util.HashMap;

public class SAXXMLHandler
  extends DefaultHandler implements DocumentHandler {

  /** A buffer for each XML element */
  private StringBuffer elementBuffer = new StringBuffer();
  private HashMap attributeMap;

  private Document doc;

  public Document getDocument(InputStream is)
throws DocumentHandlerException {

SAXParserFactory spf = SAXParserFactory.newInstance();
try {
  SAXParser parser = spf.newSAXParser();
  parser.parse(is, this);
}
catch (IOException e) {
  throw new DocumentHandlerException(
"Cannot parse XML document", e);
}
catch (ParserConfigurationException e) {
  throw new DocumentHandlerException(
"Cannot parse XML document", e);
}
catch (SAXException e) {
  throw new DocumentHandlerException(
"Cannot parse XML document", e);
}

return doc;
  }

  public void startDocument() {
doc = new Document();
  }

  public void startElement(String uri, String localName,
String qName, Attributes atts)
throws SAXException {

elementBuffer.setLength(0);
attributeMap.clear();
if (atts.getLength() > 0) {
  attributeMap = new HashMap();
  for (int i = 0; i < atts.getLength(); i++) {
attributeMap.put(atts.getQName(i), atts.getValue(i));
  }
}
  }

  public void characters(char[] text, int start, int length) {
elementBuffer.append(text, start, length);
  }

  public void endElement(String uri, String localName, String qName)
throws SAXException {
if (qName.equals("address-book")) {
  return;
}
else if (qName.equals("contact")) {
  Iterator iter = attributeMap.keySet().iterator();
  while (iter.hasNext()) {
String attName = (String) iter.next();
String attValue = (String) attributeMap.get(attName);
doc.add(new Field(attName, 
attValue,Field.Store.YES,Field.Index.TOKENIZED));

  }
}
else {
  doc.add(new Field(qName, 
elementBuffer.toString(),Field.Store.YES,Field.Index.TOKENIZED));

}
  }

  public static void main(String args[]) throws Exception {
SAXXMLHandler handler = new SAXXMLHandler();

//File file = new File ("d:\\addressbook.xml");

Document doc = handler.getDocument(new FileInputStream(new 
File(args[0])));


//Document doc = handler.getDocument(new FileInputStream(file));

System.out.println(doc);
  }
}
/

_*Errors:

*_/D:\>java SAXXMLHandler d:\addressbook.xml

Exception in thread "main" java.lang.NullPointerException
at SAXXMLHandler.startElement(SAXXMLHandler.java:66)
at 
com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.startEle

ment(Unknown Source)
at 
com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDValidator.startElem

ent(Unknown Source)
at 
com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp

l.scanStartElement(Unknown Source)
at 
com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$Conten

tDriver.scanRootElementHook(Unknown Source)
at 
com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp

l$FragmentContentDriver.next(Unknown Source)
at 
com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$Prolog

Driver.next(Unknown Source)
at 
com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(U

nknown Source)
at 
com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImp

l.scanDocument(Unknown Source)
at 
com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(U

nknown Source)
at 
com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(U

nknown Source)
at 
com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown So

urce)
at 
com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Un

known Source)
at 
com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.p

arse

Re: Errors while running LIA code.

2007-12-06 Thread Erik Hatcher
Wow, sure enough there is a bug in LIA's SAXXMLHandler!   After all  
these years!   We did not have it registered to run by default in the  
examples - it uses the Digester implementation instead of SAX.


Mike's suggested fix works fine for me, changing the attributeMap  
declaration to be this:


  private HashMap attributeMap = new HashMap();

Here's how I ran this:

  1) Downloaded http://www.ehatchersolutions.com/downloads/ 
LuceneInAction.zip - sorry, lucenebook.com is broken at the moment :(


  2) Unzipped it, ran "ant" to build the base indexes.  Ran "ant  
test" to verify all was working fine.


  3) Ran "ant ExtensionHandler" to run the handling test, and put in  
"src/lia/handlingtypes/data/addressbook.xml".   That works because  
the XML handler is set to Digester.  If you change src/lia/ 
handlingtypes/framework/handler.properties to have


   xml  = lia.handlingtypes.xml.SAXXMLHandler

instead, it'll fail until you add the above "new HashMap()" to the mix.

Erik

p.s. Otis!!  :)


On Dec 6, 2007, at 5:06 AM, Liaqat Ali wrote:


Michael McCandless wrote:

See this thread for one suggestion:

http://www.gossamer-threads.com/lists/lucene/java-user/55465

Mike

"Liaqat Ali" <[EMAIL PROTECTED]> wrote:


Hi

I am trying to run a code from Lucene In Action, but it generate  
some errors.There is one one warning at compilation time and the  
errors generate at run time. Given below the code and errors.  
Kindly give me some clue. thanks...


*_Code:_*

///package lia.handlingtypes.xml;
import lia.handlingtypes.framework.DocumentHandler;
import lia.handlingtypes.framework.DocumentHandlerException;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.SAXException;
import org.xml.sax.Attributes;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.FileInputStream;
import java.util.Iterator;
import java.util.HashMap;

public class SAXXMLHandler
  extends DefaultHandler implements DocumentHandler {

  /** A buffer for each XML element */
  private StringBuffer elementBuffer = new StringBuffer();
  private HashMap attributeMap;

  private Document doc;

  public Document getDocument(InputStream is)
throws DocumentHandlerException {

SAXParserFactory spf = SAXParserFactory.newInstance();
try {
  SAXParser parser = spf.newSAXParser();
  parser.parse(is, this);
}
catch (IOException e) {
  throw new DocumentHandlerException(
"Cannot parse XML document", e);
}
catch (ParserConfigurationException e) {
  throw new DocumentHandlerException(
"Cannot parse XML document", e);
}
catch (SAXException e) {
  throw new DocumentHandlerException(
"Cannot parse XML document", e);
}

return doc;
  }

  public void startDocument() {
doc = new Document();
  }

  public void startElement(String uri, String localName,
String qName, Attributes atts)
throws SAXException {

elementBuffer.setLength(0);
attributeMap.clear();
if (atts.getLength() > 0) {
  attributeMap = new HashMap();
  for (int i = 0; i < atts.getLength(); i++) {
attributeMap.put(atts.getQName(i), atts.getValue(i));
  }
}
  }

  public void characters(char[] text, int start, int length) {
elementBuffer.append(text, start, length);
  }

  public void endElement(String uri, String localName, String qName)
throws SAXException {
if (qName.equals("address-book")) {
  return;
}
else if (qName.equals("contact")) {
  Iterator iter = attributeMap.keySet().iterator();
  while (iter.hasNext()) {
String attName = (String) iter.next();
String attValue = (String) attributeMap.get(attName);
doc.add(new Field(attName,  
attValue,Field.Store.YES,Field.Index.TOKENIZED));

  }
}
else {
  doc.add(new Field(qName, elementBuffer.toString 
(),Field.Store.YES,Field.Index.TOKENIZED));

}
  }

  public static void main(String args[]) throws Exception {
SAXXMLHandler handler = new SAXXMLHandler();

//File file = new File ("d:\\addressbook.xml");

Document doc = handler.getDocument(new FileInputStream(new  
File(args[0])));


//Document doc = handler.getDocument(new FileInputStream(file));

System.out.println(doc);
  }
}
/

_*Errors:

*_/D:\>java SAXXMLHandler d:\addressbook.xml

Exception in thread "main" java.lang.NullPointerException
at SAXXMLHandler.startElement(SAXXMLHandler.java:66)
at  
com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.startEl 
e

ment(Unknown Source)
at  
com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDValidator.startEle 
m

ent(Unknown Source)
at  
com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerIm 
p

Re: Errors while running LIA code.

2007-12-06 Thread Doron Cohen
>1) Downloaded http://www.ehatchersolutions.com/downloads/
> LuceneInAction.zip - sorry, lucenebook.com is broken at the moment :(

This one works too -
http://www.manning.com/hatcher2/ --> Downloads --> Source Code


-
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]



best practices for reloading an index for a searcher

2007-12-06 Thread Beyer,Nathan
I did some searching on the lucene site and wiki, but didn't quite find
what I was looking for in regards to a basic approach to how and when to
reload index data. I have a long running process that will be
continually indexing and concurrently searching the same index and I'm
looking for a basic approach to reloading the data for the searcher.
Perhaps I should verify that the data does need to be reloaded. Does a
manual reload of a changing index need to be performed for a searcher?

I'd appreciate any links to documents, source or some quick pseudo code.

Thanks.

-Nathan

--
CONFIDENTIALITY NOTICE This message and any included attachments are from 
Cerner Corporation and are intended only for the addressee. The information 
contained in this message is confidential and may constitute inside or 
non-public information under international, federal, or state securities laws. 
Unauthorized forwarding, printing, copying, distribution, or use of such 
information is strictly prohibited and may be unlawful. If you are not the 
addressee, please promptly delete this message and notify the sender of the 
delivery error by e-mail or you may call Cerner's corporate offices in Kansas 
City, Missouri, U.S.A at (+1) (816)221-1024.

-
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]



Re: best practices for reloading an index for a searcher

2007-12-06 Thread Erick Erickson
If by reload you mean closing and opening the reader, then yes. You need
to do this in order to see the changes since the *last* time you opened
the reader.

Think of it as the reader taking a snapshot of the index and using that
for its lifetime.

Be aware that opening a reader (and running the first searches) is an
expensive operation and shouldn't be done unnecessarily. I guess it
all depends upon your requirements for how soon changes are available
for search.

As for guidelines, to quote the famous wise ones it depends. The
real question is how quickly your users require seeing recent changes.

One technique for handling these updates is to periodically open a new
reader
*without* closing the old one. Fire a few warm-up queries at the new
searcher,
then close the old one and start using the new one. There are some
coordination issues here. Also be aware that this will chew up some
disk space

Hope this helps
Erick

On Dec 6, 2007 12:43 PM, Beyer,Nathan <[EMAIL PROTECTED]> wrote:

> I did some searching on the lucene site and wiki, but didn't quite find
> what I was looking for in regards to a basic approach to how and when to
> reload index data. I have a long running process that will be
> continually indexing and concurrently searching the same index and I'm
> looking for a basic approach to reloading the data for the searcher.
> Perhaps I should verify that the data does need to be reloaded. Does a
> manual reload of a changing index need to be performed for a searcher?
>
> I'd appreciate any links to documents, source or some quick pseudo code.
>
> Thanks.
>
> -Nathan
>
> --
> CONFIDENTIALITY NOTICE This message and any included attachments are from
> Cerner Corporation and are intended only for the addressee. The information
> contained in this message is confidential and may constitute inside or
> non-public information under international, federal, or state securities
> laws. Unauthorized forwarding, printing, copying, distribution, or use of
> such information is strictly prohibited and may be unlawful. If you are not
> the addressee, please promptly delete this message and notify the sender of
> the delivery error by e-mail or you may call Cerner's corporate offices in
> Kansas City, Missouri, U.S.A at (+1) (816)221-1024.
>
> -
> To unsubscribe, e-mail: [EMAIL PROTECTED]
> For additional commands, e-mail: [EMAIL PROTECTED]
>
>


Re: Boost One Term Query

2007-12-06 Thread java_user_

Thanks for the response Hoss.

The score I receive is from the Explaination object.  The score stays the
same regardless of how I boost the single term.

The score of the query:
apple

Is the same as the score of the query:
apple^3

I am surprised by the result of the test.  Would you expect "apple" and
"apple^3" to receive the same score?

Thanks


hossman wrote:
> 
> 
> first off: if you are looking at the score from the "Hits" class, bear in 
> mind they are "psuedo-normalized" and don't mean much.
> 
> second: a "query" doesn't have a score, a document has a score relative to 
> a query ... scores can't be compared between different queries.
> 
> third: there is a "queryNorm" that comes into play, it's designed to keep 
> scores "managable" you can read more about it (and how to change it if you 
> want) in the scoring documentation.  you should also look at the 
> "Explanation" info for each query/doc to make sure you understand what's 
> going on.
> 
> 
> 
> : For example:
> : apple
> : 
> : Has the same score as:
> : apple^3
> : 
> : But repeating the term will up the score
> : apple apple apple
> : 
> : I expected the score to go up when boosting a one term query.  Is that a
> : wrong expectation?
> 
> 
> 
> -Hoss
> 
> 
> -
> To unsubscribe, e-mail: [EMAIL PROTECTED]
> For additional commands, e-mail: [EMAIL PROTECTED]
> 
> 
> 

-- 
View this message in context: 
http://www.nabble.com/Boost-One-Term-Query-tf4900128.html#a14199255
Sent from the Lucene - Java Users mailing list archive at Nabble.com.


-
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]



RE: best practices for reloading an index for a searcher

2007-12-06 Thread Beyer,Nathan
That does help, thank you.

Does closing a reader, writer or searcher close the underlying
Directory? I've been operating under the assumption that it does not and
that I should be sharing the Directory instance as much as possible.

-Nathan

-Original Message-
From: Erick Erickson [mailto:[EMAIL PROTECTED] 
Sent: Thursday, December 06, 2007 12:10 PM
To: java-user@lucene.apache.org
Subject: Re: best practices for reloading an index for a searcher

If by reload you mean closing and opening the reader, then yes. You need
to do this in order to see the changes since the *last* time you opened
the reader.

Think of it as the reader taking a snapshot of the index and using that
for its lifetime.

Be aware that opening a reader (and running the first searches) is an
expensive operation and shouldn't be done unnecessarily. I guess it
all depends upon your requirements for how soon changes are available
for search.

As for guidelines, to quote the famous wise ones it depends. The
real question is how quickly your users require seeing recent changes.

One technique for handling these updates is to periodically open a new
reader
*without* closing the old one. Fire a few warm-up queries at the new
searcher,
then close the old one and start using the new one. There are some
coordination issues here. Also be aware that this will chew up some
disk space

Hope this helps
Erick

On Dec 6, 2007 12:43 PM, Beyer,Nathan <[EMAIL PROTECTED]> wrote:

> I did some searching on the lucene site and wiki, but didn't quite
find
> what I was looking for in regards to a basic approach to how and when
to
> reload index data. I have a long running process that will be
> continually indexing and concurrently searching the same index and I'm
> looking for a basic approach to reloading the data for the searcher.
> Perhaps I should verify that the data does need to be reloaded. Does a
> manual reload of a changing index need to be performed for a searcher?
>
> I'd appreciate any links to documents, source or some quick pseudo
code.
>
> Thanks.
>
> -Nathan
>

--
CONFIDENTIALITY NOTICE This message and any included attachments are from 
Cerner Corporation and are intended only for the addressee. The information 
contained in this message is confidential and may constitute inside or 
non-public information under international, federal, or state securities laws. 
Unauthorized forwarding, printing, copying, distribution, or use of such 
information is strictly prohibited and may be unlawful. If you are not the 
addressee, please promptly delete this message and notify the sender of the 
delivery error by e-mail or you may call Cerner's corporate offices in Kansas 
City, Missouri, U.S.A at (+1) (816)221-1024.

-
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]



Re: Boost One Term Query

2007-12-06 Thread Yonik Seeley
On Dec 6, 2007 2:31 PM, java_user_ <[EMAIL PROTECTED]> wrote:
> Thanks for the response Hoss.
>
> The score I receive is from the Explaination object.  The score stays the
> same regardless of how I boost the single term.
>
> The score of the query:
> apple
>
> Is the same as the score of the query:
> apple^3

This boosts apple 3 times in relation to the other query clauses.  If
there are no other query clauses, it's a bit meaningless.

> I am surprised by the result of the test.  Would you expect "apple" and
> "apple^3" to receive the same score?

Lucene does some "weighting" of the query that causes this to happen.

class Query { [...]
  /** Expert: Constructs and initializes a Weight for a top-level query. */
  public Weight weight(Searcher searcher)
throws IOException {
Query query = searcher.rewrite(this);
Weight weight = query.createWeight(searcher);
float sum = weight.sumOfSquaredWeights();
float norm = getSimilarity(searcher).queryNorm(sum);
weight.normalize(norm);
return weight;
  }

Are you simply curious about this, or is it causing you a problem somehow?

-Yonik

-
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]



Re: Boost One Term Query

2007-12-06 Thread java_user_

I was hoping to boost the entire query to give the query more weight compared
to other queries.

Instead of boosting my entire query, I may just multiply the resulting score
by the weight (or something like that).



Yonik Seeley wrote:
> 
> On Dec 6, 2007 2:31 PM, java_user_ <[EMAIL PROTECTED]> wrote:
>> Thanks for the response Hoss.
>>
>> The score I receive is from the Explaination object.  The score stays the
>> same regardless of how I boost the single term.
>>
>> The score of the query:
>> apple
>>
>> Is the same as the score of the query:
>> apple^3
> 
> This boosts apple 3 times in relation to the other query clauses.  If
> there are no other query clauses, it's a bit meaningless.
> 
>> I am surprised by the result of the test.  Would you expect "apple" and
>> "apple^3" to receive the same score?
> 
> Lucene does some "weighting" of the query that causes this to happen.
> 
> class Query { [...]
>   /** Expert: Constructs and initializes a Weight for a top-level query.
> */
>   public Weight weight(Searcher searcher)
> throws IOException {
> Query query = searcher.rewrite(this);
> Weight weight = query.createWeight(searcher);
> float sum = weight.sumOfSquaredWeights();
> float norm = getSimilarity(searcher).queryNorm(sum);
> weight.normalize(norm);
> return weight;
>   }
> 
> Are you simply curious about this, or is it causing you a problem somehow?
> 
> -Yonik
> 
> -
> To unsubscribe, e-mail: [EMAIL PROTECTED]
> For additional commands, e-mail: [EMAIL PROTECTED]
> 
> 
> 

-- 
View this message in context: 
http://www.nabble.com/Boost-One-Term-Query-tf4900128.html#a14200211
Sent from the Lucene - Java Users mailing list archive at Nabble.com.


-
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]



Conversion from Lucene 1.4.3 API to 2.2.0 API.

2007-12-06 Thread ts01

Hi,

With Lucene 1.4.3, we had used this constructor for Field. What is its
equivalent in Lucene 2.2.0?

  /** Constructs a String-valued Field that is tokenized and indexed,
and is stored in the index, for return with hits. The tokens are
generated from the reader */
  public static final Field Text(String name, String value, Reader reader) {
  Field aField = Text(name, value);
  aField.readerValue = reader;
  return aField;
  }

Thanks
-- 
View this message in context: 
http://www.nabble.com/Conversion-from-Lucene-1.4.3-API-to-2.2.0-API.-tf4958346.html#a14200220
Sent from the Lucene - Java Users mailing list archive at Nabble.com.


-
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]



Custom SynonymMap

2007-12-06 Thread java_user_

Is there a way to add synonyms to the SynonymMap map?
The HashMap that holds all the words is not visible (private) so extending
it will not work. 

Has anyone added their own custom vocabulary?
-- 
View this message in context: 
http://www.nabble.com/Custom-SynonymMap-tf4958585.html#a14201041
Sent from the Lucene - Java Users mailing list archive at Nabble.com.


-
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]



RE: Indexing XML documents (Urdu)

2007-12-06 Thread Seneviratne_Yasoja
Hi Liaqat, I'd rather keep the email-thread on the lucene user list.

The code I used is below, the thing to do is be careful when reading
UTF-8 text so you don't garble it.
 
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.search.*;

import javax.xml.parsers.*;
import java.io.*;

public class testNonEnglishXML
{
public static void main(String[] args){
if(args.length < 3) {
System.out.println("Usage:  
" );
System.exit(-1);
}

testNonEnglishXML idx = new testNonEnglishXML();
try {
idx.index(args[0], args[1]);
idx.search(args[1], args[2]);
} catch(Exception e) {
e.printStackTrace();
}
}

public void index(String filePath, String indexPath) throws
IOException, UnsupportedEncodingException, ParserConfigurationException,
SAXException {
Document luceneDoc = new Document();
luceneDoc.add(new Field("name", filePath, Field.Store.YES,
Field.Index.UN_TOKENIZED));

StringBuffer sb = new StringBuffer(1024); String line;
BufferedReader reader = new BufferedReader( new
InputStreamReader(new FileInputStream(new File(filePath)),  "UTF-8") );
while( (line = reader.readLine()) != null) {
sb.append(line);
}
luceneDoc.add(new Field("contents", sb.toString(),
Field.Store.NO, Field.Index.TOKENIZED));

TestParser parser = new TestParser(luceneDoc);
SAXParser saxParser =
SAXParserFactory.newInstance().newSAXParser();
saxParser.parse(new FileInputStream(new File(filePath)),
parser);

IndexWriter writer = new IndexWriter(indexPath, new
SimpleAnalyzer(), true);
writer.addDocument(luceneDoc);
writer.optimize();
writer.close();
}

public void search(String indexPath, String queryFilePath) throws
IOException {
BufferedReader reader = new BufferedReader( new
InputStreamReader(new FileInputStream(new File(queryFilePath)),
"UTF-8") );
String queryString =  new String( reader.readLine() );
reader.close();

IndexSearcher searcher = new
IndexSearcher(FSDirectory.getDirectory(indexPath));
PhraseQuery query = new PhraseQuery();
query.add(new Term("contents", queryString));

Hits hits = searcher.search(query);
if(hits.length()> 0)
System.out.println("found " + hits.doc(0).getField("name"));
}

public class TestParser extends DefaultHandler
{
  public TestParser(Document doc)
  {
  luceneDoc = doc;
  }

  public void startElement(String namespaceURI, String localName,
String qName, Attributes atts) throws SAXException
  {
  if(qName.equalsIgnoreCase("title"))
buffer = new String();
  }

  public void characters(char[] ch, int start, int length) throws
SAXException
  {
  buffer +=  new String(ch, start, length);
  }

  public void endElement(String namespaceURI, String localName,
String qName) throws SAXException
  {
  if(qName.equalsIgnoreCase("title"))
luceneDoc.add(new Field(qName, buffer, Field.Store.YES,
Field.Index.UN_TOKENIZED));
  }

  public void endDocument() {}
  public void startDocument() {}
  public void error(SAXParseException e) {}
  public void fatalError(SAXParseException e) {}
  public void ignorableWhitespace(char[] ch, int start, int length)
{}

  private Document luceneDoc = null;
  private String buffer = null;
}
}

-Original Message-
From: Liaqat Ali [mailto:[EMAIL PROTECTED] 
Sent: Thursday, December 06, 2007 1:42 AM
To: Seneviratne, Yasoja
Subject: Indexing XML documents (Urdu)

Hello,

I read your reply. I need some more help in this regard. As you saw the 
text (Urdu language). The whole XML file is a collection of 200 
documents. I am at initial level as SAX is concerned. Can you kindly 
provide me the code how to extract textual information of each document 
plus its Doc number and title  or some guidence, because my whole 
project has stuck  because of this...

Looking for your response  Thanks..

Liaqat




-
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]



Re: Boost One Term Query

2007-12-06 Thread Erick Erickson
I don't believe you can compare scores across queries in any meaningful
way.

This sounds a lot like you're trying to solve some problem and have decided
that boosting and comparing scores across queries is the answer. in other
words, the XY problem.

Perhaps if you explained what you're trying to accomplish someone could
suggest an alternative...

Best
Erick

On Dec 6, 2007 3:12 PM, java_user_ <[EMAIL PROTECTED]> wrote:

>
> I was hoping to boost the entire query to give the query more weight
> compared
> to other queries.
>
> Instead of boosting my entire query, I may just multiply the resulting
> score
> by the weight (or something like that).
>
>
>
> Yonik Seeley wrote:
> >
> > On Dec 6, 2007 2:31 PM, java_user_ <[EMAIL PROTECTED]> wrote:
> >> Thanks for the response Hoss.
> >>
> >> The score I receive is from the Explaination object.  The score stays
> the
> >> same regardless of how I boost the single term.
> >>
> >> The score of the query:
> >> apple
> >>
> >> Is the same as the score of the query:
> >> apple^3
> >
> > This boosts apple 3 times in relation to the other query clauses.  If
> > there are no other query clauses, it's a bit meaningless.
> >
> >> I am surprised by the result of the test.  Would you expect "apple" and
> >> "apple^3" to receive the same score?
> >
> > Lucene does some "weighting" of the query that causes this to happen.
> >
> > class Query { [...]
> >   /** Expert: Constructs and initializes a Weight for a top-level query.
> > */
> >   public Weight weight(Searcher searcher)
> > throws IOException {
> > Query query = searcher.rewrite(this);
> > Weight weight = query.createWeight(searcher);
> > float sum = weight.sumOfSquaredWeights();
> > float norm = getSimilarity(searcher).queryNorm(sum);
> > weight.normalize(norm);
> > return weight;
> >   }
> >
> > Are you simply curious about this, or is it causing you a problem
> somehow?
> >
> > -Yonik
> >
> > -
> > To unsubscribe, e-mail: [EMAIL PROTECTED]
> > For additional commands, e-mail: [EMAIL PROTECTED]
> >
> >
> >
>
> --
> View this message in context:
> http://www.nabble.com/Boost-One-Term-Query-tf4900128.html#a14200211
> Sent from the Lucene - Java Users mailing list archive at Nabble.com.
>
>
> -
> To unsubscribe, e-mail: [EMAIL PROTECTED]
> For additional commands, e-mail: [EMAIL PROTECTED]
>
>


Fwd: Can I search in realtime?

2007-12-06 Thread 游泳池的鱼
Hi, it's my first time to use lucene maillist. I have problem that  when  I
add a document with IndexWriter , it searchable for the IndexSearcher
instance which is creat before the document flush to index? if lucene can
not do this,any suggest to solve this problem?

   Regards,
   cooc


Re: Error running Lucene in Action code

2007-12-06 Thread syedfa

Thanks very much for your reply.  I commented out the line:

attributeMap.clear();

in the startElement() method, and the code ran!  

Thanks for your prompt reply, and my apologies for the delay in responding.

All the best.
Fayyaz


Michael McCandless-2 wrote:
> 
> 
> I think you need to initialize attributeMap, eg add " = new HashMap()" in
> the declaration?
> 
> Mike
> 
> "syedfa" <[EMAIL PROTECTED]> wrote:
>> 
>> Dear Fellow Java & Lucene developers:
>> 
>> I am a Java developer learning lucene and I am currently going through
>> the
>> book Lucene in Action.  At present, I am trying to run the sample code
>> for
>> indexing an xml document using sax.  My code has been slightly updated
>> for
>> Lucene version 2.2:
>> 
>> /*
>>  * To change this template, choose Tools | Templates
>>  * and open the template in the editor.
>>  */
>> 
>> package lucenexml;
>> 
>> 
>> import java.io.File;
>> import java.io.FileInputStream;
>> import java.io.InputStream;
>> import java.io.IOException;
>> import java.util.HashMap;
>> import java.util.Iterator;
>> 
>> import org.xml.sax.helpers.DefaultHandler;
>> import org.xml.sax.SAXException;
>> import org.xml.sax.Attributes;
>> 
>> import javax.xml.parsers.SAXParser;
>> import javax.xml.parsers.SAXParserFactory;
>> import javax.xml.parsers.ParserConfigurationException;
>> 
>> 
>> import org.apache.lucene.document.Document;
>> import org.apache.lucene.document.Field;
>> 
>> /**
>>  *
>>  * @author fayyaz
>>  */
>> public class SAXXMLHandler extends DefaultHandler implements
>> DocumentHandler{
>> 
>> private StringBuffer elementBuffer=new StringBuffer();
>> private HashMap attributeMap;
>> 
>> private Document doc;
>> /**
>>  * @param args the command line arguments
>>  */
>> public static void main(String[] args) throws Exception {
>> // TODO code application logic here
>> SAXXMLHandler handler=new SAXXMLHandler();
>> Document doc=handler.getDocument(new FileInputStream(new
>> File(args[0])));
>> System.out.println(doc);
>> }
>> 
>> public Document getDocument(InputStream is) throws
>> DocumentHandlerException{
>> 
>> SAXParserFactory spf=SAXParserFactory.newInstance();
>> 
>> try{
>> SAXParser parser=spf.newSAXParser();
>> parser.parse(is, this);
>>  
>> }
>> catch (IOException e){
>> throw new DocumentHandlerException("Cannot parse XML
>> document",
>> e);
>> }
>> catch (ParserConfigurationException e) {
>> throw new DocumentHandlerException("Cannot parse XML
>> document",
>> e);
>> }
>> catch (SAXException e){
>> throw new DocumentHandlerException("Cannot parse XML
>> document",
>> e);
>> }
>> return doc;
>> }
>> 
>> public void startDocument(){
>> doc=new Document();
>> }
>> 
>> public void startElement(String uri, String localName, String qName,
>> Attributes atts) throws SAXException{
>> 
>> elementBuffer.setLength(0);
>> attributeMap.clear();
>> if(atts.getLength()>0){
>> attributeMap=new HashMap();
>> for(int i=0; i> attributeMap.put(atts.getQName(i), atts.getValue(i));
>> }
>> }
>> }
>> 
>> public void characters(char[] text, int start, int length){
>> elementBuffer.append(text, start, length);
>> }
>> 
>> public void endElement(String uri, String localName, String qName)
>> throws SAXException{
>> if(qName.equals("address-book")){
>> return;
>> }
>> 
>> else if(qName.equals("contact")){
>> Iterator iter=attributeMap.keySet().iterator();
>> while(iter.hasNext()){
>> String attName=(String)iter.next();
>> String attValue=(String)attributeMap.get(attName);
>> doc.add(new Field(qName, elementBuffer.toString(),
>> Field.Store.YES,Field.Index.NO));
>> }
>> }
>> }
>> }
>> 
>> 
>> However, although the code compiles, I get the following runtime error
>> when
>> I pass the file addressbook.xml (which is used in the book) as a runtime
>> argument:
>> 
>> Exception in thread "main" java.lang.NullPointerException
>> at lucenexml.SAXXMLHandler.startElement(SAXXMLHandler.java:81)
>> at
>> org.apache.xerces.parsers.AbstractSAXParser.startElement(Unknown
>> Source)
>> at
>> org.apache.xerces.impl.dtd.XMLDTDValidator.startElement(Unknown
>> Source)
>> at
>> org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanStartElement(Unknown
>> Source)
>> at
>> org.apache.xerces.impl.XMLDocumentScannerImpl$ContentDispatcher.scanRootElementHook(Unknown
>> Source)
>> at
>> org.apache.xerces.impl.XMLDocumentFragmentScannerImpl$FragmentContentDispatcher.dispa