Another problem with the QueryParser

Lucifer Hammer Wed, 15 Nov 2006 20:46:51 -0800

Hi,

I posted a few weeks ago with an issue that revolved around parens in a
query.  Since then, we've been testing other booleans and came across this
anomaly.  The test code is almost the same, I'm just modifying the queries.
Before I enter it as a bug, I wanted to run it by this group to see if I'm
just not looking at the boolean expression correctly.


Here's the issue:

I created an index with 5 documents, all have one field: "text", with the
following contents:
doc1:text:"Table Chair Spoon"
doc2:text:"Table Chair Spoon Fork"
doc3:text:"Table Spoon Fork"
doc4:text:"Chair Spoon Fork"
doc5:text:"Spoon Fork"

When I enter the query: "Table OR NOT Chair"  I get one hit, doc3
When I enter the query: "Table OR (NOT Chair)" I get 3 hits: doc1, doc2, and
doc3

I expected both queries to return 4 hits: doc1, doc2, doc3, doc5.

Is this a bug, or am I not understanding the query language correctly?
(There are two separate issues here: the first query should have worked
correctly. The second might have the same problem that I found in my
previous post - with subqueries beginning with the NOT operator; however,
since NOT is a unary operator, I'm not sure I understand.)

I'm attaching test code.  The program creates an index in the directory
which you pass into the main program.

Thanks!
L

-----------------------------------------------------------------------------------------------------


import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.index.Term;



import java.io.File;
import java.io.IOException;
import java.io.FileReader;

/**
 * Stand-alone reproduction harness for QueryParser behavior with the
 * <code>NOT</code> operator.
 *
 * <p>The program (re)creates a small index of five single-field documents in
 * the directory given on the command line, then runs two boolean queries
 * against it and prints the parsed query, the hit count and the stored text
 * of every hit.
 */
public class IndexTest {

   /** Name of the single field every test document carries. */
   private static final String FIELD_NAME = "text";

   /** Field contents of the test documents, in insertion order (doc1..doc5). */
   private static final String[] DOCUMENT_TEXTS = {
       "Table Chair Spoon",
       "Table Chair Spoon Fork",
       "Table Spoon Fork",
       "Chair Spoon Fork",
       "Spoon Fork"
   };

   /**
    * Creates a fresh index in <code>indexDir</code> (the writer is opened with
    * <code>create = true</code>) and adds one document per entry of
    * {@link #DOCUMENT_TEXTS}.
    *
    * @param indexDir directory in which the index is created
    * @throws IOException if the index cannot be created or written
    */
   public static void create(File indexDir) throws IOException {
       IndexWriter writer = new IndexWriter(indexDir, new WhitespaceAnalyzer(), true);
       try {
           for (int i = 0; i < DOCUMENT_TEXTS.length; i++) {
               Document doc = new Document();
               doc.add(new Field(FIELD_NAME,
                       DOCUMENT_TEXTS[i],
                       Field.Store.YES,
                       Field.Index.TOKENIZED,
                       Field.TermVector.NO));
               writer.addDocument(doc);
           }
       } finally {
           // Close even when adding a document fails, so the writer is not
           // left open on the index directory.
           writer.close();
       }
   }

   /**
    * Parses <code>queryString</code> against the "text" field with OR as the
    * default operator, runs it against the index in <code>indexDir</code> and
    * prints the parsed query, the number of hits and the stored text of each
    * hit to standard output.
    *
    * <p>If the query string cannot be parsed, the exception is printed to
    * standard output and the method returns without searching.
    *
    * @param indexDir    directory containing the index to search
    * @param queryString query in QueryParser syntax
    * @throws IOException if the index cannot be opened or read
    */
   public static void query(File indexDir, String queryString) throws IOException {
       Query query;
       try {
           QueryParser parser = new QueryParser(FIELD_NAME, new WhitespaceAnalyzer());
           parser.setDefaultOperator(QueryParser.OR_OPERATOR);
           query = parser.parse(queryString);
       } catch (Exception qe) {
           // Deliberately best-effort: report the bad query and carry on with
           // whatever the caller wants to run next.
           System.out.println(qe.toString());
           return;
       }
       if (query == null) {
           return;
       }
       System.out.println("Query: " + query.toString());

       IndexReader reader = IndexReader.open(indexDir);
       try {
           IndexSearcher searcher = new IndexSearcher(reader);
           try {
               Hits hits = searcher.search(query);
               System.out.println("Hits: " + hits.length());

               for (int i = 0; i < hits.length(); i++) {
                   System.out.println(hits.doc(i).get(FIELD_NAME) + " ");
               }
           } finally {
               searcher.close();
           }
       } finally {
           // Closed last, after the searcher, and also when the search or
           // the printing of results throws.
           reader.close();
       }
   }

   /**
    * Entry point: builds the test index in the given directory and runs the
    * two queries under investigation.
    *
    * @param args exactly one element, the index directory
    * @throws Exception if the argument count is wrong or index access fails
    */
   public static void main(String[] args) throws Exception {
       if (args.length != 1) {
           throw new IllegalArgumentException(
                   "Usage: " + IndexTest.class.getName() + " <index dir>");
       }
       File indexDir = new File(args[0]);
       create(indexDir);
       query(indexDir, "Table OR NOT Chair");
       query(indexDir, "Table OR (NOT Chair)");
   }
}

Another problem with the QueryParser

Reply via email to