On 26/02/2013 18:01, Paul Taylor wrote:
On 26/02/2013 17:22, Uwe Schindler wrote:
Hi,
You cannot override rewrite() because you could easily break the logic
behind TopTermsRewrite. If you want another behavior, subclass another
base class and wrap the TopTermsRewrite instead of subclassing it (the
generics also enforce that the rewrite needs to rewrite() to a class
that’s
specified in the generics parameter).
addClause() is not final, it's abstract. There is one "final" helper
method used
by the rewrite itself, but the methods you need to override are
abstract.
Also your generics seem to be wrong, leading to the above question...
In addition, you cast the call to super.rewrite() to DisjMaxQuery, so
it is definitely a DisjMaxQuery (because getTopLevelQuery() always
returns one, see generics). You then pass this DisjMaxQuery to this
"getQueryBoostMethod", which checks for instanceof PrefixQuery. This
can never return true, so the boost is always 1. You can therefore
nuke the whole rewrite method (as it changes nothing) and only
implement getTopLevelQuery() and addClause().
Uwe
I can't make much sense of this. I'm trying to use the same rewrite method for
QueryParser
and
FuzzyQuery
PrefixQuery
I'm confused as to whether I should be applying it at both stages, and what
the generic parameter should be,
as the javadoc for QueryParser.setMultiTermRewriteMethod() implies you
need to change this to use a different rewrite for fuzzy and prefix
queries, but you seem to be saying I should be using FuzzyQuery as the
generic type, which would prevent this, wouldn't it?
Is there a fuller explanation of rewrite methods anywhere ?
Full class below if it makes things clearer
Paul
package org.musicbrainz.search.servlet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.musicbrainz.search.LuceneVersion;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
public class DismaxQueryParser {
/**
 * Placeholder field name. It is handed to the underlying parser as its default
 * field and is prefixed to every query string built in parse().
 */
public static String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC";

/** The alias-aware parser that every call on this class delegates to. */
protected DisjunctionQueryParser dqp;

/**
 * Creates an instance without a parser; {@code dqp} is left unassigned here.
 */
protected DismaxQueryParser() {
}

/**
 * Creates an instance whose parser uses {@link #IMPOSSIBLE_FIELD_NAME} as the
 * default field.
 *
 * @param analyzer analyzer passed straight to the underlying parser
 */
public DismaxQueryParser(org.apache.lucene.analysis.Analyzer analyzer) {
    dqp = new DisjunctionQueryParser(IMPOSSIBLE_FIELD_NAME, analyzer);
    //TODO FIXME
    //dqp.setMultiTermRewriteMethod(new MultiTermUseIdfOfSearchTerm(100));
}
/**
 * Parses the query text twice against the placeholder field: once as a
 * parenthesised group of terms and once as a double-quoted phrase, then
 * combines both results with {@link #buildTopQuery(Query, Query)}.
 *
 * @param query raw query text; it is spliced into the parser syntax as-is,
 *              without any escaping
 * @return the query returned by {@code buildTopQuery} for the two parses
 * @throws org.apache.lucene.queryparser.classic.ParseException if either
 *         parse fails
 */
public Query parse(String query) throws
        org.apache.lucene.queryparser.classic.ParseException {
    final String termSyntax =
            DismaxQueryParser.IMPOSSIBLE_FIELD_NAME + ":(" + query + ")";
    final String phraseSyntax =
            DismaxQueryParser.IMPOSSIBLE_FIELD_NAME + ":\"" + query + "\"";

    // The term form is parsed before the phrase form.
    final Query termQuery = dqp.parse(termSyntax);
    final Query phraseQuery = dqp.parse(phraseSyntax);
    return buildTopQuery(termQuery, phraseQuery);
}
/**
 * Combines the term query with the phrase query.
 * <p>
 * When the phrase parse produced a {@link DisjunctionMaxQuery}, the result is a
 * boolean query in which the term query is required (MUST) and the phrase query
 * is optional (SHOULD). Under normal circumstances, if nothing matches the term
 * query nothing will match the phrase query either. In every other case the
 * term query is returned unchanged.
 *
 * @param term   query built from the individual terms
 * @param phrase query built from the quoted form of the same text
 * @return the combined boolean query, or {@code term} itself
 */
protected Query buildTopQuery(Query term, Query phrase) {
    if (!(phrase instanceof DisjunctionMaxQuery)) {
        return term;
    }
    // NOTE(review): the 'true' flag is presumably Lucene's disableCoord - confirm.
    BooleanQuery combined = new BooleanQuery(true);
    combined.add(term, BooleanClause.Occur.MUST);
    combined.add(phrase, BooleanClause.Occur.SHOULD);
    return combined;
}
/**
 * Registers an alias with the underlying parser.
 *
 * @param field       alias name as it appears in query text
 * @param dismaxAlias definition the alias is mapped to
 */
public void addAlias(String field, DismaxAlias dismaxAlias) {
    this.dqp.addAlias(field, dismaxAlias);
}
static class DisjunctionQueryParser extends QueryParser {
//Only make search terms that are at least this length fuzzy searchable.
//The same value is also passed as the third argument of FuzzyQuery in getFuzzyQuery.
//NOTE(review): in Lucene 4 that argument looks like the prefix length - confirm this is intended.
protected static final int MIN_FIELD_LENGTH_TO_MAKE_FUZZY = 4;

//Similarity value passed to getFuzzyQuery when expanding a term for a fuzzy alias field
protected static final float FUZZY_SIMILARITY = 0.5f;

//Reduce boost of wildcard/fuzzy matches compared to exact matches
protected static final float WILDCARD_BOOST_REDUCER = 0.8f;

//Reduce phrase query scores otherwise there is too much difference between a document
//that matches on phrase and one that doesn't quite.
protected static final float PHRASE_BOOST_REDUCER = 0.2f;

/**
 * Creates the parser for the configured Lucene version.
 *
 * @param defaultField field used when the query text names none
 * @param analyzer     analyzer handed to the parent parser
 */
public DisjunctionQueryParser(String defaultField,
        org.apache.lucene.analysis.Analyzer analyzer) {
    super(LuceneVersion.LUCENE_VERSION, defaultField, analyzer);
}
//Field to DismaxAlias
protected Map<String, DismaxAlias> aliases =
        new HashMap<String, DismaxAlias>(3);

/**
 * Stores the alias under the given field name, replacing any alias already
 * stored under that name.
 *
 * @param field       alias name looked up in getFieldQuery
 * @param dismaxAlias definition to store
 */
public void addAlias(String field, DismaxAlias dismaxAlias) {
    this.aliases.put(field, dismaxAlias);
}
// TODO FIXME: Unable to create rewrite using original idf
// Rewrite method used by prefix search and fuzzy search; uses the idf of the original term
//MultiTermQuery.RewriteMethod fuzzyAndPrefixQueryRewriteMethod
//    = new MultiTermUseIdfOfSearchTerm(200);
protected boolean checkQuery(DisjunctionMaxQuery q, Query
querySub, boolean quoted, DismaxAlias a, String f) {
if (querySub != null) {
//if query was quoted but doesn't generate a phrase
query we reject it
if ((!quoted) || (querySub instanceof PhraseQuery)) {
//Reduce phrase because will have matched both
parts giving far too much score differential
if (quoted) {
querySub.setBoost(PHRASE_BOOST_REDUCER);
} else {
querySub.setBoost(a.getFields().get(f).getBoost());
}
q.add(querySub);
return true;
}
}
return false;
}
@Override
//TODO FIXME was using a FLOAT similarity value of 0.5 but now
chnaged to integral
protected Query getFuzzyQuery(String field, String termStr,
float minSimilarity) {
Term t = new Term(field, termStr);
FuzzyQuery fq = new FuzzyQuery(t, 2,
MIN_FIELD_LENGTH_TO_MAKE_FUZZY);
//TODO FIXME
//fq.setRewriteMethod(fuzzyAndPrefixQueryRewriteMethod);
return fq;
}
/**
 * Builds the query for one field, expanding aliases into a disjunction.
 * <p>
 * For a field that is not an alias, the parent parser builds the query; any
 * exception it throws is swallowed and {@code null} is returned instead.
 * <p>
 * For an alias, a sub-query is built for every field of the alias and offered
 * to {@code checkQuery}. In addition, when the text is unquoted, is at least
 * {@code MIN_FIELD_LENGTH_TO_MAKE_FUZZY} characters long, the sub-query is a
 * {@link TermQuery} and the alias field is flagged fuzzy, a fuzzy query and a
 * prefix query for the same term are added with a reduced boost.
 *
 * @param field     field or alias name
 * @param queryText text to search for
 * @param quoted    whether the text was quoted in the original query
 * @return the resulting query, or {@code null} if nothing could be built
 * @throws ParseException if building a per-field sub-query for an alias fails
 */
protected Query getFieldQuery(String field, String queryText,
        boolean quoted)
        throws ParseException
{
    //usual Field
    if (!aliases.containsKey(field)) {
        try {
            return super.getFieldQuery(field, queryText, quoted);
        } catch (Exception e) {
            return null;
        }
    }

    //field is an alias
    DismaxAlias alias = aliases.get(field);
    DisjunctionMaxQuery disjunction = new DisjunctionMaxQuery(alias.getTie());
    boolean somethingAdded = false;
    final boolean expandable =
            !quoted && queryText.length() >= MIN_FIELD_LENGTH_TO_MAKE_FUZZY;

    for (String fieldName : alias.getFields().keySet()) {
        DismaxAlias.AliasField aliasField = alias.getFields().get(fieldName);

        //the plain sub-query is built first in every case
        Query exact = getFieldQuery(fieldName, queryText, quoted);

        if (expandable && exact instanceof TermQuery && aliasField.isFuzzy()) {
            Term t = ((TermQuery) exact).getTerm();
            Query wildcard = newPrefixQuery(new Term(t.field(), t.text()));
            Query fuzzy = getFuzzyQuery(t.field(), t.text(), FUZZY_SIMILARITY);
            float reducedBoost = aliasField.getBoost() * WILDCARD_BOOST_REDUCER;

            fuzzy.setBoost(reducedBoost);
            disjunction.add(fuzzy);
            wildcard.setBoost(reducedBoost);
            disjunction.add(wildcard);
        }

        if (checkQuery(disjunction, exact, quoted, alias, fieldName)) {
            somethingAdded = true;
        }
    }

    //Only hand back the disjunction if checkQuery accepted at least one sub-query
    if (somethingAdded) {
        return disjunction;
    }
    return null;
}
/**
 * Creates the prefix query for a term. No rewrite method is set on it here.
 *
 * @param prefix term whose text is the prefix to match
 * @return a freshly constructed {@link PrefixQuery}
 */
protected Query newPrefixQuery(Term prefix){
    //TODO FIXME
    //query.setRewriteMethod(fuzzyAndPrefixQueryRewriteMethod);
    return new PrefixQuery(prefix);
}
}
/*
TODO FIXME: this class is disabled because it was overriding methods that are now final
public static class MultiTermUseIdfOfSearchTerm<Q extends
DisjunctionMaxQuery> extends TopTermsRewrite<Query> {
//public static final class MultiTermUseIdfOfSearchTerm extends
TopTermsRewrite<BooleanQuery> {
private final TFIDFSimilarity similarity;
public MultiTermUseIdfOfSearchTerm(int size) {
super(size);
this.similarity = new DefaultSimilarity();
}
@Override
protected int getMaxSize() {
return BooleanQuery.getMaxClauseCount();
}
@Override
protected DisjunctionMaxQuery getTopLevelQuery() {
return new DisjunctionMaxQuery(0.1f);
}
@Override
protected void addClause(Query topLevel, Term term, float boost) {
final Query tq = new ConstantScoreQuery(new TermQuery(term));
tq.setBoost(boost);
((DisjunctionMaxQuery)topLevel).add(tq);
}
protected float getQueryBoost(final IndexReader reader, final
MultiTermQuery query)
throws IOException {
float idf = 1f;
float df;
if (query instanceof PrefixQuery)
{
PrefixQuery fq = (PrefixQuery) query;
df = reader.docFreq(fq.getPrefix());
if(df>=1)
{
//Same as idf value for search term, 0.5 acts as
length norm
idf = (float)Math.pow(similarity.idf((int) df,
reader.numDocs()),2) * 0.5f;
}
}
return idf;
}
@Override
public Query rewrite(final IndexReader reader, final
MultiTermQuery query) throws IOException {
DisjunctionMaxQuery bq =
(DisjunctionMaxQuery)super.rewrite(reader, query);
float idfBoost = getQueryBoost(reader, query);
Iterator<Query> iterator = bq.iterator();
while(iterator.hasNext())
{
Query next = iterator.next();
next.setBoost(next.getBoost() * idfBoost);
}
return bq;
}
}
*/
}