(This is an expanded version of my earlier post, in the hope that someone will comment.)
I am trying to port the reverse wildcard support from SOLR to base Lucene. In broad strokes, I will use a PerFieldAnalyzer map with the INDEXWRITER such that fields that I want to be indexed both ways will call my "SuperAnalyzer" (StandardAnalyzer+ReverseWildcardFilter) When using INDEXSEARCHER, I use a extended version of QueryParser, that for these fields reverses as necessary. The Analyzer passed here, is JUST standardanalyzer, not superanalyzer 1. Is this the right approach? 2. Please glance at Superanalyzer. am I implementing things right? It's hard to know what to override from extending Analyzer class. This SEEMS to work in tests, but obviously I am concerned about missing a subtlety Superanalyzer ========= public class SuperAnalyzer extends Analyzer { final private Analyzer base; final private ReversedWildcardFilterFactory filter; public SuperAnalyzer(Analyzer base,ReversedWildcardFilterFactory filter) { this.base=base; this.filter=filter; } @Override public int getPositionIncrementGap(String fieldName) { return this.base.getPositionIncrementGap(fieldName); // or something else } @Override public TokenStream tokenStream(String fieldName, Reader reader) { // this looks ok TokenStream ts=this.base.tokenStream(fieldName, reader); return this.filter.create(ts); } @Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { TokenStream ts=this.base.reusableTokenStream(fieldName, reader); return this.filter.create(ts); } // other items to override? 
} ReversedWildcardFilterFactory (greatly simplified from SOLR) ======================================== public class ReversedWildcardFilterFactory { final private char markerChar = ReverseStringFilter.START_OF_HEADING_MARKER; final private boolean withOriginal; final private int maxPosAsterisk; final private int maxPosQuestion; final private int minTrailing; final private float maxFractionAsterisk; public ReversedWildcardFilterFactory(boolean withOriginal,int maxPosAsterisk,int maxPosQuestion,int minTrailing,float maxFractionAsterisk) { this.withOriginal=withOriginal; this.maxPosAsterisk=maxPosAsterisk; this.maxPosQuestion=maxPosQuestion; this.minTrailing=minTrailing; this.maxFractionAsterisk=maxFractionAsterisk; } public ReversedWildcardFilterFactory() { this(true, 2, 1, 2, 0.0f); } public TokenStream create(TokenStream input) { return new ReversedWildcardFilter(input, this.withOriginal, this.markerChar); } /** * This method encapsulates the logic that determines whether * a query token should be reversed in order to use the * reversed terms in the index. * @param token input token. * @return true if input token should be reversed, false otherwise. */ public boolean shouldReverse(String token) { int posQ = token.indexOf('?'); int posA = token.indexOf('*'); if (posQ == -1 && posA == -1) { // not a wildcard query return false; } int pos; int lastPos; int len = token.length(); lastPos = token.lastIndexOf('?'); pos = token.lastIndexOf('*'); if (pos > lastPos) lastPos = pos; if (posQ != -1) { pos = posQ; if (posA != -1) { pos = Math.min(posQ, posA); } } else { pos = posA; } if (len - lastPos < this.minTrailing) { // too few trailing chars return false; } if (posQ != -1 && posQ < this.maxPosQuestion) { // leading '?' 
return true; } if (posA != -1 && posA < this.maxPosAsterisk) { // leading '*' return true; } // '*' in the leading part if (this.maxFractionAsterisk > 0.0f && pos < token.length() * this.maxFractionAsterisk) { return true; } return false; } public char getMarkerChar() { return this.markerChar; } } SolrQueryParser (greatly simplified from SOLR) ================================ public class SolrQueryParser extends QueryParser { protected final ReversedWildcardFilterFactory reverseFactory; protected final Set<String> fieldsToReverse; public SolrQueryParser(Version version, String defaultField, Analyzer analyzer) { this(version, defaultField, analyzer, null, null); } public SolrQueryParser(Version version, String defaultField, Analyzer analyzer, ReversedWildcardFilterFactory reverseFactory, Set<String> fieldsToSupportReverse) { super(version, defaultField, analyzer); this.reverseFactory=reverseFactory; this.fieldsToReverse=(fieldsToSupportReverse==null) ? new HashSet<String>() : new HashSet<String>(fieldsToSupportReverse); //setLowercaseExpandedTerms(false); //setEnablePositionIncrements(true); checkAllowLeadingWildcards(); } protected void checkAllowLeadingWildcards() { boolean allow = false; if (this.reverseFactory !=null) { if (!this.fieldsToReverse.isEmpty()) allow=true; } // should be enabled on a per-field basis if (allow) { setAllowLeadingWildcard(true); } } @Override protected Query getWildcardQuery(String field, String termStrp) throws ParseException { // *:* -> MatchAllDocsQuery String termStr=termStrp; if ("*".equals(field) && "*".equals(termStr)) { return newMatchAllDocsQuery(); } // can we use reversed wildcards in this field? 
ReversedWildcardFilterFactory factory = this.reverseFactory; if ((factory != null) && (this.fieldsToReverse.contains(field)) && (factory.shouldReverse(termStr))) { int len = termStr.length(); char[] chars = new char[len+1]; chars[0] = factory.getMarkerChar(); termStr.getChars(0, len, chars, 1); ReversedWildcardFilter.reverse(chars, 1, len); termStr = new String(chars); } Query q = super.getWildcardQuery(field, termStr); if (q instanceof WildcardQuery) { // use a constant score query to avoid overflowing clauses WildcardQuery wildcardQuery = new WildcardQuery(((WildcardQuery)q).getTerm()); return wildcardQuery; } return q; } } (ReversedWildcardFilter itself is more or less as it is in Lucene-Contrib) --------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org For additional commands, e-mail: java-user-h...@lucene.apache.org