Re: Custom FieldComparator and incorrect sort order

Michael McCandless Wed, 15 Jul 2009 04:05:24 -0700

OK, this is a bug in BooleanScorer2!  I'll open an issue for it shortly... thanks Shalin!


Mike

On Wed, Jul 15, 2009 at 6:32 AM, Michael
McCandless<[email protected]> wrote:
> I'll look into this...
>
> Mike
>
> On Wed, Jul 15, 2009 at 3:55 AM, Shalin Shekhar
> Mangar<[email protected]> wrote:
>> Hello,
>>
>> Over in Solr land, I'm facing a problem while upgrading the lucene version
>> to trunk. Solr has a QueryElevationComponent which is used to boost certain
>> documents to the top. It pre-processes the query to add a few boolean
>> clauses of its own and uses a FieldComparator for the sorting part. This
>> worked fine before the upgrade. There's a test which fixes the position of
>> two docs and then sorts on score ascending. After the upgrade, the score asc
>> does not seem to take effect and documents are sorted by score descending.
>>
>> I've tried to remove the solr baggage in the following code. Changing the
>> score sort to ascending/descending gives the exact same order of the
>> results. Any ideas on what may be the problem?
>>
>> package org.apache.solr;
>>
>> import org.apache.lucene.analysis.WhitespaceAnalyzer;
>> import org.apache.lucene.document.Document;
>> import org.apache.lucene.document.Field;
>> import org.apache.lucene.index.IndexReader;
>> import org.apache.lucene.index.IndexWriter;
>> import org.apache.lucene.index.Term;
>> import org.apache.lucene.search.*;
>> import org.apache.lucene.store.RAMDirectory;
>> import org.junit.Test;
>>
>> import java.io.IOException;
>> import java.util.HashMap;
>> import java.util.Map;
>>
>> public class TestSort {
>>
>>  private final Map<String, Integer> priority = new HashMap<String,
>> Integer>();
>>
>>  @Test
>>  public void testSorting() throws IOException {
>>    RAMDirectory directory = new RAMDirectory();
>>    IndexWriter writer = new IndexWriter(directory, new
>> WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
>>    writer.setMaxBufferedDocs(2);
>>    writer.setMergeFactor(1000);
>>    writer.addDocument(adoc("id", "a", "title", "ipod", "str_s", "a"));
>>    writer.addDocument(adoc("id", "b", "title", "ipod ipod", "str_s", "b"));
>>    writer.addDocument(adoc("id", "c", "title", "ipod ipod ipod", "str_s",
>> "c"));
>>    writer.addDocument(adoc("id", "x", "title", "boosted", "str_s", "x"));
>>    writer.addDocument(adoc("id", "y", "title", "boosted boosted", "str_s",
>> "y"));
>>    writer.addDocument(adoc("id", "z", "title", "boosted boosted boosted",
>> "str_s", "z"));
>>    writer.close();
>>
>>    IndexSearcher searcher = new IndexSearcher(directory, true);
>>    BooleanQuery newq = new BooleanQuery(false);
>>    TermQuery query = new TermQuery(new Term("title", "ipod"));
>>
>>    newq.add(query, BooleanClause.Occur.SHOULD);
>>    newq.add(getElevatedQuery("id", "a", "id", "x"),
>> BooleanClause.Occur.SHOULD);
>>
>>    Sort sort = new Sort(new SortField[]{
>>            new SortField("id", new ElevationComparatorSource(priority),
>> false),
>>            new SortField(null, SortField.SCORE, true)
>>    });
>>    TopDocsCollector topCollector = TopFieldCollector.create(sort, 50,
>> false, true, true, true);
>>    searcher.search(newq, null, topCollector);
>>
>>    TopDocs topDocs = topCollector.topDocs(0, 10);
>>    int nDocsReturned = topDocs.scoreDocs.length;
>>
>>    int[] ids = new int[nDocsReturned];
>>    float[] scores = new float[nDocsReturned];
>>    Document[] documents = new Document[nDocsReturned];
>>    for (int i = 0; i < nDocsReturned; i++) {
>>      ScoreDoc scoreDoc = topDocs.scoreDocs[i];
>>      ids[i] = scoreDoc.doc;
>>      scores[i] = scoreDoc.score;
>>      documents[i] = searcher.doc(ids[i]);
>>      System.out.println("documents[i] = " + documents[i]);
>>      System.out.println("scores[i] = " + scores[i]);
>>    }
>>
>>    searcher.close();
>>  }
>>
>>  private Query getElevatedQuery(String... vals) {
>>    BooleanQuery q = new BooleanQuery(false);
>>    q.setBoost(0);
>>    int max = (vals.length / 2) + 5;
>>    for (int i = 0; i < vals.length - 1; i += 2) {
>>      q.add(new TermQuery(new Term(vals[i], vals[i + 1])),
>> BooleanClause.Occur.SHOULD);
>>      priority.put(vals[i + 1], max--);
>>    }
>>    return q;
>>  }
>>
>>  private Document adoc(String... vals) {
>>    Document doc = new Document();
>>    for (int i = 0; i < vals.length - 2; i += 2) {
>>      doc.add(new Field(vals[i], vals[i + 1], Field.Store.YES,
>> Field.Index.ANALYZED));
>>    }
>>    return doc;
>>  }
>> }
>>
>> class ElevationComparatorSource extends FieldComparatorSource {
>>  private final Map<String, Integer> priority;
>>
>>  public ElevationComparatorSource(final Map<String, Integer> boosts) {
>>    this.priority = boosts;
>>  }
>>
>>  public FieldComparator newComparator(final String fieldname, final int
>> numHits, int sortPos, boolean reversed) throws IOException {
>>    return new FieldComparator() {
>>
>>      FieldCache.StringIndex idIndex;
>>      private final int[] values = new int[numHits];
>>      int bottomVal;
>>
>>      public int compare(int slot1, int slot2) {
>>        // values will be small enough that there is no overflow concern
>>        return values[slot2] - values[slot1];
>>      }
>>
>>      public void setBottom(int slot) {
>>        bottomVal = values[slot];
>>      }
>>
>>      private int docVal(int doc) throws IOException {
>>        String id = idIndex.lookup[idIndex.order[doc]];
>>        Integer prio = priority.get(id);
>>        return prio == null ? 0 : prio.intValue();
>>      }
>>
>>      public int compareBottom(int doc) throws IOException {
>>        return docVal(doc) - bottomVal;
>>      }
>>
>>      public void copy(int slot, int doc) throws IOException {
>>        values[slot] = docVal(doc);
>>      }
>>
>>      public void setNextReader(IndexReader reader, int docBase, int
>> numSlotsFull) throws IOException {
>>        idIndex = FieldCache.DEFAULT.getStringIndex(reader, fieldname);
>>      }
>>
>>      public int sortType() {
>>        return SortField.CUSTOM;
>>      }
>>
>>      public Comparable value(int slot) {
>>        return values[slot];
>>      }
>>    };
>>  }
>> }
>>
>> With Lucene trunk:
>>
>> Sort: new SortField(null, SortField.SCORE, true)
>>
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:a>
>> stored/uncompressed,indexed,tokenized<title:ipod>>
>> scores[i] = 1.4054651
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:x>
>> stored/uncompressed,indexed,tokenized<title:boosted>>
>> scores[i] = 0.0
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:b>
>> stored/uncompressed,indexed,tokenized<title:ipod ipod>>
>> scores[i] = 0.6211337
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:c>
>> stored/uncompressed,indexed,tokenized<title:ipod ipod ipod>>
>> scores[i] = 0.6085842
>>
>> Sort: new SortField(null, SortField.SCORE, false)
>>
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:a>
>> stored/uncompressed,indexed,tokenized<title:ipod>>
>> scores[i] = 1.4054651
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:x>
>> stored/uncompressed,indexed,tokenized<title:boosted>>
>> scores[i] = 0.0
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:b>
>> stored/uncompressed,indexed,tokenized<title:ipod ipod>>
>> scores[i] = 0.6211337
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:c>
>> stored/uncompressed,indexed,tokenized<title:ipod ipod ipod>>
>> scores[i] = 0.6085842
>>
>> With Lucene r779312:
>>
>> Sort: new SortField(null, SortField.SCORE, true)
>>
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:a>
>> stored/uncompressed,indexed,tokenized<title:ipod>>
>> scores[i] = 1.4054651
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:x>
>> stored/uncompressed,indexed,tokenized<title:boosted>>
>> scores[i] = 0.0
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:c>
>> stored/uncompressed,indexed,tokenized<title:ipod ipod ipod>>
>> scores[i] = 0.6085842
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:b>
>> stored/uncompressed,indexed,tokenized<title:ipod ipod>>
>> scores[i] = 0.6211337
>>
>> Sort: new SortField(null, SortField.SCORE, false)
>>
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:a>
>> stored/uncompressed,indexed,tokenized<title:ipod>>
>> scores[i] = 1.4054651
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:x>
>> stored/uncompressed,indexed,tokenized<title:boosted>>
>> scores[i] = 0.0
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:b>
>> stored/uncompressed,indexed,tokenized<title:ipod ipod>>
>> scores[i] = 0.6211337
>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:c>
>> stored/uncompressed,indexed,tokenized<title:ipod ipod ipod>>
>> scores[i] = 0.6085842
>>
>> --
>> Regards,
>> Shalin Shekhar Mangar.
>>
>

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: Custom FieldComparator and incorrect sort order

Reply via email to