tomglk commented on a change in pull request #166: URL: https://github.com/apache/solr/pull/166#discussion_r647722195
########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.SolrDocumentFetcher; +import org.apache.solr.search.SolrIndexSearcher; + +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.FieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + + public void setField(String field) { + this.field = field; + } + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @Override + public LinkedHashMap<String,Object> paramsToMap() { + final LinkedHashMap<String,Object> params = defaultParamsToMap(); + params.put("field", field); + return params; + } + + @Override + protected void validate() throws FeatureException { + if (field == null || field.isEmpty()) { + throw new FeatureException(getClass().getSimpleName()+ + ": field must be provided"); + } + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SchemaField schemaField; + private final SolrDocumentFetcher docFetcher; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + if (searcher instanceof SolrIndexSearcher) { + schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field); + } else { + schemaField = null; + } + this.docFetcher = request.getSearcher().getDocFetcher(); + } + + /** + * Return a FeatureScorer that work with stored fields and makes use of the cache if the configured field is stored + * and has no docValues. + * Otherwise, delegate the work to the FieldValueFeature. + * + * @param context the segment this FeatureScorer is working with + * @return FeatureScorer for the current segment and field + * @throws IOException as defined by abstract class Feature + */ + @Override + public FeatureScorer scorer(LeafReaderContext context) throws IOException { + if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) { + return super.scorer(context); + } + return new PrefetchingFieldValueFeatureScorer(this, context, + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); + } + + /** + * A FeatureScorer that reads the stored value for a field + * docFetcher does not request a single field but all the prefetchFields to improve performance through caching + */ + public class PrefetchingFieldValueFeatureScorer extends FieldValueFeatureScorer { + + public PrefetchingFieldValueFeatureScorer(FeatureWeight weight, + LeafReaderContext context, DocIdSetIterator itr) { + super(weight, context, itr); + } + + @Override + public float score() throws IOException { + try { + final Document document = docFetcher.doc(context.docBase + itr.docID(), prefetchFields); + + return super.parseStoredFieldValue(document.getField(field)); + } catch (final IOException e) { + throw new FeatureException( + e.toString() + ": " + + "Unable to extract feature for " + + name, e); Review comment: Small update on that topic: I was looking into possible exceptions that could be thrown and tried to debug whether the order of the fields is equal to the order that they are read in from the index. Based on this: ```java public void visitDocument(int docID, StoredFieldVisitor visitor) throws IOException { final Lucene90CompressingStoredFieldsReader.SerializedDocument doc = document(docID); for (int fieldIDX = 0; fieldIDX < doc.numStoredFields; fieldIDX++) { final long infoAndBits = doc.in.readVLong(); final int fieldNumber = (int) (infoAndBits >>> TYPE_BITS); final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); ... } ``` it seems to me like it only depends on the fieldNumber. I assume that your intention behind the ordered set is, that if the user wanted to prefetch fields `A, B, C, D` and an exception regarding field `C` would happen, they would know that `A` and `B` worked. Did I understand you correctly? The IOException seems to be thrown quite deep in the code, so I am not sure whether we can get useful information from there. I had the idea of adding a toggle that the user could use to disable the prefetching (which would result in the same behavior the `FieldValueFeature` has). This way the user can narrow down the problematic field. ( 33efe62 ) What do you think about that approach? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org