cpoerschke commented on a change in pull request #123: URL: https://github.com/apache/solr/pull/123#discussion_r632769663
########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java ########## @@ -57,50 +67,79 @@ public void setField(String field) { } @Override - public LinkedHashMap<String,Object> paramsToMap() { - final LinkedHashMap<String,Object> params = defaultParamsToMap(); + public LinkedHashMap<String, Object> paramsToMap() { + final LinkedHashMap<String, Object> params = defaultParamsToMap(); params.put("field", field); return params; } @Override protected void validate() throws FeatureException { if (field == null || field.isEmpty()) { - throw new FeatureException(getClass().getSimpleName()+ - ": field must be provided"); + throw new FeatureException(getClass().getSimpleName() + ": field must be provided"); } } - public FieldValueFeature(String name, Map<String,Object> params) { + public FieldValueFeature(String name, Map<String, Object> params) { super(name, params); } @Override - public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, - SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) - throws IOException { + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, SolrQueryRequest request, + Query originalQuery, Map<String, String[]> efi) throws IOException { return new FieldValueFeatureWeight(searcher, request, originalQuery, efi); } public class FieldValueFeatureWeight extends FeatureWeight { + private final SchemaField schemaField; public FieldValueFeatureWeight(IndexSearcher searcher, - SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + SolrQueryRequest request, Query originalQuery, Map<String, String[]> efi) { super(FieldValueFeature.this, searcher, request, originalQuery, efi); + if (searcher instanceof SolrIndexSearcher) { + schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field); + } else { + schemaField = null; + } } + /** + * Return a FeatureScorer that uses docValues or storedFields if no docValues are present + * + * @param context the segment this FeatureScorer is working with + * @return FeatureScorer for the current segment and field + * @throws IOException as defined by abstract class Feature + */ @Override public FeatureScorer scorer(LeafReaderContext context) throws IOException { + if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) { + + FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field); + DocValuesType docValuesType = fieldInfo != null ? fieldInfo.getDocValuesType() : DocValuesType.NONE; + + if (DocValuesType.NUMERIC.equals(docValuesType)) { + return new NumericDocValuesFieldValueFeatureScorer(this, context, + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType().getNumberType()); + } else if (DocValuesType.SORTED.equals(docValuesType)) { + return new SortedDocValuesFieldValueFeatureScorer(this, context, + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); + // If type is NONE, this segment has no docs with this field. That's not a problem, because we won't call score() anyway Review comment: Have added a commit to split the dual purpose `DocValuesFieldValueFeatureScorer` into two, I think it helps reduce type casting and this-or-that choices (and the class members required to make those choices) in the DocValuesFieldValueFeatureScorer implementation, what do you think? ########## File path: solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml ########## @@ -18,13 +18,22 @@ <schema name="example" version="1.5"> <fields> - <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> + <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false"/> + <field name="finalScore" type="string" indexed="true" stored="true" multiValued="false"/> + <field name="finalScoreFloat" type="float" indexed="true" stored="true" multiValued="false"/> + <field name="field" type="text_general" indexed="true" stored="false" multiValued="false"/> <field name="title" type="text_general" indexed="true" stored="true"/> <field name="description" type="text_general" indexed="true" stored="true"/> <field name="keywords" type="text_general" indexed="true" stored="true" multiValued="true"/> <field name="popularity" type="int" indexed="true" stored="true" /> Review comment: Added a commit with additional `TestFieldValueFeature` coverage, for the numeric fields only so far i.e. `dvStrNumPopularity` and `dvStrBoolPopularity` not yet. ########## File path: solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml ########## @@ -38,6 +51,11 @@ <copyField source="title" dest="text"/> <copyField source="description" dest="text"/> + <copyField source="popularity" dest="dvIntPopularity"/> + <copyField source="popularity" dest="dvLongPopularity"/> + <copyField source="popularity" dest="dvFloatPopularity"/> + <copyField source="popularity" dest="dvDoublePopularity"/> Review comment: observation: `copyField` here works for the `dv*Popularity` fields but the `dv(Int|Long|Float|Double)Field` fields cannot use it since the defaulting logic and the popularity/10 logic is happening there. ########## File path: solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java ########## @@ -70,86 +78,92 @@ public void after() throws Exception { @Test public void testRanking() throws Exception { - - final SolrQuery query = new SolrQuery(); - query.setQuery("title:w1"); - query.add("fl", "*, score"); - query.add("rows", "4"); - - // Normal term match - assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'"); - - query.add("rq", "{!ltr model=popularity-model reRankDocs=4}"); - - assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'"); - - query.setQuery("*:*"); - query.remove("rows"); - query.add("rows", "8"); - query.remove("rq"); - query.add("rq", "{!ltr model=popularity-model reRankDocs=8}"); - - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'"); + for (String field : FIELD_NAMES) { + + final SolrQuery query = new SolrQuery(); + query.setQuery("title:w1"); + query.add("fl", "*, score"); + query.add("rows", "4"); + + // Normal term match + assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'"); + + query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); + + assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'"); + + query.setQuery("*:*"); + query.remove("rows"); + query.add("rows", "8"); + query.remove("rq"); + query.add("rq", "{!ltr model="+field+"-model reRankDocs=8}"); + + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'"); + } } @Test public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Exception { - SolrQuery query = new SolrQuery(); - query.setQuery("id:42"); - query.add("fl", "*, score"); - query.add("rows", "4"); - - assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'"); - query = new SolrQuery(); - query.setQuery("id:42"); - query.add("rq", "{!ltr model=popularity-model reRankDocs=4}"); - query.add("fl", "[fv]"); - assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); - assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("popularity",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); - + for (String field : FIELD_NAMES) { + SolrQuery query = new SolrQuery(); + query.setQuery("id:42"); + query.add("fl", "*, score"); + query.add("rows", "4"); + + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'"); + query = new SolrQuery(); + query.setQuery("id:42"); + query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); + query.add("fl", "[fv]"); + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field,Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); Review comment: **This test fails here on the second and subsequent run of the loop.** I don't know yet if that is a test artifact or an actual code issue. The `testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned` test does not have the same behaviour since each loop turn uses a different `f(feature )store` (and its own model) whereas this test uses the test-wide models and its default feature store. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org