cpoerschke commented on code in PR #1435: URL: https://github.com/apache/solr/pull/1435#discussion_r1136807166
########## solr/core/src/java/org/apache/solr/schema/DenseVectorField.java: ########## @@ -46,19 +50,17 @@ */ public class DenseVectorField extends FloatPointField { public static final String HNSW_ALGORITHM = "hnsw"; - public static final String DEFAULT_KNN_ALGORITHM = HNSW_ALGORITHM; static final String KNN_VECTOR_DIMENSION = "vectorDimension"; static final String KNN_SIMILARITY_FUNCTION = "similarityFunction"; - static final String KNN_ALGORITHM = "knnAlgorithm"; static final String HNSW_MAX_CONNECTIONS = "hnswMaxConnections"; static final String HNSW_BEAM_WIDTH = "hnswBeamWidth"; - + static final String VECTOR_ENCODING = "vectorEncoding"; + private final VectorEncoding DEFAULT_VECTOR_ENCODING = VectorEncoding.FLOAT32; + private final VectorSimilarityFunction DEFAULT_SIMILARITY = VectorSimilarityFunction.EUCLIDEAN; Review Comment: minor: keep `DEFAULT_SIMILARITY` at its current location adjacent to `similarityFunction` ########## solr/core/src/java/org/apache/solr/schema/DenseVectorField.java: ########## @@ -106,19 +114,22 @@ public void init(IndexSchema schema, Map<String, String> args) { args.remove(KNN_SIMILARITY_FUNCTION); this.knnAlgorithm = args.getOrDefault(KNN_ALGORITHM, DEFAULT_KNN_ALGORITHM); - args.remove(KNN_ALGORITHM); + this.vectorEncoding = + ofNullable(args.get(VECTOR_ENCODING)) + .map(value -> VectorEncoding.valueOf(value.toUpperCase(Locale.ROOT))) + .orElse(DEFAULT_VECTOR_ENCODING); + ; + + args.remove(VECTOR_ENCODING); Review Comment: minor (consistency with existing style) ```suggestion .orElse(DEFAULT_VECTOR_ENCODING); args.remove(VECTOR_ENCODING); ``` ########## solr/core/src/java/org/apache/solr/schema/DenseVectorField.java: ########## @@ -179,24 +206,37 @@ public List<IndexableField> createFields(SchemaField field, Object value) { + "', expected format:'[f1, f2, f3...fn]' e.g. [1.0, 3.4, 5.6]", Review Comment: subjective: if byte encoding is used might it be confusing to see floats in the example here? ```suggestion + "', expected format:'[f1, f2, f3...fn]'", ``` ########## solr/core/src/java/org/apache/solr/schema/DenseVectorField.java: ########## @@ -165,10 +180,22 @@ public void checkSchemaField(final SchemaField field) throws SolrException { @Override public List<IndexableField> createFields(SchemaField field, Object value) { - ArrayList<IndexableField> fields = new ArrayList<>(); - float[] parsedVector; try { - parsedVector = parseVector(value); + ArrayList<IndexableField> fields = new ArrayList<>(); + VectorValue vectorValue = new VectorValue(value); + if (field.indexed()) { + fields.add(createField(field, vectorValue)); + } + if (field.stored()) { + if (vectorEncoding.equals(VectorEncoding.FLOAT32)) { + for (float vectorElement : vectorValue.getFloatVector()) { + fields.add(getStoredField(field, vectorElement)); + } + } else { + fields.add(new StoredField(field.getName(), vectorValue.getByteVector())); + } Review Comment: ```suggestion } else if (vectorEncoding.equals(VectorEncoding.BYTE)) { fields.add(new StoredField(field.getName(), vectorValue.getByteVector())); } else { should not happen, throw something (mentioning the unsupported vectorEncoding value) } ``` ########## solr/core/src/java/org/apache/solr/schema/DenseVectorField.java: ########## @@ -179,24 +206,37 @@ public List<IndexableField> createFields(SchemaField field, Object value) { + "', expected format:'[f1, f2, f3...fn]' e.g. [1.0, 3.4, 5.6]", e); } + } - if (field.indexed()) { - fields.add(createField(field, parsedVector)); - } - if (field.stored()) { - fields.ensureCapacity(parsedVector.length + 1); - for (float vectorElement : parsedVector) { - fields.add(getStoredField(field, vectorElement)); - } + @Override + public IndexableField createField(SchemaField field, Object vectorValue) { + if (vectorValue == null) return null; + VectorValue typedVectorValue = (VectorValue) vectorValue; + if (vectorEncoding.equals(VectorEncoding.BYTE)) { + return new KnnVectorField( + field.getName(), typedVectorValue.getByteVector(), similarityFunction); + } else { + return new KnnVectorField( + field.getName(), typedVectorValue.getFloatVector(), similarityFunction); Review Comment: ```suggestion } else if (vectorEncoding.equals(VectorEncoding.FLOAT31)) { return new KnnVectorField( field.getName(), typedVectorValue.getFloatVector(), similarityFunction); } else { return null; // or throw something ``` ########## solr/core/src/java/org/apache/solr/schema/DenseVectorField.java: ########## @@ -179,24 +206,37 @@ public List<IndexableField> createFields(SchemaField field, Object value) { + "', expected format:'[f1, f2, f3...fn]' e.g. [1.0, 3.4, 5.6]", e); } + } - if (field.indexed()) { - fields.add(createField(field, parsedVector)); - } - if (field.stored()) { - fields.ensureCapacity(parsedVector.length + 1); - for (float vectorElement : parsedVector) { - fields.add(getStoredField(field, vectorElement)); - } + @Override + public IndexableField createField(SchemaField field, Object vectorValue) { + if (vectorValue == null) return null; + VectorValue typedVectorValue = (VectorValue) vectorValue; + if (vectorEncoding.equals(VectorEncoding.BYTE)) { + return new KnnVectorField( + field.getName(), typedVectorValue.getByteVector(), similarityFunction); + } else { + return new KnnVectorField( + field.getName(), typedVectorValue.getFloatVector(), similarityFunction); } - return fields; } @Override - public IndexableField createField(SchemaField field, Object parsedVector) { - if (parsedVector == null) return null; - float[] typedVector = (float[]) parsedVector; - return new KnnVectorField(field.getName(), typedVector, similarityFunction); + public Object toObject(IndexableField f) { + if (vectorEncoding.equals(VectorEncoding.BYTE)) { + BytesRef bytesRef = f.binaryValue(); + if (bytesRef != null) { + List<Number> ret = new ArrayList<>(); Review Comment: Wondering if the required capacity of the array is knowable at this point e.g. based on dimension maybe? ```suggestion List<Number> ret = new ArrayList<>(dimension); ``` ########## solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java: ########## @@ -458,4 +495,128 @@ public void query_functionQueryUsage_shouldThrowException() throws Exception { deleteCore(); } } + + @Test + public void denseVectorField_shouldBePresentAfterAtomicUpdate() throws Exception { + try { + initCore("solrconfig.xml", "schema-densevector.xml"); + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "0"); + doc.addField("vector", Arrays.asList(1.1, 2.2, 3.3, 4.4)); + doc.addField("vector_byte_encoding", Arrays.asList(5.5, 6.6, 7.7, 8.8)); + doc.addField("string_field", "test"); + + assertU(adoc(doc)); + assertU(commit()); + + assertJQ( + req("q", "id:0", "fl", "*"), + "/response/docs/[0]/vector==[1.1,2.2,3.3,4.4]", + "/response/docs/[0]/vector_byte_encoding==[5,6,7,8]", + "/response/docs/[0]/string_field==test"); + + SolrInputDocument updateDoc = new SolrInputDocument(); + updateDoc.addField("id", "0"); + updateDoc.addField("string_field", ImmutableMap.of("set", "other test")); + assertU(adoc(updateDoc)); + assertU(commit()); + + assertJQ( + req("q", "id:0", "fl", "*"), + "/response/docs/[0]/vector==[1.1,2.2,3.3,4.4]", + "/response/docs/[0]/vector_byte_encoding==[5,6,7,8]", + "/response/docs/[0]/string_field=='other test'"); + + } finally { + deleteCore(); + } + } + + @Test + public void denseVectorFieldOnAtomicUpdate_shouldBeUpdatedCorrectly() throws Exception { + try { + initCore("solrconfig.xml", "schema-densevector.xml"); + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "0"); + doc.addField("vector", Arrays.asList(1.1, 2.2, 3.3, 4.4)); + doc.addField("vector_byte_encoding", Arrays.asList(5.5, 6.6, 7.7, 8.8)); Review Comment: ```suggestion doc.addField("vector_byte_encoding", Arrays.asList(5, 6, 7, 8)); ``` ########## solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java: ########## @@ -458,4 +495,128 @@ public void query_functionQueryUsage_shouldThrowException() throws Exception { deleteCore(); } } + + @Test + public void denseVectorField_shouldBePresentAfterAtomicUpdate() throws Exception { + try { + initCore("solrconfig.xml", "schema-densevector.xml"); + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "0"); + doc.addField("vector", Arrays.asList(1.1, 2.2, 3.3, 4.4)); + doc.addField("vector_byte_encoding", Arrays.asList(5.5, 6.6, 7.7, 8.8)); Review Comment: subjective: decimals removal already tested elsewhere? ```suggestion doc.addField("vector_byte_encoding", Arrays.asList(5, 6, 7, 8)); ``` ########## solr/core/src/test-files/solr/collection1/conf/schema-densevector.xml: ########## @@ -20,11 +20,19 @@ <schema name="schema-densevector" version="1.0"> <fieldType name="string" class="solr.StrField" multiValued="true"/> - <fieldType name="knn_vector" class="solr.DenseVectorField" vectorDimension="4" similarityFunction="cosine"/> - + <fieldType name="knn_vector" class="solr.DenseVectorField" vectorDimension="4" similarityFunction="cosine" /> + <fieldType name="plong" class="solr.LongPointField" useDocValuesAsStored="false"/> + + <fieldType name="knn_vector_byte_encoding" class="solr.DenseVectorField" vectorDimension="4" similarityFunction="cosine" vectorEncoding="BYTE"/> + <fieldType name="knn_vector_esplicit_float32_encoding" class="solr.DenseVectorField" vectorDimension="4" similarityFunction="cosine" vectorEncoding="FLOAT32"/> Review Comment: ```suggestion <fieldType name="knn_vector_explicit_float32_encoding" class="solr.DenseVectorField" vectorDimension="4" similarityFunction="cosine" vectorEncoding="FLOAT32"/> ``` or it seems to be unused field type actually? ```suggestion ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org