This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 6cad835a06 [index] lumina support vector type (#7580)
6cad835a06 is described below
commit 6cad835a06425dba7b689413ab5dc4eb2b46def6
Author: jerry <[email protected]>
AuthorDate: Thu Apr 2 16:40:10 2026 +0800
[index] lumina support vector type (#7580)
---
.../index/LuminaVectorGlobalIndexReader.java | 13 +++-
.../index/LuminaVectorGlobalIndexWriter.java | 32 ++++++--
.../lumina/index/LuminaVectorGlobalIndexTest.java | 87 ++++++++++++++++++++++
3 files changed, 122 insertions(+), 10 deletions(-)
diff --git
a/paimon-lumina/src/main/java/org/apache/paimon/lumina/index/LuminaVectorGlobalIndexReader.java
b/paimon-lumina/src/main/java/org/apache/paimon/lumina/index/LuminaVectorGlobalIndexReader.java
index d8492f0b13..df20e14f85 100644
---
a/paimon-lumina/src/main/java/org/apache/paimon/lumina/index/LuminaVectorGlobalIndexReader.java
+++
b/paimon-lumina/src/main/java/org/apache/paimon/lumina/index/LuminaVectorGlobalIndexReader.java
@@ -29,6 +29,7 @@ import org.apache.paimon.predicate.VectorSearch;
import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.FloatType;
+import org.apache.paimon.types.VectorType;
import org.apache.paimon.utils.IOUtils;
import org.apache.paimon.utils.RoaringNavigableMap64;
@@ -204,10 +205,16 @@ public class LuminaVectorGlobalIndexReader implements
GlobalIndexReader {
throw new IllegalArgumentException(
"Expected float[] vector but got: " + vector.getClass());
}
- if (!(fieldType instanceof ArrayType)
- || !(((ArrayType) fieldType).getElementType() instanceof
FloatType)) {
+ boolean validFieldType = false;
+ if (fieldType instanceof VectorType) {
+ validFieldType = ((VectorType) fieldType).getElementType()
instanceof FloatType;
+ } else if (fieldType instanceof ArrayType) {
+ validFieldType = ((ArrayType) fieldType).getElementType()
instanceof FloatType;
+ }
+ if (!validFieldType) {
throw new IllegalArgumentException(
- "Lumina currently only supports float arrays, but field
type is: " + fieldType);
+ "Lumina requires VectorType<FLOAT> or ArrayType<FLOAT>,
but field type is: "
+ + fieldType);
}
int queryDim = ((float[]) vector).length;
if (queryDim != indexMeta.dim()) {
diff --git
a/paimon-lumina/src/main/java/org/apache/paimon/lumina/index/LuminaVectorGlobalIndexWriter.java
b/paimon-lumina/src/main/java/org/apache/paimon/lumina/index/LuminaVectorGlobalIndexWriter.java
index 9a85fc9c60..16058f5085 100644
---
a/paimon-lumina/src/main/java/org/apache/paimon/lumina/index/LuminaVectorGlobalIndexWriter.java
+++
b/paimon-lumina/src/main/java/org/apache/paimon/lumina/index/LuminaVectorGlobalIndexWriter.java
@@ -19,6 +19,7 @@
package org.apache.paimon.lumina.index;
import org.apache.paimon.data.InternalArray;
+import org.apache.paimon.data.InternalVector;
import org.apache.paimon.fs.PositionOutputStream;
import org.apache.paimon.globalindex.GlobalIndexSingletonWriter;
import org.apache.paimon.globalindex.ResultEntry;
@@ -26,6 +27,7 @@ import org.apache.paimon.globalindex.io.GlobalIndexFileWriter;
import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.FloatType;
+import org.apache.paimon.types.VectorType;
import org.aliyun.lumina.LuminaDataset;
import org.aliyun.lumina.LuminaFileOutput;
@@ -113,15 +115,25 @@ public class LuminaVectorGlobalIndexWriter implements
GlobalIndexSingletonWriter
}
private void validateFieldType(DataType dataType) {
- if (!(dataType instanceof ArrayType)) {
- throw new IllegalArgumentException(
- "Lumina vector index requires ArrayType, but got: " +
dataType);
+ if (dataType instanceof VectorType) {
+ DataType elementType = ((VectorType) dataType).getElementType();
+ if (!(elementType instanceof FloatType)) {
+ throw new IllegalArgumentException(
+ "Lumina vector index requires float vector, but got: "
+ elementType);
+ }
+ return;
}
- DataType elementType = ((ArrayType) dataType).getElementType();
- if (!(elementType instanceof FloatType)) {
- throw new IllegalArgumentException(
- "Lumina vector index requires float array, but got: " +
elementType);
+ if (dataType instanceof ArrayType) {
+ DataType elementType = ((ArrayType) dataType).getElementType();
+ if (!(elementType instanceof FloatType)) {
+ throw new IllegalArgumentException(
+ "Lumina vector index requires float array, but got: "
+ elementType);
+ }
+ return;
}
+ throw new IllegalArgumentException(
+ "Lumina vector index requires VectorType or ArrayType<FLOAT>,
but got: "
+ + dataType);
}
@Override
@@ -141,6 +153,12 @@ public class LuminaVectorGlobalIndexWriter implements
GlobalIndexSingletonWriter
for (int i = 0; i < dim; i++) {
writeBuf.putFloat(vector[i]);
}
+ } else if (fieldData instanceof InternalVector) {
+ InternalVector vector = (InternalVector) fieldData;
+ checkDimension(vector.size());
+ for (int i = 0; i < dim; i++) {
+ writeBuf.putFloat(vector.getFloat(i));
+ }
} else if (fieldData instanceof InternalArray) {
InternalArray array = (InternalArray) fieldData;
checkDimension(array.size());
diff --git
a/paimon-lumina/src/test/java/org/apache/paimon/lumina/index/LuminaVectorGlobalIndexTest.java
b/paimon-lumina/src/test/java/org/apache/paimon/lumina/index/LuminaVectorGlobalIndexTest.java
index c2a6a50677..f690fb7342 100644
---
a/paimon-lumina/src/test/java/org/apache/paimon/lumina/index/LuminaVectorGlobalIndexTest.java
+++
b/paimon-lumina/src/test/java/org/apache/paimon/lumina/index/LuminaVectorGlobalIndexTest.java
@@ -18,6 +18,7 @@
package org.apache.paimon.lumina.index;
+import org.apache.paimon.data.BinaryVector;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.fs.PositionOutputStream;
@@ -31,6 +32,8 @@ import org.apache.paimon.predicate.VectorSearch;
import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.FloatType;
+import org.apache.paimon.types.IntType;
+import org.apache.paimon.types.VectorType;
import org.apache.paimon.utils.RoaringNavigableMap64;
import org.aliyun.lumina.Lumina;
@@ -424,6 +427,90 @@ public class LuminaVectorGlobalIndexTest {
}
}
+ @Test
+ public void testVectorTypeEndToEnd() throws IOException {
+ int dimension = 2;
+ Options options = createDefaultOptions(dimension);
+ DataType vecFieldType = new VectorType(dimension, new FloatType());
+
+ float[][] vectors =
+ new float[][] {
+ new float[] {1.0f, 0.0f},
+ new float[] {0.95f, 0.1f},
+ new float[] {0.1f, 0.95f},
+ new float[] {0.98f, 0.05f},
+ new float[] {0.0f, 1.0f},
+ new float[] {0.05f, 0.98f}
+ };
+
+ Path vecIndexPath = new Path(indexPath, "vector_type");
+ GlobalIndexFileWriter fileWriter = createFileWriter(vecIndexPath);
+ LuminaVectorIndexOptions indexOptions = new
LuminaVectorIndexOptions(options);
+ LuminaVectorGlobalIndexWriter writer =
+ new LuminaVectorGlobalIndexWriter(fileWriter, vecFieldType,
indexOptions);
+
+ // Write using BinaryVector (InternalVector)
+ for (float[] vec : vectors) {
+ writer.write(BinaryVector.fromPrimitiveArray(vec));
+ }
+
+ List<ResultEntry> results = writer.finish();
+ List<GlobalIndexIOMeta> metas = toIOMetas(results, vecIndexPath);
+
+ GlobalIndexFileReader fileReader = createFileReader(vecIndexPath);
+ try (LuminaVectorGlobalIndexReader reader =
+ new LuminaVectorGlobalIndexReader(fileReader, metas,
vecFieldType, indexOptions)) {
+ VectorSearch vectorSearch = new VectorSearch(vectors[0], 3,
fieldName);
+ LuminaScoredGlobalIndexResult result =
+ (LuminaScoredGlobalIndexResult)
reader.visitVectorSearch(vectorSearch).get();
+ assertThat(result.results().getLongCardinality()).isEqualTo(3);
+ assertThat(result.results().contains(0L)).isTrue();
+ assertThat(result.results().contains(3L)).isTrue();
+ }
+ }
+
+ @Test
+ public void testVectorTypeWithFloatArrayWrite() throws IOException {
+ int dimension = 2;
+ Options options = createDefaultOptions(dimension);
+ DataType vecFieldType = new VectorType(dimension, new FloatType());
+
+ Path vecIndexPath = new Path(indexPath, "vector_type_float");
+ GlobalIndexFileWriter fileWriter = createFileWriter(vecIndexPath);
+ LuminaVectorIndexOptions indexOptions = new
LuminaVectorIndexOptions(options);
+ LuminaVectorGlobalIndexWriter writer =
+ new LuminaVectorGlobalIndexWriter(fileWriter, vecFieldType,
indexOptions);
+
+ // Write using raw float[] with VectorType field type
+ float[][] vectors =
+ new float[][] {
+ new float[] {1.0f, 0.0f},
+ new float[] {0.0f, 1.0f},
+ new float[] {0.7f, 0.7f}
+ };
+ for (float[] vec : vectors) {
+ writer.write(vec);
+ }
+
+ List<ResultEntry> results = writer.finish();
+ assertThat(results).hasSize(1);
+ }
+
+ @Test
+ public void testVectorTypeRejectsNonFloatElement() {
+ DataType intVecType = new VectorType(2, new IntType());
+ Options options = createDefaultOptions(2);
+ LuminaVectorIndexOptions indexOptions = new
LuminaVectorIndexOptions(options);
+ GlobalIndexFileWriter fileWriter = createFileWriter(indexPath);
+
+ assertThatThrownBy(
+ () ->
+ new LuminaVectorGlobalIndexWriter(
+ fileWriter, intVecType, indexOptions))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("float vector");
+ }
+
private Options createDefaultOptions(int dimension) {
Options options = new Options();
options.setInteger(LuminaVectorIndexOptions.DIMENSION.key(),
dimension);