Pulkitg64 commented on code in PR #13401:
URL: https://github.com/apache/lucene/pull/13401#discussion_r1613024700
##########
lucene/core/src/java/org/apache/lucene/index/VectorSimilarityFunction.java:
##########
@@ -16,104 +16,73 @@
*/
package org.apache.lucene.index;
-import static org.apache.lucene.util.VectorUtil.cosine;
-import static org.apache.lucene.util.VectorUtil.dotProduct;
-import static org.apache.lucene.util.VectorUtil.dotProductScore;
-import static org.apache.lucene.util.VectorUtil.scaleMaxInnerProductScore;
-import static org.apache.lucene.util.VectorUtil.squareDistance;
+import org.apache.lucene.util.NamedSPILoader;
/**
* Vector similarity function; used in search to return top K most similar
vectors to a target
- * vector. This is a label describing the method used during indexing and
searching of the vectors
- * in order to determine the nearest neighbors.
+ * vector.
*/
-public enum VectorSimilarityFunction {
+public abstract class VectorSimilarityFunction implements
NamedSPILoader.NamedSPI {
- /** Euclidean distance */
- EUCLIDEAN {
- @Override
- public float compare(float[] v1, float[] v2) {
- return 1 / (1 + squareDistance(v1, v2));
- }
-
- @Override
- public float compare(byte[] v1, byte[] v2) {
- return 1 / (1f + squareDistance(v1, v2));
- }
- },
+ private static class Holder {
+ private static final NamedSPILoader<VectorSimilarityFunction> LOADER =
+ new NamedSPILoader<>(VectorSimilarityFunction.class);
- /**
- * Dot product. NOTE: this similarity is intended as an optimized way to
perform cosine
- * similarity. In order to use it, all vectors must be normalized, including
both document and
- * query vectors. Using dot product with vectors that are not normalized can
result in errors or
- * poor search results. Floating point vectors must be normalized to be of
unit length, while byte
- * vectors should simply all have the same norm.
- */
- DOT_PRODUCT {
- @Override
- public float compare(float[] v1, float[] v2) {
- return Math.max((1 + dotProduct(v1, v2)) / 2, 0);
+ static NamedSPILoader<VectorSimilarityFunction> getLoader() {
+ if (LOADER == null) {
+ throw new IllegalStateException();
Review Comment:
Done
##########
lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java:
##########
@@ -19,18 +19,35 @@
import java.io.Closeable;
import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;
+import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.Bits;
/** Reads vectors from an index. */
public abstract class KnnVectorsReader implements Closeable {
+ /**
+ * SIMILARITY_FUNCTIONS_MAP containing hardcoded mapping for ordinal to
vectorSimilarityFunction
+ * name
+ */
+ public static final Map<Integer, String> SIMILARITY_FUNCTIONS_MAP = new
HashMap<>();
Review Comment:
Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]