Hisoka-X commented on code in PR #9783:
URL: https://github.com/apache/seatunnel/pull/9783#discussion_r2311742376


##########
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/functions/VectorFunction.java:
##########
@@ -199,4 +201,194 @@ private static Float[] convertToFloatArray(Object obj) {
                     String.format("Unsupported vector type: %s", 
obj.getClass().getName()));
         }
     }
+
+    /** Truncate vector to target dimension Usage: VECTOR_REDUCE(embedding, 
256, 'TRUNCATE') */
+    public static Object vectorTruncate(Object vectorData, Integer 
targetDimension) {
+        if (vectorData == null || targetDimension == null) {
+            return null;
+        }
+
+        Float[] sourceVector = extractFloatArray(vectorData);
+        if (sourceVector.length <= targetDimension) {
+            return vectorData; // No need to truncate
+        }
+
+        Float[] result = new Float[targetDimension];
+        System.arraycopy(sourceVector, 0, result, 0, targetDimension);
+        return VectorUtils.toByteBuffer(result);
+    }
+
+    /**
+     * Random projection for dimension reduction Usage: 
VECTOR_REDUCE(embedding, 128,
+     * 'RANDOM_PROJECTION')
+     */
+    public static Object vectorRandomProjection(Object vectorData, Integer 
targetDimension) {
+        if (vectorData == null || targetDimension == null) {
+            return null;
+        }
+
+        Float[] sourceVector = extractFloatArray(vectorData);
+        if (sourceVector.length <= targetDimension) {
+            return vectorData; // No need to reduce
+        }
+
+        float[][] projectionMatrix =
+                createGaussianProjectionMatrix(sourceVector.length, 
targetDimension);
+        Float[] result = applyProjection(sourceVector, projectionMatrix, 
targetDimension);
+        return VectorUtils.toByteBuffer(result);
+    }
+
+    /**
+     * Sparse random projection for dimension reduction Usage: 
VECTOR_REDUCE(embedding, 64,
+     * 'SPARSE_RANDOM_PROJECTION')
+     */
+    public static Object vectorSparseProjection(Object vectorData, Integer 
targetDimension) {
+        if (vectorData == null || targetDimension == null) {
+            return null;
+        }
+
+        Float[] sourceVector = extractFloatArray(vectorData);
+        if (sourceVector.length <= targetDimension) {
+            return vectorData; // No need to reduce
+        }
+
+        float[][] projectionMatrix =
+                createSparseProjectionMatrix(sourceVector.length, 
targetDimension);
+        Float[] result = applyProjection(sourceVector, projectionMatrix, 
targetDimension);
+        return VectorUtils.toByteBuffer(result);
+    }
+
+    /**
+     * Generic vector dimension reduction function Usage: 
VECTOR_REDUCE(vector_field,
+     * target_dimension, method) method: 'TRUNCATE', 'RANDOM_PROJECTION', 
'SPARSE_RANDOM_PROJECTION'
+     */
+    public static Object vectorReduce(Object vectorData, Integer 
targetDimension, String method) {
+        if (vectorData == null || targetDimension == null || method == null) {
+            return null;
+        }
+
+        switch (method.toUpperCase()) {
+            case "TRUNCATE":
+                return vectorTruncate(vectorData, targetDimension);
+            case "RANDOM_PROJECTION":
+                return vectorRandomProjection(vectorData, targetDimension);
+            case "SPARSE_RANDOM_PROJECTION":
+                return vectorSparseProjection(vectorData, targetDimension);
+            default:
+                throw new IllegalArgumentException("Unknown reduction method: 
" + method);
+        }
+    }
+
+    /** Normalize vector to unit length Usage: VECTOR_NORMALIZE(vector_field) 
*/
+    public static Object vectorNormalize(Object vectorData) {
+        if (vectorData == null) {
+            return null;
+        }
+
+        Float[] vector = extractFloatArray(vectorData);
+        double magnitude = 0.0;
+        for (Float value : vector) {
+            if (value != null) {
+                magnitude += value * value;
+            }
+        }
+        magnitude = Math.sqrt(magnitude);
+
+        if (magnitude == 0.0) {
+            return vectorData; // Return original if zero vector
+        }
+
+        Float[] normalized = new Float[vector.length];
+        for (int i = 0; i < vector.length; i++) {
+            normalized[i] = vector[i] == null ? null : (float) (vector[i] / 
magnitude);
+        }
+
+        return VectorUtils.toByteBuffer(normalized);
+    }
+
+    // Helper methods
+
+    private static Float[] extractFloatArray(Object vectorData) {

Review Comment:
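   Let's reuse the existing `convertToFloatArray` helper in this class instead of adding a new `extractFloatArray` method: 
https://github.com/apache/seatunnel/blob/35f41a7baf6f359d635ab68c81f2d6d745bef5a6/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/functions/VectorFunction.java#L182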



##########
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLFunction.java:
##########
@@ -212,6 +212,10 @@ public class ZetaSQLFunction {
     public static final String VECTOR_NORM = "VECTOR_NORM";
     public static final String INNER_PRODUCT = "INNER_PRODUCT";
 
+    // -------------------------vector functions----------------------------
+    public static final String VECTOR_REDUCE = "VECTOR_REDUCE";

Review Comment:
   ```suggestion
       public static final String VECTOR_REDUCE = "VECTOR_REDUCE";
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to