This is an automated email from the ASF dual-hosted git repository.

yuzelin pushed a commit to branch release-1.4
in repository https://gitbox.apache.org/repos/asf/paimon.git

commit 2aecba09e60a6396ff4f64f2f79a02dcad99c5c0
Author: xuzifu666 <[email protected]>
AuthorDate: Wed Apr 1 17:16:54 2026 +0800

    [arrow] Reuse length array to avoid allocate array memory repeatedly (#7573)
    
    Note that there are TODO points in ArrowFieldWriters here. The array
    memory was indeed allocated repeatedly. Maybe we can improve here by
    reusing it.
---
 .../paimon/arrow/writer/ArrowFieldWriters.java     | 39 ++++++++++++++--------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git 
a/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java
 
b/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java
index 7cb64de7fd..2999acdaf6 100644
--- 
a/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java
+++ 
b/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java
@@ -638,6 +638,8 @@ public class ArrowFieldWriters {
 
         private int offset;
 
+        private int[] reusableLengths;
+
         public ArrayWriter(
                 FieldVector fieldVector, ArrowFieldWriter elementWriter, 
boolean isNullable) {
             super(fieldVector, isNullable);
@@ -667,8 +669,12 @@ public class ArrowFieldWriters {
             }
 
             // length for arrays in [0, startIndex + batchRows)
-            // TODO: reuse this
-            int[] lengths = new int[lenSize];
+            int[] lengths;
+            if (reusableLengths == null || reusableLengths.length < lenSize) {
+                reusableLengths = new int[lenSize];
+            }
+            lengths = reusableLengths;
+            // Only use the first lenSize elements, reset if needed
             for (int i = 0; i < lenSize; i++) {
                 if (arrayColumnVector.isNullAt(i)) {
                     // null values don't occupy space
@@ -680,7 +686,7 @@ public class ArrowFieldWriters {
             }
 
             ArrayChildWriteInfo arrayChildWriteInfo =
-                    getArrayChildWriteInfo(pickedInColumn, startIndex, 
lengths);
+                    getArrayChildWriteInfo(pickedInColumn, startIndex, 
lengths, lenSize);
             elementWriter.write(
                     arrayColumnVector.getColumnVector(),
                     arrayChildWriteInfo.pickedInColumn,
@@ -809,6 +815,8 @@ public class ArrowFieldWriters {
 
         private int offset;
 
+        private int[] reusableLengths;
+
         public MapWriter(
                 FieldVector fieldVector,
                 ArrowFieldWriter keyWriter,
@@ -843,8 +851,10 @@ public class ArrowFieldWriters {
             }
 
             // length for arrays in [0, startIndex + batchRows)
-            // TODO: reuse this
-            int[] lengths = new int[lenSize];
+            if (reusableLengths == null || reusableLengths.length < lenSize) {
+                reusableLengths = new int[lenSize];
+            }
+            int[] lengths = reusableLengths;
             for (int i = 0; i < lenSize; i++) {
                 if (mapColumnVector.isNullAt(i)) {
                     // null values don't occupy space
@@ -856,7 +866,7 @@ public class ArrowFieldWriters {
             }
 
             ArrayChildWriteInfo arrayChildWriteInfo =
-                    getArrayChildWriteInfo(pickedInColumn, startIndex, 
lengths);
+                    getArrayChildWriteInfo(pickedInColumn, startIndex, 
lengths, lenSize);
             keyWriter.write(
                     mapColumnVector.getChildren()[0],
                     arrayChildWriteInfo.pickedInColumn,
@@ -920,20 +930,23 @@ public class ArrowFieldWriters {
     }
 
     private static ArrayChildWriteInfo getArrayChildWriteInfo(
-            @Nullable int[] pickedInParentColumn, int parentStartIndex, int[] 
parentLengths) {
+            @Nullable int[] pickedInParentColumn,
+            int parentStartIndex,
+            int[] parentLengths,
+            int lenSize) {
         return pickedInParentColumn == null
-                ? getArrayChildWriteInfoWithoutDelete(parentStartIndex, 
parentLengths)
+                ? getArrayChildWriteInfoWithoutDelete(parentStartIndex, 
parentLengths, lenSize)
                 : getArrayChildWriteInfoWithDelete(
-                        pickedInParentColumn, parentStartIndex, parentLengths);
+                        pickedInParentColumn, parentStartIndex, parentLengths, 
lenSize);
     }
 
     private static ArrayChildWriteInfo getArrayChildWriteInfoWithoutDelete(
-            int parentStartIndex, int[] parentLengths) {
+            int parentStartIndex, int[] parentLengths, int lenSize) {
         // the first element index which is to be written
         int firstElementIndex = 0;
         // batchRows of child column vector
         int childBatchRows = 0;
-        for (int i = 0; i < parentLengths.length; i++) {
+        for (int i = 0; i < lenSize; i++) {
             if (i < parentStartIndex) {
                 firstElementIndex += parentLengths[i];
             } else {
@@ -944,14 +957,14 @@ public class ArrowFieldWriters {
     }
 
     private static ArrayChildWriteInfo getArrayChildWriteInfoWithDelete(
-            int[] pickedInParentColumn, int parentStartIndex, int[] 
parentLengths) {
+            int[] pickedInParentColumn, int parentStartIndex, int[] 
parentLengths, int lenSize) {
         // the first element index which is to be written
         int firstElementIndex = 0;
         // objects to calculate child pickedInColumn
         IntArrayList childPicked = new IntArrayList(1024);
         int offset = 0;
         int currentParentPickedIndex = parentStartIndex;
-        for (int i = 0; i < parentLengths.length; i++) {
+        for (int i = 0; i < lenSize; i++) {
             if (i < pickedInParentColumn[parentStartIndex]) {
                 firstElementIndex += parentLengths[i];
                 offset = firstElementIndex;

Reply via email to