This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 779c841067 [arrow] Reuse length array to avoid allocate array memory
repeatedly (#7573)
779c841067 is described below
commit 779c841067df0d55c77cec8679ee6105ded7bb37
Author: xuzifu666 <[email protected]>
AuthorDate: Wed Apr 1 17:16:54 2026 +0800
[arrow] Reuse length array to avoid allocate array memory repeatedly (#7573)
ArrowFieldWriters had TODO comments marking places where the lengths
array was allocated anew each time it was needed. This commit keeps a
reusable array per writer and reallocates it only when it is too small.
---
.../paimon/arrow/writer/ArrowFieldWriters.java | 39 ++++++++++++++--------
1 file changed, 26 insertions(+), 13 deletions(-)
diff --git
a/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java
b/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java
index 7cb64de7fd..2999acdaf6 100644
---
a/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java
+++
b/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java
@@ -638,6 +638,8 @@ public class ArrowFieldWriters {
private int offset;
+ private int[] reusableLengths;
+
public ArrayWriter(
FieldVector fieldVector, ArrowFieldWriter elementWriter,
boolean isNullable) {
super(fieldVector, isNullable);
@@ -667,8 +669,12 @@ public class ArrowFieldWriters {
}
// length for arrays in [0, startIndex + batchRows)
- // TODO: reuse this
- int[] lengths = new int[lenSize];
+ int[] lengths;
+ if (reusableLengths == null || reusableLengths.length < lenSize) {
+ reusableLengths = new int[lenSize];
+ }
+ lengths = reusableLengths;
+ // Only use the first lenSize elements, reset if needed
for (int i = 0; i < lenSize; i++) {
if (arrayColumnVector.isNullAt(i)) {
// null values don't occupy space
@@ -680,7 +686,7 @@ public class ArrowFieldWriters {
}
ArrayChildWriteInfo arrayChildWriteInfo =
- getArrayChildWriteInfo(pickedInColumn, startIndex,
lengths);
+ getArrayChildWriteInfo(pickedInColumn, startIndex,
lengths, lenSize);
elementWriter.write(
arrayColumnVector.getColumnVector(),
arrayChildWriteInfo.pickedInColumn,
@@ -809,6 +815,8 @@ public class ArrowFieldWriters {
private int offset;
+ private int[] reusableLengths;
+
public MapWriter(
FieldVector fieldVector,
ArrowFieldWriter keyWriter,
@@ -843,8 +851,10 @@ public class ArrowFieldWriters {
}
// length for arrays in [0, startIndex + batchRows)
- // TODO: reuse this
- int[] lengths = new int[lenSize];
+ if (reusableLengths == null || reusableLengths.length < lenSize) {
+ reusableLengths = new int[lenSize];
+ }
+ int[] lengths = reusableLengths;
for (int i = 0; i < lenSize; i++) {
if (mapColumnVector.isNullAt(i)) {
// null values don't occupy space
@@ -856,7 +866,7 @@ public class ArrowFieldWriters {
}
ArrayChildWriteInfo arrayChildWriteInfo =
- getArrayChildWriteInfo(pickedInColumn, startIndex,
lengths);
+ getArrayChildWriteInfo(pickedInColumn, startIndex,
lengths, lenSize);
keyWriter.write(
mapColumnVector.getChildren()[0],
arrayChildWriteInfo.pickedInColumn,
@@ -920,20 +930,23 @@ public class ArrowFieldWriters {
}
private static ArrayChildWriteInfo getArrayChildWriteInfo(
- @Nullable int[] pickedInParentColumn, int parentStartIndex, int[]
parentLengths) {
+ @Nullable int[] pickedInParentColumn,
+ int parentStartIndex,
+ int[] parentLengths,
+ int lenSize) {
return pickedInParentColumn == null
- ? getArrayChildWriteInfoWithoutDelete(parentStartIndex,
parentLengths)
+ ? getArrayChildWriteInfoWithoutDelete(parentStartIndex,
parentLengths, lenSize)
: getArrayChildWriteInfoWithDelete(
- pickedInParentColumn, parentStartIndex, parentLengths);
+ pickedInParentColumn, parentStartIndex, parentLengths,
lenSize);
}
private static ArrayChildWriteInfo getArrayChildWriteInfoWithoutDelete(
- int parentStartIndex, int[] parentLengths) {
+ int parentStartIndex, int[] parentLengths, int lenSize) {
// the first element index which is to be written
int firstElementIndex = 0;
// batchRows of child column vector
int childBatchRows = 0;
- for (int i = 0; i < parentLengths.length; i++) {
+ for (int i = 0; i < lenSize; i++) {
if (i < parentStartIndex) {
firstElementIndex += parentLengths[i];
} else {
@@ -944,14 +957,14 @@ public class ArrowFieldWriters {
}
private static ArrayChildWriteInfo getArrayChildWriteInfoWithDelete(
- int[] pickedInParentColumn, int parentStartIndex, int[]
parentLengths) {
+ int[] pickedInParentColumn, int parentStartIndex, int[]
parentLengths, int lenSize) {
// the first element index which is to be written
int firstElementIndex = 0;
// objects to calculate child pickedInColumn
IntArrayList childPicked = new IntArrayList(1024);
int offset = 0;
int currentParentPickedIndex = parentStartIndex;
- for (int i = 0; i < parentLengths.length; i++) {
+ for (int i = 0; i < lenSize; i++) {
if (i < pickedInParentColumn[parentStartIndex]) {
firstElementIndex += parentLengths[i];
offset = firstElementIndex;