This is an automated email from the ASF dual-hosted git repository.
Fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new 26fa3530f GH-3261: Fix integer overflow in
CapacityByteArrayOutputStream (#3525)
26fa3530f is described below
commit 26fa3530f29f42a15ee0f74adf659fbc84662fe7
Author: Anupam Yadav <[email protected]>
AuthorDate: Wed May 6 13:28:31 2026 -0700
GH-3261: Fix integer overflow in CapacityByteArrayOutputStream (#3525)
The overflow check in addSlab() used bytesUsed which is not updated until
after addSlab() returns in write(). This caused the overflow guard to miss
cases where bytesAllocated + nextSlabSize exceeds Integer.MAX_VALUE.
Fix:
- Use bytesAllocated instead of bytesUsed for the overflow check, since
bytesAllocated is always up to date when addSlab() is called.
- Cap nextSlabSize when it would cause bytesAllocated to overflow, instead
of letting Math.addExact throw an uncaught ArithmeticException.
Co-authored-by: Anupam Yadav <[email protected]>
---
.../bytes/CapacityByteArrayOutputStream.java | 11 ++-
.../TestCapacityByteArrayOutputStreamOverflow.java | 97 ++++++++++++++++++++++
2 files changed, 106 insertions(+), 2 deletions(-)
diff --git
a/parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java
b/parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java
index 84d3c5b7b..d3d8b1b6d 100644
---
a/parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java
+++
b/parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java
@@ -167,9 +167,10 @@ public class CapacityByteArrayOutputStream extends
OutputStream {
private void addSlab(int minimumSize) {
int nextSlabSize;
- // check for overflow
+ // check for overflow using bytesAllocated which is always up to date
(unlike bytesUsed which
+ // is updated after addSlab returns in write())
try {
- Math.addExact(bytesUsed, minimumSize);
+ Math.addExact(bytesAllocated, minimumSize);
} catch (ArithmeticException e) {
// This is interpreted as a request for a value greater than
Integer.MAX_VALUE
// We throw OOM because that is what java.io.ByteArrayOutputStream also
does
@@ -191,6 +192,12 @@ public class CapacityByteArrayOutputStream extends
OutputStream {
nextSlabSize = minimumSize;
}
+ // Cap nextSlabSize to avoid integer overflow on bytesAllocated
+ int maxNextSlabSize = Integer.MAX_VALUE - bytesAllocated;
+ if (nextSlabSize > maxNextSlabSize) {
+ nextSlabSize = max(minimumSize, maxNextSlabSize);
+ }
+
LOG.debug("used {} slabs, adding new slab of size {}", slabs.size(),
nextSlabSize);
this.currentSlab = allocator.allocate(nextSlabSize);
diff --git
a/parquet-common/src/test/java/org/apache/parquet/bytes/TestCapacityByteArrayOutputStreamOverflow.java
b/parquet-common/src/test/java/org/apache/parquet/bytes/TestCapacityByteArrayOutputStreamOverflow.java
new file mode 100644
index 000000000..771d9e17b
--- /dev/null
+++
b/parquet-common/src/test/java/org/apache/parquet/bytes/TestCapacityByteArrayOutputStreamOverflow.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.bytes;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
+
+import java.lang.reflect.Field;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for integer overflow handling in {@link
CapacityByteArrayOutputStream#addSlab(int)}.
+ * Verifies the fix for PARQUET-3261.
+ */
+public class TestCapacityByteArrayOutputStreamOverflow {
+
+ private TrackingByteBufferAllocator allocator;
+
+ @Before
+ public void initAllocator() {
+ allocator = TrackingByteBufferAllocator.wrap(new
HeapByteBufferAllocator());
+ }
+
+ @After
+ public void closeAllocator() {
+ allocator.close();
+ }
+
+ /**
+ * Regression test for PARQUET-3261: bytesAllocated overflow in addSlab().
+ * Simulates near-overflow by setting bytesAllocated via reflection, then
verifying
+ * that addSlab caps the slab size instead of throwing ArithmeticException.
+ */
+ @Test
+ public void testAddSlabCapsSlabSizeNearIntegerMaxValue() throws Exception {
+ int slabSize = 1024;
+ try (CapacityByteArrayOutputStream cbaos =
+ new CapacityByteArrayOutputStream(slabSize, Integer.MAX_VALUE,
allocator)) {
+ // Write initial data to set up internal state
+ byte[] data = new byte[slabSize];
+ cbaos.write(data, 0, data.length);
+
+ // Simulate near-overflow by setting bytesAllocated close to
Integer.MAX_VALUE
+ Field bytesAllocatedField =
CapacityByteArrayOutputStream.class.getDeclaredField("bytesAllocated");
+ bytesAllocatedField.setAccessible(true);
+ bytesAllocatedField.setInt(cbaos, Integer.MAX_VALUE - 100);
+
+ // Writing 1 byte triggers addSlab with minimumSize=1.
+ // Without the fix, the doubling strategy would compute nextSlabSize =
bytesUsed (1024),
+ // and bytesAllocated + 1024 would overflow. With the fix, nextSlabSize
is capped to 100.
+ cbaos.write(1);
+ assertEquals(slabSize + 1, cbaos.size());
+ }
+ }
+
+ /**
+ * Verify that a true overflow (bytesAllocated + minimumSize >
Integer.MAX_VALUE)
+ * still throws OutOfMemoryError.
+ */
+ @Test
+ public void testAddSlabThrowsOOMOnTrueOverflow() throws Exception {
+ int slabSize = 1024;
+ try (CapacityByteArrayOutputStream cbaos =
+ new CapacityByteArrayOutputStream(slabSize, Integer.MAX_VALUE,
allocator)) {
+ byte[] data = new byte[slabSize];
+ cbaos.write(data, 0, data.length);
+
+ // Set bytesAllocated so that even minimumSize=200 would overflow
+ Field bytesAllocatedField =
CapacityByteArrayOutputStream.class.getDeclaredField("bytesAllocated");
+ bytesAllocatedField.setAccessible(true);
+ bytesAllocatedField.setInt(cbaos, Integer.MAX_VALUE - 50);
+
+ // Writing 200 bytes requires minimumSize=200, but only 50 bytes remain.
+ // The addExact(bytesAllocated, minimumSize) check should throw OOM.
+ byte[] tooLarge = new byte[200];
+ assertThrows(OutOfMemoryError.class, () -> cbaos.write(tooLarge, 0,
tooLarge.length));
+ }
+ }
+}