This is an automated email from the ASF dual-hosted git repository.

Fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git


The following commit(s) were added to refs/heads/master by this push:
     new 26fa3530f GH-3261: Fix integer overflow in CapacityByteArrayOutputStream (#3525)
26fa3530f is described below

commit 26fa3530f29f42a15ee0f74adf659fbc84662fe7
Author: Anupam Yadav <[email protected]>
AuthorDate: Wed May 6 13:28:31 2026 -0700

    GH-3261: Fix integer overflow in CapacityByteArrayOutputStream (#3525)
    
    The overflow check in addSlab() used bytesUsed, which write() does not
    update until after addSlab() returns. This caused the overflow guard to miss
    cases where bytesAllocated + nextSlabSize exceeds Integer.MAX_VALUE.
    
    Fix:
    - Use bytesAllocated instead of bytesUsed for the overflow check, since
      bytesAllocated is always up to date when addSlab() is called.
    - Cap nextSlabSize when it would cause bytesAllocated to overflow, instead
      of letting Math.addExact throw an uncaught ArithmeticException.
    
    Co-authored-by: Anupam Yadav <[email protected]>
---
 .../bytes/CapacityByteArrayOutputStream.java       | 11 ++-
 .../TestCapacityByteArrayOutputStreamOverflow.java | 97 ++++++++++++++++++++++
 2 files changed, 106 insertions(+), 2 deletions(-)

diff --git 
a/parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java
 
b/parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java
index 84d3c5b7b..d3d8b1b6d 100644
--- 
a/parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java
+++ 
b/parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java
@@ -167,9 +167,10 @@ public class CapacityByteArrayOutputStream extends 
OutputStream {
   private void addSlab(int minimumSize) {
     int nextSlabSize;
 
-    // check for overflow
+    // check for overflow using bytesAllocated which is always up to date 
(unlike bytesUsed which
+    // is updated after addSlab returns in write())
     try {
-      Math.addExact(bytesUsed, minimumSize);
+      Math.addExact(bytesAllocated, minimumSize);
     } catch (ArithmeticException e) {
       // This is interpreted as a request for a value greater than 
Integer.MAX_VALUE
       // We throw OOM because that is what java.io.ByteArrayOutputStream also 
does
@@ -191,6 +192,12 @@ public class CapacityByteArrayOutputStream extends 
OutputStream {
       nextSlabSize = minimumSize;
     }
 
+    // Cap nextSlabSize to avoid integer overflow on bytesAllocated
+    int maxNextSlabSize = Integer.MAX_VALUE - bytesAllocated;
+    if (nextSlabSize > maxNextSlabSize) {
+      nextSlabSize = max(minimumSize, maxNextSlabSize);
+    }
+
     LOG.debug("used {} slabs, adding new slab of size {}", slabs.size(), 
nextSlabSize);
 
     this.currentSlab = allocator.allocate(nextSlabSize);
diff --git 
a/parquet-common/src/test/java/org/apache/parquet/bytes/TestCapacityByteArrayOutputStreamOverflow.java
 
b/parquet-common/src/test/java/org/apache/parquet/bytes/TestCapacityByteArrayOutputStreamOverflow.java
new file mode 100644
index 000000000..771d9e17b
--- /dev/null
+++ 
b/parquet-common/src/test/java/org/apache/parquet/bytes/TestCapacityByteArrayOutputStreamOverflow.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.bytes;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
+
+import java.lang.reflect.Field;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for integer overflow handling in {@link 
CapacityByteArrayOutputStream#addSlab(int)}.
+ * Verifies the fix for GH-3261.
+ */
+public class TestCapacityByteArrayOutputStreamOverflow {
+
+  private TrackingByteBufferAllocator allocator;
+
+  @Before
+  public void initAllocator() {
+    allocator = TrackingByteBufferAllocator.wrap(new 
HeapByteBufferAllocator());
+  }
+
+  @After
+  public void closeAllocator() {
+    allocator.close();
+  }
+
+  /**
+   * Regression test for GH-3261: bytesAllocated overflow in addSlab().
+   * Simulates near-overflow by setting bytesAllocated via reflection, then 
verifying
+   * that addSlab caps the slab size instead of throwing ArithmeticException.
+   */
+  @Test
+  public void testAddSlabCapsSlabSizeNearIntegerMaxValue() throws Exception {
+    int slabSize = 1024;
+    try (CapacityByteArrayOutputStream cbaos =
+        new CapacityByteArrayOutputStream(slabSize, Integer.MAX_VALUE, 
allocator)) {
+      // Write initial data to set up internal state
+      byte[] data = new byte[slabSize];
+      cbaos.write(data, 0, data.length);
+
+      // Simulate near-overflow by setting bytesAllocated close to 
Integer.MAX_VALUE
+      Field bytesAllocatedField = 
CapacityByteArrayOutputStream.class.getDeclaredField("bytesAllocated");
+      bytesAllocatedField.setAccessible(true);
+      bytesAllocatedField.setInt(cbaos, Integer.MAX_VALUE - 100);
+
+      // Writing 1 byte triggers addSlab with minimumSize=1.
+      // Without the fix, the doubling strategy would compute nextSlabSize = 
bytesUsed (1024),
+      // and bytesAllocated + 1024 would overflow. With the fix, nextSlabSize 
is capped to 100.
+      cbaos.write(1);
+      assertEquals(slabSize + 1, cbaos.size());
+    }
+  }
+
+  /**
+   * Verify that a true overflow (bytesAllocated + minimumSize > 
Integer.MAX_VALUE)
+   * still throws OutOfMemoryError.
+   */
+  @Test
+  public void testAddSlabThrowsOOMOnTrueOverflow() throws Exception {
+    int slabSize = 1024;
+    try (CapacityByteArrayOutputStream cbaos =
+        new CapacityByteArrayOutputStream(slabSize, Integer.MAX_VALUE, 
allocator)) {
+      byte[] data = new byte[slabSize];
+      cbaos.write(data, 0, data.length);
+
+      // Set bytesAllocated so that even minimumSize=200 would overflow
+      Field bytesAllocatedField = 
CapacityByteArrayOutputStream.class.getDeclaredField("bytesAllocated");
+      bytesAllocatedField.setAccessible(true);
+      bytesAllocatedField.setInt(cbaos, Integer.MAX_VALUE - 50);
+
+      // Writing 200 bytes requires minimumSize=200, but only 50 bytes remain.
+      // The addExact(bytesAllocated, minimumSize) check should throw OOM.
+      byte[] tooLarge = new byte[200];
+      assertThrows(OutOfMemoryError.class, () -> cbaos.write(tooLarge, 0, 
tooLarge.length));
+    }
+  }
+}

Reply via email to