This is an automated email from the ASF dual-hosted git repository.

wgtmac pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git


The following commit(s) were added to refs/heads/master by this push:
     new 65f7adef5 GH-3398: Fix potential ClassLoader leak caused by ThreadLocal lambda in Binary (#3447)
65f7adef5 is described below

commit 65f7adef548f9ab52e5790feba1a5ae44f7ad3f7
Author: YangJie <[email protected]>
AuthorDate: Mon Jun 8 13:59:07 2026 +0800

    GH-3398: Fix potential ClassLoader leak caused by ThreadLocal lambda in Binary (#3447)
---
 .../java/org/apache/parquet/io/api/Binary.java     | 13 ++++----
 .../java/org/apache/parquet/io/api/TestBinary.java | 35 ++++++++++++++++++++++
 2 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
index e37ee1248..173581bdd 100644
--- a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
+++ b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
@@ -27,7 +27,6 @@ import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.CharBuffer;
 import java.nio.charset.CharacterCodingException;
-import java.nio.charset.CharsetEncoder;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import org.apache.parquet.io.ParquetEncodingException;
@@ -268,14 +267,16 @@ public abstract class Binary implements Comparable<Binary>, Serializable {
       return "Binary{\"" + toStringUsingUTF8() + "\"}";
     }
 
-    private static final ThreadLocal<CharsetEncoder> ENCODER =
-        ThreadLocal.withInitial(StandardCharsets.UTF_8::newEncoder);
-
     private static ByteBuffer encodeUTF8(CharSequence value) {
       try {
-        return ENCODER.get().encode(CharBuffer.wrap(value));
+        // Use a fresh encoder per call rather than a static ThreadLocal initialized with a lambda
+        // (UTF_8::newEncoder): that lambda's class is loaded by the application ClassLoader and can
+        // keep it from being unloaded in long-lived pooled threads, leaking Metaspace (GH-3398).
+        // The encoder also preserves strict CodingErrorAction.REPORT, so malformed UTF-16 fails
+        // fast instead of being silently replaced (as String#getBytes(UTF_8) would).
+        return StandardCharsets.UTF_8.newEncoder().encode(CharBuffer.wrap(value));
       } catch (CharacterCodingException e) {
-        throw new ParquetEncodingException("UTF-8 not supported.", e);
+        throw new ParquetEncodingException("Failed to encode CharSequence as UTF-8.", e);
       }
     }
   }
diff --git a/parquet-column/src/test/java/org/apache/parquet/io/api/TestBinary.java b/parquet-column/src/test/java/org/apache/parquet/io/api/TestBinary.java
index a1a83af77..3dcb878d2 100644
--- a/parquet-column/src/test/java/org/apache/parquet/io/api/TestBinary.java
+++ b/parquet-column/src/test/java/org/apache/parquet/io/api/TestBinary.java
@@ -30,7 +30,10 @@ import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
+import org.apache.parquet.io.ParquetEncodingException;
 import org.apache.parquet.io.api.TestBinary.BinaryFactory.BinaryAndOriginal;
 import org.junit.Test;
 
@@ -314,4 +317,36 @@ public class TestBinary {
       // expected
     }
   }
+
+  @Test
+  public void testFromCharSequenceEncodesValidUtf8() {
+    // Cover ASCII, multi-byte BMP, a supplementary code point (valid surrogate pair) and empty.
+    assertFromCharSequenceEncodesUtf8("test-123-é中"); // ASCII + U+00E9 (2-byte) + U+4E2D (3-byte)
+    assertFromCharSequenceEncodesUtf8("😀"); // U+1F600, valid surrogate pair (4-byte)
+    assertFromCharSequenceEncodesUtf8(""); // empty
+  }
+
+  private static void assertFromCharSequenceEncodesUtf8(String value) {
+    // fromCharSequence routes any CharSequence (here a StringBuilder) through FromCharSequenceBinary.
+    // For valid input the strict encoder must match String#getBytes(UTF_8), so this is a genuine
+    // cross-check, not a circular assertion.
+    Binary binary = Binary.fromCharSequence(new StringBuilder(value));
+    assertArrayEquals(value.getBytes(StandardCharsets.UTF_8), binary.getBytes());
+  }
+
+  @Test
+  public void testFromCharSequenceRejectsMalformedUtf16() {
+    // An unpaired high surrogate is invalid UTF-16. FromCharSequenceBinary must fail fast
+    // rather than silently substituting a replacement byte (as String#getBytes(UTF_8) would).
+    CharSequence value = new StringBuilder().append('a').append('\uD800').append('b');
+    try {
+      Binary.fromCharSequence(value);
+      fail("Should have thrown an exception for malformed UTF-16 input");
+    } catch (ParquetEncodingException e) {
+      // Lock in that the cause is a UTF-8 coding error, not an unrelated failure of the same type.
+      assertTrue(
+          "expected a CharacterCodingException cause but was " + e.getCause(),
+          e.getCause() instanceof CharacterCodingException);
+    }
+  }
 }

Reply via email to