This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch parquet-1.13.x
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/parquet-1.13.x by this push:
new 7d74292a7 PARQUET-2282: Don't initialize HadoopCodec (#1071) (#1074)
7d74292a7 is described below
commit 7d74292a7d7f0959cdb8e8b41b723e7eb3182db1
Author: Fokko Driesprong <[email protected]>
AuthorDate: Wed Apr 19 11:10:43 2023 +0200
PARQUET-2282: Don't initialize HadoopCodec (#1071) (#1074)
At Iceberg we want to run Apache Flink without Hadoop, and eagerly
initializing HadoopCodec directly makes that impossible. Instead, create
the Hadoop codec factory in build(), but only if another codec hasn't
been provided.
---
.../src/main/java/org/apache/parquet/ParquetReadOptions.java | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git
a/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java
b/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java
index a69ba46be..f20628275 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java
@@ -171,7 +171,7 @@ public class ParquetReadOptions {
protected FilterCompat.Filter recordFilter = null;
protected ParquetMetadataConverter.MetadataFilter metadataFilter =
NO_FILTER;
// the page size parameter isn't used when only using the codec factory to
get decompressors
- protected CompressionCodecFactory codecFactory =
HadoopCodecs.newFactory(0);
+ protected CompressionCodecFactory codecFactory = null;
protected ByteBufferAllocator allocator = new HeapByteBufferAllocator();
protected int maxAllocationSize = ALLOCATION_SIZE_DEFAULT;
protected Map<String, String> properties = new HashMap<>();
@@ -314,6 +314,10 @@ public class ParquetReadOptions {
}
public ParquetReadOptions build() {
+ if (codecFactory == null) {
+ codecFactory = HadoopCodecs.newFactory(0);
+ }
+
return new ParquetReadOptions(
useSignedStringMinMax, useStatsFilter, useDictionaryFilter,
useRecordFilter,
useColumnIndexFilter, usePageChecksumVerification, useBloomFilter,
recordFilter, metadataFilter,