This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new c00232857 GH-3298: Support unified file based configurations for CLI (#3304)
c00232857 is described below
commit c00232857843032cd288433c5d0b4fbcc8faf44f
Author: Arnav Balyan <[email protected]>
AuthorDate: Wed Sep 10 13:00:40 2025 +0530
GH-3298: Support unified file based configurations for CLI (#3304)
---
parquet-cli/README.md | 4 ++
.../src/main/java/org/apache/parquet/cli/Main.java | 50 ++++++++++++++++++++++
.../test/java/org/apache/parquet/cli/MainTest.java | 36 ++++++++++++++++
.../src/test/resources/test-config.properties | 21 +++++++++
parquet-cli/src/test/resources/test-config.xml | 36 ++++++++++++++++
5 files changed, 147 insertions(+)
diff --git a/parquet-cli/README.md b/parquet-cli/README.md
index c7b3540a4..4e9aea10e 100644
--- a/parquet-cli/README.md
+++ b/parquet-cli/README.md
@@ -137,6 +137,7 @@ Usage: parquet [options] [command] [command options]
### Configuration Options
- `--conf` or `--property`: Set any configuration property in format `key=value`. Can be specified multiple times.
+- `--config-file`: Path to a configuration file (`.properties` or `.xml` format).
Examples:
```bash
@@ -147,4 +148,7 @@ parquet convert input.avro -o output.parquet --conf parquet.avro.write-old-list-
# Multiple options
parquet convert-csv input.csv -o output.parquet --schema schema.avsc --conf parquet.avro.write-parquet-uuid=true --conf parquet.avro.write-old-list-structure=false
+# Using config file
+parquet convert input.avro -o output.parquet --config-file config.properties
+
```
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
index e93a21e89..0f4932f3d 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
@@ -25,12 +25,16 @@ import com.beust.jcommander.ParameterException;
import com.beust.jcommander.Parameters;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;
+import java.io.FileInputStream;
+import java.io.InputStream;
import java.util.List;
+import java.util.Properties;
import java.util.Set;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Level;
@@ -73,6 +77,11 @@ public class Main extends Configured implements Tool {
description = "Set a configuration property (format: key=value). Can be specified multiple times.")
private List<String> confProperties;
+ @Parameter(
+ names = {"--config-file"},
+ description = "Path to a configuration file (properties or Hadoop XML format).")
+ private String configFilePath;
+
@VisibleForTesting
@Parameter(names = "--dollar-zero", description = "A way for the runtime path to be passed in", hidden = true)
String programName = DEFAULT_PROGRAM_NAME;
@@ -172,6 +181,24 @@ public class Main extends Configured implements Tool {
// If the command does not support the configs, it would simply be ignored.
if (command instanceof Configurable) {
Configuration merged = new Configuration(getConf());
+
+ if (configFilePath != null) {
+ try {
+ if (isXmlConfigFile(configFilePath)) {
+ loadXmlConfiguration(merged, configFilePath);
+ } else if (isPropertiesConfigFile(configFilePath)) {
+ loadPropertiesConfiguration(merged, configFilePath);
+ } else {
+ throw new IllegalArgumentException(
+ "Unsupported config file format. Only .xml and .properties files are supported: "
+ + configFilePath);
+ }
+ } catch (Exception e) {
+ throw new IllegalArgumentException(
+ "Failed to load config file '" + configFilePath + "': " + e.getMessage(), e);
+ }
+ }
+
if (confProperties != null) {
for (String prop : confProperties) {
String[] parts = prop.split("=", 2);
@@ -218,4 +245,27 @@ public class Main extends Configured implements Tool {
int rc = ToolRunner.run(new Configuration(), new Main(console), args);
System.exit(rc);
}
+
+ private boolean isXmlConfigFile(String filePath) {
+ return filePath.toLowerCase().endsWith(".xml");
+ }
+
+ private boolean isPropertiesConfigFile(String filePath) {
+ String lowerPath = filePath.toLowerCase();
+ return lowerPath.endsWith(".properties");
+ }
+
+ private void loadXmlConfiguration(Configuration config, String filePath) {
+ config.addResource(new Path(filePath));
+ console.debug("Loaded XML configuration from file: {}", filePath);
+ }
+
+ private void loadPropertiesConfiguration(Configuration config, String filePath) throws Exception {
+ try (InputStream in = new FileInputStream(filePath)) {
+ Properties props = new Properties();
+ props.load(in);
+ props.forEach((key, value) -> config.set(key.toString(), value.toString()));
+ console.debug("Loaded properties configuration from file: {}", filePath);
+ }
+ }
}
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java
index ec4f8cc1f..6bf54bdf0 100644
--- a/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java
@@ -18,6 +18,8 @@
*/
package org.apache.parquet.cli;
+import java.io.File;
+import java.io.FileWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Assert;
@@ -31,4 +33,38 @@ public class MainTest {
ToolRunner.run(new Configuration(), new Main(LoggerFactory.getLogger(MainTest.class)), new String[] {});
Assert.assertTrue("we simply verify there are no errors here", true);
}
+
+ @Test
+ public void testConfigFileLoading() throws Exception {
+ File configFile = File.createTempFile("test-config", ".properties");
+ configFile.deleteOnExit();
+
+ try (FileWriter writer = new FileWriter(configFile)) {
+ writer.write("test.key=test.value\n");
+ }
+
+ try {
+ new Main(LoggerFactory.getLogger(MainTest.class))
+ .run(new String[] {"--config-file", configFile.getAbsolutePath(), "help"});
+ Assert.assertTrue("Config file loading should not throw exception", true);
+ } catch (IllegalArgumentException e) {
+ Assert.fail("Config file loading failed: " + e.getMessage());
+ }
+ }
+
+ @Test
+ public void testLocalPropertiesFile() throws Exception {
+ String configFile = getClass().getResource("/test-config.properties").getPath();
+ ToolRunner.run(new Configuration(), new Main(LoggerFactory.getLogger(MainTest.class)), new String[] {
+ "--config-file", configFile, "version"
+ });
+ }
+
+ @Test
+ public void testLocalXmlFile() throws Exception {
+ String configFile = getClass().getResource("/test-config.xml").getPath();
+ ToolRunner.run(new Configuration(), new Main(LoggerFactory.getLogger(MainTest.class)), new String[] {
+ "--config-file", configFile, "version"
+ });
+ }
}
diff --git a/parquet-cli/src/test/resources/test-config.properties b/parquet-cli/src/test/resources/test-config.properties
new file mode 100644
index 000000000..40d792fca
--- /dev/null
+++ b/parquet-cli/src/test/resources/test-config.properties
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test.key=test.value
+parquet.avro.write-old-list-structure=false
+parquet.compression=SNAPPY
+parquet.block.size=134217728
diff --git a/parquet-cli/src/test/resources/test-config.xml b/parquet-cli/src/test/resources/test-config.xml
new file mode 100644
index 000000000..b7d17e917
--- /dev/null
+++ b/parquet-cli/src/test/resources/test-config.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<configuration>
+ <property>
+ <name>test.key</name>
+ <value>test.value</value>
+ </property>
+
+ <property>
+ <name>parquet.avro.write-old-list-structure</name>
+ <value>false</value>
+ </property>
+
+ <property>
+ <name>parquet.compression</name>
+ <value>SNAPPY</value>
+ </property>
+
+</configuration>