This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push: new 6451189032 [#6563] improvement(core) Optimize the configuration iterator in FileSystem provider (#6576) 6451189032 is described below commit 64511890324e138ecebf4e6fe65654cb8e0f491e Author: Xiaojian Sun <sunxiaojian...@163.com> AuthorDate: Thu Mar 6 20:29:10 2025 +0800 [#6563] improvement(core) Optimize the configuration iterator in FileSystem provider (#6576) ### What changes were proposed in this pull request? Optimize the configuration iterator in FileSystem provider. ### Why are the changes needed? Fix: #6563 ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? TestFileSystemUtils#testCreateConfiguration --- .../gravitino/oss/fs/OSSFileSystemProvider.java | 4 +- .../gravitino/s3/fs/S3FileSystemProvider.java | 17 +++--- .../gravitino/abs/fs/AzureFileSystemProvider.java | 6 +- .../gravitino/gcs/fs/GCSFileSystemProvider.java | 7 +-- .../catalog/hadoop/fs/TestFileSystemUtils.java | 17 ++++++ .../catalog/hadoop/fs/FileSystemUtils.java | 68 ++++++++++++++++++++++ .../catalog/hadoop/fs/HDFSFileSystemProvider.java | 6 +- .../catalog/hadoop/fs/LocalFileSystemProvider.java | 8 +-- 8 files changed, 102 insertions(+), 31 deletions(-) diff --git a/bundles/aliyun/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java index 358e3a08c7..e72f3842ea 100644 --- a/bundles/aliyun/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java +++ b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java @@ -56,8 +56,6 @@ public class OSSFileSystemProvider implements FileSystemProvider, SupportsCreden @Override public FileSystem getFileSystem(Path path, Map<String, String> config) throws IOException { - Configuration configuration = new Configuration(); - Map<String, String> hadoopConfMap = FileSystemUtils.toHadoopConfigMap(config, GRAVITINO_KEY_TO_OSS_HADOOP_KEY); // OSS do not use service loader to load the file system, so we need to set the impl class @@ -65,7 +63,7 @@ public class OSSFileSystemProvider implements FileSystemProvider, SupportsCreden hadoopConfMap.put(OSS_FILESYSTEM_IMPL, AliyunOSSFileSystem.class.getCanonicalName()); } - hadoopConfMap.forEach(configuration::set); + Configuration configuration = FileSystemUtils.createConfiguration(hadoopConfMap); return AliyunOSSFileSystem.newInstance(path.toUri(), configuration); } diff --git a/bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java b/bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java index cbe133ed77..ccec76d931 100644 --- a/bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java +++ b/bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java @@ -62,18 +62,17 @@ public class S3FileSystemProvider implements FileSystemProvider, SupportsCredent @Override public FileSystem getFileSystem(Path path, Map<String, String> config) throws IOException { - Configuration configuration = new Configuration(); Map<String, String> hadoopConfMap = FileSystemUtils.toHadoopConfigMap(config, GRAVITINO_KEY_TO_S3_HADOOP_KEY); - hadoopConfMap.forEach(configuration::set); if (!hadoopConfMap.containsKey(S3_CREDENTIAL_KEY)) { - configuration.set(S3_CREDENTIAL_KEY, S3_SIMPLE_CREDENTIAL); + hadoopConfMap.put(S3_CREDENTIAL_KEY, S3_SIMPLE_CREDENTIAL); } // Hadoop-aws 2 does not support IAMInstanceCredentialsProvider - checkAndSetCredentialProvider(configuration); + checkAndSetCredentialProvider(hadoopConfMap); + Configuration configuration = FileSystemUtils.createConfiguration(hadoopConfMap); return S3AFileSystem.newInstance(path.toUri(), configuration); } @@ -89,8 +88,8 @@ public class S3FileSystemProvider implements FileSystemProvider, SupportsCredent return result; } - private void checkAndSetCredentialProvider(Configuration configuration) { - String provides = configuration.get(S3_CREDENTIAL_KEY); + private void checkAndSetCredentialProvider(Map<String, String> configs) { + String provides = configs.get(S3_CREDENTIAL_KEY); if (provides == null) { return; } @@ -115,15 +114,15 @@ public class S3FileSystemProvider implements FileSystemProvider, SupportsCredent LOG.warn( "Credential provider {} not found in the Hadoop runtime, falling back to default", provider); - configuration.set(S3_CREDENTIAL_KEY, S3_SIMPLE_CREDENTIAL); + configs.put(S3_CREDENTIAL_KEY, S3_SIMPLE_CREDENTIAL); return; } } if (validProviders.isEmpty()) { - configuration.set(S3_CREDENTIAL_KEY, S3_SIMPLE_CREDENTIAL); + configs.put(S3_CREDENTIAL_KEY, S3_SIMPLE_CREDENTIAL); } else { - configuration.set(S3_CREDENTIAL_KEY, joiner.join(validProviders)); + configs.put(S3_CREDENTIAL_KEY, joiner.join(validProviders)); } } diff --git a/bundles/azure/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java b/bundles/azure/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java index 3dcbb502f6..4b14d31b04 100644 --- a/bundles/azure/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java +++ b/bundles/azure/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java @@ -54,7 +54,6 @@ public class AzureFileSystemProvider implements FileSystemProvider, SupportsCred @Override public FileSystem getFileSystem(@Nonnull Path path, @Nonnull Map<String, String> config) throws IOException { - Configuration configuration = new Configuration(); Map<String, String> hadoopConfMap = FileSystemUtils.toHadoopConfigMap(config, ImmutableMap.of()); @@ -69,11 +68,10 @@ public class AzureFileSystemProvider implements FileSystemProvider, SupportsCred } if (!hadoopConfMap.containsKey(ABFS_IMPL_KEY)) { - configuration.set(ABFS_IMPL_KEY, ABFS_IMPL); + hadoopConfMap.put(ABFS_IMPL_KEY, ABFS_IMPL); } - hadoopConfMap.forEach(configuration::set); - + Configuration configuration = FileSystemUtils.createConfiguration(hadoopConfMap); return FileSystem.newInstance(path.toUri(), configuration); } diff --git a/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java index 7ab38b2d7a..5c37614f4a 100644 --- a/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java +++ b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java @@ -45,10 +45,9 @@ public class GCSFileSystemProvider implements FileSystemProvider, SupportsCreden @Override public FileSystem getFileSystem(Path path, Map<String, String> config) throws IOException { - Configuration configuration = new Configuration(); - FileSystemUtils.toHadoopConfigMap(config, GRAVITINO_KEY_TO_GCS_HADOOP_KEY) - .forEach(configuration::set); - + Map<String, String> hadoopConfMap = + FileSystemUtils.toHadoopConfigMap(config, GRAVITINO_KEY_TO_GCS_HADOOP_KEY); + Configuration configuration = FileSystemUtils.createConfiguration(hadoopConfMap); return FileSystem.newInstance(path.toUri(), configuration); } diff --git a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/fs/TestFileSystemUtils.java b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/fs/TestFileSystemUtils.java index b4e0809b6e..a035170368 100644 --- a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/fs/TestFileSystemUtils.java +++ b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/fs/TestFileSystemUtils.java @@ -22,7 +22,9 @@ package org.apache.gravitino.catalog.hadoop.fs; import com.google.common.collect.ImmutableMap; import java.util.Map; import java.util.stream.Stream; +import org.apache.hadoop.conf.Configuration; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; @@ -38,6 +40,21 @@ public class TestFileSystemUtils { Assertions.assertEquals(toHadoopConf, result); } + @Test + void testCreateConfiguration() { + Map<String, String> confMap = + ImmutableMap.of( + "s3a-endpoint", "v1", + "fs.s3a.impl", "v2", + "fs.s3a.endpoint", "v3", + "gravitino.bypass.fs.s3a.endpoint", "v4"); + Configuration configuration = FileSystemUtils.createConfiguration(confMap); + Assertions.assertEquals("v1", configuration.get("s3a-endpoint")); + Assertions.assertEquals("v2", configuration.get("fs.s3a.impl")); + Assertions.assertEquals("v3", configuration.get("fs.s3a.endpoint")); + Assertions.assertEquals("v4", configuration.get("gravitino.bypass.fs.s3a.endpoint")); + } + private static Stream<Arguments> mapArguments() { return Stream.of( Arguments.of( diff --git a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java index 69f6e3b803..b1fd91e311 100644 --- a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java +++ b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java @@ -25,16 +25,26 @@ import static org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider.GRAVITIN import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.collect.Streams; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.util.Arrays; import java.util.Locale; import java.util.Map; import java.util.ServiceLoader; import java.util.Set; import java.util.stream.Collectors; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamWriter; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; public class FileSystemUtils { + private static final String CONFIG_ROOT = "configuration"; + private static final String PROPERTY_TAG = "property"; + private static final String NAME_TAG = "name"; + private static final String VALUE_TAG = "value"; + private FileSystemUtils() {} public static Map<String, FileSystemProvider> getFileSystemProviders(String fileSystemProviders) { @@ -183,4 +193,62 @@ public class FileSystemUtils { throw new RuntimeException("Failed to create GravitinoFileSystemCredentialProvider", e); } } + + /** + * Create a configuration from the config map. + * + * @param config properties map. + * @return + */ + public static Configuration createConfiguration(Map<String, String> config) { + return createConfiguration(null, config); + } + + /** + * Create a configuration from the config map. + * + * @param bypass prefix to remove from the config keys. + * @param config properties map. + * @return + */ + public static Configuration createConfiguration(String bypass, Map<String, String> config) { + try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { + XMLStreamWriter writer = XMLOutputFactory.newInstance().createXMLStreamWriter(out); + writer.writeStartDocument(); + writer.writeStartElement(CONFIG_ROOT); + + config.forEach( + (k, v) -> + writeProperty(writer, StringUtils.isNotBlank(bypass) ? k.replace(bypass, "") : k, v)); + writer.writeEndElement(); + writer.writeEndDocument(); + writer.close(); + + return new Configuration() { + { + addResource(new ByteArrayInputStream(out.toByteArray())); + } + }; + } catch (Exception e) { + throw new RuntimeException("Failed to create configuration", e); + } + } + + private static void writeProperty(XMLStreamWriter writer, String key, String value) { + try { + writer.writeStartElement(PROPERTY_TAG); + writeElement(writer, NAME_TAG, key); + writeElement(writer, VALUE_TAG, value); + writer.writeEndElement(); + } catch (Exception e) { + throw new RuntimeException("Failed to write property: " + key, e); + } + } + + private static void writeElement(XMLStreamWriter writer, String tag, String content) + throws Exception { + writer.writeStartElement(tag); + writer.writeCharacters(content); + writer.writeEndElement(); + } } diff --git a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/HDFSFileSystemProvider.java b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/HDFSFileSystemProvider.java index c6bc8e2e99..00f14fccf4 100644 --- a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/HDFSFileSystemProvider.java +++ b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/HDFSFileSystemProvider.java @@ -32,11 +32,7 @@ public class HDFSFileSystemProvider implements FileSystemProvider { @Override public FileSystem getFileSystem(@Nonnull Path path, @Nonnull Map<String, String> config) throws IOException { - Configuration configuration = new Configuration(); - config.forEach( - (k, v) -> { - configuration.set(k.replace(GRAVITINO_BYPASS, ""), v); - }); + Configuration configuration = FileSystemUtils.createConfiguration(GRAVITINO_BYPASS, config); return FileSystem.newInstance(path.toUri(), configuration); } diff --git a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/LocalFileSystemProvider.java b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/LocalFileSystemProvider.java index 5a2f10f473..2d036255f3 100644 --- a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/LocalFileSystemProvider.java +++ b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/LocalFileSystemProvider.java @@ -31,12 +31,8 @@ public class LocalFileSystemProvider implements FileSystemProvider { @Override public FileSystem getFileSystem(Path path, Map<String, String> config) throws IOException { - Configuration configuration = new Configuration(); - config.forEach( - (k, v) -> { - configuration.set(k.replace(BUILTIN_HDFS_FS_PROVIDER, ""), v); - }); - + Configuration configuration = + FileSystemUtils.createConfiguration(BUILTIN_HDFS_FS_PROVIDER, config); return FileSystem.newInstance(path.toUri(), configuration); }