jerryshao commented on code in PR #5020: URL: https://github.com/apache/gravitino/pull/5020#discussion_r1800657095
########## catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java: ########## @@ -119,26 +125,28 @@ public void initialize( Map<String, String> config, CatalogInfo info, HasPropertyMetadata propertiesMetadata) throws RuntimeException { this.propertiesMetadata = propertiesMetadata; - // Initialize Hadoop Configuration. - this.conf = config; - this.hadoopConf = new Configuration(); this.catalogInfo = info; - Map<String, String> bypassConfigs = - config.entrySet().stream() - .filter(e -> e.getKey().startsWith(CATALOG_BYPASS_PREFIX)) - .collect( - Collectors.toMap( - e -> e.getKey().substring(CATALOG_BYPASS_PREFIX.length()), - Map.Entry::getValue)); - bypassConfigs.forEach(hadoopConf::set); + + this.conf = config; + + String fileSystemProviders = + (String) + propertiesMetadata + .catalogPropertiesMetadata() + .getOrDefault(config, HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS); + FileSystemUtils.initFileSystemProviders(fileSystemProviders, fileSystemProvidersMap); + + this.defaultFilesystemProvider = + (String) + propertiesMetadata + .catalogPropertiesMetadata() + .getOrDefault(config, HadoopCatalogPropertiesMetadata.DEFAULT_FS_PROVIDER); Review Comment: What is the default value for this property, should if be "file"? ########## catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java: ########## @@ -742,4 +750,35 @@ private boolean checkSingleFile(Fileset fileset) { fileset.name()); } } + + FileSystem getFileSystem(Path path, Map<String, String> config) throws IOException { + if (path == null) { + throw new IllegalArgumentException("Path should not be null"); + } + + // Can't get the scheme from the path like '/path/to/file', use the default filesystem provider. + if (path.toUri().getScheme() == null) { + if (defaultFilesystemProvider != null) { + return getFileSystemByScheme(defaultFilesystemProvider, config, path); + } + + LOG.warn( + "Can't get schema from path: {} and default filesystem provider is null, using" Review Comment: scheme... ########## catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java: ########## @@ -742,4 +750,35 @@ private boolean checkSingleFile(Fileset fileset) { fileset.name()); } } + + FileSystem getFileSystem(Path path, Map<String, String> config) throws IOException { + if (path == null) { + throw new IllegalArgumentException("Path should not be null"); + } + + // Can't get the scheme from the path like '/path/to/file', use the default filesystem provider. + if (path.toUri().getScheme() == null) { + if (defaultFilesystemProvider != null) { + return getFileSystemByScheme(defaultFilesystemProvider, config, path); + } + + LOG.warn( + "Can't get schema from path: {} and default filesystem provider is null, using" + + " local file system", + path); + return getFileSystemByScheme(LOCAL_FILE_SCHEME, config, path); + } + + return getFileSystemByScheme(path.toUri().getScheme(), config, path); + } + + private FileSystem getFileSystemByScheme(String scheme, Map<String, String> config, Path path) + throws IOException { + FileSystemProvider provider = fileSystemProvidersMap.get(scheme); + if (provider == null) { + throw new IllegalArgumentException("Unsupported scheme: " + scheme); Review Comment: You should clearly tell user why the exception is happened, the code here is not easy for user to know the actual reason. ########## catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java: ########## @@ -90,6 +90,10 @@ public class HadoopCatalogOperations implements CatalogOperations, SupportsSchem private CatalogInfo catalogInfo; + private final Map<String, FileSystemProvider> fileSystemProvidersMap = Maps.newHashMap(); + + private String defaultFilesystemProvider; Review Comment: You can maintain a default `FileSystemProvider`, not just a string. Besides, it is `FileSystem`, not `FileSystem`. ########## catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java: ########## @@ -742,4 +750,35 @@ private boolean checkSingleFile(Fileset fileset) { fileset.name()); } } + + FileSystem getFileSystem(Path path, Map<String, String> config) throws IOException { + if (path == null) { + throw new IllegalArgumentException("Path should not be null"); + } + + // Can't get the scheme from the path like '/path/to/file', use the default filesystem provider. + if (path.toUri().getScheme() == null) { + if (defaultFilesystemProvider != null) { + return getFileSystemByScheme(defaultFilesystemProvider, config, path); + } + + LOG.warn( + "Can't get schema from path: {} and default filesystem provider is null, using" + + " local file system", + path); + return getFileSystemByScheme(LOCAL_FILE_SCHEME, config, path); + } + + return getFileSystemByScheme(path.toUri().getScheme(), config, path); Review Comment: ```java if (scheme == null && defaultFSProvider == null) { LOG.warn(xxx) } newScheme = scheme == null ? defaultFSProvider.getScheme() : scheme; getFileSystemByScheme(newScheme); ``` ########## catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java: ########## @@ -44,6 +63,24 @@ public class HadoopCatalogPropertiesMetadata extends BaseCatalogPropertiesMetada false /* immutable */, null, false /* hidden */)) + .put( + FILESYSTEM_PROVIDERS, + PropertyEntry.stringOptionalPropertyEntry( + FILESYSTEM_PROVIDERS, + "The file system provider class name, separated by comma", + false /* immutable */, + null, + false /* hidden */)) + .put( + DEFAULT_FS_PROVIDER, + PropertyEntry.stringOptionalPropertyEntry( + DEFAULT_FS_PROVIDER, + "Default file system provider, used to create the default file system " + + "candidate value is 'local', 'hdfs' or others specified in the " Review Comment: file, not local... -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gravitino.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org