jerryshao commented on code in PR #5020: URL: https://github.com/apache/gravitino/pull/5020#discussion_r1798731309
########## catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java: ########## @@ -742,4 +769,51 @@ private boolean checkSingleFile(Fileset fileset) { fileset.name()); } } + + static FileSystem getFileSystem(Path path, Map<String, String> config) throws IOException { + Map<String, String> newConfig = Maps.newHashMap(config); + String scheme; + Path fsPath; + if (path != null) { + scheme = path.toUri().getScheme(); + if (scheme == null) { + // If the schema of the path is not set, we need to get the default FS from the + // configuration. + String defaultFS = config.get(DEFAULT_FS); Review Comment: If this is a catalog property should be defined by users, why can't you follow our rule to define the key name and manage them in property metadata? ########## catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java: ########## @@ -742,4 +769,51 @@ private boolean checkSingleFile(Fileset fileset) { fileset.name()); } } + + static FileSystem getFileSystem(Path path, Map<String, String> config) throws IOException { + Map<String, String> newConfig = Maps.newHashMap(config); + String scheme; + Path fsPath; + if (path != null) { + scheme = path.toUri().getScheme(); + if (scheme == null) { + // If the schema of the path is not set, we need to get the default FS from the + // configuration. + String defaultFS = config.get(DEFAULT_FS); + if (defaultFS == null) { + scheme = LOCAL_FILE_SCHEMA; + } else { + String schemaFromDefaultFS = new Path(defaultFS).toUri().getScheme(); + scheme = schemaFromDefaultFS == null ? LOCAL_FILE_SCHEMA : schemaFromDefaultFS; Review Comment: "scheme", not "schema". ########## catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java: ########## @@ -742,4 +769,51 @@ private boolean checkSingleFile(Fileset fileset) { fileset.name()); } } + + static FileSystem getFileSystem(Path path, Map<String, String> config) throws IOException { + Map<String, String> newConfig = Maps.newHashMap(config); + String scheme; + Path fsPath; + if (path != null) { + scheme = path.toUri().getScheme(); + if (scheme == null) { + // If the schema of the path is not set, we need to get the default FS from the + // configuration. + String defaultFS = config.get(DEFAULT_FS); + if (defaultFS == null) { + scheme = LOCAL_FILE_SCHEMA; + } else { + String schemaFromDefaultFS = new Path(defaultFS).toUri().getScheme(); + scheme = schemaFromDefaultFS == null ? LOCAL_FILE_SCHEMA : schemaFromDefaultFS; Review Comment: Why the default is local fs, it is not set and cannot figure out by code, we'd better throw an exception, instead of choosing one that may not be expected by users. ########## catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java: ########## @@ -72,10 +76,14 @@ public class HadoopCatalogOperations implements CatalogOperations, SupportsSchemas, FilesetCatalog { + public static final String DEFAULT_FS = "fs.defaultFS"; + public static final String LOCAL_FILE_PATH = "file:///"; + public static final Map<String, FileSystemProvider> FILE_SYSTEM_PROVIDERS = Maps.newHashMap(); Review Comment: Why do you need to make this public? ########## catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java: ########## @@ -119,26 +140,32 @@ public void initialize( Map<String, String> config, CatalogInfo info, HasPropertyMetadata propertiesMetadata) throws RuntimeException { this.propertiesMetadata = propertiesMetadata; + this.catalogInfo = info; + // Initialize Hadoop Configuration. this.conf = config; - this.hadoopConf = new Configuration(); - this.catalogInfo = info; - Map<String, String> bypassConfigs = - config.entrySet().stream() + + this.bypassConfigs = + conf.entrySet().stream() .filter(e -> e.getKey().startsWith(CATALOG_BYPASS_PREFIX)) .collect( Collectors.toMap( e -> e.getKey().substring(CATALOG_BYPASS_PREFIX.length()), Map.Entry::getValue)); - bypassConfigs.forEach(hadoopConf::set); + + String fileSystemProviders = + (String) + propertiesMetadata + .catalogPropertiesMetadata() + .getOrDefault(config, HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS); + + FileSystemUtils.initFileSystemProviders(fileSystemProviders, FILE_SYSTEM_PROVIDERS); Review Comment: What's the purpose of making this `FILE_SYSTEM_PROVIDERS` static? ########## catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java: ########## @@ -119,26 +140,32 @@ public void initialize( Map<String, String> config, CatalogInfo info, HasPropertyMetadata propertiesMetadata) throws RuntimeException { this.propertiesMetadata = propertiesMetadata; + this.catalogInfo = info; + // Initialize Hadoop Configuration. this.conf = config; - this.hadoopConf = new Configuration(); - this.catalogInfo = info; - Map<String, String> bypassConfigs = - config.entrySet().stream() + + this.bypassConfigs = + conf.entrySet().stream() .filter(e -> e.getKey().startsWith(CATALOG_BYPASS_PREFIX)) .collect( Collectors.toMap( e -> e.getKey().substring(CATALOG_BYPASS_PREFIX.length()), Map.Entry::getValue)); - bypassConfigs.forEach(hadoopConf::set); + + String fileSystemProviders = + (String) + propertiesMetadata + .catalogPropertiesMetadata() + .getOrDefault(config, HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS); + + FileSystemUtils.initFileSystemProviders(fileSystemProviders, FILE_SYSTEM_PROVIDERS); Review Comment: If you're making this static, how do you handle the concurrent issue if users create the Hadoop catalog in the same time? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gravitino.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org