This is an automated email from the ASF dual-hosted git repository.

marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new ad4393e32c [VL] Separate filesystem configuration initialization (#10540)
ad4393e32c is described below

commit ad4393e32c7307071be5b9ca6a03aba2adafa296
Author: Rong Ma <[email protected]>
AuthorDate: Fri Aug 29 09:31:14 2025 +0100

    [VL] Separate filesystem configuration initialization (#10540)
---
 .../operators/writer/VeloxParquetDataSourceABFS.h  |  5 +-
 .../operators/writer/VeloxParquetDataSourceHDFS.h  |  5 +-
 .../operators/writer/VeloxParquetDataSourceS3.h    |  5 +-
 cpp/velox/utils/ConfigExtractor.cc                 | 76 +++++++++++++++++-----
 cpp/velox/utils/ConfigExtractor.h                  |  5 +-
 5 files changed, 72 insertions(+), 24 deletions(-)

diff --git a/cpp/velox/operators/writer/VeloxParquetDataSourceABFS.h b/cpp/velox/operators/writer/VeloxParquetDataSourceABFS.h
index f8f6e5878c..6dd7027cea 100644
--- a/cpp/velox/operators/writer/VeloxParquetDataSourceABFS.h
+++ b/cpp/velox/operators/writer/VeloxParquetDataSourceABFS.h
@@ -43,8 +43,9 @@ class VeloxParquetDataSourceABFS final : public VeloxParquetDataSource {
       : VeloxParquetDataSource(filePath, veloxPool, sinkPool, schema) {}
 
   void initSink(const std::unordered_map<std::string, std::string>& sparkConfs) override {
-    auto hiveConf = getHiveConfig(std::make_shared<facebook::velox::config::ConfigBase>(
-        std::unordered_map<std::string, std::string>(sparkConfs)));
+    auto hiveConf = getHiveConfig(
+        std::make_shared<facebook::velox::config::ConfigBase>(std::unordered_map<std::string, std::string>(sparkConfs)),
+        FileSystemType::kAbfs);
     auto fileSystem = filesystems::getFileSystem(filePath_, hiveConf);
     auto* abfsFileSystem = dynamic_cast<filesystems::AbfsFileSystem*>(fileSystem.get());
     sink_ = std::make_unique<dwio::common::WriteFileSink>(
diff --git a/cpp/velox/operators/writer/VeloxParquetDataSourceHDFS.h b/cpp/velox/operators/writer/VeloxParquetDataSourceHDFS.h
index 5f61d9145f..2e7b313118 100644
--- a/cpp/velox/operators/writer/VeloxParquetDataSourceHDFS.h
+++ b/cpp/velox/operators/writer/VeloxParquetDataSourceHDFS.h
@@ -43,8 +43,9 @@ class VeloxParquetDataSourceHDFS final : public VeloxParquetDataSource {
       : VeloxParquetDataSource(filePath, veloxPool, sinkPool, schema) {}
 
   void initSink(const std::unordered_map<std::string, std::string>& sparkConfs) override {
-    auto hiveConf = getHiveConfig(std::make_shared<facebook::velox::config::ConfigBase>(
-        std::unordered_map<std::string, std::string>(sparkConfs)));
+    auto hiveConf = getHiveConfig(
+        std::make_shared<facebook::velox::config::ConfigBase>(std::unordered_map<std::string, std::string>(sparkConfs)),
+        FileSystemType::kHdfs);
     sink_ = dwio::common::FileSink::create(filePath_, {.connectorProperties = hiveConf, .pool = sinkPool_.get()});
   }
 };
diff --git a/cpp/velox/operators/writer/VeloxParquetDataSourceS3.h b/cpp/velox/operators/writer/VeloxParquetDataSourceS3.h
index 788eda19e5..f366953422 100644
--- a/cpp/velox/operators/writer/VeloxParquetDataSourceS3.h
+++ b/cpp/velox/operators/writer/VeloxParquetDataSourceS3.h
@@ -43,8 +43,9 @@ class VeloxParquetDataSourceS3 final : public VeloxParquetDataSource {
       : VeloxParquetDataSource(filePath, veloxPool, sinkPool, schema) {}
 
   void initSink(const std::unordered_map<std::string, std::string>& sparkConfs) override {
-    auto hiveConf = getHiveConfig(std::make_shared<facebook::velox::config::ConfigBase>(
-        std::unordered_map<std::string, std::string>(sparkConfs)));
+    auto hiveConf = getHiveConfig(
+        std::make_shared<facebook::velox::config::ConfigBase>(std::unordered_map<std::string, std::string>(sparkConfs)),
+        FileSystemType::kS3);
     sink_ = dwio::common::FileSink::create(filePath_, {.connectorProperties = hiveConf, .pool = sinkPool_.get()});
   }
 };
diff --git a/cpp/velox/utils/ConfigExtractor.cc b/cpp/velox/utils/ConfigExtractor.cc
index 2e1aa92ff7..96da5069a2 100644
--- a/cpp/velox/utils/ConfigExtractor.cc
+++ b/cpp/velox/utils/ConfigExtractor.cc
@@ -22,29 +22,18 @@
 
 #include "config/VeloxConfig.h"
 #include "utils/Exception.h"
+#include "utils/Macros.h"
 #include "velox/connectors/hive/HiveConfig.h"
 #include "velox/connectors/hive/storage_adapters/s3fs/S3Config.h"
 
 namespace gluten {
 
-std::string getConfigValue(
-    const std::unordered_map<std::string, std::string>& confMap,
-    const std::string& key,
-    const std::optional<std::string>& fallbackValue) {
-  auto got = confMap.find(key);
-  if (got == confMap.end()) {
-    if (fallbackValue == std::nullopt) {
-      throw std::runtime_error("No such config key: " + key);
-    }
-    return fallbackValue.value();
-  }
-  return got->second;
-}
-
-std::shared_ptr<facebook::velox::config::ConfigBase> getHiveConfig(
-    std::shared_ptr<facebook::velox::config::ConfigBase> conf) {
-  std::unordered_map<std::string, std::string> hiveConfMap;
+namespace {
 
+void getS3HiveConfig(
+    std::shared_ptr<facebook::velox::config::ConfigBase> conf,
+    FileSystemType fsType,
+    std::unordered_map<std::string, std::string>& hiveConfMap) {
 #ifdef ENABLE_S3
   using namespace facebook::velox::filesystems;
   std::string_view kSparkHadoopS3Prefix = "spark.hadoop.fs.s3a.";
@@ -161,7 +150,12 @@ std::shared_ptr<facebook::velox::config::ConfigBase> getHiveConfig(
     }
   }
 #endif
+}
 
+void getGcsHiveConfig(
+    std::shared_ptr<facebook::velox::config::ConfigBase> conf,
+    FileSystemType fsType,
+    std::unordered_map<std::string, std::string>& hiveConfMap) {
 #ifdef ENABLE_GCS
   // https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/master/gcs/CONFIGURATION.md#api-client-configuration
   auto gsStorageRootUrl = conf->get<std::string>("spark.hadoop.fs.gs.storage.root.url");
@@ -204,7 +198,12 @@ std::shared_ptr<facebook::velox::config::ConfigBase> getHiveConfig(
     throw GlutenException("Conf spark.hadoop.fs.gs.auth.type is missing or incorrect");
   }
 #endif
+}
 
+void getAbfsHiveConfig(
+    std::shared_ptr<facebook::velox::config::ConfigBase> conf,
+    FileSystemType fsType,
+    std::unordered_map<std::string, std::string>& hiveConfMap) {
 #ifdef ENABLE_ABFS
   std::string_view kSparkHadoopPrefix = "spark.hadoop.";
   std::string_view kSparkHadoopAbfsPrefix = "spark.hadoop.fs.azure.";
@@ -215,6 +214,49 @@ std::shared_ptr<facebook::velox::config::ConfigBase> getHiveConfig(
     }
   }
 #endif
+}
+
+} // namespace
+
+std::string getConfigValue(
+    const std::unordered_map<std::string, std::string>& confMap,
+    const std::string& key,
+    const std::optional<std::string>& fallbackValue) {
+  auto got = confMap.find(key);
+  if (got == confMap.end()) {
+    if (fallbackValue == std::nullopt) {
+      throw std::runtime_error("No such config key: " + key);
+    }
+    return fallbackValue.value();
+  }
+  return got->second;
+}
+
+std::shared_ptr<facebook::velox::config::ConfigBase> getHiveConfig(
+    std::shared_ptr<facebook::velox::config::ConfigBase> conf,
+    FileSystemType fsType) {
+  std::unordered_map<std::string, std::string> hiveConfMap;
+
+  switch (fsType) {
+    case FileSystemType::kS3:
+      getS3HiveConfig(conf, fsType, hiveConfMap);
+      break;
+    case FileSystemType::kAbfs:
+      getAbfsHiveConfig(conf, fsType, hiveConfMap);
+      break;
+    case FileSystemType::kGcs:
+      getGcsHiveConfig(conf, fsType, hiveConfMap);
+      break;
+    case FileSystemType::kHdfs:
+      break;
+    case FileSystemType::kAll:
+      getS3HiveConfig(conf, fsType, hiveConfMap);
+      getAbfsHiveConfig(conf, fsType, hiveConfMap);
+      getGcsHiveConfig(conf, fsType, hiveConfMap);
+      break;
+    default:
+      GLUTEN_UNREACHABLE();
+  }
 
   hiveConfMap[facebook::velox::connector::hive::HiveConfig::kEnableFileHandleCache] =
       conf->get<bool>(kVeloxFileHandleCacheEnabled, kVeloxFileHandleCacheEnabledDefault) ? "true" : "false";
diff --git a/cpp/velox/utils/ConfigExtractor.h b/cpp/velox/utils/ConfigExtractor.h
index 4cbfdf991f..5b10c714f2 100644
--- a/cpp/velox/utils/ConfigExtractor.h
+++ b/cpp/velox/utils/ConfigExtractor.h
@@ -28,12 +28,15 @@
 
 namespace gluten {
 
+enum class FileSystemType : uint8_t { kHdfs, kS3, kAbfs, kGcs, kAll };
+
 std::string getConfigValue(
     const std::unordered_map<std::string, std::string>& confMap,
     const std::string& key,
     const std::optional<std::string>& fallbackValue);
 
 std::shared_ptr<facebook::velox::config::ConfigBase> getHiveConfig(
-    std::shared_ptr<facebook::velox::config::ConfigBase> conf);
+    std::shared_ptr<facebook::velox::config::ConfigBase> conf,
+    FileSystemType fsType = FileSystemType::kAll);
 
 } // namespace gluten


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to