This is an automated email from the ASF dual-hosted git repository.
chengchengjin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new b16f300207 [GLUTEN-10582][VL] Add Cudf memory resource mode and
percent parameters (#10583)
b16f300207 is described below
commit b16f300207f55bc242d757453a68b63c4c469fd9
Author: Jin Chengcheng <[email protected]>
AuthorDate: Mon Sep 1 09:55:03 2025 +0100
[GLUTEN-10582][VL] Add Cudf memory resource mode and percent parameters
(#10583)
---
.../main/scala/org/apache/gluten/config/VeloxConfig.scala | 13 +++++++++++++
cpp/velox/compute/VeloxBackend.cc | 7 +++++--
cpp/velox/config/VeloxConfig.h | 11 +++++++++++
docs/velox-configuration.md | 2 ++
4 files changed, 31 insertions(+), 2 deletions(-)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
index 52bde8044d..d017c11c63 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
@@ -656,6 +656,19 @@ object VeloxConfig {
.booleanConf
.createWithDefault(true)
+ val CUDF_MEMORY_RESOURCE =
+
buildStaticConf("spark.gluten.sql.columnar.backend.velox.cudf.memoryResource")
+ .doc("GPU RMM memory resource.")
+ .stringConf
+ .checkValues(Set("cuda", "pool", "async", "arena", "managed",
"managed_pool"))
+ .createWithDefault("async")
+
+ val CUDF_MEMORY_PERCENT =
+
buildStaticConf("spark.gluten.sql.columnar.backend.velox.cudf.memoryPercent")
+ .doc("The initial percent of GPU memory to allocate for memory resource
for one thread.")
+ .intConf
+ .createWithDefault(50)
+
val MEMORY_DUMP_ON_EXIT =
buildConf("spark.gluten.monitor.memoryDumpOnExit")
.doc(
diff --git a/cpp/velox/compute/VeloxBackend.cc
b/cpp/velox/compute/VeloxBackend.cc
index b79bece0f1..d8089d8653 100644
--- a/cpp/velox/compute/VeloxBackend.cc
+++ b/cpp/velox/compute/VeloxBackend.cc
@@ -163,9 +163,12 @@ void VeloxBackend::init(
#endif
#ifdef GLUTEN_ENABLE_GPU
- FLAGS_velox_cudf_debug = backendConf_->get<bool>(kDebugCudf,
kDebugCudfDefault);
if (backendConf_->get<bool>(kCudfEnabled, kCudfEnabledDefault)) {
- velox::cudf_velox::registerCudf();
+ FLAGS_velox_cudf_debug = backendConf_->get<bool>(kDebugCudf,
kDebugCudfDefault);
+ FLAGS_velox_cudf_memory_resource =
backendConf_->get<std::string>(kCudfMemoryResource, kCudfMemoryResourceDefault);
+ auto& options = velox::cudf_velox::CudfOptions::getInstance();
+ options.memoryPercent = backendConf_->get<int32_t>(kCudfMemoryPercent,
kCudfMemoryPercentDefault);
+ velox::cudf_velox::registerCudf(options);
}
#endif
diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h
index e38253a3fb..ca85cc65ec 100644
--- a/cpp/velox/config/VeloxConfig.h
+++ b/cpp/velox/config/VeloxConfig.h
@@ -166,4 +166,15 @@ const std::string kQueryTraceTaskRegExp =
"spark.gluten.sql.columnar.backend.vel
/// defined by the underlying file system.
const std::string kOpTraceDirectoryCreateConfig =
"spark.gluten.sql.columnar.backend.velox.opTraceDirectoryCreateConfig";
+
+// Cudf config.
+// GPU RMM memory resource
+const std::string kCudfMemoryResource =
"spark.gluten.sql.columnar.backend.velox.cudf.memoryResource";
+const std::string kCudfMemoryResourceDefault =
+ "async"; // Allowed: "cuda", "pool", "async", "arena", "managed",
"managed_pool"
+
+// Initial percent of GPU memory to allocate for memory resource for one thread
+const std::string kCudfMemoryPercent =
"spark.gluten.sql.columnar.backend.velox.cudf.memoryPercent";
+const int32_t kCudfMemoryPercentDefault = 50;
+
} // namespace gluten
diff --git a/docs/velox-configuration.md b/docs/velox-configuration.md
index d6fdb7ca2a..34cb9a0d88 100644
--- a/docs/velox-configuration.md
+++ b/docs/velox-configuration.md
@@ -22,6 +22,8 @@ nav_order: 16
| spark.gluten.sql.columnar.backend.velox.cacheEnabled
| false | Enable Velox cache, default off. It's recommended to
enable soft-affinity as well when enabling Velox cache.
[...]
| spark.gluten.sql.columnar.backend.velox.cachePrefetchMinPct
| 0 | Set prefetch cache min pct for velox file scan
[...]
| spark.gluten.sql.columnar.backend.velox.checkUsageLeak
| true | Enable check memory usage leak.
[...]
+| spark.gluten.sql.columnar.backend.velox.cudf.memoryPercent
| 50 | The initial percent of GPU memory to allocate for
memory resource for one thread.
[...]
+| spark.gluten.sql.columnar.backend.velox.cudf.memoryResource
| async | GPU RMM memory resource.
[...]
| spark.gluten.sql.columnar.backend.velox.directorySizeGuess
| 32KB | Deprecated, rename to
spark.gluten.sql.columnar.backend.velox.footerEstimatedSize
[...]
| spark.gluten.sql.columnar.backend.velox.enableSystemExceptionStacktrace
| true | Enable the stacktrace for system type of
VeloxException
[...]
| spark.gluten.sql.columnar.backend.velox.enableUserExceptionStacktrace
| true | Enable the stacktrace for user type of VeloxException
[...]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]