parthchandra commented on code in PR #1581:
URL: https://github.com/apache/datafusion-comet/pull/1581#discussion_r2029172120


##########
spark/src/test/scala/org/apache/comet/CometNativeSuite.scala:
##########
@@ -63,4 +66,35 @@ class CometNativeSuite extends CometTestBase {
     }
     assert(exception2.getMessage contains "null context handle")
   }
+
+  test("Comet native should use spark local dir as temp dir") {
+    withParquetTable((0 until 100000).map(i => (i, i + 1)), "table") {
+      val dirs = SparkEnv.get.blockManager.getLocalDiskDirs
+      dirs.foreach { dir =>
+        val files = new java.io.File(dir).listFiles()
+        assert(!files.exists(f => f.isDirectory && 
f.getName.startsWith("datafusion-")))
+      }
+
+      // Check if the DataFusion temporary dir exists in the Spark local dirs when a Spark job
+      // involving a Comet native operator is running.
+      val observedDataFusionDir = spark
+        .table("table")
+        .selectExpr("_1 + _2 as value")
+        .rdd
+        .mapPartitions { _ =>
+          dirs.map { dir =>
+            val files = new java.io.File(dir).listFiles()
+            files.count(f => f.isDirectory && 
f.getName.startsWith("datafusion-"))

Review Comment:
   👍🏽 



##########
native/core/src/execution/jni_api.rs:
##########
@@ -262,8 +277,11 @@ pub unsafe extern "system" fn 
Java_org_apache_comet_Native_createPlan(
 fn prepare_datafusion_session_context(
     batch_size: usize,
     memory_pool: Arc<dyn MemoryPool>,
+    local_dirs: Vec<String>,
 ) -> CometResult<SessionContext> {
-    let mut rt_config = 
RuntimeEnvBuilder::new().with_disk_manager(DiskManagerConfig::NewOs);
+    let disk_manager_config =
+        
DiskManagerConfig::NewSpecified(local_dirs.into_iter().map(PathBuf::from).collect());

Review Comment:
   Do you know if it is possible to use an object store here?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to