This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 7e44fbf5d6 [VL] Gluten-it: Support using Delta tables in TPC-H and TPC-DS benchmarks (#10562)
7e44fbf5d6 is described below

commit 7e44fbf5d6c6bfc9e4f27f42121b06fea93ea5c3
Author: Hongze Zhang <[email protected]>
AuthorDate: Mon Sep 1 10:37:58 2025 +0200

    [VL] Gluten-it: Support using Delta tables in TPC-H and TPC-DS benchmarks (#10562)
---
 tools/gluten-it/common/pom.xml                     | 24 ++++++++++++++++++++++
 .../org/apache/gluten/integration/Suite.scala      |  9 ++++++++
 .../apache/gluten/integration/TableCreator.scala   |  5 ++++-
 .../apache/gluten/integration/ds/TpcdsSuite.scala  |  2 +-
 .../apache/gluten/integration/h/TpchSuite.scala    |  2 +-
 tools/gluten-it/pom.xml                            | 15 ++++++++++++++
 6 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/tools/gluten-it/common/pom.xml b/tools/gluten-it/common/pom.xml
index 5be543998f..cb1cf5026a 100644
--- a/tools/gluten-it/common/pom.xml
+++ b/tools/gluten-it/common/pom.xml
@@ -220,5 +220,29 @@
         </dependency>
       </dependencies>
     </profile>
+    <profile>
+      <id>delta</id>
+      <activation>
+        <activeByDefault>false</activeByDefault>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>io.delta</groupId>
+          <artifactId>${delta.package.name}_${scala.binary.version}</artifactId>
+          <version>${delta.version}</version>
+          <scope>runtime</scope>
+          <exclusions>
+            <exclusion>
+              <groupId>org.antlr</groupId>
+              <artifactId>*</artifactId>
+            </exclusion>
+            <exclusion>
+              <groupId>org.scala-lang</groupId>
+              <artifactId>scala-library</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+      </dependencies>
+    </profile>
   </profiles>
 </project>
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala
index 748b8d6b11..c1ca9b575c 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala
@@ -70,6 +70,15 @@ abstract class Suite(
   sessionSwitcher.addDefaultConf("spark.network.io.preferDirectBufs", "false")
   sessionSwitcher.addDefaultConf("spark.unsafe.exceptionOnMemoryLeak", s"$errorOnMemLeak")
 
+  if (dataSource() == "delta") {
+    sessionSwitcher.addDefaultConf(
+      "spark.sql.extensions",
+      "io.delta.sql.DeltaSparkSessionExtension")
+    sessionSwitcher.addDefaultConf(
+      "spark.sql.catalog.spark_catalog",
+      "org.apache.spark.sql.delta.catalog.DeltaCatalog")
+  }
+
   if (!enableUi) {
     sessionSwitcher.addDefaultConf("spark.ui.enabled", "false")
   }
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/TableCreator.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/TableCreator.scala
index fea9c8baef..b4b3c203fd 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/TableCreator.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/TableCreator.scala
@@ -53,9 +53,12 @@ object TableCreator {
           } else {
             spark.catalog.createTable(tableName, file.getAbsolutePath, source)
             createdTableNames += tableName
-            if (spark.catalog.listColumns(tableName).collect().exists(_.isPartition)) {
+            try {
               spark.catalog.recoverPartitions(tableName)
               recoveredPartitionTableNames += tableName
+            } catch {
+              case _: AnalysisException =>
+              // Swallows analysis exceptions.
             }
           }
         })
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
index 9bf2bf66ea..869041e268 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
@@ -224,7 +224,7 @@ object TpcdsSuite {
       scale: Double,
       genPartitionedData: Boolean): Unit = {
     require(
-      Set("parquet").contains(dataSource),
+      Set("parquet", "delta").contains(dataSource),
       s"Data source type $dataSource is not supported by TPC-DS suite")
   }
 }
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
index 9aaccabdff..a381550b32 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
@@ -134,7 +134,7 @@ object TpchSuite {
       scale: Double,
       genPartitionedData: Boolean): Unit = {
     require(
-      Set("parquet").contains(dataSource),
+      Set("parquet", "delta").contains(dataSource),
       s"Data source type $dataSource is not supported by TPC-H suite")
     require(!genPartitionedData, "TPC-H suite doesn't support generating partitioned data")
   }
diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml
index 5ac0f29779..aabff52efd 100644
--- a/tools/gluten-it/pom.xml
+++ b/tools/gluten-it/pom.xml
@@ -21,6 +21,9 @@
     <scala.binary.version>2.12</scala.binary.version>
     <spark.version>3.4.4</spark.version>
     <spark.major.version>3</spark.major.version>
+    <delta.package.name>delta-core</delta.package.name>
+    <delta.version>2.4.0</delta.version>
+    <delta.binary.version>24</delta.binary.version>
     <celeborn.version>0.6.0</celeborn.version>
     <uniffle.version>0.9.2</uniffle.version>
     <gluten.version>1.6.0-SNAPSHOT</gluten.version>
@@ -179,6 +182,9 @@
         <spark.version>3.2.2</spark.version>
         <scala.version>2.12.15</scala.version>
         <scala.binary.version>2.12</scala.binary.version>
+        <delta.package.name>delta-core</delta.package.name>
+        <delta.version>2.0.1</delta.version>
+        <delta.binary.version>20</delta.binary.version>
       </properties>
     </profile>
     <profile>
@@ -187,6 +193,9 @@
         <spark.version>3.3.1</spark.version>
         <scala.version>2.12.15</scala.version>
         <scala.binary.version>2.12</scala.binary.version>
+        <delta.package.name>delta-core</delta.package.name>
+        <delta.version>2.3.0</delta.version>
+        <delta.binary.version>23</delta.binary.version>
       </properties>
     </profile>
     <profile>
@@ -203,6 +212,9 @@
         <spark.version>3.5.2</spark.version>
         <scala.version>2.12.18</scala.version>
         <scala.binary.version>2.12</scala.binary.version>
+        <delta.package.name>delta-core</delta.package.name>
+        <delta.version>2.4.0</delta.version>
+        <delta.binary.version>24</delta.binary.version>
       </properties>
     </profile>
     <profile>
@@ -211,6 +223,9 @@
         <spark.version>4.0.0</spark.version>
         <scala.version>2.13.8</scala.version>
         <scala.binary.version>2.13</scala.binary.version>
+        <delta.package.name>delta-spark</delta.package.name>
+        <delta.version>3.3.1</delta.version>
+        <delta.binary.version>33</delta.binary.version>
       </properties>
     </profile>
     <profile>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to