This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 7e44fbf5d6 [VL] Gluten-it: Support using Delta tables in TPC-H and TPC-DS benchmarks (#10562)
7e44fbf5d6 is described below
commit 7e44fbf5d6c6bfc9e4f27f42121b06fea93ea5c3
Author: Hongze Zhang <[email protected]>
AuthorDate: Mon Sep 1 10:37:58 2025 +0200
[VL] Gluten-it: Support using Delta tables in TPC-H and TPC-DS benchmarks (#10562)
---
tools/gluten-it/common/pom.xml | 24 ++++++++++++++++++++++
.../org/apache/gluten/integration/Suite.scala | 9 ++++++++
.../apache/gluten/integration/TableCreator.scala | 5 ++++-
.../apache/gluten/integration/ds/TpcdsSuite.scala | 2 +-
.../apache/gluten/integration/h/TpchSuite.scala | 2 +-
tools/gluten-it/pom.xml | 15 ++++++++++++++
6 files changed, 54 insertions(+), 3 deletions(-)
diff --git a/tools/gluten-it/common/pom.xml b/tools/gluten-it/common/pom.xml
index 5be543998f..cb1cf5026a 100644
--- a/tools/gluten-it/common/pom.xml
+++ b/tools/gluten-it/common/pom.xml
@@ -220,5 +220,29 @@
</dependency>
</dependencies>
</profile>
+ <profile>
+ <id>delta</id>
+ <activation>
+ <activeByDefault>false</activeByDefault>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>io.delta</groupId>
+ <artifactId>${delta.package.name}_${scala.binary.version}</artifactId>
+ <version>${delta.version}</version>
+ <scope>runtime</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.antlr</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ </dependencies>
+ </profile>
</profiles>
</project>
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala
index 748b8d6b11..c1ca9b575c 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala
@@ -70,6 +70,15 @@ abstract class Suite(
sessionSwitcher.addDefaultConf("spark.network.io.preferDirectBufs", "false")
sessionSwitcher.addDefaultConf("spark.unsafe.exceptionOnMemoryLeak",
s"$errorOnMemLeak")
+ if (dataSource() == "delta") {
+ sessionSwitcher.addDefaultConf(
+ "spark.sql.extensions",
+ "io.delta.sql.DeltaSparkSessionExtension")
+ sessionSwitcher.addDefaultConf(
+ "spark.sql.catalog.spark_catalog",
+ "org.apache.spark.sql.delta.catalog.DeltaCatalog")
+ }
+
if (!enableUi) {
sessionSwitcher.addDefaultConf("spark.ui.enabled", "false")
}
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/TableCreator.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/TableCreator.scala
index fea9c8baef..b4b3c203fd 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/TableCreator.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/TableCreator.scala
@@ -53,9 +53,12 @@ object TableCreator {
} else {
spark.catalog.createTable(tableName, file.getAbsolutePath, source)
createdTableNames += tableName
- if (spark.catalog.listColumns(tableName).collect().exists(_.isPartition)) {
+ try {
spark.catalog.recoverPartitions(tableName)
recoveredPartitionTableNames += tableName
+ } catch {
+ case _: AnalysisException =>
+ // Swallows analysis exceptions.
}
}
})
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
index 9bf2bf66ea..869041e268 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
@@ -224,7 +224,7 @@ object TpcdsSuite {
scale: Double,
genPartitionedData: Boolean): Unit = {
require(
- Set("parquet").contains(dataSource),
+ Set("parquet", "delta").contains(dataSource),
s"Data source type $dataSource is not supported by TPC-DS suite")
}
}
diff --git a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
index 9aaccabdff..a381550b32 100644
--- a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
+++ b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
@@ -134,7 +134,7 @@ object TpchSuite {
scale: Double,
genPartitionedData: Boolean): Unit = {
require(
- Set("parquet").contains(dataSource),
+ Set("parquet", "delta").contains(dataSource),
s"Data source type $dataSource is not supported by TPC-H suite")
require(!genPartitionedData, "TPC-H suite doesn't support generating partitioned data")
}
diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml
index 5ac0f29779..aabff52efd 100644
--- a/tools/gluten-it/pom.xml
+++ b/tools/gluten-it/pom.xml
@@ -21,6 +21,9 @@
<scala.binary.version>2.12</scala.binary.version>
<spark.version>3.4.4</spark.version>
<spark.major.version>3</spark.major.version>
+ <delta.package.name>delta-core</delta.package.name>
+ <delta.version>2.4.0</delta.version>
+ <delta.binary.version>24</delta.binary.version>
<celeborn.version>0.6.0</celeborn.version>
<uniffle.version>0.9.2</uniffle.version>
<gluten.version>1.6.0-SNAPSHOT</gluten.version>
@@ -179,6 +182,9 @@
<spark.version>3.2.2</spark.version>
<scala.version>2.12.15</scala.version>
<scala.binary.version>2.12</scala.binary.version>
+ <delta.package.name>delta-core</delta.package.name>
+ <delta.version>2.0.1</delta.version>
+ <delta.binary.version>20</delta.binary.version>
</properties>
</profile>
<profile>
@@ -187,6 +193,9 @@
<spark.version>3.3.1</spark.version>
<scala.version>2.12.15</scala.version>
<scala.binary.version>2.12</scala.binary.version>
+ <delta.package.name>delta-core</delta.package.name>
+ <delta.version>2.3.0</delta.version>
+ <delta.binary.version>23</delta.binary.version>
</properties>
</profile>
<profile>
@@ -203,6 +212,9 @@
<spark.version>3.5.2</spark.version>
<scala.version>2.12.18</scala.version>
<scala.binary.version>2.12</scala.binary.version>
+ <delta.package.name>delta-core</delta.package.name>
+ <delta.version>2.4.0</delta.version>
+ <delta.binary.version>24</delta.binary.version>
</properties>
</profile>
<profile>
@@ -211,6 +223,9 @@
<spark.version>4.0.0</spark.version>
<scala.version>2.13.8</scala.version>
<scala.binary.version>2.13</scala.binary.version>
+ <delta.package.name>delta-spark</delta.package.name>
+ <delta.version>3.3.1</delta.version>
+ <delta.binary.version>33</delta.binary.version>
</properties>
</profile>
<profile>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]