This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new dd6a4d563 ci: run Iceberg Spark tests on all PRs and commits to main
branch (#3792)
dd6a4d563 is described below
commit dd6a4d563e78f965c8cb771d958544a6a7dfc3b9
Author: Matt Butrovich <[email protected]>
AuthorDate: Wed Mar 25 17:38:38 2026 -0400
ci: run Iceberg Spark tests on all PRs and commits to main branch (#3792)
---
.../actions/setup-iceberg-rust-builder/action.yaml | 39 ----------------------
.github/workflows/iceberg_spark_test.yml | 28 +++++++---------
dev/diffs/{iceberg-rust => iceberg}/1.10.0.diff | 0
dev/diffs/{iceberg-rust => iceberg}/1.8.1.diff | 0
dev/diffs/{iceberg-rust => iceberg}/1.9.1.diff | 0
.../contributor-guide/iceberg-spark-tests.md | 16 +++++----
docs/source/contributor-guide/index.md | 1 +
7 files changed, 22 insertions(+), 62 deletions(-)
diff --git a/.github/actions/setup-iceberg-rust-builder/action.yaml b/.github/actions/setup-iceberg-rust-builder/action.yaml
deleted file mode 100644
index ed41f940f..000000000
--- a/.github/actions/setup-iceberg-rust-builder/action.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-name: Setup Iceberg Builder
-description: 'Setup Apache Iceberg to run Spark SQL tests'
-inputs:
- iceberg-version:
- description: 'The Apache Iceberg version (e.g., 1.8.1) to build'
- required: true
-runs:
- using: "composite"
- steps:
- - name: Clone Iceberg repo
- uses: actions/checkout@v6
- with:
- repository: apache/iceberg
- path: apache-iceberg
- ref: apache-iceberg-${{inputs.iceberg-version}}
- fetch-depth: 1
-
- - name: Setup Iceberg for Comet
- shell: bash
- run: |
- cd apache-iceberg
- git apply ../dev/diffs/iceberg-rust/${{inputs.iceberg-version}}.diff
diff --git a/.github/workflows/iceberg_spark_test.yml b/.github/workflows/iceberg_spark_test.yml
index eec2c9946..bb857c9e8 100644
--- a/.github/workflows/iceberg_spark_test.yml
+++ b/.github/workflows/iceberg_spark_test.yml
@@ -53,7 +53,6 @@ env:
jobs:
# Build native library once and share with all test jobs
build-native:
- if: contains(github.event.pull_request.title, '[iceberg]')
name: Build Native Library
runs-on: ubuntu-24.04
container:
@@ -102,9 +101,8 @@ jobs:
path: native/target/ci/libcomet.so
retention-days: 1
- iceberg-spark-rust:
+ iceberg-spark:
needs: build-native
- if: contains(github.event.pull_request.title, '[iceberg]')
strategy:
matrix:
os: [ubuntu-24.04]
@@ -113,7 +111,7 @@ jobs:
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full:
'3.5.8'}]
scala-version: ['2.13']
fail-fast: false
- name: iceberg-spark-rust/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
+ name: iceberg-spark/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
runs-on: ${{ matrix.os }}
container:
image: amd64/rust
@@ -135,10 +133,10 @@ jobs:
run: |
./mvnw install -Prelease -DskipTests -Pspark-${{
matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
- name: Setup Iceberg
- uses: ./.github/actions/setup-iceberg-rust-builder
+ uses: ./.github/actions/setup-iceberg-builder
with:
iceberg-version: ${{ matrix.iceberg-version.full }}
- - name: Run Iceberg Spark tests (Rust)
+ - name: Run Iceberg Spark tests
run: |
cd apache-iceberg
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet
cache requires cleanups
@@ -146,9 +144,8 @@ jobs:
:iceberg-spark:iceberg-spark-${{ matrix.spark-version.short }}_${{
matrix.scala-version }}:test \
-Pquick=true -x javadoc
- iceberg-spark-extensions-rust:
+ iceberg-spark-extensions:
needs: build-native
- if: contains(github.event.pull_request.title, '[iceberg]')
strategy:
matrix:
os: [ubuntu-24.04]
@@ -157,7 +154,7 @@ jobs:
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full:
'3.5.8'}]
scala-version: ['2.13']
fail-fast: false
- name: iceberg-spark-extensions-rust/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
+ name: iceberg-spark-extensions/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
runs-on: ${{ matrix.os }}
container:
image: amd64/rust
@@ -179,10 +176,10 @@ jobs:
run: |
./mvnw install -Prelease -DskipTests -Pspark-${{
matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
- name: Setup Iceberg
- uses: ./.github/actions/setup-iceberg-rust-builder
+ uses: ./.github/actions/setup-iceberg-builder
with:
iceberg-version: ${{ matrix.iceberg-version.full }}
- - name: Run Iceberg Spark extensions tests (Rust)
+ - name: Run Iceberg Spark extensions tests
run: |
cd apache-iceberg
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet
cache requires cleanups
@@ -190,9 +187,8 @@ jobs:
:iceberg-spark:iceberg-spark-extensions-${{
matrix.spark-version.short }}_${{ matrix.scala-version }}:test \
-Pquick=true -x javadoc
- iceberg-spark-runtime-rust:
+ iceberg-spark-runtime:
needs: build-native
- if: contains(github.event.pull_request.title, '[iceberg]')
strategy:
matrix:
os: [ubuntu-24.04]
@@ -201,7 +197,7 @@ jobs:
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full:
'3.5.8'}]
scala-version: ['2.13']
fail-fast: false
- name: iceberg-spark-runtime-rust/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
+ name: iceberg-spark-runtime/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
runs-on: ${{ matrix.os }}
container:
image: amd64/rust
@@ -223,10 +219,10 @@ jobs:
run: |
./mvnw install -Prelease -DskipTests -Pspark-${{
matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
- name: Setup Iceberg
- uses: ./.github/actions/setup-iceberg-rust-builder
+ uses: ./.github/actions/setup-iceberg-builder
with:
iceberg-version: ${{ matrix.iceberg-version.full }}
- - name: Run Iceberg Spark runtime tests (Rust)
+ - name: Run Iceberg Spark runtime tests
run: |
cd apache-iceberg
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet
cache requires cleanups
diff --git a/dev/diffs/iceberg-rust/1.10.0.diff b/dev/diffs/iceberg/1.10.0.diff
similarity index 100%
rename from dev/diffs/iceberg-rust/1.10.0.diff
rename to dev/diffs/iceberg/1.10.0.diff
diff --git a/dev/diffs/iceberg-rust/1.8.1.diff b/dev/diffs/iceberg/1.8.1.diff
similarity index 100%
rename from dev/diffs/iceberg-rust/1.8.1.diff
rename to dev/diffs/iceberg/1.8.1.diff
diff --git a/dev/diffs/iceberg-rust/1.9.1.diff b/dev/diffs/iceberg/1.9.1.diff
similarity index 100%
rename from dev/diffs/iceberg-rust/1.9.1.diff
rename to dev/diffs/iceberg/1.9.1.diff
diff --git a/docs/source/contributor-guide/iceberg-spark-tests.md b/docs/source/contributor-guide/iceberg-spark-tests.md
index 5cc5690f4..38becc020 100644
--- a/docs/source/contributor-guide/iceberg-spark-tests.md
+++ b/docs/source/contributor-guide/iceberg-spark-tests.md
@@ -51,7 +51,7 @@ Clone Apache Iceberg locally and apply the diff file from
Comet against the matc
git clone [email protected]:apache/iceberg.git apache-iceberg
cd apache-iceberg
git checkout apache-iceberg-1.8.1
-git apply ../datafusion-comet/dev/diffs/iceberg-rust/1.8.1.diff
+git apply ../datafusion-comet/dev/diffs/iceberg/1.8.1.diff
```
## 3. Run Iceberg Spark Tests
@@ -64,9 +64,11 @@ ENABLE_COMET=true ./gradlew -DsparkVersions=3.5
-DscalaVersion=2.13 -DflinkVersi
The three Gradle targets tested in CI are:
-- `:iceberg-spark:iceberg-spark-<sparkVersion>_<scalaVersion>:test`
-- `:iceberg-spark:iceberg-spark-extensions-<sparkVersion>_<scalaVersion>:test`
-- `:iceberg-spark:iceberg-spark-runtime-<sparkVersion>_<scalaVersion>:integrationTest`
+| Gradle Target | What It Covers
|
+| --------------------------------------------- |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
+| `iceberg-spark-<ver>:test` | Core read/write paths
(Parquet, Avro, ORC, vectorized), scan operations, filtering, bloom filters,
runtime filtering, deletion handling, structured streaming, DDL/DML
(create/alter/drop, writes, deletes), filter and aggregate pushdown, actions
(snapshot expiration, file rewriting, orphan cleanup, table migration),
serialization, and data format conversions. |
+| `iceberg-spark-extensions-<ver>:test` | SQL extensions: stored
procedures (migrate, snapshot, cherrypick, rollback, rewrite-data-files,
rewrite-manifests, expire-snapshots, remove-orphan-files, etc.), row-level
operations (copy-on-write and merge-on-read update/delete/merge), DDL
extensions (branches, tags, alter schema, partition fields), changelog
tables/views, metadata tables, and views. |
+| `iceberg-spark-runtime-<ver>:integrationTest` | A single smoke test
(`SmokeTest.java`) that validates the shaded runtime JAR. The `spark-runtime`
module has no main source — it packages Iceberg and all dependencies into a
shaded uber-JAR. The smoke test exercises basic create, insert, merge, query,
partition field, and sort order operations to confirm the shaded JAR works
end-to-end. |
## Updating Diffs
@@ -76,14 +78,14 @@ regenerate:
```shell
cd apache-iceberg
git reset --hard apache-iceberg-1.8.1 && git clean -fd
-git apply ../datafusion-comet/dev/diffs/iceberg-rust/1.8.1.diff
+git apply ../datafusion-comet/dev/diffs/iceberg/1.8.1.diff
# Make changes, then run spotless to fix formatting
./gradlew spotlessApply
# Stage any new or deleted files, then generate the diff
git add -A
-git diff apache-iceberg-1.8.1 > ../datafusion-comet/dev/diffs/iceberg-rust/1.8.1.diff
+git diff apache-iceberg-1.8.1 > ../datafusion-comet/dev/diffs/iceberg/1.8.1.diff
```
Repeat for each Iceberg version (1.8.1, 1.9.1, 1.10.0). The file contents
differ between versions, so each
@@ -93,4 +95,4 @@ diff must be generated against its own tag.
The `iceberg_spark_test.yml` workflow applies these diffs and runs the three
Gradle targets above against
each Iceberg version. The test matrix covers Spark 3.4 and 3.5 across Iceberg
1.8.1, 1.9.1, and 1.10.0
-with Java 11 and 17. The workflow only runs when the PR title contains `[iceberg]`.
+with Java 11 and 17. The workflow runs on all pull requests and pushes to the main branch.
diff --git a/docs/source/contributor-guide/index.md b/docs/source/contributor-guide/index.md
index 2b6842e44..c0751c68a 100644
--- a/docs/source/contributor-guide/index.md
+++ b/docs/source/contributor-guide/index.md
@@ -37,6 +37,7 @@ Adding a New Expression <adding_a_new_expression>
Tracing <tracing>
Profiling Native Code <profiling_native_code>
Spark SQL Tests <spark-sql-tests.md>
+Iceberg Spark Tests <iceberg-spark-tests.md>
SQL File Tests <sql-file-tests.md>
Roadmap <roadmap.md>
Release Process <release_process>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]