This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 3fc83c2 ARROW-9438: [CI] Add spark patch to compile with recent Arrow Java changes
3fc83c2 is described below
commit 3fc83c281104fff0bf8e07e7589281186c7ed251
Author: Bryan Cutler <[email protected]>
AuthorDate: Tue Jul 14 16:04:32 2020 -0500
ARROW-9438: [CI] Add spark patch to compile with recent Arrow Java changes
Recent changes in Arrow Java from ARROW-9300 now require adding a
dependency on arrow-memory-netty to provide a default allocator. This adds a
patch to build spark with the required dependency.
Closes #7746 from BryanCutler/spark-integration-patch-ARROW-9438
Lead-authored-by: Bryan Cutler <[email protected]>
Co-authored-by: Krisztián Szűcs <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
---
ci/docker/conda-python-spark.dockerfile | 4 ++
ci/etc/integration_spark_ARROW-9438.patch | 72 +++++++++++++++++++++++++++++++
dev/release/rat_exclude_files.txt | 1 +
3 files changed, 77 insertions(+)
diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/docker/conda-python-spark.dockerfile
index d3f0a22..a20f1ff 100644
--- a/ci/docker/conda-python-spark.dockerfile
+++ b/ci/docker/conda-python-spark.dockerfile
@@ -36,6 +36,10 @@ ARG spark=master
COPY ci/scripts/install_spark.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark
+# patch spark to build with current Arrow Java
+COPY ci/etc/integration_spark_ARROW-9438.patch /arrow/ci/etc/
+RUN patch -d /spark -p1 -i /arrow/ci/etc/integration_spark_ARROW-9438.patch
+
# build cpp with tests
ENV CC=gcc \
CXX=g++ \
diff --git a/ci/etc/integration_spark_ARROW-9438.patch b/ci/etc/integration_spark_ARROW-9438.patch
new file mode 100644
index 0000000..2baed30
--- /dev/null
+++ b/ci/etc/integration_spark_ARROW-9438.patch
@@ -0,0 +1,72 @@
+From 0b5388a945a7e5c5706cf00d0754540a6c68254d Mon Sep 17 00:00:00 2001
+From: Bryan Cutler <[email protected]>
+Date: Mon, 13 Jul 2020 23:12:25 -0700
+Subject: [PATCH] Update Arrow Java for 1.0.0
+
+---
+ pom.xml | 17 ++++++++++++++---
+ sql/catalyst/pom.xml | 4 ++++
+ 2 files changed, 18 insertions(+), 3 deletions(-)
+
+diff --git a/pom.xml b/pom.xml
+index 08ca13bfe9..6619fca200 100644
+--- a/pom.xml
++++ b/pom.xml
+@@ -199,7 +199,7 @@
+ If you are changing Arrow version specification, please check ./python/pyspark/sql/utils.py,
+ and ./python/setup.py too.
+ -->
+- <arrow.version>0.15.1</arrow.version>
++ <arrow.version>1.0.0-SNAPSHOT</arrow.version>
+ <!-- org.fusesource.leveldbjni will be used except on arm64 platform. -->
+ <leveldbjni.group>org.fusesource.leveldbjni</leveldbjni.group>
+
+@@ -2288,7 +2288,7 @@
+ </exclusion>
+ <exclusion>
+ <groupId>com.fasterxml.jackson.core</groupId>
+- <artifactId>jackson-databind</artifactId>
++ <artifactId>jackson-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+@@ -2298,9 +2298,20 @@
+ <groupId>io.netty</groupId>
+ <artifactId>netty-common</artifactId>
+ </exclusion>
++ </exclusions>
++ </dependency>
++ <dependency>
++ <groupId>org.apache.arrow</groupId>
++ <artifactId>arrow-memory-netty</artifactId>
++ <version>${arrow.version}</version>
++ <exclusions>
+ <exclusion>
+ <groupId>io.netty</groupId>
+- <artifactId>netty-handler</artifactId>
++ <artifactId>netty-buffer</artifactId>
++ </exclusion>
++ <exclusion>
++ <groupId>io.netty</groupId>
++ <artifactId>netty-common</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml
+index 9edbb7fec9..6b79eb722f 100644
+--- a/sql/catalyst/pom.xml
++++ b/sql/catalyst/pom.xml
+@@ -117,6 +117,10 @@
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ </dependency>
++ <dependency>
++ <groupId>org.apache.arrow</groupId>
++ <artifactId>arrow-memory-netty</artifactId>
++ </dependency>
+ </dependencies>
+ <build>
+ <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+--
+2.17.1
+
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index d25e2e3..158790d 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -9,6 +9,7 @@
*.snap
.github/ISSUE_TEMPLATE/question.md
ci/etc/rprofile
+ci/etc/*.patch
cpp/CHANGELOG_PARQUET.md
cpp/src/arrow/io/mman.h
cpp/src/arrow/util/random.h