This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 439657fb5 [CI] Fix Connection time out in docker workflow (#1656)
439657fb5 is described below

commit 439657fb57ed4a03f430379aae8b93fc2743649f
Author: Furqaan Khan <[email protected]>
AuthorDate: Mon Oct 28 20:14:15 2024 -0400

    [CI] Fix Connection time out in docker workflow (#1656)
    
    * fix: connection timeout error
    
    * fix: try some ideas
    
    * fix: try some ideas 2/?
    
    * fix: try some ideas 3/?
    
    * fix: try some ideas 4/?
    
    * fix: add debugger
    
    * fix: add debugger 2/?
    
    * fix: add debugger 3/?
    
    * fix: add debugger 4/?
    
    * fix: try some idea 5/?
    
    * fix: add debugger
    
    * fix: add debugger 2/?
    
    * fix: add debugger 3/?
    
    * fix: try some ideas 6/?
    
    * fix: try some ideas 7/?
    
    * fix: try some ideas 8/?
    
    * fix: it should work now.
    
    * fix: remove the debugger
    
    * cleaning up
    
    * introduce global environment
    
    * Update .github/workflows/python.yml
    
    Co-authored-by: Jia Yu <[email protected]>
    
    * fix: docker timeout issues
    
    ---------
    
    Co-authored-by: Jia Yu <[email protected]>
---
 docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile |  6 ++----
 docker/spark.sh                                             | 13 +++----------
 2 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile b/docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile
index 1b3963734..6596229af 100644
--- a/docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile
+++ b/docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile
@@ -19,7 +19,6 @@ FROM ubuntu:22.04
 
 ARG shared_workspace=/opt/workspace
 ARG spark_version=3.4.1
-ARG hadoop_version=3
 ARG hadoop_s3_version=3.3.4
 ARG aws_sdk_version=1.12.402
 ARG spark_xml_version=0.16.0
@@ -29,8 +28,7 @@ ARG spark_extension_version=2.11.0
 
 # Set up envs
 ENV SHARED_WORKSPACE=${shared_workspace}
-ENV SPARK_HOME /opt/spark
-RUN mkdir ${SPARK_HOME}
+ENV SPARK_HOME /usr/local/lib/python3.10/dist-packages/pyspark
 ENV SEDONA_HOME /opt/sedona
 RUN mkdir ${SEDONA_HOME}
 
@@ -44,7 +42,7 @@ COPY ./ ${SEDONA_HOME}/
 
 RUN chmod +x ${SEDONA_HOME}/docker/spark.sh
 RUN chmod +x ${SEDONA_HOME}/docker/sedona.sh
-RUN ${SEDONA_HOME}/docker/spark.sh ${spark_version} ${hadoop_version} ${hadoop_s3_version} ${aws_sdk_version} ${spark_xml_version}
+RUN ${SEDONA_HOME}/docker/spark.sh ${spark_version} ${hadoop_s3_version} ${aws_sdk_version} ${spark_xml_version}
 
 # Install Python dependencies
 COPY docker/sedona-spark-jupyterlab/requirements.txt /opt/requirements.txt
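
The net effect of the Dockerfile change above is that SPARK_HOME now points at
the PySpark package that pip3 installs, instead of a separately downloaded
Spark distribution. A minimal sanity-check sketch, assuming the Ubuntu 22.04
default of Python 3.10 (the path is taken from the new ENV line), to be run
inside the built image; this check is illustrative and not part of the commit:

    # Confirm that pip's PySpark location matches the baked-in SPARK_HOME.
    python3 -c "import pyspark, os; print(os.path.dirname(pyspark.__file__))"
    # expected: /usr/local/lib/python3.10/dist-packages/pyspark
    echo "$SPARK_HOME"
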
diff --git a/docker/spark.sh b/docker/spark.sh
index 8cca154a3..bd935e8a4 100755
--- a/docker/spark.sh
+++ b/docker/spark.sh
@@ -19,10 +19,9 @@ set -e
 
 # Define variables
 spark_version=$1
-hadoop_version=$2
-hadoop_s3_version=$3
-aws_sdk_version=$4
-spark_xml_version=$5
+hadoop_s3_version=$2
+aws_sdk_version=$3
+spark_xml_version=$4
 
 # Set up OS libraries
 apt-get update
@@ -30,9 +29,6 @@ apt-get install -y openjdk-19-jdk-headless curl python3-pip maven
 pip3 install --upgrade pip && pip3 install pipenv
 
 # Download Spark jar and set up PySpark
-curl https://archive.apache.org/dist/spark/spark-"${spark_version}"/spark-"${spark_version}"-bin-hadoop"${hadoop_version}".tgz -o spark.tgz
-tar -xf spark.tgz && mv spark-"${spark_version}"-bin-hadoop"${hadoop_version}"/* "${SPARK_HOME}"/
-rm spark.tgz && rm -rf spark-"${spark_version}"-bin-hadoop"${hadoop_version}"
 pip3 install pyspark=="${spark_version}"
 
 # Add S3 jars
@@ -42,9 +38,6 @@ curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/"${aws_sdk
 # Add spark-xml jar
 curl https://repo1.maven.org/maven2/com/databricks/spark-xml_2.12/"${spark_xml_version}"/spark-xml_2.12-"${spark_xml_version}".jar -o "${SPARK_HOME}"/jars/spark-xml_2.12-"${spark_xml_version}".jar
 
-# Set up master IP address and executor memory
-cp "${SPARK_HOME}"/conf/spark-defaults.conf.template 
"${SPARK_HOME}"/conf/spark-defaults.conf
-
 # Install required libraries for GeoPandas on Apple chip mac
 apt-get install -y gdal-bin libgdal-dev
 

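With hadoop_version dropped, spark.sh now takes four positional arguments
instead of five. An illustrative invocation, using the default versions from
the Dockerfile's ARG lines above; this sketch is not part of the commit and
assumes SPARK_HOME is already exported and points at the pip-installed
PySpark, since the script copies jars into "${SPARK_HOME}"/jars:

    # args: spark_version hadoop_s3_version aws_sdk_version spark_xml_version
    ./docker/spark.sh 3.4.1 3.3.4 1.12.402 0.16.0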