This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 4845f36b4eec47dcf8b1083bd8a042925a02622f Author: Joe McDonnell <[email protected]> AuthorDate: Thu Aug 4 13:27:32 2022 -0700 IMPALA-11207: Use hadoop-cloud-storage for Cloud dependencies Hadoop provides hadoop-cloud-storage, which includes most of the dependencies that Impala currently uses like hadoop-aws, hadoop-azure, Knox's gateway-cloud-bindings, etc. Hadoop has put in a lot of work to make sure that this package includes the right version of dependencies (including shading some dependencies for GCS). It seems like this is a more reliable way to consume these dependencies. This switches the Java build to use hadoop-cloud-storage and removes the dependencies that it replaces. This eliminates the need to control the version of oauth and GCS, as those are determined by hadoop-cloud-storage. Change-Id: I3a1631289f990513823c2b17eb9241cc1b5a7ffd Reviewed-on: http://gerrit.cloudera.org:8080/18817 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- bin/impala-config.sh | 3 --- java/executor-deps/pom.xml | 62 ++++++++-------------------------------------- java/pom.xml | 2 -- 3 files changed, 11 insertions(+), 56 deletions(-) diff --git a/bin/impala-config.sh b/bin/impala-config.sh index c3f36dfda..42fb2924a 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -211,7 +211,6 @@ export CDP_OZONE_VERSION=1.1.0.7.2.16.0-127 export CDP_PARQUET_VERSION=1.10.99.7.2.16.0-127 export CDP_RANGER_VERSION=2.3.0.7.2.16.0-127 export CDP_TEZ_VERSION=0.9.1.7.2.16.0-127 -export CDP_GCS_VERSION=2.1.2.7.2.16.0-127 # Ref: https://infra.apache.org/release-download-pages.html#closer : ${APACHE_MIRROR:="https://www.apache.org/dyn/closer.cgi"} @@ -284,7 +283,6 @@ export IMPALA_RANGER_VERSION=${CDP_RANGER_VERSION} export IMPALA_RANGER_URL=${CDP_RANGER_URL-} export IMPALA_TEZ_VERSION=${CDP_TEZ_VERSION} export IMPALA_TEZ_URL=${CDP_TEZ_URL-} -export IMPALA_GCS_VERSION=${CDP_GCS_VERSION} export 
APACHE_COMPONENTS_HOME="$IMPALA_TOOLCHAIN/apache_components" export USE_APACHE_HIVE=${USE_APACHE_HIVE-false} @@ -892,7 +890,6 @@ echo "IMPALA_HUDI_VERSION = $IMPALA_HUDI_VERSION" echo "IMPALA_KUDU_VERSION = $IMPALA_KUDU_VERSION" echo "IMPALA_RANGER_VERSION = $IMPALA_RANGER_VERSION" echo "IMPALA_ICEBERG_VERSION = $IMPALA_ICEBERG_VERSION" -echo "IMPALA_GCS_VERSION = $IMPALA_GCS_VERSION" echo "IMPALA_COS_VERSION = $IMPALA_COS_VERSION" # Kerberos things. If the cluster exists and is kerberized, source diff --git a/java/executor-deps/pom.xml b/java/executor-deps/pom.xml index 06f4e6971..f219c2f25 100644 --- a/java/executor-deps/pom.xml +++ b/java/executor-deps/pom.xml @@ -88,9 +88,15 @@ under the License. </exclusions> </dependency> + <!-- + hadoop-cloud-storage includes several dependencies that Impala needs: + Hadoop's AWS support, Azure support, GCS connector, Knox support, etc. + hadoop-cloud-storage has been carefully tuned to include the right shaded + dependencies. + --> <dependency> <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-aws</artifactId> + <artifactId>hadoop-cloud-storage</artifactId> <version>${hadoop.version}</version> <!-- Exclude the aws-java-sdk-bundle dependency because the Impala minimal version of this dependency is used instead. --> @@ -99,6 +105,10 @@ under the License. <groupId>com.amazonaws</groupId> <artifactId>aws-java-sdk-bundle</artifactId> </exclusion> + <exclusion> + <groupId>org.eclipse.jetty</groupId> + <artifactId>*</artifactId> + </exclusion> </exclusions> </dependency> @@ -114,42 +124,6 @@ under the License. 
</exclusions> </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-azure</artifactId> - <version>${hadoop.version}</version> - <exclusions> - <exclusion> - <groupId>org.eclipse.jetty</groupId> - <artifactId>*</artifactId> - </exclusion> - </exclusions> - </dependency> - - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-azure-datalake</artifactId> - <version>${hadoop.version}</version> - </dependency> - - <dependency> - <groupId>com.google.cloud.bigdataoss</groupId> - <artifactId>gcs-connector</artifactId> - <version>${gcs.version}</version> - </dependency> - - <!-- Dependency of gcs-connector, newer version addresses CVE --> - <dependency> - <groupId>com.google.oauth-client</groupId> - <artifactId>google-oauth-client</artifactId> - <version>${google.oauth-client.version}</version> - </dependency> - <dependency> - <groupId>com.google.oauth-client</groupId> - <artifactId>google-oauth-client-java6</artifactId> - <version>${google.oauth-client.version}</version> - </dependency> - <dependency> <groupId>com.qcloud.cos</groupId> <artifactId>hadoop-cos</artifactId> @@ -196,20 +170,6 @@ under the License. </exclusions> </dependency> - <!-- IMPALA-8766: Include Knox jars on the classpath --> - <dependency> - <groupId>org.apache.knox</groupId> - <artifactId>gateway-cloud-bindings</artifactId> - <version>${knox.version}</version> - <exclusions> - <!-- Impala currently doesn't support GCS, so exclude those jars --> - <exclusion> - <groupId>com.google.cloud.bigdataoss</groupId> - <artifactId>*</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> <groupId>${ozone.groupId}</groupId> <artifactId>${ozone.artifactId}</artifactId> diff --git a/java/pom.xml b/java/pom.xml index 8bc7c4531..409a15c05 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -47,8 +47,6 @@ under the License. 
<parquet.version>${env.IMPALA_PARQUET_VERSION}</parquet.version> <kite.version>${env.IMPALA_KITE_VERSION}</kite.version> <knox.version>${env.IMPALA_KNOX_VERSION}</knox.version> - <gcs.version>${env.IMPALA_GCS_VERSION}</gcs.version> - <google.oauth-client.version>1.33.3</google.oauth-client.version> <cos.version>${env.IMPALA_COS_VERSION}</cos.version> <thrift.version>${env.IMPALA_THRIFT_POM_VERSION}</thrift.version> <impala.extdatasrc.api.version>${project.version}</impala.extdatasrc.api.version>
