This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 9dc1196f1c2 [update](hudi) update hudi-spark bundle to 3.4.3 (#35013) (#35718)
9dc1196f1c2 is described below
commit 9dc1196f1c26ff0127eb4dfd2598d7bb8713cabf
Author: Ashin Gau <[email protected]>
AuthorDate: Fri May 31 20:51:28 2024 +0800
[update](hudi) update hudi-spark bundle to 3.4.3 (#35013) (#35718)
backport: #35013
---
fe/be-java-extensions/hudi-scanner/pom.xml | 41 ++++----------------
.../org/apache/doris/hudi/BaseSplitReader.scala | 8 ++--
.../apache/doris/hudi/MORSnapshotSplitReader.scala | 3 +-
fe/be-java-extensions/preload-extensions/pom.xml | 42 ++++++--------------
fe/fe-core/pom.xml | 40 ++++++++++++++-----
.../hudi/source/HudiCachedPartitionProcessor.java | 1 +
fe/pom.xml | 45 ++++++++++++++++++++--
7 files changed, 99 insertions(+), 81 deletions(-)
diff --git a/fe/be-java-extensions/hudi-scanner/pom.xml
b/fe/be-java-extensions/hudi-scanner/pom.xml
index d4f7a458612..465a9393b0f 100644
--- a/fe/be-java-extensions/hudi-scanner/pom.xml
+++ b/fe/be-java-extensions/hudi-scanner/pom.xml
@@ -32,10 +32,7 @@ under the License.
<fe_ut_parallel>1</fe_ut_parallel>
<scala.version>2.12.15</scala.version>
<scala.binary.version>2.12</scala.binary.version>
- <spark.version>3.2.0</spark.version>
- <sparkbundle.version>3.2</sparkbundle.version>
- <janino.version>3.0.16</janino.version>
- <avro.version>1.11.2</avro.version>
+ <avro.version>1.11.3</avro.version>
</properties>
<dependencyManagement>
@@ -91,7 +88,7 @@ under the License.
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
- <artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
+
<artifactId>${hudi-spark.version}_${scala.binary.version}</artifactId>
<version>${hudi.version}</version>
<scope>provided</scope>
<exclusions>
@@ -119,6 +116,11 @@ under the License.
<version>1.10.1</version>
<scope>provided</scope>
</dependency>
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>antlr4-runtime</artifactId>
+ <version>${antlr4.version}</version>
+ </dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -160,35 +162,6 @@ under the License.
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
- <exclusions>
- <exclusion>
- <groupId>org.codehaus.janino</groupId>
- <artifactId>janino</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.codehaus.janino</groupId>
- <artifactId>commons-compiler</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <!-- version of spark's janino is error -->
- <groupId>org.codehaus.janino</groupId>
- <artifactId>janino</artifactId>
- <version>${janino.version}</version>
- <scope>provided</scope>
- <exclusions>
- <exclusion>
- <groupId>org.codehaus.janino</groupId>
- <artifactId>commons-compiler</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.codehaus.janino</groupId>
- <artifactId>commons-compiler</artifactId>
- <version>${janino.version}</version>
- <scope>provided</scope>
</dependency>
<dependency>
<!-- version of spark's jackson module is error -->
diff --git
a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala
b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala
index 8229064163d..dcc068ad700 100644
---
a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala
+++
b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala
@@ -44,7 +44,7 @@ import org.apache.hudi.io.storage.HoodieAvroHFileReader
import org.apache.hudi.metadata.HoodieTableMetadataUtil
import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions,
DataSourceWriteOptions, HoodieSparkConfUtils, HoodieTableSchema,
HoodieTableState}
import org.apache.log4j.Logger
-import org.apache.spark.sql.adapter.Spark3_2Adapter
+import org.apache.spark.sql.adapter.Spark3_4Adapter
import org.apache.spark.sql.avro.{HoodieAvroSchemaConverters,
HoodieSparkAvroSchemaConverters}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
@@ -66,7 +66,7 @@ import scala.collection.JavaConverters._
import scala.util.control.NonFatal
import scala.util.{Failure, Success, Try}
-class DorisSparkAdapter extends Spark3_2Adapter {
+class DorisSparkAdapter extends Spark3_4Adapter {
override def getAvroSchemaConverters: HoodieAvroSchemaConverters =
HoodieSparkAvroSchemaConverters
}
@@ -498,7 +498,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) {
hadoopConf: Configuration): PartitionedFile =>
Iterator[InternalRow] = {
partitionedFile => {
val reader = new HoodieAvroHFileReader(
- hadoopConf, new Path(partitionedFile.filePath), new
CacheConfig(hadoopConf))
+ hadoopConf, partitionedFile.filePath.toPath, new
CacheConfig(hadoopConf))
val requiredRowSchema = requiredDataSchema.structTypeSchema
// NOTE: Schema has to be parsed at this point, since Avro's [[Schema]]
aren't serializable
@@ -573,7 +573,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) {
BaseFileReader(
read = partitionedFile => {
- val extension = FSUtils.getFileExtension(partitionedFile.filePath)
+ val extension =
FSUtils.getFileExtension(partitionedFile.filePath.toString())
if (tableBaseFileFormat.getFileExtension.equals(extension)) {
read(partitionedFile)
} else {
diff --git
a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala
b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala
index 07e236082ce..02a4fa40045 100644
---
a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala
+++
b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala
@@ -21,6 +21,7 @@ import org.apache.hudi.HoodieBaseRelation.convertToAvroSchema
import org.apache.hudi.avro.HoodieAvroUtils
import org.apache.hudi.common.model.HoodieLogFile
import org.apache.hudi.{DataSourceReadOptions, HoodieMergeOnReadFileSplit,
HoodieTableSchema}
+import org.apache.spark.paths.SparkPath
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.PartitionedFile
@@ -80,7 +81,7 @@ class MORSnapshotSplitReader(override val split: HoodieSplit)
extends BaseSplitR
val partitionedBaseFile = if (split.dataFilePath.isEmpty) {
None
} else {
- Some(PartitionedFile(getPartitionColumnsAsInternalRow(),
split.dataFilePath, 0, split.dataFileLength))
+ Some(PartitionedFile(getPartitionColumnsAsInternalRow(),
SparkPath.fromPathString(split.dataFilePath), 0, split.dataFileLength))
}
HoodieMergeOnReadFileSplit(partitionedBaseFile, logFiles)
}
diff --git a/fe/be-java-extensions/preload-extensions/pom.xml
b/fe/be-java-extensions/preload-extensions/pom.xml
index 8cc11473fdd..235a3f270f9 100644
--- a/fe/be-java-extensions/preload-extensions/pom.xml
+++ b/fe/be-java-extensions/preload-extensions/pom.xml
@@ -33,8 +33,6 @@ under the License.
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<scala.binary.version>2.12</scala.binary.version>
- <spark.version>3.2.0</spark.version>
- <janino.version>3.0.16</janino.version>
</properties>
<dependencies>
@@ -63,6 +61,12 @@ under the License.
<!-- Must be provided, we use hadoop_libs in BE's 3rd party
instead -->
<scope>provided</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-annotations</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>provided</scope>
+ </dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark-client</artifactId>
@@ -83,6 +87,11 @@ under the License.
</exclusion>
</exclusions>
</dependency>
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>antlr4-runtime</artifactId>
+ <version>${antlr4.version}</version>
+ </dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark3-common</artifactId>
@@ -90,7 +99,7 @@ under the License.
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
- <artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
+
<artifactId>${hudi-spark.version}_${scala.binary.version}</artifactId>
<version>${hudi.version}</version>
<exclusions>
<exclusion>
@@ -158,33 +167,6 @@ under the License.
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
- <exclusions>
- <exclusion>
- <groupId>org.codehaus.janino</groupId>
- <artifactId>janino</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.codehaus.janino</groupId>
- <artifactId>commons-compiler</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <!-- version of spark's janino is error -->
- <groupId>org.codehaus.janino</groupId>
- <artifactId>janino</artifactId>
- <version>${janino.version}</version>
- <exclusions>
- <exclusion>
- <groupId>org.codehaus.janino</groupId>
- <artifactId>commons-compiler</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.codehaus.janino</groupId>
- <artifactId>commons-compiler</artifactId>
- <version>${janino.version}</version>
</dependency>
<dependency>
<!-- version of spark's jackson module is error -->
diff --git a/fe/fe-core/pom.xml b/fe/fe-core/pom.xml
index 0a8cbcdce2f..21a6fac91ff 100644
--- a/fe/fe-core/pom.xml
+++ b/fe/fe-core/pom.xml
@@ -32,7 +32,6 @@ under the License.
<doris.home>${basedir}/../../</doris.home>
<doris.thirdparty>${basedir}/../../thirdparty</doris.thirdparty>
<fe_ut_parallel>1</fe_ut_parallel>
- <antlr4.version>4.9.3</antlr4.version>
<awssdk.version>2.20.131</awssdk.version>
<huaweiobs.version>3.1.1-hw-46</huaweiobs.version>
<tencentcos.version>8.2.7</tencentcos.version>
@@ -433,9 +432,26 @@ under the License.
</exclusion>
</exclusions>
</dependency>
+ <!-- antl4 The version of antlr-runtime in trino parser is need to be
consistent with doris,
+ when upgrade doris antlr-runtime version, should take care of
trino-parser.-->
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>antlr4-runtime</artifactId>
+ <version>${antlr4.version}</version>
+ </dependency>
<dependency>
<groupId>com.aliyun.odps</groupId>
<artifactId>odps-sdk-core</artifactId>
+ <exclusions>
+ <exclusion>
+ <artifactId>antlr-runtime</artifactId>
+ <groupId>org.antlr</groupId>
+ </exclusion>
+ <exclusion>
+ <artifactId>antlr4</artifactId>
+ <groupId>org.antlr</groupId>
+ </exclusion>
+ </exclusions>
</dependency>
<!--
https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter-web
-->
<dependency>
@@ -639,14 +655,6 @@ under the License.
<artifactId>mariadb-java-client</artifactId>
</dependency>
- <!-- antl4 The version of antlr-runtime in trino parser is need to be
consistent with doris,
- when upgrade doris antlr-runtime version, should take care of
trino-parser.-->
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>antlr4-runtime</artifactId>
- <version>${antlr4.version}</version>
- </dependency>
-
<dependency>
<groupId>com.zaxxer</groupId>
<artifactId>HikariCP</artifactId>
@@ -747,6 +755,20 @@ under the License.
<groupId>io.airlift</groupId>
<artifactId>concurrent</artifactId>
</dependency>
+ <dependency>
+ <groupId>me.bechberger</groupId>
+ <artifactId>ap-loader-all</artifactId>
+ <version>3.0-8</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <version>2.5.2-hadoop3</version>
+ </dependency>
</dependencies>
<repositories>
<!-- for huawei obs sdk -->
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
index 4543303db6c..c8220349019 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
@@ -162,6 +162,7 @@ public class HudiCachedPartitionProcessor extends
HudiPartitionProcessor {
partitionValues.writeLock().unlock();
}
} catch (Exception e) {
+ LOG.warn("Failed to get hudi partitions", e);
throw new CacheException("Failed to get hudi partitions", e);
}
}
diff --git a/fe/pom.xml b/fe/pom.xml
index 5dcdabfb331..f20953bc8e7 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -273,7 +273,7 @@ under the License.
<!-- NOTE: Using grpc-java whose version is newer than 1.34.0 will
break the build on CentOS 6 due to the obsolete GLIBC -->
<grpc-java.version>1.34.0</grpc-java.version>
<grpc.version>1.60.1</grpc.version>
- <check.freamework.version>3.42.0</check.freamework.version>
+ <check.freamework.version>3.43.0</check.freamework.version>
<protobuf.version>3.24.3</protobuf.version>
<!-- we use protoc-jar-maven-plugin to generate protobuf generated
code -->
<!-- see
https://repo.maven.apache.org/maven2/com/google/protobuf/protoc/ to get correct
version -->
@@ -293,12 +293,13 @@ under the License.
<zjsonpatch.version>0.2.3</zjsonpatch.version>
<kafka-clients.version>3.4.0</kafka-clients.version>
<oshi-core.version>6.4.5</oshi-core.version>
- <xnio-nio.version>3.8.9.Final</xnio-nio.version>
+ <xnio-nio.version>3.8.14.Final</xnio-nio.version>
<javax.annotation-api.version>1.3.2</javax.annotation-api.version>
<javax.activation.version>1.2.0</javax.activation.version>
<jaxws-api.version>2.3.0</jaxws-api.version>
<RoaringBitmap.version>0.8.13</RoaringBitmap.version>
- <spark.version>3.4.1</spark.version>
+ <spark.version>3.4.3</spark.version>
+ <hudi-spark.version>hudi-spark3.4.x</hudi-spark.version>
<hive.version>3.1.3</hive.version>
<hive.common.version>2.3.9</hive.common.version>
<nimbusds.version>9.35</nimbusds.version>
@@ -330,6 +331,8 @@ under the License.
<aws-java-sdk.version>1.12.669</aws-java-sdk.version>
<mariadb-java-client.version>3.0.9</mariadb-java-client.version>
<hadoop.version>3.3.6</hadoop.version>
+ <hbase.version>2.4.9</hbase.version>
+ <antlr4.version>4.13.1</antlr4.version>
<joda.version>2.8.1</joda.version>
<project.scm.id>github</project.scm.id>
<spring.version>2.7.13</spring.version>
@@ -522,6 +525,14 @@ under the License.
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-common</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-api</artifactId>
</exclusion>
</exclusions>
</dependency>
@@ -552,6 +563,29 @@ under the License.
<artifactId>kerb-simplekdc</artifactId>
<version>${kerby.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <version>${hbase.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-common</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-annotations</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
<dependency>
<groupId>org.apache.kerby</groupId>
<artifactId>kerb-core</artifactId>
@@ -1123,6 +1157,11 @@ under the License.
<artifactId>xnio-nio</artifactId>
<version>${xnio-nio.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.jboss.xnio</groupId>
+ <artifactId>xnio-api</artifactId>
+ <version>${xnio-nio.version}</version>
+ </dependency>
<!-- support jdk9 -->
<dependency>
<groupId>javax.annotation</groupId>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org