This is an automated email from the ASF dual-hosted git repository.
jonvex pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
from 17f3720a48c0 [HUDI-9672] Disable skipping clustering for spark incremental query to avoid data duplication (#13659)
add b763291008a4 [HUDI-8746] Add ORC support to FileGroupReader paths and migrate Spark multi-format reading to FileGroupReader (#13632)
No new revisions were added by this update.
Summary of changes:
.../read/HoodieFileGroupReaderOnJavaTestBase.java | 2 +
.../hadoop/TestHoodieFileGroupReaderOnHive.java | 3 +
.../client/common/SparkReaderContextFactory.java | 45 +++-
.../SparkFileFormatInternalRowReaderContext.scala | 10 +-
...tReader.scala => SparkColumnarFileReader.scala} | 8 +-
.../hudi/MultipleColumnarFileFormatReader.scala | 58 +++++
.../org/apache/spark/sql/hudi/SparkAdapter.scala | 11 +-
.../common/TestSparkReaderContextFactory.java | 4 +-
.../java/org/apache/hudi/avro/HoodieAvroUtils.java | 4 +-
.../common/table/read/HoodieFileGroupReader.java | 2 +-
.../apache/hudi/common/table/read/InputSplit.java | 5 +
.../apache/hudi/common/util/FileFormatUtils.java | 15 +-
.../table/read/TestHoodieFileGroupReaderBase.java | 75 ++++--
.../org/apache/hudi/common/util/AvroOrcUtils.java | 14 +-
.../java/org/apache/hudi/common/util/OrcUtils.java | 17 +-
.../org/apache/hudi/common/util/ParquetUtils.java | 9 +-
.../apache/hudi/io/hadoop/HoodieAvroOrcReader.java | 20 +-
.../hudi/io/hadoop/TestHoodieReaderWriterBase.java | 1 -
.../hudi/HoodieHadoopFsRelationFactory.scala | 58 +----
.../org/apache/hudi/HoodieMergeOnReadRDDV2.scala | 23 +-
.../org/apache/hudi/cdc/CDCFileGroupIterator.scala | 14 +-
.../datasources/HoodieMultipleBaseFileFormat.scala | 288 ---------------------
.../datasources/orc/SparkOrcReaderBase.scala | 143 ++++++++++
... => HoodieFileGroupReaderBasedFileFormat.scala} | 122 ++++++---
.../parquet/SparkParquetReaderBase.scala | 6 +-
...stSparkFileFormatInternalRowReaderContext.scala | 4 +-
.../TestSparkOrcReaderFormat.scala} | 14 +-
.../parquet/TestSparkParquetReaderFormat.scala | 4 +-
.../TestPositionBasedFileGroupRecordBuffer.java | 4 +-
...kParquetReader.java => TestSparkOrcReader.java} | 15 +-
.../hudi/functional/TestSparkParquetReader.java | 3 +-
.../read/TestHoodieFileGroupReaderOnSpark.scala | 21 +-
.../TestHoodieMultipleBaseFileFormat.scala | 9 +-
.../org/apache/hudi/util/JavaConversions.scala | 19 +-
.../apache/spark/sql/adapter/Spark3_3Adapter.scala | 10 +-
.../datasources/orc/Spark33OrcReader.scala | 85 ++++++
.../datasources/parquet/Spark33ParquetReader.scala | 2 +-
.../apache/spark/sql/adapter/Spark3_4Adapter.scala | 9 +-
.../datasources/orc/Spark34OrcReader.scala | 91 +++++++
.../datasources/parquet/Spark34ParquetReader.scala | 8 +-
.../apache/spark/sql/adapter/Spark3_5Adapter.scala | 22 +-
.../datasources/orc/Spark35OrcReader.scala | 92 +++++++
.../datasources/parquet/Spark35ParquetReader.scala | 4 +-
43 files changed, 858 insertions(+), 515 deletions(-)
rename hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/execution/datasources/{parquet/SparkParquetReader.scala => SparkColumnarFileReader.scala} (92%)
create mode 100644 hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/MultipleColumnarFileFormatReader.scala
delete mode 100644 hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieMultipleBaseFileFormat.scala
create mode 100644 hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/orc/SparkOrcReaderBase.scala
rename hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/{HoodieFileGroupReaderBasedParquetFileFormat.scala => HoodieFileGroupReaderBasedFileFormat.scala} (76%)
copy hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/sql/execution/datasources/{parquet/TestSparkParquetReaderFormat.scala => orc/TestSparkOrcReaderFormat.scala} (82%)
copy hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/{TestSparkParquetReader.java => TestSparkOrcReader.java} (74%)
create mode 100644 hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/orc/Spark33OrcReader.scala
create mode 100644 hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/orc/Spark34OrcReader.scala
create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/orc/Spark35OrcReader.scala