This is an automated email from the ASF dual-hosted git repository.
vinoyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 5388c7f  [HUDI-1470] Use the latest writer schema, when reading from existing parquet files in the hudi-test-suite (#2344)
5388c7f is described below
commit 5388c7f7a3efb84516af71a2533e8953113b30da
Author: Balajee Nagasubramaniam <[email protected]>
AuthorDate: Fri Dec 18 03:18:52 2020 -0800
    [HUDI-1470] Use the latest writer schema, when reading from existing parquet files in the hudi-test-suite (#2344)
---
.../hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java
index 2bd507c..bc7803d 100644
--- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java
+++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java
@@ -52,6 +52,7 @@ import org.apache.hudi.common.util.ParquetReaderIterator;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.config.HoodieMemoryConfig;
import org.apache.parquet.avro.AvroParquetReader;
+import org.apache.parquet.avro.AvroReadSupport;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
@@ -243,6 +244,9 @@ public class DFSHoodieDatasetInputReader extends DFSDeltaInputReader {
private Iterator<IndexedRecord> readParquetOrLogFiles(FileSlice fileSlice)
throws IOException {
if (fileSlice.getBaseFile().isPresent()) {
+ // Read the parquet files using the latest writer schema.
+ Schema schema = new Schema.Parser().parse(schemaStr);
+      AvroReadSupport.setAvroReadSchema(metaClient.getHadoopConf(), HoodieAvroUtils.addMetadataFields(schema));
       Iterator<IndexedRecord> itr = new ParquetReaderIterator<IndexedRecord>(AvroParquetReader.<IndexedRecord>builder(new Path(fileSlice.getBaseFile().get().getPath())).withConf(metaClient.getHadoopConf()).build());