This is an automated email from the ASF dual-hosted git repository. xushiyan pushed a commit to branch rc3-patched-for-test in repository https://gitbox.apache.org/repos/asf/hudi.git
commit e535353f2e40fc1be943e59f65b6d643947364e5 Author: Raymond Xu <2701446+xushi...@users.noreply.github.com> AuthorDate: Thu Apr 21 22:05:03 2022 +0800 fix undeterministic ctor order --- .../parquet/Spark32HoodieParquetFileFormat.scala | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieParquetFileFormat.scala index 3ba5d38623..ec522fb31b 100644 --- a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieParquetFileFormat.scala +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32HoodieParquetFileFormat.scala @@ -397,26 +397,27 @@ object Spark32HoodieParquetFileFormat { private def createParquetFilters(args: Any*): ParquetFilters = { // NOTE: ParquetFilters ctor args contain Scala enum, therefore we can't look it - // up by arg types, and have to instead rely on relative order of ctors - val ctor = classOf[ParquetFilters].getConstructors.head + // up by arg types, and have to instead rely on the number of args based on individual class; + // the ctor order is not guaranteed + val ctor = classOf[ParquetFilters].getConstructors.maxBy(_.getParameterCount) ctor.newInstance(args.map(_.asInstanceOf[AnyRef]): _*) .asInstanceOf[ParquetFilters] } private def createParquetReadSupport(args: Any*): ParquetReadSupport = { // NOTE: ParquetReadSupport ctor args contain Scala enum, therefore we can't look it - // up by arg types, and have to instead rely on relative order of ctors - val ctor = classOf[ParquetReadSupport].getConstructors.head + // up by arg types, and have to instead rely on the number of args based on individual class; + // the ctor order is not guaranteed + val ctor = classOf[ParquetReadSupport].getConstructors.maxBy(_.getParameterCount) ctor.newInstance(args.map(_.asInstanceOf[AnyRef]): _*) .asInstanceOf[ParquetReadSupport] } private def createVectorizedParquetRecordReader(args: Any*): VectorizedParquetRecordReader = { // NOTE: ParquetReadSupport ctor args contain Scala enum, therefore we can't look it - // up by arg types, and have to instead rely on relative order of ctors - // NOTE: VectorizedParquetRecordReader has 2 ctors and the one we need is 2nd on the array - // This is a hacky workaround for the fixed version of Class. - val ctor = classOf[VectorizedParquetRecordReader].getConstructors.last + // up by arg types, and have to instead rely on the number of args based on individual class; + // the ctor order is not guaranteed + val ctor = classOf[VectorizedParquetRecordReader].getConstructors.maxBy(_.getParameterCount) ctor.newInstance(args.map(_.asInstanceOf[AnyRef]): _*) .asInstanceOf[VectorizedParquetRecordReader] }