This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 29e2f6cd0f0 [HUDI-7926] Data skipping failure mode should be strict in
query test (#11502)
29e2f6cd0f0 is described below
commit 29e2f6cd0f0287158bc85ae36f8d2c081ff3a8c2
Author: KnightChess <[email protected]>
AuthorDate: Wed Jul 3 08:19:38 2024 +0800
[HUDI-7926] Data skipping failure mode should be strict in query test
(#11502)
---
.../hudi/testutils/HoodieSparkClientTestHarness.java | 6 ++++++
.../main/scala/org/apache/hudi/HoodieFileIndex.scala | 17 +++++++++++------
.../apache/hudi/HoodieHadoopFsRelationFactory.scala | 2 +-
.../scala/org/apache/hudi/SparkBaseIndexSupport.scala | 19 +++++++++++++++++++
.../main/scala/org/apache/hudi/cdc/HoodieCDCRDD.scala | 2 +-
.../HoodieFileGroupReaderBasedParquetFileFormat.scala | 2 +-
.../hudi/functional/ColumnStatIndexTestBase.scala | 1 +
.../hudi/functional/PartitionStatsIndexTestBase.scala | 1 +
.../hudi/functional/RecordLevelIndexTestBase.scala | 1 +
.../hudi/functional/SecondaryIndexTestBase.scala | 1 +
.../functional/TestBloomFiltersIndexSupport.scala | 1 +
.../functional/TestPartitionStatsIndexWithSql.scala | 7 ++++++-
.../sql/hudi/command/index/TestFunctionalIndex.scala | 4 ++++
.../sql/hudi/common/HoodieSparkSqlTestBase.scala | 13 ++++++++++++-
.../spark/sql/hudi/dml/TestDataSkippingQuery.scala | 6 ++++++
15 files changed, 72 insertions(+), 11 deletions(-)
diff --git
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java
index eefa825bc5c..3f342f8e054 100644
---
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java
+++
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java
@@ -137,6 +137,7 @@ public abstract class HoodieSparkClientTestHarness extends
HoodieWriterClientTes
protected SparkRDDWriteClient writeClient;
protected SparkRDDReadClient readClient;
protected HoodieTableFileSystemView tableView;
+ protected Map<String, String> extraConf = new HashMap<>();
protected TimelineService timelineService;
protected final SparkTaskContextSupplier supplier = new
SparkTaskContextSupplier();
@@ -200,6 +201,7 @@ public abstract class HoodieSparkClientTestHarness extends
HoodieWriterClientTes
// Initialize a local spark env
SparkConf sc = HoodieClientTestUtils.getSparkConfForTest(appName + "#" +
testMethodName);
+ extraConf.forEach(sc::set);
SparkContext sparkContext = new SparkContext(sc);
HoodieClientTestUtils.overrideSparkHadoopConfiguration(sparkContext);
jsc = new JavaSparkContext(sparkContext);
@@ -229,6 +231,10 @@ public abstract class HoodieSparkClientTestHarness extends
HoodieWriterClientTes
initSparkContexts(this.getClass().getSimpleName());
}
+ protected void initQueryIndexConf() {
+ extraConf.put("hoodie.fileIndex.dataSkippingFailureMode", "strict");
+ }
+
/**
* Cleanups Spark contexts ({@link JavaSparkContext} and {@link SQLContext}).
*/
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
index 47090d73887..e987ae47fc7 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
@@ -22,14 +22,14 @@ import org.apache.hudi.HoodieSparkConfUtils.getConfigValue
import
org.apache.hudi.common.config.TimestampKeyGeneratorConfig.{TIMESTAMP_INPUT_DATE_FORMAT,
TIMESTAMP_OUTPUT_DATE_FORMAT}
import org.apache.hudi.common.config.{HoodieMetadataConfig, TypedProperties}
import org.apache.hudi.common.model.{FileSlice, HoodieBaseFile, HoodieLogFile}
-import org.apache.hudi.common.table.HoodieTableMetaClient
+import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient}
import org.apache.hudi.common.util.StringUtils
import org.apache.hudi.exception.HoodieException
import org.apache.hudi.keygen.{TimestampBasedAvroKeyGenerator,
TimestampBasedKeyGenerator}
import org.apache.hudi.storage.{StoragePath, StoragePathInfo}
import org.apache.hudi.util.JFunction
-
import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.hudi.DataSourceWriteOptions.{PARTITIONPATH_FIELD,
PRECOMBINE_FIELD, RECORDKEY_FIELD}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
@@ -43,7 +43,6 @@ import org.apache.spark.unsafe.types.UTF8String
import java.text.SimpleDateFormat
import java.util.stream.Collectors
import javax.annotation.concurrent.NotThreadSafe
-
import scala.collection.JavaConverters._
import scala.util.control.NonFatal
import scala.util.{Failure, Success, Try}
@@ -83,7 +82,7 @@ case class HoodieFileIndex(spark: SparkSession,
spark = spark,
metaClient = metaClient,
schemaSpec = schemaSpec,
- configProperties = getConfigProperties(spark, options),
+ configProperties = getConfigProperties(spark, options,
metaClient.getTableConfig),
queryPaths = HoodieFileIndex.getQueryPaths(options),
specifiedQueryInstant =
options.get(DataSourceReadOptions.TIME_TRAVEL_AS_OF_INSTANT.key).map(HoodieSqlCommonUtils.formatQueryInstant),
fileStatusCache = fileStatusCache,
@@ -350,7 +349,7 @@ case class HoodieFileIndex(spark: SparkSession,
if (isDataSkippingEnabled) {
for(indexSupport: SparkBaseIndexSupport <- indicesSupport) {
if (indexSupport.isIndexAvailable) {
- val prunedFileNames = indexSupport.computeCandidateFileNames(this,
queryFilters, queryReferencedColumns,
+ val prunedFileNames = indexSupport.computeCandidateIsStrict(spark,
this, queryFilters, queryReferencedColumns,
prunedPartitionsAndFileSlices, shouldPushDownFilesFilter)
if (prunedFileNames.nonEmpty) {
return Try(prunedFileNames)
@@ -453,7 +452,7 @@ object HoodieFileIndex extends Logging {
schema.fieldNames.filter { colName => refs.exists(r =>
resolver.apply(colName, r.name)) }
}
- def getConfigProperties(spark: SparkSession, options: Map[String, String]) =
{
+ def getConfigProperties(spark: SparkSession, options: Map[String, String],
tableConfig: HoodieTableConfig) = {
val sqlConf: SQLConf = spark.sessionState.conf
val properties = TypedProperties.fromMap(options.filter(p => p._2 !=
null).asJava)
@@ -472,6 +471,12 @@ object HoodieFileIndex extends Logging {
properties.setProperty(DataSourceReadOptions.FILE_INDEX_LISTING_MODE_OVERRIDE.key,
listingModeOverride)
}
+ if (tableConfig != null) {
+ properties.setProperty(RECORDKEY_FIELD.key,
tableConfig.getRecordKeyFields.orElse(Array.empty).mkString(","))
+ properties.setProperty(PRECOMBINE_FIELD.key,
Option(tableConfig.getPreCombineField).getOrElse(""))
+ properties.setProperty(PARTITIONPATH_FIELD.key,
tableConfig.getPartitionFields.orElse(Array.apply("")).mkString(","))
+ }
+
properties
}
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieHadoopFsRelationFactory.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieHadoopFsRelationFactory.scala
index c3faa339b70..b88fb4cfedb 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieHadoopFsRelationFactory.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieHadoopFsRelationFactory.scala
@@ -211,7 +211,7 @@ class
HoodieMergeOnReadSnapshotHadoopFsRelationFactory(override val sqlContext:
includeLogFiles = true,
shouldEmbedFileSlices = true)
- val configProperties: TypedProperties = getConfigProperties(sparkSession,
options)
+ val configProperties: TypedProperties = getConfigProperties(sparkSession,
options, metaClient.getTableConfig)
val metadataConfig: HoodieMetadataConfig = HoodieMetadataConfig.newBuilder
.fromProperties(configProperties)
.enable(configProperties.getBoolean(ENABLE.key,
DEFAULT_METADATA_ENABLE_FOR_READERS)
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkBaseIndexSupport.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkBaseIndexSupport.scala
index bc29354d7f8..d9396433571 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkBaseIndexSupport.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkBaseIndexSupport.scala
@@ -18,6 +18,7 @@
package org.apache.hudi
+import org.apache.hudi.HoodieFileIndex.DataSkippingFailureMode
import org.apache.hudi.client.common.HoodieSparkEngineContext
import org.apache.hudi.common.config.HoodieMetadataConfig
import org.apache.hudi.common.model.FileSlice
@@ -31,6 +32,7 @@ import
org.apache.spark.sql.hudi.DataSkippingUtils.translateIntoColumnStatsIndex
import org.apache.spark.sql.{Column, DataFrame, SparkSession}
import scala.collection.JavaConverters._
+import scala.util.control.NonFatal
abstract class SparkBaseIndexSupport(spark: SparkSession,
metadataConfig: HoodieMetadataConfig,
@@ -43,6 +45,23 @@ abstract class SparkBaseIndexSupport(spark: SparkSession,
def isIndexAvailable: Boolean
+ def computeCandidateIsStrict(spark: SparkSession,
+ fileIndex: HoodieFileIndex,
+ queryFilters: Seq[Expression],
+ queryReferencedColumns: Seq[String],
+ prunedPartitionsAndFileSlices:
Seq[(Option[BaseHoodieTableFileIndex.PartitionPath], Seq[FileSlice])],
+ shouldPushDownFilesFilter: Boolean):
Option[Set[String]] = {
+ try {
+ computeCandidateFileNames(fileIndex, queryFilters,
queryReferencedColumns, prunedPartitionsAndFileSlices,
shouldPushDownFilesFilter)
+ } catch {
+ case NonFatal(e) =>
+ spark.sqlContext.getConf(DataSkippingFailureMode.configName,
DataSkippingFailureMode.Fallback.value) match {
+ case DataSkippingFailureMode.Fallback.value => Option.empty
+ case DataSkippingFailureMode.Strict.value => throw e;
+ }
+ }
+ }
+
def computeCandidateFileNames(fileIndex: HoodieFileIndex,
queryFilters: Seq[Expression],
queryReferencedColumns: Seq[String],
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/cdc/HoodieCDCRDD.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/cdc/HoodieCDCRDD.scala
index 9794c10061a..e1ce293ab54 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/cdc/HoodieCDCRDD.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/cdc/HoodieCDCRDD.scala
@@ -89,7 +89,7 @@ class HoodieCDCRDD(
private val cdcSupplementalLoggingMode =
metaClient.getTableConfig.cdcSupplementalLoggingMode
- private val props = HoodieFileIndex.getConfigProperties(spark, Map.empty)
+ private val props = HoodieFileIndex.getConfigProperties(spark, Map.empty,
metaClient.getTableConfig)
protected val payloadProps: Properties =
Option(metaClient.getTableConfig.getPreCombineField)
.map { preCombineField =>
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala
index f7c09b007be..58ba502cfd7 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala
@@ -134,7 +134,7 @@ class
HoodieFileGroupReaderBasedParquetFileFormat(tableState: HoodieTableState,
val broadcastedStorageConf =
spark.sparkContext.broadcast(augmentedStorageConf)
val broadcastedDataSchema = spark.sparkContext.broadcast(dataAvroSchema)
val broadcastedRequestedSchema =
spark.sparkContext.broadcast(requestedAvroSchema)
- val fileIndexProps: TypedProperties =
HoodieFileIndex.getConfigProperties(spark, options)
+ val fileIndexProps: TypedProperties =
HoodieFileIndex.getConfigProperties(spark, options, null)
(file: PartitionedFile) => {
file.partitionValues match {
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/ColumnStatIndexTestBase.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/ColumnStatIndexTestBase.scala
index 2b81715c2d1..779abafb2da 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/ColumnStatIndexTestBase.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/ColumnStatIndexTestBase.scala
@@ -60,6 +60,7 @@ class ColumnStatIndexTestBase extends
HoodieSparkClientTestBase {
@BeforeEach
override def setUp() {
initPath()
+ initQueryIndexConf()
initSparkContexts()
initHoodieStorage()
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/PartitionStatsIndexTestBase.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/PartitionStatsIndexTestBase.scala
index 54197798d72..2818e1c50e6 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/PartitionStatsIndexTestBase.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/PartitionStatsIndexTestBase.scala
@@ -74,6 +74,7 @@ class PartitionStatsIndexTestBase extends
HoodieSparkClientTestBase {
@BeforeEach
override def setUp(): Unit = {
initPath()
+ initQueryIndexConf()
initSparkContexts()
initHoodieStorage()
initTestDataGenerator()
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/RecordLevelIndexTestBase.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/RecordLevelIndexTestBase.scala
index db7a3982128..8439ffc7d18 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/RecordLevelIndexTestBase.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/RecordLevelIndexTestBase.scala
@@ -84,6 +84,7 @@ class RecordLevelIndexTestBase extends
HoodieSparkClientTestBase {
@BeforeEach
override def setUp() {
initPath()
+ initQueryIndexConf()
initSparkContexts()
initHoodieStorage()
initTestDataGenerator()
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/SecondaryIndexTestBase.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/SecondaryIndexTestBase.scala
index bba806f7078..ae3f383ec63 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/SecondaryIndexTestBase.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/SecondaryIndexTestBase.scala
@@ -58,6 +58,7 @@ class SecondaryIndexTestBase extends
HoodieSparkClientTestBase {
@BeforeEach
override def setUp(): Unit = {
initPath()
+ initQueryIndexConf()
initSparkContexts()
initHoodieStorage()
initTestDataGenerator()
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestBloomFiltersIndexSupport.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestBloomFiltersIndexSupport.scala
index b3de9fcbbdf..1bfad570120 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestBloomFiltersIndexSupport.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestBloomFiltersIndexSupport.scala
@@ -69,6 +69,7 @@ class TestBloomFiltersIndexSupport extends
HoodieSparkClientTestBase {
@BeforeEach
override def setUp(): Unit = {
initPath()
+ initQueryIndexConf()
initSparkContexts()
initHoodieStorage()
initTestDataGenerator()
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsIndexWithSql.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsIndexWithSql.scala
index 2be4a37c915..1daa2349264 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsIndexWithSql.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsIndexWithSql.scala
@@ -33,7 +33,7 @@ import
org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression
import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase
import org.apache.spark.sql.types.{IntegerType, StringType}
import org.junit.jupiter.api.Assertions.assertTrue
-import org.junit.jupiter.api.Tag
+import org.junit.jupiter.api.{BeforeAll, Tag}
import scala.collection.JavaConverters._
@@ -42,6 +42,11 @@ class TestPartitionStatsIndexWithSql extends
HoodieSparkSqlTestBase {
val sqlTempTable = "hudi_tbl"
+ @BeforeAll
+ def init(): Unit = {
+ initQueryIndexConf()
+ }
+
test("Test partition stats index following insert, merge into, update and
delete") {
Seq("cow", "mor").foreach { tableType =>
withTempDir { tmp =>
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestFunctionalIndex.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestFunctionalIndex.scala
index 0148b8028de..c7036b1d86d 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestFunctionalIndex.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestFunctionalIndex.scala
@@ -41,6 +41,10 @@ import org.scalatest.Ignore
@Ignore
class TestFunctionalIndex extends HoodieSparkSqlTestBase {
+ override protected def beforeAll(): Unit = {
+ initQueryIndexConf()
+ }
+
test("Test Functional Index With Hive Sync Non Partitioned Table") {
// There is a big difference between Java class loader architecture of
versions 1.8 and 17.
// Hive 2.3.7 is compiled with Java 1.8, and the class loader used there
throws error when Hive APIs are run on Java 17.
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/HoodieSparkSqlTestBase.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/HoodieSparkSqlTestBase.scala
index e8020a3e7a3..794e7a322b3 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/HoodieSparkSqlTestBase.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/common/HoodieSparkSqlTestBase.scala
@@ -29,6 +29,7 @@ import org.apache.hudi.index.inmemory.HoodieInMemoryHashIndex
import org.apache.hudi.testutils.HoodieClientTestUtils.{createMetaClient,
getSparkConfForTest}
import org.apache.hadoop.fs.Path
+import org.apache.hudi.HoodieFileIndex.DataSkippingFailureMode
import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import
org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.checkMessageContains
@@ -70,8 +71,18 @@ class HoodieSparkSqlTestBase extends FunSuite with
BeforeAndAfterAll {
private var tableId = 0
+ private var extraConf = Map[String, String]()
+
def sparkConf(): SparkConf = {
- getSparkConfForTest("Hoodie SQL Test")
+ val conf = getSparkConfForTest("Hoodie SQL Test")
+ conf.setAll(extraConf)
+ conf
+ }
+
+ protected def initQueryIndexConf(): Unit = {
+ extraConf = extraConf ++ Map(
+ DataSkippingFailureMode.configName ->
DataSkippingFailureMode.Strict.value
+ )
}
protected def withTempDir(f: File => Unit): Unit = {
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDataSkippingQuery.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDataSkippingQuery.scala
index 9ae8e3a2cdd..c318d5e449b 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDataSkippingQuery.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestDataSkippingQuery.scala
@@ -23,6 +23,10 @@ import
org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase
class TestDataSkippingQuery extends HoodieSparkSqlTestBase {
+ override protected def beforeAll(): Unit = {
+ initQueryIndexConf()
+ }
+
test("Test the data skipping query involves conditions " +
"that cover both columns supported by column stats and those that are not
supported.") {
withTempDir { tmp =>
@@ -98,6 +102,8 @@ class TestDataSkippingQuery extends HoodieSparkSqlTestBase {
checkAnswer(s"select id, name, price, ts, dt from $tableName where
attributes.color = 'red'")(
Seq(1, "a1", 10.0, 1000, "2021-01-05")
)
+      // TODO: add this fallback param; caused by PartitionStatsIndexSupport (see HUDI-7144), may be fixed by HUDI-7903
+ spark.sql("set hoodie.fileIndex.dataSkippingFailureMode = fallback")
// Check the case where the WHERE condition only includes columns
supported by column stats
checkAnswer(s"select id, name, price, ts, dt from $tableName where
name='a1'")(
Seq(1, "a1", 10.0, 1000, "2021-01-05")