This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 822b5a1284e [MINOR] Improve sparksql test for bucket index bulk insert (#9014)
822b5a1284e is described below
commit 822b5a1284ed580d2a4000da39a001ae64ba3a6a
Author: StreamingFlames <[email protected]>
AuthorDate: Tue Jun 20 16:29:43 2023 +0800
[MINOR] Improve sparksql test for bucket index bulk insert (#9014)
---
.../apache/spark/sql/hudi/TestInsertTable.scala | 101 +++++++++++----------
1 file changed, 52 insertions(+), 49 deletions(-)
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
index f0a30a9406c..a9fd0a4a030 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
@@ -1078,57 +1078,60 @@ class TestInsertTable extends HoodieSparkSqlTestBase {
test("Test Bulk Insert Into Bucket Index Table") {
withSQLConf("hoodie.datasource.write.operation" -> "bulk_insert") {
Seq("mor", "cow").foreach { tableType =>
- withTempDir { tmp =>
- val tableName = generateTableName
- // Create a partitioned table
- spark.sql(
- s"""
- |create table $tableName (
- | id int,
- | dt string,
- | name string,
- | price double,
- | ts long
- |) using hudi
- | tblproperties (
- | primaryKey = 'id,name',
- | type = '$tableType',
- | preCombineField = 'ts',
- | hoodie.index.type = 'BUCKET',
- | hoodie.bucket.index.hash.field = 'id,name')
- | partitioned by (dt)
- | location '${tmp.getCanonicalPath}'
+ Seq("true", "false").foreach { bulkInsertAsRow =>
+ withTempDir { tmp =>
+ val tableName = generateTableName
+ // Create a partitioned table
+ spark.sql(
+ s"""
+ |create table $tableName (
+ | id int,
+ | dt string,
+ | name string,
+ | price double,
+ | ts long
+ |) using hudi
+ | tblproperties (
+ | primaryKey = 'id,name',
+ | type = '$tableType',
+ | preCombineField = 'ts',
+ | hoodie.index.type = 'BUCKET',
+ | hoodie.bucket.index.hash.field = 'id,name',
+ | hoodie.datasource.write.row.writer.enable = '$bulkInsertAsRow')
+ | partitioned by (dt)
+ | location '${tmp.getCanonicalPath}'
+ """.stripMargin)
+
+ // Note: Do not write the field alias, the partition field must be placed last.
+ spark.sql(
+ s"""
+ | insert into $tableName values
+ | (1, 'a1,1', 10, 1000, "2021-01-05"),
+ | (2, 'a2', 20, 2000, "2021-01-06"),
+ | (3, 'a3,3', 30, 3000, "2021-01-07")
+ """.stripMargin)
+
+ checkAnswer(s"select id, name, price, ts, dt from $tableName")(
+ Seq(1, "a1,1", 10.0, 1000, "2021-01-05"),
+ Seq(2, "a2", 20.0, 2000, "2021-01-06"),
+ Seq(3, "a3,3", 30.0, 3000, "2021-01-07")
+ )
+
+ spark.sql(
+ s"""
+ | insert into $tableName values
+ | (1, 'a1', 10, 1000, "2021-01-05"),
+ | (3, "a3", 30, 3000, "2021-01-07")
""".stripMargin)
- // Note: Do not write the field alias, the partition field must be placed last.
- spark.sql(
- s"""
- | insert into $tableName values
- | (1, 'a1,1', 10, 1000, "2021-01-05"),
- | (2, 'a2', 20, 2000, "2021-01-06"),
- | (3, 'a3,3', 30, 3000, "2021-01-07")
- """.stripMargin)
-
- checkAnswer(s"select id, name, price, ts, dt from $tableName")(
- Seq(1, "a1,1", 10.0, 1000, "2021-01-05"),
- Seq(2, "a2", 20.0, 2000, "2021-01-06"),
- Seq(3, "a3,3", 30.0, 3000, "2021-01-07")
- )
-
- spark.sql(
- s"""
- | insert into $tableName values
- | (1, 'a1', 10, 1000, "2021-01-05"),
- | (3, "a3", 30, 3000, "2021-01-07")
- """.stripMargin)
-
- checkAnswer(s"select id, name, price, ts, dt from $tableName")(
- Seq(1, "a1,1", 10.0, 1000, "2021-01-05"),
- Seq(1, "a1", 10.0, 1000, "2021-01-05"),
- Seq(2, "a2", 20.0, 2000, "2021-01-06"),
- Seq(3, "a3,3", 30.0, 3000, "2021-01-07"),
- Seq(3, "a3", 30.0, 3000, "2021-01-07")
- )
+ checkAnswer(s"select id, name, price, ts, dt from $tableName")(
+ Seq(1, "a1,1", 10.0, 1000, "2021-01-05"),
+ Seq(1, "a1", 10.0, 1000, "2021-01-05"),
+ Seq(2, "a2", 20.0, 2000, "2021-01-06"),
+ Seq(3, "a3,3", 30.0, 3000, "2021-01-07"),
+ Seq(3, "a3", 30.0, 3000, "2021-01-07")
+ )
+ }
}
}
}