Davis Zhang created HUDI-8568:
---------------------------------
Summary: update cannot change partition path value
Key: HUDI-8568
URL: https://issues.apache.org/jira/browse/HUDI-8568
Project: Apache Hudi
Issue Type: Bug
Reporter: Davis Zhang
Start with a record (1, a), where the first column is the primary key and the second is the partition key.
Running `update set partitionKey = b where primaryKey = 1` should produce (1, b),
but we actually still get (1, a).
{{mergeForPartitionUpdatesIfNeeded}}, which is responsible for handling
partition path updates, is only applied for INSERT INTO and MERGE INTO. UPDATE
takes a different code path, so the issue surfaces there.
```
// Repro for HUDI-8568: with a global index configured to update partition paths,
// an SQL UPDATE that changes the partition column should move the record to the
// new partition, but the record stays in its original partition instead.
test("Test Type Casting with Global Index for Primary Key and Partition Key Updates") {
  Seq("cow", "mor").foreach { tableType =>
    withRecordType()(withTempDir { tmp =>
      withSQLConf("hoodie.index.type" -> "GLOBAL_SIMPLE",
          "hoodie.simple.index.update.partition.path" -> "true") {
        val tableName = generateTableName
        // Create table with both primary key (c1) and partition key (c2)
        spark.sql(
          s"""
             |create table $tableName (
             |  c1 int,
             |  c2 int,
             |  c3 string,
             |  ts long
             |) using hudi
             |partitioned by (c2)
             |location '${tmp.getCanonicalPath}/$tableName'
             |tblproperties (
             |  type = '$tableType',
             |  primaryKey = 'c1',
             |  preCombineField = 'ts'
             |)
           """.stripMargin)
        // Test Case 1: Initial insert with double values (cast down to the int/int schema)
        spark.sql(
          s"""
             |insert into $tableName
             |select
             |  cast(1.0 as double) as c1,
             |  cast(1.0 as double) as c2,
             |  'a' as c3,
             |  1000 as ts
           """.stripMargin)
        // Verify initial insert
        checkAnswer(s"select c1, c2, c3 from $tableName")(
          Seq(1, 1, "a")
        )
        // Test Case 2: Update partition key (c2)
        spark.sql(
          s"""
             |update $tableName
             |set c2 = cast(2.0 as double)
             |where c3 = 'a'
           """.stripMargin)
        // Verify partition key update
        checkAnswer(s"select c1, c2, c3 from $tableName")(
          Seq(1, 2, "a") // <---- failed as I got (1,1,"a")
        )
        // Test Case 3: Insert overwrite with double values
        spark.sql(
          s"""
             |insert overwrite table $tableName
             |select
             |  cast(3.0 as double) as c1,
             |  cast(3.0 as double) as c2,
             |  'a' as c3,
             |  1003 as ts
           """.stripMargin)
        // Verify final state after insert overwrite
        checkAnswer(s"select c1, c2, c3 from $tableName")(
          Seq(3, 3, "a")
        )
        // Additional verification: check complete table state with sorting
        checkAnswer(
          s"""
             |select c1, c2, c3 from $tableName
             |order by c1, c2
           """.stripMargin)(
          Seq(3, 3, "a")
        )
        // Verify record count: insert overwrite should leave exactly one record
        val count = spark.sql(s"select count(*) from $tableName").collect()(0).getLong(0)
        assert(count == 1L,
          s"$tableType table: Expected 1 record but found $count records")
      }
    })
  }
}
```
--
This message was sent by Atlassian Jira
(v8.20.10#820010)