[ https://issues.apache.org/jira/browse/HUDI-2781?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17451641#comment-17451641 ]
Yann Byron commented on HUDI-2781: ---------------------------------- [~xushiyan] Environment: Spark 3.0.3, Hive 3.1.2, Hudi release-0.10.0-rc2 Conclusions: * `primaryKey` must be provided when creating a table, so the related content should be removed from [https://hudi.apache.org/docs/quick-start-guide]; * the result of ShowPartitions is wrong after partitions are deleted/dropped; * everything else is OK. Details: Create Table: {code:java} -- cow table, without partition fields and preCombineField create table if not exists cow_nonpt_nonpcf_tbl ( id int, name string, price double ) using hudi options ( type = 'cow', primaryKey = 'id' ); -- mor table, without partition fields and preCombineField create table if not exists mor_nonpt_nonpcf_tbl ( id int, name string, price double ) using hudi options ( type = 'mor', primaryKey = 'id' ); -- cow table, without partition fields, with preCombineField create table if not exists cow_nonpt_pcf_tbl ( id int, name string, price double, ts bigint ) using hudi options ( type = 'cow', primaryKey = 'id', preCombineField = 'ts' ); -- mor table, without partition fields, with preCombineField create table if not exists mor_nonpt_pcf_tbl ( id int, name string, price double, ts bigint ) using hudi options ( type = 'mor', primaryKey = 'id', preCombineField = 'ts' ); -- cow table, with partition fields, without preCombineField create table if not exists cow_pt_nonpcf_tbl ( id bigint, name string, dt string, hh string ) using hudi location 'file:///tmp/hudi/cow_pt_nonpcf_tbl' tblproperties ( primaryKey = 'id' ) partitioned by (dt, hh); -- mor table, with partition fields, with preCombineField create table if not exists mor_pt_pcf_tbl ( id bigint, name string, ts bigint, dt string, hh string ) using hudi location 'file:///tmp/hudi/mor_pt_pcf_tbl' tblproperties ( type = 'mor', primaryKey = 'id', preCombineField = 'ts' ) partitioned by (dt, hh); {code} CTAS {code:java} -- partitioned table and use `options` create table ctas_cow_pt_nonpcf_tbl using hudi options (type = 'cow', 
primaryKey = 'id') partitioned by (dt) as select 1 as id, 'a1' as name, 10 as price, 1000 as dt; -- non-partitioned table and use `tblproperties` create table ctas_cow_nonpt_nonpcf_tbl using hudi tblproperties (primaryKey = 'id') as select 1 as id, 'a1' as name, 10 as price; {code} Create table based on existing path {code:java} create table existing_hudi_tbl using hudi options ( primaryKey = 'id', preCombineField = 'ts' ) partitioned by (dt) location 'file:///tmp/hudi/dataframe_hudi_table'; {code} Insert {code:java} -- normal insert into insert into cow_nonpt_nonpcf_tbl select 1, 'a1', 20; insert into mor_nonpt_nonpcf_tbl select 1, 'a1', 20; insert into cow_nonpt_pcf_tbl select 1, 'a1', 20, 1000; insert into mor_nonpt_pcf_tbl select 1, 'a1', 20, 1000; -- insert static partition insert into cow_pt_nonpcf_tbl partition(dt = '2021-01-02', hh='10') select 1, 'a1'; insert into mor_pt_pcf_tbl partition(dt = '2021-01-02', hh='10') select 1, 'a1', '1000'; -- insert dynamic partition insert into cow_pt_nonpcf_tbl select 2, 'a2', '2021-01-02', '11'; insert into mor_pt_pcf_tbl select 2, 'a2', '1000', '2021-01-02', '11'; {code} Insert overwrite {code:java} -- insert overwrite table insert overwrite table cow_nonpt_nonpcf_tbl select 3, 'a3', 30; insert overwrite table mor_nonpt_nonpcf_tbl select 3, 'a3', 30; insert overwrite cow_nonpt_pcf_tbl select 3, 'a3', 30, 1000; insert overwrite mor_nonpt_pcf_tbl select 3, 'a3', 30, 1000; -- insert overwrite table with static partition insert overwrite cow_pt_nonpcf_tbl partition(dt = '2021-01-02', hh='10') select 1, 'a1_1'; insert overwrite mor_pt_pcf_tbl partition(dt = '2021-01-02', hh='10') select 1, 'a1_1', '1100'; -- insert overwrite table with dynamic partition insert overwrite table cow_pt_nonpcf_tbl select 2 as id, 'a2_2', '2021-01-02' as dt, '11' as hh; insert overwrite table mor_pt_pcf_tbl select 2 as id, 'a2_2', '2200', '2021-01-02' as dt, '11' as hh; {code} Update * if no preCombineField is provided, the `update` syntax cannot be used. 
{code:java} update cow_nonpt_pcf_tbl set price = price * 2, name = 'a3_3', ts = 3000 where id = 3; update mor_nonpt_pcf_tbl set price = price * 2, name = 'a3_3', ts = 3000 where id = 3; update mor_pt_pcf_tbl set name = 'aa_2', ts = 2222 where id % 2 = 0; {code} Merge into {code:java} -- source table using hudi for merging into non-partitioned table create table merge_source (id int, name string, price double, ts bigint) using hudi tblproperties (primaryKey = 'id', preCombineField = 'ts'); insert into merge_source values (1, "new_a1", 22.22, 4001), (2, "new_a2", 33.33, 4001), (3, "new_a3", 44.44, 4001); merge into cow_nonpt_nonpcf_tbl as target using merge_source as source on target.id = source.id when matched then update set * when not matched then insert * ; merge into mor_nonpt_pcf_tbl as target using merge_source as source on target.id = source.id when matched then update set * when not matched then insert * ; -- source table using parquet for merging into partitioned table create table merge_source2 (id int, name string, flag string, dt string, hh string) using parquet; insert into merge_source2 values (1, "new_a1", 'update', '2021-01-02', '10'), (2, "new_a2", 'delete', '2021-01-02', '11'), (3, "new_a3", 'insert', '2021-01-02', '12'); merge into cow_pt_nonpcf_tbl as target using ( select id, name, flag, dt, hh from merge_source2 ) source on target.id = source.id when matched and flag != 'delete' then update set id = source.id, name = source.name when matched and flag = 'delete' then delete when not matched then insert (id, name, dt, hh) values(source.id, source.name, source.dt, source.hh) ; merge into mor_pt_pcf_tbl as target using ( select id, name, '1000' as ts, flag, dt, hh from merge_source2 ) source on target.id = source.id when matched and flag != 'delete' then update set * when matched and flag = 'delete' then delete when not matched then insert (id, name, ts, dt, hh) values(source.id, source.name, source.ts, source.dt, source.hh) ; {code} Delete 
{code:java} delete from cow_nonpt_nonpcf_tbl where id = 1; delete from mor_nonpt_pcf_tbl where id = 1; {code} Alter {code:java} --rename to: ALTER TABLE cow_nonpt_nonpcf_tbl RENAME TO cow_nonpt_nonpcf_tbl_2; --add column: ALTER TABLE cow_nonpt_nonpcf_tbl_2 add columns(ext0 string); --change column: ALTER TABLE cow_nonpt_nonpcf_tbl_2 change column id id bigint; --show partition: show partitions mor_pt_pcf_tbl; show partitions cow_pt_nonpcf_tbl; --drop partition: alter table cow_pt_nonpcf_tbl drop partition (dt='2021-01-02', hh='10'); --set properties: alter table mor_nonpt_pcf_tbl set tblproperties (hoodie.keep.max.commits = '10'); alter table mor_nonpt_pcf_tbl set serdeproperties (hoodie.keep.max.commits = '10'); {code} > Test 0.10 RC for Spark 3.x > -------------------------- > > Key: HUDI-2781 > URL: https://issues.apache.org/jira/browse/HUDI-2781 > Project: Apache Hudi > Issue Type: Test > Components: Spark Integration > Reporter: Raymond Xu > Assignee: Yann Byron > Priority: Blocker > Labels: pull-request-available, sev:high > Fix For: 0.10.0 > > > Combinations > # Spark 3.0 & 3.1.x against Hive 2 > # Spark 3.0 & 3.1.X against Hive 3 > # Spark 3.2 against Hive 2 > > Let's test a COW and MOR long running DAG across these environments and get a > report with bugs/issues > > We have YAMLs here, that can be run across all different environments listed > here. > [https://github.com/apache/hudi/tree/master/docker/demo/config/test-suite] -- This message was sent by Atlassian Jira (v8.20.1#820001)