Thank you Adrian, Greg and Veem. I tried writing a small routine to see how the performance differs in these four approaches i.e. Upsert VS traditional update+insert VS Merge vs Truncate+load.
Initially I was thinking Upsert will perform the same as Merge as the logic looks similar but it seems it's the worst performing among all, not sure why , yet to know the reason though. Truncate+ load seems to be the best performing among all. Hope i am doing it correctly. Please correct me if I'm wrong. UPSERT approach execution time: *00:00:20.921343* UPSERT approach rows: 1000000 insert/update approach execution time: *00:00:15.53612* insert/update approach update rows : 500000 insert/update approach Insert rows: 500000 MERGE approach execution time: *00:00:14.884623* MERGE approach rows: 1000000 truncate load approach execution time:* 00:00:07.428826* truncate load rows: 1000000 ********* Routine ******** **************** UPSERT Testcase *********** drop table source_tab; drop table target_tab; CREATE TABLE source_tab ( id SERIAL PRIMARY KEY, column1 VARCHAR(100), column2 VARCHAR(100) ); -- Create target table CREATE TABLE target_tab ( id SERIAL PRIMARY KEY, column1 VARCHAR(100), column2 VARCHAR(100) ); INSERT INTO source_tab (column1, column2) SELECT 'Value ' || i, 'Value ' || (i * 2) FROM generate_series(1, 1000000) AS i; INSERT INTO target_tab (column1, column2) SELECT 'Value ' || i, 'Value ' || (i * 2) FROM generate_series(1, 500000) AS i; DO $$ DECLARE start_time timestamp; end_time timestamp; rows_inserted integer:=0; rows_updated integer:=0; rows_upserted integer:=0; rows_merged integer:=0; BEGIN -- Measure performance of UPSERT start_time := clock_timestamp(); INSERT INTO target_tab (id, column1, column2) SELECT id, column1, column2 FROM source_tab ON CONFLICT (id) DO UPDATE SET column1 = EXCLUDED.column1, column2 = EXCLUDED.column2; get diagnostics rows_upserted=row_count; end_time := clock_timestamp(); RAISE NOTICE 'UPSERT approach execution time: %', end_time - start_time; RAISE NOTICE 'UPSERT approach rows: %', rows_upserted; rollback; END $$; **************** Traditional Insert+update Testcase *********** drop table source_tab; drop table target_tab; CREATE TABLE source_tab ( id SERIAL PRIMARY KEY, column1 VARCHAR(100), column2 VARCHAR(100) ); -- Create target table CREATE TABLE target_tab ( id SERIAL PRIMARY KEY, column1 VARCHAR(100), column2 VARCHAR(100) ); INSERT INTO source_tab (column1, column2) SELECT 'Value ' || i, 'Value ' || (i * 2) FROM generate_series(1, 1000000) AS i; INSERT INTO target_tab (column1, column2) SELECT 'Value ' || i, 'Value ' || (i * 2) FROM generate_series(1, 500000) AS i; DO $$ DECLARE start_time timestamp; end_time timestamp; rows_inserted integer:=0; rows_updated integer:=0; rows_upserted integer:=0; rows_merged integer:=0; BEGIN -- Measure performance of insert/update approach start_time := clock_timestamp(); -- Update existing records UPDATE target_tab AS t SET column1 = s.column1, column2 = s.column2 FROM source_tab AS s WHERE t.id = s.id; get diagnostics rows_updated=row_count; -- Insert new records INSERT INTO target_tab (id, column1, column2) SELECT s.id, s.column1, s.column2 FROM source_tab AS s LEFT JOIN target_tab AS t ON s.id = t.id WHERE t.id IS NULL; get diagnostics rows_inserted=row_count; end_time := clock_timestamp(); RAISE NOTICE 'insert/update approach execution time: %', end_time - start_time; RAISE NOTICE 'insert/update approach update rows : %', rows_updated; RAISE NOTICE 'insert/update approach Insert rows: %', rows_inserted; rollback; END $$; **************** MERGE Testcase *********** drop table source_tab; drop table target_tab; CREATE TABLE source_tab ( id SERIAL PRIMARY KEY, column1 VARCHAR(100), column2 VARCHAR(100) ); -- Create target table CREATE TABLE target_tab ( id SERIAL PRIMARY KEY, column1 VARCHAR(100), column2 VARCHAR(100) ); INSERT INTO source_tab (column1, column2) SELECT 'Value ' || i, 'Value ' || (i * 2) FROM generate_series(1, 1000000) AS i; INSERT INTO target_tab (column1, column2) SELECT 'Value ' || i, 'Value ' || (i * 2) FROM generate_series(1, 500000) AS i; DO $$ DECLARE start_time timestamp; end_time timestamp; rows_inserted integer:=0; rows_updated integer:=0; rows_upserted integer:=0; rows_merged integer:=0; begin start_time := clock_timestamp(); merge into target_tab t using source_tab s on t. id = s. id when matched then update set column1 = s.column1, column2 = s.column2 when not matched then insert values (id, column1, column2); get diagnostics rows_merged=row_count; end_time := clock_timestamp(); RAISE NOTICE 'MERGE approach execution time: %', end_time - start_time; RAISE NOTICE 'MERGE approach rows: %', rows_merged; rollback; END $$; **************** Truncate+load Testcase *********** drop table source_tab; drop table target_tab; CREATE TABLE source_tab ( id SERIAL PRIMARY KEY, column1 VARCHAR(100), column2 VARCHAR(100) ); -- Create target table CREATE TABLE target_tab ( id SERIAL PRIMARY KEY, column1 VARCHAR(100), column2 VARCHAR(100) ); INSERT INTO source_tab (column1, column2) SELECT 'Value ' || i, 'Value ' || (i * 2) FROM generate_series(1, 1000000) AS i; INSERT INTO target_tab (column1, column2) SELECT 'Value ' || i, 'Value ' || (i * 2) FROM generate_series(1, 500000) AS i; DO $$ DECLARE start_time timestamp; end_time timestamp; rows_inserted integer:=0; rows_updated integer:=0; rows_upserted integer:=0; rows_truncate_loaded integer:=0; begin start_time := clock_timestamp(); truncate table target_tab; INSERT INTO target_tab (id, column1, column2) SELECT s.id, s.column1, s.column2 FROM source_tab AS s; get diagnostics rows_truncate_loaded=row_count; end_time := clock_timestamp(); RAISE NOTICE 'truncate load approach execution time: %', end_time - start_time; RAISE NOTICE 'truncate load rows: %', rows_truncate_loaded; rollback; END $$;