[ https://issues.apache.org/jira/browse/HIVE-22198?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
LuGuangMing updated HIVE-22198: ------------------------------- Description: set parallel is true, set skewjoin is false, set auto convert join is false. run a union all, there is no error message, but some result data is missing, for details see the attachment [^test-parallel.sql] create table tab1(tid int, com string) row format delimited fields terminated by '\t' stored as textfile; create table tab2(tid int, com string) row format delimited fields terminated by '\t' stored as textfile; create table tab3(tid int, com string) row format delimited fields terminated by '\t' stored as textfile; create table tab4(tid int, com string) row format delimited fields terminated by '\t' stored as textfile; insert into tab1 values(1,'abc'),(2,'bcd'),(3,'cde'); insert into tab2 values(1,'abc'),(2,'bcd'),(3,'cde'); insert into tab3 values(1,'abc'),(2,'bcd'),(3,'cde'); insert into tab4 values(1,'abc'),(2,'bcd'),(3,'cde'); set hive.auto.convert.join=false; set hive.optimize.skewjoin=true; set hive.exec.parallel=true; SELECT sum(1) as a FROM tab1 t1 INNER JOIN tab2 t2 ON t1.com = t2.com UNION ALL SELECT sum(1) as a FROM tab3 t3 INNER JOIN tab4 t4 ON t3.com = t4.com; create table test_parallel stored as orcfile as SELECT sum(1) as a FROM tab1 t1 INNER JOIN tab2 t2 ON t1.com = t2.com UNION ALL SELECT sum(1) as a FROM tab3 t3 INNER JOIN tab4 t4 ON t3.com = t4.com; select * from test_parallel; The result should contain two rows, but only one is returned. was: set parallel is true, set skewjoin is false, set auto convert join is false. 
run a union all, there is no error message, but some result data is missing, for details see the attachment create table tab1(tid int, com string) row format delimited fields terminated by '\t' stored as textfile; create table tab2(tid int, com string) row format delimited fields terminated by '\t' stored as textfile; create table tab3(tid int, com string) row format delimited fields terminated by '\t' stored as textfile; create table tab4(tid int, com string) row format delimited fields terminated by '\t' stored as textfile; insert into tab1 values(1,'abc'),(2,'bcd'),(3,'cde'); insert into tab2 values(1,'abc'),(2,'bcd'),(3,'cde'); insert into tab3 values(1,'abc'),(2,'bcd'),(3,'cde'); insert into tab4 values(1,'abc'),(2,'bcd'),(3,'cde'); set hive.auto.convert.join=false; set hive.optimize.skewjoin=true; set hive.exec.parallel=true; SELECT sum(1) as a FROM tab1 t1 INNER JOIN tab2 t2 ON t1.com = t2.com UNION ALL SELECT sum(1) as a FROM tab3 t3 INNER JOIN tab4 t4 ON t3.com = t4.com; create table test_parallel stored as orcfile as SELECT sum(1) as a FROM tab1 t1 INNER JOIN tab2 t2 ON t1.com = t2.com UNION ALL SELECT sum(1) as a FROM tab3 t3 INNER JOIN tab4 t4 ON t3.com = t4.com; select * from test_parallel; The result should contain two rows, but only one is returned. > Execute union-all with child Joins in parallel > ---------------------------------------------- > > Key: HIVE-22198 > URL: https://issues.apache.org/jira/browse/HIVE-22198 > Project: Hive > Issue Type: Bug > Affects Versions: 1.2.0, 3.0.0, 3.1.0 > Reporter: LuGuangMing > Assignee: LuGuangMing > Priority: Major > Attachments: image-2019-09-20-11-38-37-433.png, > image-2019-09-20-11-39-30-347.png, test-parallel.sql > > > set parallel is true, set skewjoin is false, set auto convert join is false. 
> run a union all, there is no error message, but some result data is > missing, for details see the attachment [^test-parallel.sql] > create table tab1(tid int, com string) row format delimited fields terminated > by '\t' stored as textfile; > create table tab2(tid int, com string) row format delimited fields > terminated by '\t' stored as textfile; > create table tab3(tid int, com string) row format delimited fields > terminated by '\t' stored as textfile; > create table tab4(tid int, com string) row format delimited fields > terminated by '\t' stored as textfile; > insert into tab1 values(1,'abc'),(2,'bcd'),(3,'cde'); > insert into tab2 values(1,'abc'),(2,'bcd'),(3,'cde'); > insert into tab3 values(1,'abc'),(2,'bcd'),(3,'cde'); > insert into tab4 values(1,'abc'),(2,'bcd'),(3,'cde'); > set hive.auto.convert.join=false; > set hive.optimize.skewjoin=true; > set hive.exec.parallel=true; > SELECT sum(1) as a > FROM tab1 t1 > INNER JOIN tab2 t2 > ON t1.com = t2.com > UNION ALL > SELECT sum(1) as a > FROM tab3 t3 > INNER JOIN tab4 t4 > ON t3.com = t4.com; > create table test_parallel stored as orcfile as > SELECT sum(1) as a > FROM tab1 t1 > INNER JOIN tab2 t2 > ON t1.com = t2.com > UNION ALL > SELECT sum(1) as a > FROM tab3 t3 > INNER JOIN tab4 t4 > ON t3.com = t4.com; > select * from test_parallel; > The result should contain two rows, but only one is returned. -- This message was sent by Atlassian Jira (v8.3.4#803005)