[ 
https://issues.apache.org/jira/browse/HIVE-22198?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

LuGuangMing updated HIVE-22198:
-------------------------------
    Description: 
With hive.exec.parallel=true, hive.optimize.skewjoin=true, and 
hive.auto.convert.join=false, running a UNION ALL query produces no error 
message, but some of the result data is missing. For details, see the 
attached SQL file.

-- Reproduction script for HIVE-22198.
-- Four tab-delimited text tables with the same two-column layout.
CREATE TABLE tab1 (tid INT, com STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    STORED AS TEXTFILE;
CREATE TABLE tab2 (tid INT, com STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    STORED AS TEXTFILE;
CREATE TABLE tab3 (tid INT, com STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    STORED AS TEXTFILE;
CREATE TABLE tab4 (tid INT, com STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    STORED AS TEXTFILE;

-- Every table receives the same three rows.
INSERT INTO tab1 VALUES (1, 'abc'), (2, 'bcd'), (3, 'cde');
INSERT INTO tab2 VALUES (1, 'abc'), (2, 'bcd'), (3, 'cde');
INSERT INTO tab3 VALUES (1, 'abc'), (2, 'bcd'), (3, 'cde');
INSERT INTO tab4 VALUES (1, 'abc'), (2, 'bcd'), (3, 'cde');

-- Session settings under which the missing result row is reported.
SET hive.auto.convert.join=false;
SET hive.optimize.skewjoin=true;
SET hive.exec.parallel=true;

-- Two aggregated joins combined with UNION ALL.
SELECT SUM(1) AS a
FROM tab1 t1
INNER JOIN tab2 t2
    ON t1.com = t2.com
UNION ALL
SELECT SUM(1) AS a
FROM tab3 t3
INNER JOIN tab4 t4
    ON t3.com = t4.com;

-- The result should contain two rows (one per UNION ALL branch),
-- but only one row is returned.

-- Same query materialized into an ORC table, then read back.
CREATE TABLE test_parallel STORED AS ORCFILE AS
SELECT SUM(1) AS a
FROM tab1 t1
INNER JOIN tab2 t2
    ON t1.com = t2.com
UNION ALL
SELECT SUM(1) AS a
FROM tab3 t3
INNER JOIN tab4 t4
    ON t3.com = t4.com;

SELECT * FROM test_parallel;

 

  was:
Part of the data is missing. For details, see the attached SQL file.

 


> Execute union-all with child joins in parallel
> ----------------------------------------------
>
>                 Key: HIVE-22198
>                 URL: https://issues.apache.org/jira/browse/HIVE-22198
>             Project: Hive
>          Issue Type: Bug
>    Affects Versions: 3.1.0
>            Reporter: LuGuangMing
>            Assignee: LuGuangMing
>            Priority: Major
>         Attachments: test-parallel.sql
>
>
> With hive.exec.parallel=true, hive.optimize.skewjoin=true, and 
> hive.auto.convert.join=false, running a UNION ALL query produces no error 
> message, but some of the result data is missing. For details, see the 
> attached SQL file.
> create table tab1(tid int, com string) row format delimited fields terminated 
> by '\t' stored as textfile;
> create table tab2(tid int, com string) row format delimited fields terminated 
> by '\t' stored as textfile;
> create table tab3(tid int, com string) row format delimited fields terminated 
> by '\t' stored as textfile;
> create table tab4(tid int, com string) row format delimited fields terminated 
> by '\t' stored as textfile;
> insert into tab1 values(1,'abc'),(2,'bcd'),(3,'cde');
> insert into tab2 values(1,'abc'),(2,'bcd'),(3,'cde');
> insert into tab3 values(1,'abc'),(2,'bcd'),(3,'cde');
> insert into tab4 values(1,'abc'),(2,'bcd'),(3,'cde');
> set hive.auto.convert.join=false;
> set hive.optimize.skewjoin=true;
> set hive.exec.parallel=true;
> SELECT sum(1) as a 
>  FROM tab1 t1 
>  INNER JOIN tab2 t2 
>  ON t1.com = t2.com
> UNION ALL
> SELECT sum(1) as a 
> FROM tab3 t3 
> INNER JOIN tab4 t4 
>  ON t3.com = t4.com;
> -- the result should contain two rows, but only one is returned
> create table test_parallel stored as orcfile as 
> SELECT sum(1) as a 
>  FROM tab1 t1 
>  INNER JOIN tab2 t2 
>  ON t1.com = t2.com
> UNION ALL
> SELECT sum(1) as a 
> FROM tab3 t3 
> INNER JOIN tab4 t4 
>  ON t3.com = t4.com;
> select * from test_parallel;
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to