[ https://issues.apache.org/jira/browse/HIVE-22227?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Vineet Garg updated HIVE-22227: ------------------------------- Description: *Reproducer* {code:sql} set hive.tez.bucket.pruning=true; set hive.optimize.shared.work=true; CREATE TABLE srcbucket_mapjoin_n16(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; CREATE TABLE tab_part_n10 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS ORCFILE; CREATE TABLE srcbucket_mapjoin_part_n17 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '$HIVE_SRC/data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n16 partition(ds='2008-04-08'); load data local inpath '$HIVE_SRC/data/files/bmj1/000001_0' INTO TABLE srcbucket_mapjoin_n16 partition(ds='2008-04-08'); load data local inpath '$HIVE_SRC/data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n17 partition(ds='2008-04-08'); load data local inpath '$HIVE_SRC/data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n17 partition(ds='2008-04-08'); load data local inpath '$HIVE_SRC/data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n17 partition(ds='2008-04-08'); set hive.optimize.bucketingsorting=false; insert overwrite table tab_part_n10 partition (ds='2008-04-08') select key,value from srcbucket_mapjoin_part_n17; CREATE TABLE tab_n9(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORCFILE; insert overwrite table tab_n9 partition (ds='2008-04-08') select key,value from srcbucket_mapjoin_n16; select * from (select * from tab_n9 where tab_n9.key = 0)a join (select * from tab_part_n10 where tab_part_n10.key = 98)b full outer join tab_part_n10 c on a.key = b.key and b.key = c.key order by 1,2,3,4,5,6,7,8,9; {code} > Tez bucket pruning produces wrong result with shared work optimization > ---------------------------------------------------------------------- > > Key: 
HIVE-22227 > URL: https://issues.apache.org/jira/browse/HIVE-22227 > Project: Hive > Issue Type: Bug > Components: Query Planning > Affects Versions: 4.0.0 > Reporter: Vineet Garg > Assignee: Vineet Garg > Priority: Major > > *Reproducer* > {code:sql} > set hive.tez.bucket.pruning=true; > set hive.optimize.shared.work=true; > CREATE TABLE srcbucket_mapjoin_n16(key int, value string) partitioned by (ds > string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; > CREATE TABLE tab_part_n10 (key int, value string) PARTITIONED BY(ds STRING) > CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS ORCFILE; > CREATE TABLE srcbucket_mapjoin_part_n17 (key int, value string) partitioned > by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; > load data local inpath '$HIVE_SRC/data/files/bmj/000000_0' INTO TABLE > srcbucket_mapjoin_n16 partition(ds='2008-04-08'); > load data local inpath '$HIVE_SRC/data/files/bmj1/000001_0' INTO TABLE > srcbucket_mapjoin_n16 partition(ds='2008-04-08'); > load data local inpath '$HIVE_SRC/data/files/bmj/000000_0' INTO TABLE > srcbucket_mapjoin_part_n17 partition(ds='2008-04-08'); > load data local inpath '$HIVE_SRC/data/files/bmj/000001_0' INTO TABLE > srcbucket_mapjoin_part_n17 partition(ds='2008-04-08'); > load data local inpath '$HIVE_SRC/data/files/bmj/000002_0' INTO TABLE > srcbucket_mapjoin_part_n17 partition(ds='2008-04-08'); > set hive.optimize.bucketingsorting=false; > insert overwrite table tab_part_n10 partition (ds='2008-04-08') > select key,value from srcbucket_mapjoin_part_n17; > CREATE TABLE tab_n9(key int, value string) PARTITIONED BY(ds STRING) > CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORCFILE; > insert overwrite table tab_n9 partition (ds='2008-04-08') > select key,value from srcbucket_mapjoin_n16; > select * from > (select * from tab_n9 where tab_n9.key = 0)a > join > (select * from tab_part_n10 where tab_part_n10.key = 98)b full outer join > tab_part_n10 c on a.key = b.key and 
b.key = c.key > order by 1,2,3,4,5,6,7,8,9; > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)