[ 
https://issues.apache.org/jira/browse/HIVE-22567?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Zhang Xiaoyang updated HIVE-22567:
----------------------------------
    Description: 
I get diffrent results when the map join is off and the reduce tasks is 
diffrent !

the tmp.tmp_mon is a big table and tmp.business_unit_config has only 7 records;

when set hive.auto.convert.join=false,the result is diffrent when the number of 
the reduce tasks changed;

set mapred.reduce.tasks=1 the result seems right and when  set 
mapred.reduce.tasks=2 or other,the result is missing some data;

what can cause this ? 


case 1:
set hive.auto.convert.join=true;
set mapred.reduce.tasks=-1;
select count(1) from (
    select department_code
    from scot_dws.dws_sales_contribution_kpi_mon
    where business_unit_code in (select business_unit_code from 
tmp.business_unit_config)
    group by department_code
)tmp;
+------+
| _c0  |
+------+
| 62   |
+------+
+----------------------------------------------------+
|                      Explain                       |
+----------------------------------------------------+
| Plan optimized by CBO.                             |
|                                                    |
| Vertex dependency in root stage                    |
| Map 1 <- Map 4 (BROADCAST_EDGE)                    |
| Reducer 2 <- Map 1 (SIMPLE_EDGE)                   |
| Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)        |
|                                                    |
| Stage-0                                            |
|   Fetch Operator                                   |
|     limit:-1                                       |
|     Stage-1                                        |
|       Reducer 3                                    |
|       File Output Operator [FS_21]                 |
|         Group By Operator [GBY_19] (rows=1 width=8) |
|           Output:["_col0"],aggregations:["count(VALUE._col0)"] |
|         <-Reducer 2 [CUSTOM_SIMPLE_EDGE]           |
|           PARTITION_ONLY_SHUFFLE [RS_18]           |
|             Group By Operator [GBY_17] (rows=1 width=8) |
|               Output:["_col0"],aggregations:["count()"] |
|               Select Operator [SEL_15] (rows=3673352 width=178) |
|                 Group By Operator [GBY_14] (rows=3673352 width=178) |
|                   Output:["_col0"],keys:KEY._col0  |
|                 <-Map 1 [SIMPLE_EDGE]              |
|                   SHUFFLE [RS_13]                  |
|                     PartitionCols:_col0            |
|                     Group By Operator [GBY_12] (rows=7346704 width=178) |
|                       Output:["_col0"],keys:_col1  |
|                       Map Join Operator [MAPJOIN_36] (rows=7346704 width=178) 
|
|                         Conds:SEL_2._col0=RS_9._col0(Left 
Semi),HybridGraceHashJoin:true,Output:["_col1"] |
|                       <-Map 4 [BROADCAST_EDGE]     |
|                         BROADCAST [RS_9]           |
|                           PartitionCols:_col0      |
|                           Group By Operator [GBY_7] (rows=7 width=184) |
|                             Output:["_col0"],keys:_col0 |
|                             Select Operator [SEL_5] (rows=7 width=184) |
|                               Output:["_col0"]     |
|                               Filter Operator [FIL_25] (rows=7 width=184) |
|                                 predicate:business_unit_code is not null |
|                                 TableScan [TS_3] (rows=7 width=184) |
|                                   
tmp@business_unit_config,business_unit_config,Tbl:COMPLETE,Col:NONE,Output:["business_unit_code"]
 |
|                       <-Select Operator [SEL_2] (rows=6678822 width=178) |
|                           Output:["_col0","_col1"] |
|                           Filter Operator [FIL_24] (rows=6678822 width=178) |
|                             predicate:business_unit_code is not null |
|                             TableScan [TS_0] (rows=6678822 width=178) |
|                               
scot_dws@dws_sales_contribution_kpi_mon,dws_sales_contribution_kpi_mon,Tbl:COMPLETE,Col:COMPLETE,Output:["business_unit_code","department_code"]
 |
|                                                    |
+----------------------------------------------------+
47 rows selected (1.589 seconds)

case 2:
set hive.auto.convert.join=true;
set mapred.reduce.tasks=100;
select count(1) from (
    select department_code
    from scot_dws.dws_sales_contribution_kpi_mon
    where business_unit_code in (select business_unit_code from 
tmp.business_unit_config)
    group by department_code
)tmp;
+------+
| _c0  |
+------+
| 62   |
+------+
+----------------------------------------------------+
|                      Explain                       |
+----------------------------------------------------+
| Plan optimized by CBO.                             |
|                                                    |
| Vertex dependency in root stage                    |
| Map 1 <- Map 4 (BROADCAST_EDGE)                    |
| Reducer 2 <- Map 1 (SIMPLE_EDGE)                   |
| Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)        |
|                                                    |
| Stage-0                                            |
|   Fetch Operator                                   |
|     limit:-1                                       |
|     Stage-1                                        |
|       Reducer 3                                    |
|       File Output Operator [FS_21]                 |
|         Group By Operator [GBY_19] (rows=1 width=8) |
|           Output:["_col0"],aggregations:["count(VALUE._col0)"] |
|         <-Reducer 2 [CUSTOM_SIMPLE_EDGE]           |
|           PARTITION_ONLY_SHUFFLE [RS_18]           |
|             Group By Operator [GBY_17] (rows=1 width=8) |
|               Output:["_col0"],aggregations:["count()"] |
|               Select Operator [SEL_15] (rows=3673352 width=178) |
|                 Group By Operator [GBY_14] (rows=3673352 width=178) |
|                   Output:["_col0"],keys:KEY._col0  |
|                 <-Map 1 [SIMPLE_EDGE]              |
|                   SHUFFLE [RS_13]                  |
|                     PartitionCols:_col0            |
|                     Group By Operator [GBY_12] (rows=7346704 width=178) |
|                       Output:["_col0"],keys:_col1  |
|                       Map Join Operator [MAPJOIN_36] (rows=7346704 width=178) 
|
|                         Conds:SEL_2._col0=RS_9._col0(Left 
Semi),HybridGraceHashJoin:true,Output:["_col1"] |
|                       <-Map 4 [BROADCAST_EDGE]     |
|                         BROADCAST [RS_9]           |
|                           PartitionCols:_col0      |
|                           Group By Operator [GBY_7] (rows=7 width=184) |
|                             Output:["_col0"],keys:_col0 |
|                             Select Operator [SEL_5] (rows=7 width=184) |
|                               Output:["_col0"]     |
|                               Filter Operator [FIL_25] (rows=7 width=184) |
|                                 predicate:business_unit_code is not null |
|                                 TableScan [TS_3] (rows=7 width=184) |
|                                   
tmp@business_unit_config,business_unit_config,Tbl:COMPLETE,Col:NONE,Output:["business_unit_code"]
 |
|                       <-Select Operator [SEL_2] (rows=6678822 width=178) |
|                           Output:["_col0","_col1"] |
|                           Filter Operator [FIL_24] (rows=6678822 width=178) |
|                             predicate:business_unit_code is not null |
|                             TableScan [TS_0] (rows=6678822 width=178) |
|                               
scot_dws@dws_sales_contribution_kpi_mon,dws_sales_contribution_kpi_mon,Tbl:COMPLETE,Col:COMPLETE,Output:["business_unit_code","department_code"]
 |
|                                                    |
+----------------------------------------------------+
47 rows selected (1.541 seconds)


case 3:
set hive.auto.convert.join=false;
set mapred.reduce.tasks=-1;
select count(1) from (
    select department_code
    from scot_dws.dws_sales_contribution_kpi_mon
    where business_unit_code in (select business_unit_code from 
tmp.business_unit_config)
    group by department_code
)tmp;
+------+
| _c0  |
+------+
| 62   |
+------+
+----------------------------------------------------+
|                      Explain                       |
+----------------------------------------------------+
| Plan optimized by CBO.                             |
|                                                    |
| Vertex dependency in root stage                    |
| Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) |
| Reducer 3 <- Reducer 2 (SIMPLE_EDGE)               |
| Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)        |
|                                                    |
| Stage-0                                            |
|   Fetch Operator                                   |
|     limit:-1                                       |
|     Stage-1                                        |
|       Reducer 4                                    |
|       File Output Operator [FS_21]                 |
|         Group By Operator [GBY_19] (rows=1 width=8) |
|           Output:["_col0"],aggregations:["count(VALUE._col0)"] |
|         <-Reducer 3 [CUSTOM_SIMPLE_EDGE]           |
|           PARTITION_ONLY_SHUFFLE [RS_18]           |
|             Group By Operator [GBY_17] (rows=1 width=8) |
|               Output:["_col0"],aggregations:["count()"] |
|               Select Operator [SEL_15] (rows=3673352 width=178) |
|                 Group By Operator [GBY_14] (rows=3673352 width=178) |
|                   Output:["_col0"],keys:KEY._col0  |
|                 <-Reducer 2 [SIMPLE_EDGE]          |
|                   SHUFFLE [RS_13]                  |
|                     PartitionCols:_col0            |
|                     Group By Operator [GBY_12] (rows=7346704 width=178) |
|                       Output:["_col0"],keys:_col1  |
|                       Merge Join Operator [MERGEJOIN_36] (rows=7346704 
width=178) |
|                         Conds:RS_8._col0=RS_9._col0(Left 
Semi),Output:["_col1"] |
|                       <-Map 1 [SIMPLE_EDGE]        |
|                         SHUFFLE [RS_8]             |
|                           PartitionCols:_col0      |
|                           Select Operator [SEL_2] (rows=6678822 width=178) |
|                             Output:["_col0","_col1"] |
|                             Filter Operator [FIL_24] (rows=6678822 width=178) 
|
|                               predicate:business_unit_code is not null |
|                               TableScan [TS_0] (rows=6678822 width=178) |
|                                 
scot_dws@dws_sales_contribution_kpi_mon,dws_sales_contribution_kpi_mon,Tbl:COMPLETE,Col:COMPLETE,Output:["business_unit_code","department_code"]
 |
|                       <-Map 5 [SIMPLE_EDGE]        |
|                         SHUFFLE [RS_9]             |
|                           PartitionCols:_col0      |
|                           Group By Operator [GBY_7] (rows=7 width=184) |
|                             Output:["_col0"],keys:_col0 |
|                             Select Operator [SEL_5] (rows=7 width=184) |
|                               Output:["_col0"]     |
|                               Filter Operator [FIL_25] (rows=7 width=184) |
|                                 predicate:business_unit_code is not null |
|                                 TableScan [TS_3] (rows=7 width=184) |
|                                   
tmp@business_unit_config,business_unit_config,Tbl:COMPLETE,Col:NONE,Output:["business_unit_code"]
 |
|                                                    |
+----------------------------------------------------+
50 rows selected (1.545 seconds)

case 4:
set hive.auto.convert.join=false;
set mapred.reduce.tasks=100;
select count(1) from (
    select department_code
    from scot_dws.dws_sales_contribution_kpi_mon
    where business_unit_code in (select business_unit_code from 
tmp.business_unit_config)
    group by department_code
)tmp;
+------+
| _c0  |
+------+
| 0    |
+------+
+----------------------------------------------------+
|                      Explain                       |
+----------------------------------------------------+
| Plan optimized by CBO.                             |
|                                                    |
| Vertex dependency in root stage                    |
| Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) |
| Reducer 3 <- Reducer 2 (SIMPLE_EDGE)               |
| Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)        |
|                                                    |
| Stage-0                                            |
|   Fetch Operator                                   |
|     limit:-1                                       |
|     Stage-1                                        |
|       Reducer 4                                    |
|       File Output Operator [FS_21]                 |
|         Group By Operator [GBY_19] (rows=1 width=8) |
|           Output:["_col0"],aggregations:["count(VALUE._col0)"] |
|         <-Reducer 3 [CUSTOM_SIMPLE_EDGE]           |
|           PARTITION_ONLY_SHUFFLE [RS_18]           |
|             Group By Operator [GBY_17] (rows=1 width=8) |
|               Output:["_col0"],aggregations:["count()"] |
|               Select Operator [SEL_15] (rows=3673352 width=178) |
|                 Group By Operator [GBY_14] (rows=3673352 width=178) |
|                   Output:["_col0"],keys:KEY._col0  |
|                 <-Reducer 2 [SIMPLE_EDGE]          |
|                   SHUFFLE [RS_13]                  |
|                     PartitionCols:_col0            |
|                     Group By Operator [GBY_12] (rows=7346704 width=178) |
|                       Output:["_col0"],keys:_col1  |
|                       Merge Join Operator [MERGEJOIN_36] (rows=7346704 
width=178) |
|                         Conds:RS_8._col0=RS_9._col0(Left 
Semi),Output:["_col1"] |
|                       <-Map 1 [SIMPLE_EDGE]        |
|                         SHUFFLE [RS_8]             |
|                           PartitionCols:_col0      |
|                           Select Operator [SEL_2] (rows=6678822 width=178) |
|                             Output:["_col0","_col1"] |
|                             Filter Operator [FIL_24] (rows=6678822 width=178) 
|
|                               predicate:business_unit_code is not null |
|                               TableScan [TS_0] (rows=6678822 width=178) |
|                                 
scot_dws@dws_sales_contribution_kpi_mon,dws_sales_contribution_kpi_mon,Tbl:COMPLETE,Col:COMPLETE,Output:["business_unit_code","department_code"]
 |
|                       <-Map 5 [SIMPLE_EDGE]        |
|                         SHUFFLE [RS_9]             |
|                           PartitionCols:_col0      |
|                           Group By Operator [GBY_7] (rows=7 width=184) |
|                             Output:["_col0"],keys:_col0 |
|                             Select Operator [SEL_5] (rows=7 width=184) |
|                               Output:["_col0"]     |
|                               Filter Operator [FIL_25] (rows=7 width=184) |
|                                 predicate:business_unit_code is not null |
|                                 TableScan [TS_3] (rows=7 width=184) |
|                                   
tmp@business_unit_config,business_unit_config,Tbl:COMPLETE,Col:NONE,Output:["business_unit_code"]
 |
|                                                    |
+----------------------------------------------------+
50 rows selected (1.547 seconds)

 

 

  was:
I get diffrent results when the map join is off and the reduce tasks is 
diffrent !

the tmp.tmp_mon is a big table and tmp.business_unit_config has only 7 records;

when set hive.auto.convert.join=false,the result is diffrent when the number of 
the reduce tasks changed;

set mapred.reduce.tasks=1 the result seems right and when  set 
mapred.reduce.tasks=2 or other,the result is missing some data;

what can cause this ? 

 

 


> Data loss when map join is off ,the result is diffrent when the number of 
> reduce tasks is diffrent;
> ---------------------------------------------------------------------------------------------------
>
>                 Key: HIVE-22567
>                 URL: https://issues.apache.org/jira/browse/HIVE-22567
>             Project: Hive
>          Issue Type: Bug
>    Affects Versions: 3.1.1, 3.1.2
>         Environment: select count(1) from (
>     select department_code 
>     from tmp.tmp_mon
>     where business_unit_code in (select business_unit_code from 
> tmp.business_unit_config)
>     group by department_code 
> )tmp
>            Reporter: Zhang Xiaoyang
>            Assignee: Aditya Shah
>            Priority: Blocker
>
> I get diffrent results when the map join is off and the reduce tasks is 
> diffrent !
> the tmp.tmp_mon is a big table and tmp.business_unit_config has only 7 
> records;
> when set hive.auto.convert.join=false,the result is diffrent when the number 
> of the reduce tasks changed;
> set mapred.reduce.tasks=1 the result seems right and when  set 
> mapred.reduce.tasks=2 or other,the result is missing some data;
> what can cause this ? 
> case 1:
> set hive.auto.convert.join=true;
> set mapred.reduce.tasks=-1;
> select count(1) from (
>     select department_code
>     from scot_dws.dws_sales_contribution_kpi_mon
>     where business_unit_code in (select business_unit_code from 
> tmp.business_unit_config)
>     group by department_code
> )tmp;
> +------+
> | _c0  |
> +------+
> | 62   |
> +------+
> +----------------------------------------------------+
> |                      Explain                       |
> +----------------------------------------------------+
> | Plan optimized by CBO.                             |
> |                                                    |
> | Vertex dependency in root stage                    |
> | Map 1 <- Map 4 (BROADCAST_EDGE)                    |
> | Reducer 2 <- Map 1 (SIMPLE_EDGE)                   |
> | Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)        |
> |                                                    |
> | Stage-0                                            |
> |   Fetch Operator                                   |
> |     limit:-1                                       |
> |     Stage-1                                        |
> |       Reducer 3                                    |
> |       File Output Operator [FS_21]                 |
> |         Group By Operator [GBY_19] (rows=1 width=8) |
> |           Output:["_col0"],aggregations:["count(VALUE._col0)"] |
> |         <-Reducer 2 [CUSTOM_SIMPLE_EDGE]           |
> |           PARTITION_ONLY_SHUFFLE [RS_18]           |
> |             Group By Operator [GBY_17] (rows=1 width=8) |
> |               Output:["_col0"],aggregations:["count()"] |
> |               Select Operator [SEL_15] (rows=3673352 width=178) |
> |                 Group By Operator [GBY_14] (rows=3673352 width=178) |
> |                   Output:["_col0"],keys:KEY._col0  |
> |                 <-Map 1 [SIMPLE_EDGE]              |
> |                   SHUFFLE [RS_13]                  |
> |                     PartitionCols:_col0            |
> |                     Group By Operator [GBY_12] (rows=7346704 width=178) |
> |                       Output:["_col0"],keys:_col1  |
> |                       Map Join Operator [MAPJOIN_36] (rows=7346704 
> width=178) |
> |                         Conds:SEL_2._col0=RS_9._col0(Left 
> Semi),HybridGraceHashJoin:true,Output:["_col1"] |
> |                       <-Map 4 [BROADCAST_EDGE]     |
> |                         BROADCAST [RS_9]           |
> |                           PartitionCols:_col0      |
> |                           Group By Operator [GBY_7] (rows=7 width=184) |
> |                             Output:["_col0"],keys:_col0 |
> |                             Select Operator [SEL_5] (rows=7 width=184) |
> |                               Output:["_col0"]     |
> |                               Filter Operator [FIL_25] (rows=7 width=184) |
> |                                 predicate:business_unit_code is not null |
> |                                 TableScan [TS_3] (rows=7 width=184) |
> |                                   
> tmp@business_unit_config,business_unit_config,Tbl:COMPLETE,Col:NONE,Output:["business_unit_code"]
>  |
> |                       <-Select Operator [SEL_2] (rows=6678822 width=178) |
> |                           Output:["_col0","_col1"] |
> |                           Filter Operator [FIL_24] (rows=6678822 width=178) 
> |
> |                             predicate:business_unit_code is not null |
> |                             TableScan [TS_0] (rows=6678822 width=178) |
> |                               
> scot_dws@dws_sales_contribution_kpi_mon,dws_sales_contribution_kpi_mon,Tbl:COMPLETE,Col:COMPLETE,Output:["business_unit_code","department_code"]
>  |
> |                                                    |
> +----------------------------------------------------+
> 47 rows selected (1.589 seconds)
> case 2:
> set hive.auto.convert.join=true;
> set mapred.reduce.tasks=100;
> select count(1) from (
>     select department_code
>     from scot_dws.dws_sales_contribution_kpi_mon
>     where business_unit_code in (select business_unit_code from 
> tmp.business_unit_config)
>     group by department_code
> )tmp;
> +------+
> | _c0  |
> +------+
> | 62   |
> +------+
> +----------------------------------------------------+
> |                      Explain                       |
> +----------------------------------------------------+
> | Plan optimized by CBO.                             |
> |                                                    |
> | Vertex dependency in root stage                    |
> | Map 1 <- Map 4 (BROADCAST_EDGE)                    |
> | Reducer 2 <- Map 1 (SIMPLE_EDGE)                   |
> | Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)        |
> |                                                    |
> | Stage-0                                            |
> |   Fetch Operator                                   |
> |     limit:-1                                       |
> |     Stage-1                                        |
> |       Reducer 3                                    |
> |       File Output Operator [FS_21]                 |
> |         Group By Operator [GBY_19] (rows=1 width=8) |
> |           Output:["_col0"],aggregations:["count(VALUE._col0)"] |
> |         <-Reducer 2 [CUSTOM_SIMPLE_EDGE]           |
> |           PARTITION_ONLY_SHUFFLE [RS_18]           |
> |             Group By Operator [GBY_17] (rows=1 width=8) |
> |               Output:["_col0"],aggregations:["count()"] |
> |               Select Operator [SEL_15] (rows=3673352 width=178) |
> |                 Group By Operator [GBY_14] (rows=3673352 width=178) |
> |                   Output:["_col0"],keys:KEY._col0  |
> |                 <-Map 1 [SIMPLE_EDGE]              |
> |                   SHUFFLE [RS_13]                  |
> |                     PartitionCols:_col0            |
> |                     Group By Operator [GBY_12] (rows=7346704 width=178) |
> |                       Output:["_col0"],keys:_col1  |
> |                       Map Join Operator [MAPJOIN_36] (rows=7346704 
> width=178) |
> |                         Conds:SEL_2._col0=RS_9._col0(Left 
> Semi),HybridGraceHashJoin:true,Output:["_col1"] |
> |                       <-Map 4 [BROADCAST_EDGE]     |
> |                         BROADCAST [RS_9]           |
> |                           PartitionCols:_col0      |
> |                           Group By Operator [GBY_7] (rows=7 width=184) |
> |                             Output:["_col0"],keys:_col0 |
> |                             Select Operator [SEL_5] (rows=7 width=184) |
> |                               Output:["_col0"]     |
> |                               Filter Operator [FIL_25] (rows=7 width=184) |
> |                                 predicate:business_unit_code is not null |
> |                                 TableScan [TS_3] (rows=7 width=184) |
> |                                   
> tmp@business_unit_config,business_unit_config,Tbl:COMPLETE,Col:NONE,Output:["business_unit_code"]
>  |
> |                       <-Select Operator [SEL_2] (rows=6678822 width=178) |
> |                           Output:["_col0","_col1"] |
> |                           Filter Operator [FIL_24] (rows=6678822 width=178) 
> |
> |                             predicate:business_unit_code is not null |
> |                             TableScan [TS_0] (rows=6678822 width=178) |
> |                               
> scot_dws@dws_sales_contribution_kpi_mon,dws_sales_contribution_kpi_mon,Tbl:COMPLETE,Col:COMPLETE,Output:["business_unit_code","department_code"]
>  |
> |                                                    |
> +----------------------------------------------------+
> 47 rows selected (1.541 seconds)
> case 3:
> set hive.auto.convert.join=false;
> set mapred.reduce.tasks=-1;
> select count(1) from (
>     select department_code
>     from scot_dws.dws_sales_contribution_kpi_mon
>     where business_unit_code in (select business_unit_code from 
> tmp.business_unit_config)
>     group by department_code
> )tmp;
> +------+
> | _c0  |
> +------+
> | 62   |
> +------+
> +----------------------------------------------------+
> |                      Explain                       |
> +----------------------------------------------------+
> | Plan optimized by CBO.                             |
> |                                                    |
> | Vertex dependency in root stage                    |
> | Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) |
> | Reducer 3 <- Reducer 2 (SIMPLE_EDGE)               |
> | Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)        |
> |                                                    |
> | Stage-0                                            |
> |   Fetch Operator                                   |
> |     limit:-1                                       |
> |     Stage-1                                        |
> |       Reducer 4                                    |
> |       File Output Operator [FS_21]                 |
> |         Group By Operator [GBY_19] (rows=1 width=8) |
> |           Output:["_col0"],aggregations:["count(VALUE._col0)"] |
> |         <-Reducer 3 [CUSTOM_SIMPLE_EDGE]           |
> |           PARTITION_ONLY_SHUFFLE [RS_18]           |
> |             Group By Operator [GBY_17] (rows=1 width=8) |
> |               Output:["_col0"],aggregations:["count()"] |
> |               Select Operator [SEL_15] (rows=3673352 width=178) |
> |                 Group By Operator [GBY_14] (rows=3673352 width=178) |
> |                   Output:["_col0"],keys:KEY._col0  |
> |                 <-Reducer 2 [SIMPLE_EDGE]          |
> |                   SHUFFLE [RS_13]                  |
> |                     PartitionCols:_col0            |
> |                     Group By Operator [GBY_12] (rows=7346704 width=178) |
> |                       Output:["_col0"],keys:_col1  |
> |                       Merge Join Operator [MERGEJOIN_36] (rows=7346704 
> width=178) |
> |                         Conds:RS_8._col0=RS_9._col0(Left 
> Semi),Output:["_col1"] |
> |                       <-Map 1 [SIMPLE_EDGE]        |
> |                         SHUFFLE [RS_8]             |
> |                           PartitionCols:_col0      |
> |                           Select Operator [SEL_2] (rows=6678822 width=178) |
> |                             Output:["_col0","_col1"] |
> |                             Filter Operator [FIL_24] (rows=6678822 
> width=178) |
> |                               predicate:business_unit_code is not null |
> |                               TableScan [TS_0] (rows=6678822 width=178) |
> |                                 
> scot_dws@dws_sales_contribution_kpi_mon,dws_sales_contribution_kpi_mon,Tbl:COMPLETE,Col:COMPLETE,Output:["business_unit_code","department_code"]
>  |
> |                       <-Map 5 [SIMPLE_EDGE]        |
> |                         SHUFFLE [RS_9]             |
> |                           PartitionCols:_col0      |
> |                           Group By Operator [GBY_7] (rows=7 width=184) |
> |                             Output:["_col0"],keys:_col0 |
> |                             Select Operator [SEL_5] (rows=7 width=184) |
> |                               Output:["_col0"]     |
> |                               Filter Operator [FIL_25] (rows=7 width=184) |
> |                                 predicate:business_unit_code is not null |
> |                                 TableScan [TS_3] (rows=7 width=184) |
> |                                   
> tmp@business_unit_config,business_unit_config,Tbl:COMPLETE,Col:NONE,Output:["business_unit_code"]
>  |
> |                                                    |
> +----------------------------------------------------+
> 50 rows selected (1.545 seconds)
> case 4:
> set hive.auto.convert.join=false;
> set mapred.reduce.tasks=100;
> select count(1) from (
>     select department_code
>     from scot_dws.dws_sales_contribution_kpi_mon
>     where business_unit_code in (select business_unit_code from 
> tmp.business_unit_config)
>     group by department_code
> )tmp;
> +------+
> | _c0  |
> +------+
> | 0    |
> +------+
> +----------------------------------------------------+
> |                      Explain                       |
> +----------------------------------------------------+
> | Plan optimized by CBO.                             |
> |                                                    |
> | Vertex dependency in root stage                    |
> | Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) |
> | Reducer 3 <- Reducer 2 (SIMPLE_EDGE)               |
> | Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)        |
> |                                                    |
> | Stage-0                                            |
> |   Fetch Operator                                   |
> |     limit:-1                                       |
> |     Stage-1                                        |
> |       Reducer 4                                    |
> |       File Output Operator [FS_21]                 |
> |         Group By Operator [GBY_19] (rows=1 width=8) |
> |           Output:["_col0"],aggregations:["count(VALUE._col0)"] |
> |         <-Reducer 3 [CUSTOM_SIMPLE_EDGE]           |
> |           PARTITION_ONLY_SHUFFLE [RS_18]           |
> |             Group By Operator [GBY_17] (rows=1 width=8) |
> |               Output:["_col0"],aggregations:["count()"] |
> |               Select Operator [SEL_15] (rows=3673352 width=178) |
> |                 Group By Operator [GBY_14] (rows=3673352 width=178) |
> |                   Output:["_col0"],keys:KEY._col0  |
> |                 <-Reducer 2 [SIMPLE_EDGE]          |
> |                   SHUFFLE [RS_13]                  |
> |                     PartitionCols:_col0            |
> |                     Group By Operator [GBY_12] (rows=7346704 width=178) |
> |                       Output:["_col0"],keys:_col1  |
> |                       Merge Join Operator [MERGEJOIN_36] (rows=7346704 
> width=178) |
> |                         Conds:RS_8._col0=RS_9._col0(Left 
> Semi),Output:["_col1"] |
> |                       <-Map 1 [SIMPLE_EDGE]        |
> |                         SHUFFLE [RS_8]             |
> |                           PartitionCols:_col0      |
> |                           Select Operator [SEL_2] (rows=6678822 width=178) |
> |                             Output:["_col0","_col1"] |
> |                             Filter Operator [FIL_24] (rows=6678822 
> width=178) |
> |                               predicate:business_unit_code is not null |
> |                               TableScan [TS_0] (rows=6678822 width=178) |
> |                                 
> scot_dws@dws_sales_contribution_kpi_mon,dws_sales_contribution_kpi_mon,Tbl:COMPLETE,Col:COMPLETE,Output:["business_unit_code","department_code"]
>  |
> |                       <-Map 5 [SIMPLE_EDGE]        |
> |                         SHUFFLE [RS_9]             |
> |                           PartitionCols:_col0      |
> |                           Group By Operator [GBY_7] (rows=7 width=184) |
> |                             Output:["_col0"],keys:_col0 |
> |                             Select Operator [SEL_5] (rows=7 width=184) |
> |                               Output:["_col0"]     |
> |                               Filter Operator [FIL_25] (rows=7 width=184) |
> |                                 predicate:business_unit_code is not null |
> |                                 TableScan [TS_3] (rows=7 width=184) |
> |                                   
> tmp@business_unit_config,business_unit_config,Tbl:COMPLETE,Col:NONE,Output:["business_unit_code"]
>  |
> |                                                    |
> +----------------------------------------------------+
> 50 rows selected (1.547 seconds)
>  
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to