[ https://issues.apache.org/jira/browse/HIVE-17037?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16074723#comment-16074723 ]
Hive QA commented on HIVE-17037: -------------------------------- Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12875736/HIVE-17037.patch {color:green}SUCCESS:{color} +1 due to 1 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 44 failed/errored test(s), 10833 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestBeeLineDriver.testCliDriver[create_merge_compressed] (batchId=237) org.apache.hadoop.hive.cli.TestBeeLineDriver.testCliDriver[insert_overwrite_local_directory_1] (batchId=237) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[partialsmbjoin] (batchId=28) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[explainuser_2] (batchId=142) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[llap_smb] (batchId=143) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_basic] (batchId=140) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[unionDistinct_1] (batchId=141) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[auto_smb_mapjoin_14] (batchId=155) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_gby] (batchId=152) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_rp_gby] (batchId=157) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_subq_exists] (batchId=159) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_subq_not_in] (batchId=154) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[correlationoptimizer2] (batchId=153) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[correlationoptimizer3] (batchId=160) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[correlationoptimizer6] (batchId=154) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cross_product_check_1] (batchId=153) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[dynamic_partition_pruning] (batchId=149) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[explainuser_1] (batchId=151) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[jdbc_handler] (batchId=155) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[lineage3] (batchId=153) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[mergejoin] (batchId=156) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[mrr] (batchId=146) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[special_character_in_tabnames_1] (batchId=153) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_exists] (batchId=152) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_in] (batchId=156) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_multi] (batchId=147) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_nested_subquery] (batchId=150) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_notin] (batchId=157) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_null_agg] (batchId=157) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_scalar] (batchId=152) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_select] (batchId=152) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_views] (batchId=146) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[table_access_keys_stats] (batchId=157) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[tez_smb_1] (batchId=158) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[tez_union_group_by] (batchId=159) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_auto_smb_mapjoin_14] (batchId=150) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_groupby_grouping_sets4] (batchId=149) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_if_expr] (batchId=145) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorized_dynamic_partition_pruning] (batchId=150) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_2] (batchId=99) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_5] (batchId=98) org.apache.hive.hcatalog.api.TestHCatClient.testPartitionRegistrationWithCustomSchema (batchId=177) org.apache.hive.hcatalog.api.TestHCatClient.testPartitionSpecRegistrationWithCustomSchema (batchId=177) org.apache.hive.hcatalog.api.TestHCatClient.testTableSchemaPropagation (batchId=177) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/5892/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/5892/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-5892/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 44 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12875736 - PreCommit-HIVE-Build > Extend join algorithm selection to avoid unnecessary input data shuffle > ----------------------------------------------------------------------- > > Key: HIVE-17037 > URL: https://issues.apache.org/jira/browse/HIVE-17037 > Project: Hive > Issue Type: Improvement > Components: Physical Optimizer > Affects Versions: 3.0.0 > Reporter: Jesus Camacho Rodriguez > Assignee: Jesus Camacho Rodriguez > Attachments: HIVE-17037.patch > > > As an example, consider the following query: > {code:sql} > SELECT * > FROM ( > SELECT a.value > FROM src1 a > JOIN src1 b > ON (a.value = b.value) > GROUP BY a.value > ) a > JOIN src > ON (a.value = src.value); > {code} > Currently, the plan generated for Tez will contain an unnecessary shuffle > operation between the subquery and the join, since the records produced by > the subquery are already sorted by the value. > This issue is to extend join algorithm selection to be able to shuffle only > some of the inputs for a given join and avoid unnecessary shuffle operations. -- This message was sent by Atlassian JIRA (v6.4.14#64029)