[ https://issues.apache.org/jira/browse/HIVE-20260?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16564056#comment-16564056 ]
Hive QA commented on HIVE-20260: -------------------------------- Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12933789/HIVE-20260.01wip03.patch {color:green}SUCCESS:{color} +1 due to 2 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 63 failed/errored test(s), 14838 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cbo_rp_auto_join1] (batchId=4) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[explainuser_2] (batchId=154) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[unionDistinct_1] (batchId=152) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[auto_join29] (batchId=170) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[auto_smb_mapjoin_14] (batchId=171) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[auto_sortmerge_join_10] (batchId=175) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[auto_sortmerge_join_9] (batchId=173) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[bucket_map_join_tez2] (batchId=158) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[bucketsortoptimize_insert_7] (batchId=170) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[constprog_semijoin] (batchId=172) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[correlationoptimizer1] (batchId=171) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[correlationoptimizer2] (batchId=169) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[correlationoptimizer6] (batchId=169) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[dynpart_sort_opt_vectorization] (batchId=169) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[explainuser_1] (batchId=165) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[explainuser_4] (batchId=166) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[filter_join_breaktask] (batchId=175) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[groupby_groupingset_bug] (batchId=178) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[hybridgrace_hashjoin_1] (batchId=162) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[limit_pushdown] (batchId=174) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[load_dyn_part1] (batchId=178) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[materialized_view_create_rewrite_3] (batchId=168) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[materialized_view_create_rewrite_rebuild_dummy] (batchId=163) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[mrr] (batchId=159) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[multiMapJoin2] (batchId=175) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[orc_llap] (batchId=168) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[orc_predicate_pushdown] (batchId=157) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[parquet_predicate_pushdown] (batchId=162) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[reopt_semijoin] (batchId=178) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[sample10] (batchId=168) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[sample10_mm] (batchId=170) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[semijoin6] (batchId=178) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[semijoin7] (batchId=156) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[semijoin] (batchId=161) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[skewjoin] (batchId=161) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[smb_mapjoin_14] (batchId=172) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_exists] (batchId=167) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_in] (batchId=172) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_in_having] (batchId=171) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_multi] (batchId=160) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_notin] (batchId=174) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_scalar] (batchId=166) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_select] (batchId=166) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[subquery_views] (batchId=159) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[tez_dynpart_hashjoin_2] (batchId=165) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[tez_vector_dynpart_hashjoin_2] (batchId=162) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[unionDistinct_3] (batchId=168) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_groupby_grouping_sets2] (batchId=165) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_mapjoin_reduce] (batchId=177) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_windowing_gby2] (batchId=173) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_windowing_gby] (batchId=177) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorization_0] (batchId=178) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorization_limit] (batchId=165) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorization_short_regress] (batchId=169) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorized_context] (batchId=164) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorized_mapjoin] (batchId=175) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorized_nested_mapjoin] (batchId=160) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorized_shufflejoin] (batchId=175) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[windowing_gby] (batchId=167) org.apache.hadoop.hive.cli.TestMiniSparkOnYarnCliDriver.testCliDriver[spark_explainuser_1] (batchId=187) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_1] (batchId=106) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_4] (batchId=107) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[hybridgrace_hashjoin_1] (batchId=106) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/12967/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/12967/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-12967/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.YetusPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 63 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12933789 - PreCommit-HIVE-Build > NDV of a column shouldn't be scaled when row count is changed by filter on > another column > ----------------------------------------------------------------------------------------- > > Key: HIVE-20260 > URL: https://issues.apache.org/jira/browse/HIVE-20260 > Project: Hive > Issue Type: Improvement > Components: Statistics > Reporter: Ashutosh Chauhan > Assignee: Zoltan Haindrich > Priority: Major > Attachments: HIVE-20260.01wip01.patch, HIVE-20260.01wip02.patch, > HIVE-20260.01wip03.patch > > > HIVE-17465 introduced progressive scaling of rowcounts in presence of > multiple filters. HIVE-19500 improved on that by also scaling col stats (NDV) > in such scenario. However, it should pay attention to column used in filter > expression and not scale for all filters. eg., > consider filter a = 1 and b = 2 ndv of column b should not be scaled down by > row count changes caused by a = 1 > Other way to say this that ndv of a particular column should be updated at > the end of computation of row count for that operator. > Here are the possible cases where our estimates can be accurate (or close to) > {code} > case 1 - (d_year = 2001 and d_moy=1) > case 2 - (d_year = 2001 and d_year IN (2001, 2002)) > case 3 - (d_year = 2001 and d_moy = 1 and d_dom = 1) > case 4 - (d_date IN ('1999-01-02', '1999-01-02')) > case 5 - (d_date = '1999-01-01') > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)