[ 
https://issues.apache.org/jira/browse/HIVE-28582?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Stamatis Zampetakis resolved HIVE-28582.
----------------------------------------
    Fix Version/s: 4.1.0
       Resolution: Fixed

Fixed in 
https://github.com/apache/hive/commit/5ffcdb716581861f415964d46546ab092e551f45

Thanks for the review, [~krisztiankasa]!

> OOM when compiling query with many GROUP BY columns aliased multiple times
> --------------------------------------------------------------------------
>
>                 Key: HIVE-28582
>                 URL: https://issues.apache.org/jira/browse/HIVE-28582
>             Project: Hive
>          Issue Type: Bug
>          Components: CBO, HiveServer2
>    Affects Versions: 4.0.1
>            Reporter: Stamatis Zampetakis
>            Assignee: Stamatis Zampetakis
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 4.1.0
>
>         Attachments: hive_28582.q
>
>
> {code:sql}
> CREATE TABLE t (
>     c1 string,
>     c2 string,
>     c3 string,
>     c4 string,
>     c5 string,
>     c6 string,
>     c7 string,
>     c8 string,
>     c9 string
> );
> EXPLAIN CBO
> SELECT a0
> FROM 
> (SELECT c1 as a0,
>         c1 as a1,
>         c1 as a2,
>         c2 as a3,
>         c2 as a4,
>         c2 as a5,
>         c3 as a6,
>         c3 as a7,
>         c3 as a8,
>         c4 as a9,
>         c4 as a10,
>         c4 as a11,
>         c5 as a12,
>         c5 as a13,
>         c5 as a14,
>         c6 as a15,
>         c6 as a16,
>         c6 as a17,
>         c7 as a18,
>         c7 as a19,
>         c7 as a20,
>         c8 as a21,
>         c8 as a22,
>         c8 as a23,
>         c9 as a24,
>         c9 as a25,
>         c9 as a26
> FROM t GROUP BY c1,c2,c3,c4,c5,c6,c7,c8,c9) t1
> GROUP BY a0, a4
> {code}
> The query above spends a lot of time in compilation and eventually leads to 
> an OutOfMemoryError, causing HiveServer2 to crash.
> Note that each column in the inner query appears in the GROUP BY clause and 
> is aliased three times in the SELECT clause. The multiple aliases of the same 
> column as well as the fact that the column appears in the GROUP BY are 
> necessary conditions to trigger the problem.
> The stack traces show that the query spends the entire time in field trimming 
> (HiveRelFieldTrimmer) while trying to obtain metadata information about the 
> unique keys.
> {noformat}
> "fdc1b4d2-11d6-4b5c-a665-c321ee5f7e22 main" #1 prio=5 os_prio=0 
> tid=0x00007f2e2800b800 nid=0x132ff runnable [0x00007f2e2e7fb000]
>    java.lang.Thread.State: RUNNABLE
>         at 
> org.apache.calcite.util.ImmutableBitSet$Builder.addAll(ImmutableBitSet.java:1086)
>         at 
> org.apache.calcite.util.ImmutableBitSet.union(ImmutableBitSet.java:691)
>         at 
> org.apache.calcite.rel.metadata.RelMdUniqueKeys$$Lambda$1124/1526659719.apply(Unknown
>  Source)
>         at 
> org.apache.hive.com.google.common.collect.Iterators$5.transform(Iterators.java:757)
>         at 
> org.apache.hive.com.google.common.collect.TransformedIterator.next(TransformedIterator.java:48)
>         at 
> org.apache.hive.com.google.common.collect.ImmutableCollection$Builder.addAll(ImmutableCollection.java:418)
>         at 
> org.apache.hive.com.google.common.collect.ImmutableCollection$ArrayBasedBuilder.addAll(ImmutableCollection.java:502)
>         at 
> org.apache.hive.com.google.common.collect.ImmutableSet$Builder.addAll(ImmutableSet.java:520)
>         at 
> org.apache.calcite.rel.metadata.RelMdUniqueKeys.getProjectUniqueKeys(RelMdUniqueKeys.java:162)
>         at 
> org.apache.calcite.rel.metadata.RelMdUniqueKeys.getUniqueKeys(RelMdUniqueKeys.java:93)
>         at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys_$(Unknown Source)
>         at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys(Unknown Source)
>         at 
> org.apache.calcite.rel.metadata.RelMetadataQuery.getUniqueKeys(RelMetadataQuery.java:464)
>         at 
> org.apache.calcite.rel.metadata.RelMdUniqueKeys.getProjectUniqueKeys(RelMdUniqueKeys.java:136)
>         at 
> org.apache.calcite.rel.metadata.RelMdUniqueKeys.getUniqueKeys(RelMdUniqueKeys.java:93)
>         at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys_$(Unknown Source)
>         at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys(Unknown Source)
>         at 
> org.apache.calcite.rel.metadata.RelMetadataQuery.getUniqueKeys(RelMetadataQuery.java:464)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.generateNewGroupset(HiveRelFieldTrimmer.java:500)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimFields(HiveRelFieldTrimmer.java:641)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VisitDispatcher$$Lambda$1113/231262971.apply(Unknown
>  Source)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VarArgsFunc4.apply(HiveReflectUtil.java:322)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$HiveMethodDispatcher.invoke(HiveReflectUtil.java:221)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.dispatchTrimFields(RelFieldTrimmer.java:286)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimChild(HiveRelFieldTrimmer.java:203)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.trimFields(RelFieldTrimmer.java:447)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimFields(HiveRelFieldTrimmer.java:759)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VisitDispatcher$$Lambda$1112/565392473.apply(Unknown
>  Source)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VarArgsFunc4.apply(HiveReflectUtil.java:322)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$HiveMethodDispatcher.invoke(HiveReflectUtil.java:221)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.dispatchTrimFields(RelFieldTrimmer.java:286)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.trim(RelFieldTrimmer.java:170)
>         at 
> org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trim(HiveRelFieldTrimmer.java:164)
>         at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1674)
>         at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1583)
>         at 
> org.apache.calcite.tools.Frameworks.lambda$withPlanner$0(Frameworks.java:131)
>         at 
> org.apache.calcite.tools.Frameworks$$Lambda$716/1464860003.apply(Unknown 
> Source)
>         at 
> org.apache.calcite.prepare.CalcitePrepareImpl.perform(CalcitePrepareImpl.java:914)
>         at 
> org.apache.calcite.tools.Frameworks.withPrepare(Frameworks.java:180)
>         at 
> org.apache.calcite.tools.Frameworks.withPlanner(Frameworks.java:126)
>         at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.logicalPlan(CalcitePlanner.java:1335)
>         at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:583)
>         at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:13163)
>         at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:476)
>         at 
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:332)
>         at 
> org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:180)
>         at 
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:332)
>         at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224)
>         at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:109)
>         at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:499)
> {noformat}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to