[ https://issues.apache.org/jira/browse/HIVE-28582?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Stamatis Zampetakis resolved HIVE-28582. ---------------------------------------- Fix Version/s: 4.1.0 Resolution: Fixed Fixed in https://github.com/apache/hive/commit/5ffcdb716581861f415964d46546ab092e551f45 Thanks for the review [~krisztiankasa]! > OOM when compiling query with many GROUP BY columns aliased multiple times > -------------------------------------------------------------------------- > > Key: HIVE-28582 > URL: https://issues.apache.org/jira/browse/HIVE-28582 > Project: Hive > Issue Type: Bug > Components: CBO, HiveServer2 > Affects Versions: 4.0.1 > Reporter: Stamatis Zampetakis > Assignee: Stamatis Zampetakis > Priority: Major > Labels: pull-request-available > Fix For: 4.1.0 > > Attachments: hive_28582.q > > > {code:sql} > CREATE TABLE t ( > c1 string, > c2 string, > c3 string, > c4 string, > c5 string, > c6 string, > c7 string, > c8 string, > c9 string > ); > EXPLAIN CBO > SELECT a0 > FROM > (SELECT c1 as a0, > c1 as a1, > c1 as a2, > c2 as a3, > c2 as a4, > c2 as a5, > c3 as a6, > c3 as a7, > c3 as a8, > c4 as a9, > c4 as a10, > c4 as a11, > c5 as a12, > c5 as a13, > c5 as a14, > c6 as a15, > c6 as a16, > c6 as a17, > c7 as a18, > c7 as a19, > c7 as a20, > c8 as a21, > c8 as a22, > c8 as a23, > c9 as a24, > c9 as a25, > c9 as a26 > FROM t GROUP BY c1,c2,c3,c4,c5,c6,c7,c8,c9) t1 > GROUP BY a0, a4 > {code} > The query above spends a lot of time in compilation and eventually leads to > an OutOfMemoryError causing HiveServer2 to crash. > Note that each column in the inner query appears in the GROUP BY clause and > is aliased three times in the SELECT clause. The multiple aliases of the same > column as well as the fact that the column appears in the GROUP BY are > necessary conditions to trigger the problem. > The stack traces show that the query spends the entire time in field trimming > (HiveRelFieldTrimmer) while trying to obtain metadata information about the > unique keys.
> {noformat} > "fdc1b4d2-11d6-4b5c-a665-c321ee5f7e22 main" #1 prio=5 os_prio=0 > tid=0x00007f2e2800b800 nid=0x132ff runnable [0x00007f2e2e7fb000] > java.lang.Thread.State: RUNNABLE > at > org.apache.calcite.util.ImmutableBitSet$Builder.addAll(ImmutableBitSet.java:1086) > at > org.apache.calcite.util.ImmutableBitSet.union(ImmutableBitSet.java:691) > at > org.apache.calcite.rel.metadata.RelMdUniqueKeys$$Lambda$1124/1526659719.apply(Unknown > Source) > at > org.apache.hive.com.google.common.collect.Iterators$5.transform(Iterators.java:757) > at > org.apache.hive.com.google.common.collect.TransformedIterator.next(TransformedIterator.java:48) > at > org.apache.hive.com.google.common.collect.ImmutableCollection$Builder.addAll(ImmutableCollection.java:418) > at > org.apache.hive.com.google.common.collect.ImmutableCollection$ArrayBasedBuilder.addAll(ImmutableCollection.java:502) > at > org.apache.hive.com.google.common.collect.ImmutableSet$Builder.addAll(ImmutableSet.java:520) > at > org.apache.calcite.rel.metadata.RelMdUniqueKeys.getProjectUniqueKeys(RelMdUniqueKeys.java:162) > at > org.apache.calcite.rel.metadata.RelMdUniqueKeys.getUniqueKeys(RelMdUniqueKeys.java:93) > at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys_$(Unknown Source) > at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys(Unknown Source) > at > org.apache.calcite.rel.metadata.RelMetadataQuery.getUniqueKeys(RelMetadataQuery.java:464) > at > org.apache.calcite.rel.metadata.RelMdUniqueKeys.getProjectUniqueKeys(RelMdUniqueKeys.java:136) > at > org.apache.calcite.rel.metadata.RelMdUniqueKeys.getUniqueKeys(RelMdUniqueKeys.java:93) > at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys_$(Unknown Source) > at GeneratedMetadataHandler_UniqueKeys.getUniqueKeys(Unknown Source) > at > org.apache.calcite.rel.metadata.RelMetadataQuery.getUniqueKeys(RelMetadataQuery.java:464) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.generateNewGroupset(HiveRelFieldTrimmer.java:500) > at > 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimFields(HiveRelFieldTrimmer.java:641) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VisitDispatcher$$Lambda$1113/231262971.apply(Unknown > Source) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VarArgsFunc4.apply(HiveReflectUtil.java:322) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$HiveMethodDispatcher.invoke(HiveReflectUtil.java:221) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.dispatchTrimFields(RelFieldTrimmer.java:286) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimChild(HiveRelFieldTrimmer.java:203) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.trimFields(RelFieldTrimmer.java:447) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimFields(HiveRelFieldTrimmer.java:759) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VisitDispatcher$$Lambda$1112/565392473.apply(Unknown > Source) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$VarArgsFunc4.apply(HiveReflectUtil.java:322) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReflectUtil$HiveMethodDispatcher.invoke(HiveReflectUtil.java:221) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.dispatchTrimFields(RelFieldTrimmer.java:286) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.RelFieldTrimmer.trim(RelFieldTrimmer.java:170) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trim(HiveRelFieldTrimmer.java:164) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1674) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1583) > at > org.apache.calcite.tools.Frameworks.lambda$withPlanner$0(Frameworks.java:131) > at > 
org.apache.calcite.tools.Frameworks$$Lambda$716/1464860003.apply(Unknown > Source) > at > org.apache.calcite.prepare.CalcitePrepareImpl.perform(CalcitePrepareImpl.java:914) > at > org.apache.calcite.tools.Frameworks.withPrepare(Frameworks.java:180) > at > org.apache.calcite.tools.Frameworks.withPlanner(Frameworks.java:126) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.logicalPlan(CalcitePlanner.java:1335) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:583) > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:13163) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:476) > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:332) > at > org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:180) > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:332) > at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224) > at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:109) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:499) > {noformat} -- This message was sent by Atlassian Jira (v8.20.10#820010)