[ https://issues.apache.org/jira/browse/HIVE-27658?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Stamatis Zampetakis reassigned HIVE-27658: ------------------------------------------ Assignee: Stamatis Zampetakis > Error resolving join keys during conversion to dynamic partition hashjoin > ------------------------------------------------------------------------- > > Key: HIVE-27658 > URL: https://issues.apache.org/jira/browse/HIVE-27658 > Project: Hive > Issue Type: Bug > Components: Query Planning > Affects Versions: 3.1.3, 4.0.0-beta-1 > Reporter: xiaojunxiang > Assignee: Stamatis Zampetakis > Priority: Major > Attachments: hive27658-query-plan.pdf, hive27658.q > > > In certain cases the compilation of queries fail during the conversion to a > dynamic partition hash join with the stacktrace similar to the one shown > below. > {noformat} > 2023-08-31T10:22:21,738 WARN [HiveServer2-Handler-Pool: Thread-100]: > thrift.ThriftCLIService (()) - Error executing statement: > org.apache.hive.service.cli.HiveSQLException: Error while compiling > statement: FAILED: SemanticException Error resolving join keys > at > org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:335) > ~[hive-service-100.jar:?] > at > org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:199) > ~[hive-service-100.jar:?] > at > org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:260) > ~[hive-service-100.jar:?] > at > org.apache.hive.service.cli.operation.Operation.run(Operation.java:247) > ~[hive-service-100.jar:?] > at > org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:541) > ~[hive-service-100.jar:?] > at > org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:527) > ~[hive-service-100.jar:?] > at > org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:312) > ~[hive-service-100.jar:?] > at > org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:562) > ~[hive-service-100.jar:?] > at > org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557) > ~[hive-exec-100.jar:?] > at > org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542) > ~[hive-exec-100.jar:?] > at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) > ~[hive-exec-100.jar:?] > at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge$Server$TUGIAssumingProcessor.process(HadoopThriftAuthBridge.java:647) > ~[hive-exec-100.jar:?] > at > org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286) > ~[hive-exec-100.jar:?] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > ~[?:1.8.0_312] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > ~[?:1.8.0_312] > at java.lang.Thread.run(Thread.java:748) [?:1.8.0_312] > Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: Error resolving > join keys > at > org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor.getMapJoinDesc(MapJoinProcessor.java:1105) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor.convertJoinOpMapJoinOp(MapJoinProcessor.java:372) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.convertJoinMapJoin(ConvertJoinMapJoin.java:1056) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.convertJoinDynamicPartitionedHashJoin(ConvertJoinMapJoin.java:1280) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.fallbackToReduceSideJoin(ConvertJoinMapJoin.java:1312) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.checkAndConvertSMBJoin(ConvertJoinMapJoin.java:371) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.process(ConvertJoinMapJoin.java:151) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.lib.ForwardWalker.walk(ForwardWalker.java:74) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsDependentOptimizations(TezCompiler.java:447) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:160) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:144) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12320) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:330) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:164) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285) > ~[hive-exec-100.jar:?] > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:659) > ~[hive-exec-100.jar:?] > at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1826) > ~[hive-exec-100.jar:?] > at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1773) > ~[hive-exec-100.jar:?] > at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1768) > ~[hive-exec-100.jar:?] > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126) > ~[hive-exec-100.jar:?] > at > org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:197) > ~[hive-service-100.jar:?] > ... 15 more > 2023-08-31T10:22:33,838 INFO > [org.apache.ranger.audit.queue.AuditBatchQueue0]: provider.BaseAuditHandler > (()) > {noformat} > The problem was originally reported for a query with a LEFT SEMI JOIN and the > scenario is outlined below. > {code:sql} > create database test_condition; > use test_condition; > create external table to_szyy_user_right_issue_log_df(flow_no_ string, > activity_code_ string, right_id_ string, user_id_ string,issue_flag_ string) > partitioned by (ds string) > STORED AS parquet TBLPROPERTIES('parquet.compress'='SNAPPY'); > create external table to_t0111_s62t1_cst_prft_df(dccp_stcd > string,dccp_ordr_ar_id string) partitioned by (ds string) > STORED AS parquet TBLPROPERTIES('parquet.compress'='SNAPPY'); > alter table to_szyy_user_right_issue_log_df add partition(ds='2023-08-24'); > alter table to_t0111_s62t1_cst_prft_df add partition(ds='2023-08-24'); > alter table to_szyy_user_right_issue_log_df partition(ds='2023-08-24') update > statistics set('numRows'='8146725','rawDataSize'='46331126445'); > alter table to_t0111_s62t1_cst_prft_df partition(ds='2023-08-24') update > statistics set('numRows'='15680439','rawDataSize'='56180088521'); > set hive.auto.convert.join.noconditionaltask.size=8153960755 > set hive.auto.convert.join=true; > set hive.optimize.dynamic.partition.hashjoin=true; > set hive.stats.fetch.column.stats=false; > set hive.cbo.enable=true; > explain > select flow_no_, activity_code_, right_id_, user_id_ > from test_condition.to_szyy_user_right_issue_log_df rlog > left semi join test_condition.to_t0111_s62t1_cst_prft_df prft on prft.ds = > '2023-08-24' and rlog.flow_no_ = prft.dccp_ordr_ar_id > group by flow_no_, activity_code_, right_id_, user_id_; > {code} > The {{SemanticException}} reported above is thrown by the [dynamic partition > hashjoin transformation > logic|https://github.com/apache/hive/blob/9b4ea7affa4902fc2849f1a88b68103940fc9866/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java#L1590] > of so a workaround consists in disabling the respective optimization via the > {{hive.optimize.dynamic.partition.hashjoin}} property. -- This message was sent by Atlassian Jira (v8.20.10#820010)