[ https://issues.apache.org/jira/browse/HIVE-26628?focusedWorklogId=824101&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-824101 ]
ASF GitHub Bot logged work on HIVE-26628:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 08/Nov/22 05:10
            Start Date: 08/Nov/22 05:10
    Worklog Time Spent: 10m
      Work Description: kasakrisz commented on code in PR #3724:
URL: https://github.com/apache/hive/pull/3724#discussion_r1016148487

##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergQueryLifeTimeHook.java:
##########

@@ -58,6 +69,11 @@ public void afterExecution(QueryLifeTimeHookContext ctx, boolean hasError) {
   }

   private void checkAndRollbackIcebergCTAS(QueryLifeTimeHookContext ctx) {
+    if (!(HiveOperation.CREATETABLE_AS_SELECT.getOperationName().equals(

Review Comment:
   Yes, that is the case. Unfortunately the table is created without this patch too, as a side effect of executing an explain statement.
   The create is called from `HiveIcebergSerDe` init, and this happens at compile time during FileSinkDesc generation:
   https://github.com/apache/hive/blob/63b6134b97036d6d31924ad0ec323fca2016dace/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java#L129
   Later the Iceberg `Table` object is also required when its properties are added to the job configs while augmenting the FileSinkOperator plan:
   https://github.com/apache/hive/blob/63b6134b97036d6d31924ad0ec323fca2016dace/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java#L1611
   https://github.com/apache/hive/blob/63b6134b97036d6d31924ad0ec323fca2016dace/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java#L796
   The goal of this patch is to clean up this table after executing the statement.
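For context, the sketch below shows the general shape of Hive's QueryLifeTimeHook interface that HiveIcebergQueryLifeTimeHook implements, and where an after-execution cleanup of a table created as a compile-time side effect would sit. It is a minimal, hypothetical illustration, not the HIVE-26628 patch itself: the class name and the rollbackCtasTargetIfNeeded helper are placeholders for the real checkAndRollbackIcebergCTAS logic referenced in the diff above.

{code:java}
// Hypothetical sketch only -- not the HIVE-26628 patch. It illustrates the
// QueryLifeTimeHook lifecycle that HiveIcebergQueryLifeTimeHook plugs into and
// where a cleanup of a compile-time-created CTAS target table would happen.
import org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHook;
import org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHookContext;

public class CtasTargetCleanupHookSketch implements QueryLifeTimeHook {

  @Override
  public void beforeCompile(QueryLifeTimeHookContext ctx) {
    // Nothing to do before compilation.
  }

  @Override
  public void afterCompile(QueryLifeTimeHookContext ctx, boolean hasError) {
    // By this point the CTAS target table can already exist: HiveIcebergSerDe
    // creates it during compilation, while the FileSinkDesc is generated.
  }

  @Override
  public void beforeExecution(QueryLifeTimeHookContext ctx) {
    // The table object created at compile time is still needed for execution;
    // its properties are added to the job configs when the FileSinkOperator
    // plan is augmented.
  }

  @Override
  public void afterExecution(QueryLifeTimeHookContext ctx, boolean hasError) {
    // Once the statement has finished (e.g. a plain EXPLAIN of a CTAS, or a
    // failed CTAS), a target table that only exists as a compile-time side
    // effect can be dropped again.
    rollbackCtasTargetIfNeeded(ctx, hasError);
  }

  // Hypothetical placeholder: decide whether the statement was a CTAS whose
  // target table was created during compilation but never legitimately
  // populated, and drop it if so. In the Iceberg handler this corresponds to
  // checkAndRollbackIcebergCTAS.
  private void rollbackCtasTargetIfNeeded(QueryLifeTimeHookContext ctx, boolean hasError) {
    throw new UnsupportedOperationException("illustrative placeholder only");
  }
}
{code}

A custom hook of this shape would normally be registered through the hive.query.lifetime.hooks configuration property; in this issue the check is instead added to the existing HiveIcebergQueryLifeTimeHook shown in the diff.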
Issue Time Tracking
-------------------

    Worklog Id:     (was: 824101)
    Time Spent: 2h 40m  (was: 2.5h)

> Iceberg table is created when running explain ctas command
> ----------------------------------------------------------
>
>                 Key: HIVE-26628
>                 URL: https://issues.apache.org/jira/browse/HIVE-26628
>             Project: Hive
>          Issue Type: Bug
>          Components: StorageHandler
>            Reporter: Krisztian Kasa
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 2h 40m
>  Remaining Estimate: 0h
>
> {code}
> create table source(a int, b string, c int);
>
> explain
> create table tbl_ice stored by iceberg stored as orc tblproperties ('format-version'='2') as
> select a, b, c from source;
>
> create table tbl_ice stored by iceberg stored as orc tblproperties ('format-version'='2') as
> select a, b, c from source;
> {code}
> {code}
> org.apache.hadoop.hive.ql.parse.SemanticException: org.apache.hadoop.hive.ql.parse.SemanticException: Table already exists: default.tbl_ice
> 	at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeCreateTable(SemanticAnalyzer.java:13963)
> 	at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genResolvedParseTree(SemanticAnalyzer.java:12528)
> 	at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12693)
> 	at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:460)
> 	at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:317)
> 	at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224)
> 	at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:106)
> 	at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:522)
> 	at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:474)
> 	at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:439)
> 	at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:433)
> 	at org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:121)
> 	at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:227)
> 	at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:255)
> 	at org.apache.hadoop.hive.cli.CliDriver.processCmd1(CliDriver.java:200)
> 	at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:126)
> 	at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:421)
> 	at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:352)
> 	at org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:727)
> 	at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:697)
> 	at org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:114)
> 	at org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:157)
> 	at org.apache.hadoop.hive.cli.TestIcebergLlapLocalCliDriver.testCliDriver(TestIcebergLlapLocalCliDriver.java:60)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:498)
> 	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59)
> 	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
> 	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56)
> 	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
> 	at org.apache.hadoop.hive.cli.control.CliAdapter$2$1.evaluate(CliAdapter.java:135)
> 	at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
> 	at org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100)
> 	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366)
> 	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103)
> 	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63)
> 	at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
> 	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
> 	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
> 	at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
> 	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
> 	at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
> 	at org.junit.runners.Suite.runChild(Suite.java:128)
> 	at org.junit.runners.Suite.runChild(Suite.java:27)
> 	at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
> 	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
> 	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
> 	at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
> 	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
> 	at org.apache.hadoop.hive.cli.control.CliAdapter$1$1.evaluate(CliAdapter.java:95)
> 	at org.junit.rules.RunRules.evaluate(RunRules.java:20)
> 	at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
> 	at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
> 	at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
> 	at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
> 	at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
> 	at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
> 	at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:377)
> 	at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:138)
> 	at org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:465)
> 	at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:451)
> Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: Table already exists: default.tbl_ice
> 	at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeCreateTable(SemanticAnalyzer.java:13960)
> 	... 61 more
> {code}
> The EXPLAIN ... command creates the Iceberg table default.tbl_ice, hence the CTAS command executed after it fails with "Table already exists".

--
This message was sent by Atlassian Jira
(v8.20.10#820010)