[
https://issues.apache.org/jira/browse/HIVE-26628?focusedWorklogId=825229&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-825229
]
ASF GitHub Bot logged work on HIVE-26628:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 11/Nov/22 10:17
Start Date: 11/Nov/22 10:17
Worklog Time Spent: 10m
Work Description: deniskuzZ commented on code in PR #3745:
URL: https://github.com/apache/hive/pull/3745#discussion_r1020081943
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -804,31 +806,59 @@ public static Schema schema(Configuration config) {
   @VisibleForTesting
   static void overlayTableProperties(Configuration configuration, TableDesc tableDesc, Map<String, String> map) {
     Properties props = tableDesc.getProperties();
-    Table table = IcebergTableUtil.getTable(configuration, props);
-    String schemaJson = SchemaParser.toJson(table.schema());
     Maps.fromProperties(props).entrySet().stream()
         .filter(entry -> !map.containsKey(entry.getKey())) // map overrides tableDesc properties
         .forEach(entry -> map.put(entry.getKey(), entry.getValue()));
-    map.put(InputFormatConfig.TABLE_IDENTIFIER, props.getProperty(Catalogs.NAME));
-    map.put(InputFormatConfig.TABLE_LOCATION, table.location());
-    map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
-    props.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(table.spec()));
-
-    // serialize table object into config
-    Table serializableTable = SerializableTable.copyOf(table);
-    checkAndSkipIoConfigSerialization(configuration, serializableTable);
-    map.put(InputFormatConfig.SERIALIZED_TABLE_PREFIX + tableDesc.getTableName(),
-        SerializationUtil.serializeToBase64(serializableTable));
+    try {
+      Table table = IcebergTableUtil.getTable(configuration, props);
+      String schemaJson = SchemaParser.toJson(table.schema());
+
+      map.put(InputFormatConfig.TABLE_IDENTIFIER, props.getProperty(Catalogs.NAME));
+      map.put(InputFormatConfig.TABLE_LOCATION, table.location());
+      map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+      props.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(table.spec()));
+
+      // serialize table object into config
+      Table serializableTable = SerializableTable.copyOf(table);
+      checkAndSkipIoConfigSerialization(configuration, serializableTable);
+      map.put(InputFormatConfig.SERIALIZED_TABLE_PREFIX + tableDesc.getTableName(),
+          SerializationUtil.serializeToBase64(serializableTable));
+
+      // We need to remove this otherwise the job.xml will be invalid as column comments are separated with '\0' and
+      // the serialization utils fail to serialize this character
+      map.remove("columns.comments");
+
+      // save schema into table props as well to avoid repeatedly hitting the HMS during serde initializations
+      // this is an exception to the interface documentation, but it's a safe operation to add this property
+      props.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+    } catch (NoSuchTableException ex) {
+      if (!(StringUtils.isNotBlank(props.getProperty(hive_metastoreConstants.TABLE_IS_CTAS)) &&
+          Boolean.parseBoolean(props.getProperty(org.apache.hadoop.hive.conf.Constants.IS_EXPLAIN)))) {
+        throw ex;
+      }
-    // We need to remove this otherwise the job.xml will be invalid as column comments are separated with '\0' and
-    // the serialization utils fail to serialize this character
-    map.remove("columns.comments");
+      try {
+        map.put(InputFormatConfig.TABLE_IDENTIFIER, props.getProperty(Catalogs.NAME));
+        map.put(InputFormatConfig.SERIALIZED_TABLE_PREFIX + tableDesc.getTableName(),
+            SerializationUtil.serializeToBase64(null));
-    // save schema into table props as well to avoid repeatedly hitting the HMS during serde initializations
-    // this is an exception to the interface documentation, but it's a safe operation to add this property
-    props.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+        String location = map.get(hive_metastoreConstants.META_TABLE_LOCATION);
+        if (StringUtils.isBlank(location)) {
+          location = props.getProperty(hive_metastoreConstants.TABLE_IS_CTAS);
+        }
+        map.put(InputFormatConfig.TABLE_LOCATION, location);
+
+        AbstractSerDe serDe = tableDesc.getDeserializer(configuration);
+        HiveIcebergSerDe icebergSerDe = (HiveIcebergSerDe) serDe;
+        String schemaJson = SchemaParser.toJson(icebergSerDe.getTableSchema());
+        map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+        props.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+      } catch (Exception e) {
+        throw new RuntimeException(e);
Review Comment:
should we throw MetaException?
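
For reference, a minimal sketch of the alternative the review comment raises, assuming the hive-metastore classes are on the classpath; the class, interface, and message below are illustrative stand-ins, not part of the patch:

{code}
import org.apache.hadoop.hive.metastore.api.MetaException;

// Illustrative only: rethrow the failure as Hive's checked MetaException
// instead of a bare RuntimeException. MetaException is checked, so
// overlayTableProperties would have to declare it up the call chain.
class MetaExceptionSketch {

  // Hypothetical stand-in for the SerDe-based schema lookup in the patch.
  interface SchemaSupplier {
    String get() throws Exception;
  }

  static String overlaySchema(SchemaSupplier supplier) throws MetaException {
    try {
      return supplier.get();
    } catch (Exception e) {
      MetaException me = new MetaException("Cannot overlay Iceberg table properties: " + e.getMessage());
      me.initCause(e); // keep the original failure for debugging
      throw me;
    }
  }
}
{code}

Since MetaException is checked, adopting it would ripple through the signature of overlayTableProperties and its callers, which may be why the patch wraps in RuntimeException instead.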
Issue Time Tracking
-------------------
Worklog Id: (was: 825229)
Time Spent: 4h 20m (was: 4h 10m)
> Iceberg table is created when running explain ctas command
> ----------------------------------------------------------
>
> Key: HIVE-26628
> URL: https://issues.apache.org/jira/browse/HIVE-26628
> Project: Hive
> Issue Type: Bug
> Components: StorageHandler
> Reporter: Krisztian Kasa
> Priority: Major
> Labels: pull-request-available
> Time Spent: 4h 20m
> Remaining Estimate: 0h
>
> {code}
> create table source(a int, b string, c int);
> explain
> create table tbl_ice stored by iceberg stored as orc tblproperties ('format-version'='2') as
> select a, b, c from source;
> create table tbl_ice stored by iceberg stored as orc tblproperties ('format-version'='2') as
> select a, b, c from source;
> {code}
> {code}
> org.apache.hadoop.hive.ql.parse.SemanticException: org.apache.hadoop.hive.ql.parse.SemanticException: Table already exists: default.tbl_ice
> at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeCreateTable(SemanticAnalyzer.java:13963)
> at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genResolvedParseTree(SemanticAnalyzer.java:12528)
> at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12693)
> at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:460)
> at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:317)
> at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224)
> at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:106)
> at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:522)
> at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:474)
> at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:439)
> at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:433)
> at org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:121)
> at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:227)
> at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:255)
> at org.apache.hadoop.hive.cli.CliDriver.processCmd1(CliDriver.java:200)
> at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:126)
> at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:421)
> at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:352)
> at org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:727)
> at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:697)
> at org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:114)
> at org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:157)
> at org.apache.hadoop.hive.cli.TestIcebergLlapLocalCliDriver.testCliDriver(TestIcebergLlapLocalCliDriver.java:60)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59)
> at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
> at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56)
> at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
> at org.apache.hadoop.hive.cli.control.CliAdapter$2$1.evaluate(CliAdapter.java:135)
> at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
> at org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100)
> at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366)
> at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103)
> at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63)
> at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
> at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
> at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
> at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
> at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
> at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
> at org.junit.runners.Suite.runChild(Suite.java:128)
> at org.junit.runners.Suite.runChild(Suite.java:27)
> at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
> at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
> at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
> at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
> at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
> at org.apache.hadoop.hive.cli.control.CliAdapter$1$1.evaluate(CliAdapter.java:95)
> at org.junit.rules.RunRules.evaluate(RunRules.java:20)
> at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
> at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
> at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
> at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
> at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
> at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
> at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:377)
> at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:138)
> at org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:465)
> at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:451)
> Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: Table already exists: default.tbl_ice
> at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeCreateTable(SemanticAnalyzer.java:13960)
> ... 61 more
> {code}
> The EXPLAIN ... command creates the Iceberg table default.tbl_ice, so the
> CTAS command executed after it fails with "Table already exists".
--
This message was sent by Atlassian Jira
(v8.20.10#820010)