[ https://issues.apache.org/jira/browse/HIVE-26498?focusedWorklogId=806353&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-806353 ]
ASF GitHub Bot logged work on HIVE-26498: ----------------------------------------- Author: ASF GitHub Bot Created on: 06/Sep/22 13:04 Start Date: 06/Sep/22 13:04 Worklog Time Spent: 10m Work Description: lcspinter commented on code in PR #3552: URL: https://github.com/apache/hive/pull/3552#discussion_r963624074 ########## iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java: ########## @@ -977,4 +977,11 @@ public Configuration get() { return conf; } } + + @Override + public String getCurrentSnapshot(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { Review Comment: Why cannot we use long as return type? ########## ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java: ########## @@ -489,4 +489,13 @@ default void validateSinkDesc(FileSinkDesc sinkDesc) throws SemanticException { */ default void executeOperation(org.apache.hadoop.hive.ql.metadata.Table table, AlterTableExecuteSpec executeSpec) { } + + /** + * Query the unique snapshot id of the passed table. + * @param table - {@link org.apache.hadoop.hive.ql.metadata.Table} which snapshot id should be returned. + * @return String representation of the snapshotId or null if not supported. + */ + default String getCurrentSnapshot(org.apache.hadoop.hive.ql.metadata.Table table) { Review Comment: I would change this to `getCurrentSnapshotId` ########## ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewMetadata.java: ########## @@ -32,16 +36,19 @@ import static java.util.Collections.unmodifiableSet; public class MaterializedViewMetadata { + private static final Logger LOG = LoggerFactory.getLogger(MaterializedViewMetadata.class); + final CreationMetadata creationMetadata; MaterializedViewMetadata(CreationMetadata creationMetadata) { this.creationMetadata = creationMetadata; } public MaterializedViewMetadata( - String catalogName, String dbName, String mvName, Set<SourceTable> sourceTables, String validTxnList) { + String catalogName, String dbName, String mvName, Set<SourceTable> sourceTables, + MaterializationSnapshot snapshot) { this.creationMetadata = new CreationMetadata(catalogName, dbName, mvName, toFullTableNames(sourceTables)); - this.creationMetadata.setValidTxnList(validTxnList); + this.creationMetadata.setValidTxnList(snapshot.asJsonString()); Review Comment: This is not a List anymore, but rather an object. Maybe we should consider renaming it. ########## ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewMetadata.java: ########## @@ -81,15 +88,20 @@ public Collection<SourceTable> getSourceTables() { return unmodifiableList(creationMetadata.getSourceTables()); } - public String getValidTxnList() { - return creationMetadata.getValidTxnList(); + public MaterializationSnapshot getSnapshot() { Review Comment: Javadoc. Please cover what happens in case of native ACID table and non-native table. ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java: ########## @@ -98,7 +105,22 @@ public static Table extractTable(RelOptMaterialization materialization) { * materialized view definition uses external tables. */ public static Boolean isOutdatedMaterializedView( - String validTxnsList, HiveTxnManager txnMgr, + String validTxnsList, HiveTxnManager txnMgr, Hive db, + Set<TableName> tablesUsed, Table materializedViewTable) throws HiveException { + + MaterializedViewMetadata mvMetadata = materializedViewTable.getMVMetadata(); + MaterializationSnapshot snapshot = mvMetadata.getSnapshot(); + + if (snapshot.getTableSnapshots() != null && !snapshot.getTableSnapshots().isEmpty()) { Review Comment: snapshot can be null ########## iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java: ########## @@ -977,4 +977,11 @@ public Configuration get() { return conf; } } + + @Override + public String getCurrentSnapshot(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + TableDesc tableDesc = Utilities.getTableDesc(hmsTable); + Table table = IcebergTableUtil.getTable(conf, tableDesc.getProperties()); + return Long.toString(table.currentSnapshot().sequenceNumber()); Review Comment: `table.currentSnashot()` can be null, if the table is empty. ########## ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewMetadata.java: ########## @@ -81,15 +88,20 @@ public Collection<SourceTable> getSourceTables() { return unmodifiableList(creationMetadata.getSourceTables()); } - public String getValidTxnList() { - return creationMetadata.getValidTxnList(); + public MaterializationSnapshot getSnapshot() { + if (creationMetadata.getValidTxnList() == null || creationMetadata.getValidTxnList().isEmpty()) { + LOG.debug("Could not obtain materialization snapshot of materialized view {}.{}", + creationMetadata.getDbName(), creationMetadata.getDbName()); Review Comment: `getDbName` is passed twice ########## storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java: ########## @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.io.IOException; +import java.io.StringWriter; +import java.io.UncheckedIOException; +import java.io.Writer; +import java.util.Map; + +/** + * Class to store snapshot data of Materialized view source tables. + * The data represents the state of the source tables when the view was created/last rebuilt. + */ +public class MaterializationSnapshot { + + public static MaterializationSnapshot fromJson(String jsonString) { + try { + return new ObjectMapper().readValue(jsonString, MaterializationSnapshot.class); + } catch (JsonProcessingException e) { + // this is not a jsonString, fall back to treating it as ValidTxnWriteIdList + return new MaterializationSnapshot(jsonString, null); + } + } + + // snapshot of native ACID tables + private String validTxnList; + // snapshot of non-native ACID tables. Key is the fully qualified name of the table. Review Comment: Materialized views are only supported for iceberg V2 tables? If not, we should change this java doc. ########## storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java: ########## @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.io.IOException; +import java.io.StringWriter; +import java.io.UncheckedIOException; +import java.io.Writer; +import java.util.Map; + +/** + * Class to store snapshot data of Materialized view source tables. + * The data represents the state of the source tables when the view was created/last rebuilt. + */ +public class MaterializationSnapshot { + + public static MaterializationSnapshot fromJson(String jsonString) { + try { + return new ObjectMapper().readValue(jsonString, MaterializationSnapshot.class); + } catch (JsonProcessingException e) { + // this is not a jsonString, fall back to treating it as ValidTxnWriteIdList + return new MaterializationSnapshot(jsonString, null); + } + } + + // snapshot of native ACID tables + private String validTxnList; + // snapshot of non-native ACID tables. Key is the fully qualified name of the table. + // Value is the unique id of the snapshot provided by the table's storage HiveStorageHandler + private Map<String, String> tableSnapshots; + + private MaterializationSnapshot() { + } + + public MaterializationSnapshot(String validTxnList, Map<String, String> tableSnapshots) { + this.validTxnList = validTxnList; + this.tableSnapshots = tableSnapshots; + } + + public String asJsonString() { Review Comment: Few lines of javadoc wouldn't hurt ########## iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc.q: ########## @@ -0,0 +1,35 @@ + Issue Time Tracking ------------------- Worklog Id: (was: 806353) Time Spent: 1h 10m (was: 1h) > Implement MV maintenance with Iceberg sources using full rebuild > ---------------------------------------------------------------- > > Key: HIVE-26498 > URL: https://issues.apache.org/jira/browse/HIVE-26498 > Project: Hive > Issue Type: Sub-task > Components: Materialized views > Reporter: Krisztian Kasa > Assignee: Krisztian Kasa > Priority: Major > Labels: pull-request-available > Time Spent: 1h 10m > Remaining Estimate: 0h > > {code} > set hive.support.concurrency=true; > set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; > create external table tbl_ice(a int, b string, c int) stored by iceberg > stored as orc tblproperties ('format-version'='2'); > insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), > (4, 'four', 53), (5, 'five', 54); > create materialized view mat1 as > select b, c from tbl_ice where c > 52; > insert into tbl_ice values (111, 'one', 55), (333, 'two', 56); > explain cbo > alter materialized view mat1 rebuild; > alter materialized view mat1 rebuild; > {code} > MV full rebuild plan > {code} > CBO PLAN: > HiveProject(b=[$1], c=[$2]) > HiveFilter(condition=[>($2, 52)]) > HiveTableScan(table=[[default, tbl_ice]], table:alias=[tbl_ice]) > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)