[ https://issues.apache.org/jira/browse/HIVE-26498?focusedWorklogId=806944&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-806944 ]
ASF GitHub Bot logged work on HIVE-26498: ----------------------------------------- Author: ASF GitHub Bot Created on: 08/Sep/22 09:27 Start Date: 08/Sep/22 09:27 Worklog Time Spent: 10m Work Description: lcspinter commented on code in PR #3552: URL: https://github.com/apache/hive/pull/3552#discussion_r965718266 ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java: ########## @@ -160,11 +181,79 @@ public static Boolean isOutdatedMaterializedView( return false; } + private static Boolean isOutdatedMaterializedView( + MaterializationSnapshot snapshot, Hive db, + Set<TableName> tablesUsed, Table materializedViewTable) throws HiveException { + List<String> tablesUsedNames = tablesUsed.stream() + .map(tableName -> TableName.getDbTable(tableName.getDb(), tableName.getTable())) + .collect(Collectors.toList()); + + Map<String, String> snapshotMap = snapshot.getTableSnapshots(); + if (snapshotMap == null || snapshotMap.isEmpty()) { + LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() + + " ignored for rewriting as we could not obtain current snapshot ids"); + return null; + } + + Set<String> storedTablesUsed = materializedViewTable.getMVMetadata().getSourceTableFullNames(); + for (String fullyQualifiedTableName : tablesUsedNames) { + // Note. If the materialized view does not contain a table that is contained in the query, + // we do not need to check whether that specific table is outdated or not. If a rewriting + // is produced in those cases, it is because that additional table is joined with the + // existing tables with an append-columns only join, i.e., PK-FK + not null. 
+ if (!storedTablesUsed.contains(fullyQualifiedTableName)) { + continue; + } + + Table table = db.getTable(fullyQualifiedTableName); + if (table.getStorageHandler() == null) { + LOG.debug("Materialized view {} ignored for rewriting as we could not storage handler of table {}", + materializedViewTable.getFullyQualifiedName(), fullyQualifiedTableName); + return null; + } + String currentTableSnapshot = table.getStorageHandler().getCurrentSnapshotId(table); + if (isBlank(currentTableSnapshot)) { Review Comment: The `currentTableSnapshot` will never be empty or null. ########## storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java: ########## @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.io.IOException; +import java.io.StringWriter; +import java.io.UncheckedIOException; +import java.io.Writer; +import java.util.Map; + +/** + * Class to store snapshot data of Materialized view source tables. + * The data represents the state of the source tables when the view was created/last rebuilt. 
+ */ +public class MaterializationSnapshot { + + public static MaterializationSnapshot fromJson(String jsonString) { + try { + return new ObjectMapper().readValue(jsonString, MaterializationSnapshot.class); + } catch (JsonProcessingException e) { + // this is not a jsonString, fall back to treating it as ValidTxnWriteIdList + return new MaterializationSnapshot(jsonString); + } + } + + // Snapshot of native ACID tables. + private String validTxnList; + // Snapshot of non-native ACID and insert-only transactional tables. Key is the fully qualified name of the table. + // Value is the unique id of the snapshot provided by the table's storage HiveStorageHandler. + private Map<String, String> tableSnapshots; + + private MaterializationSnapshot() { + } + + public MaterializationSnapshot(String validTxnList) { + this.validTxnList = validTxnList; + this.tableSnapshots = null; + } + + public MaterializationSnapshot(Map<String, String> tableSnapshots) { + this.validTxnList = null; + this.tableSnapshots = tableSnapshots; + } + + /** + * Returns the json representation of this object. + * @return {@link String} containing a json. 
+ */ + public String asJsonString() { + try (Writer out = new StringWriter()) { + new ObjectMapper().writeValue(out, this); + return out.toString(); + } catch (IOException e) { + throw new UncheckedIOException("Unable to convert " + this + " to json", e); + } + } + + @Override + public String toString() { + return "MaterializationSnapshot{" + + "validTxnList='" + validTxnList + '\'' + + ", tableSnapshots=" + tableSnapshots + + '}'; + } + + public String getValidTxnList() { + return validTxnList; + } + + public Map<String, String> getTableSnapshots() { Review Comment: javadoc ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java: ########## @@ -403,4 +492,28 @@ private static RelNode copyNodeScanNewCluster(RelOptCluster optCluster, RelNode } return newScan; } + + public static MaterializationSnapshot getSnapshotOf(DDLOperationContext context, Set<TableName> tables) + throws HiveException { + Map<String, String> snapshot = getSnapshotOf(context.getDb(), tables); + if (snapshot.isEmpty()) { + return new MaterializationSnapshot(context.getConf().get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY)); + } + + return new MaterializationSnapshot(snapshot); + } + + private static Map<String, String> getSnapshotOf(Hive db, Set<TableName> tables) throws HiveException { + Map<String, String> snapshot = new HashMap<>(tables.size()); + for (TableName tableName : tables) { + Table table = db.getTable(tableName); + if (table.getStorageHandler() != null) { + String sh = table.getStorageHandler().getCurrentSnapshotId(table); + if (isNotBlank(sh)) { Review Comment: `sh` will never be empty or null. ########## storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java: ########## @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.io.IOException; +import java.io.StringWriter; +import java.io.UncheckedIOException; +import java.io.Writer; +import java.util.Map; + +/** + * Class to store snapshot data of Materialized view source tables. + * The data represents the state of the source tables when the view was created/last rebuilt. + */ +public class MaterializationSnapshot { + + public static MaterializationSnapshot fromJson(String jsonString) { + try { + return new ObjectMapper().readValue(jsonString, MaterializationSnapshot.class); + } catch (JsonProcessingException e) { + // this is not a jsonString, fall back to treating it as ValidTxnWriteIdList + return new MaterializationSnapshot(jsonString); + } + } + + // Snapshot of native ACID tables. + private String validTxnList; + // Snapshot of non-native ACID and insert-only transactional tables. Key is the fully qualified name of the table. + // Value is the unique id of the snapshot provided by the table's storage HiveStorageHandler. 
+ private Map<String, String> tableSnapshots; + + private MaterializationSnapshot() { + } + + public MaterializationSnapshot(String validTxnList) { + this.validTxnList = validTxnList; + this.tableSnapshots = null; + } + + public MaterializationSnapshot(Map<String, String> tableSnapshots) { + this.validTxnList = null; + this.tableSnapshots = tableSnapshots; + } + + /** + * Returns the json representation of this object. + * @return {@link String} containing a json. + */ + public String asJsonString() { + try (Writer out = new StringWriter()) { + new ObjectMapper().writeValue(out, this); + return out.toString(); + } catch (IOException e) { + throw new UncheckedIOException("Unable to convert " + this + " to json", e); + } + } + + @Override + public String toString() { + return "MaterializationSnapshot{" + + "validTxnList='" + validTxnList + '\'' + + ", tableSnapshots=" + tableSnapshots + + '}'; + } + + public String getValidTxnList() { Review Comment: javadoc Issue Time Tracking ------------------- Worklog Id: (was: 806944) Time Spent: 3h 40m (was: 3.5h) > Implement MV maintenance with Iceberg sources using full rebuild > ---------------------------------------------------------------- > > Key: HIVE-26498 > URL: https://issues.apache.org/jira/browse/HIVE-26498 > Project: Hive > Issue Type: Sub-task > Components: Materialized views > Reporter: Krisztian Kasa > Assignee: Krisztian Kasa > Priority: Major > Labels: pull-request-available > Time Spent: 3h 40m > Remaining Estimate: 0h > > {code} > set hive.support.concurrency=true; > set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; > create external table tbl_ice(a int, b string, c int) stored by iceberg > stored as orc tblproperties ('format-version'='2'); > insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), > (4, 'four', 53), (5, 'five', 54); > create materialized view mat1 as > select b, c from tbl_ice where c > 52; > insert into tbl_ice values (111, 'one', 55), (333, 'two', 
56); > explain cbo > alter materialized view mat1 rebuild; > alter materialized view mat1 rebuild; > {code} > MV full rebuild plan > {code} > CBO PLAN: > HiveProject(b=[$1], c=[$2]) > HiveFilter(condition=[>($2, 52)]) > HiveTableScan(table=[[default, tbl_ice]], table:alias=[tbl_ice]) > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)