[ 
https://issues.apache.org/jira/browse/HIVE-26498?focusedWorklogId=806944&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-806944
 ]

ASF GitHub Bot logged work on HIVE-26498:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 08/Sep/22 09:27
            Start Date: 08/Sep/22 09:27
    Worklog Time Spent: 10m 
      Work Description: lcspinter commented on code in PR #3552:
URL: https://github.com/apache/hive/pull/3552#discussion_r965718266


##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java:
##########
@@ -160,11 +181,79 @@ public static Boolean isOutdatedMaterializedView(
     return false;
   }
 
+  private static Boolean isOutdatedMaterializedView(
+          MaterializationSnapshot snapshot, Hive db,
+          Set<TableName> tablesUsed, Table materializedViewTable) throws 
HiveException {
+    List<String> tablesUsedNames = tablesUsed.stream()
+        .map(tableName -> TableName.getDbTable(tableName.getDb(), 
tableName.getTable()))
+        .collect(Collectors.toList());
+
+    Map<String, String> snapshotMap = snapshot.getTableSnapshots();
+    if (snapshotMap == null || snapshotMap.isEmpty()) {
+      LOG.debug("Materialized view " + 
materializedViewTable.getFullyQualifiedName() +
+              " ignored for rewriting as we could not obtain current snapshot 
ids");
+      return null;
+    }
+
+    Set<String> storedTablesUsed = 
materializedViewTable.getMVMetadata().getSourceTableFullNames();
+    for (String fullyQualifiedTableName : tablesUsedNames) {
+      // Note. If the materialized view does not contain a table that is 
contained in the query,
+      // we do not need to check whether that specific table is outdated or 
not. If a rewriting
+      // is produced in those cases, it is because that additional table is 
joined with the
+      // existing tables with an append-columns only join, i.e., PK-FK + not 
null.
+      if (!storedTablesUsed.contains(fullyQualifiedTableName)) {
+        continue;
+      }
+
+      Table table = db.getTable(fullyQualifiedTableName);
+      if (table.getStorageHandler() == null) {
+        LOG.debug("Materialized view {} ignored for rewriting as we could not 
storage handler of table {}",
+                materializedViewTable.getFullyQualifiedName(), 
fullyQualifiedTableName);
+        return null;
+      }
+      String currentTableSnapshot = 
table.getStorageHandler().getCurrentSnapshotId(table);
+      if (isBlank(currentTableSnapshot)) {

Review Comment:
   The `currentTableSnapshot` will never be empty or null.



##########
storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java:
##########
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.util.Map;
+
+/**
+ * Class to store snapshot data of Materialized view source tables.
+ * The data represents the state of the source tables when the view was 
created/last rebuilt.
+ */
+public class MaterializationSnapshot {
+
+  public static MaterializationSnapshot fromJson(String jsonString) {
+    try {
+      return new ObjectMapper().readValue(jsonString, 
MaterializationSnapshot.class);
+    } catch (JsonProcessingException e) {
+      // this is not a jsonString, fall back to treating it as 
ValidTxnWriteIdList
+      return new MaterializationSnapshot(jsonString);
+    }
+  }
+
+  // Snapshot of native ACID tables.
+  private String validTxnList;
+  // Snapshot of non-native ACID and insert-only transactional tables. Key is 
the fully qualified name of the table.
+  // Value is the unique id of the snapshot provided by the table's storage 
HiveStorageHandler.
+  private Map<String, String> tableSnapshots;
+
+  private MaterializationSnapshot() {
+  }
+
+  public MaterializationSnapshot(String validTxnList) {
+    this.validTxnList = validTxnList;
+    this.tableSnapshots = null;
+  }
+
+  public MaterializationSnapshot(Map<String, String> tableSnapshots) {
+    this.validTxnList = null;
+    this.tableSnapshots = tableSnapshots;
+  }
+
+  /**
+   * Returns the json representation of this object.
+   * @return {@link String} containing a json.
+   */
+  public String asJsonString() {
+    try (Writer out = new StringWriter()) {
+      new ObjectMapper().writeValue(out, this);
+      return out.toString();
+    } catch (IOException e) {
+      throw new UncheckedIOException("Unable to convert " + this + " to json", 
e);
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "MaterializationSnapshot{" +
+            "validTxnList='" + validTxnList + '\'' +
+            ", tableSnapshots=" + tableSnapshots +
+            '}';
+  }
+
+  public String getValidTxnList() {
+    return validTxnList;
+  }
+
+  public Map<String, String> getTableSnapshots() {

Review Comment:
   Please add Javadoc to this method.



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java:
##########
@@ -403,4 +492,28 @@ private static RelNode 
copyNodeScanNewCluster(RelOptCluster optCluster, RelNode
     }
     return newScan;
   }
+
+  public static MaterializationSnapshot getSnapshotOf(DDLOperationContext 
context, Set<TableName> tables)
+          throws HiveException {
+    Map<String, String> snapshot = getSnapshotOf(context.getDb(), tables);
+    if (snapshot.isEmpty()) {
+      return new 
MaterializationSnapshot(context.getConf().get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
+    }
+
+    return new MaterializationSnapshot(snapshot);
+  }
+
+  private static Map<String, String> getSnapshotOf(Hive db, Set<TableName> 
tables) throws HiveException {
+    Map<String, String> snapshot = new HashMap<>(tables.size());
+    for (TableName tableName : tables) {
+      Table table = db.getTable(tableName);
+      if (table.getStorageHandler() != null) {
+        String sh = table.getStorageHandler().getCurrentSnapshotId(table);
+        if (isNotBlank(sh)) {

Review Comment:
   `sh` will never be empty or null.



##########
storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java:
##########
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.util.Map;
+
+/**
+ * Class to store snapshot data of Materialized view source tables.
+ * The data represents the state of the source tables when the view was 
created/last rebuilt.
+ */
+public class MaterializationSnapshot {
+
+  public static MaterializationSnapshot fromJson(String jsonString) {
+    try {
+      return new ObjectMapper().readValue(jsonString, 
MaterializationSnapshot.class);
+    } catch (JsonProcessingException e) {
+      // this is not a jsonString, fall back to treating it as 
ValidTxnWriteIdList
+      return new MaterializationSnapshot(jsonString);
+    }
+  }
+
+  // Snapshot of native ACID tables.
+  private String validTxnList;
+  // Snapshot of non-native ACID and insert-only transactional tables. Key is 
the fully qualified name of the table.
+  // Value is the unique id of the snapshot provided by the table's storage 
HiveStorageHandler.
+  private Map<String, String> tableSnapshots;
+
+  private MaterializationSnapshot() {
+  }
+
+  public MaterializationSnapshot(String validTxnList) {
+    this.validTxnList = validTxnList;
+    this.tableSnapshots = null;
+  }
+
+  public MaterializationSnapshot(Map<String, String> tableSnapshots) {
+    this.validTxnList = null;
+    this.tableSnapshots = tableSnapshots;
+  }
+
+  /**
+   * Returns the json representation of this object.
+   * @return {@link String} containing a json.
+   */
+  public String asJsonString() {
+    try (Writer out = new StringWriter()) {
+      new ObjectMapper().writeValue(out, this);
+      return out.toString();
+    } catch (IOException e) {
+      throw new UncheckedIOException("Unable to convert " + this + " to json", 
e);
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "MaterializationSnapshot{" +
+            "validTxnList='" + validTxnList + '\'' +
+            ", tableSnapshots=" + tableSnapshots +
+            '}';
+  }
+
+  public String getValidTxnList() {

Review Comment:
   Please add Javadoc to this method.





Issue Time Tracking
-------------------

    Worklog Id:     (was: 806944)
    Time Spent: 3h 40m  (was: 3.5h)

> Implement MV maintenance with Iceberg sources using full rebuild
> ----------------------------------------------------------------
>
>                 Key: HIVE-26498
>                 URL: https://issues.apache.org/jira/browse/HIVE-26498
>             Project: Hive
>          Issue Type: Sub-task
>          Components: Materialized views
>            Reporter: Krisztian Kasa
>            Assignee: Krisztian Kasa
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 3h 40m
>  Remaining Estimate: 0h
>
> {code}
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> create external table tbl_ice(a int, b string, c int) stored by iceberg 
> stored as orc tblproperties ('format-version'='2');
> insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), 
> (4, 'four', 53), (5, 'five', 54);
> create materialized view mat1 as
> select b, c from tbl_ice where c > 52;
> insert into tbl_ice values (111, 'one', 55), (333, 'two', 56);
> explain cbo
> alter materialized view mat1 rebuild;
> alter materialized view mat1 rebuild;
> {code}
> MV full rebuild plan
> {code}
> CBO PLAN:
> HiveProject(b=[$1], c=[$2])
>   HiveFilter(condition=[>($2, 52)])
>     HiveTableScan(table=[[default, tbl_ice]], table:alias=[tbl_ice])
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to