[ https://issues.apache.org/jira/browse/HIVE-24274?focusedWorklogId=523057&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-523057 ]
ASF GitHub Bot logged work on HIVE-24274: ----------------------------------------- Author: ASF GitHub Bot Created on: 11/Dec/20 07:18 Start Date: 11/Dec/20 07:18 Worklog Time Spent: 10m Work Description: kasakrisz commented on a change in pull request #1706: URL: https://github.com/apache/hive/pull/1706#discussion_r540738755 ########## File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewsCache.java ########## @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.metadata; + +import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.function.BiFunction; + +import static java.util.Collections.emptyList; +import static java.util.Collections.unmodifiableList; + +/** + * Collection for storing {@link RelOptMaterialization}s. 
+ * RelOptMaterialization can be lookup by + * - the Materialized View fully qualified name + * - query text. + * This implementation contains two {@link ConcurrentHashMap} one for name based and one for query text based lookup. + * The map contents are synchronized during each dml operation: Dml operations are performed initially on the map + * which provides name based lookup. The map which provides query text based lookup is updated by lambda expressions + * passed to {@link ConcurrentHashMap#compute(Object, BiFunction)}. + */ +public class MaterializedViewsCache { + private static final Logger LOG = LoggerFactory.getLogger(MaterializedViewsCache.class); + + // Key is the database name. Value a map from the qualified name to the view object. + private final ConcurrentMap<String, ConcurrentMap<String, RelOptMaterialization>> materializedViews = + new ConcurrentHashMap<>(); + // Map for looking up materialization by view query text + private final Map<String, List<RelOptMaterialization>> sqlToMaterializedView = new ConcurrentHashMap<>(); + + + public void putIfAbsent(Table materializedViewTable, RelOptMaterialization materialization) { + ConcurrentMap<String, RelOptMaterialization> dbMap = ensureDbMap(materializedViewTable); + + // You store the materialized view + dbMap.compute(materializedViewTable.getTableName(), (mvTableName, relOptMaterialization) -> { + List<RelOptMaterialization> materializationList = sqlToMaterializedView.computeIfAbsent( + materializedViewTable.getViewExpandedText().toLowerCase(), s -> new ArrayList<>()); + materializationList.add(materialization); + return materialization; + }); + + LOG.debug("Materialized view {}.{} added to registry", + materializedViewTable.getDbName(), materializedViewTable.getTableName()); + } + + private ConcurrentMap<String, RelOptMaterialization> ensureDbMap(Table materializedViewTable) { + // We are going to create the map for each view in the given database + ConcurrentMap<String, RelOptMaterialization> dbMap = 
+ new ConcurrentHashMap<String, RelOptMaterialization>(); + // If we are caching the MV, we include it in the cache + final ConcurrentMap<String, RelOptMaterialization> prevDbMap = materializedViews.putIfAbsent( + materializedViewTable.getDbName(), dbMap); + if (prevDbMap != null) { + dbMap = prevDbMap; + } + return dbMap; + } + + public void refresh( + Table oldMaterializedViewTable, Table materializedViewTable, RelOptMaterialization newMaterialization) { + ConcurrentMap<String, RelOptMaterialization> dbMap = ensureDbMap(materializedViewTable); + + dbMap.compute(materializedViewTable.getTableName(), (mvTableName, existingMaterialization) -> { + List<RelOptMaterialization> optMaterializationList = sqlToMaterializedView.computeIfAbsent( + materializedViewTable.getViewExpandedText().toLowerCase(), s -> new ArrayList<>()); + + if (existingMaterialization == null) { + // If it was not existing, we just create it + optMaterializationList.add(newMaterialization); + return newMaterialization; + } + Table existingMaterializedViewTable = HiveMaterializedViewUtils.extractTable(existingMaterialization); + if (existingMaterializedViewTable.equals(oldMaterializedViewTable)) { + // If the old version is the same, we replace it + optMaterializationList.remove(existingMaterialization); + optMaterializationList.add(newMaterialization); + return newMaterialization; + } + // Otherwise, we return existing materialization + return existingMaterialization; + }); + + LOG.debug("Refreshed materialized view {}.{} -> {}.{}", + oldMaterializedViewTable.getDbName(), oldMaterializedViewTable.getTableName(), + materializedViewTable.getDbName(), materializedViewTable.getTableName()); + } + + public void remove(Table materializedViewTable) { + ConcurrentMap<String, RelOptMaterialization> dbMap = materializedViews.get(materializedViewTable.getDbName()); + if (dbMap != null) { + // Delete only if the create time for the input materialized view table and the table + // in the map match. 
Otherwise, keep the one in the map. + dbMap.computeIfPresent(materializedViewTable.getTableName(), (mvTableName, oldMaterialization) -> { + if (HiveMaterializedViewUtils.extractTable(oldMaterialization).equals(materializedViewTable)) { + List<RelOptMaterialization> materializationList = + sqlToMaterializedView.get(materializedViewTable.getViewExpandedText().toLowerCase()); Review comment: removed `toLowerCase()` calls. Added a test which confirms that lookup is case sensitive now. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 523057) Time Spent: 2h 20m (was: 2h 10m) > Implement Query Text based MaterializedView rewrite > --------------------------------------------------- > > Key: HIVE-24274 > URL: https://issues.apache.org/jira/browse/HIVE-24274 > Project: Hive > Issue Type: Improvement > Reporter: Krisztian Kasa > Assignee: Krisztian Kasa > Priority: Major > Labels: pull-request-available > Time Spent: 2h 20m > Remaining Estimate: 0h > > Besides the way queries are currently rewritten to use materialized views in > Hive this project provides an alternative: > Compare the query text with the stored query text of the materialized views. If we > find a match, the original query's logical plan can be replaced by a scan on > the materialized view. > - Only materialized views which are enabled to rewrite can participate > - Use existing *HiveMaterializedViewsRegistry* through *Hive* object by > adding a lookup method by query text. > - There might be more than one materialized view with the same query > text. In this case choose the first valid one. 
> - Validation can be done by calling > *Hive.validateMaterializedViewsFromRegistry()* > - The scope of this first patch is rewriting only queries whose entire text can be > matched. > - Use the expanded query text (fully qualified column and table names) for > comparing. -- This message was sent by Atlassian Jira (v8.3.4#803005)