[ https://issues.apache.org/jira/browse/HIVE-22977?focusedWorklogId=841377&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-841377 ]
ASF GitHub Bot logged work on HIVE-22977: ----------------------------------------- Author: ASF GitHub Bot Created on: 24/Jan/23 13:22 Start Date: 24/Jan/23 13:22 Worklog Time Spent: 10m Work Description: SourabhBadhya commented on code in PR #3801: URL: https://github.com/apache/hive/pull/3801#discussion_r1085312464 ########## ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorPipeline.java: ########## @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.txn.compactor; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import java.io.IOException; + +/** + * Runs different compactions based on the order in which compactors are added.<br> + * Mainly used for fall back mechanism for Merge compaction. 
+ */ +public class CompactorPipeline { + + private Compactor compactor; + private final boolean isMR; + + public CompactorPipeline(Compactor compactor) { + this.compactor = compactor; + this.isMR = compactor instanceof MRCompactor; + } + + CompactorPipeline addCompactor(Compactor newCompactor) { + compactor = new FallbackCompactor(compactor, newCompactor); + return this; + } + + public boolean isMRCompaction() { + return isMR; + } + + public Boolean execute(CompactorContext input) throws IOException, HiveException, InterruptedException { + return compactor.run(input); + } +} + +/** + * This class defines a way of handling fallback given any number of fallback compactors.<br> + * It can encapsulate other fallback compactors within itself. + */ +class FallbackCompactor implements Compactor { Review Comment: Marked it as `static final` since it is used for testing fallback. Also moved it as an inner class to CompactorPipeline. ########## ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MergeCompactor.java: ########## @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.txn.compactor; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.ValidWriteIdList; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.txn.CompactionInfo; +import org.apache.hadoop.hive.ql.io.AcidDirectory; +import org.apache.hadoop.hive.ql.io.AcidOutputFormat; +import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.orc.OrcFile; +import org.apache.hadoop.hive.ql.io.orc.Reader; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.regex.Matcher; + +final class MergeCompactor extends QueryCompactor { + + private static final Logger LOG = LoggerFactory.getLogger(MergeCompactor.class.getName()); + + @Override + public boolean run(CompactorContext context) throws IOException, HiveException, InterruptedException { + HiveConf hiveConf = context.getConf(); + Table table = context.getTable(); + AcidDirectory dir = context.getAcidDirectory(); + ValidWriteIdList writeIds = context.getValidWriteIdList(); + StorageDescriptor storageDescriptor = context.getSd(); + CompactionInfo compactionInfo = context.getCompactionInfo(); + if (isMergeCompaction(hiveConf, dir, writeIds, storageDescriptor)) { Review Comment: Done. 
Issue Time Tracking ------------------- Worklog Id: (was: 841377) Time Spent: 9h (was: 8h 50m) > Merge delta files instead of running a query in major/minor compaction > ---------------------------------------------------------------------- > > Key: HIVE-22977 > URL: https://issues.apache.org/jira/browse/HIVE-22977 > Project: Hive > Issue Type: Improvement > Reporter: László Pintér > Assignee: Sourabh Badhya > Priority: Major > Labels: pull-request-available > Attachments: HIVE-22977.01.patch, HIVE-22977.02.patch > > Time Spent: 9h > Remaining Estimate: 0h > > [Compaction Optimization] > We should analyse the possibility of moving a delta file instead of running a > major/minor compaction query. > Please consider the following use cases: > - full acid table but only insert queries were run. This means that no > delete delta directories were created. Is it possible to merge the delta > directory contents without running a compaction query? > - full acid table, initiating queries through the streaming API. If there > are no aborted transactions during the streaming, is it possible to merge the > delta directory contents without running a compaction query? -- This message was sent by Atlassian Jira (v8.20.10#820010)