[ https://issues.apache.org/jira/browse/HIVE-21960?focusedWorklogId=283871&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-283871 ]
ASF GitHub Bot logged work on HIVE-21960:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 28/Jul/19 06:02
            Start Date: 28/Jul/19 06:02
    Worklog Time Spent: 10m
      Work Description: maheshk114 commented on pull request #735: HIVE-21960 : Avoid running stats updater and partition management task on a replicated table.
URL: https://github.com/apache/hive/pull/735#discussion_r307966238

 ##########
 File path: standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java
 ##########

 @@ -563,6 +564,94 @@ public void testPartitionDiscoverySkipInvalidPath() throws TException, IOExcepti
     assertEquals(4, partitions.size());
   }
 
+  @Test
+  public void testNoPartitionDiscoveryForReplTable() throws Exception {
+    String dbName = "db_repl1";
+    String tableName = "tbl_repl1";
+    Map<String, Column> colMap = buildAllColumns();
+    List<String> partKeys = Lists.newArrayList("state", "dt");
+    List<String> partKeyTypes = Lists.newArrayList("string", "date");
+    List<List<String>> partVals = Lists.newArrayList(
+        Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"),
+        Lists.newArrayList("CA", "1986-04-28"),
+        Lists.newArrayList("MN", "2018-11-31"));
+    createMetadata(DEFAULT_CATALOG_NAME, dbName, tableName, partKeys, partKeyTypes, partVals, colMap, false);
+    Table table = client.getTable(dbName, tableName);
+    List<Partition> partitions = client.listPartitions(dbName, tableName, (short) -1);
+    assertEquals(3, partitions.size());
+    String tableLocation = table.getSd().getLocation();
+    URI location = URI.create(tableLocation);
+    Path tablePath = new Path(location);
+    FileSystem fs = FileSystem.get(location, conf);
+    Path newPart1 = new Path(tablePath, "state=WA/dt=2018-12-01");
+    Path newPart2 = new Path(tablePath, "state=UT/dt=2018-12-02");
+    fs.mkdirs(newPart1);
+    fs.mkdirs(newPart2);
+    assertEquals(5, fs.listStatus(tablePath).length);
+    partitions = client.listPartitions(dbName, tableName, (short) -1);
+    assertEquals(3, partitions.size());
+
+    // table property is set to true, but the table is marked as replication target. The new
+    // partitions should not be created
+    table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true");
+    table.getParameters().put(ReplConst.REPL_TARGET_PROPERTY, "1");
+    client.alter_table(dbName, tableName, table);
+    runPartitionManagementTask(conf);
+    partitions = client.listPartitions(dbName, tableName, (short) -1);
+    assertEquals(3, partitions.size());
+
+    // change table type to external, delete a partition directory and make sure partition discovery works
+    table.getParameters().put("EXTERNAL", "true");
+    table.setTableType(TableType.EXTERNAL_TABLE.name());
+    client.alter_table(dbName, tableName, table);
+    // Delete location of one of the partitions. The partition discovery task should not drop
+    // that partition.
+    boolean deleted = fs.delete((new Path(URI.create(partitions.get(0).getSd().getLocation()))).getParent(),
+        true);
+    assertTrue(deleted);
+    assertEquals(4, fs.listStatus(tablePath).length);
+    runPartitionManagementTask(conf);
+    partitions = client.listPartitions(dbName, tableName, (short) -1);
+    assertEquals(3, partitions.size());
+  }
+
+  @Test
+  public void testNoPartitionRetentionForReplTarget() throws TException, InterruptedException {
+    String dbName = "db_repl2";
+    String tableName = "tbl_repl2";
+    Map<String, Column> colMap = buildAllColumns();
+    List<String> partKeys = Lists.newArrayList("state", "dt");
+    List<String> partKeyTypes = Lists.newArrayList("string", "date");
+    List<List<String>> partVals = Lists.newArrayList(
+        Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"),
+        Lists.newArrayList("CA", "1986-04-28"),
+        Lists.newArrayList("MN", "2018-11-31"));
+    // Check for the existence of partitions 10 seconds after the partition retention period has
+    // elapsed. Gives enough time for the partition retention task to work.
+    long partitionRetentionPeriodMs = 20000;
+    long waitingPeriodForTest = partitionRetentionPeriodMs + 10 * 1000;
+    createMetadata(DEFAULT_CATALOG_NAME, dbName, tableName, partKeys, partKeyTypes, partVals, colMap, false);
+    Table table = client.getTable(dbName, tableName);
+    List<Partition> partitions = client.listPartitions(dbName, tableName, (short) -1);
+    assertEquals(3, partitions.size());
+
+    table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true");
+    table.getParameters().put(PartitionManagementTask.PARTITION_RETENTION_PERIOD_TBLPROPERTY,
+        partitionRetentionPeriodMs + "ms");
+    table.getParameters().put(ReplConst.REPL_TARGET_PROPERTY, "1");
+    client.alter_table(dbName, tableName, table);
+
+    runPartitionManagementTask(conf);
+    partitions = client.listPartitions(dbName, tableName, (short) -1);
+    assertEquals(3, partitions.size());
+
+    // After 30s all partitions should remain intact for a table which is the target of replication.
+    Thread.sleep(waitingPeriodForTest);
+    runPartitionManagementTask(conf);
+    partitions = client.listPartitions(dbName, tableName, (short) -1);

Review comment:
   To make sure that the partition cleanup is actually done, add a table with the REPL_TARGET_PROPERTY not set and check that its partitions are not there.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 283871)
    Time Spent: 20m  (was: 10m)

> HMS tasks on replica
> --------------------
>
>                 Key: HIVE-21960
>                 URL: https://issues.apache.org/jira/browse/HIVE-21960
>             Project: Hive
>          Issue Type: Improvement
>          Components: HiveServer2, repl
>    Affects Versions: 4.0.0
>            Reporter: Ashutosh Bapat
>            Assignee: Ashutosh Bapat
>            Priority: Major
>              Labels: pull-request-available
>         Attachments: HIVE-21960.01.patch, HIVE-21960.02.patch, HIVE-21960.03.patch, Replication and House keeping tasks.pdf
>
>          Time Spent: 20m
>  Remaining Estimate: 0h
>
> An HMS performs a number of housekeeping tasks. Assess whether
> # they need to be performed on the replicated data, and
> # performing them on replicated data causes any issues, and how to fix those.



--
This message was sent by Atlassian JIRA
(v7.6.14#76016)
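
As a rough illustration of the check suggested in the review comment above, the retention test could add a second table that is NOT marked with ReplConst.REPL_TARGET_PROPERTY and verify that its expired partitions do get dropped. The sketch below is only an outline, not part of the patch: it reuses the helpers and local variables from the quoted test (createMetadata, runPartitionManagementTask, partKeys, partKeyTypes, partVals, colMap, partitionRetentionPeriodMs, waitingPeriodForTest); the names db_repl2_ctrl and tbl_repl2_ctrl are made up for illustration; and it assumes the retention task removes all partitions older than the retention period for a non-replicated table.

    // Illustrative sketch only: a control table without the repl-target marker, so the
    // retention task is expected to actually drop its expired partitions, proving that
    // the cleanup path is exercised in this test run.
    String ctrlDbName = "db_repl2_ctrl";      // hypothetical database name
    String ctrlTableName = "tbl_repl2_ctrl";  // hypothetical table name
    createMetadata(DEFAULT_CATALOG_NAME, ctrlDbName, ctrlTableName, partKeys, partKeyTypes,
        partVals, colMap, false);
    Table ctrlTable = client.getTable(ctrlDbName, ctrlTableName);
    ctrlTable.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true");
    ctrlTable.getParameters().put(PartitionManagementTask.PARTITION_RETENTION_PERIOD_TBLPROPERTY,
        partitionRetentionPeriodMs + "ms");
    // Note: ReplConst.REPL_TARGET_PROPERTY is deliberately NOT set on this table.
    client.alter_table(ctrlDbName, ctrlTableName, ctrlTable);

    // Let the retention period elapse, then run the housekeeping task once more.
    Thread.sleep(waitingPeriodForTest);
    runPartitionManagementTask(conf);

    // The replication-target table above still has its 3 partitions, while the control
    // table's partitions are expected to have been cleaned up by the retention task.
    assertEquals(0, client.listPartitions(ctrlDbName, ctrlTableName, (short) -1).size());

Whether the final count is exactly 0 depends on how the retention task computes partition age in this setup, so the assertion value would need to be confirmed against the behaviour of the existing retention tests in TestPartitionManagement.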