[ https://issues.apache.org/jira/browse/HIVE-24733?focusedWorklogId=552505&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-552505 ]
ASF GitHub Bot logged work on HIVE-24733: ----------------------------------------- Author: ASF GitHub Bot Created on: 15/Feb/21 09:57 Start Date: 15/Feb/21 09:57 Worklog Time Spent: 10m Work Description: aasha commented on a change in pull request #1942: URL: https://github.com/apache/hive/pull/1942#discussion_r575957081 ########## File path: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java ########## @@ -485,6 +485,7 @@ private Task getReplLoadRootTask(String sourceDb, String replicadb, boolean isIn metricCollector = new BootstrapLoadMetricCollector(replicadb, tuple.dumpLocation, 0, confTemp); } + confTemp.setBoolVar(HiveConf.ConfVars.REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET, false); Review comment: Add a comment for setting this to false. ########## File path: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ########## @@ -554,6 +554,8 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal REPL_RETAIN_PREV_DUMP_DIR_COUNT("hive.repl.retain.prev.dump.dir.count", 3, "Indicates maximium number of latest previously used dump-directories which would be retained when " + "hive.repl.retain.prev.dump.dir is set to true"), + REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET("hive.repl.retain.custom.db.locations.on.target", true, + "Indicates if source database has custom warehouse locations, whether that should be retained on target as well"), Review comment: managed table location? ########## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java ########## @@ -244,6 +245,7 @@ a database ( directory ) scope.database = true; } dbTracker.debugLog("database"); + dbEventFound = true; Review comment: should this be set to true only if scope.database = true. What is tracked using that? 
########## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/LoadDatabase.java ########## @@ -162,6 +184,10 @@ private boolean isDbEmpty(String dbName) throws HiveException { Map<String, String> parameters = new HashMap<>(dbObj.getParameters()); parameters.remove(ReplicationSpec.KEY.CURR_STATE_ID.toString()); + parameters.remove(ReplUtils.REPL_IS_CUSTOM_DB_LOC); Review comment: why are these params needed? ########## File path: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExclusiveReplica.java ########## @@ -522,6 +524,194 @@ public void externalTableReplicationDropDatabase() throws Throwable { verifyTableDataExists(replica, dbDataLocReplica, tableName, true); } + @Test + public void testCustomWarehouseLocations() throws Throwable { + List<String> withClauseOptions = getStagingLocationConfig(primary.repldDir, false); + String dbWhManagedLoc = new Path(primary.warehouseRoot.getParent(), "customManagedLoc").toUri().getPath(); + String dbWhExternalLoc = new Path(primary.externalTableWarehouseRoot.getParent(), + "customExternalLoc").toUri().getPath(); + String srcDb = "srcDb"; + WarehouseInstance.Tuple tuple = primary + .run("create database " + srcDb + " LOCATION '" + dbWhExternalLoc + "' MANAGEDLOCATION '" + dbWhManagedLoc + + "' WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = '1,2,3')") + .run("use " + srcDb) + .run("create table t1 (id int)") + .run("insert into table t1 values (500)") + .run("create external table t2 (id int)") + .run("insert into table t2 values (1000)") + .run("create table tp1 (id int) partitioned by (p int)") + .run("insert into tp1 partition(p=1) values(10)") + .run("insert into tp1 partition(p=2) values(20)") + .dump(srcDb, withClauseOptions); + + replica.load(replicatedDbName, srcDb, withClauseOptions) + .run("use " + replicatedDbName) + .run("show tables like 't1'") + .verifyResult("t1") + .run("select id from t1") + .verifyResult("500") + .run("show tables like 't2'") 
+ .verifyResult("t2") + .run("select id from t2") + .verifyResult("1000") + .run("show tables like 'tp1'") + .verifyResult("tp1") + .run("select id from tp1") + .verifyResults(new String[]{"10", "20"}); + verifyCustomDBLocations(dbWhManagedLoc, dbWhExternalLoc, true); + primary.run("use " + srcDb) + .run("insert into table t1 values (1000)") + .run("insert into table t2 values (2000)") + .run("insert into tp1 partition(p=1) values(30)") + .run("insert into tp1 partition(p=2) values(40)") + .dump(srcDb, withClauseOptions); + replica.load(replicatedDbName, srcDb, withClauseOptions) + .run("use " + replicatedDbName) + .run("show tables like 't1'") + .verifyResult("t1") + .run("select id from t1") + .verifyResults(new String[]{"500", "1000"}) + .run("show tables like 't2'") + .verifyResult("t2") + .run("select id from t2") + .verifyResults(new String[]{"1000", "2000"}) + .run("show tables like 'tp1'") + .verifyResult("tp1") + .run("select id from tp1") + .verifyResults(new String[]{"10", "20", "30", "40"}); + primary.run("use " + srcDb) + .run("insert into table t1 values (2000)") + .run("insert into table t2 values (3000)") + .run("create table t3 (id int)") + .run("insert into table t3 values (3000)") + .run("create external table t4 (id int)") + .run("insert into table t4 values (4000)") + .run("insert into tp1 partition(p=1) values(50)") + .run("insert into tp1 partition(p=2) values(60)") + .run("create table tp2 (id int) partitioned by (p int)") + .run("insert into tp2 partition(p=1) values(100)") + .run("insert into tp2 partition(p=2) values(200)") + .dump(srcDb, withClauseOptions); + replica.load(replicatedDbName, srcDb, withClauseOptions) + .run("use " + replicatedDbName) + .run("show tables like 't1'") + .verifyResult("t1") + .run("select id from t1") + .verifyResults(new String[]{"500", "1000", "2000"}) + .run("show tables like 't2'") + .verifyResult("t2") + .run("select id from t2") + .verifyResults(new String[]{"1000", "2000", "3000"}) + .run("show tables 
like 't3'") + .verifyResult("t3") + .run("select id from t3") + .verifyResults(new String[]{"3000"}) + .run("show tables like 't4'") + .verifyResult("t4") + .run("select id from t4") + .verifyResults(new String[]{"4000"}) + .run("select id from tp1") + .verifyResults(new String[]{"10", "20", "30", "40", "50", "60"}) + .run("show tables like 'tp1'") + .verifyResult("tp1") + .run("select id from tp2") + .verifyResults(new String[]{"100", "200"}); + } + + @Test + public void testCustomWarehouseLocationsConf() throws Throwable { + List<String> withClauseOptions = getStagingLocationConfig(primary.repldDir, false); + String dbWhManagedLoc = new Path(primary.warehouseRoot.getParent(), "customManagedLoc1").toUri().getPath(); + String dbWhExternalLoc = new Path(primary.externalTableWarehouseRoot.getParent(), + "customExternalLoc1").toUri().getPath(); + String srcDb = "srcDbConf"; + WarehouseInstance.Tuple tuple = primary + .run("create database " + srcDb + " LOCATION '" + dbWhExternalLoc + "' MANAGEDLOCATION '" + dbWhManagedLoc + + "' WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = '1,2,3')") + .run("use " + srcDb) + .run("create table t1 (id int)") + .run("insert into table t1 values (500)") + .run("create external table t2 (id int)") + .run("insert into table t2 values (1000)") + .dump(srcDb, withClauseOptions); + + withClauseOptions.add("'" + HiveConf.ConfVars.REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET.varname + "'='false'"); + replica.load(replicatedDbName, srcDb, withClauseOptions) + .run("use " + replicatedDbName) + .run("show tables like 't1'") + .verifyResult("t1") + .run("select id from t1") + .verifyResult("500") + .run("show tables like 't2'") + .verifyResult("t2") + .run("select id from t2") + .verifyResult("1000"); + verifyDefaultDBLocations(dbWhManagedLoc, dbWhExternalLoc); + primary.run("use " + srcDb) + .run("insert into table t1 values (1000)") + .run("insert into table t2 values (2000)") + .dump(srcDb, withClauseOptions); + 
replica.load(replicatedDbName, srcDb, withClauseOptions) + .run("use " + replicatedDbName) + .run("show tables like 't1'") + .verifyResult("t1") + .run("select id from t1") + .verifyResults(new String[]{"500", "1000"}) + .run("show tables like 't2'") + .verifyResult("t2") + .run("select id from t2") + .verifyResults(new String[]{"1000", "2000"}); + primary.run("use " + srcDb) + .run("insert into table t1 values (2000)") + .run("insert into table t2 values (3000)") + .run("create table t3 (id int)") + .run("insert into table t3 values (3000)") + .run("create external table t4 (id int)") + .run("insert into table t4 values (4000)") + .dump(srcDb, withClauseOptions); + replica.load(replicatedDbName, srcDb, withClauseOptions) + .run("use " + replicatedDbName) + .run("show tables like 't1'") + .verifyResult("t1") + .run("select id from t1") + .verifyResults(new String[]{"500", "1000", "2000"}) + .run("show tables like 't2'") + .verifyResult("t2") + .run("select id from t2") + .verifyResults(new String[]{"1000", "2000", "3000"}) + .run("show tables like 't3'") + .verifyResult("t3") + .run("select id from t3") + .verifyResults(new String[]{"3000"}) + .run("show tables like 't4'") + .verifyResult("t4") + .run("select id from t4") + .verifyResults(new String[]{"4000"}); + } + + private void verifyCustomDBLocations(String managedCustLocOnSrc, String externalCustLocOnSrc, boolean replaceCustPath) + throws Exception { + if (replaceCustPath ) { + Database replDatabase = replica.getDatabase(replicatedDbName); + String managedCustLocOnTgt = new Path(replDatabase.getManagedLocationUri()).toUri().getPath(); + Assert.assertEquals(managedCustLocOnSrc, managedCustLocOnTgt); + Assert.assertNotEquals(managedCustLocOnTgt, replica.warehouseRoot.toUri().getPath()); + String externalCustLocOnTgt = new Path(replDatabase.getLocationUri()).toUri().getPath(); + Assert.assertEquals(externalCustLocOnSrc, externalCustLocOnTgt); + Assert.assertNotEquals(externalCustLocOnTgt, new 
Path(replica.externalTableWarehouseRoot, + replicatedDbName.toLowerCase() + ".db").toUri().getPath()); + } + } + + private void verifyDefaultDBLocations(String managedCustLocOnSrc, String externalCustLocOnSrc) throws Exception { Review comment: can reuse the same method as verifyCustomDBLocations with replaceCustPath set to false ########## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/events/filesystem/FSDatabaseEvent.java ########## @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.ql.exec.repl.bootstrap.events.DatabaseEvent; import org.apache.hadoop.hive.ql.parse.EximUtil; +import org.apache.hadoop.hive.ql.parse.ReplicationSpec; Review comment: unused import ########## File path: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExclusiveReplica.java ########## @@ -522,6 +524,194 @@ public void externalTableReplicationDropDatabase() throws Throwable { verifyTableDataExists(replica, dbDataLocReplica, tableName, true); } + @Test + public void testCustomWarehouseLocations() throws Throwable { + List<String> withClauseOptions = getStagingLocationConfig(primary.repldDir, false); + String dbWhManagedLoc = new Path(primary.warehouseRoot.getParent(), "customManagedLoc").toUri().getPath(); + String dbWhExternalLoc = new Path(primary.externalTableWarehouseRoot.getParent(), + "customExternalLoc").toUri().getPath(); + String srcDb = "srcDb"; + WarehouseInstance.Tuple tuple = primary + .run("create database " + srcDb + " LOCATION '" + dbWhExternalLoc + "' MANAGEDLOCATION '" + dbWhManagedLoc + + "' WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = '1,2,3')") + .run("use " + srcDb) + .run("create table t1 (id int)") + .run("insert into table t1 values (500)") + .run("create external table t2 (id int)") + .run("insert into table t2 values (1000)") + .run("create table tp1 (id int) partitioned by (p int)") + .run("insert into tp1 partition(p=1) values(10)") + 
.run("insert into tp1 partition(p=2) values(20)") + .dump(srcDb, withClauseOptions); + + replica.load(replicatedDbName, srcDb, withClauseOptions) + .run("use " + replicatedDbName) + .run("show tables like 't1'") + .verifyResult("t1") + .run("select id from t1") + .verifyResult("500") + .run("show tables like 't2'") + .verifyResult("t2") + .run("select id from t2") + .verifyResult("1000") + .run("show tables like 'tp1'") + .verifyResult("tp1") + .run("select id from tp1") + .verifyResults(new String[]{"10", "20"}); + verifyCustomDBLocations(dbWhManagedLoc, dbWhExternalLoc, true); + primary.run("use " + srcDb) + .run("insert into table t1 values (1000)") + .run("insert into table t2 values (2000)") + .run("insert into tp1 partition(p=1) values(30)") + .run("insert into tp1 partition(p=2) values(40)") + .dump(srcDb, withClauseOptions); + replica.load(replicatedDbName, srcDb, withClauseOptions) + .run("use " + replicatedDbName) + .run("show tables like 't1'") + .verifyResult("t1") + .run("select id from t1") + .verifyResults(new String[]{"500", "1000"}) + .run("show tables like 't2'") + .verifyResult("t2") + .run("select id from t2") + .verifyResults(new String[]{"1000", "2000"}) + .run("show tables like 'tp1'") + .verifyResult("tp1") + .run("select id from tp1") + .verifyResults(new String[]{"10", "20", "30", "40"}); + primary.run("use " + srcDb) + .run("insert into table t1 values (2000)") + .run("insert into table t2 values (3000)") + .run("create table t3 (id int)") + .run("insert into table t3 values (3000)") + .run("create external table t4 (id int)") + .run("insert into table t4 values (4000)") + .run("insert into tp1 partition(p=1) values(50)") + .run("insert into tp1 partition(p=2) values(60)") + .run("create table tp2 (id int) partitioned by (p int)") + .run("insert into tp2 partition(p=1) values(100)") + .run("insert into tp2 partition(p=2) values(200)") + .dump(srcDb, withClauseOptions); + replica.load(replicatedDbName, srcDb, withClauseOptions) + .run("use 
" + replicatedDbName) + .run("show tables like 't1'") + .verifyResult("t1") + .run("select id from t1") + .verifyResults(new String[]{"500", "1000", "2000"}) + .run("show tables like 't2'") + .verifyResult("t2") + .run("select id from t2") + .verifyResults(new String[]{"1000", "2000", "3000"}) + .run("show tables like 't3'") + .verifyResult("t3") + .run("select id from t3") + .verifyResults(new String[]{"3000"}) + .run("show tables like 't4'") + .verifyResult("t4") + .run("select id from t4") + .verifyResults(new String[]{"4000"}) + .run("select id from tp1") + .verifyResults(new String[]{"10", "20", "30", "40", "50", "60"}) + .run("show tables like 'tp1'") + .verifyResult("tp1") + .run("select id from tp2") + .verifyResults(new String[]{"100", "200"}); + } + + @Test + public void testCustomWarehouseLocationsConf() throws Throwable { + List<String> withClauseOptions = getStagingLocationConfig(primary.repldDir, false); + String dbWhManagedLoc = new Path(primary.warehouseRoot.getParent(), "customManagedLoc1").toUri().getPath(); + String dbWhExternalLoc = new Path(primary.externalTableWarehouseRoot.getParent(), + "customExternalLoc1").toUri().getPath(); + String srcDb = "srcDbConf"; + WarehouseInstance.Tuple tuple = primary + .run("create database " + srcDb + " LOCATION '" + dbWhExternalLoc + "' MANAGEDLOCATION '" + dbWhManagedLoc + + "' WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = '1,2,3')") + .run("use " + srcDb) + .run("create table t1 (id int)") + .run("insert into table t1 values (500)") + .run("create external table t2 (id int)") + .run("insert into table t2 values (1000)") + .dump(srcDb, withClauseOptions); + + withClauseOptions.add("'" + HiveConf.ConfVars.REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET.varname + "'='false'"); + replica.load(replicatedDbName, srcDb, withClauseOptions) + .run("use " + replicatedDbName) + .run("show tables like 't1'") + .verifyResult("t1") + .run("select id from t1") + .verifyResult("500") + .run("show tables like 't2'") + 
.verifyResult("t2") + .run("select id from t2") + .verifyResult("1000"); + verifyDefaultDBLocations(dbWhManagedLoc, dbWhExternalLoc); Review comment: Also compare the table locations here, not only the database locations. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 552505) Time Spent: 20m (was: 10m) > Handle replication when db location and managed location is set to custom > location on source > -------------------------------------------------------------------------------------------- > > Key: HIVE-24733 > URL: https://issues.apache.org/jira/browse/HIVE-24733 > Project: Hive > Issue Type: Task > Reporter: Pravin Sinha > Assignee: Pravin Sinha > Priority: Major > Labels: pull-request-available > Time Spent: 20m > Remaining Estimate: 0h > > {color:#172b4d} {color} -- This message was sent by Atlassian Jira (v8.3.4#803005)