[ 
https://issues.apache.org/jira/browse/HIVE-24733?focusedWorklogId=552505&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-552505
 ]

ASF GitHub Bot logged work on HIVE-24733:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 15/Feb/21 09:57
            Start Date: 15/Feb/21 09:57
    Worklog Time Spent: 10m 
      Work Description: aasha commented on a change in pull request #1942:
URL: https://github.com/apache/hive/pull/1942#discussion_r575957081



##########
File path: 
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
##########
@@ -485,6 +485,7 @@ private Task getReplLoadRootTask(String sourceDb, String 
replicadb, boolean isIn
       metricCollector = new BootstrapLoadMetricCollector(replicadb, 
tuple.dumpLocation, 0,
         confTemp);
     }
+    
confTemp.setBoolVar(HiveConf.ConfVars.REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET,
 false);

Review comment:
       Add a comment for setting this to false.

##########
File path: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
##########
@@ -554,6 +554,8 @@ private static void populateLlapDaemonVarsSet(Set<String> 
llapDaemonVarsSetLocal
     REPL_RETAIN_PREV_DUMP_DIR_COUNT("hive.repl.retain.prev.dump.dir.count", 3,
             "Indicates maximium number of latest previously used 
dump-directories which would be retained when " +
              "hive.repl.retain.prev.dump.dir is set to true"),
+    
REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET("hive.repl.retain.custom.db.locations.on.target",
 true,
+            "Indicates if source database has custom warehouse locations, 
whether that should be retained on target as well"),

Review comment:
       managed table location?

##########
File path: ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java
##########
@@ -244,6 +245,7 @@ a database ( directory )
           scope.database = true;
         }
         dbTracker.debugLog("database");
+        dbEventFound = true;

Review comment:
       Should this be set to true only if scope.database = true? What is 
tracked using that?

##########
File path: 
ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/LoadDatabase.java
##########
@@ -162,6 +184,10 @@ private boolean isDbEmpty(String dbName) throws 
HiveException {
     Map<String, String> parameters = new HashMap<>(dbObj.getParameters());
     parameters.remove(ReplicationSpec.KEY.CURR_STATE_ID.toString());
 
+    parameters.remove(ReplUtils.REPL_IS_CUSTOM_DB_LOC);

Review comment:
       why are these params needed?

##########
File path: 
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExclusiveReplica.java
##########
@@ -522,6 +524,194 @@ public void externalTableReplicationDropDatabase() throws 
Throwable {
     verifyTableDataExists(replica, dbDataLocReplica, tableName, true);
   }
 
+  @Test
+  public void testCustomWarehouseLocations() throws Throwable {
+    List<String> withClauseOptions = 
getStagingLocationConfig(primary.repldDir, false);
+    String dbWhManagedLoc = new Path(primary.warehouseRoot.getParent(), 
"customManagedLoc").toUri().getPath();
+    String dbWhExternalLoc = new 
Path(primary.externalTableWarehouseRoot.getParent(),
+            "customExternalLoc").toUri().getPath();
+    String srcDb = "srcDb";
+    WarehouseInstance.Tuple tuple = primary
+            .run("create database " + srcDb + " LOCATION '" + dbWhExternalLoc 
+ "' MANAGEDLOCATION '" + dbWhManagedLoc
+                    + "' WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = 
'1,2,3')")
+            .run("use " + srcDb)
+            .run("create table t1 (id int)")
+            .run("insert into table t1 values (500)")
+            .run("create external table t2 (id int)")
+            .run("insert into table t2 values (1000)")
+            .run("create table tp1 (id int) partitioned by (p int)")
+            .run("insert into tp1 partition(p=1) values(10)")
+            .run("insert into tp1 partition(p=2) values(20)")
+            .dump(srcDb, withClauseOptions);
+
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResult("500")
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResult("1000")
+            .run("show tables like 'tp1'")
+            .verifyResult("tp1")
+            .run("select id from tp1")
+            .verifyResults(new String[]{"10", "20"});
+    verifyCustomDBLocations(dbWhManagedLoc, dbWhExternalLoc, true);
+    primary.run("use " + srcDb)
+            .run("insert into table t1 values (1000)")
+            .run("insert into table t2 values (2000)")
+            .run("insert into tp1 partition(p=1) values(30)")
+            .run("insert into tp1 partition(p=2) values(40)")
+            .dump(srcDb, withClauseOptions);
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResults(new String[]{"500", "1000"})
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResults(new String[]{"1000", "2000"})
+            .run("show tables like 'tp1'")
+            .verifyResult("tp1")
+            .run("select id from tp1")
+            .verifyResults(new String[]{"10", "20", "30", "40"});
+    primary.run("use " + srcDb)
+            .run("insert into table t1 values (2000)")
+            .run("insert into table t2 values (3000)")
+            .run("create table t3 (id int)")
+            .run("insert into table t3 values (3000)")
+            .run("create external table t4 (id int)")
+            .run("insert into table t4 values (4000)")
+            .run("insert into tp1 partition(p=1) values(50)")
+            .run("insert into tp1 partition(p=2) values(60)")
+            .run("create table tp2 (id int) partitioned by (p int)")
+            .run("insert into tp2 partition(p=1) values(100)")
+            .run("insert into tp2 partition(p=2) values(200)")
+            .dump(srcDb, withClauseOptions);
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResults(new String[]{"500", "1000", "2000"})
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResults(new String[]{"1000", "2000", "3000"})
+            .run("show tables like 't3'")
+            .verifyResult("t3")
+            .run("select id from t3")
+            .verifyResults(new String[]{"3000"})
+            .run("show tables like 't4'")
+            .verifyResult("t4")
+            .run("select id from t4")
+            .verifyResults(new String[]{"4000"})
+            .run("select id from tp1")
+            .verifyResults(new String[]{"10", "20", "30", "40", "50", "60"})
+            .run("show tables like 'tp1'")
+            .verifyResult("tp1")
+            .run("select id from tp2")
+            .verifyResults(new String[]{"100", "200"});
+  }
+
+  @Test
+  public void testCustomWarehouseLocationsConf() throws Throwable {
+    List<String> withClauseOptions = 
getStagingLocationConfig(primary.repldDir, false);
+    String dbWhManagedLoc = new Path(primary.warehouseRoot.getParent(), 
"customManagedLoc1").toUri().getPath();
+    String dbWhExternalLoc = new 
Path(primary.externalTableWarehouseRoot.getParent(),
+            "customExternalLoc1").toUri().getPath();
+    String srcDb = "srcDbConf";
+    WarehouseInstance.Tuple tuple = primary
+            .run("create database " + srcDb + " LOCATION '" + dbWhExternalLoc 
+ "' MANAGEDLOCATION '" + dbWhManagedLoc
+                    + "' WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = 
'1,2,3')")
+            .run("use " + srcDb)
+            .run("create table t1 (id int)")
+            .run("insert into table t1 values (500)")
+            .run("create external table t2 (id int)")
+            .run("insert into table t2 values (1000)")
+            .dump(srcDb, withClauseOptions);
+
+    withClauseOptions.add("'" + 
HiveConf.ConfVars.REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET.varname + 
"'='false'");
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResult("500")
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResult("1000");
+    verifyDefaultDBLocations(dbWhManagedLoc, dbWhExternalLoc);
+    primary.run("use " + srcDb)
+            .run("insert into table t1 values (1000)")
+            .run("insert into table t2 values (2000)")
+            .dump(srcDb, withClauseOptions);
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResults(new String[]{"500", "1000"})
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResults(new String[]{"1000", "2000"});
+    primary.run("use " + srcDb)
+            .run("insert into table t1 values (2000)")
+            .run("insert into table t2 values (3000)")
+            .run("create table t3 (id int)")
+            .run("insert into table t3 values (3000)")
+            .run("create external table t4 (id int)")
+            .run("insert into table t4 values (4000)")
+            .dump(srcDb, withClauseOptions);
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResults(new String[]{"500", "1000", "2000"})
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResults(new String[]{"1000", "2000", "3000"})
+            .run("show tables like 't3'")
+            .verifyResult("t3")
+            .run("select id from t3")
+            .verifyResults(new String[]{"3000"})
+            .run("show tables like 't4'")
+            .verifyResult("t4")
+            .run("select id from t4")
+            .verifyResults(new String[]{"4000"});
+  }
+
+  private void verifyCustomDBLocations(String managedCustLocOnSrc, String 
externalCustLocOnSrc, boolean replaceCustPath)
+          throws Exception {
+    if (replaceCustPath ) {
+      Database replDatabase  = replica.getDatabase(replicatedDbName);
+      String managedCustLocOnTgt = new 
Path(replDatabase.getManagedLocationUri()).toUri().getPath();
+      Assert.assertEquals(managedCustLocOnSrc,  managedCustLocOnTgt);
+      Assert.assertNotEquals(managedCustLocOnTgt,  
replica.warehouseRoot.toUri().getPath());
+      String externalCustLocOnTgt = new 
Path(replDatabase.getLocationUri()).toUri().getPath();
+      Assert.assertEquals(externalCustLocOnSrc,  externalCustLocOnTgt);
+      Assert.assertNotEquals(externalCustLocOnTgt,  new 
Path(replica.externalTableWarehouseRoot,
+              replicatedDbName.toLowerCase()  + ".db").toUri().getPath());
+    }
+  }
+
+  private void verifyDefaultDBLocations(String managedCustLocOnSrc, String 
externalCustLocOnSrc) throws Exception {

Review comment:
       Can reuse the verifyCustomDBLocations method here, with 
replaceCustPath set to false.

##########
File path: 
ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/events/filesystem/FSDatabaseEvent.java
##########
@@ -24,6 +24,7 @@
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.ql.exec.repl.bootstrap.events.DatabaseEvent;
 import org.apache.hadoop.hive.ql.parse.EximUtil;
+import org.apache.hadoop.hive.ql.parse.ReplicationSpec;

Review comment:
       unused import

##########
File path: 
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExclusiveReplica.java
##########
@@ -522,6 +524,194 @@ public void externalTableReplicationDropDatabase() throws 
Throwable {
     verifyTableDataExists(replica, dbDataLocReplica, tableName, true);
   }
 
+  @Test
+  public void testCustomWarehouseLocations() throws Throwable {
+    List<String> withClauseOptions = 
getStagingLocationConfig(primary.repldDir, false);
+    String dbWhManagedLoc = new Path(primary.warehouseRoot.getParent(), 
"customManagedLoc").toUri().getPath();
+    String dbWhExternalLoc = new 
Path(primary.externalTableWarehouseRoot.getParent(),
+            "customExternalLoc").toUri().getPath();
+    String srcDb = "srcDb";
+    WarehouseInstance.Tuple tuple = primary
+            .run("create database " + srcDb + " LOCATION '" + dbWhExternalLoc 
+ "' MANAGEDLOCATION '" + dbWhManagedLoc
+                    + "' WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = 
'1,2,3')")
+            .run("use " + srcDb)
+            .run("create table t1 (id int)")
+            .run("insert into table t1 values (500)")
+            .run("create external table t2 (id int)")
+            .run("insert into table t2 values (1000)")
+            .run("create table tp1 (id int) partitioned by (p int)")
+            .run("insert into tp1 partition(p=1) values(10)")
+            .run("insert into tp1 partition(p=2) values(20)")
+            .dump(srcDb, withClauseOptions);
+
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResult("500")
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResult("1000")
+            .run("show tables like 'tp1'")
+            .verifyResult("tp1")
+            .run("select id from tp1")
+            .verifyResults(new String[]{"10", "20"});
+    verifyCustomDBLocations(dbWhManagedLoc, dbWhExternalLoc, true);
+    primary.run("use " + srcDb)
+            .run("insert into table t1 values (1000)")
+            .run("insert into table t2 values (2000)")
+            .run("insert into tp1 partition(p=1) values(30)")
+            .run("insert into tp1 partition(p=2) values(40)")
+            .dump(srcDb, withClauseOptions);
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResults(new String[]{"500", "1000"})
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResults(new String[]{"1000", "2000"})
+            .run("show tables like 'tp1'")
+            .verifyResult("tp1")
+            .run("select id from tp1")
+            .verifyResults(new String[]{"10", "20", "30", "40"});
+    primary.run("use " + srcDb)
+            .run("insert into table t1 values (2000)")
+            .run("insert into table t2 values (3000)")
+            .run("create table t3 (id int)")
+            .run("insert into table t3 values (3000)")
+            .run("create external table t4 (id int)")
+            .run("insert into table t4 values (4000)")
+            .run("insert into tp1 partition(p=1) values(50)")
+            .run("insert into tp1 partition(p=2) values(60)")
+            .run("create table tp2 (id int) partitioned by (p int)")
+            .run("insert into tp2 partition(p=1) values(100)")
+            .run("insert into tp2 partition(p=2) values(200)")
+            .dump(srcDb, withClauseOptions);
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResults(new String[]{"500", "1000", "2000"})
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResults(new String[]{"1000", "2000", "3000"})
+            .run("show tables like 't3'")
+            .verifyResult("t3")
+            .run("select id from t3")
+            .verifyResults(new String[]{"3000"})
+            .run("show tables like 't4'")
+            .verifyResult("t4")
+            .run("select id from t4")
+            .verifyResults(new String[]{"4000"})
+            .run("select id from tp1")
+            .verifyResults(new String[]{"10", "20", "30", "40", "50", "60"})
+            .run("show tables like 'tp1'")
+            .verifyResult("tp1")
+            .run("select id from tp2")
+            .verifyResults(new String[]{"100", "200"});
+  }
+
+  @Test
+  public void testCustomWarehouseLocationsConf() throws Throwable {
+    List<String> withClauseOptions = 
getStagingLocationConfig(primary.repldDir, false);
+    String dbWhManagedLoc = new Path(primary.warehouseRoot.getParent(), 
"customManagedLoc1").toUri().getPath();
+    String dbWhExternalLoc = new 
Path(primary.externalTableWarehouseRoot.getParent(),
+            "customExternalLoc1").toUri().getPath();
+    String srcDb = "srcDbConf";
+    WarehouseInstance.Tuple tuple = primary
+            .run("create database " + srcDb + " LOCATION '" + dbWhExternalLoc 
+ "' MANAGEDLOCATION '" + dbWhManagedLoc
+                    + "' WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = 
'1,2,3')")
+            .run("use " + srcDb)
+            .run("create table t1 (id int)")
+            .run("insert into table t1 values (500)")
+            .run("create external table t2 (id int)")
+            .run("insert into table t2 values (1000)")
+            .dump(srcDb, withClauseOptions);
+
+    withClauseOptions.add("'" + 
HiveConf.ConfVars.REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET.varname + 
"'='false'");
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResult("500")
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResult("1000");
+    verifyDefaultDBLocations(dbWhManagedLoc, dbWhExternalLoc);

Review comment:
       compare table locations also




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 552505)
    Time Spent: 20m  (was: 10m)

> Handle replication when db location and managed location is set to custom 
> location on source
> --------------------------------------------------------------------------------------------
>
>                 Key: HIVE-24733
>                 URL: https://issues.apache.org/jira/browse/HIVE-24733
>             Project: Hive
>          Issue Type: Task
>            Reporter: Pravin Sinha
>            Assignee: Pravin Sinha
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 20m
>  Remaining Estimate: 0h
>
> {color:#172b4d} {color}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to