[ 
https://issues.apache.org/jira/browse/HIVE-24852?focusedWorklogId=592942&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-592942
 ]

ASF GitHub Bot logged work on HIVE-24852:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 04/May/21 18:54
            Start Date: 04/May/21 18:54
    Worklog Time Spent: 10m 
      Work Description: ayushtkn commented on a change in pull request #2043:
URL: https://github.com/apache/hive/pull/2043#discussion_r626026659



##########
File path: ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java
##########
@@ -218,4 +219,62 @@ public String getName() {
   public boolean canExecuteInParallel() {
     return true;
   }
+
+  boolean copyUsingDistCpSnapshots(Path sourcePath, Path targetPath, 
UserGroupInformation proxyUser) throws IOException {
+
+    DistributedFileSystem targetFs = SnapshotUtils.getDFS(targetPath, conf);
+    boolean result = false;
+    if 
(getWork().getCopyMode().equals(SnapshotUtils.SnapshotCopyMode.DIFF_COPY)) {
+      LOG.info("Using snapshot diff copy for source: {} and target: {}", 
sourcePath, targetPath);
+       result = FileUtils
+          .distCpWithSnapshot(firstSnapshot(work.getSnapshotPrefix()), 
secondSnapshot(work.getSnapshotPrefix()),
+              Collections.singletonList(sourcePath), targetPath, proxyUser,
+              conf, ShimLoader.getHadoopShims());
+       if(result) {
+         // Delete the older snapshot from last iteration.
+         targetFs.deleteSnapshot(targetPath, 
firstSnapshot(work.getSnapshotPrefix()));
+       } else {
+         throw new IOException(
+             "Can not successfully copy external table data using snapshot 
diff. source:" + sourcePath + " and target: "
+                 + targetPath);
+       }
+    } else if 
(getWork().getCopyMode().equals(SnapshotUtils.SnapshotCopyMode.INITIAL_COPY)) {
+      LOG.info("Using snapshot initial copy for source: {} and target: {}", 
sourcePath, targetPath);
+      // Get the path relative to the initial snapshot for copy.
+      Path snapRelPath =
+          new Path(sourcePath, HdfsConstants.DOT_SNAPSHOT_DIR + "/" + 
secondSnapshot(work.getSnapshotPrefix()));
+
+      // This is the first time we are copying, check if the target is 
snapshottable or not, if not attempt to allow
+      // snapshots.
+      SnapshotUtils.allowSnapshot(targetFs, targetPath, conf);
+      // Attempt to delete the snapshot, in case this is a bootstrap post a 
failed incremental, Since in case of
+      // bootstrap we go from start, so delete any pre-existing snapshot.
+      SnapshotUtils.deleteSnapshotSafe(targetFs, targetPath, 
firstSnapshot(work.getSnapshotPrefix()));
+
+      // Copy from the initial snapshot path.
+      result = runFallbackDistCp(snapRelPath, targetPath, proxyUser);
+    }
+
+    // Create a new snapshot at target Filesystem. For the next iteration.
+    if (result) {
+      SnapshotUtils.createSnapshot(targetFs, targetPath, 
firstSnapshot(work.getSnapshotPrefix()), conf);
+    }
+    return result;
+  }
+
+  private boolean runFallbackDistCp(Path sourcePath, Path targetPath, 
UserGroupInformation proxyUser)
+      throws IOException {
+     // do we create a new conf and only here provide this additional option 
so that we get away from

Review comment:
       This was already there; it is showing up again in the diff because of the refactor.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 592942)
    Time Spent: 4h  (was: 3h 50m)

> Add support for Snapshots during external table replication
> -----------------------------------------------------------
>
>                 Key: HIVE-24852
>                 URL: https://issues.apache.org/jira/browse/HIVE-24852
>             Project: Hive
>          Issue Type: Improvement
>            Reporter: Ayush Saxena
>            Assignee: Ayush Saxena
>            Priority: Critical
>              Labels: pull-request-available
>         Attachments: Design Doc HDFS Snapshots for External Table Replication-01.pdf
>
>          Time Spent: 4h
>  Remaining Estimate: 0h
>
> Add support for use of snapshot diff for external table replication.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to