anmolnar commented on code in PR #7222:
URL: https://github.com/apache/hbase/pull/7222#discussion_r2285675769
##########
hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java:
##########
@@ -190,6 +191,15 @@ private void handleContinuousBackup(Admin admin) throws
IOException {
// set overall backup status: complete. Here we make sure to complete the
backup.
// After this checkpoint, even if entering cancel process, will let the
backup finished
backupInfo.setState(BackupState.COMPLETE);
+
+ if (!conf.getBoolean(REPLICATION_BULKLOAD_ENABLE_KEY, false)) {
+ System.out.println("WARNING: Bulkload replication is not enabled. "
+ + "Since continuous backup is using HBase replication, bulk loaded
files won't be backup up as part of continuous backup. "
Review Comment:
typo: "backed up"
##########
hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/ContinuousBackupReplicationEndpoint.java:
##########
@@ -372,15 +394,149 @@ private void close() {
}
}
+ @RestrictedApi(
+ explanation = "Package-private for test visibility only. Do not use
outside tests.",
+ link = "",
+ allowedOnPath =
"(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/replication/ContinuousBackupReplicationEndpoint.java)")
+ void uploadBulkLoadFiles(long dayInMillis, List<Path> bulkLoadFiles)
+ throws BulkLoadUploadException {
+ if (bulkLoadFiles.isEmpty()) {
+ LOG.debug("{} No bulk load files to upload for {}",
Utils.logPeerId(peerId), dayInMillis);
+ return;
+ }
+
+ LOG.debug("{} Starting upload of {} bulk load files",
Utils.logPeerId(peerId),
+ bulkLoadFiles.size());
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("{} Bulk load files to upload: {}", Utils.logPeerId(peerId),
+
bulkLoadFiles.stream().map(Path::toString).collect(Collectors.joining(", ")));
+ }
+ String dayDirectoryName = formatToDateString(dayInMillis);
+ Path bulkloadDir = new Path(backupFileSystemManager.getBulkLoadFilesDir(),
dayDirectoryName);
+ try {
+ backupFileSystemManager.getBackupFs().mkdirs(bulkloadDir);
+ } catch (IOException e) {
+ throw new BulkLoadUploadException(
+ String.format("%s Failed to create bulkload directory in backupFS: %s",
+ Utils.logPeerId(peerId), bulkloadDir),
+ e);
+ }
+
+ for (Path file : bulkLoadFiles) {
+ Path sourcePath;
+ try {
+ sourcePath = getBulkLoadFileStagingPath(file);
+ } catch (FileNotFoundException fnfe) {
+ throw new BulkLoadUploadException(
+ String.format("%s Bulk load file not found: %s",
Utils.logPeerId(peerId), file), fnfe);
+ } catch (IOException ioe) {
+ throw new BulkLoadUploadException(
+ String.format("%s Failed to resolve source path for: %s",
Utils.logPeerId(peerId), file),
+ ioe);
+ }
+
+ Path destPath = new Path(bulkloadDir, file);
+
+ try {
+ LOG.debug("{} Copying bulk load file from {} to {}",
Utils.logPeerId(peerId), sourcePath,
+ destPath);
+
+ copyWithCleanup(CommonFSUtils.getRootDirFileSystem(conf), sourcePath,
+ backupFileSystemManager.getBackupFs(), destPath, conf);
+
+ LOG.info("{} Bulk load file {} successfully backed up to {}",
Utils.logPeerId(peerId), file,
+ destPath);
+ } catch (IOException e) {
+ throw new BulkLoadUploadException(
+ String.format("%s Failed to copy bulk load file %s to %s on day %s",
+ Utils.logPeerId(peerId), file, destPath,
formatToDateString(dayInMillis)),
+ e);
+ }
+ }
+
+ LOG.debug("{} Completed upload of bulk load files",
Utils.logPeerId(peerId));
+ }
+
+ /**
+ * Copy a file with cleanup logic in case of failure. Always overwrite
destination to avoid
+ * leaving corrupt partial files.
+ */
+ @RestrictedApi(
+ explanation = "Package-private for test visibility only. Do not use
outside tests.",
+ link = "",
+ allowedOnPath =
"(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/replication/ContinuousBackupReplicationEndpoint.java)")
+ static void copyWithCleanup(FileSystem srcFS, Path src, FileSystem dstFS,
Path dst,
+ Configuration conf) throws IOException {
+ try {
+ if (dstFS.exists(dst)) {
Review Comment:
If the file already exists and its size (or hash) is equal to that of the file
you're about to upload, you can skip the entire upload process.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]