w41ter commented on code in PR #49233:
URL: https://github.com/apache/doris/pull/49233#discussion_r2004967314


##########
be/src/runtime/snapshot_loader.cpp:
##########
@@ -426,61 +433,81 @@ Status SnapshotHttpDownloader::_link_same_rowset_files() {
         remote_rowset_metas.insert({rowset_meta.rowset_id_v2(), rowset_meta});
     }
 
-    for (const auto& local_rowset_meta : local_tablet_meta.rs_metas()) {
-        if (local_rowset_meta.has_resource_id() || 
!local_rowset_meta.has_source_rowset_id()) {
+    std::unordered_map<std::string, const RowsetMetaPB&> local_rowset_metas;
+    for (const auto& rowset_meta : local_tablet_meta.rs_metas()) {
+        if (rowset_meta.has_resource_id()) { // skip local rowset

Review Comment:
   ```suggestion
           if (rowset_meta.has_resource_id()) { // skip remote rowset
   ```



##########
be/src/runtime/snapshot_loader.cpp:
##########
@@ -426,61 +433,81 @@ Status SnapshotHttpDownloader::_link_same_rowset_files() {
         remote_rowset_metas.insert({rowset_meta.rowset_id_v2(), rowset_meta});
     }
 
-    for (const auto& local_rowset_meta : local_tablet_meta.rs_metas()) {
-        if (local_rowset_meta.has_resource_id() || 
!local_rowset_meta.has_source_rowset_id()) {
+    std::unordered_map<std::string, const RowsetMetaPB&> local_rowset_metas;
+    for (const auto& rowset_meta : local_tablet_meta.rs_metas()) {
+        if (rowset_meta.has_resource_id()) { // skip local rowset
+            continue;
+        }
+        local_rowset_metas.insert({rowset_meta.rowset_id_v2(), rowset_meta});
+    }
+
+    
RETURN_IF_ERROR(_link_rowset_if_exists_from_origin_to_target<LocalFileStat>(
+            local_tablet_meta, remote_rowset_metas, _local_files, 
_local_path));
+    
RETURN_IF_ERROR(_link_rowset_if_exists_from_origin_to_target<RemoteFileStat>(
+            remote_tablet_meta, local_rowset_metas, _remote_files, 
_remote_path));
+    return Status::OK();

Review Comment:
   This function aims to link the local files as downloaded remote files.
   
   The old logic: link if the local file is a remote file copy.  => 
local_rowset.source_rowset_id() == remote_rowset.rowset_id()
   link $local_file to $remote_file
   
   Now, we need to add a new logic: link if the remote file is a local file 
copy.  => remote_rowset.source_rowset_id() == local_rowset.rowset_id().
   link $local_file to $remote_file
   
   The only different thing is the condition, and the link progress are the 
same. So this change is wrong.



##########
be/src/runtime/snapshot_loader.cpp:
##########
@@ -426,61 +433,81 @@ Status SnapshotHttpDownloader::_link_same_rowset_files() {
         remote_rowset_metas.insert({rowset_meta.rowset_id_v2(), rowset_meta});
     }
 
-    for (const auto& local_rowset_meta : local_tablet_meta.rs_metas()) {
-        if (local_rowset_meta.has_resource_id() || 
!local_rowset_meta.has_source_rowset_id()) {
+    std::unordered_map<std::string, const RowsetMetaPB&> local_rowset_metas;
+    for (const auto& rowset_meta : local_tablet_meta.rs_metas()) {
+        if (rowset_meta.has_resource_id()) { // skip local rowset
+            continue;
+        }
+        local_rowset_metas.insert({rowset_meta.rowset_id_v2(), rowset_meta});
+    }
+
+    
RETURN_IF_ERROR(_link_rowset_if_exists_from_origin_to_target<LocalFileStat>(
+            local_tablet_meta, remote_rowset_metas, _local_files, 
_local_path));
+    
RETURN_IF_ERROR(_link_rowset_if_exists_from_origin_to_target<RemoteFileStat>(
+            remote_tablet_meta, local_rowset_metas, _remote_files, 
_remote_path));
+    return Status::OK();
+}
+
+template <typename FileStat>
+Status SnapshotHttpDownloader::_link_rowset_if_exists_from_origin_to_target(
+        TabletMetaPB origin_tablet_meta,
+        std::unordered_map<std::string, const RowsetMetaPB&> 
target_tablet_metas,
+        std::unordered_map<std::string, FileStat>& linked_files, const 
std::string& origin_path) {
+    for (const auto& origin_rowset_meta : origin_tablet_meta.rs_metas()) {
+        if (origin_rowset_meta.has_resource_id() || 
!origin_rowset_meta.has_source_rowset_id()) {
             continue;
         }
 
-        auto remote_rowset_meta = 
remote_rowset_metas.find(local_rowset_meta.source_rowset_id());
-        if (remote_rowset_meta == remote_rowset_metas.end()) {
+        auto target_rowset_meta = 
target_tablet_metas.find(origin_rowset_meta.source_rowset_id());
+        if (target_rowset_meta == target_tablet_metas.end()) {
             continue;
         }
 
-        const auto& remote_rowset_id = remote_rowset_meta->first;
-        const auto& remote_rowset_meta_pb = remote_rowset_meta->second;
-        const auto& local_rowset_id = local_rowset_meta.rowset_id_v2();
-        auto remote_tablet_id = remote_rowset_meta_pb.tablet_id();
-        if (local_rowset_meta.start_version() != 
remote_rowset_meta_pb.start_version() ||
-            local_rowset_meta.end_version() != 
remote_rowset_meta_pb.end_version()) {
+        const auto& target_rowset_id = target_rowset_meta->first;
+        const auto& target_rowset_meta_pb = target_rowset_meta->second;
+        const auto& origin_rowset_id = origin_rowset_meta.rowset_id_v2();
+        auto target_tablet_id = target_rowset_meta_pb.tablet_id();
+        if (origin_rowset_meta.start_version() != 
target_rowset_meta_pb.start_version() ||
+            origin_rowset_meta.end_version() != 
target_rowset_meta_pb.end_version()) {
             continue;
         }
 
-        LOG(INFO) << "rowset " << local_rowset_id << " was downloaded from 
remote tablet "
-                  << remote_tablet_id << " rowset " << remote_rowset_id
+        LOG(INFO) << "rowset " << origin_rowset_id << " was downloaded from 
target tablet "
+                  << target_tablet_id << " rowset " << target_rowset_id
                   << ", directly link files instead of downloading";
 
-        // Since the rowset meta are the same, we can link the local rowset 
files as
-        // the downloaded remote rowset files.
-        for (const auto& [local_file, local_filestat] : _local_files) {
-            if (!local_file.starts_with(local_rowset_id)) {
+        // Since the rowset meta are the same, we can link the rowset files
+        // from origin to target as the downloaded remote rowset files.
+        for (const auto& [origin_file, origin_filestat] : linked_files) {
+            if (!origin_file.starts_with(origin_rowset_id)) {
                 continue;
             }
 
-            std::string remote_file = local_file;
-            remote_file.replace(0, local_rowset_id.size(), remote_rowset_id);
-            std::string local_file_path = _local_path + "/" + local_file;
-            std::string remote_file_path = _local_path + "/" + remote_file;
+            std::string target_file = origin_file;
+            target_file.replace(0, origin_rowset_id.size(), target_rowset_id);
+            std::string origin_file_path = origin_path + "/" + origin_file;
+            std::string target_file_path = origin_path + "/" + target_file;
 
             bool exist = true;
-            
RETURN_IF_ERROR(io::global_local_filesystem()->exists(remote_file_path, 
&exist));
+            
RETURN_IF_ERROR(io::global_local_filesystem()->exists(target_file_path, 
&exist));
             if (exist) {
                 continue;
             }
 
-            LOG(INFO) << "link file from " << local_file_path << " to " << 
remote_file_path;
-            if (!io::global_local_filesystem()->link_file(local_file_path, 
remote_file_path)) {
+            LOG(INFO) << "link file from " << origin_file_path << " to " << 
target_file_path;
+            if (!io::global_local_filesystem()->link_file(origin_file_path, 
target_file_path)) {
                 std::string msg = fmt::format("link file failed from {} to {}, 
err: {}",
-                                              local_file_path, 
remote_file_path, strerror(errno));
+                                              origin_file_path, 
target_file_path, strerror(errno));
                 LOG(WARNING) << msg;
                 return Status::InternalError(std::move(msg));
             }
 
-            _local_files[remote_file] = local_filestat;
+            linked_files[target_file] = origin_filestat;
         }
     }
 
     return Status::OK();
-}
+};

Review Comment:
   ```suggestion
   }
   ```



##########
be/src/runtime/snapshot_loader.cpp:
##########
@@ -426,61 +433,81 @@ Status SnapshotHttpDownloader::_link_same_rowset_files() {
         remote_rowset_metas.insert({rowset_meta.rowset_id_v2(), rowset_meta});
     }
 
-    for (const auto& local_rowset_meta : local_tablet_meta.rs_metas()) {
-        if (local_rowset_meta.has_resource_id() || 
!local_rowset_meta.has_source_rowset_id()) {
+    std::unordered_map<std::string, const RowsetMetaPB&> local_rowset_metas;
+    for (const auto& rowset_meta : local_tablet_meta.rs_metas()) {
+        if (rowset_meta.has_resource_id()) { // skip local rowset
+            continue;
+        }
+        local_rowset_metas.insert({rowset_meta.rowset_id_v2(), rowset_meta});
+    }
+
+    
RETURN_IF_ERROR(_link_rowset_if_exists_from_origin_to_target<LocalFileStat>(
+            local_tablet_meta, remote_rowset_metas, _local_files, 
_local_path));
+    
RETURN_IF_ERROR(_link_rowset_if_exists_from_origin_to_target<RemoteFileStat>(
+            remote_tablet_meta, local_rowset_metas, _remote_files, 
_remote_path));
+    return Status::OK();
+}
+
+template <typename FileStat>
+Status SnapshotHttpDownloader::_link_rowset_if_exists_from_origin_to_target(
+        TabletMetaPB origin_tablet_meta,
+        std::unordered_map<std::string, const RowsetMetaPB&> 
target_tablet_metas,
+        std::unordered_map<std::string, FileStat>& linked_files, const 
std::string& origin_path) {
+    for (const auto& origin_rowset_meta : origin_tablet_meta.rs_metas()) {
+        if (origin_rowset_meta.has_resource_id() || 
!origin_rowset_meta.has_source_rowset_id()) {
             continue;
         }
 
-        auto remote_rowset_meta = 
remote_rowset_metas.find(local_rowset_meta.source_rowset_id());
-        if (remote_rowset_meta == remote_rowset_metas.end()) {
+        auto target_rowset_meta = 
target_tablet_metas.find(origin_rowset_meta.source_rowset_id());
+        if (target_rowset_meta == target_tablet_metas.end()) {
             continue;
         }
 
-        const auto& remote_rowset_id = remote_rowset_meta->first;
-        const auto& remote_rowset_meta_pb = remote_rowset_meta->second;
-        const auto& local_rowset_id = local_rowset_meta.rowset_id_v2();
-        auto remote_tablet_id = remote_rowset_meta_pb.tablet_id();
-        if (local_rowset_meta.start_version() != 
remote_rowset_meta_pb.start_version() ||
-            local_rowset_meta.end_version() != 
remote_rowset_meta_pb.end_version()) {
+        const auto& target_rowset_id = target_rowset_meta->first;
+        const auto& target_rowset_meta_pb = target_rowset_meta->second;
+        const auto& origin_rowset_id = origin_rowset_meta.rowset_id_v2();
+        auto target_tablet_id = target_rowset_meta_pb.tablet_id();
+        if (origin_rowset_meta.start_version() != 
target_rowset_meta_pb.start_version() ||
+            origin_rowset_meta.end_version() != 
target_rowset_meta_pb.end_version()) {
             continue;
         }
 
-        LOG(INFO) << "rowset " << local_rowset_id << " was downloaded from 
remote tablet "
-                  << remote_tablet_id << " rowset " << remote_rowset_id
+        LOG(INFO) << "rowset " << origin_rowset_id << " was downloaded from 
target tablet "
+                  << target_tablet_id << " rowset " << target_rowset_id
                   << ", directly link files instead of downloading";

Review Comment:
   The log should point to which file is the copy.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to