yiguolei commented on code in PR #11940:
URL: https://github.com/apache/doris/pull/11940#discussion_r953603686


##########
be/src/olap/task/engine_clone_task.cpp:
##########
@@ -96,176 +93,108 @@ Status EngineCloneTask::_do_clone() {
 
         // if missed version size is 0, then it is useless to clone from 
remote be, it means local data is
         // completed. Or remote be will just return header not the rowset 
files. clone will failed.
-        if (missed_versions.size() == 0) {
-            LOG(INFO) << "missed version size = 0, skip clone and return 
success. tablet id="
+        if (missed_versions.empty()) {
+            LOG(INFO) << "missed version size = 0, skip clone and return 
success. tablet_id="
                       << _clone_req.tablet_id;
-            _set_tablet_info(Status::OK(), is_new_tablet);
+            _set_tablet_info(is_new_tablet);
             return Status::OK();
         }
 
+        LOG(INFO) << "clone to existed tablet. missed_versions_size=" << 
missed_versions.size()
+                  << ", allow_incremental_clone=" << allow_incremental_clone
+                  << ", signature=" << _signature << ", tablet_id=" << 
_clone_req.tablet_id
+                  << ", committed_version=" << _clone_req.committed_version;
+
         // try to download missing version from src backend.
         // if tablet on src backend does not contains missing version, it will 
download all versions,
         // and set allow_incremental_clone to false
-        status = _make_and_download_snapshots(*(tablet->data_dir()), 
local_data_path, &src_host,
-                                              &src_file_path, _error_msgs, 
&missed_versions,
-                                              &allow_incremental_clone);
-
-        LOG(INFO) << "tablet exist with number of missing version: " << 
missed_versions.size()
-                  << ", try to incremental clone succeed: " << 
allow_incremental_clone
-                  << ", signature: " << _signature << ", tablet id: " << 
_clone_req.tablet_id
-                  << ", schema hash: " << _clone_req.schema_hash
-                  << ", clone version: " << _clone_req.committed_version
-                  << ", download snapshot: " << status;
-
-        if (status.ok()) {
-            Status olap_status =
-                    _finish_clone(tablet.get(), local_data_path, 
_clone_req.committed_version,
-                                  allow_incremental_clone);
-            if (!olap_status.ok()) {
-                LOG(WARNING) << "failed to finish clone. [table=" << 
tablet->full_name()
-                             << " res=" << olap_status << "]";
-                _error_msgs->push_back("clone error.");
-                status = Status::InternalError("Failed to finish clone");
-            }
-        }
+        RETURN_IF_ERROR(_make_and_download_snapshots(*(tablet->data_dir()), 
local_data_path,
+                                                     &src_host, 
&src_file_path, &missed_versions,
+                                                     
&allow_incremental_clone));
 
+        RETURN_IF_ERROR(_finish_clone(tablet.get(), local_data_path, 
_clone_req.committed_version,
+                                      allow_incremental_clone));
     } else {
         LOG(INFO) << "clone tablet not exist, begin clone a new tablet from 
remote be. "
-                  << "signature:" << _signature << ", tablet_id:" << 
_clone_req.tablet_id
-                  << ", schema_hash:" << _clone_req.schema_hash
-                  << ", committed_version:" << _clone_req.committed_version;
+                  << "signature=" << _signature << ", tablet_id=" << 
_clone_req.tablet_id
+                  << ", committed_version=" << _clone_req.committed_version;
         // create a new tablet in this be
         // Get local disk from olap
         string local_shard_root_path;
         DataDir* store = nullptr;
-        Status olap_status = StorageEngine::instance()->obtain_shard_path(
-                _clone_req.storage_medium, &local_shard_root_path, &store);
-        if (!olap_status.ok()) {
-            LOG(WARNING) << "clone get local root path failed. signature: " << 
_signature;
-            _error_msgs->push_back("clone get local root path failed.");
-            status = Status::InternalError("Clone to get local root path 
failed");
-        }
-        std::stringstream tablet_dir_stream;
-        tablet_dir_stream << local_shard_root_path << "/" << 
_clone_req.tablet_id << "/"
-                          << _clone_req.schema_hash;
+        RETURN_IF_ERROR(StorageEngine::instance()->obtain_shard_path(
+                _clone_req.storage_medium, &local_shard_root_path, &store));
+        auto tablet_dir = fmt::format("{}/{}/{}", local_shard_root_path, 
_clone_req.tablet_id,
+                                      _clone_req.schema_hash);
+
+        Defer remove_useless_dir {[&] {
+            if (status.ok()) {
+                return;
+            }
+            LOG(INFO) << "clone failed. want to delete local dir: " << 
tablet_dir
+                      << ". signature: " << _signature;
+            
WARN_IF_ERROR(io::global_local_filesystem()->delete_directory(tablet_dir),
+                          "failed to delete useless clone dir ");
+        }};
 
-        if (status.ok()) {
-            bool allow_incremental_clone = false;
-            status = _make_and_download_snapshots(*store, 
tablet_dir_stream.str(), &src_host,
-                                                  &src_file_path, _error_msgs, 
nullptr,
-                                                  &allow_incremental_clone);
+        bool allow_incremental_clone = false;
+        status = _make_and_download_snapshots(*store, tablet_dir, &src_host, 
&src_file_path,
+                                              nullptr, 
&allow_incremental_clone);
+        if (!status.ok()) {
+            return status;
         }
 
-        if (status.ok()) {
-            LOG(INFO) << "clone copy done. src_host: " << src_host.host
-                      << " src_file_path: " << src_file_path;
-            std::stringstream schema_hash_path_stream;
-            schema_hash_path_stream << local_shard_root_path << "/" << 
_clone_req.tablet_id << "/"
-                                    << _clone_req.schema_hash;
-            string header_path = TabletMeta::construct_header_file_path(
-                    schema_hash_path_stream.str(), _clone_req.tablet_id);
-            Status reset_id_status = TabletMeta::reset_tablet_uid(header_path);
-            if (reset_id_status != Status::OK()) {
-                LOG(WARNING) << "errors while set tablet uid: '" << 
header_path;
-                _error_msgs->push_back("errors while set tablet uid.");
-                status = Status::InternalError("Errors while set tablet uid");
-            } else {
-                Status load_header_status =
-                        
StorageEngine::instance()->tablet_manager()->load_tablet_from_dir(
-                                store, _clone_req.tablet_id, 
_clone_req.schema_hash,
-                                schema_hash_path_stream.str(), false);
-                if (load_header_status != Status::OK()) {
-                    LOG(WARNING) << "load header failed. 
local_shard_root_path: '"
-                                 << local_shard_root_path
-                                 << "' schema_hash: " << _clone_req.schema_hash
-                                 << ". status: " << load_header_status
-                                 << ". signature: " << _signature;
-                    _error_msgs->push_back("load header failed.");
-                    status = Status::InternalError("Load tablet header 
failed");
-                }
-            }
-            // clone success, delete .hdr file because tablet meta is stored 
in rocksdb
-            string cloned_meta_file =
-                    tablet_dir_stream.str() + "/" + 
std::to_string(_clone_req.tablet_id) + ".hdr";
-            FileUtils::remove(cloned_meta_file);
+        LOG(INFO) << "clone copy done. src_host: " << src_host.host
+                  << " src_file_path: " << src_file_path;
+        string header_path =
+                TabletMeta::construct_header_file_path(tablet_dir, 
_clone_req.tablet_id);
+        status = TabletMeta::reset_tablet_uid(header_path);
+        if (!status.ok()) {
+            return status;
         }
-        // Clean useless dir, if failed, ignore it.
+        status = 
StorageEngine::instance()->tablet_manager()->load_tablet_from_dir(
+                store, _clone_req.tablet_id, _clone_req.schema_hash, 
tablet_dir, false);
         if (!status.ok()) {
-            std::stringstream local_data_path_stream;
-            local_data_path_stream << local_shard_root_path << "/" << 
_clone_req.tablet_id;
-            string local_data_path = local_data_path_stream.str();
-            LOG(INFO) << "clone failed. want to delete local dir: " << 
local_data_path
-                      << ". signature: " << _signature;
-            try {
-                std::filesystem::path local_path(local_data_path);
-                if (std::filesystem::exists(local_path)) {
-                    std::filesystem::remove_all(local_path);
-                }
-            } catch (std::filesystem::filesystem_error& e) {
-                // Ignore the error, OLAP will delete it
-                LOG(WARNING) << "clone delete useless dir failed. "
-                             << " error: " << e.what() << " local dir: " << 
local_data_path.c_str()
-                             << " signature: " << _signature;
-            }
+            return status;
         }
+        // clone success, delete .hdr file because tablet meta is stored in 
rocksdb
+        FileUtils::remove(header_path);
     }
-    _set_tablet_info(status, is_new_tablet);
-    return Status::OK();
+    return _set_tablet_info(is_new_tablet);
 }
 
-void EngineCloneTask::_set_tablet_info(Status status, bool is_new_tablet) {
+Status EngineCloneTask::_set_tablet_info(bool is_new_tablet) {
     // Get clone tablet info
-    if (status.ok()) {
-        TTabletInfo tablet_info;
-        tablet_info.__set_tablet_id(_clone_req.tablet_id);
-        tablet_info.__set_replica_id(_clone_req.replica_id);
-        tablet_info.__set_schema_hash(_clone_req.schema_hash);
-        Status get_tablet_info_status =
-                
StorageEngine::instance()->tablet_manager()->report_tablet_info(&tablet_info);
-        if (get_tablet_info_status != Status::OK()) {
-            LOG(WARNING) << "clone success, but get tablet info failed."
-                         << " tablet id: " << _clone_req.tablet_id
-                         << ", replica_id:" << _clone_req.replica_id
-                         << " schema hash: " << _clone_req.schema_hash
-                         << " signature: " << _signature;
-            _error_msgs->push_back("clone success, but get tablet info 
failed.");
-            status = Status::InternalError("Clone success but get tablet info 
failed");
-        } else if (_clone_req.__isset.committed_version &&
-                   tablet_info.version < _clone_req.committed_version) {
-            LOG(WARNING) << "failed to clone tablet. tablet_id:" << 
_clone_req.tablet_id
+    TTabletInfo tablet_info;
+    tablet_info.__set_tablet_id(_clone_req.tablet_id);
+    tablet_info.__set_replica_id(_clone_req.replica_id);
+    tablet_info.__set_schema_hash(_clone_req.schema_hash);
+    
RETURN_IF_ERROR(StorageEngine::instance()->tablet_manager()->report_tablet_info(&tablet_info));
+    if (_clone_req.__isset.committed_version &&
+        tablet_info.version < _clone_req.committed_version) {
+        // if it is a new tablet and clone failed, then remove the tablet
+        // if it is incremental clone, then must not drop the tablet
+        if (is_new_tablet) {
+            // we need to check if this cloned table's version is what we 
expect.
+            // if not, maybe this is a stale remaining table which is waiting 
for drop.
+            // we drop it.
+            LOG(WARNING) << "begin to drop the stale tablet. tablet_id:" << 
_clone_req.tablet_id
                          << ", replica_id:" << _clone_req.replica_id
                          << ", schema_hash:" << _clone_req.schema_hash
                          << ", signature:" << _signature << ", version:" << 
tablet_info.version
                          << ", expected_version: " << 
_clone_req.committed_version;
-            // if it is a new tablet and clone failed, then remove the tablet
-            // if it is incremental clone, then must not drop the tablet
-            if (is_new_tablet) {
-                // we need to check if this cloned table's version is what we 
expect.
-                // if not, maybe this is a stale remaining table which is 
waiting for drop.
-                // we drop it.
-                LOG(WARNING) << "begin to drop the stale tablet. tablet_id:" 
<< _clone_req.tablet_id
-                             << ", replica_id:" << _clone_req.replica_id
-                             << ", schema_hash:" << _clone_req.schema_hash
-                             << ", signature:" << _signature << ", version:" 
<< tablet_info.version
-                             << ", expected_version: " << 
_clone_req.committed_version;
-                Status drop_status = 
StorageEngine::instance()->tablet_manager()->drop_tablet(
-                        _clone_req.tablet_id, _clone_req.replica_id, false);
-                if (drop_status != Status::OK() &&
-                    drop_status.precise_code() != OLAP_ERR_TABLE_NOT_FOUND) {
-                    // just log
-                    LOG(WARNING) << "drop stale cloned table failed! tablet 
id: "
-                                 << _clone_req.tablet_id;
-                }
-            }
-            status = Status::InternalError("Failed to clone tablet");
-        } else {
-            LOG(INFO) << "clone get tablet info success. tablet_id:" << 
_clone_req.tablet_id
-                      << ", schema_hash:" << _clone_req.schema_hash << ", 
signature:" << _signature

Review Comment:
   In previous logic, _tablet_infos is only set in this logic, not in 233 
logic. But in your code, _tablet_infos is always set.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to