yiguolei commented on code in PR #11940: URL: https://github.com/apache/doris/pull/11940#discussion_r953603686
########## be/src/olap/task/engine_clone_task.cpp: ########## @@ -96,176 +93,108 @@ Status EngineCloneTask::_do_clone() { // if missed version size is 0, then it is useless to clone from remote be, it means local data is // completed. Or remote be will just return header not the rowset files. clone will failed. - if (missed_versions.size() == 0) { - LOG(INFO) << "missed version size = 0, skip clone and return success. tablet id=" + if (missed_versions.empty()) { + LOG(INFO) << "missed version size = 0, skip clone and return success. tablet_id=" << _clone_req.tablet_id; - _set_tablet_info(Status::OK(), is_new_tablet); + _set_tablet_info(is_new_tablet); return Status::OK(); } + LOG(INFO) << "clone to existed tablet. missed_versions_size=" << missed_versions.size() + << ", allow_incremental_clone=" << allow_incremental_clone + << ", signature=" << _signature << ", tablet_id=" << _clone_req.tablet_id + << ", committed_version=" << _clone_req.committed_version; + // try to download missing version from src backend. // if tablet on src backend does not contains missing version, it will download all versions, // and set allow_incremental_clone to false - status = _make_and_download_snapshots(*(tablet->data_dir()), local_data_path, &src_host, - &src_file_path, _error_msgs, &missed_versions, - &allow_incremental_clone); - - LOG(INFO) << "tablet exist with number of missing version: " << missed_versions.size() - << ", try to incremental clone succeed: " << allow_incremental_clone - << ", signature: " << _signature << ", tablet id: " << _clone_req.tablet_id - << ", schema hash: " << _clone_req.schema_hash - << ", clone version: " << _clone_req.committed_version - << ", download snapshot: " << status; - - if (status.ok()) { - Status olap_status = - _finish_clone(tablet.get(), local_data_path, _clone_req.committed_version, - allow_incremental_clone); - if (!olap_status.ok()) { - LOG(WARNING) << "failed to finish clone. [table=" << tablet->full_name() - << " res=" << olap_status << "]"; - _error_msgs->push_back("clone error."); - status = Status::InternalError("Failed to finish clone"); - } - } + RETURN_IF_ERROR(_make_and_download_snapshots(*(tablet->data_dir()), local_data_path, + &src_host, &src_file_path, &missed_versions, + &allow_incremental_clone)); + RETURN_IF_ERROR(_finish_clone(tablet.get(), local_data_path, _clone_req.committed_version, + allow_incremental_clone)); } else { LOG(INFO) << "clone tablet not exist, begin clone a new tablet from remote be. " - << "signature:" << _signature << ", tablet_id:" << _clone_req.tablet_id - << ", schema_hash:" << _clone_req.schema_hash - << ", committed_version:" << _clone_req.committed_version; + << "signature=" << _signature << ", tablet_id=" << _clone_req.tablet_id + << ", committed_version=" << _clone_req.committed_version; // create a new tablet in this be // Get local disk from olap string local_shard_root_path; DataDir* store = nullptr; - Status olap_status = StorageEngine::instance()->obtain_shard_path( - _clone_req.storage_medium, &local_shard_root_path, &store); - if (!olap_status.ok()) { - LOG(WARNING) << "clone get local root path failed. signature: " << _signature; - _error_msgs->push_back("clone get local root path failed."); - status = Status::InternalError("Clone to get local root path failed"); - } - std::stringstream tablet_dir_stream; - tablet_dir_stream << local_shard_root_path << "/" << _clone_req.tablet_id << "/" - << _clone_req.schema_hash; + RETURN_IF_ERROR(StorageEngine::instance()->obtain_shard_path( + _clone_req.storage_medium, &local_shard_root_path, &store)); + auto tablet_dir = fmt::format("{}/{}/{}", local_shard_root_path, _clone_req.tablet_id, + _clone_req.schema_hash); + + Defer remove_useless_dir {[&] { + if (status.ok()) { + return; + } + LOG(INFO) << "clone failed. want to delete local dir: " << tablet_dir + << ". signature: " << _signature; + WARN_IF_ERROR(io::global_local_filesystem()->delete_directory(tablet_dir), + "failed to delete useless clone dir "); + }}; - if (status.ok()) { - bool allow_incremental_clone = false; - status = _make_and_download_snapshots(*store, tablet_dir_stream.str(), &src_host, - &src_file_path, _error_msgs, nullptr, - &allow_incremental_clone); + bool allow_incremental_clone = false; + status = _make_and_download_snapshots(*store, tablet_dir, &src_host, &src_file_path, + nullptr, &allow_incremental_clone); + if (!status.ok()) { + return status; } - if (status.ok()) { - LOG(INFO) << "clone copy done. src_host: " << src_host.host - << " src_file_path: " << src_file_path; - std::stringstream schema_hash_path_stream; - schema_hash_path_stream << local_shard_root_path << "/" << _clone_req.tablet_id << "/" - << _clone_req.schema_hash; - string header_path = TabletMeta::construct_header_file_path( - schema_hash_path_stream.str(), _clone_req.tablet_id); - Status reset_id_status = TabletMeta::reset_tablet_uid(header_path); - if (reset_id_status != Status::OK()) { - LOG(WARNING) << "errors while set tablet uid: '" << header_path; - _error_msgs->push_back("errors while set tablet uid."); - status = Status::InternalError("Errors while set tablet uid"); - } else { - Status load_header_status = - StorageEngine::instance()->tablet_manager()->load_tablet_from_dir( - store, _clone_req.tablet_id, _clone_req.schema_hash, - schema_hash_path_stream.str(), false); - if (load_header_status != Status::OK()) { - LOG(WARNING) << "load header failed. local_shard_root_path: '" - << local_shard_root_path - << "' schema_hash: " << _clone_req.schema_hash - << ". status: " << load_header_status - << ". signature: " << _signature; - _error_msgs->push_back("load header failed."); - status = Status::InternalError("Load tablet header failed"); - } - } - // clone success, delete .hdr file because tablet meta is stored in rocksdb - string cloned_meta_file = - tablet_dir_stream.str() + "/" + std::to_string(_clone_req.tablet_id) + ".hdr"; - FileUtils::remove(cloned_meta_file); + LOG(INFO) << "clone copy done. src_host: " << src_host.host + << " src_file_path: " << src_file_path; + string header_path = + TabletMeta::construct_header_file_path(tablet_dir, _clone_req.tablet_id); + status = TabletMeta::reset_tablet_uid(header_path); + if (!status.ok()) { + return status; } - // Clean useless dir, if failed, ignore it. + status = StorageEngine::instance()->tablet_manager()->load_tablet_from_dir( + store, _clone_req.tablet_id, _clone_req.schema_hash, tablet_dir, false); if (!status.ok()) { - std::stringstream local_data_path_stream; - local_data_path_stream << local_shard_root_path << "/" << _clone_req.tablet_id; - string local_data_path = local_data_path_stream.str(); - LOG(INFO) << "clone failed. want to delete local dir: " << local_data_path - << ". signature: " << _signature; - try { - std::filesystem::path local_path(local_data_path); - if (std::filesystem::exists(local_path)) { - std::filesystem::remove_all(local_path); - } - } catch (std::filesystem::filesystem_error& e) { - // Ignore the error, OLAP will delete it - LOG(WARNING) << "clone delete useless dir failed. " - << " error: " << e.what() << " local dir: " << local_data_path.c_str() - << " signature: " << _signature; - } + return status; } + // clone success, delete .hdr file because tablet meta is stored in rocksdb + FileUtils::remove(header_path); } - _set_tablet_info(status, is_new_tablet); - return Status::OK(); + return _set_tablet_info(is_new_tablet); } -void EngineCloneTask::_set_tablet_info(Status status, bool is_new_tablet) { +Status EngineCloneTask::_set_tablet_info(bool is_new_tablet) { // Get clone tablet info - if (status.ok()) { - TTabletInfo tablet_info; - tablet_info.__set_tablet_id(_clone_req.tablet_id); - tablet_info.__set_replica_id(_clone_req.replica_id); - tablet_info.__set_schema_hash(_clone_req.schema_hash); - Status get_tablet_info_status = - StorageEngine::instance()->tablet_manager()->report_tablet_info(&tablet_info); - if (get_tablet_info_status != Status::OK()) { - LOG(WARNING) << "clone success, but get tablet info failed." - << " tablet id: " << _clone_req.tablet_id - << ", replica_id:" << _clone_req.replica_id - << " schema hash: " << _clone_req.schema_hash - << " signature: " << _signature; - _error_msgs->push_back("clone success, but get tablet info failed."); - status = Status::InternalError("Clone success but get tablet info failed"); - } else if (_clone_req.__isset.committed_version && - tablet_info.version < _clone_req.committed_version) { - LOG(WARNING) << "failed to clone tablet. tablet_id:" << _clone_req.tablet_id + TTabletInfo tablet_info; + tablet_info.__set_tablet_id(_clone_req.tablet_id); + tablet_info.__set_replica_id(_clone_req.replica_id); + tablet_info.__set_schema_hash(_clone_req.schema_hash); + RETURN_IF_ERROR(StorageEngine::instance()->tablet_manager()->report_tablet_info(&tablet_info)); + if (_clone_req.__isset.committed_version && + tablet_info.version < _clone_req.committed_version) { + // if it is a new tablet and clone failed, then remove the tablet + // if it is incremental clone, then must not drop the tablet + if (is_new_tablet) { + // we need to check if this cloned table's version is what we expect. + // if not, maybe this is a stale remaining table which is waiting for drop. + // we drop it. + LOG(WARNING) << "begin to drop the stale tablet. tablet_id:" << _clone_req.tablet_id << ", replica_id:" << _clone_req.replica_id << ", schema_hash:" << _clone_req.schema_hash << ", signature:" << _signature << ", version:" << tablet_info.version << ", expected_version: " << _clone_req.committed_version; - // if it is a new tablet and clone failed, then remove the tablet - // if it is incremental clone, then must not drop the tablet - if (is_new_tablet) { - // we need to check if this cloned table's version is what we expect. - // if not, maybe this is a stale remaining table which is waiting for drop. - // we drop it. - LOG(WARNING) << "begin to drop the stale tablet. tablet_id:" << _clone_req.tablet_id - << ", replica_id:" << _clone_req.replica_id - << ", schema_hash:" << _clone_req.schema_hash - << ", signature:" << _signature << ", version:" << tablet_info.version - << ", expected_version: " << _clone_req.committed_version; - Status drop_status = StorageEngine::instance()->tablet_manager()->drop_tablet( - _clone_req.tablet_id, _clone_req.replica_id, false); - if (drop_status != Status::OK() && - drop_status.precise_code() != OLAP_ERR_TABLE_NOT_FOUND) { - // just log - LOG(WARNING) << "drop stale cloned table failed! tablet id: " - << _clone_req.tablet_id; - } - } - status = Status::InternalError("Failed to clone tablet"); - } else { - LOG(INFO) << "clone get tablet info success. tablet_id:" << _clone_req.tablet_id - << ", schema_hash:" << _clone_req.schema_hash << ", signature:" << _signature Review Comment: In previous logic, _tablet_infos is only set in this logic, not in 233 logic. But in your code, _tablet_infos is always set. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org