morningman commented on code in PR #8806:
URL: https://github.com/apache/incubator-doris/pull/8806#discussion_r845247265
##########
be/src/olap/rowset/beta_rowset.cpp:
##########
@@ -154,31 +158,38 @@ OLAPStatus BetaRowset::copy_files_to(const std::string&
dir) {
return OLAP_SUCCESS;
}
-OLAPStatus BetaRowset::upload_files_to(const FilePathDesc& dir_desc) {
+OLAPStatus BetaRowset::upload_files_to(const FilePathDesc& dir_desc, RowsetId
new_rowset_id, bool delete_src) {
Review Comment:
```suggestion
OLAPStatus BetaRowset::upload_files_to(const FilePathDesc& dir_desc, const
RowsetId& new_rowset_id, bool delete_src) {
```
##########
be/src/common/config.h:
##########
@@ -189,6 +189,7 @@ CONF_mInt64(column_dictionary_key_ratio_threshold, "0");
CONF_mInt64(column_dictionary_key_size_threshold, "0");
// memory_limitation_per_thread_for_schema_change_bytes unit bytes
CONF_mInt64(memory_limitation_per_thread_for_schema_change_bytes,
"2147483648");
+CONF_mInt64(memory_limitation_per_thread_for_storage_migration_bytes,
"2147483648");
Review Comment:
migration should not have much memory consumption
##########
be/src/olap/rowset/beta_rowset.cpp:
##########
@@ -132,19 +132,23 @@ OLAPStatus BetaRowset::link_files_to(const FilePathDesc&
dir_desc, RowsetId new_
return OLAP_SUCCESS;
}
-OLAPStatus BetaRowset::copy_files_to(const std::string& dir) {
+OLAPStatus BetaRowset::copy_files_to(const std::string& dir, RowsetId
new_rowset_id) {
Review Comment:
```suggestion
OLAPStatus BetaRowset::copy_files_to(const std::string& dir, const RowsetId&
new_rowset_id) {
```
##########
be/src/olap/task/engine_alter_tablet_task.cpp:
##########
@@ -35,6 +35,7 @@ EngineAlterTabletTask::EngineAlterTabletTask(const
TAlterTabletReqV2& request)
}
OLAPStatus EngineAlterTabletTask::execute() {
+ DorisMetrics::instance()->storage_migrate_v2_requests_total->increment(1);
Review Comment:
Why adding `storage_migrate_v2_requests_total` here?
##########
be/src/olap/rowset/alpha_rowset.cpp:
##########
@@ -100,7 +100,7 @@ OLAPStatus AlphaRowset::link_files_to(const FilePathDesc&
dir_desc, RowsetId new
return OLAP_SUCCESS;
}
-OLAPStatus AlphaRowset::copy_files_to(const std::string& dir) {
+OLAPStatus AlphaRowset::copy_files_to(const std::string& dir, RowsetId
new_rowset_id) {
Review Comment:
Need to check all other places
##########
be/src/olap/storage_migration_v2.cpp:
##########
@@ -0,0 +1,463 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/storage_migration_v2.h"
+
+#include <pthread.h>
+#include <signal.h>
+
+#include <algorithm>
+#include <vector>
+#include "rapidjson/document.h"
+#include "rapidjson/prettywriter.h"
+#include "rapidjson/stringbuffer.h"
+
+#include "agent/cgroups_mgr.h"
+#include "common/resource_tls.h"
+#include "env/env_util.h"
+#include "olap/merger.h"
+#include "olap/row.h"
+#include "olap/row_block.h"
+#include "olap/row_cursor.h"
+#include "olap/rowset/rowset_factory.h"
+#include "olap/rowset/rowset_id_generator.h"
+#include "olap/storage_engine.h"
+#include "olap/tablet.h"
+#include "olap/wrapper_field.h"
+#include "runtime/exec_env.h"
+#include "runtime/thread_context.h"
+#include "util/defer_op.h"
+
+using std::deque;
+using std::list;
+using std::nothrow;
+using std::pair;
+using std::string;
+using std::stringstream;
+using std::vector;
+
+namespace doris {
+
+DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(storage_migration_mem_consumption,
MetricUnit::BYTES, "",
+ mem_consumption, Labels({{"type",
"storage_migration"}}));
+
+
+StorageMigrationV2Handler::StorageMigrationV2Handler()
+ : _mem_tracker(MemTracker::create_tracker(
Review Comment:
this `_mem_tracker` is useless
##########
be/src/olap/rowset/beta_rowset_writer.cpp:
##########
@@ -141,6 +141,36 @@ OLAPStatus
BetaRowsetWriter::add_rowset_for_linked_schema_change(
return add_rowset(rowset);
}
+OLAPStatus BetaRowsetWriter::add_rowset_for_migration(RowsetSharedPtr rowset) {
+ OLAPStatus res = OLAP_SUCCESS;
+ assert(rowset->rowset_meta()->rowset_type() == BETA_ROWSET);
+ if (!rowset->rowset_path_desc().is_remote() &&
!_context.path_desc.is_remote()) {
+ res = rowset->copy_files_to(_context.path_desc.filepath,
_context.rowset_id);
+ if (res != OLAP_SUCCESS) {
+ LOG(WARNING) << "copy_files failed. src: " <<
rowset->rowset_path_desc().filepath
+ << ", dest: " << _context.path_desc.filepath;
+ return res;
+ }
+ } else if (!rowset->rowset_path_desc().is_remote() &&
_context.path_desc.is_remote()) {
Review Comment:
is it possible that `rowset->rowset_path_desc().is_remote()` is true here?
##########
be/src/olap/storage_engine.cpp:
##########
@@ -839,10 +849,42 @@ OLAPStatus StorageEngine::_do_sweep(const string&
scan_root, const time_t& local
string path_name = sorted_path.string();
if (difftime(local_now, mktime(&local_tm_create)) >=
actual_expire) {
+ std::string storage_name_path = path_name + "/" + STORAGE_NAME;
+ if (scan_root_desc.is_remote() &&
FileUtils::check_exist(storage_name_path)) {
+ faststring buf;
+ if (!env_util::read_file_to_string(Env::Default(),
storage_name_path, &buf).ok()) {
+ LOG(WARNING) << "read storage_name failed: " <<
storage_name_path;
+ continue;
+ }
+ FilePathDesc remote_path_desc = scan_root_desc;
+ remote_path_desc.storage_name = buf.ToString();
+ boost::algorithm::trim(remote_path_desc.storage_name);
+ std::shared_ptr<StorageBackend> storage_backend =
StorageBackendMgr::instance()->
+ get_storage_backend(remote_path_desc.storage_name);
+ if (storage_backend != nullptr) {
Review Comment:
What if `storage_backend` is null?
##########
be/src/olap/tablet_manager.cpp:
##########
@@ -1013,6 +1030,46 @@ void
TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId t
if (Env::Default()->path_exists(schema_hash_path).ok()) {
LOG(INFO) << "start to move tablet to trash. tablet_path = " <<
schema_hash_path;
FilePathDesc segment_desc(schema_hash_path);
+ string remote_file_param_path = schema_hash_path + REMOTE_FILE_PARAM;
+ if (data_dir->is_remote() &&
FileUtils::check_exist(remote_file_param_path)) {
+ // it means you must remove remote file for this segment first
+ faststring json_buf;
+ Status s = env_util::read_file_to_string(Env::Default(),
remote_file_param_path, &json_buf);
Review Comment:
better give an example of this json string
##########
gensrc/thrift/AgentService.thrift:
##########
@@ -144,6 +144,15 @@ struct TAlterMaterializedViewParam {
3: optional Exprs.TExpr mv_expr
}
+struct TStorageMigrationReqV2 {
+ 1: required Types.TTabletId base_tablet_id
Review Comment:
Do not use `required`
##########
be/src/olap/rowset/alpha_rowset.cpp:
##########
@@ -100,7 +100,7 @@ OLAPStatus AlphaRowset::link_files_to(const FilePathDesc&
dir_desc, RowsetId new
return OLAP_SUCCESS;
}
-OLAPStatus AlphaRowset::copy_files_to(const std::string& dir) {
+OLAPStatus AlphaRowset::copy_files_to(const std::string& dir, RowsetId
new_rowset_id) {
Review Comment:
```suggestion
OLAPStatus AlphaRowset::copy_files_to(const std::string& dir, const
RowsetId& new_rowset_id) {
```
##########
be/src/olap/rowset/beta_rowset.cpp:
##########
@@ -132,19 +132,23 @@ OLAPStatus BetaRowset::link_files_to(const FilePathDesc&
dir_desc, RowsetId new_
return OLAP_SUCCESS;
}
-OLAPStatus BetaRowset::copy_files_to(const std::string& dir) {
+OLAPStatus BetaRowset::copy_files_to(const std::string& dir, RowsetId
new_rowset_id) {
for (int i = 0; i < num_segments(); ++i) {
- FilePathDesc dst_path_desc = segment_file_path(dir, rowset_id(), i);
+ FilePathDesc dst_path_desc = segment_file_path(dir, new_rowset_id, i);
Status status = Env::Default()->path_exists(dst_path_desc.filepath);
if (status.ok()) {
- LOG(WARNING) << "file already exist: " << dst_path_desc.filepath;
- return OLAP_ERR_FILE_ALREADY_EXIST;
+ LOG(WARNING) << "file already exist, delete it: " <<
dst_path_desc.filepath;
+ status = Env::Default()->delete_file(dst_path_desc.filepath);
Review Comment:
The previous behavior just return `OLAP_ERR_FILE_ALREADY_EXIST`.
Why you modify this logic and remove dest path here?
##########
be/src/olap/rowset/beta_rowset_writer.cpp:
##########
@@ -141,6 +141,36 @@ OLAPStatus
BetaRowsetWriter::add_rowset_for_linked_schema_change(
return add_rowset(rowset);
}
+OLAPStatus BetaRowsetWriter::add_rowset_for_migration(RowsetSharedPtr rowset) {
+ OLAPStatus res = OLAP_SUCCESS;
+ assert(rowset->rowset_meta()->rowset_type() == BETA_ROWSET);
Review Comment:
use `CHECK` instead of `assert`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]