morningman commented on a change in pull request #8663:
URL: https://github.com/apache/incubator-doris/pull/8663#discussion_r836164163



##########
File path: gensrc/thrift/HeartbeatService.thrift
##########
@@ -32,6 +33,7 @@ struct TMasterInfo {
     6: optional Types.TPort http_port
     7: optional i64 heartbeat_flags
     8: optional i64 backend_id
+    9: optional list<AgentService.TStorageParam> remote_storage_params

Review comment:
       don't use heartbeat to pass these info.

##########
File path: gensrc/proto/olap_file.proto
##########
@@ -213,6 +214,24 @@ enum StorageMediumPB {
     HDD = 0;
     SSD = 1;
     S3 = 2;
+    REMOTE_CACHE = 99;
+}
+
+message S3StorageParamPB {
+    required string s3_endpoint = 1;
+    required string s3_region = 2;
+    optional string s3_ak = 3;
+    optional string s3_sk = 4;
+    optional int32 s3_max_conn = 5 [default = 50];
+    optional int32 s3_request_timeout_ms = 6 [default = 3000];
+    optional int32 s3_conn_timeout_ms = 7 [default = 1000];
+    optional string root_path = 8;
+}
+
+message StorageParamPB {
+    required StorageMediumPB storage_medium = 1 [default = HDD];

Review comment:
       use optional

##########
File path: gensrc/proto/olap_file.proto
##########
@@ -213,6 +214,24 @@ enum StorageMediumPB {
     HDD = 0;
     SSD = 1;
     S3 = 2;
+    REMOTE_CACHE = 99;
+}
+
+message S3StorageParamPB {
+    required string s3_endpoint = 1;

Review comment:
       use optional

##########
File path: be/src/olap/snapshot_manager.cpp
##########
@@ -98,13 +98,13 @@ OLAPStatus SnapshotManager::release_snapshot(const string& 
snapshot_path) {
             continue;
         }
         std::string abs_path;
-        RETURN_WITH_WARN_IF_ERROR(store->env()->canonicalize(store->path(), 
&abs_path),
+        RETURN_WITH_WARN_IF_ERROR(Env::Default()->canonicalize(store->path(), 
&abs_path),

Review comment:
       Why not still using `store->env()`?

##########
File path: be/src/olap/data_dir.cpp
##########
@@ -384,6 +358,10 @@ OLAPStatus 
DataDir::_check_incompatible_old_format_tablet() {
 // TODO(ygl): deal with rowsets and tablets when load failed
 OLAPStatus DataDir::load() {
     LOG(INFO) << "start to load tablets from " << _path_desc.filepath;
+    if (is_remote()) {
+        
RETURN_WITH_WARN_IF_ERROR(Env::get_remote_mgr()->init(_path_desc.filepath + 
STORAGE_PARAM_PREFIX),
+                                  OLAP_ERR_INIT_FAILED, "DataDir init 
failed.");
+    }

Review comment:
       Do we still to go on if this dir is a remote dir?

##########
File path: be/src/olap/olap_define.h
##########
@@ -82,7 +82,8 @@ enum OLAPDataVersion {
 static const std::string MINI_PREFIX = "/mini_download";
 static const std::string CLUSTER_ID_PREFIX = "/cluster_id";
 static const std::string DATA_PREFIX = "/data";
-static const std::string TABLET_UID = "/tablet_uid";
+static const std::string STORAGE_PARAM_PREFIX = "/storage_param";
+static const std::string REMOTE_FILE_PARAM = "/remote_file_param";

Review comment:
       Not used?

##########
File path: be/src/olap/base_tablet.cpp
##########
@@ -63,14 +65,22 @@ OLAPStatus BaseTablet::set_tablet_state(TabletState state) {
 }
 
 void BaseTablet::_gen_tablet_path() {
-    if (_data_dir != nullptr) {
+    if (_data_dir != nullptr && _tablet_meta != nullptr) {
+        FilePathDesc root_path_desc;
+        root_path_desc.filepath = _data_dir->path_desc().filepath;
+        root_path_desc.storage_name = _storage_param.storage_name();
+        root_path_desc.storage_medium = 
fs::fs_util::get_t_storage_medium(_storage_param.storage_medium());
+        if (_data_dir->is_remote() && !Env::get_remote_mgr()->get_root_path(
+                _storage_param.storage_name(), 
&(root_path_desc.remote_path)).ok()) {
+            LOG(WARNING) << "get_root_path failed for storage_name: " << 
_storage_param.storage_name();

Review comment:
       Why can we continue even if we meet error here?

##########
File path: be/src/env/env_remote_mgr.h
##########
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <map>
+#include <vector>
+
+#include "env/env_remote.h"
+#include "util/mutex.h"
+
+namespace doris {
+
+class RemoteEnvMgr {

Review comment:
       Add some comment for this class and the main methods of this class.

##########
File path: be/src/olap/options.cpp
##########
@@ -48,6 +49,7 @@ static std::string to_upper(const std::string& str) {
 // Currently, both of two following formats are supported(see be.conf)
 //   format 1:   /home/disk1/palo.HDD,50
 //   format 2:   /home/disk1/palo,medium:ssd,capacity:50
+//   s3 format:  
/home/disk/palo/cache,medium:s3,capacity:50,remote:s3://bucket_name/palo/storage

Review comment:
       I think the comment need to be modified? the `medium` should be 
`REMOTE_CACHE`

##########
File path: be/src/olap/data_dir.h
##########
@@ -133,9 +132,13 @@ class DataDir {
 
     void disks_compaction_num_increment(int64_t delta);
 
-    Env* env() {
-        return _env;
-    }
+    // 将segment_path_desc移到回收站,回收站位于storage_root/trash, 
segment_path_desc可以是文件或目录
+    // 移动的同时将segment_path_desc的路径进行修改
+    // filepath改为:
+    // 
storage_root/trash/20150619154308/delete_counter/tablet_path/segment_path,
+    // remote_path改为:
+    // 
storage_root/trash/20150619154308/delete_counter/tablet_path/segment_path/tablet_uid
+    OLAPStatus move_to_trash(const FilePathDesc& segment_path_desc);

Review comment:
       There is also a method `move_to_trash` in `src/olap/utils.h`. Are they 
different?
   And I can't find the place to call this `move_to_trash`. What is this method 
for?

##########
File path: be/src/olap/rowset/beta_rowset_writer.cpp
##########
@@ -53,7 +53,11 @@ BetaRowsetWriter::~BetaRowsetWriter() {
     if (!_already_built) {       // abnormal exit, remove all files generated
         _segment_writer.reset(); // ensure all files are closed
         Status st;
-        Env* env = Env::get_env(_context.path_desc.storage_medium);
+        std::shared_ptr<Env> env = Env::get_env(_context.path_desc);
+        if (env == nullptr) {

Review comment:
       In what situation, here we may get nullptr?

##########
File path: be/src/olap/tablet_meta.h
##########
@@ -82,8 +82,8 @@ class TabletMeta {
     TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id, 
int32_t schema_hash,
                uint64_t shard_id, const TTabletSchema& tablet_schema, uint32_t 
next_unique_id,
                const std::unordered_map<uint32_t, uint32_t>& 
col_ordinal_to_unique_id,
-               TabletUid tablet_uid, TTabletType::type tabletType,
-               TStorageMedium::type t_storage_medium);
+               TabletUid tablet_uid, TTabletType::type tabletType, 
TStorageMedium::type t_storage_medium,
+               const std::string& remote_storage_name);

Review comment:
       Do we need to save the remote storage info in TabletMeta?
   Where to save the TabletMeta of tablets in remote storage? Still in local 
Rocksdb?

##########
File path: be/src/olap/data_dir.cpp
##########
@@ -772,4 +750,126 @@ void DataDir::disks_compaction_score_increment(int64_t 
delta) {
 void DataDir::disks_compaction_num_increment(int64_t delta) {
     disks_compaction_num->increment(delta);
 }
+
+OLAPStatus DataDir::move_to_trash(const FilePathDesc& segment_path_desc) {
+    OLAPStatus res = OLAP_SUCCESS;
+    FilePathDesc storage_root_desc = _path_desc;
+    if (is_remote() && !Env::get_remote_mgr()->get_root_path(
+            segment_path_desc.storage_name, 
&(storage_root_desc.remote_path)).ok()) {
+        LOG(WARNING) << "get_root_path failed for storage_name: " << 
segment_path_desc.storage_name;
+        return OLAP_ERR_OTHER_ERROR;
+    }
+
+    // 1. get timestamp string
+    string time_str;
+    if ((res = gen_timestamp_string(&time_str)) != OLAP_SUCCESS) {
+        OLAP_LOG_WARNING(
+                "failed to generate time_string when move file to trash."
+                "[err code=%d]",
+                res);
+        return res;
+    }
+
+    // 2. generate new file path desc
+    static uint64_t delete_counter = 0; // a global counter to avoid file name 
duplication.

Review comment:
       1. What is `delete_counter` for?
   2. Can we use atomic instead of lock?

##########
File path: be/src/olap/data_dir.cpp
##########
@@ -712,7 +690,7 @@ void DataDir::perform_path_scan() {
 }
 
 void DataDir::_process_garbage_path(const std::string& path) {
-    if (_env->path_exists(path).ok()) {
+    if (Env::Default()->path_exists(path).ok()) {

Review comment:
       What is relation between `data dir` and `env` now?
   I found that here we directly use `Env::Default()` to do something in 
`DataDir`, but what if
   this is a remote dir?

##########
File path: gensrc/thrift/AgentService.thrift
##########
@@ -67,6 +67,23 @@ enum TTabletType {
     TABLET_TYPE_MEMORY = 1
 }
 
+struct TS3StorageParam {
+    1: required string s3_endpoint
+    2: required string s3_region
+    3: optional string s3_ak
+    4: optional string s3_sk
+    5: optional i32 s3_max_conn = 50
+    6: optional i32 s3_request_timeout_ms = 3000
+    7: optional i32 s3_conn_timeout_ms = 1000
+    8: optional string root_path
+}
+
+struct TStorageParam {
+    1: required Types.TStorageMedium storage_medium = TStorageMedium.HDD

Review comment:
       use optional

##########
File path: be/src/olap/data_dir.h
##########
@@ -133,9 +132,13 @@ class DataDir {
 
     void disks_compaction_num_increment(int64_t delta);
 
-    Env* env() {
-        return _env;
-    }
+    // 将segment_path_desc移到回收站,回收站位于storage_root/trash, 
segment_path_desc可以是文件或目录

Review comment:
       Use english

##########
File path: be/src/olap/rowset/beta_rowset.cpp
##########
@@ -156,7 +164,12 @@ OLAPStatus BetaRowset::copy_files_to(const std::string& 
dir) {
 }
 
 OLAPStatus BetaRowset::upload_files_to(const FilePathDesc& dir_desc) {
-    RemoteEnv* dest_env = 
dynamic_cast<RemoteEnv*>(Env::get_env(_rowset_path_desc.storage_medium));
+    std::shared_ptr<Env> env = Env::get_env(dir_desc);
+    if (env == nullptr) {
+        LOG(WARNING) << "env is invalid: " << dir_desc.debug_string();
+        return OLAP_ERR_OS_ERROR;
+    }
+    RemoteEnv* dest_env = dynamic_cast<RemoteEnv*>(env.get());

Review comment:
       The logic here can not guarantee that we will get the RemoveEnv.
   You can check `dir_desc.is_remote()` at the beginning.

##########
File path: be/src/olap/fs/fs_util.cpp
##########
@@ -58,6 +60,42 @@ StorageMediumPB get_storage_medium_pb(TStorageMedium::type 
t_storage_medium) {
     }
 }
 
+TStorageMedium::type get_t_storage_medium(StorageMediumPB storage_medium) {
+    switch (storage_medium) {
+        case StorageMediumPB::S3:
+            return TStorageMedium::S3;
+        case StorageMediumPB::SSD:
+            return TStorageMedium::SSD;
+        case StorageMediumPB::HDD:

Review comment:
       CHECK(false) for `default` maybe better.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to