chaoyli closed pull request #506: "add create file hard link for rowset". URL: https://github.com/apache/incubator-doris/pull/506
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/be/src/olap/rowset/alpha_rowset.cpp b/be/src/olap/rowset/alpha_rowset.cpp index 1c5e11bc..cbd69fa0 100644 --- a/be/src/olap/rowset/alpha_rowset.cpp +++ b/be/src/olap/rowset/alpha_rowset.cpp @@ -30,13 +30,20 @@ AlphaRowset::AlphaRowset(const RowFields& tablet_schema, _rowset_path(rowset_path), _rowset_meta(rowset_meta), _segment_group_size(0), - _is_cumulative_rowset(false) { - Version version = _rowset_meta->version(); - if (version.first == version.second) { - _is_cumulative_rowset = false; - } else { - _is_cumulative_rowset = true; + _is_cumulative_rowset(false), + _is_pending_rowset(false) { + if (!_rowset_meta->has_version()) { + _is_pending_rowset = true; } + if (!_is_pending_rowset) { + Version version = _rowset_meta->version(); + if (version.first == version.second) { + _is_cumulative_rowset = false; + } else { + _is_cumulative_rowset = true; + } + } + } OLAPStatus AlphaRowset::init() { @@ -71,6 +78,31 @@ RowsetMetaSharedPtr AlphaRowset::get_meta() { void AlphaRowset::set_version(Version version) { _rowset_meta->set_version(version); + _is_pending_rowset = false; +} + +bool AlphaRowset::create_hard_links(std::vector<std::string>* success_links) { + for (auto segment_group : _segment_groups) { + bool ret = segment_group->create_hard_links(success_links); + if (!ret) { + LOG(WARNING) << "create hard links failed for segment group:" + << segment_group->segment_group_id(); + return false; + } + } + return true; +} + +bool AlphaRowset::remove_old_files(std::vector<std::string>* removed_links) { + for (auto segment_group : _segment_groups) { + bool ret = segment_group->remove_old_files(removed_links); + if (!ret) { + LOG(WARNING) << "remove old files failed for segment group:" + << 
segment_group->segment_group_id(); + return false; + } + } + return true; } OLAPStatus AlphaRowset::_init_segment_groups() { diff --git a/be/src/olap/rowset/alpha_rowset.h b/be/src/olap/rowset/alpha_rowset.h index 077ce324..412c4f38 100644 --- a/be/src/olap/rowset/alpha_rowset.h +++ b/be/src/olap/rowset/alpha_rowset.h @@ -48,6 +48,10 @@ class AlphaRowset : public Rowset { virtual void set_version(Version version); + bool create_hard_links(std::vector<std::string>* success_links); + + bool remove_old_files(std::vector<std::string>* removed_links); + private: OLAPStatus _init_segment_groups(); @@ -61,6 +65,7 @@ class AlphaRowset : public Rowset { std::vector<std::shared_ptr<SegmentGroup>> _segment_groups; int _segment_group_size; bool _is_cumulative_rowset; + bool _is_pending_rowset; }; } // namespace doris diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h index 1a4e6719..fd4926a4 100644 --- a/be/src/olap/rowset/rowset_meta.h +++ b/be/src/olap/rowset/rowset_meta.h @@ -127,6 +127,11 @@ class RowsetMeta { _rowset_meta_pb.set_end_version(version.second); } + virtual bool has_version() { + return _rowset_meta_pb.has_start_version() + && _rowset_meta_pb.has_end_version(); + } + virtual int start_version() { return _rowset_meta_pb.start_version(); } diff --git a/be/src/olap/rowset/segment_group.cpp b/be/src/olap/rowset/segment_group.cpp index 167bb13e..13ae5539 100644 --- a/be/src/olap/rowset/segment_group.cpp +++ b/be/src/olap/rowset/segment_group.cpp @@ -80,7 +80,7 @@ SegmentGroup::SegmentGroup(int64_t tablet_id, int64_t rowset_id, const RowFields _ref_count = 0; _is_pending = false; _partition_id = 0; - _transaction_id = 0; + _txn_id = 0; _short_key_length = 0; _new_short_key_length = 0; _short_key_buf = nullptr; @@ -114,7 +114,7 @@ SegmentGroup::SegmentGroup(int64_t tablet_id, int64_t rowset_id, const RowFields _delete_flag(delete_flag), _segment_group_id(segment_group_id), _num_segments(num_segments), _is_pending(is_pending), 
_partition_id(partition_id), - _transaction_id(transaction_id) { + _txn_id(transaction_id) { _version = {-1, -1}; _version_hash = 0; _load_id.set_hi(0); @@ -155,7 +155,7 @@ std::string SegmentGroup::_construct_pending_file_path(int32_t segment_id, const std::string pending_dir_path = _rowset_path_prefix + PENDING_DELTA_PREFIX; std::stringstream file_path; file_path << pending_dir_path << "/" - << _transaction_id << "_" + << std::to_string(_rowset_id) << "_" + _txn_id << _segment_group_id << "_" << segment_id << suffix; return file_path.str(); } @@ -688,4 +688,101 @@ int64_t SegmentGroup::get_tablet_id() { return _tablet_id; } +bool SegmentGroup::create_hard_links(std::vector<std::string>* success_links) { + for (int segment_id = 0; segment_id < _num_segments; segment_id++) { + std::string new_data_file_name = construct_data_file_path(segment_id); + if (!check_dir_existed(new_data_file_name)) { + std::string old_data_file_name = construct_old_data_file_path(segment_id); + if (link(new_data_file_name.c_str(), old_data_file_name.c_str()) != 0) { + LOG(WARNING) << "fail to create hard link. from=" << old_data_file_name << ", " + << "to=" << new_data_file_name << ", " << "errno=" << Errno::no(); + return false; + } + } + success_links->push_back(new_data_file_name); + std::string new_index_file_name = construct_index_file_path(segment_id); + if (!check_dir_existed(new_index_file_name)) { + std::string old_index_file_name = construct_old_index_file_path(segment_id); + if (link(new_index_file_name.c_str(), old_index_file_name.c_str()) != 0) { + LOG(WARNING) << "fail to create hard link. 
from=" << old_index_file_name << ", " + << "to=" << new_index_file_name << ", " << "errno=" << Errno::no(); + return false; + } + } + success_links->push_back(new_index_file_name); + } + return true; +} + +bool SegmentGroup::remove_old_files(std::vector<std::string>* removed_links) { + for (int segment_id = 0; segment_id < _num_segments; segment_id++) { + std::string old_data_file_name = construct_old_data_file_path(segment_id); + OLAPStatus status = remove_dir(old_data_file_name); + if (status != OLAP_SUCCESS) { + return false; + } + removed_links->push_back(old_data_file_name); + std::string old_index_file_name = construct_old_index_file_path(segment_id); + status = remove_dir(old_index_file_name); + if (status != OLAP_SUCCESS) { + return false; + } + removed_links->push_back(old_index_file_name); + } + return true; +} + +std::string SegmentGroup::construct_old_index_file_path(int32_t segment_id) const { + if (_is_pending) { + return _construct_old_pending_file_path(segment_id, ".idx"); + } else { + return _construct_old_file_path(segment_id, ".idx"); + } +} + +std::string SegmentGroup::construct_old_data_file_path(int32_t segment_id) const { + if (_is_pending) { + return _construct_old_pending_file_path(segment_id, ".dat"); + } else { + return _construct_old_file_path(segment_id, ".dat"); + } +} + +std::string SegmentGroup::_construct_old_pending_file_path(int32_t segment_id, + const std::string& suffix) const { + std::string dir_path = _rowset_path_prefix + PENDING_DELTA_PREFIX; + std::stringstream file_path; + file_path << dir_path << "/" + << _txn_id << "_" + << _segment_group_id << "_" << segment_id << suffix; + return file_path.str(); +} + +std::string SegmentGroup::_construct_old_file_path(int32_t segment_id, const std::string& suffix) const { + char file_path[OLAP_MAX_PATH_LEN]; + if (_segment_group_id == -1) { + snprintf(file_path, + sizeof(file_path), + "%s_%ld_%ld_%ld_%d.%s", + _rowset_path_prefix.c_str(), + _version.first, + _version.second, + 
_version_hash, + segment_id, + suffix.c_str()); + } else { + snprintf(file_path, + sizeof(file_path), + "%s_%ld_%ld_%ld_%d_%d.%s", + _rowset_path_prefix.c_str(), + _version.first, + _version.second, + _version_hash, + _segment_group_id, segment_id, + suffix.c_str()); + } + + return file_path; +} + } // namespace doris diff --git a/be/src/olap/rowset/segment_group.h b/be/src/olap/rowset/segment_group.h index 3b6a6885..f54ac00a 100644 --- a/be/src/olap/rowset/segment_group.h +++ b/be/src/olap/rowset/segment_group.h @@ -161,7 +161,7 @@ class SegmentGroup { inline void set_pending_finished() { _is_pending = false; } inline TPartitionId partition_id() const { return _partition_id; } - inline TTransactionId transaction_id() const { return _transaction_id; } + inline TTransactionId transaction_id() const { return _txn_id; } inline bool delete_flag() const { return _delete_flag; } @@ -227,8 +227,14 @@ class SegmentGroup { return _index.get_null_supported(seg_id); } - std::string construct_index_file_path(int32_t segment) const; - std::string construct_data_file_path(int32_t segment) const; + std::string construct_index_file_path(int32_t segment_id) const; + std::string construct_data_file_path(int32_t segment_id) const; + + // these two functions are for compatible, and will be deleted later + // so it is better not to use it. 
+ std::string construct_old_index_file_path(int32_t segment_id) const; + std::string construct_old_data_file_path(int32_t segment_id) const; + size_t current_num_rows_per_row_block() const; void publish_version(Version version, VersionHash version_hash); @@ -244,10 +250,18 @@ class SegmentGroup { int64_t get_tablet_id(); + bool create_hard_links(std::vector<std::string>* success_links); + + bool remove_old_files(std::vector<std::string>* removed_links); + private: - std::string _construct_pending_file_path(int32_t segment, const std::string& suffix) const; + std::string _construct_pending_file_path(int32_t segment_id, const std::string& suffix) const; + + std::string _construct_file_path(int32_t segment_id, const std::string& suffix) const; + + std::string _construct_old_pending_file_path(int32_t segment_id, const std::string& suffix) const; - std::string _construct_file_path(int32_t segment, const std::string& suffix) const; + std::string _construct_old_file_path(int32_t segment_id, const std::string& suffix) const; private: int64_t _tablet_id; @@ -268,7 +282,7 @@ class SegmentGroup { MemIndex _index; bool _is_pending; TPartitionId _partition_id; - TTransactionId _transaction_id; + TTransactionId _txn_id; // short key对应的field_info数组 RowFields _short_key_info_list; ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org