chaoyli closed pull request #492: "Modify segment file name". URL: https://github.com/apache/incubator-doris/pull/492
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 825aaf80..a33959b9 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -551,7 +551,6 @@ add_subdirectory(${SRC_DIR}/util) add_subdirectory(${SRC_DIR}/gen_cpp) add_subdirectory(${SRC_DIR}/gutil) add_subdirectory(${SRC_DIR}/olap) -add_subdirectory(${SRC_DIR}/olap/rowset) add_subdirectory(${SRC_DIR}/agent) add_subdirectory(${SRC_DIR}/http) add_subdirectory(${SRC_DIR}/service) diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index f0e18335..c97f74f8 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -72,10 +72,7 @@ add_library(Olap STATIC types.cpp utils.cpp wrapper_field.cpp - rowset/rowset_meta_manager.cpp - rowset/alpha_rowset.cpp - rowset/alpha_rowset_reader.cpp - rowset/alpha_rowset_builder.cpp - rowset/alpha_rowset_meta.cpp task/engine_schema_change_task.cpp ) + +add_subdirectory(rowset) diff --git a/be/src/olap/rowset/alpha_rowset.cpp b/be/src/olap/rowset/alpha_rowset.cpp index 6c692852..7caccf25 100644 --- a/be/src/olap/rowset/alpha_rowset.cpp +++ b/be/src/olap/rowset/alpha_rowset.cpp @@ -81,7 +81,7 @@ NewStatus AlphaRowset::_init_segment_groups() { Version version = _rowset_meta->get_version(); int64_t version_hash = _rowset_meta->get_version_hash(); std::shared_ptr<SegmentGroup> segment_group(new SegmentGroup(_rowset_meta->get_tablet_id(), - _tablet_schema, _num_key_fields, _num_short_key_fields, + _rowset_meta->get_rowset_id(), _tablet_schema, _num_key_fields, _num_short_key_fields, _num_rows_per_row_block, _rowset_path, version, version_hash, false, segment_group_meta.segment_group_id(), segment_group_meta.num_segments())); if (segment_group.get() == nullptr) { diff --git 
a/be/src/olap/rowset/alpha_rowset_builder.cpp b/be/src/olap/rowset/alpha_rowset_builder.cpp index cdb75702..4851aef6 100644 --- a/be/src/olap/rowset/alpha_rowset_builder.cpp +++ b/be/src/olap/rowset/alpha_rowset_builder.cpp @@ -93,6 +93,7 @@ std::shared_ptr<Rowset> AlphaRowsetBuilder::build() { void AlphaRowsetBuilder::_init() { _segment_group_id++; _cur_segment_group = new SegmentGroup(_rowset_builder_context.tablet_id, + _rowset_builder_context.rowset_id, _rowset_builder_context.tablet_schema, _rowset_builder_context.num_key_fields, _rowset_builder_context.num_short_key_fields, diff --git a/be/src/olap/rowset/column_data.cpp b/be/src/olap/rowset/column_data.cpp index e27707a3..905ea819 100644 --- a/be/src/olap/rowset/column_data.cpp +++ b/be/src/olap/rowset/column_data.cpp @@ -127,7 +127,7 @@ OLAPStatus ColumnData::_seek_to_block(const RowBlockPosition& block_pos, bool wi _segment_reader = new(std::nothrow) SegmentReader( file_name, segment_group(), block_pos.segment, _seek_columns, _load_bf_columns, _conditions, - _col_predicates, _delete_handler, _delete_status, _runtime_state, _stats, _lru_cache); + _delete_handler, _delete_status, _runtime_state, _stats, _lru_cache); if (_segment_reader == nullptr) { OLAP_LOG_WARNING("fail to malloc segment reader."); return OLAP_ERR_MALLOC_ERROR; diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h index 50a13c61..6a001db4 100644 --- a/be/src/olap/rowset/rowset_meta.h +++ b/be/src/olap/rowset/rowset_meta.h @@ -42,6 +42,11 @@ class RowsetMeta { return _deserialize_from_pb(pb_rowset_meta); } + virtual bool init_from_pb(const RowsetMetaPB& rowset_meta_pb) { + _rowset_meta_pb = rowset_meta_pb; + return true; + } + virtual bool init_from_json(const std::string& json_rowset_meta) { bool ret = json2pb::JsonToProtoMessage(json_rowset_meta, &_rowset_meta_pb); return ret; diff --git a/be/src/olap/rowset/segment_group.cpp b/be/src/olap/rowset/segment_group.cpp index 284f87d2..167bb13e 100644 --- 
a/be/src/olap/rowset/segment_group.cpp +++ b/be/src/olap/rowset/segment_group.cpp @@ -60,10 +60,12 @@ namespace doris { } \ } while (0); -SegmentGroup::SegmentGroup(int64_t tablet_id, const RowFields& tablet_schema, int num_key_fields, int num_short_key_fields, - size_t num_rows_per_row_block, const std::string& rowset_path_prefix, Version version, VersionHash version_hash, +SegmentGroup::SegmentGroup(int64_t tablet_id, int64_t rowset_id, const RowFields& tablet_schema, + int num_key_fields, int num_short_key_fields, size_t num_rows_per_row_block, + const std::string& rowset_path_prefix, Version version, VersionHash version_hash, bool delete_flag, int32_t segment_group_id, int32_t num_segments) : _tablet_id(tablet_id), + _rowset_id(rowset_id), _tablet_schema(tablet_schema), _num_key_fields(num_key_fields), _num_short_key_fields(num_short_key_fields), @@ -98,10 +100,12 @@ SegmentGroup::SegmentGroup(int64_t tablet_id, const RowFields& tablet_schema, in } } -SegmentGroup::SegmentGroup(int64_t tablet_id, const RowFields& tablet_schema, int num_key_fields, int num_short_key_fields, - size_t num_rows_per_row_block, const std::string& rowset_path_prefix, bool delete_flag, +SegmentGroup::SegmentGroup(int64_t tablet_id, int64_t rowset_id, const RowFields& tablet_schema, + int num_key_fields, int num_short_key_fields, size_t num_rows_per_row_block, + const std::string& rowset_path_prefix, bool delete_flag, int32_t segment_group_id, int32_t num_segments, bool is_pending, TPartitionId partition_id, TTransactionId transaction_id) : _tablet_id(tablet_id), + _rowset_id(rowset_id), _tablet_schema(tablet_schema), _num_key_fields(num_key_fields), _num_short_key_fields(num_short_key_fields), @@ -159,30 +163,8 @@ std::string SegmentGroup::_construct_pending_file_path(int32_t segment_id, const std::string SegmentGroup::_construct_file_path(int32_t segment_id, const string& suffix) const { std::stringstream prefix_stream; prefix_stream << _rowset_path_prefix << "/" << _tablet_id; - 
string path_prefix = prefix_stream.str(); - char file_path[OLAP_MAX_PATH_LEN]; - if (_segment_group_id == -1) { - snprintf(file_path, - sizeof(file_path), - "%s_%ld_%ld_%ld_%d.%s", - path_prefix.c_str(), - _version.first, - _version.second, - _version_hash, - segment_id, - suffix.c_str()); - } else { - snprintf(file_path, - sizeof(file_path), - "%s_%ld_%ld_%ld_%d_%d.%s", - path_prefix.c_str(), - _version.first, - _version.second, - _version_hash, - _segment_group_id, segment_id, - suffix.c_str()); - } - + std::string file_path = _rowset_path_prefix + "/" + std::to_string(_rowset_id) + + "_" + std::to_string(segment_id) + suffix; return file_path; } diff --git a/be/src/olap/rowset/segment_group.h b/be/src/olap/rowset/segment_group.h index ee169bd7..3b6a6885 100644 --- a/be/src/olap/rowset/segment_group.h +++ b/be/src/olap/rowset/segment_group.h @@ -47,12 +47,14 @@ namespace doris { class SegmentGroup { friend class MemIndex; public: - SegmentGroup(int64_t tablet_id, const RowFields& tablet_schema, int num_key_fields, int num_short_key_fields, - size_t num_rows_per_row_block, const std::string& rowset_path_prefix, Version version, + SegmentGroup(int64_t tablet_id, int64_t rowset_id, const RowFields& tablet_schema, + int num_key_fields, int num_short_key_fields, size_t num_rows_per_row_block, + const std::string& rowset_path_prefix, Version version, VersionHash version_hash, bool delete_flag, int segment_group_id, int32_t num_segments); - SegmentGroup(int64_t tablet_id, const RowFields& tablet_schema, int num_key_fields, int num_short_key_fields, - size_t num_rows_per_row_block, const std::string& rowset_path_prefix, bool delete_flag, + SegmentGroup(int64_t tablet_id, int64_t rowset_id, const RowFields& tablet_schema, + int num_key_fields, int num_short_key_fields, size_t num_rows_per_row_block, + const std::string& rowset_path_prefix, bool delete_flag, int32_t segment_group_id, int32_t num_segments, bool is_pending, TPartitionId partition_id, TTransactionId 
transaction_id); @@ -249,6 +251,7 @@ class SegmentGroup { private: int64_t _tablet_id; + int64_t _rowset_id; const RowFields& _tablet_schema; // tablet schema int _num_key_fields; // number of tablet keys int _num_short_key_fields; // number of tablet short keys diff --git a/be/src/olap/rowset/segment_reader.cpp b/be/src/olap/rowset/segment_reader.cpp index 51051b31..4aab5518 100644 --- a/be/src/olap/rowset/segment_reader.cpp +++ b/be/src/olap/rowset/segment_reader.cpp @@ -39,7 +39,6 @@ SegmentReader::SegmentReader( const std::vector<uint32_t>& used_columns, const std::set<uint32_t>& load_bf_columns, const Conditions* conditions, - const std::vector<ColumnPredicate*>* col_predicates, const DeleteHandler& delete_handler, const DelCondSatisfied delete_status, RuntimeState* runtime_state, diff --git a/be/src/olap/rowset/segment_reader.h b/be/src/olap/rowset/segment_reader.h index 2ce33c6c..9f33b35b 100644 --- a/be/src/olap/rowset/segment_reader.h +++ b/be/src/olap/rowset/segment_reader.h @@ -54,7 +54,6 @@ class SegmentReader { const std::vector<uint32_t>& used_columns, const std::set<uint32_t>& load_bf_columns, const Conditions* conditions, - const std::vector<ColumnPredicate*>* col_predicates, const DeleteHandler& delete_handler, const DelCondSatisfied delete_status, RuntimeState* runtime_state, @@ -289,11 +288,24 @@ class SegmentReader { std::string _file_name; // 文件名 SegmentGroup* _segment_group; uint32_t _segment_id; + // columns that can be used by client. when client seek to range's start or end, + // client may read more columns than normal read. + // For example: + // table1's schema is 'k1, k2, v1'. which k1, k2 is key column, v1 is value column. + // for query 'select sum(v1) from table1', client split all data to sub-range in logical, + // so, one sub-range need to seek to right position with k1 and k2; then only read v1. 
+ // In this situation, _used_columns contains (k1, k2, v1) + std::vector<uint32_t> _used_columns; + UniqueIdSet _load_bf_columns; + const Conditions* _conditions; // 列过滤条件 doris::FileHandler _file_handler; // 文件handler - const Conditions* _conditions; // 列过滤条件 + DeleteHandler _delete_handler; DelCondSatisfied _delete_status; + RuntimeState* _runtime_state; // 用于统计内存消耗等运行时信息 + OlapReaderStatistics* _stats; + Cache* _lru_cache; bool _eof; // eof标志 @@ -313,19 +325,10 @@ class SegmentReader { bool _null_supported; uint64_t _header_length; // Header(FixHeader+PB)大小,读数据时需要偏移 - // columns that can be used by client. when client seek to range's start or end, - // client may read more columns than normal read. - // For example: - // table1's schema is 'k1, k2, v1'. which k1, k2 is key column, v1 is value column. - // for query 'select sum(v1) from table1', client split all data to sub-range in logical, - // so, one sub-range need to seek to right position with k1 and k2; then only read v1. - // In this situation, _used_columns contains (k1, k2, v1) - std::vector<uint32_t> _used_columns; std::vector<ColumnReader*> _column_readers; // 实际的数据读取器 std::vector<StreamIndexReader*> _column_indices; // 保存column的index UniqueIdSet _include_columns; // 用于判断该列是不是被包含 - UniqueIdSet _load_bf_columns; UniqueIdSet _include_bf_columns; UniqueIdToColumnIdMap _tablet_id_to_unique_id_map; // tablet id到unique id的映射 UniqueIdToColumnIdMap _unique_id_to_tablet_id_map; // unique id到tablet id的映射 @@ -354,21 +357,17 @@ class SegmentReader { bool _is_data_loaded; size_t _buffer_size; - Cache* _lru_cache; std::vector<Cache::Handle*> _cache_handle; const FileHeader<ColumnDataHeaderMessage>* _file_header; std::unique_ptr<MemTracker> _tracker; std::unique_ptr<MemPool> _mem_pool; - RuntimeState* _runtime_state; // 用于统计内存消耗等运行时信息 StorageByteBuffer* _shared_buffer; // Set when seek_to_block is called, valid until next seek_to_block is called. 
bool _without_filter = false; - OlapReaderStatistics* _stats; - DISALLOW_COPY_AND_ASSIGN(SegmentReader); }; ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org