chaoyli closed pull request #492: Modify segment file name
URL: https://github.com/apache/incubator-doris/pull/492
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below, because GitHub no longer displays it once the pull request is merged:

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 825aaf80..a33959b9 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -551,7 +551,6 @@ add_subdirectory(${SRC_DIR}/util)
 add_subdirectory(${SRC_DIR}/gen_cpp)
 add_subdirectory(${SRC_DIR}/gutil)
 add_subdirectory(${SRC_DIR}/olap)
-add_subdirectory(${SRC_DIR}/olap/rowset)
 add_subdirectory(${SRC_DIR}/agent)
 add_subdirectory(${SRC_DIR}/http)
 add_subdirectory(${SRC_DIR}/service)
diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt
index f0e18335..c97f74f8 100644
--- a/be/src/olap/CMakeLists.txt
+++ b/be/src/olap/CMakeLists.txt
@@ -72,10 +72,7 @@ add_library(Olap STATIC
     types.cpp 
     utils.cpp
     wrapper_field.cpp
-    rowset/rowset_meta_manager.cpp
-    rowset/alpha_rowset.cpp
-    rowset/alpha_rowset_reader.cpp
-    rowset/alpha_rowset_builder.cpp
-    rowset/alpha_rowset_meta.cpp
     task/engine_schema_change_task.cpp
 )
+
+add_subdirectory(rowset)
diff --git a/be/src/olap/rowset/alpha_rowset.cpp b/be/src/olap/rowset/alpha_rowset.cpp
index 6c692852..7caccf25 100644
--- a/be/src/olap/rowset/alpha_rowset.cpp
+++ b/be/src/olap/rowset/alpha_rowset.cpp
@@ -81,7 +81,7 @@ NewStatus AlphaRowset::_init_segment_groups() {
         Version version = _rowset_meta->get_version();
         int64_t version_hash = _rowset_meta->get_version_hash();
         std::shared_ptr<SegmentGroup> segment_group(new 
SegmentGroup(_rowset_meta->get_tablet_id(),
-                _tablet_schema, _num_key_fields, _num_short_key_fields,
+                _rowset_meta->get_rowset_id(), _tablet_schema, 
_num_key_fields, _num_short_key_fields,
                 _num_rows_per_row_block, _rowset_path, version, version_hash,
                 false, segment_group_meta.segment_group_id(), 
segment_group_meta.num_segments()));
         if (segment_group.get() == nullptr) {
diff --git a/be/src/olap/rowset/alpha_rowset_builder.cpp b/be/src/olap/rowset/alpha_rowset_builder.cpp
index cdb75702..4851aef6 100644
--- a/be/src/olap/rowset/alpha_rowset_builder.cpp
+++ b/be/src/olap/rowset/alpha_rowset_builder.cpp
@@ -93,6 +93,7 @@ std::shared_ptr<Rowset> AlphaRowsetBuilder::build() {
 void AlphaRowsetBuilder::_init() {
     _segment_group_id++;
     _cur_segment_group = new SegmentGroup(_rowset_builder_context.tablet_id,
+            _rowset_builder_context.rowset_id,
             _rowset_builder_context.tablet_schema,
             _rowset_builder_context.num_key_fields,
             _rowset_builder_context.num_short_key_fields,
diff --git a/be/src/olap/rowset/column_data.cpp b/be/src/olap/rowset/column_data.cpp
index e27707a3..905ea819 100644
--- a/be/src/olap/rowset/column_data.cpp
+++ b/be/src/olap/rowset/column_data.cpp
@@ -127,7 +127,7 @@ OLAPStatus ColumnData::_seek_to_block(const RowBlockPosition& block_pos, bool wi
         _segment_reader = new(std::nothrow) SegmentReader(
                 file_name, segment_group(),  block_pos.segment,
                 _seek_columns, _load_bf_columns, _conditions,
-                _col_predicates, _delete_handler, _delete_status, 
_runtime_state, _stats, _lru_cache);
+                _delete_handler, _delete_status, _runtime_state, _stats, 
_lru_cache);
         if (_segment_reader == nullptr) {
             OLAP_LOG_WARNING("fail to malloc segment reader.");
             return OLAP_ERR_MALLOC_ERROR;
diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
index 50a13c61..6a001db4 100644
--- a/be/src/olap/rowset/rowset_meta.h
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -42,6 +42,11 @@ class RowsetMeta {
         return _deserialize_from_pb(pb_rowset_meta);
     }
 
+    virtual bool init_from_pb(const RowsetMetaPB& rowset_meta_pb) {
+        _rowset_meta_pb = rowset_meta_pb;
+        return true;
+    }
+
     virtual bool init_from_json(const std::string& json_rowset_meta) {
         bool ret = json2pb::JsonToProtoMessage(json_rowset_meta, 
&_rowset_meta_pb);
         return ret;
diff --git a/be/src/olap/rowset/segment_group.cpp b/be/src/olap/rowset/segment_group.cpp
index 284f87d2..167bb13e 100644
--- a/be/src/olap/rowset/segment_group.cpp
+++ b/be/src/olap/rowset/segment_group.cpp
@@ -60,10 +60,12 @@ namespace doris {
         } \
     } while (0);
 
-SegmentGroup::SegmentGroup(int64_t tablet_id, const RowFields& tablet_schema, 
int num_key_fields, int num_short_key_fields,
-            size_t num_rows_per_row_block, const std::string& 
rowset_path_prefix, Version version, VersionHash version_hash,
+SegmentGroup::SegmentGroup(int64_t tablet_id, int64_t rowset_id, const 
RowFields& tablet_schema,
+            int num_key_fields, int num_short_key_fields, size_t 
num_rows_per_row_block,
+            const std::string& rowset_path_prefix, Version version, 
VersionHash version_hash,
             bool delete_flag, int32_t segment_group_id, int32_t num_segments)
       : _tablet_id(tablet_id),
+        _rowset_id(rowset_id),
         _tablet_schema(tablet_schema),
         _num_key_fields(num_key_fields),
         _num_short_key_fields(num_short_key_fields),
@@ -98,10 +100,12 @@ SegmentGroup::SegmentGroup(int64_t tablet_id, const RowFields& tablet_schema, in
     }
 }
 
-SegmentGroup::SegmentGroup(int64_t tablet_id, const RowFields& tablet_schema, 
int num_key_fields, int num_short_key_fields,
-        size_t num_rows_per_row_block, const std::string& rowset_path_prefix, 
bool delete_flag,
+SegmentGroup::SegmentGroup(int64_t tablet_id, int64_t rowset_id, const 
RowFields& tablet_schema,
+        int num_key_fields, int num_short_key_fields, size_t 
num_rows_per_row_block,
+        const std::string& rowset_path_prefix, bool delete_flag,
         int32_t segment_group_id, int32_t num_segments, bool is_pending,
         TPartitionId partition_id, TTransactionId transaction_id) : 
_tablet_id(tablet_id),
+        _rowset_id(rowset_id),
         _tablet_schema(tablet_schema),
         _num_key_fields(num_key_fields),
         _num_short_key_fields(num_short_key_fields),
@@ -159,30 +163,8 @@ std::string SegmentGroup::_construct_pending_file_path(int32_t segment_id, const
 std::string SegmentGroup::_construct_file_path(int32_t segment_id, const 
string& suffix) const {
     std::stringstream prefix_stream;
     prefix_stream << _rowset_path_prefix << "/" << _tablet_id;
-    string path_prefix = prefix_stream.str();
-    char file_path[OLAP_MAX_PATH_LEN];
-    if (_segment_group_id == -1) {
-        snprintf(file_path,
-                 sizeof(file_path),
-                 "%s_%ld_%ld_%ld_%d.%s",
-                 path_prefix.c_str(),
-                 _version.first,
-                 _version.second,
-                 _version_hash,
-                 segment_id,
-                 suffix.c_str());
-    } else {
-        snprintf(file_path,
-                 sizeof(file_path),
-                 "%s_%ld_%ld_%ld_%d_%d.%s",
-                 path_prefix.c_str(),
-                 _version.first,
-                 _version.second,
-                 _version_hash,
-                 _segment_group_id, segment_id,
-                 suffix.c_str());
-    }
-
+    std::string file_path = _rowset_path_prefix + "/" + 
std::to_string(_rowset_id)
+            + "_" + std::to_string(segment_id) + suffix;
     return file_path;
 }
 
diff --git a/be/src/olap/rowset/segment_group.h b/be/src/olap/rowset/segment_group.h
index ee169bd7..3b6a6885 100644
--- a/be/src/olap/rowset/segment_group.h
+++ b/be/src/olap/rowset/segment_group.h
@@ -47,12 +47,14 @@ namespace doris {
 class SegmentGroup {
     friend class MemIndex;
 public:
-    SegmentGroup(int64_t tablet_id, const RowFields& tablet_schema, int 
num_key_fields, int num_short_key_fields,
-            size_t num_rows_per_row_block, const std::string& 
rowset_path_prefix, Version version,
+    SegmentGroup(int64_t tablet_id, int64_t rowset_id, const RowFields& 
tablet_schema,
+            int num_key_fields, int num_short_key_fields, size_t 
num_rows_per_row_block,
+            const std::string& rowset_path_prefix, Version version,
             VersionHash version_hash, bool delete_flag, int segment_group_id, 
int32_t num_segments);
 
-    SegmentGroup(int64_t tablet_id, const RowFields& tablet_schema, int 
num_key_fields, int num_short_key_fields,
-            size_t num_rows_per_row_block, const std::string& 
rowset_path_prefix, bool delete_flag,
+    SegmentGroup(int64_t tablet_id, int64_t rowset_id, const RowFields& 
tablet_schema,
+            int num_key_fields, int num_short_key_fields, size_t 
num_rows_per_row_block,
+            const std::string& rowset_path_prefix, bool delete_flag,
             int32_t segment_group_id, int32_t num_segments, bool is_pending,
             TPartitionId partition_id, TTransactionId transaction_id);
 
@@ -249,6 +251,7 @@ class SegmentGroup {
 
 private:
     int64_t _tablet_id;
+    int64_t _rowset_id;
     const RowFields& _tablet_schema;    // tablet schema
     int _num_key_fields;    // number of tablet keys
     int _num_short_key_fields;  // number of tablet short keys
diff --git a/be/src/olap/rowset/segment_reader.cpp b/be/src/olap/rowset/segment_reader.cpp
index 51051b31..4aab5518 100644
--- a/be/src/olap/rowset/segment_reader.cpp
+++ b/be/src/olap/rowset/segment_reader.cpp
@@ -39,7 +39,6 @@ SegmentReader::SegmentReader(
         const std::vector<uint32_t>& used_columns,
         const std::set<uint32_t>& load_bf_columns,
         const Conditions* conditions,
-        const std::vector<ColumnPredicate*>* col_predicates,
         const DeleteHandler& delete_handler,
         const DelCondSatisfied delete_status,
         RuntimeState* runtime_state,
diff --git a/be/src/olap/rowset/segment_reader.h b/be/src/olap/rowset/segment_reader.h
index 2ce33c6c..9f33b35b 100644
--- a/be/src/olap/rowset/segment_reader.h
+++ b/be/src/olap/rowset/segment_reader.h
@@ -54,7 +54,6 @@ class SegmentReader {
             const std::vector<uint32_t>& used_columns,
             const std::set<uint32_t>& load_bf_columns,
             const Conditions* conditions,
-            const std::vector<ColumnPredicate*>* col_predicates,
             const DeleteHandler& delete_handler,
             const DelCondSatisfied delete_status,
             RuntimeState* runtime_state,
@@ -289,11 +288,24 @@ class SegmentReader {
     std::string _file_name;                // 文件名
     SegmentGroup* _segment_group;
     uint32_t _segment_id;
+    // columns that can be used by client. when client seek to range's start 
or end,
+    // client may read more columns than normal read.
+    // For example: 
+    //  table1's schema is 'k1, k2, v1'. which k1, k2 is key column, v1 is 
value column.
+    //  for query 'select sum(v1) from table1', client split all data to 
sub-range in logical,
+    //  so, one sub-range need to seek to right position with k1 and k2; then 
only read v1.
+    //  In this situation, _used_columns contains (k1, k2, v1)
+    std::vector<uint32_t> _used_columns;
+    UniqueIdSet _load_bf_columns;
+    const Conditions* _conditions;         // 列过滤条件
     doris::FileHandler _file_handler;             // 文件handler
 
-    const Conditions* _conditions;         // 列过滤条件
+    
     DeleteHandler _delete_handler;
     DelCondSatisfied _delete_status;
+    RuntimeState* _runtime_state;  // 用于统计内存消耗等运行时信息
+    OlapReaderStatistics* _stats;
+    Cache* _lru_cache;
 
     bool _eof;                             // eof标志
 
@@ -313,19 +325,10 @@ class SegmentReader {
     bool _null_supported;
     uint64_t _header_length;           // Header(FixHeader+PB)大小,读数据时需要偏移
 
-    // columns that can be used by client. when client seek to range's start 
or end,
-    // client may read more columns than normal read.
-    // For example: 
-    //  table1's schema is 'k1, k2, v1'. which k1, k2 is key column, v1 is 
value column.
-    //  for query 'select sum(v1) from table1', client split all data to 
sub-range in logical,
-    //  so, one sub-range need to seek to right position with k1 and k2; then 
only read v1.
-    //  In this situation, _used_columns contains (k1, k2, v1)
-    std::vector<uint32_t> _used_columns;
     std::vector<ColumnReader*> _column_readers;    // 实际的数据读取器
     std::vector<StreamIndexReader*> _column_indices; // 保存column的index
 
     UniqueIdSet _include_columns;           // 用于判断该列是不是被包含
-    UniqueIdSet _load_bf_columns;
     UniqueIdSet _include_bf_columns;
     UniqueIdToColumnIdMap _tablet_id_to_unique_id_map; // tablet id到unique 
id的映射
     UniqueIdToColumnIdMap _unique_id_to_tablet_id_map; // unique id到tablet 
id的映射
@@ -354,21 +357,17 @@ class SegmentReader {
     bool _is_data_loaded;
     size_t _buffer_size;
 
-    Cache* _lru_cache;
     std::vector<Cache::Handle*> _cache_handle;
     const FileHeader<ColumnDataHeaderMessage>* _file_header;
 
     std::unique_ptr<MemTracker> _tracker;
     std::unique_ptr<MemPool> _mem_pool;
 
-    RuntimeState* _runtime_state;  // 用于统计内存消耗等运行时信息
     StorageByteBuffer* _shared_buffer;
 
     // Set when seek_to_block is called, valid until next seek_to_block is 
called.
     bool _without_filter = false;
 
-    OlapReaderStatistics* _stats;
-
     DISALLOW_COPY_AND_ASSIGN(SegmentReader);
 };
 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org
For additional commands, e-mail: dev-h...@doris.apache.org

Reply via email to