yiguolei commented on code in PR #51271:
URL: https://github.com/apache/doris/pull/51271#discussion_r2119322858


##########
be/src/olap/rowset/beta_rowset.cpp:
##########
@@ -70,23 +74,83 @@ Status BetaRowset::init() {
 }
 
 Status BetaRowset::get_segment_num_rows(std::vector<uint32_t>* segment_rows) {
-    DCHECK(_rowset_state_machine.rowset_state() == ROWSET_LOADED);
+    RETURN_IF_ERROR(load_segments_info());
+    segment_rows->assign(_segments_rows.cbegin(), _segments_rows.cend());
+    return Status::OK();
+}
+
+Status BetaRowset::load_segments_info() {
+    return _load_segments_info_once.call([this] {
+        if (_rowset_state_machine.rowset_state() != ROWSET_LOADED) {
+            return Status::InternalError(
+                    "rowset {} was not loaded({})", rowset_id().to_string(),
+                    
static_cast<int32_t>(_rowset_state_machine.rowset_state()));
+        }
 
-    RETURN_IF_ERROR(_load_segment_rows_once.call([this] {
         auto segment_count = num_segments();
         _segments_rows.resize(segment_count);
+
+        int64_t invalid_segment_id = -1;
+        std::string invalid_reason;
+        size_t columns_has_zone_map_count = 0;
+        DorisMap<uint32_t, std::vector<ZoneMapPB>> columns_zone_maps;
+        std::map<uint32_t, std::pair<FieldType, int32_t>> 
columns_type_and_length;
+
         for (int64_t i = 0; i != segment_count; ++i) {
             SegmentCacheHandle segment_cache_handle;
             RETURN_IF_ERROR(SegmentLoader::instance()->load_segment(
                     std::static_pointer_cast<BetaRowset>(shared_from_this()), 
i,
                     &segment_cache_handle, false, false));
-            const auto& tmp_segments = segment_cache_handle.get_segments();
-            _segments_rows[i] = tmp_segments[0]->num_rows();
+            const auto& segment = segment_cache_handle.get_segments()[0];
+            _segments_rows[i] = segment->num_rows();
+
+            if (config::cache_zone_map_max_columns_count <= 0) {
+                continue;
+            }
+
+            auto zone_maps = segment->get_zone_maps();
+            if (zone_maps.empty()) {
+                if (_segments_rows[i] != 0) {
+                    invalid_segment_id = i;
+                    invalid_reason = "has no zonemap data";
+                }
+                continue;
+            }
+
+            if (invalid_segment_id != -1) {
+                continue;
+            }
+
+            if (columns_has_zone_map_count == 0) {
+                columns_has_zone_map_count = zone_maps.size();
+            } else if (columns_has_zone_map_count != zone_maps.size()) {
+                invalid_segment_id = i;
+                invalid_reason = fmt::format("zone maps count not matched {} 
vs {}",
+                                             columns_has_zone_map_count, 
zone_maps.size());
+                continue;
+            }
+
+            for (auto&& [uid, zone_map] : zone_maps) {
+                
columns_zone_maps[uid].emplace_back(std::move(zone_map.zone_map));
+                columns_type_and_length[uid].first = zone_map.field_type;
+                columns_type_and_length[uid].second = zone_map.field_length;
+            }
+        }
+
+        if (invalid_segment_id != -1) {
+            LOG(INFO) << "has invalid segment, rowset_id=" << rowset_id()
+                      << ", segment id: " << invalid_segment_id
+                      << ", skip build rowset level zonemap, reason: " << 
invalid_reason;
+        } else {
+            for (auto&& [col_uid, zone_maps] : columns_zone_maps) {
+                auto st = ColumnReader::merge_zone_maps(

Review Comment:
   这里得 check 一下：每列的 zonemap 数量必须等于 segment 的数量，防止出现有的列在 segment 1、2 上有 zonemap，而有的列在 segment 2、3 上有 zonemap 的现象。



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to