yiguolei commented on code in PR #28103:
URL: https://github.com/apache/doris/pull/28103#discussion_r1435918560


##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -430,6 +456,53 @@ Status SegmentIterator::_get_row_ranges_by_keys() {
     return Status::OK();
 }
 
+Status SegmentIterator::_prepare_seek(const StorageReadOptions::SplitKeyRange& key_range) {
+    std::vector<const Field*> key_fields;
+    std::set<uint32_t> column_set;
+    for (auto cid : key_range.lower_key.schema->column_ids()) {
+        column_set.emplace(cid);
+        key_fields.emplace_back(key_range.lower_key.schema->column(cid));
+    }
+
+    for (auto cid : key_range.upper_key.schema->column_ids()) {
+        if (!column_set.contains(cid)) {
+            key_fields.emplace_back(key_range.upper_key.schema->column(cid));
+            column_set.emplace(cid);
+        }
+    }
+
+    if (!_seek_schema) {
+        _seek_schema = std::make_unique<Schema>(key_fields, key_fields.size());
+    }
+    // todo(wb) need refactor here, when using pk to search, _seek_block is useless
+    if (_seek_block.empty()) {
+        _seek_block.resize(_seek_schema->num_column_ids());
+        int i = 0;
+        for (auto cid : _seek_schema->column_ids()) {
+            const auto* column_desc = _seek_schema->column(cid);
+            _seek_block[i] = Schema::get_column_by_field(*column_desc);
+            i++;
+        }
+    }
+
+    // create used column iterator
+    for (auto cid : _seek_schema->column_ids()) {
+        if (_column_iterators[cid] == nullptr) {
+            RETURN_IF_ERROR(_segment->new_column_iterator(_opts.tablet_schema->column(cid),
+                                                          &_column_iterators[cid], &_opts));
+            ColumnIteratorOptions iter_opts {
+                    .use_page_cache = _opts.use_page_cache,
+                    .file_reader = _file_reader.get(),
+                    .stats = _opts.stats,
+                    .io_ctx = _opts.io_ctx,
+            };
+            RETURN_IF_ERROR(_column_iterators[cid]->init(iter_opts));
+        }
+    }
+
+    return Status::OK();
+}

Review Comment:
   parallel build
   1. 拿到 where 条件里的 key range。
   2. 对 key range 进行拆分，拆成多个子 key range；此时这些子 key range 合起来 == 原来的 key range。
   3. 把 key range、row range 下推到 rowset reader 和 segment iterator ---



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to