morningman commented on code in PR #47025: URL: https://github.com/apache/doris/pull/47025#discussion_r1946577050
########## be/src/vec/exec/scan/vfile_scanner.cpp: ########## @@ -174,6 +187,113 @@ Status VFileScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conju return Status::OK(); } +// check if the expr is a partition pruning expr +bool VFileScanner::_check_partition_pruning_expr(const VExprSPtr& expr) { + if (expr->is_slot_ref()) { + auto* slot_ref = static_cast<VSlotRef*>(expr.get()); + return _partition_slot_index_map.find(slot_ref->slot_id()) != + _partition_slot_index_map.end(); + } + if (expr->is_literal()) { + return true; + } + return std::ranges::all_of(expr->children(), [this](const auto& child) { + return _check_partition_pruning_expr(child); + }); +} + +void VFileScanner::_init_runtime_filter_partition_pruning_ctxs() { + if (_partition_slot_index_map.empty()) { + return; + } + _runtime_filter_partition_pruning_ctxs.clear(); + for (auto& conjunct : _conjuncts) { + auto impl = conjunct->root()->get_impl(); + // If impl is not null, which means this a conjuncts from runtime filter. + auto expr = impl ? impl : conjunct->root(); + if (_check_partition_pruning_expr(expr)) { + _runtime_filter_partition_pruning_ctxs.emplace_back(conjunct); + } + } + // init block with empty column + for (auto const* slot_desc : _real_tuple_desc->slots()) { + if (!slot_desc->need_materialize()) { + // should be ignored from reading + continue; + } + _runtime_filter_partition_pruning_block.insert( + ColumnWithTypeAndName(slot_desc->get_empty_mutable_column(), + slot_desc->get_data_type_ptr(), slot_desc->col_name())); + } +} + +Status VFileScanner::_process_runtime_filters_partition_pruning(bool& can_filter_all) { + SCOPED_TIMER(_runtime_filter_partition_pruning_timer); + if (_runtime_filter_partition_pruning_ctxs.empty() || _partition_col_descs.empty()) { + return Status::OK(); + } + size_t partition_value_column_size = 1; + + // 1. Get partition key values to string columns. + std::unordered_map<SlotId, MutableColumnPtr> parititon_slot_id_to_column; + for (auto const& partition_col_desc : _partition_col_descs) { + const auto& [partition_value, partition_slot_desc] = partition_col_desc.second; + auto test_serde = partition_slot_desc->get_data_type_ptr()->get_serde(); + auto partition_value_column = partition_slot_desc->get_data_type_ptr()->create_column(); + auto* col_ptr = static_cast<IColumn*>(partition_value_column.get()); + Slice slice(partition_value.data(), partition_value.size()); + int num_deserialized = 0; + RETURN_IF_ERROR(test_serde->deserialize_column_from_fixed_json( + *col_ptr, slice, partition_value_column_size, &num_deserialized, {})); + parititon_slot_id_to_column[partition_slot_desc->id()] = std::move(partition_value_column); + } + + // 2. Fill _runtime_filter_partition_pruning_block from the partition column, then execute conjuncts and filter block. + // 2.1 Fill _runtime_filter_partition_pruning_block from the partition column to match the conjuncts executing. + size_t index = 0; + bool first_cloumn_filled = false; Review Comment: ```suggestion bool first_column_filled = false; ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org