morningman commented on code in PR #54240:
URL: https://github.com/apache/doris/pull/54240#discussion_r2252624304
##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -970,38 +976,61 @@ Status OrcReader::set_fill_columns(
// std::unordered_map<column_name, std::pair<col_id, slot_id>>
std::unordered_map<std::string, std::pair<uint32_t, int>>
predicate_table_columns;
std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
- if (auto* slot_ref = typeid_cast<VSlotRef*>(expr)) {
+ if (expr->is_slot_ref()) {
+ VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
auto expr_name = slot_ref->expr_name();
predicate_table_columns.emplace(
expr_name, std::make_pair(slot_ref->column_id(),
slot_ref->slot_id()));
if (slot_ref->column_id() == 0) {
_lazy_read_ctx.resize_first_column = false;
}
return;
- } else if (auto* runtime_filter =
typeid_cast<VRuntimeFilterWrapper*>(expr)) {
- auto* filter_impl =
const_cast<VExpr*>(runtime_filter->get_impl().get());
- if (auto* bloom_predicate =
typeid_cast<VBloomPredicate*>(filter_impl)) {
- for (const auto& child : bloom_predicate->children()) {
+ }
+ for (auto& child : expr->children()) {
+ visit_slot(child.get());
+ }
+ };
+
+ for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+ auto expr = conjunct->root();
+
+ if (VRuntimeFilterWrapper* runtime_filter =
Review Comment:
Why move this type check out of the `visit_slot` method? Is there any
difference?
##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -970,38 +976,61 @@ Status OrcReader::set_fill_columns(
// std::unordered_map<column_name, std::pair<col_id, slot_id>>
std::unordered_map<std::string, std::pair<uint32_t, int>>
predicate_table_columns;
std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
- if (auto* slot_ref = typeid_cast<VSlotRef*>(expr)) {
+ if (expr->is_slot_ref()) {
+ VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
auto expr_name = slot_ref->expr_name();
predicate_table_columns.emplace(
expr_name, std::make_pair(slot_ref->column_id(),
slot_ref->slot_id()));
if (slot_ref->column_id() == 0) {
_lazy_read_ctx.resize_first_column = false;
}
return;
- } else if (auto* runtime_filter =
typeid_cast<VRuntimeFilterWrapper*>(expr)) {
- auto* filter_impl =
const_cast<VExpr*>(runtime_filter->get_impl().get());
- if (auto* bloom_predicate =
typeid_cast<VBloomPredicate*>(filter_impl)) {
- for (const auto& child : bloom_predicate->children()) {
+ }
+ for (auto& child : expr->children()) {
+ visit_slot(child.get());
+ }
+ };
+
+ for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+ auto expr = conjunct->root();
+
+ if (VRuntimeFilterWrapper* runtime_filter =
+ typeid_cast<VRuntimeFilterWrapper*>(expr.get())) {
+ auto filter_impl = runtime_filter->get_impl();
+ if (VBloomPredicate* bloom_predicate =
+ typeid_cast<VBloomPredicate*>(filter_impl.get())) {
+ for (auto& child : bloom_predicate->children()) {
visit_slot(child.get());
}
- } else if (auto* in_predicate =
typeid_cast<VInPredicate*>(filter_impl)) {
- if (!in_predicate->children().empty()) {
+ } else if (VDirectInPredicate* in_predicate =
Review Comment:
Where does `VDirectInPredicate` come from?
##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -970,38 +976,61 @@ Status OrcReader::set_fill_columns(
// std::unordered_map<column_name, std::pair<col_id, slot_id>>
std::unordered_map<std::string, std::pair<uint32_t, int>>
predicate_table_columns;
std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
- if (auto* slot_ref = typeid_cast<VSlotRef*>(expr)) {
+ if (expr->is_slot_ref()) {
+ VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
auto expr_name = slot_ref->expr_name();
predicate_table_columns.emplace(
expr_name, std::make_pair(slot_ref->column_id(),
slot_ref->slot_id()));
if (slot_ref->column_id() == 0) {
_lazy_read_ctx.resize_first_column = false;
}
return;
- } else if (auto* runtime_filter =
typeid_cast<VRuntimeFilterWrapper*>(expr)) {
- auto* filter_impl =
const_cast<VExpr*>(runtime_filter->get_impl().get());
- if (auto* bloom_predicate =
typeid_cast<VBloomPredicate*>(filter_impl)) {
- for (const auto& child : bloom_predicate->children()) {
+ }
+ for (auto& child : expr->children()) {
+ visit_slot(child.get());
+ }
+ };
+
+ for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+ auto expr = conjunct->root();
+
+ if (VRuntimeFilterWrapper* runtime_filter =
+ typeid_cast<VRuntimeFilterWrapper*>(expr.get())) {
+ auto filter_impl = runtime_filter->get_impl();
+ if (VBloomPredicate* bloom_predicate =
+ typeid_cast<VBloomPredicate*>(filter_impl.get())) {
+ for (auto& child : bloom_predicate->children()) {
visit_slot(child.get());
}
- } else if (auto* in_predicate =
typeid_cast<VInPredicate*>(filter_impl)) {
- if (!in_predicate->children().empty()) {
+ } else if (VDirectInPredicate* in_predicate =
+
typeid_cast<VDirectInPredicate*>(filter_impl.get())) {
+ if (in_predicate->get_num_children() > 0) {
+ expr = runtime_filter->get_impl();
+
visit_slot(in_predicate->children()[0].get());
}
} else {
- for (const auto& child : filter_impl->children()) {
+ for (auto& child : filter_impl->children()) {
visit_slot(child.get());
}
}
- } else {
- for (const auto& child : expr->children()) {
- visit_slot(child.get());
+ } else if (VTopNPred* topn_pred = typeid_cast<VTopNPred*>(
+ expr.get())) { // top runtime filter : only le &&
ge.
+ if (topn_pred->has_value()) {
+ expr = topn_pred->get_binary_expr();
Review Comment:
Why does only the ORC reader need to call `get_binary_expr`?
##########
be/src/vec/exprs/vtopn_pred.h:
##########
@@ -114,6 +115,50 @@ class VTopNPred : public VExpr {
const std::string& expr_name() const override { return _expr_name; }
+ bool has_value() const { return _predicate->has_value(); }
+
+ VExprSPtr get_binary_expr() const {
+ VExprSPtr root;
+
+ {
+ TFunction fn;
+ TFunctionName fn_name;
+ fn_name.__set_db_name("");
+ fn_name.__set_function_name(_predicate->is_asc() ? "le" : "ge");
+ fn.__set_name(fn_name);
+ fn.__set_binary_type(TFunctionBinaryType::BUILTIN);
+ std::vector<TTypeDesc> arg_types;
+ arg_types.push_back(create_type_desc(PrimitiveType::TYPE_INT));
Review Comment:
Why use `TYPE_INT`?
##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -970,38 +976,61 @@ Status OrcReader::set_fill_columns(
// std::unordered_map<column_name, std::pair<col_id, slot_id>>
std::unordered_map<std::string, std::pair<uint32_t, int>>
predicate_table_columns;
std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
- if (auto* slot_ref = typeid_cast<VSlotRef*>(expr)) {
+ if (expr->is_slot_ref()) {
+ VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
auto expr_name = slot_ref->expr_name();
predicate_table_columns.emplace(
expr_name, std::make_pair(slot_ref->column_id(),
slot_ref->slot_id()));
if (slot_ref->column_id() == 0) {
_lazy_read_ctx.resize_first_column = false;
}
return;
- } else if (auto* runtime_filter =
typeid_cast<VRuntimeFilterWrapper*>(expr)) {
- auto* filter_impl =
const_cast<VExpr*>(runtime_filter->get_impl().get());
- if (auto* bloom_predicate =
typeid_cast<VBloomPredicate*>(filter_impl)) {
- for (const auto& child : bloom_predicate->children()) {
+ }
+ for (auto& child : expr->children()) {
+ visit_slot(child.get());
+ }
+ };
+
+ for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+ auto expr = conjunct->root();
+
+ if (VRuntimeFilterWrapper* runtime_filter =
+ typeid_cast<VRuntimeFilterWrapper*>(expr.get())) {
+ auto filter_impl = runtime_filter->get_impl();
+ if (VBloomPredicate* bloom_predicate =
+ typeid_cast<VBloomPredicate*>(filter_impl.get())) {
+ for (auto& child : bloom_predicate->children()) {
visit_slot(child.get());
}
- } else if (auto* in_predicate =
typeid_cast<VInPredicate*>(filter_impl)) {
- if (!in_predicate->children().empty()) {
+ } else if (VDirectInPredicate* in_predicate =
+
typeid_cast<VDirectInPredicate*>(filter_impl.get())) {
+ if (in_predicate->get_num_children() > 0) {
+ expr = runtime_filter->get_impl();
+
visit_slot(in_predicate->children()[0].get());
}
} else {
- for (const auto& child : filter_impl->children()) {
+ for (auto& child : filter_impl->children()) {
visit_slot(child.get());
}
}
- } else {
- for (const auto& child : expr->children()) {
- visit_slot(child.get());
+ } else if (VTopNPred* topn_pred = typeid_cast<VTopNPred*>(
Review Comment:
When is `VTopNPred` added to `_lazy_read_ctx.conjuncts`?
##########
be/src/vec/exec/format/parquet/vparquet_reader.cpp:
##########
@@ -358,40 +553,68 @@ Status ParquetReader::set_fill_columns(
// std::unordered_map<column_name, std::pair<col_id, slot_id>>
std::unordered_map<std::string, std::pair<uint32_t, int>>
predicate_columns;
std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
- if (VSlotRef* slot_ref = typeid_cast<VSlotRef*>(expr)) {
+ if (expr->is_slot_ref()) {
+ VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
auto expr_name = slot_ref->expr_name();
predicate_columns.emplace(expr_name,
std::make_pair(slot_ref->column_id(),
slot_ref->slot_id()));
if (slot_ref->column_id() == 0) {
_lazy_read_ctx.resize_first_column = false;
}
return;
- } else if (VRuntimeFilterWrapper* runtime_filter =
- typeid_cast<VRuntimeFilterWrapper*>(expr)) {
- VExpr* filter_impl =
const_cast<VExpr*>(runtime_filter->get_impl().get());
- if (VBloomPredicate* bloom_predicate =
typeid_cast<VBloomPredicate*>(filter_impl)) {
+ }
+ for (auto& child : expr->children()) {
+ visit_slot(child.get());
+ }
+ };
+
+ for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+ auto expr = conjunct->root();
+
+ if (VRuntimeFilterWrapper* runtime_filter =
+ typeid_cast<VRuntimeFilterWrapper*>(expr.get())) {
+ auto filter_impl = runtime_filter->get_impl();
+ if (VBloomPredicate* bloom_predicate =
+ typeid_cast<VBloomPredicate*>(filter_impl.get())) {
for (auto& child : bloom_predicate->children()) {
visit_slot(child.get());
}
- } else if (VInPredicate* in_predicate =
typeid_cast<VInPredicate*>(filter_impl)) {
+ } else if (VDirectInPredicate* in_predicate =
+
typeid_cast<VDirectInPredicate*>(filter_impl.get())) {
if (in_predicate->get_num_children() > 0) {
+ expr = runtime_filter->get_impl();
+
visit_slot(in_predicate->children()[0].get());
}
} else {
for (auto& child : filter_impl->children()) {
visit_slot(child.get());
}
}
- } else {
- for (auto& child : expr->children()) {
- visit_slot(child.get());
+ } else if (VTopNPred* topn_pred = typeid_cast<VTopNPred*>(
+ expr.get())) { // top runtime filter : only le &&
ge.
+ if (topn_pred->has_value()) {
+ expr = topn_pred->get_binary_expr();
+
+ DCHECK(topn_pred->children().size() > 0);
+ visit_slot(topn_pred->children()[0].get());
+ } else {
+ continue;
}
+ } else {
+ visit_slot(expr.get());
}
- };
- if (!_lazy_read_ctx.conjuncts.empty()) {
- for (auto& conjunct : _lazy_read_ctx.conjuncts) {
- visit_slot(conjunct->root().get());
+ if (_check_expr_can_push_down(expr)) {
+ _push_down_exprs.emplace_back(expr);
+ if (expr->node_type() != TExprNodeType::COMPOUND_PRED) {
+ // for page index filter.
+ VSlotRef* slot_ref =
static_cast<VSlotRef*>(expr->children()[0].get());
+ if (_push_down_simple_expr.contains(slot_ref->slot_id())) {
Review Comment:
```suggestion
if (!_push_down_simple_expr.contains(slot_ref->slot_id())) {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]