morningman commented on code in PR #54240:
URL: https://github.com/apache/doris/pull/54240#discussion_r2252624304


##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -970,38 +976,61 @@ Status OrcReader::set_fill_columns(
     // std::unordered_map<column_name, std::pair<col_id, slot_id>>
     std::unordered_map<std::string, std::pair<uint32_t, int>> 
predicate_table_columns;
     std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
-        if (auto* slot_ref = typeid_cast<VSlotRef*>(expr)) {
+        if (expr->is_slot_ref()) {
+            VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
             auto expr_name = slot_ref->expr_name();
             predicate_table_columns.emplace(
                     expr_name, std::make_pair(slot_ref->column_id(), 
slot_ref->slot_id()));
             if (slot_ref->column_id() == 0) {
                 _lazy_read_ctx.resize_first_column = false;
             }
             return;
-        } else if (auto* runtime_filter = 
typeid_cast<VRuntimeFilterWrapper*>(expr)) {
-            auto* filter_impl = 
const_cast<VExpr*>(runtime_filter->get_impl().get());
-            if (auto* bloom_predicate = 
typeid_cast<VBloomPredicate*>(filter_impl)) {
-                for (const auto& child : bloom_predicate->children()) {
+        }
+        for (auto& child : expr->children()) {
+            visit_slot(child.get());
+        }
+    };
+
+    for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+        auto expr = conjunct->root();
+
+        if (VRuntimeFilterWrapper* runtime_filter =

Review Comment:
   Why was this type check moved out of the `visit_slot` method? Is there any 
difference?



##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -970,38 +976,61 @@ Status OrcReader::set_fill_columns(
     // std::unordered_map<column_name, std::pair<col_id, slot_id>>
     std::unordered_map<std::string, std::pair<uint32_t, int>> 
predicate_table_columns;
     std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
-        if (auto* slot_ref = typeid_cast<VSlotRef*>(expr)) {
+        if (expr->is_slot_ref()) {
+            VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
             auto expr_name = slot_ref->expr_name();
             predicate_table_columns.emplace(
                     expr_name, std::make_pair(slot_ref->column_id(), 
slot_ref->slot_id()));
             if (slot_ref->column_id() == 0) {
                 _lazy_read_ctx.resize_first_column = false;
             }
             return;
-        } else if (auto* runtime_filter = 
typeid_cast<VRuntimeFilterWrapper*>(expr)) {
-            auto* filter_impl = 
const_cast<VExpr*>(runtime_filter->get_impl().get());
-            if (auto* bloom_predicate = 
typeid_cast<VBloomPredicate*>(filter_impl)) {
-                for (const auto& child : bloom_predicate->children()) {
+        }
+        for (auto& child : expr->children()) {
+            visit_slot(child.get());
+        }
+    };
+
+    for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+        auto expr = conjunct->root();
+
+        if (VRuntimeFilterWrapper* runtime_filter =
+                    typeid_cast<VRuntimeFilterWrapper*>(expr.get())) {
+            auto filter_impl = runtime_filter->get_impl();
+            if (VBloomPredicate* bloom_predicate =
+                        typeid_cast<VBloomPredicate*>(filter_impl.get())) {
+                for (auto& child : bloom_predicate->children()) {
                     visit_slot(child.get());
                 }
-            } else if (auto* in_predicate = 
typeid_cast<VInPredicate*>(filter_impl)) {
-                if (!in_predicate->children().empty()) {
+            } else if (VDirectInPredicate* in_predicate =

Review Comment:
   Where does `VDirectInPredicate` come from?



##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -970,38 +976,61 @@ Status OrcReader::set_fill_columns(
     // std::unordered_map<column_name, std::pair<col_id, slot_id>>
     std::unordered_map<std::string, std::pair<uint32_t, int>> 
predicate_table_columns;
     std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
-        if (auto* slot_ref = typeid_cast<VSlotRef*>(expr)) {
+        if (expr->is_slot_ref()) {
+            VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
             auto expr_name = slot_ref->expr_name();
             predicate_table_columns.emplace(
                     expr_name, std::make_pair(slot_ref->column_id(), 
slot_ref->slot_id()));
             if (slot_ref->column_id() == 0) {
                 _lazy_read_ctx.resize_first_column = false;
             }
             return;
-        } else if (auto* runtime_filter = 
typeid_cast<VRuntimeFilterWrapper*>(expr)) {
-            auto* filter_impl = 
const_cast<VExpr*>(runtime_filter->get_impl().get());
-            if (auto* bloom_predicate = 
typeid_cast<VBloomPredicate*>(filter_impl)) {
-                for (const auto& child : bloom_predicate->children()) {
+        }
+        for (auto& child : expr->children()) {
+            visit_slot(child.get());
+        }
+    };
+
+    for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+        auto expr = conjunct->root();
+
+        if (VRuntimeFilterWrapper* runtime_filter =
+                    typeid_cast<VRuntimeFilterWrapper*>(expr.get())) {
+            auto filter_impl = runtime_filter->get_impl();
+            if (VBloomPredicate* bloom_predicate =
+                        typeid_cast<VBloomPredicate*>(filter_impl.get())) {
+                for (auto& child : bloom_predicate->children()) {
                     visit_slot(child.get());
                 }
-            } else if (auto* in_predicate = 
typeid_cast<VInPredicate*>(filter_impl)) {
-                if (!in_predicate->children().empty()) {
+            } else if (VDirectInPredicate* in_predicate =
+                               
typeid_cast<VDirectInPredicate*>(filter_impl.get())) {
+                if (in_predicate->get_num_children() > 0) {
+                    expr = runtime_filter->get_impl();
+
                     visit_slot(in_predicate->children()[0].get());
                 }
             } else {
-                for (const auto& child : filter_impl->children()) {
+                for (auto& child : filter_impl->children()) {
                     visit_slot(child.get());
                 }
             }
-        } else {
-            for (const auto& child : expr->children()) {
-                visit_slot(child.get());
+        } else if (VTopNPred* topn_pred = typeid_cast<VTopNPred*>(
+                           expr.get())) { // top runtime filter : only le && 
ge.
+            if (topn_pred->has_value()) {
+                expr = topn_pred->get_binary_expr();

Review Comment:
   Why does only the ORC reader need to call this `get_binary_expr`?



##########
be/src/vec/exprs/vtopn_pred.h:
##########
@@ -114,6 +115,50 @@ class VTopNPred : public VExpr {
 
     const std::string& expr_name() const override { return _expr_name; }
 
+    bool has_value() const { return _predicate->has_value(); }
+
+    VExprSPtr get_binary_expr() const {
+        VExprSPtr root;
+
+        {
+            TFunction fn;
+            TFunctionName fn_name;
+            fn_name.__set_db_name("");
+            fn_name.__set_function_name(_predicate->is_asc() ? "le" : "ge");
+            fn.__set_name(fn_name);
+            fn.__set_binary_type(TFunctionBinaryType::BUILTIN);
+            std::vector<TTypeDesc> arg_types;
+            arg_types.push_back(create_type_desc(PrimitiveType::TYPE_INT));

Review Comment:
   Why is `TYPE_INT` used here?



##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -970,38 +976,61 @@ Status OrcReader::set_fill_columns(
     // std::unordered_map<column_name, std::pair<col_id, slot_id>>
     std::unordered_map<std::string, std::pair<uint32_t, int>> 
predicate_table_columns;
     std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
-        if (auto* slot_ref = typeid_cast<VSlotRef*>(expr)) {
+        if (expr->is_slot_ref()) {
+            VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
             auto expr_name = slot_ref->expr_name();
             predicate_table_columns.emplace(
                     expr_name, std::make_pair(slot_ref->column_id(), 
slot_ref->slot_id()));
             if (slot_ref->column_id() == 0) {
                 _lazy_read_ctx.resize_first_column = false;
             }
             return;
-        } else if (auto* runtime_filter = 
typeid_cast<VRuntimeFilterWrapper*>(expr)) {
-            auto* filter_impl = 
const_cast<VExpr*>(runtime_filter->get_impl().get());
-            if (auto* bloom_predicate = 
typeid_cast<VBloomPredicate*>(filter_impl)) {
-                for (const auto& child : bloom_predicate->children()) {
+        }
+        for (auto& child : expr->children()) {
+            visit_slot(child.get());
+        }
+    };
+
+    for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+        auto expr = conjunct->root();
+
+        if (VRuntimeFilterWrapper* runtime_filter =
+                    typeid_cast<VRuntimeFilterWrapper*>(expr.get())) {
+            auto filter_impl = runtime_filter->get_impl();
+            if (VBloomPredicate* bloom_predicate =
+                        typeid_cast<VBloomPredicate*>(filter_impl.get())) {
+                for (auto& child : bloom_predicate->children()) {
                     visit_slot(child.get());
                 }
-            } else if (auto* in_predicate = 
typeid_cast<VInPredicate*>(filter_impl)) {
-                if (!in_predicate->children().empty()) {
+            } else if (VDirectInPredicate* in_predicate =
+                               
typeid_cast<VDirectInPredicate*>(filter_impl.get())) {
+                if (in_predicate->get_num_children() > 0) {
+                    expr = runtime_filter->get_impl();
+
                     visit_slot(in_predicate->children()[0].get());
                 }
             } else {
-                for (const auto& child : filter_impl->children()) {
+                for (auto& child : filter_impl->children()) {
                     visit_slot(child.get());
                 }
             }
-        } else {
-            for (const auto& child : expr->children()) {
-                visit_slot(child.get());
+        } else if (VTopNPred* topn_pred = typeid_cast<VTopNPred*>(

Review Comment:
   When is `VTopNPred` added to `_lazy_read_ctx.conjuncts`?



##########
be/src/vec/exec/format/parquet/vparquet_reader.cpp:
##########
@@ -358,40 +553,68 @@ Status ParquetReader::set_fill_columns(
     // std::unordered_map<column_name, std::pair<col_id, slot_id>>
     std::unordered_map<std::string, std::pair<uint32_t, int>> 
predicate_columns;
     std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
-        if (VSlotRef* slot_ref = typeid_cast<VSlotRef*>(expr)) {
+        if (expr->is_slot_ref()) {
+            VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
             auto expr_name = slot_ref->expr_name();
             predicate_columns.emplace(expr_name,
                                       std::make_pair(slot_ref->column_id(), 
slot_ref->slot_id()));
             if (slot_ref->column_id() == 0) {
                 _lazy_read_ctx.resize_first_column = false;
             }
             return;
-        } else if (VRuntimeFilterWrapper* runtime_filter =
-                           typeid_cast<VRuntimeFilterWrapper*>(expr)) {
-            VExpr* filter_impl = 
const_cast<VExpr*>(runtime_filter->get_impl().get());
-            if (VBloomPredicate* bloom_predicate = 
typeid_cast<VBloomPredicate*>(filter_impl)) {
+        }
+        for (auto& child : expr->children()) {
+            visit_slot(child.get());
+        }
+    };
+
+    for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+        auto expr = conjunct->root();
+
+        if (VRuntimeFilterWrapper* runtime_filter =
+                    typeid_cast<VRuntimeFilterWrapper*>(expr.get())) {
+            auto filter_impl = runtime_filter->get_impl();
+            if (VBloomPredicate* bloom_predicate =
+                        typeid_cast<VBloomPredicate*>(filter_impl.get())) {
                 for (auto& child : bloom_predicate->children()) {
                     visit_slot(child.get());
                 }
-            } else if (VInPredicate* in_predicate = 
typeid_cast<VInPredicate*>(filter_impl)) {
+            } else if (VDirectInPredicate* in_predicate =
+                               
typeid_cast<VDirectInPredicate*>(filter_impl.get())) {
                 if (in_predicate->get_num_children() > 0) {
+                    expr = runtime_filter->get_impl();
+
                     visit_slot(in_predicate->children()[0].get());
                 }
             } else {
                 for (auto& child : filter_impl->children()) {
                     visit_slot(child.get());
                 }
             }
-        } else {
-            for (auto& child : expr->children()) {
-                visit_slot(child.get());
+        } else if (VTopNPred* topn_pred = typeid_cast<VTopNPred*>(
+                           expr.get())) { // top runtime filter : only le && 
ge.
+            if (topn_pred->has_value()) {
+                expr = topn_pred->get_binary_expr();
+
+                DCHECK(topn_pred->children().size() > 0);
+                visit_slot(topn_pred->children()[0].get());
+            } else {
+                continue;
             }
+        } else {
+            visit_slot(expr.get());
         }
-    };
 
-    if (!_lazy_read_ctx.conjuncts.empty()) {
-        for (auto& conjunct : _lazy_read_ctx.conjuncts) {
-            visit_slot(conjunct->root().get());
+        if (_check_expr_can_push_down(expr)) {
+            _push_down_exprs.emplace_back(expr);
+            if (expr->node_type() != TExprNodeType::COMPOUND_PRED) {
+                // for page index filter.
+                VSlotRef* slot_ref = 
static_cast<VSlotRef*>(expr->children()[0].get());
+                if (_push_down_simple_expr.contains(slot_ref->slot_id())) {

Review Comment:
   ```suggestion
                   if (!_push_down_simple_expr.contains(slot_ref->slot_id())) {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to