morningman commented on code in PR #43255:
URL: https://github.com/apache/doris/pull/43255#discussion_r1839672535


##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -558,190 +565,347 @@ std::tuple<bool, orc::Literal> 
convert_to_orc_literal(const orc::Type* type, con
     }
 }
 
-template <PrimitiveType primitive_type>
-std::vector<OrcPredicate> value_range_to_predicate(
-        const ColumnValueRange<primitive_type>& col_val_range, const 
orc::Type* type,
-        std::vector<orc::TypeKind>* unsupported_pushdown_types) {
-    std::vector<OrcPredicate> predicates;
-
-    PrimitiveType src_type = OrcReader::convert_to_doris_type(type).type;
-    if (src_type != primitive_type) {
-        if (!(is_string_type(src_type) && is_string_type(primitive_type))) {
-            // not support schema change
-            return predicates;
-        }
+std::tuple<bool, orc::Literal, orc::PredicateDataType> 
OrcReader::_make_orc_literal(
+        const VSlotRef* slot_ref, const VLiteral* literal) {
+    auto file_col_name_low_case = 
_col_name_to_file_col_name_low_case[slot_ref->expr_name()];
+    if (!_type_map.contains(file_col_name_low_case)) {
+        // TODO: this is for acid table
+        LOG(WARNING) << "Column " << slot_ref->expr_name() << " not found in 
_type_map";
+        return std::make_tuple(false, orc::Literal(false), 
orc::PredicateDataType::LONG);
+    }
+    const auto* orc_type = _type_map[file_col_name_low_case];
+    if (!TYPEKIND_TO_PREDICATE_TYPE.contains(orc_type->getKind())) {
+        LOG(WARNING) << "Unsupported Push Down Orc Type [TypeKind=" << 
orc_type->getKind() << "]";
+        return std::make_tuple(false, orc::Literal(false), 
orc::PredicateDataType::LONG);
+    }
+    const auto predicate_type = 
TYPEKIND_TO_PREDICATE_TYPE[orc_type->getKind()];
+    if (literal == nullptr) {
+        // only get the predicate_type
+        return std::make_tuple(true, orc::Literal(true), predicate_type);
+    }
+    auto literal_data = literal->get_column_ptr()->get_data_at(0);
+    auto* slot = _tuple_descriptor->slots()[slot_ref->column_id()];
+    auto slot_type = slot->type();
+    switch (slot_type.type) {
+#define M(NAME)                                                                
\
+    case TYPE_##NAME: {                                                        
\
+        auto [valid, orc_literal] = convert_to_orc_literal<TYPE_##NAME>(       
\
+                orc_type, literal_data, slot_type.precision, slot_type.scale); 
\
+        return std::make_tuple(valid, orc_literal, predicate_type);            
\
+    }
+#define APPLY_FOR_PRIMITIVE_TYPE(M) \
+    M(TINYINT)                      \
+    M(SMALLINT)                     \
+    M(INT)                          \
+    M(BIGINT)                       \
+    M(LARGEINT)                     \
+    M(CHAR)                         \
+    M(DATE)                         \
+    M(DATETIME)                     \
+    M(DATEV2)                       \
+    M(DATETIMEV2)                   \
+    M(VARCHAR)                      \
+    M(STRING)                       \
+    M(HLL)                          \
+    M(DECIMAL32)                    \
+    M(DECIMAL64)                    \
+    M(DECIMAL128I)                  \
+    M(DECIMAL256)                   \
+    M(DECIMALV2)                    \
+    M(BOOLEAN)                      \
+    M(IPV4)                         \
+    M(IPV6)
+        APPLY_FOR_PRIMITIVE_TYPE(M)
+#undef M
+    default: {
+        VLOG_CRITICAL << "Unsupported Convert Orc Literal [ColName=" << 
slot->col_name() << "]";
+        return std::make_tuple(false, orc::Literal(false), predicate_type);
+    }
     }
+}
 
-    if (unsupported_pushdown_types != nullptr) {
-        for (vector<orc::TypeKind>::iterator it = 
unsupported_pushdown_types->begin();
-             it != unsupported_pushdown_types->end(); ++it) {
-            if (*it == type->getKind()) {
-                // Unsupported type
-                return predicates;
-            }
-        }
+// check if the slot of expr can be pushed down to orc reader
+bool OrcReader::_check_slot_can_push_down(const VExprSPtr& expr) {
+    if (!expr->children()[0]->is_slot_ref()) {
+        return false;
     }
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    // check if the slot exists in orc file and not partition column
+    return _col_name_to_file_col_name.contains(slot_ref->expr_name()) &&
+           
!_lazy_read_ctx.predicate_partition_columns.contains(slot_ref->expr_name());
+}
 
-    orc::PredicateDataType predicate_data_type;
-    auto type_it = TYPEKIND_TO_PREDICATE_TYPE.find(type->getKind());
-    if (type_it == TYPEKIND_TO_PREDICATE_TYPE.end()) {
-        // Unsupported type
-        return predicates;
-    } else {
-        predicate_data_type = type_it->second;
+// check if there are rest children of expr can be pushed down to orc reader
+bool OrcReader::_check_rest_children_can_push_down(const VExprSPtr& expr) {
+    if (expr->children().size() < 2) {
+        return false;
     }
 
-    if (col_val_range.is_fixed_value_range()) {
-        OrcPredicate in_predicate;
-        in_predicate.col_name = col_val_range.column_name();
-        in_predicate.data_type = predicate_data_type;
-        in_predicate.op = SQLFilterOp::FILTER_IN;
-        for (const auto& value : col_val_range.get_fixed_value_set()) {
-            auto [valid, literal] = convert_to_orc_literal<primitive_type>(
-                    type, &value, col_val_range.precision(), 
col_val_range.scale());
-            if (valid) {
-                in_predicate.literals.push_back(literal);
-            }
+    for (size_t i = 1; i < expr->children().size(); ++i) {
+        if (!expr->children()[i]->is_literal()) {
+            return false;
         }
-        if (!in_predicate.literals.empty()) {
-            predicates.emplace_back(in_predicate);
+    }
+    return true;
+}
+
+// check if the expr can be pushed down to orc reader
+bool OrcReader::_check_expr_can_push_down(const VExprSPtr& expr) {
+    DCHECK(expr != nullptr);
+    switch (expr->op()) {
+    case TExprOpcode::COMPOUND_AND:
+        // at least one child can be pushed down
+        return std::ranges::any_of(expr->children(), [this](const auto& child) 
{
+            return _check_expr_can_push_down(child);
+        });
+    case TExprOpcode::COMPOUND_OR:
+        // all children must be pushed down
+        return std::ranges::all_of(expr->children(), [this](const auto& child) 
{
+            return _check_expr_can_push_down(child);
+        });
+    case TExprOpcode::COMPOUND_NOT:
+        DCHECK_EQ(expr->children().size(), 1);
+        return _check_expr_can_push_down(expr->children()[0]);
+
+    case TExprOpcode::GE:
+    case TExprOpcode::GT:
+    case TExprOpcode::LE:
+    case TExprOpcode::LT:
+    case TExprOpcode::EQ:
+    case TExprOpcode::NE:
+    case TExprOpcode::FILTER_IN:
+    case TExprOpcode::FILTER_NOT_IN:
+        return _check_slot_can_push_down(expr) && 
_check_rest_children_can_push_down(expr);
+
+    case TExprOpcode::INVALID_OPCODE:
+        if (expr->node_type() == TExprNodeType::FUNCTION_CALL) {
+            auto fn_name = expr->fn().name.function_name;
+            // only support is_null_pred and is_not_null_pred
+            if (fn_name == "is_null_pred" || fn_name == "is_not_null_pred") {
+                return _check_slot_can_push_down(expr);
+            }
+            VLOG_CRITICAL << "Unsupported function [funciton=" << fn_name << 
"]";
         }
-        return predicates;
+        return false;
+    default:
+        VLOG_CRITICAL << "Unsupported Opcode [OpCode=" << expr->op() << "]";
+        return false;
+    }
+}
+
+bool OrcReader::_build_less_than(const VExprSPtr& expr,
+                                 std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
+    DCHECK(expr->children().size() == 2);
+    DCHECK(expr->children()[0]->is_slot_ref());
+    DCHECK(expr->children()[1]->is_literal());
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    const auto* literal = static_cast<const 
VLiteral*>(expr->children()[1].get());
+    auto [valid, orc_literal, predicate_type] = _make_orc_literal(slot_ref, 
literal);
+    if (!valid) {
+        return false;
     }
+    builder->lessThan(slot_ref->expr_name(), predicate_type, orc_literal);
+    return true;
+}
 
-    const auto& high_value = col_val_range.get_range_max_value();
-    const auto& low_value = col_val_range.get_range_min_value();
-    const auto& high_op = col_val_range.get_range_high_op();
-    const auto& low_op = col_val_range.get_range_low_op();
+bool OrcReader::_build_less_than_equals(const VExprSPtr& expr,
+                                        
std::unique_ptr<orc::SearchArgumentBuilder>& builder) {
+    DCHECK(expr->children().size() == 2);
+    DCHECK(expr->children()[0]->is_slot_ref());
+    DCHECK(expr->children()[1]->is_literal());
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    const auto* literal = static_cast<const 
VLiteral*>(expr->children()[1].get());
+    auto [valid, orc_literal, predicate_type] = _make_orc_literal(slot_ref, 
literal);
+    if (!valid) {
+        return false;
+    }
+    builder->lessThanEquals(slot_ref->expr_name(), predicate_type, 
orc_literal);
+    return true;
+}
 
-    // orc can only push down is_null. When col_value_range._contain_null = 
true, only indicating that
-    // value can be null, not equals null, so ignore _contain_null in 
col_value_range
-    if (col_val_range.is_high_value_maximum() && high_op == 
SQLFilterOp::FILTER_LESS_OR_EQUAL &&
-        col_val_range.is_low_value_mininum() && low_op == 
SQLFilterOp::FILTER_LARGER_OR_EQUAL) {
-        return predicates;
+bool OrcReader::_build_equals(const VExprSPtr& expr,
+                              std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
+    DCHECK(expr->children().size() == 2);
+    DCHECK(expr->children()[0]->is_slot_ref());
+    DCHECK(expr->children()[1]->is_literal());
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    const auto* literal = static_cast<const 
VLiteral*>(expr->children()[1].get());
+    auto [valid, orc_literal, predicate_type] = _make_orc_literal(slot_ref, 
literal);
+    if (!valid) {
+        return false;
     }
+    builder->equals(slot_ref->expr_name(), predicate_type, orc_literal);
+    return true;
+}
 
-    if (low_value < high_value) {
-        if (!col_val_range.is_low_value_mininum() ||
-            SQLFilterOp::FILTER_LARGER_OR_EQUAL != low_op) {
-            auto [valid, low_literal] = convert_to_orc_literal<primitive_type>(
-                    type, &low_value, col_val_range.precision(), 
col_val_range.scale());
-            if (valid) {
-                OrcPredicate low_predicate;
-                low_predicate.col_name = col_val_range.column_name();
-                low_predicate.data_type = predicate_data_type;
-                low_predicate.op = low_op;
-                low_predicate.literals.emplace_back(low_literal);
-                predicates.emplace_back(low_predicate);
-            }
-        }
-        if (!col_val_range.is_high_value_maximum() ||
-            SQLFilterOp::FILTER_LESS_OR_EQUAL != high_op) {
-            auto [valid, high_literal] = 
convert_to_orc_literal<primitive_type>(
-                    type, &high_value, col_val_range.precision(), 
col_val_range.scale());
-            if (valid) {
-                OrcPredicate high_predicate;
-                high_predicate.col_name = col_val_range.column_name();
-                high_predicate.data_type = predicate_data_type;
-                high_predicate.op = high_op;
-                high_predicate.literals.emplace_back(high_literal);
-                predicates.emplace_back(high_predicate);
-            }
+bool OrcReader::_build_filter_in(const VExprSPtr& expr,
+                                 std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
+    DCHECK(expr->children().size() >= 2);
+    DCHECK(expr->children()[0]->is_slot_ref());
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    std::vector<orc::Literal> literals;
+    orc::PredicateDataType predicate_type = orc::PredicateDataType::LONG;
+    for (size_t i = 1; i < expr->children().size(); ++i) {
+        DCHECK(expr->children()[i]->is_literal());
+        const auto* literal = static_cast<const 
VLiteral*>(expr->children()[i].get());
+        auto [valid, orc_literal, type] = _make_orc_literal(slot_ref, literal);
+        if (!valid) {
+            return false;
         }
+        literals.emplace_back(orc_literal);
+        predicate_type = type;
     }
-    return predicates;
+    DCHECK(!literals.empty());
+    builder->in(slot_ref->expr_name(), predicate_type, literals);
+    return true;
 }
 
-bool static build_search_argument(std::vector<OrcPredicate>& predicates, int 
index,
-                                  std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
-    if (index >= predicates.size()) {
+bool OrcReader::_build_is_null(const VExprSPtr& expr,
+                               std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
+    DCHECK(expr->children().size() == 1);
+    DCHECK(expr->children()[0]->is_slot_ref());
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    auto [valid, _, predicate_type] = _make_orc_literal(slot_ref, nullptr);
+    builder->isNull(slot_ref->expr_name(), predicate_type);
+    return true;
+}
+
+bool OrcReader::_build_search_argument(const VExprSPtr& expr,
+                                       
std::unique_ptr<orc::SearchArgumentBuilder>& builder) {
+    if (expr == nullptr) {
         return false;
     }
-    if (index < predicates.size() - 1) {
-        builder->startAnd();
+
+    // if expr can not be pushed down, skip it and continue to next expr
+    if (!_check_expr_can_push_down(expr)) {
+        return false;
     }
-    OrcPredicate& predicate = predicates[index];
-    switch (predicate.op) {
-    case SQLFilterOp::FILTER_IN: {
-        if (predicate.literals.size() == 1) {
-            builder->equals(predicate.col_name, predicate.data_type, 
predicate.literals[0]);
-        } else {
-            builder->in(predicate.col_name, predicate.data_type, 
predicate.literals);
+
+    switch (expr->op()) {
+    case TExprOpcode::COMPOUND_AND:
+        builder->startAnd();
+        for (const auto& child : expr->children()) {
+            if (!_build_search_argument(child, builder)) {

Review Comment:
   I think that for AND, when a child cannot be pushed down, this should `continue` to the next child?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to