Copilot commented on code in PR #61379:
URL: https://github.com/apache/doris/pull/61379#discussion_r2938809135


##########
be/src/vec/exec/format/table/paimon_predicate_converter.cpp:
##########
@@ -0,0 +1,659 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/format/table/paimon_predicate_converter.h"
+
+#include <algorithm>
+#include <cctype>
+#include <utility>
+
+#include "paimon/data/decimal.h"
+#include "paimon/data/timestamp.h"
+#include "paimon/predicate/predicate_builder.h"
+#include "runtime/decimalv2_value.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "util/timezone_utils.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/exprs/vcompound_pred.h"
+#include "vec/exprs/vdirect_in_predicate.h"
+#include "vec/exprs/vectorized_fn_call.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vin_predicate.h"
+#include "vec/exprs/vliteral.h"
+#include "vec/exprs/vslot_ref.h"
+#include "vec/runtime/timestamptz_value.h"
+#include "vec/runtime/vdatetime_value.h"
+
+namespace doris::vectorized {
+#include "common/compile_check_begin.h"
+
+PaimonPredicateConverter::PaimonPredicateConverter(
+        const std::vector<SlotDescriptor*>& file_slot_descs, RuntimeState* 
state)
+        : _state(state) {
+    _field_index_by_name.reserve(file_slot_descs.size());
+    for (size_t i = 0; i < file_slot_descs.size(); ++i) {
+        const auto& name = file_slot_descs[i]->col_name();
+        auto normalized = _normalize_name(name);
+        if (_field_index_by_name.find(normalized) == 
_field_index_by_name.end()) {
+            _field_index_by_name.emplace(std::move(normalized), 
static_cast<int32_t>(i));
+        }
+    }
+
+    if (!TimezoneUtils::find_cctz_time_zone("GMT", _gmt_tz)) {
+        TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, 
_gmt_tz);
+    }
+}
+
+std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::build(
+        const VExprContextSPtrs& conjuncts) {
+    std::vector<std::shared_ptr<paimon::Predicate>> predicates;
+    predicates.reserve(conjuncts.size());
+    for (const auto& conjunct : conjuncts) {
+        if (!conjunct || !conjunct->root()) {
+            continue;
+        }
+        auto root = conjunct->root();
+        if (root->is_rf_wrapper()) {
+            if (auto impl = root->get_impl()) {
+                root = impl;
+            }
+        }
+        auto predicate = _convert_expr(root);
+        if (predicate) {
+            predicates.emplace_back(std::move(predicate));
+        }
+    }
+
+    if (predicates.empty()) {
+        return nullptr;
+    }
+    if (predicates.size() == 1) {
+        return predicates.front();
+    }
+    auto and_result = paimon::PredicateBuilder::And(predicates);
+    if (!and_result.ok()) {
+        return nullptr;
+    }
+    return std::move(and_result).value();
+}
+
+std::shared_ptr<paimon::Predicate> 
PaimonPredicateConverter::_convert_expr(const VExprSPtr& expr) {
+    if (!expr) {
+        return nullptr;
+    }
+
+    auto uncast = VExpr::expr_without_cast(expr);
+
+    if (auto* direct_in = dynamic_cast<VDirectInPredicate*>(uncast.get())) {
+        VExprSPtr in_expr;
+        if (direct_in->get_slot_in_expr(in_expr)) {
+            return _convert_in(in_expr);
+        }
+        return nullptr;
+    }
+
+    if (dynamic_cast<VInPredicate*>(uncast.get()) != nullptr) {
+        return _convert_in(uncast);
+    }
+
+    switch (uncast->op()) {
+    case TExprOpcode::COMPOUND_AND:
+    case TExprOpcode::COMPOUND_OR:
+        return _convert_compound(uncast);
+    case TExprOpcode::COMPOUND_NOT:
+        return nullptr;
+    case TExprOpcode::EQ:
+    case TExprOpcode::EQ_FOR_NULL:
+    case TExprOpcode::NE:
+    case TExprOpcode::GE:
+    case TExprOpcode::GT:
+    case TExprOpcode::LE:
+    case TExprOpcode::LT:
+        return _convert_binary(uncast);
+    default:
+        break;
+    }
+
+    if (auto* fn = dynamic_cast<VectorizedFnCall*>(uncast.get())) {
+        auto fn_name = _normalize_name(fn->function_name());
+        if (fn_name == "is_null_pred" || fn_name == "is_not_null_pred") {
+            return _convert_is_null(uncast, fn_name);
+        }
+        if (fn_name == "like") {
+            return _convert_like_prefix(uncast);
+        }
+    }
+
+    return nullptr;
+}
+
+std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_compound(
+        const VExprSPtr& expr) {
+    if (!expr || expr->get_num_children() != 2) {
+        return nullptr;
+    }
+    auto left = _convert_expr(expr->get_child(0));
+    if (!left) {
+        return nullptr;
+    }
+    auto right = _convert_expr(expr->get_child(1));
+    if (!right) {
+        return nullptr;
+    }
+
+    if (expr->op() == TExprOpcode::COMPOUND_AND) {
+        auto and_result = paimon::PredicateBuilder::And({left, right});
+        return and_result.ok() ? std::move(and_result).value() : nullptr;
+    }
+    if (expr->op() == TExprOpcode::COMPOUND_OR) {
+        auto or_result = paimon::PredicateBuilder::Or({left, right});
+        return or_result.ok() ? std::move(or_result).value() : nullptr;
+    }
+    return nullptr;
+}
+
+std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_in(const 
VExprSPtr& expr) {
+    auto* in_pred = dynamic_cast<VInPredicate*>(expr.get());
+    if (!in_pred || expr->get_num_children() < 2) {
+        return nullptr;
+    }
+    auto field_meta = _resolve_field(expr->get_child(0));
+    if (!field_meta) {
+        return nullptr;
+    }
+
+    std::vector<paimon::Literal> literals;
+    literals.reserve(expr->get_num_children() - 1);
+    for (uint16_t i = 1; i < expr->get_num_children(); ++i) {
+        auto literal = _convert_literal(expr->get_child(i), 
*field_meta->slot_desc,
+                                        field_meta->field_type);
+        if (!literal) {
+            return nullptr;
+        }
+        literals.emplace_back(std::move(*literal));
+    }
+
+    if (literals.empty()) {
+        return nullptr;
+    }
+    if (in_pred->is_not_in()) {
+        return paimon::PredicateBuilder::NotIn(field_meta->index, 
field_meta->slot_desc->col_name(),
+                                               field_meta->field_type, 
literals);
+    }
+    return paimon::PredicateBuilder::In(field_meta->index, 
field_meta->slot_desc->col_name(),
+                                        field_meta->field_type, literals);
+}
+
+std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_binary(
+        const VExprSPtr& expr) {
+    if (!expr || expr->get_num_children() != 2) {
+        return nullptr;
+    }
+    auto field_meta = _resolve_field(expr->get_child(0));
+    if (!field_meta) {
+        return nullptr;
+    }
+
+    if (expr->op() == TExprOpcode::EQ_FOR_NULL) {
+        return paimon::PredicateBuilder::IsNull(
+                field_meta->index, field_meta->slot_desc->col_name(), 
field_meta->field_type);
+    }
+

Review Comment:
   `EQ_FOR_NULL` (null-safe equal, `<=>`) is currently converted to `IsNull`
unconditionally. This produces an incorrect pushdown for expressions like
`col <=> 5`: the pushed-down filter keeps only NULL rows, while the expression
actually matches only rows where `col` equals 5. Consider handling
`EQ_FOR_NULL` as follows: if the RHS is a NULL literal => `IsNull`; if the RHS
is a non-NULL literal => `Equal`; otherwise don’t push down (return `nullptr`).
   



##########
be/src/vec/exec/format/table/paimon_predicate_converter.cpp:
##########
@@ -0,0 +1,659 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/format/table/paimon_predicate_converter.h"
+
+#include <algorithm>
+#include <cctype>
+#include <utility>
+
+#include "paimon/data/decimal.h"
+#include "paimon/data/timestamp.h"
+#include "paimon/predicate/predicate_builder.h"
+#include "runtime/decimalv2_value.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "util/timezone_utils.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/exprs/vcompound_pred.h"
+#include "vec/exprs/vdirect_in_predicate.h"
+#include "vec/exprs/vectorized_fn_call.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vin_predicate.h"
+#include "vec/exprs/vliteral.h"
+#include "vec/exprs/vslot_ref.h"
+#include "vec/runtime/timestamptz_value.h"
+#include "vec/runtime/vdatetime_value.h"
+
+namespace doris::vectorized {
+#include "common/compile_check_begin.h"
+
+PaimonPredicateConverter::PaimonPredicateConverter(
+        const std::vector<SlotDescriptor*>& file_slot_descs, RuntimeState* 
state)
+        : _state(state) {
+    _field_index_by_name.reserve(file_slot_descs.size());
+    for (size_t i = 0; i < file_slot_descs.size(); ++i) {
+        const auto& name = file_slot_descs[i]->col_name();
+        auto normalized = _normalize_name(name);
+        if (_field_index_by_name.find(normalized) == 
_field_index_by_name.end()) {
+            _field_index_by_name.emplace(std::move(normalized), 
static_cast<int32_t>(i));
+        }
+    }
+
+    if (!TimezoneUtils::find_cctz_time_zone("GMT", _gmt_tz)) {
+        TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, 
_gmt_tz);
+    }
+}
+
+std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::build(
+        const VExprContextSPtrs& conjuncts) {
+    std::vector<std::shared_ptr<paimon::Predicate>> predicates;
+    predicates.reserve(conjuncts.size());
+    for (const auto& conjunct : conjuncts) {
+        if (!conjunct || !conjunct->root()) {
+            continue;
+        }
+        auto root = conjunct->root();
+        if (root->is_rf_wrapper()) {
+            if (auto impl = root->get_impl()) {
+                root = impl;
+            }
+        }
+        auto predicate = _convert_expr(root);
+        if (predicate) {
+            predicates.emplace_back(std::move(predicate));
+        }
+    }
+
+    if (predicates.empty()) {
+        return nullptr;
+    }
+    if (predicates.size() == 1) {
+        return predicates.front();
+    }
+    auto and_result = paimon::PredicateBuilder::And(predicates);
+    if (!and_result.ok()) {
+        return nullptr;
+    }
+    return std::move(and_result).value();
+}

Review Comment:
   There are no BE unit tests covering `PaimonPredicateConverter` behavior (no 
references under `be/test`). Given this converter affects correctness of 
predicate pushdown, it should have focused tests for supported ops (IN, 
comparisons, LIKE prefix, IS NULL/NOT NULL) and type conversions 
(date/datetime/decimal).



##########
be/CMakeLists.txt:
##########
@@ -146,6 +146,17 @@ message(STATUS "build task executor simulator: 
${BUILD_TASK_EXECUTOR_SIMULATOR}"
 option(BUILD_FILE_CACHE_LRU_TOOL "ON for building file cache lru tool or OFF 
for not" OFF)
 message(STATUS "build file cache lru tool: ${BUILD_FILE_CACHE_LRU_TOOL}")
 
+option(ENABLE_PAIMON_CPP "Enable Paimon C++ integration" ON)
+set(PAIMON_HOME "" CACHE PATH "Paimon install prefix")
+
+# Allow env to override when reconfiguring (avoid picking /usr/local).
+if (DEFINED ENV{ENABLE_PAIMON_CPP})
+    set(ENABLE_PAIMON_CPP "$ENV{ENABLE_PAIMON_CPP}" CACHE BOOL "" FORCE)
+endif()

Review Comment:
   `ENABLE_PAIMON_CPP` is introduced as a toggle, but the BE source tree always 
compiles/links `PaimonCppReader` (e.g. `be/src/vec` uses `GLOB_RECURSE *.cpp` 
and `file_scanner.cpp` includes/uses the reader unconditionally). If someone 
configures with `-DENABLE_PAIMON_CPP=OFF`, CMake will stop adding paimon static 
libs in `thirdparty.cmake`, likely causing link failures. Either (1) make the 
option actually gate compilation/usage (compile definitions + `#ifdef` + source 
filtering), or (2) remove the option and always require the paimon-cpp deps.
   



##########
be/src/vec/exec/scan/file_scanner.cpp:
##########
@@ -995,9 +997,25 @@ Status FileScanner::_get_next_reader() {
                 _cur_reader = std::move(mc_reader);
             } else if (range.__isset.table_format_params &&
                        range.table_format_params.table_format_type == 
"paimon") {
-                _cur_reader = PaimonJniReader::create_unique(_file_slot_descs, 
_state, _profile,
-                                                             range, _params);
-                init_status = 
((PaimonJniReader*)(_cur_reader.get()))->init_reader();
+                if (_state->query_options().__isset.enable_paimon_cpp_reader &&
+                    _state->query_options().enable_paimon_cpp_reader) {
+                    auto cpp_reader = 
PaimonCppReader::create_unique(_file_slot_descs, _state,
+                                                                     _profile, 
range, _params);

Review Comment:
   The paimon-cpp reader selection is compiled in unconditionally. With 
`ENABLE_PAIMON_CPP=OFF` the paimon static libs won’t be linked (see 
`thirdparty.cmake`), so this should be guarded or compiled out to avoid 
unresolved symbols and provide a clean fallback to `PaimonJniReader`.



##########
be/cmake/thirdparty.cmake:
##########
@@ -179,3 +179,18 @@ add_thirdparty(icudata LIB64)
 
 
 add_thirdparty(pugixml LIB64)
+
+if (ENABLE_PAIMON_CPP)
+    add_thirdparty(paimon LIB64)
+    add_thirdparty(paimon_parquet_file_format LIB64)
+    add_thirdparty(paimon_orc_file_format LIB64)
+    add_thirdparty(paimon_blob_file_format LIB64)
+    add_thirdparty(paimon_local_file_system LIB64)
+    add_thirdparty(paimon_file_index LIB64)
+    add_thirdparty(paimon_global_index LIB64)
+
+    add_thirdparty(roaring_bitmap_paimon LIB64)
+    add_thirdparty(xxhash_paimon LIB64)
+    add_thirdparty(fmt_paimon LIB64)
+    add_thirdparty(tbb_paimon LIB64)
+endif()

Review Comment:
   `add_thirdparty(...)` does not validate that the referenced static libraries 
actually exist; it only sets an IMPORTED_LOCATION. With `ENABLE_PAIMON_CPP` 
defaulting to ON, missing paimon-cpp artifacts will fail later with opaque 
linker errors. Consider adding explicit `if (NOT EXISTS ...) 
message(FATAL_ERROR ...)` checks for the paimon libs (or default 
`ENABLE_PAIMON_CPP` to OFF unless the libs are present).



##########
be/src/vec/exec/scan/file_scanner.cpp:
##########
@@ -69,7 +69,9 @@
 #include "vec/exec/format/table/iceberg_reader.h"
 #include "vec/exec/format/table/lakesoul_jni_reader.h"
 #include "vec/exec/format/table/max_compute_jni_reader.h"
+#include "vec/exec/format/table/paimon_cpp_reader.h"
 #include "vec/exec/format/table/paimon_jni_reader.h"
+#include "vec/exec/format/table/paimon_predicate_converter.h"

Review Comment:
   `paimon_cpp_reader.h` / `paimon_predicate_converter.h` are included 
unconditionally. If `ENABLE_PAIMON_CPP` is intended as a build-time toggle, 
consider guarding these includes (and related code paths) behind a compile 
definition so builds without paimon-cpp headers/libs still compile.
   



##########
be/src/vec/exec/format/table/paimon_predicate_converter.cpp:
##########
@@ -0,0 +1,659 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/format/table/paimon_predicate_converter.h"
+
+#include <algorithm>
+#include <cctype>
+#include <utility>
+
+#include "paimon/data/decimal.h"
+#include "paimon/data/timestamp.h"
+#include "paimon/predicate/predicate_builder.h"
+#include "runtime/decimalv2_value.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "util/timezone_utils.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/exprs/vcompound_pred.h"
+#include "vec/exprs/vdirect_in_predicate.h"
+#include "vec/exprs/vectorized_fn_call.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vin_predicate.h"
+#include "vec/exprs/vliteral.h"
+#include "vec/exprs/vslot_ref.h"
+#include "vec/runtime/timestamptz_value.h"
+#include "vec/runtime/vdatetime_value.h"
+
+namespace doris::vectorized {
+#include "common/compile_check_begin.h"
+
+PaimonPredicateConverter::PaimonPredicateConverter(
+        const std::vector<SlotDescriptor*>& file_slot_descs, RuntimeState* 
state)
+        : _state(state) {
+    _field_index_by_name.reserve(file_slot_descs.size());
+    for (size_t i = 0; i < file_slot_descs.size(); ++i) {
+        const auto& name = file_slot_descs[i]->col_name();
+        auto normalized = _normalize_name(name);
+        if (_field_index_by_name.find(normalized) == 
_field_index_by_name.end()) {
+            _field_index_by_name.emplace(std::move(normalized), 
static_cast<int32_t>(i));
+        }
+    }
+
+    if (!TimezoneUtils::find_cctz_time_zone("GMT", _gmt_tz)) {
+        TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, 
_gmt_tz);
+    }
+}
+
+std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::build(
+        const VExprContextSPtrs& conjuncts) {
+    std::vector<std::shared_ptr<paimon::Predicate>> predicates;
+    predicates.reserve(conjuncts.size());
+    for (const auto& conjunct : conjuncts) {
+        if (!conjunct || !conjunct->root()) {
+            continue;
+        }
+        auto root = conjunct->root();
+        if (root->is_rf_wrapper()) {
+            if (auto impl = root->get_impl()) {
+                root = impl;
+            }
+        }
+        auto predicate = _convert_expr(root);
+        if (predicate) {
+            predicates.emplace_back(std::move(predicate));
+        }
+    }
+
+    if (predicates.empty()) {
+        return nullptr;
+    }
+    if (predicates.size() == 1) {
+        return predicates.front();
+    }
+    auto and_result = paimon::PredicateBuilder::And(predicates);
+    if (!and_result.ok()) {
+        return nullptr;
+    }
+    return std::move(and_result).value();
+}
+
+std::shared_ptr<paimon::Predicate> 
PaimonPredicateConverter::_convert_expr(const VExprSPtr& expr) {
+    if (!expr) {
+        return nullptr;
+    }
+
+    auto uncast = VExpr::expr_without_cast(expr);
+
+    if (auto* direct_in = dynamic_cast<VDirectInPredicate*>(uncast.get())) {
+        VExprSPtr in_expr;
+        if (direct_in->get_slot_in_expr(in_expr)) {
+            return _convert_in(in_expr);
+        }
+        return nullptr;
+    }
+
+    if (dynamic_cast<VInPredicate*>(uncast.get()) != nullptr) {
+        return _convert_in(uncast);
+    }
+
+    switch (uncast->op()) {
+    case TExprOpcode::COMPOUND_AND:
+    case TExprOpcode::COMPOUND_OR:
+        return _convert_compound(uncast);
+    case TExprOpcode::COMPOUND_NOT:
+        return nullptr;
+    case TExprOpcode::EQ:
+    case TExprOpcode::EQ_FOR_NULL:
+    case TExprOpcode::NE:
+    case TExprOpcode::GE:
+    case TExprOpcode::GT:
+    case TExprOpcode::LE:
+    case TExprOpcode::LT:
+        return _convert_binary(uncast);
+    default:
+        break;
+    }
+
+    if (auto* fn = dynamic_cast<VectorizedFnCall*>(uncast.get())) {
+        auto fn_name = _normalize_name(fn->function_name());
+        if (fn_name == "is_null_pred" || fn_name == "is_not_null_pred") {
+            return _convert_is_null(uncast, fn_name);
+        }
+        if (fn_name == "like") {
+            return _convert_like_prefix(uncast);
+        }
+    }
+
+    return nullptr;
+}
+
+std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_compound(
+        const VExprSPtr& expr) {
+    if (!expr || expr->get_num_children() != 2) {
+        return nullptr;
+    }
+    auto left = _convert_expr(expr->get_child(0));
+    if (!left) {
+        return nullptr;
+    }
+    auto right = _convert_expr(expr->get_child(1));
+    if (!right) {
+        return nullptr;
+    }
+
+    if (expr->op() == TExprOpcode::COMPOUND_AND) {
+        auto and_result = paimon::PredicateBuilder::And({left, right});
+        return and_result.ok() ? std::move(and_result).value() : nullptr;
+    }
+    if (expr->op() == TExprOpcode::COMPOUND_OR) {
+        auto or_result = paimon::PredicateBuilder::Or({left, right});
+        return or_result.ok() ? std::move(or_result).value() : nullptr;
+    }
+    return nullptr;
+}
+
+std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_in(const 
VExprSPtr& expr) {
+    auto* in_pred = dynamic_cast<VInPredicate*>(expr.get());
+    if (!in_pred || expr->get_num_children() < 2) {
+        return nullptr;
+    }
+    auto field_meta = _resolve_field(expr->get_child(0));
+    if (!field_meta) {
+        return nullptr;
+    }
+
+    std::vector<paimon::Literal> literals;
+    literals.reserve(expr->get_num_children() - 1);
+    for (uint16_t i = 1; i < expr->get_num_children(); ++i) {
+        auto literal = _convert_literal(expr->get_child(i), 
*field_meta->slot_desc,
+                                        field_meta->field_type);
+        if (!literal) {
+            return nullptr;
+        }
+        literals.emplace_back(std::move(*literal));
+    }
+
+    if (literals.empty()) {
+        return nullptr;
+    }
+    if (in_pred->is_not_in()) {
+        return paimon::PredicateBuilder::NotIn(field_meta->index, 
field_meta->slot_desc->col_name(),
+                                               field_meta->field_type, 
literals);
+    }
+    return paimon::PredicateBuilder::In(field_meta->index, 
field_meta->slot_desc->col_name(),
+                                        field_meta->field_type, literals);
+}
+
+std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_binary(
+        const VExprSPtr& expr) {
+    if (!expr || expr->get_num_children() != 2) {
+        return nullptr;
+    }
+    auto field_meta = _resolve_field(expr->get_child(0));
+    if (!field_meta) {
+        return nullptr;
+    }
+
+    if (expr->op() == TExprOpcode::EQ_FOR_NULL) {
+        return paimon::PredicateBuilder::IsNull(
+                field_meta->index, field_meta->slot_desc->col_name(), 
field_meta->field_type);
+    }
+
+    auto literal =
+            _convert_literal(expr->get_child(1), *field_meta->slot_desc, 
field_meta->field_type);
+    if (!literal) {
+        return nullptr;
+    }
+
+    switch (expr->op()) {
+    case TExprOpcode::EQ:
+        return paimon::PredicateBuilder::Equal(field_meta->index, 
field_meta->slot_desc->col_name(),
+                                               field_meta->field_type, 
*literal);
+    case TExprOpcode::NE:
+        return paimon::PredicateBuilder::NotEqual(field_meta->index,
+                                                  
field_meta->slot_desc->col_name(),
+                                                  field_meta->field_type, 
*literal);
+    case TExprOpcode::GE:
+        return paimon::PredicateBuilder::GreaterOrEqual(field_meta->index,
+                                                        
field_meta->slot_desc->col_name(),
+                                                        
field_meta->field_type, *literal);
+    case TExprOpcode::GT:
+        return paimon::PredicateBuilder::GreaterThan(field_meta->index,
+                                                     
field_meta->slot_desc->col_name(),
+                                                     field_meta->field_type, 
*literal);
+    case TExprOpcode::LE:
+        return paimon::PredicateBuilder::LessOrEqual(field_meta->index,
+                                                     
field_meta->slot_desc->col_name(),
+                                                     field_meta->field_type, 
*literal);
+    case TExprOpcode::LT:
+        return paimon::PredicateBuilder::LessThan(field_meta->index,
+                                                  
field_meta->slot_desc->col_name(),
+                                                  field_meta->field_type, 
*literal);
+    default:
+        break;
+    }
+    return nullptr;
+}
+
+std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_is_null(
+        const VExprSPtr& expr, const std::string& fn_name) {
+    if (!expr || expr->get_num_children() != 1) {
+        return nullptr;
+    }
+    auto field_meta = _resolve_field(expr->get_child(0));
+    if (!field_meta) {
+        return nullptr;
+    }
+    if (fn_name == "is_not_null_pred") {
+        return paimon::PredicateBuilder::IsNotNull(
+                field_meta->index, field_meta->slot_desc->col_name(), 
field_meta->field_type);
+    }
+    return paimon::PredicateBuilder::IsNull(field_meta->index, 
field_meta->slot_desc->col_name(),
+                                            field_meta->field_type);
+}
+
+std::shared_ptr<paimon::Predicate> 
PaimonPredicateConverter::_convert_like_prefix(
+        const VExprSPtr& expr) {
+    if (!expr || expr->get_num_children() != 2) {
+        return nullptr;
+    }
+    auto field_meta = _resolve_field(expr->get_child(0));
+    if (!field_meta || field_meta->field_type != paimon::FieldType::STRING) {
+        return nullptr;
+    }
+
+    auto pattern_opt = _extract_string_literal(expr->get_child(1));
+    if (!pattern_opt) {
+        return nullptr;
+    }
+    const std::string& pattern = *pattern_opt;
+    if (!pattern.empty() && pattern.front() == '%') {
+        return nullptr;
+    }
+    if (pattern.empty() || pattern.back() != '%') {
+        return nullptr;
+    }
+
+    std::string prefix = pattern.substr(0, pattern.size() - 1);
+    paimon::Literal lower_literal(paimon::FieldType::STRING, prefix.data(), 
prefix.size());
+    auto lower_pred = paimon::PredicateBuilder::GreaterOrEqual(

Review Comment:
   `LIKE` pushdown currently treats any pattern that ends with `%` (and doesn’t
start with `%`) as a simple prefix. This is incorrect for patterns that contain
other wildcards or escapes (e.g. `ab%c%`, any `_`, or an escaped `%` or `_`),
which would be converted into an incorrect range predicate. Suggest only
enabling this optimization when the part of the pattern before the trailing `%`
contains no other `%`, no `_`, and no escape sequences; otherwise return
`nullptr`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to