Copilot commented on code in PR #61379: URL: https://github.com/apache/doris/pull/61379#discussion_r2938809135
########## be/src/vec/exec/format/table/paimon_predicate_converter.cpp: ########## @@ -0,0 +1,659 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/exec/format/table/paimon_predicate_converter.h" + +#include <algorithm> +#include <cctype> +#include <utility> + +#include "paimon/data/decimal.h" +#include "paimon/data/timestamp.h" +#include "paimon/predicate/predicate_builder.h" +#include "runtime/decimalv2_value.h" +#include "runtime/descriptors.h" +#include "runtime/runtime_state.h" +#include "util/timezone_utils.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_nullable.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/exprs/vcompound_pred.h" +#include "vec/exprs/vdirect_in_predicate.h" +#include "vec/exprs/vectorized_fn_call.h" +#include "vec/exprs/vexpr.h" +#include "vec/exprs/vin_predicate.h" +#include "vec/exprs/vliteral.h" +#include "vec/exprs/vslot_ref.h" +#include "vec/runtime/timestamptz_value.h" +#include "vec/runtime/vdatetime_value.h" + +namespace doris::vectorized { +#include "common/compile_check_begin.h" + 
+PaimonPredicateConverter::PaimonPredicateConverter( + const std::vector<SlotDescriptor*>& file_slot_descs, RuntimeState* state) + : _state(state) { + _field_index_by_name.reserve(file_slot_descs.size()); + for (size_t i = 0; i < file_slot_descs.size(); ++i) { + const auto& name = file_slot_descs[i]->col_name(); + auto normalized = _normalize_name(name); + if (_field_index_by_name.find(normalized) == _field_index_by_name.end()) { + _field_index_by_name.emplace(std::move(normalized), static_cast<int32_t>(i)); + } + } + + if (!TimezoneUtils::find_cctz_time_zone("GMT", _gmt_tz)) { + TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, _gmt_tz); + } +} + +std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::build( + const VExprContextSPtrs& conjuncts) { + std::vector<std::shared_ptr<paimon::Predicate>> predicates; + predicates.reserve(conjuncts.size()); + for (const auto& conjunct : conjuncts) { + if (!conjunct || !conjunct->root()) { + continue; + } + auto root = conjunct->root(); + if (root->is_rf_wrapper()) { + if (auto impl = root->get_impl()) { + root = impl; + } + } + auto predicate = _convert_expr(root); + if (predicate) { + predicates.emplace_back(std::move(predicate)); + } + } + + if (predicates.empty()) { + return nullptr; + } + if (predicates.size() == 1) { + return predicates.front(); + } + auto and_result = paimon::PredicateBuilder::And(predicates); + if (!and_result.ok()) { + return nullptr; + } + return std::move(and_result).value(); +} + +std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_expr(const VExprSPtr& expr) { + if (!expr) { + return nullptr; + } + + auto uncast = VExpr::expr_without_cast(expr); + + if (auto* direct_in = dynamic_cast<VDirectInPredicate*>(uncast.get())) { + VExprSPtr in_expr; + if (direct_in->get_slot_in_expr(in_expr)) { + return _convert_in(in_expr); + } + return nullptr; + } + + if (dynamic_cast<VInPredicate*>(uncast.get()) != nullptr) { + return _convert_in(uncast); + } + + switch 
(uncast->op()) { + case TExprOpcode::COMPOUND_AND: + case TExprOpcode::COMPOUND_OR: + return _convert_compound(uncast); + case TExprOpcode::COMPOUND_NOT: + return nullptr; + case TExprOpcode::EQ: + case TExprOpcode::EQ_FOR_NULL: + case TExprOpcode::NE: + case TExprOpcode::GE: + case TExprOpcode::GT: + case TExprOpcode::LE: + case TExprOpcode::LT: + return _convert_binary(uncast); + default: + break; + } + + if (auto* fn = dynamic_cast<VectorizedFnCall*>(uncast.get())) { + auto fn_name = _normalize_name(fn->function_name()); + if (fn_name == "is_null_pred" || fn_name == "is_not_null_pred") { + return _convert_is_null(uncast, fn_name); + } + if (fn_name == "like") { + return _convert_like_prefix(uncast); + } + } + + return nullptr; +} + +std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_compound( + const VExprSPtr& expr) { + if (!expr || expr->get_num_children() != 2) { + return nullptr; + } + auto left = _convert_expr(expr->get_child(0)); + if (!left) { + return nullptr; + } + auto right = _convert_expr(expr->get_child(1)); + if (!right) { + return nullptr; + } + + if (expr->op() == TExprOpcode::COMPOUND_AND) { + auto and_result = paimon::PredicateBuilder::And({left, right}); + return and_result.ok() ? std::move(and_result).value() : nullptr; + } + if (expr->op() == TExprOpcode::COMPOUND_OR) { + auto or_result = paimon::PredicateBuilder::Or({left, right}); + return or_result.ok() ? 
std::move(or_result).value() : nullptr; + } + return nullptr; +} + +std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_in(const VExprSPtr& expr) { + auto* in_pred = dynamic_cast<VInPredicate*>(expr.get()); + if (!in_pred || expr->get_num_children() < 2) { + return nullptr; + } + auto field_meta = _resolve_field(expr->get_child(0)); + if (!field_meta) { + return nullptr; + } + + std::vector<paimon::Literal> literals; + literals.reserve(expr->get_num_children() - 1); + for (uint16_t i = 1; i < expr->get_num_children(); ++i) { + auto literal = _convert_literal(expr->get_child(i), *field_meta->slot_desc, + field_meta->field_type); + if (!literal) { + return nullptr; + } + literals.emplace_back(std::move(*literal)); + } + + if (literals.empty()) { + return nullptr; + } + if (in_pred->is_not_in()) { + return paimon::PredicateBuilder::NotIn(field_meta->index, field_meta->slot_desc->col_name(), + field_meta->field_type, literals); + } + return paimon::PredicateBuilder::In(field_meta->index, field_meta->slot_desc->col_name(), + field_meta->field_type, literals); +} + +std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_binary( + const VExprSPtr& expr) { + if (!expr || expr->get_num_children() != 2) { + return nullptr; + } + auto field_meta = _resolve_field(expr->get_child(0)); + if (!field_meta) { + return nullptr; + } + + if (expr->op() == TExprOpcode::EQ_FOR_NULL) { + return paimon::PredicateBuilder::IsNull( + field_meta->index, field_meta->slot_desc->col_name(), field_meta->field_type); + } + Review Comment: `EQ_FOR_NULL` (null-safe equal, `<=>`) is currently converted to `IsNull` unconditionally. This will produce wrong pushdown for expressions like `col <=> 5` (it would filter to only NULL rows). Consider handling `EQ_FOR_NULL` as: if RHS is a NULL literal => `IsNull`; if RHS is a non-NULL literal => `Equal`; otherwise don’t push down (return nullptr). 
########## be/src/vec/exec/format/table/paimon_predicate_converter.cpp: ########## @@ -0,0 +1,659 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/exec/format/table/paimon_predicate_converter.h" + +#include <algorithm> +#include <cctype> +#include <utility> + +#include "paimon/data/decimal.h" +#include "paimon/data/timestamp.h" +#include "paimon/predicate/predicate_builder.h" +#include "runtime/decimalv2_value.h" +#include "runtime/descriptors.h" +#include "runtime/runtime_state.h" +#include "util/timezone_utils.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_nullable.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/exprs/vcompound_pred.h" +#include "vec/exprs/vdirect_in_predicate.h" +#include "vec/exprs/vectorized_fn_call.h" +#include "vec/exprs/vexpr.h" +#include "vec/exprs/vin_predicate.h" +#include "vec/exprs/vliteral.h" +#include "vec/exprs/vslot_ref.h" +#include "vec/runtime/timestamptz_value.h" +#include "vec/runtime/vdatetime_value.h" + +namespace doris::vectorized { +#include "common/compile_check_begin.h" + 
+PaimonPredicateConverter::PaimonPredicateConverter( + const std::vector<SlotDescriptor*>& file_slot_descs, RuntimeState* state) + : _state(state) { + _field_index_by_name.reserve(file_slot_descs.size()); + for (size_t i = 0; i < file_slot_descs.size(); ++i) { + const auto& name = file_slot_descs[i]->col_name(); + auto normalized = _normalize_name(name); + if (_field_index_by_name.find(normalized) == _field_index_by_name.end()) { + _field_index_by_name.emplace(std::move(normalized), static_cast<int32_t>(i)); + } + } + + if (!TimezoneUtils::find_cctz_time_zone("GMT", _gmt_tz)) { + TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, _gmt_tz); + } +} + +std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::build( + const VExprContextSPtrs& conjuncts) { + std::vector<std::shared_ptr<paimon::Predicate>> predicates; + predicates.reserve(conjuncts.size()); + for (const auto& conjunct : conjuncts) { + if (!conjunct || !conjunct->root()) { + continue; + } + auto root = conjunct->root(); + if (root->is_rf_wrapper()) { + if (auto impl = root->get_impl()) { + root = impl; + } + } + auto predicate = _convert_expr(root); + if (predicate) { + predicates.emplace_back(std::move(predicate)); + } + } + + if (predicates.empty()) { + return nullptr; + } + if (predicates.size() == 1) { + return predicates.front(); + } + auto and_result = paimon::PredicateBuilder::And(predicates); + if (!and_result.ok()) { + return nullptr; + } + return std::move(and_result).value(); +} Review Comment: There are no BE unit tests covering `PaimonPredicateConverter` behavior (no references under `be/test`). Given this converter affects correctness of predicate pushdown, it should have focused tests for supported ops (IN, comparisons, LIKE prefix, IS NULL/NOT NULL) and type conversions (date/datetime/decimal). 
########## be/CMakeLists.txt: ########## @@ -146,6 +146,17 @@ message(STATUS "build task executor simulator: ${BUILD_TASK_EXECUTOR_SIMULATOR}" option(BUILD_FILE_CACHE_LRU_TOOL "ON for building file cache lru tool or OFF for not" OFF) message(STATUS "build file cache lru tool: ${BUILD_FILE_CACHE_LRU_TOOL}") +option(ENABLE_PAIMON_CPP "Enable Paimon C++ integration" ON) +set(PAIMON_HOME "" CACHE PATH "Paimon install prefix") + +# Allow env to override when reconfiguring (avoid picking /usr/local). +if (DEFINED ENV{ENABLE_PAIMON_CPP}) + set(ENABLE_PAIMON_CPP "$ENV{ENABLE_PAIMON_CPP}" CACHE BOOL "" FORCE) +endif() Review Comment: `ENABLE_PAIMON_CPP` is introduced as a toggle, but the BE source tree always compiles/links `PaimonCppReader` (e.g. `be/src/vec` uses `GLOB_RECURSE *.cpp` and `file_scanner.cpp` includes/uses the reader unconditionally). If someone configures with `-DENABLE_PAIMON_CPP=OFF`, CMake will stop adding paimon static libs in `thirdparty.cmake`, likely causing link failures. Either (1) make the option actually gate compilation/usage (compile definitions + `#ifdef` + source filtering), or (2) remove the option and always require the paimon-cpp deps. ########## be/src/vec/exec/scan/file_scanner.cpp: ########## @@ -995,9 +997,25 @@ Status FileScanner::_get_next_reader() { _cur_reader = std::move(mc_reader); } else if (range.__isset.table_format_params && range.table_format_params.table_format_type == "paimon") { - _cur_reader = PaimonJniReader::create_unique(_file_slot_descs, _state, _profile, - range, _params); - init_status = ((PaimonJniReader*)(_cur_reader.get()))->init_reader(); + if (_state->query_options().__isset.enable_paimon_cpp_reader && + _state->query_options().enable_paimon_cpp_reader) { + auto cpp_reader = PaimonCppReader::create_unique(_file_slot_descs, _state, + _profile, range, _params); Review Comment: The paimon-cpp reader selection is compiled in unconditionally. 
With `ENABLE_PAIMON_CPP=OFF` the paimon static libs won’t be linked (see `thirdparty.cmake`), so this should be guarded or compiled out to avoid unresolved symbols and provide a clean fallback to `PaimonJniReader`. ########## be/cmake/thirdparty.cmake: ########## @@ -179,3 +179,18 @@ add_thirdparty(icudata LIB64) add_thirdparty(pugixml LIB64) + +if (ENABLE_PAIMON_CPP) + add_thirdparty(paimon LIB64) + add_thirdparty(paimon_parquet_file_format LIB64) + add_thirdparty(paimon_orc_file_format LIB64) + add_thirdparty(paimon_blob_file_format LIB64) + add_thirdparty(paimon_local_file_system LIB64) + add_thirdparty(paimon_file_index LIB64) + add_thirdparty(paimon_global_index LIB64) + + add_thirdparty(roaring_bitmap_paimon LIB64) + add_thirdparty(xxhash_paimon LIB64) + add_thirdparty(fmt_paimon LIB64) + add_thirdparty(tbb_paimon LIB64) +endif() Review Comment: `add_thirdparty(...)` does not validate that the referenced static libraries actually exist; it only sets an IMPORTED_LOCATION. With `ENABLE_PAIMON_CPP` defaulting to ON, missing paimon-cpp artifacts will fail later with opaque linker errors. Consider adding explicit `if (NOT EXISTS ...) message(FATAL_ERROR ...)` checks for the paimon libs (or default `ENABLE_PAIMON_CPP` to OFF unless the libs are present). ########## be/src/vec/exec/scan/file_scanner.cpp: ########## @@ -69,7 +69,9 @@ #include "vec/exec/format/table/iceberg_reader.h" #include "vec/exec/format/table/lakesoul_jni_reader.h" #include "vec/exec/format/table/max_compute_jni_reader.h" +#include "vec/exec/format/table/paimon_cpp_reader.h" #include "vec/exec/format/table/paimon_jni_reader.h" +#include "vec/exec/format/table/paimon_predicate_converter.h" Review Comment: `paimon_cpp_reader.h` / `paimon_predicate_converter.h` are included unconditionally. If `ENABLE_PAIMON_CPP` is intended as a build-time toggle, consider guarding these includes (and related code paths) behind a compile definition so builds without paimon-cpp headers/libs still compile. 
########## be/src/vec/exec/format/table/paimon_predicate_converter.cpp: ########## @@ -0,0 +1,659 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/exec/format/table/paimon_predicate_converter.h" + +#include <algorithm> +#include <cctype> +#include <utility> + +#include "paimon/data/decimal.h" +#include "paimon/data/timestamp.h" +#include "paimon/predicate/predicate_builder.h" +#include "runtime/decimalv2_value.h" +#include "runtime/descriptors.h" +#include "runtime/runtime_state.h" +#include "util/timezone_utils.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_nullable.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/exprs/vcompound_pred.h" +#include "vec/exprs/vdirect_in_predicate.h" +#include "vec/exprs/vectorized_fn_call.h" +#include "vec/exprs/vexpr.h" +#include "vec/exprs/vin_predicate.h" +#include "vec/exprs/vliteral.h" +#include "vec/exprs/vslot_ref.h" +#include "vec/runtime/timestamptz_value.h" +#include "vec/runtime/vdatetime_value.h" + +namespace doris::vectorized { +#include "common/compile_check_begin.h" + 
+PaimonPredicateConverter::PaimonPredicateConverter( + const std::vector<SlotDescriptor*>& file_slot_descs, RuntimeState* state) + : _state(state) { + _field_index_by_name.reserve(file_slot_descs.size()); + for (size_t i = 0; i < file_slot_descs.size(); ++i) { + const auto& name = file_slot_descs[i]->col_name(); + auto normalized = _normalize_name(name); + if (_field_index_by_name.find(normalized) == _field_index_by_name.end()) { + _field_index_by_name.emplace(std::move(normalized), static_cast<int32_t>(i)); + } + } + + if (!TimezoneUtils::find_cctz_time_zone("GMT", _gmt_tz)) { + TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, _gmt_tz); + } +} + +std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::build( + const VExprContextSPtrs& conjuncts) { + std::vector<std::shared_ptr<paimon::Predicate>> predicates; + predicates.reserve(conjuncts.size()); + for (const auto& conjunct : conjuncts) { + if (!conjunct || !conjunct->root()) { + continue; + } + auto root = conjunct->root(); + if (root->is_rf_wrapper()) { + if (auto impl = root->get_impl()) { + root = impl; + } + } + auto predicate = _convert_expr(root); + if (predicate) { + predicates.emplace_back(std::move(predicate)); + } + } + + if (predicates.empty()) { + return nullptr; + } + if (predicates.size() == 1) { + return predicates.front(); + } + auto and_result = paimon::PredicateBuilder::And(predicates); + if (!and_result.ok()) { + return nullptr; + } + return std::move(and_result).value(); +} + +std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_expr(const VExprSPtr& expr) { + if (!expr) { + return nullptr; + } + + auto uncast = VExpr::expr_without_cast(expr); + + if (auto* direct_in = dynamic_cast<VDirectInPredicate*>(uncast.get())) { + VExprSPtr in_expr; + if (direct_in->get_slot_in_expr(in_expr)) { + return _convert_in(in_expr); + } + return nullptr; + } + + if (dynamic_cast<VInPredicate*>(uncast.get()) != nullptr) { + return _convert_in(uncast); + } + + switch 
(uncast->op()) { + case TExprOpcode::COMPOUND_AND: + case TExprOpcode::COMPOUND_OR: + return _convert_compound(uncast); + case TExprOpcode::COMPOUND_NOT: + return nullptr; + case TExprOpcode::EQ: + case TExprOpcode::EQ_FOR_NULL: + case TExprOpcode::NE: + case TExprOpcode::GE: + case TExprOpcode::GT: + case TExprOpcode::LE: + case TExprOpcode::LT: + return _convert_binary(uncast); + default: + break; + } + + if (auto* fn = dynamic_cast<VectorizedFnCall*>(uncast.get())) { + auto fn_name = _normalize_name(fn->function_name()); + if (fn_name == "is_null_pred" || fn_name == "is_not_null_pred") { + return _convert_is_null(uncast, fn_name); + } + if (fn_name == "like") { + return _convert_like_prefix(uncast); + } + } + + return nullptr; +} + +std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_compound( + const VExprSPtr& expr) { + if (!expr || expr->get_num_children() != 2) { + return nullptr; + } + auto left = _convert_expr(expr->get_child(0)); + if (!left) { + return nullptr; + } + auto right = _convert_expr(expr->get_child(1)); + if (!right) { + return nullptr; + } + + if (expr->op() == TExprOpcode::COMPOUND_AND) { + auto and_result = paimon::PredicateBuilder::And({left, right}); + return and_result.ok() ? std::move(and_result).value() : nullptr; + } + if (expr->op() == TExprOpcode::COMPOUND_OR) { + auto or_result = paimon::PredicateBuilder::Or({left, right}); + return or_result.ok() ? 
std::move(or_result).value() : nullptr; + } + return nullptr; +} + +std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_in(const VExprSPtr& expr) { + auto* in_pred = dynamic_cast<VInPredicate*>(expr.get()); + if (!in_pred || expr->get_num_children() < 2) { + return nullptr; + } + auto field_meta = _resolve_field(expr->get_child(0)); + if (!field_meta) { + return nullptr; + } + + std::vector<paimon::Literal> literals; + literals.reserve(expr->get_num_children() - 1); + for (uint16_t i = 1; i < expr->get_num_children(); ++i) { + auto literal = _convert_literal(expr->get_child(i), *field_meta->slot_desc, + field_meta->field_type); + if (!literal) { + return nullptr; + } + literals.emplace_back(std::move(*literal)); + } + + if (literals.empty()) { + return nullptr; + } + if (in_pred->is_not_in()) { + return paimon::PredicateBuilder::NotIn(field_meta->index, field_meta->slot_desc->col_name(), + field_meta->field_type, literals); + } + return paimon::PredicateBuilder::In(field_meta->index, field_meta->slot_desc->col_name(), + field_meta->field_type, literals); +} + +std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_binary( + const VExprSPtr& expr) { + if (!expr || expr->get_num_children() != 2) { + return nullptr; + } + auto field_meta = _resolve_field(expr->get_child(0)); + if (!field_meta) { + return nullptr; + } + + if (expr->op() == TExprOpcode::EQ_FOR_NULL) { + return paimon::PredicateBuilder::IsNull( + field_meta->index, field_meta->slot_desc->col_name(), field_meta->field_type); + } + + auto literal = + _convert_literal(expr->get_child(1), *field_meta->slot_desc, field_meta->field_type); + if (!literal) { + return nullptr; + } + + switch (expr->op()) { + case TExprOpcode::EQ: + return paimon::PredicateBuilder::Equal(field_meta->index, field_meta->slot_desc->col_name(), + field_meta->field_type, *literal); + case TExprOpcode::NE: + return paimon::PredicateBuilder::NotEqual(field_meta->index, + field_meta->slot_desc->col_name(), 
+ field_meta->field_type, *literal); + case TExprOpcode::GE: + return paimon::PredicateBuilder::GreaterOrEqual(field_meta->index, + field_meta->slot_desc->col_name(), + field_meta->field_type, *literal); + case TExprOpcode::GT: + return paimon::PredicateBuilder::GreaterThan(field_meta->index, + field_meta->slot_desc->col_name(), + field_meta->field_type, *literal); + case TExprOpcode::LE: + return paimon::PredicateBuilder::LessOrEqual(field_meta->index, + field_meta->slot_desc->col_name(), + field_meta->field_type, *literal); + case TExprOpcode::LT: + return paimon::PredicateBuilder::LessThan(field_meta->index, + field_meta->slot_desc->col_name(), + field_meta->field_type, *literal); + default: + break; + } + return nullptr; +} + +std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_is_null( + const VExprSPtr& expr, const std::string& fn_name) { + if (!expr || expr->get_num_children() != 1) { + return nullptr; + } + auto field_meta = _resolve_field(expr->get_child(0)); + if (!field_meta) { + return nullptr; + } + if (fn_name == "is_not_null_pred") { + return paimon::PredicateBuilder::IsNotNull( + field_meta->index, field_meta->slot_desc->col_name(), field_meta->field_type); + } + return paimon::PredicateBuilder::IsNull(field_meta->index, field_meta->slot_desc->col_name(), + field_meta->field_type); +} + +std::shared_ptr<paimon::Predicate> PaimonPredicateConverter::_convert_like_prefix( + const VExprSPtr& expr) { + if (!expr || expr->get_num_children() != 2) { + return nullptr; + } + auto field_meta = _resolve_field(expr->get_child(0)); + if (!field_meta || field_meta->field_type != paimon::FieldType::STRING) { + return nullptr; + } + + auto pattern_opt = _extract_string_literal(expr->get_child(1)); + if (!pattern_opt) { + return nullptr; + } + const std::string& pattern = *pattern_opt; + if (!pattern.empty() && pattern.front() == '%') { + return nullptr; + } + if (pattern.empty() || pattern.back() != '%') { + return nullptr; + } + + std::string 
prefix = pattern.substr(0, pattern.size() - 1); + paimon::Literal lower_literal(paimon::FieldType::STRING, prefix.data(), prefix.size()); + auto lower_pred = paimon::PredicateBuilder::GreaterOrEqual( Review Comment: `LIKE` pushdown currently treats any pattern that ends with `%` (and doesn’t start with `%`) as a simple prefix match. This is incorrect for patterns that contain additional wildcards or escape sequences before the trailing `%` (e.g. `ab%c%`, `a_b%`, or an escaped `\%` / `\_`): such patterns would be converted into a range predicate that does not match the original `LIKE` semantics. Suggest enabling this optimization only when the part of the pattern before the trailing `%` contains no `%`, no `_`, and no escape sequences; otherwise return nullptr. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
