This is an automated email from the ASF dual-hosted git repository.

suxiaogang223 pushed a commit to branch codex/complex-column-predicate-stats-filtering
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 5aab85d7004a27afc7ec43eae2af59faec4c19ae
Author: Socrates <[email protected]>
AuthorDate: Thu Jun 4 02:27:50 2026 +0800

    [feature](be) Support nested parquet IN pruning hints
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary: Add nested STRUCT IN-list pruning hint extraction for new parquet scans, and restore the explicit nested scalar value-index mapping so that nullable struct parent/child values remain aligned with Arrow RecordReader output.
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test: Manual test
        - build-support/clang-format.sh be/src/format/reader/column_mapper.cpp be/src/format/new_parquet/reader/nested_column_reader.h be/src/format/new_parquet/reader/arrow_leaf_reader_adapter.cpp be/test/format/new_parquet/parquet_reader_test.cpp
        - git diff --check
    - Behavior changed: No
    - Does this need documentation: No
---
 .../reader/arrow_leaf_reader_adapter.cpp           |  21 ++
 .../new_parquet/reader/nested_column_reader.h      |  35 +--
 be/src/format/reader/column_mapper.cpp             |  86 +++++-
 be/test/format/new_parquet/parquet_reader_test.cpp | 303 +++++++++++++++++++++
 4 files changed, 422 insertions(+), 23 deletions(-)

diff --git a/be/src/format/new_parquet/reader/arrow_leaf_reader_adapter.cpp 
b/be/src/format/new_parquet/reader/arrow_leaf_reader_adapter.cpp
index 7ed69c5e48b..b1fab9042fa 100644
--- a/be/src/format/new_parquet/reader/arrow_leaf_reader_adapter.cpp
+++ b/be/src/format/new_parquet/reader/arrow_leaf_reader_adapter.cpp
@@ -276,6 +276,27 @@ Status read_nested_leaf_batch(const 
ArrowLeafReaderContext& context, int64_t bat
         std::copy(rep_levels, rep_levels + batch->levels_written, 
batch->rep_levels.begin());
     }
 
+    batch->value_indices.resize(static_cast<size_t>(batch->levels_written), 
-1);
+    int64_t value_idx = 0;
+    const bool dense_value_slots = values_written == batch->levels_written;
+    for (int64_t level_idx = 0; level_idx < batch->levels_written; 
++level_idx) {
+        if (batch->def_levels[level_idx] < value_slot_definition_level ||
+            batch->rep_levels[level_idx] > value_slot_repetition_level) {
+            continue;
+        }
+        if (dense_value_slots) {
+            batch->value_indices[static_cast<size_t>(level_idx)] = level_idx;
+        } else {
+            if (value_idx >= values_written) {
+                return Status::Corruption(
+                        "Nested parquet reader returned fewer values than 
definition levels for "
+                        "column {}",
+                        context.column_name());
+            }
+            batch->value_indices[static_cast<size_t>(level_idx)] = value_idx++;
+        }
+    }
+
     const auto value_type = remove_nullable(context.data_type());
     batch->values_column = value_type->create_column();
     if (values_written > 0) {
diff --git a/be/src/format/new_parquet/reader/nested_column_reader.h 
b/be/src/format/new_parquet/reader/nested_column_reader.h
index 9b0a0043b3f..ad6bd90c49b 100644
--- a/be/src/format/new_parquet/reader/nested_column_reader.h
+++ b/be/src/format/new_parquet/reader/nested_column_reader.h
@@ -45,6 +45,7 @@ struct NestedScalarBatch {
     int16_t value_slot_repetition_level = std::numeric_limits<int16_t>::max();
     std::vector<int16_t> def_levels;
     std::vector<int16_t> rep_levels;
+    std::vector<int64_t> value_indices;
     MutableColumnPtr values_column;
 
     bool empty() const { return levels_written == 0; }
@@ -137,25 +138,15 @@ public:
     void reset(const NestedScalarBatch* batch) {
         DORIS_CHECK(batch != nullptr);
         _batch = batch;
-        _next_level_idx = 0;
-        _next_value_idx = 0;
     }
 
     Status value_index(const std::string& column_name, int64_t level_idx, 
int64_t* value_idx) {
         DORIS_CHECK(_batch != nullptr);
         DORIS_CHECK(value_idx != nullptr);
-        DORIS_CHECK(level_idx >= _next_level_idx);
         DORIS_CHECK(level_idx < _batch->levels_written);
-        int64_t computed_value_idx = -1;
-        while (_next_level_idx <= level_idx) {
-            if (has_value_slot(_next_level_idx)) {
-                if (_next_level_idx == level_idx) {
-                    computed_value_idx = _next_value_idx;
-                }
-                ++_next_value_idx;
-            }
-            ++_next_level_idx;
-        }
+        DORIS_CHECK(level_idx >= 0);
+        DORIS_CHECK(static_cast<size_t>(level_idx) < 
_batch->value_indices.size());
+        const int64_t computed_value_idx = 
_batch->value_indices[static_cast<size_t>(level_idx)];
         if (computed_value_idx < 0) {
             return Status::Corruption("Nested parquet value is absent for 
column {}", column_name);
         }
@@ -170,14 +161,13 @@ public:
 
     bool has_value_slot(int64_t level_idx) const {
         DORIS_CHECK(_batch != nullptr);
-        return _batch->def_levels[level_idx] >= 
_batch->value_slot_definition_level &&
-               _batch->rep_levels[level_idx] <= 
_batch->value_slot_repetition_level;
+        DORIS_CHECK(level_idx >= 0);
+        DORIS_CHECK(static_cast<size_t>(level_idx) < 
_batch->value_indices.size());
+        return _batch->value_indices[static_cast<size_t>(level_idx)] >= 0;
     }
 
 private:
     const NestedScalarBatch* _batch = nullptr;
-    int64_t _next_level_idx = 0;
-    int64_t _next_value_idx = 0;
 };
 
 inline void move_nested_scalar_tail(const NestedScalarBatch& src, int64_t 
start_level,
@@ -195,17 +185,18 @@ inline void move_nested_scalar_tail(const 
NestedScalarBatch& src, int64_t start_
     dst.rep_levels.assign(src.rep_levels.begin() + start_level, 
src.rep_levels.end());
     dst.value_slot_definition_level = src.value_slot_definition_level;
     dst.value_slot_repetition_level = src.value_slot_repetition_level;
+    dst.value_indices.resize(static_cast<size_t>(dst.levels_written), -1);
     dst.values_column = src.values_column->clone_empty();
 
-    NestedScalarValueCursor value_cursor(&src);
+    int64_t values_written = 0;
     for (int64_t level_idx = start_level; level_idx < src.levels_written; 
++level_idx) {
-        if (!value_cursor.has_value_slot(level_idx)) {
+        const int64_t value_idx = 
src.value_indices[static_cast<size_t>(level_idx)];
+        if (value_idx < 0) {
             continue;
         }
-        int64_t value_idx = -1;
-        auto status = value_cursor.value_index("overflow", level_idx, 
&value_idx);
-        DORIS_CHECK(status.ok());
+        dst.value_indices[static_cast<size_t>(level_idx - start_level)] = 
values_written;
         dst.values_column->insert_from(*src.values_column, 
static_cast<size_t>(value_idx));
+        values_written++;
     }
     overflow->batch = std::move(dst);
 }
diff --git a/be/src/format/reader/column_mapper.cpp 
b/be/src/format/reader/column_mapper.cpp
index 1efd255673b..7e7e7894ddd 100644
--- a/be/src/format/reader/column_mapper.cpp
+++ b/be/src/format/reader/column_mapper.cpp
@@ -29,6 +29,8 @@
 #include "common/status.h"
 #include "core/data_type/convert_field_to_type.h"
 #include "core/data_type/data_type_nullable.h"
+#include "exprs/create_predicate_function.h"
+#include "exprs/vin_predicate.h"
 #include "format/reader/expr/cast.h"
 #include "format/reader/expr/literal.h"
 #include "format/reader/expr/slot_ref.h"
@@ -482,6 +484,46 @@ static std::shared_ptr<ColumnPredicate> 
build_nested_comparison_predicate(
     }
 }
 
+static std::shared_ptr<ColumnPredicate> build_nested_in_list_predicate(
+        const VExprSPtrs& literal_exprs, const NestedPredicateTargetInfo& 
target) {
+    if (literal_exprs.empty() || target.file_leaf_type == nullptr) {
+        return nullptr;
+    }
+
+    auto value_column = target.file_leaf_type->create_column();
+    for (const auto& literal_expr : literal_exprs) {
+        if (literal_expr == nullptr || !literal_expr->is_literal()) {
+            return nullptr;
+        }
+        const auto original_literal = original_table_literal(literal_expr);
+        const Field original_field = literal_field(original_literal);
+        Field file_field;
+        try {
+            convert_field_to_type(original_field, *target.file_leaf_type, 
&file_field,
+                                  original_literal->data_type().get());
+        } catch (const Exception&) {
+            return nullptr;
+        }
+        if (file_field.is_null()) {
+            return nullptr;
+        }
+        value_column->insert(file_field);
+    }
+
+    std::shared_ptr<HybridSetBase> values;
+    try {
+        values.reset(create_set(target.file_leaf_type->get_primitive_type(), 
literal_exprs.size(),
+                                false));
+        ColumnPtr value_column_ptr = std::move(value_column);
+        values->insert_range_from(value_column_ptr, 0, 
value_column_ptr->size());
+        return create_in_list_predicate<PredicateType::IN_LIST>(
+                cast_set<uint32_t>(target.root_file_column_id), 
target.leaf_name,
+                target.file_leaf_type, values, false);
+    } catch (const Exception&) {
+        return nullptr;
+    }
+}
+
 static bool extract_nested_binary_comparison_filter(const VExprSPtr& expr,
                                                     const 
std::vector<ColumnMapping>& mappings,
                                                     FileColumnPredicateFilter* 
column_filter) {
@@ -517,6 +559,47 @@ static bool extract_nested_binary_comparison_filter(const 
VExprSPtr& expr,
     return true;
 }
 
+static bool extract_nested_in_list_filter(const VExprSPtr& expr,
+                                          const std::vector<ColumnMapping>& 
mappings,
+                                          FileColumnPredicateFilter* 
column_filter) {
+    DORIS_CHECK(column_filter != nullptr);
+    if (expr == nullptr || expr->node_type() != TExprNodeType::IN_PRED ||
+        expr->get_num_children() < 2) {
+        return false;
+    }
+    if (const auto* in_predicate = dynamic_cast<const 
VInPredicate*>(expr.get());
+        in_predicate != nullptr && in_predicate->is_not_in()) {
+        return false;
+    }
+
+    NestedStructPath path;
+    if (!extract_nested_struct_path(expr->children()[0], &path)) {
+        return false;
+    }
+
+    VExprSPtrs literal_exprs;
+    literal_exprs.reserve(expr->get_num_children() - 1);
+    for (size_t child_idx = 1; child_idx < expr->children().size(); 
++child_idx) {
+        if (!expr->children()[child_idx]->is_literal()) {
+            return false;
+        }
+        literal_exprs.push_back(expr->children()[child_idx]);
+    }
+
+    NestedPredicateTargetInfo target;
+    if (!resolve_nested_predicate_target(path, mappings, &target)) {
+        return false;
+    }
+    auto predicate = build_nested_in_list_predicate(literal_exprs, target);
+    if (predicate == nullptr) {
+        return false;
+    }
+    column_filter->file_column_id = target.root_file_column_id;
+    column_filter->file_child_id_path = std::move(target.file_child_id_path);
+    column_filter->predicates.push_back(std::move(predicate));
+    return true;
+}
+
 static void merge_column_predicate_filter(FileColumnPredicateFilter 
column_filter,
                                           
std::vector<FileColumnPredicateFilter>* filters) {
     DORIS_CHECK(filters != nullptr);
@@ -548,7 +631,8 @@ static void collect_nested_column_predicate_filters(
         return;
     }
     FileColumnPredicateFilter column_filter;
-    if (extract_nested_binary_comparison_filter(expr, mappings, 
&column_filter)) {
+    if (extract_nested_binary_comparison_filter(expr, mappings, 
&column_filter) ||
+        extract_nested_in_list_filter(expr, mappings, &column_filter)) {
         merge_column_predicate_filter(std::move(column_filter), filters);
     }
 }
diff --git a/be/test/format/new_parquet/parquet_reader_test.cpp 
b/be/test/format/new_parquet/parquet_reader_test.cpp
index 5e0fe23b2fe..13c7ee419cc 100644
--- a/be/test/format/new_parquet/parquet_reader_test.cpp
+++ b/be/test/format/new_parquet/parquet_reader_test.cpp
@@ -141,6 +141,17 @@ VExprSPtr struct_element_expr(const VExprSPtr& parent, 
const DataTypePtr& child_
     return expr;
 }
 
+VExprSPtr in_predicate_expr(const VExprSPtr& probe_expr, const DataTypePtr& 
literal_type,
+                            const std::vector<Field>& values) {
+    auto expr = std::make_shared<TestFunctionExpr>("in", 
std::make_shared<DataTypeUInt8>(),
+                                                   TExprNodeType::IN_PRED);
+    expr->add_child(probe_expr);
+    for (const auto& value : values) {
+        expr->add_child(TableLiteral::create_shared(literal_type, value));
+    }
+    return expr;
+}
+
 class Int32SumGreaterThanExpr final : public VExpr {
 public:
     Int32SumGreaterThanExpr(int left_column_id, int right_column_id, int32_t 
value)
@@ -672,6 +683,298 @@ TEST(TableColumnMapperTest, 
MergesStructFilterOnlyChildIntoPredicateProjection)
     EXPECT_EQ(read_type->get_element_name(1), "a");
 }
 
+TEST(TableColumnMapperTest, BuildsNestedStructInListPredicateFilter) {
+    auto a_type = std::make_shared<DataTypeInt32>();
+    auto b_type = std::make_shared<DataTypeString>();
+    reader::SchemaField a_field;
+    a_field.id = 0;
+    a_field.name = "a";
+    a_field.type = a_type;
+    reader::SchemaField b_field;
+    b_field.id = 1;
+    b_field.name = "b";
+    b_field.type = b_type;
+    reader::SchemaField struct_field;
+    struct_field.id = 0;
+    struct_field.name = "s";
+    struct_field.type =
+            std::make_shared<DataTypeStruct>(DataTypes {a_type, b_type}, 
Strings {"a", "b"});
+    struct_field.children = {a_field, b_field};
+
+    reader::TableColumn table_child;
+    table_child.id = 101;
+    table_child.name = "b";
+    table_child.type = b_type;
+    reader::TableColumn table_column;
+    table_column.id = 100;
+    table_column.name = "s";
+    table_column.type = std::make_shared<DataTypeStruct>(DataTypes {b_type}, 
Strings {"b"});
+    table_column.children = {table_child};
+
+    const auto full_table_struct_type =
+            std::make_shared<DataTypeStruct>(DataTypes {a_type, b_type}, 
Strings {"a", "b"});
+    auto filter_expr = in_predicate_expr(
+            struct_element_expr(
+                    TableSlotRef::create_shared(100, 100, -1, 
full_table_struct_type, "s"), a_type,
+                    "a"),
+            a_type, {Field::create_field<TYPE_INT>(5), 
Field::create_field<TYPE_INT>(7)});
+    reader::TableFilter table_filter {
+            .conjunct = VExprContext::create_shared(filter_expr),
+            .slot_ids = {100},
+    };
+
+    reader::TableColumnMapperOptions options;
+    options.mode = reader::TableColumnMappingMode::BY_NAME;
+    reader::TableColumnMapper mapper(options);
+    ASSERT_TRUE(mapper.create_mapping({table_column}, {}, 
{struct_field}).ok());
+
+    reader::FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, {table_column}, 
&request).ok());
+
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].file_column_id, 0);
+    EXPECT_EQ(request.column_predicate_filters[0].file_child_id_path, 
std::vector<int32_t>({0}));
+    ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[0]->type(), 
PredicateType::IN_LIST);
+}
+
+TEST(TableColumnMapperTest, 
BuildsNestedStructPredicateFilterForReverseComparison) {
+    auto a_type = std::make_shared<DataTypeInt32>();
+    auto b_type = std::make_shared<DataTypeString>();
+    reader::SchemaField a_field;
+    a_field.id = 0;
+    a_field.name = "a";
+    a_field.type = a_type;
+    reader::SchemaField b_field;
+    b_field.id = 1;
+    b_field.name = "b";
+    b_field.type = b_type;
+    reader::SchemaField struct_field;
+    struct_field.id = 0;
+    struct_field.name = "s";
+    struct_field.type =
+            std::make_shared<DataTypeStruct>(DataTypes {a_type, b_type}, 
Strings {"a", "b"});
+    struct_field.children = {a_field, b_field};
+
+    reader::TableColumn table_child;
+    table_child.id = 101;
+    table_child.name = "b";
+    table_child.type = b_type;
+    reader::TableColumn table_column;
+    table_column.id = 100;
+    table_column.name = "s";
+    table_column.type = std::make_shared<DataTypeStruct>(DataTypes {b_type}, 
Strings {"b"});
+    table_column.children = {table_child};
+
+    const auto full_table_struct_type =
+            std::make_shared<DataTypeStruct>(DataTypes {a_type, b_type}, 
Strings {"a", "b"});
+    auto filter_expr = std::make_shared<TestFunctionExpr>(
+            "lt", std::make_shared<DataTypeUInt8>(), 
TExprNodeType::BINARY_PRED, TExprOpcode::LT);
+    filter_expr->add_child(TableLiteral::create_shared(a_type, 
Field::create_field<TYPE_INT>(5)));
+    filter_expr->add_child(struct_element_expr(
+            TableSlotRef::create_shared(100, 100, -1, full_table_struct_type, 
"s"), a_type, "a"));
+    reader::TableFilter table_filter {
+            .conjunct = VExprContext::create_shared(filter_expr),
+            .slot_ids = {100},
+    };
+
+    reader::TableColumnMapperOptions options;
+    options.mode = reader::TableColumnMappingMode::BY_NAME;
+    reader::TableColumnMapper mapper(options);
+    ASSERT_TRUE(mapper.create_mapping({table_column}, {}, 
{struct_field}).ok());
+
+    reader::FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, {table_column}, 
&request).ok());
+
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].file_column_id, 0);
+    EXPECT_EQ(request.column_predicate_filters[0].file_child_id_path, 
std::vector<int32_t>({0}));
+    ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[0]->type(), 
PredicateType::GT);
+}
+
+TEST(TableColumnMapperTest, 
BuildsNestedStructInListPredicateFilterForDeepPath) {
+    auto id_type = std::make_shared<DataTypeInt32>();
+    auto name_type = std::make_shared<DataTypeString>();
+    auto b_type = std::make_shared<DataTypeString>();
+    auto inner_type =
+            std::make_shared<DataTypeStruct>(DataTypes {id_type, name_type}, 
Strings {"id", "n"});
+    auto full_struct_type =
+            std::make_shared<DataTypeStruct>(DataTypes {inner_type, b_type}, 
Strings {"a", "b"});
+
+    reader::SchemaField id_field;
+    id_field.id = 0;
+    id_field.name = "id";
+    id_field.type = id_type;
+    reader::SchemaField name_field;
+    name_field.id = 1;
+    name_field.name = "n";
+    name_field.type = name_type;
+    reader::SchemaField a_field;
+    a_field.id = 0;
+    a_field.name = "a";
+    a_field.type = inner_type;
+    a_field.children = {id_field, name_field};
+    reader::SchemaField b_field;
+    b_field.id = 1;
+    b_field.name = "b";
+    b_field.type = b_type;
+    reader::SchemaField struct_field;
+    struct_field.id = 0;
+    struct_field.name = "s";
+    struct_field.type = full_struct_type;
+    struct_field.children = {a_field, b_field};
+
+    reader::TableColumn table_child;
+    table_child.id = 101;
+    table_child.name = "b";
+    table_child.type = b_type;
+    reader::TableColumn table_column;
+    table_column.id = 100;
+    table_column.name = "s";
+    table_column.type = std::make_shared<DataTypeStruct>(DataTypes {b_type}, 
Strings {"b"});
+    table_column.children = {table_child};
+
+    auto nested_id_expr = struct_element_expr(
+            struct_element_expr(TableSlotRef::create_shared(100, 100, -1, 
full_struct_type, "s"),
+                                inner_type, "a"),
+            id_type, "id");
+    auto filter_expr =
+            in_predicate_expr(nested_id_expr, id_type,
+                              {Field::create_field<TYPE_INT>(5), 
Field::create_field<TYPE_INT>(7)});
+    reader::TableFilter table_filter {
+            .conjunct = VExprContext::create_shared(filter_expr),
+            .slot_ids = {100},
+    };
+
+    reader::TableColumnMapperOptions options;
+    options.mode = reader::TableColumnMappingMode::BY_NAME;
+    reader::TableColumnMapper mapper(options);
+    ASSERT_TRUE(mapper.create_mapping({table_column}, {}, 
{struct_field}).ok());
+
+    reader::FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, {table_column}, 
&request).ok());
+
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].file_column_id, 0);
+    EXPECT_EQ(request.column_predicate_filters[0].file_child_id_path, 
std::vector<int32_t>({0, 0}));
+    ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[0]->type(), 
PredicateType::IN_LIST);
+}
+
+TEST(TableColumnMapperTest, DoesNotBuildNestedPredicateFilterForMissingChild) {
+    auto a_type = std::make_shared<DataTypeInt32>();
+    auto b_type = std::make_shared<DataTypeString>();
+    reader::SchemaField a_field;
+    a_field.id = 0;
+    a_field.name = "a";
+    a_field.type = a_type;
+    reader::SchemaField b_field;
+    b_field.id = 1;
+    b_field.name = "b";
+    b_field.type = b_type;
+    reader::SchemaField struct_field;
+    struct_field.id = 0;
+    struct_field.name = "s";
+    struct_field.type =
+            std::make_shared<DataTypeStruct>(DataTypes {a_type, b_type}, 
Strings {"a", "b"});
+    struct_field.children = {a_field, b_field};
+
+    reader::TableColumn table_child;
+    table_child.id = 101;
+    table_child.name = "b";
+    table_child.type = b_type;
+    reader::TableColumn table_column;
+    table_column.id = 100;
+    table_column.name = "s";
+    table_column.type = std::make_shared<DataTypeStruct>(DataTypes {b_type}, 
Strings {"b"});
+    table_column.children = {table_child};
+
+    const auto full_table_struct_type =
+            std::make_shared<DataTypeStruct>(DataTypes {a_type, b_type}, 
Strings {"a", "b"});
+    auto filter_expr = std::make_shared<TestFunctionExpr>(
+            "gt", std::make_shared<DataTypeUInt8>(), 
TExprNodeType::BINARY_PRED, TExprOpcode::GT);
+    filter_expr->add_child(struct_element_expr(
+            TableSlotRef::create_shared(100, 100, -1, full_table_struct_type, 
"s"), a_type,
+            "missing"));
+    filter_expr->add_child(TableLiteral::create_shared(a_type, 
Field::create_field<TYPE_INT>(5)));
+    reader::TableFilter table_filter {
+            .conjunct = VExprContext::create_shared(filter_expr),
+            .slot_ids = {100},
+    };
+
+    reader::TableColumnMapperOptions options;
+    options.mode = reader::TableColumnMappingMode::BY_NAME;
+    reader::TableColumnMapper mapper(options);
+    ASSERT_TRUE(mapper.create_mapping({table_column}, {}, 
{struct_field}).ok());
+
+    reader::FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, {table_column}, 
&request).ok());
+
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+}
+
+TEST(TableColumnMapperTest, DoesNotBuildNestedPredicateFilterFromOr) {
+    auto a_type = std::make_shared<DataTypeInt32>();
+    auto b_type = std::make_shared<DataTypeString>();
+    reader::SchemaField a_field;
+    a_field.id = 0;
+    a_field.name = "a";
+    a_field.type = a_type;
+    reader::SchemaField b_field;
+    b_field.id = 1;
+    b_field.name = "b";
+    b_field.type = b_type;
+    reader::SchemaField struct_field;
+    struct_field.id = 0;
+    struct_field.name = "s";
+    struct_field.type =
+            std::make_shared<DataTypeStruct>(DataTypes {a_type, b_type}, 
Strings {"a", "b"});
+    struct_field.children = {a_field, b_field};
+
+    reader::TableColumn table_child;
+    table_child.id = 101;
+    table_child.name = "b";
+    table_child.type = b_type;
+    reader::TableColumn table_column;
+    table_column.id = 100;
+    table_column.name = "s";
+    table_column.type = std::make_shared<DataTypeStruct>(DataTypes {b_type}, 
Strings {"b"});
+    table_column.children = {table_child};
+
+    const auto full_table_struct_type =
+            std::make_shared<DataTypeStruct>(DataTypes {a_type, b_type}, 
Strings {"a", "b"});
+    auto left = std::make_shared<TestFunctionExpr>("gt", 
std::make_shared<DataTypeUInt8>(),
+                                                   TExprNodeType::BINARY_PRED, 
TExprOpcode::GT);
+    left->add_child(struct_element_expr(
+            TableSlotRef::create_shared(100, 100, -1, full_table_struct_type, 
"s"), a_type, "a"));
+    left->add_child(TableLiteral::create_shared(a_type, 
Field::create_field<TYPE_INT>(5)));
+    auto right = in_predicate_expr(
+            struct_element_expr(
+                    TableSlotRef::create_shared(100, 100, -1, 
full_table_struct_type, "s"), a_type,
+                    "a"),
+            a_type, {Field::create_field<TYPE_INT>(7)});
+    auto filter_expr = std::make_shared<TestFunctionExpr>("or", 
std::make_shared<DataTypeUInt8>(),
+                                                          
TExprNodeType::COMPOUND_PRED,
+                                                          
TExprOpcode::COMPOUND_OR);
+    filter_expr->add_child(left);
+    filter_expr->add_child(right);
+    reader::TableFilter table_filter {
+            .conjunct = VExprContext::create_shared(filter_expr),
+            .slot_ids = {100},
+    };
+
+    reader::TableColumnMapperOptions options;
+    options.mode = reader::TableColumnMappingMode::BY_NAME;
+    reader::TableColumnMapper mapper(options);
+    ASSERT_TRUE(mapper.create_mapping({table_column}, {}, 
{struct_field}).ok());
+
+    reader::FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, {table_column}, 
&request).ok());
+
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+}
+
 TEST(TableColumnMapperTest, CreatesComplexProjectionForMapValueStructChildren) 
{
     auto key_type = std::make_shared<DataTypeInt32>();
     auto a_type = std::make_shared<DataTypeInt32>();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to