This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 56ccb9a6577 [fix](parquet) fix parquet reader missing column and
filter missing column (#36182)
56ccb9a6577 is described below
commit 56ccb9a657761d08fa216e741443c899e122843b
Author: Ashin Gau <[email protected]>
AuthorDate: Thu Jun 13 21:30:05 2024 +0800
[fix](parquet) fix parquet reader missing column and filter missing column
(#36182)
bp #36189
---
be/src/vec/exec/format/parquet/vparquet_group_reader.cpp | 4 ++++
be/src/vec/exec/format/parquet/vparquet_group_reader.h | 1 +
be/src/vec/exec/format/parquet/vparquet_reader.cpp | 8 ++++++++
3 files changed, 13 insertions(+)
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 807f016cb43..5e824f34817 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -174,6 +174,10 @@ Status RowGroupReader::init(
}
}
}
+ // Also filter on conjuncts over missing columns, e.g. `missing_col == xx`,
`missing_col IS NULL`, `missing_col IS NOT NULL`.
+ _filter_conjuncts.insert(_filter_conjuncts.end(),
+ _lazy_read_ctx.missing_columns_conjuncts.begin(),
+ _lazy_read_ctx.missing_columns_conjuncts.end());
RETURN_IF_ERROR(_rewrite_dict_predicates());
return Status::OK();
}
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.h
b/be/src/vec/exec/format/parquet/vparquet_group_reader.h
index d38f5a74adf..d9f7f2dbf34 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.h
@@ -94,6 +94,7 @@ public:
std::unordered_map<std::string, std::tuple<std::string, const
SlotDescriptor*>>
partition_columns;
std::unordered_map<std::string, VExprContextSPtr>
predicate_missing_columns;
+ VExprContextSPtrs missing_columns_conjuncts;
// lazy read missing columns or all missing columns
std::unordered_map<std::string, VExprContextSPtr> missing_columns;
// should turn off filtering by page index, lazy read and dict filter
if having complex type
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index 18610a1f655..f99786dc6e2 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -426,6 +426,14 @@ Status ParquetReader::set_fill_columns(
if (iter == predicate_columns.end()) {
_lazy_read_ctx.missing_columns.emplace(kv.first, kv.second);
} else {
+ // Collect the filter conjuncts on this missing column, e.g.
`missing_col == xx`, `missing_col IS NULL`, `missing_col IS NOT NULL`.
+ if (_slot_id_to_filter_conjuncts->find(iter->second.second) !=
+ _slot_id_to_filter_conjuncts->end()) {
+ for (auto& ctx :
_slot_id_to_filter_conjuncts->find(iter->second.second)->second) {
+ _lazy_read_ctx.missing_columns_conjuncts.emplace_back(ctx);
+ }
+ }
+
_lazy_read_ctx.predicate_missing_columns.emplace(kv.first,
kv.second);
_lazy_read_ctx.all_predicate_col_ids.emplace_back(iter->second.first);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]