This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 3e2461838a Revert "[improvement](scanner) Remove the predicate that is always true for the segment (#25366) (#25427)" (#25440) 3e2461838a is described below commit 3e2461838a25226e821552c555dcef63edf34b00 Author: Kang <kxiao.ti...@gmail.com> AuthorDate: Sat Oct 14 17:53:03 2023 +0800 Revert "[improvement](scanner) Remove the predicate that is always true for the segment (#25366) (#25427)" (#25440) --- be/src/common/config.cpp | 2 - be/src/common/config.h | 3 - be/src/olap/column_predicate.h | 4 -- be/src/olap/comparison_predicate.h | 25 -------- be/src/olap/rowset/segment_v2/column_reader.cpp | 26 --------- be/src/olap/rowset/segment_v2/column_reader.h | 3 - be/src/olap/rowset/segment_v2/segment.cpp | 20 +------ .../query_p0/test_select_with_predicate_prune.out | 25 -------- .../test_select_with_predicate_prune.groovy | 67 ---------------------- 9 files changed, 1 insertion(+), 174 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index f2c726a33b..63ba1a1288 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1084,8 +1084,6 @@ DEFINE_mInt32(tablet_schema_cache_recycle_interval, "86400"); DEFINE_Bool(exit_on_exception, "false") -DEFINE_Bool(ignore_always_true_predicate_for_segment, "true"); - // clang-format off #ifdef BE_TEST // test s3 diff --git a/be/src/common/config.h b/be/src/common/config.h index 06f9a18fcd..7c4ab39a40 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1141,9 +1141,6 @@ DECLARE_mInt32(tablet_schema_cache_recycle_interval); // Use `LOG(FATAL)` to replace `throw` when true DECLARE_mBool(exit_on_exception); -// Remove predicate that is always true for a segment. 
-DECLARE_Bool(ignore_always_true_predicate_for_segment); - #ifdef BE_TEST // test s3 DECLARE_String(test_s3_resource); diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index 05e84999a8..b98156f5fb 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -173,10 +173,6 @@ public: return true; } - virtual bool is_always_true(const std::pair<WrapperField*, WrapperField*>& statistic) const { - return false; - } - virtual bool evaluate_del(const std::pair<WrapperField*, WrapperField*>& statistic) const { return false; } diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index fddc554f51..04dfd5dc5c 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -158,8 +158,6 @@ public: return _operator(*reinterpret_cast<const T*>(statistic.ELE->cell_ptr()), _value); \ } - using WarpperFieldType = std::conditional_t<Type == TYPE_DATE, uint24_t, T>; - bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& statistic) const override { if (statistic.first->is_null()) { return true; @@ -204,29 +202,6 @@ public: } } - bool is_always_true(const std::pair<WrapperField*, WrapperField*>& statistic) const override { - if (statistic.first->is_null() || statistic.second->is_null()) { - return false; - } - - T tmp_min_value {}; - T tmp_max_value {}; - memcpy((char*)(&tmp_min_value), statistic.first->cell_ptr(), sizeof(WarpperFieldType)); - memcpy((char*)(&tmp_max_value), statistic.second->cell_ptr(), sizeof(WarpperFieldType)); - - if constexpr (PT == PredicateType::LT) { - return _value > tmp_max_value; - } else if constexpr (PT == PredicateType::LE) { - return _value >= tmp_max_value; - } else if constexpr (PT == PredicateType::GT) { - return _value < tmp_min_value; - } else if constexpr (PT == PredicateType::GE) { - return _value <= tmp_min_value; - } - - return false; - } - bool evaluate_del(const std::pair<WrapperField*, WrapperField*>& statistic) const 
override { if (statistic.first->is_null() || statistic.second->is_null()) { return false; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index d9a074e290..b1b817f545 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -30,7 +30,6 @@ #include "io/fs/file_reader.h" #include "olap/block_column_predicate.h" #include "olap/column_predicate.h" -#include "olap/comparison_predicate.h" #include "olap/decimal12.h" #include "olap/inverted_index_parser.h" #include "olap/iterators.h" @@ -340,31 +339,6 @@ bool ColumnReader::match_condition(const AndBlockColumnPredicate* col_predicates col_predicates); } -bool ColumnReader::prune_predicates_by_zone_map(std::vector<ColumnPredicate*>& predicates, - const int column_id) const { - if (_zone_map_index == nullptr) { - return false; - } - - FieldType type = _type_info->type(); - std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, _meta_length)); - std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, _meta_length)); - _parse_zone_map(*_segment_zone_map, min_value.get(), max_value.get()); - - auto pruned = false; - for (auto it = predicates.begin(); it != predicates.end();) { - auto predicate = *it; - if (predicate->column_id() == column_id && - predicate->is_always_true({min_value.get(), max_value.get()})) { - pruned = true; - it = predicates.erase(it); - } else { - ++it; - } - } - return pruned; -} - void ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, WrapperField* min_value_container, WrapperField* max_value_container) const { // min value and max value are valid if has_not_null is true diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 7964555ade..174aabdefa 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -162,9 +162,6 @@ 
public: bool is_empty() const { return _num_rows == 0; } - bool prune_predicates_by_zone_map(std::vector<ColumnPredicate*>& predicates, - const int column_id) const; - CompressionTypePB get_compression() const { return _meta_compression; } uint64_t num_rows() const { return _num_rows; } diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index ddce80bcc3..153ed92517 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -147,6 +147,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o return Status::OK(); } } + if (read_options.use_topn_opt) { auto query_ctx = read_options.runtime_state->get_query_ctx(); auto runtime_predicate = query_ctx->get_runtime_predicate().get_predictate(); @@ -174,25 +175,6 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o iter->reset(new SegmentIterator(this->shared_from_this(), schema)); } - if (config::ignore_always_true_predicate_for_segment && - read_options.io_ctx.reader_type == ReaderType::READER_QUERY && - !read_options.column_predicates.empty()) { - auto pruned_predicates = read_options.column_predicates; - auto pruned = false; - for (auto& it : _column_readers) { - if (it.second->prune_predicates_by_zone_map(pruned_predicates, it.first)) { - pruned = true; - } - } - - if (pruned) { - auto options_with_pruned_predicates = read_options; - options_with_pruned_predicates.column_predicates = pruned_predicates; - LOG(INFO) << "column_predicates pruned from " << read_options.column_predicates.size() - << " to " << pruned_predicates.size(); - return iter->get()->init(options_with_pruned_predicates); - } - } return iter->get()->init(read_options); } diff --git a/regression-test/data/query_p0/test_select_with_predicate_prune.out b/regression-test/data/query_p0/test_select_with_predicate_prune.out deleted file mode 100644 index 2e1fad8749..0000000000 --- 
a/regression-test/data/query_p0/test_select_with_predicate_prune.out +++ /dev/null @@ -1,25 +0,0 @@ --- This file is automatically generated. You should know what you did if you want to edit this --- !select1 -- -1 jerry 2020-10-01 -2 tom 2020-10-02 -3 jack 2020-10-01 -4 tony 2020-10-02 - --- !select2 -- -1 jerry 2020-10-01 -3 jack 2020-10-01 - --- !select3 -- - --- !select4 -- -1 jerry 2020-10-01 -2 tom 2020-10-02 -3 jack 2020-10-01 -4 tony 2020-10-02 - --- !select5 -- -2 tom 2020-10-02 -4 tony 2020-10-02 - --- !select6 -- - diff --git a/regression-test/suites/query_p0/test_select_with_predicate_prune.groovy b/regression-test/suites/query_p0/test_select_with_predicate_prune.groovy deleted file mode 100644 index 768e04b4c3..0000000000 --- a/regression-test/suites/query_p0/test_select_with_predicate_prune.groovy +++ /dev/null @@ -1,67 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-suite("test_select_with_predicate_prune") { - sql """ - drop table if exists `test_select_with_predicate_prune`; - """ - sql """ - CREATE TABLE IF NOT EXISTS `test_select_with_predicate_prune` ( - id int, - name string, - birthday date not null - ) - duplicate key(`id`) - AUTO PARTITION BY LIST (`birthday`)() - DISTRIBUTED BY HASH(`id`) buckets 1 - PROPERTIES - ( - "replication_allocation" = "tag.location.default: 1" - ); - """ - - sql """ - insert into test_select_with_predicate_prune values (1, 'jerry', '2020-10-01'), (2, 'tom', '2020-10-02'); - """ - sql """ - insert into test_select_with_predicate_prune values (3, 'jack', '2020-10-01'), (4, 'tony', '2020-10-02'); - """ - - qt_select1 """ - select * from test_select_with_predicate_prune where birthday < '2020-10-03' order by id; - """ - - qt_select2 """ - select * from test_select_with_predicate_prune where birthday < '2020-10-02' order by id; - """ - - qt_select3 """ - select * from test_select_with_predicate_prune where birthday < '2020-10-01' order by id; - """ - - - qt_select4 """ - select * from test_select_with_predicate_prune where birthday > '2020-09-30' order by id; - """ - - qt_select5 """ - select * from test_select_with_predicate_prune where birthday > '2020-10-01' order by id; - """ - - qt_select6 """ - select * from test_select_with_predicate_prune where birthday > '2020-10-02' order by id; - """ -} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org