Tanya-W commented on code in PR #16615:
URL: https://github.com/apache/doris/pull/16615#discussion_r1102904598


##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -642,52 +641,133 @@ bool 
SegmentIterator::_is_handle_predicate_by_fulltext(ColumnPredicate* predicat
     return handle_by_fulltext;
 }
 
+#define all_predicates_are_range_predicate(predicate_set)   \
+    std::all_of(predicate_set.begin(), predicate_set.end(), \
+                [](const ColumnPredicate* p) { return 
PredicateTypeTraits::is_range(p->type()); })
+
+Status SegmentIterator::_apply_inverted_index_on_column_predicate(
+        ColumnPredicate* pred, std::vector<ColumnPredicate*>& 
remaining_predicates,
+        bool* continue_apply) {
+    int32_t unique_id = _schema.unique_id(pred->column_id());
+    bool handle_by_fulltext = _is_handle_predicate_by_fulltext(unique_id);
+
+    if (_inverted_index_iterators.count(unique_id) < 1 ||
+        _inverted_index_iterators[unique_id] == nullptr ||
+        (pred->type() != PredicateType::MATCH && handle_by_fulltext) ||
+        pred->type() == PredicateType::IS_NULL || pred->type() == 
PredicateType::IS_NOT_NULL ||
+        pred->type() == PredicateType::BF) {
+        // 1. this column no inverted index
+        // 2. equal or range for fulltext index
+        // 3. is_null or is_not_null predicate
+        // 4. bloom filter predicate
+        remaining_predicates.emplace_back(pred);
+    } else {
+        roaring::Roaring bitmap = _row_bitmap;
+        Status res =
+                pred->evaluate(_schema, _inverted_index_iterators[unique_id], 
num_rows(), &bitmap);
+        if (!res.ok()) {
+            if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND &&
+                 pred->type() != PredicateType::MATCH) ||
+                res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT) {
+                //downgrade without index query
+                remaining_predicates.emplace_back(pred);
+                return Status::OK();
+            }
+            LOG(WARNING) << "failed to evaluate index"
+                         << ", column predicate type: " << 
pred->pred_type_string(pred->type())
+                         << ", error msg: " << res.code_as_string();
+            return res;
+        }
+
+        auto pred_type = pred->type();
+        if (pred_type == PredicateType::MATCH) {
+            std::string pred_result_sign = _gen_predicate_result_sign(pred);
+            _rowid_result_for_index.emplace(
+                    std::make_pair(pred_result_sign, std::make_pair(false, 
bitmap)));
+        }
+
+        _row_bitmap &= bitmap;
+        if (_row_bitmap.isEmpty()) {
+            // all rows have been pruned, no need to process further predicates
+            *continue_apply = false;
+        }
+    }
+    return Status::OK();
+}
+
+Status SegmentIterator::_apply_inverted_index_on_block_column_predicate(
+        ColumnId column_id, MutilColumnBlockPredicate* pred,
+        std::set<const ColumnPredicate*>& no_need_to_pass_column_predicate_set,
+        bool* continue_apply) {
+    auto unique_id = _schema.unique_id(column_id);
+    bool handle_by_fulltext = _is_handle_predicate_by_fulltext(unique_id);
+    std::set<const ColumnPredicate*> predicate_set {};
+
+    pred->get_all_column_predicate(predicate_set);
+
+    //four requirements here.
+    //1. Column has inverted index
+    //2. There are multiple predicates for this column.
+    //3. All the predicates are range predicate.
+    //4. if it's under fulltext parser type, we need to skip inverted index 
evaluate.
+    if (_inverted_index_iterators.count(unique_id) > 0 &&
+        _inverted_index_iterators[unique_id] != nullptr && 
predicate_set.size() > 1 &&
+        all_predicates_are_range_predicate(predicate_set) && 
!handle_by_fulltext) {
+        roaring::Roaring output_result = _row_bitmap;
+
+        std::string column_name = _schema.column(column_id)->name();
+
+        auto res = pred->evaluate(column_name, 
_inverted_index_iterators[unique_id], num_rows(),
+                                  &output_result);
+
+        if (res.ok()) {
+            no_need_to_pass_column_predicate_set.insert(predicate_set.begin(), 
predicate_set.end());
+            _row_bitmap &= output_result;
+            if (_row_bitmap.isEmpty()) {
+                // all rows have been pruned, no need to process further 
predicates
+                *continue_apply = false;
+            }
+            return res;
+        } else {
+            //TODO:mock until AndBlockColumnPredicate evaluate is ok.
+            if (res.code() == ErrorCode::NOT_IMPLEMENTED_ERROR) {
+                return Status::OK();
+            }
+            LOG(WARNING) << "failed to evaluate index"
+                         << ", column predicate type: range predicate"
+                         << ", error msg: " << res.code_as_string();
+            return res;
+        }
+    }
+    return Status::OK();

Review Comment:
   Using "return res" here may be better; then the redundant "return res" 
on line 730 and line 739 can be removed.



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -642,52 +641,133 @@ bool 
SegmentIterator::_is_handle_predicate_by_fulltext(ColumnPredicate* predicat
     return handle_by_fulltext;
 }
 
+#define all_predicates_are_range_predicate(predicate_set)   \
+    std::all_of(predicate_set.begin(), predicate_set.end(), \
+                [](const ColumnPredicate* p) { return 
PredicateTypeTraits::is_range(p->type()); })
+
+Status SegmentIterator::_apply_inverted_index_on_column_predicate(
+        ColumnPredicate* pred, std::vector<ColumnPredicate*>& 
remaining_predicates,
+        bool* continue_apply) {
+    int32_t unique_id = _schema.unique_id(pred->column_id());
+    bool handle_by_fulltext = _is_handle_predicate_by_fulltext(unique_id);
+
+    if (_inverted_index_iterators.count(unique_id) < 1 ||
+        _inverted_index_iterators[unique_id] == nullptr ||
+        (pred->type() != PredicateType::MATCH && handle_by_fulltext) ||
+        pred->type() == PredicateType::IS_NULL || pred->type() == 
PredicateType::IS_NOT_NULL ||
+        pred->type() == PredicateType::BF) {
+        // 1. this column no inverted index
+        // 2. equal or range for fulltext index
+        // 3. is_null or is_not_null predicate
+        // 4. bloom filter predicate
+        remaining_predicates.emplace_back(pred);
+    } else {
+        roaring::Roaring bitmap = _row_bitmap;
+        Status res =
+                pred->evaluate(_schema, _inverted_index_iterators[unique_id], 
num_rows(), &bitmap);
+        if (!res.ok()) {
+            if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND &&
+                 pred->type() != PredicateType::MATCH) ||
+                res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT) {
+                //downgrade without index query
+                remaining_predicates.emplace_back(pred);
+                return Status::OK();
+            }
+            LOG(WARNING) << "failed to evaluate index"
+                         << ", column predicate type: " << 
pred->pred_type_string(pred->type())
+                         << ", error msg: " << res.code_as_string();
+            return res;
+        }
+
+        auto pred_type = pred->type();
+        if (pred_type == PredicateType::MATCH) {
+            std::string pred_result_sign = _gen_predicate_result_sign(pred);
+            _rowid_result_for_index.emplace(
+                    std::make_pair(pred_result_sign, std::make_pair(false, 
bitmap)));
+        }
+
+        _row_bitmap &= bitmap;
+        if (_row_bitmap.isEmpty()) {
+            // all rows have been pruned, no need to process further predicates
+            *continue_apply = false;
+        }
+    }
+    return Status::OK();
+}
+
+Status SegmentIterator::_apply_inverted_index_on_block_column_predicate(
+        ColumnId column_id, MutilColumnBlockPredicate* pred,
+        std::set<const ColumnPredicate*>& no_need_to_pass_column_predicate_set,
+        bool* continue_apply) {
+    auto unique_id = _schema.unique_id(column_id);
+    bool handle_by_fulltext = _is_handle_predicate_by_fulltext(unique_id);
+    std::set<const ColumnPredicate*> predicate_set {};
+
+    pred->get_all_column_predicate(predicate_set);
+
+    //four requirements here.
+    //1. Column has inverted index
+    //2. There are multiple predicates for this column.
+    //3. All the predicates are range predicate.
+    //4. if it's under fulltext parser type, we need to skip inverted index 
evaluate.
+    if (_inverted_index_iterators.count(unique_id) > 0 &&
+        _inverted_index_iterators[unique_id] != nullptr && 
predicate_set.size() > 1 &&
+        all_predicates_are_range_predicate(predicate_set) && 
!handle_by_fulltext) {
+        roaring::Roaring output_result = _row_bitmap;
+
+        std::string column_name = _schema.column(column_id)->name();
+
+        auto res = pred->evaluate(column_name, 
_inverted_index_iterators[unique_id], num_rows(),
+                                  &output_result);
+
+        if (res.ok()) {
+            no_need_to_pass_column_predicate_set.insert(predicate_set.begin(), 
predicate_set.end());
+            _row_bitmap &= output_result;
+            if (_row_bitmap.isEmpty()) {
+                // all rows have been pruned, no need to process further 
predicates
+                *continue_apply = false;
+            }
+            return res;
+        } else {
+            //TODO:mock until AndBlockColumnPredicate evaluate is ok.
+            if (res.code() == ErrorCode::NOT_IMPLEMENTED_ERROR) {
+                return Status::OK();
+            }
+            LOG(WARNING) << "failed to evaluate index"
+                         << ", column predicate type: range predicate"
+                         << ", error msg: " << res.code_as_string();
+            return res;
+        }
+    }
+    return Status::OK();
+}
+
 Status SegmentIterator::_apply_inverted_index() {
     SCOPED_RAW_TIMER(&_opts.stats->inverted_index_filter_timer);
     size_t input_rows = _row_bitmap.cardinality();
     std::vector<ColumnPredicate*> remaining_predicates;
+    std::set<const ColumnPredicate*> no_need_to_pass_column_predicate_set;

Review Comment:
   Consider using a clearer name, such as "success_applied_column_predicate", 
or add a comment explaining what this set holds.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to