Tanya-W commented on code in PR #16615: URL: https://github.com/apache/doris/pull/16615#discussion_r1102904598
########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -642,52 +641,133 @@ bool SegmentIterator::_is_handle_predicate_by_fulltext(ColumnPredicate* predicat return handle_by_fulltext; } +#define all_predicates_are_range_predicate(predicate_set) \ + std::all_of(predicate_set.begin(), predicate_set.end(), \ + [](const ColumnPredicate* p) { return PredicateTypeTraits::is_range(p->type()); }) + +Status SegmentIterator::_apply_inverted_index_on_column_predicate( + ColumnPredicate* pred, std::vector<ColumnPredicate*>& remaining_predicates, + bool* continue_apply) { + int32_t unique_id = _schema.unique_id(pred->column_id()); + bool handle_by_fulltext = _is_handle_predicate_by_fulltext(unique_id); + + if (_inverted_index_iterators.count(unique_id) < 1 || + _inverted_index_iterators[unique_id] == nullptr || + (pred->type() != PredicateType::MATCH && handle_by_fulltext) || + pred->type() == PredicateType::IS_NULL || pred->type() == PredicateType::IS_NOT_NULL || + pred->type() == PredicateType::BF) { + // 1. this column no inverted index + // 2. equal or range for fulltext index + // 3. is_null or is_not_null predicate + // 4. 
bloom filter predicate + remaining_predicates.emplace_back(pred); + } else { + roaring::Roaring bitmap = _row_bitmap; + Status res = + pred->evaluate(_schema, _inverted_index_iterators[unique_id], num_rows(), &bitmap); + if (!res.ok()) { + if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND && + pred->type() != PredicateType::MATCH) || + res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT) { + //downgrade without index query + remaining_predicates.emplace_back(pred); + return Status::OK(); + } + LOG(WARNING) << "failed to evaluate index" + << ", column predicate type: " << pred->pred_type_string(pred->type()) + << ", error msg: " << res.code_as_string(); + return res; + } + + auto pred_type = pred->type(); + if (pred_type == PredicateType::MATCH) { + std::string pred_result_sign = _gen_predicate_result_sign(pred); + _rowid_result_for_index.emplace( + std::make_pair(pred_result_sign, std::make_pair(false, bitmap))); + } + + _row_bitmap &= bitmap; + if (_row_bitmap.isEmpty()) { + // all rows have been pruned, no need to process further predicates + *continue_apply = false; + } + } + return Status::OK(); +} + +Status SegmentIterator::_apply_inverted_index_on_block_column_predicate( + ColumnId column_id, MutilColumnBlockPredicate* pred, + std::set<const ColumnPredicate*>& no_need_to_pass_column_predicate_set, + bool* continue_apply) { + auto unique_id = _schema.unique_id(column_id); + bool handle_by_fulltext = _is_handle_predicate_by_fulltext(unique_id); + std::set<const ColumnPredicate*> predicate_set {}; + + pred->get_all_column_predicate(predicate_set); + + //four requirements here. + //1. Column has inverted index + //2. There are multiple predicates for this column. + //3. All the predicates are range predicate. + //4. if it's under fulltext parser type, we need to skip inverted index evaluate. 
+ if (_inverted_index_iterators.count(unique_id) > 0 && + _inverted_index_iterators[unique_id] != nullptr && predicate_set.size() > 1 && + all_predicates_are_range_predicate(predicate_set) && !handle_by_fulltext) { + roaring::Roaring output_result = _row_bitmap; + + std::string column_name = _schema.column(column_id)->name(); + + auto res = pred->evaluate(column_name, _inverted_index_iterators[unique_id], num_rows(), + &output_result); + + if (res.ok()) { + no_need_to_pass_column_predicate_set.insert(predicate_set.begin(), predicate_set.end()); + _row_bitmap &= output_result; + if (_row_bitmap.isEmpty()) { + // all rows have been pruned, no need to process further predicates + *continue_apply = false; + } + return res; + } else { + //TODO:mock until AndBlockColumnPredicate evaluate is ok. + if (res.code() == ErrorCode::NOT_IMPLEMENTED_ERROR) { + return Status::OK(); + } + LOG(WARNING) << "failed to evaluate index" + << ", column predicate type: range predicate" + << ", error msg: " << res.code_as_string(); + return res; + } + } + return Status::OK(); Review Comment: Using "return res" here may be better; the redundant "return res" statements on line 730 and line 739 could then be removed. ########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -642,52 +641,133 @@ bool SegmentIterator::_is_handle_predicate_by_fulltext(ColumnPredicate* predicat return handle_by_fulltext; } +#define all_predicates_are_range_predicate(predicate_set) \ + std::all_of(predicate_set.begin(), predicate_set.end(), \ + [](const ColumnPredicate* p) { return PredicateTypeTraits::is_range(p->type()); }) + +Status SegmentIterator::_apply_inverted_index_on_column_predicate( + ColumnPredicate* pred, std::vector<ColumnPredicate*>& remaining_predicates, + bool* continue_apply) { + int32_t unique_id = _schema.unique_id(pred->column_id()); + bool handle_by_fulltext = _is_handle_predicate_by_fulltext(unique_id); + + if (_inverted_index_iterators.count(unique_id) < 1 || + _inverted_index_iterators[unique_id]
== nullptr || + (pred->type() != PredicateType::MATCH && handle_by_fulltext) || + pred->type() == PredicateType::IS_NULL || pred->type() == PredicateType::IS_NOT_NULL || + pred->type() == PredicateType::BF) { + // 1. this column no inverted index + // 2. equal or range for fulltext index + // 3. is_null or is_not_null predicate + // 4. bloom filter predicate + remaining_predicates.emplace_back(pred); + } else { + roaring::Roaring bitmap = _row_bitmap; + Status res = + pred->evaluate(_schema, _inverted_index_iterators[unique_id], num_rows(), &bitmap); + if (!res.ok()) { + if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND && + pred->type() != PredicateType::MATCH) || + res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT) { + //downgrade without index query + remaining_predicates.emplace_back(pred); + return Status::OK(); + } + LOG(WARNING) << "failed to evaluate index" + << ", column predicate type: " << pred->pred_type_string(pred->type()) + << ", error msg: " << res.code_as_string(); + return res; + } + + auto pred_type = pred->type(); + if (pred_type == PredicateType::MATCH) { + std::string pred_result_sign = _gen_predicate_result_sign(pred); + _rowid_result_for_index.emplace( + std::make_pair(pred_result_sign, std::make_pair(false, bitmap))); + } + + _row_bitmap &= bitmap; + if (_row_bitmap.isEmpty()) { + // all rows have been pruned, no need to process further predicates + *continue_apply = false; + } + } + return Status::OK(); +} + +Status SegmentIterator::_apply_inverted_index_on_block_column_predicate( + ColumnId column_id, MutilColumnBlockPredicate* pred, + std::set<const ColumnPredicate*>& no_need_to_pass_column_predicate_set, + bool* continue_apply) { + auto unique_id = _schema.unique_id(column_id); + bool handle_by_fulltext = _is_handle_predicate_by_fulltext(unique_id); + std::set<const ColumnPredicate*> predicate_set {}; + + pred->get_all_column_predicate(predicate_set); + + //four requirements here. + //1. Column has inverted index + //2. 
There are multiple predicates for this column. + //3. All the predicates are range predicate. + //4. if it's under fulltext parser type, we need to skip inverted index evaluate. + if (_inverted_index_iterators.count(unique_id) > 0 && + _inverted_index_iterators[unique_id] != nullptr && predicate_set.size() > 1 && + all_predicates_are_range_predicate(predicate_set) && !handle_by_fulltext) { + roaring::Roaring output_result = _row_bitmap; + + std::string column_name = _schema.column(column_id)->name(); + + auto res = pred->evaluate(column_name, _inverted_index_iterators[unique_id], num_rows(), + &output_result); + + if (res.ok()) { + no_need_to_pass_column_predicate_set.insert(predicate_set.begin(), predicate_set.end()); + _row_bitmap &= output_result; + if (_row_bitmap.isEmpty()) { + // all rows have been pruned, no need to process further predicates + *continue_apply = false; + } + return res; + } else { + //TODO:mock until AndBlockColumnPredicate evaluate is ok. + if (res.code() == ErrorCode::NOT_IMPLEMENTED_ERROR) { + return Status::OK(); + } + LOG(WARNING) << "failed to evaluate index" + << ", column predicate type: range predicate" + << ", error msg: " << res.code_as_string(); + return res; + } + } + return Status::OK(); +} + Status SegmentIterator::_apply_inverted_index() { SCOPED_RAW_TIMER(&_opts.stats->inverted_index_filter_timer); size_t input_rows = _row_bitmap.cardinality(); std::vector<ColumnPredicate*> remaining_predicates; + std::set<const ColumnPredicate*> no_need_to_pass_column_predicate_set; Review Comment: Consider using a clearer name, such as "success_applied_column_predicate", or add a comment explaining what this set holds. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org