BiteTheDDDDt commented on code in PR #11468: URL: https://github.com/apache/doris/pull/11468#discussion_r938411595
########## be/src/vec/exec/volap_scan_node.cpp: ########## @@ -966,152 +794,83 @@ static bool ignore_cast(SlotDescriptor* slot, Expr* expr) { return false; } -bool VOlapScanNode::should_push_down_in_predicate(doris::SlotDescriptor* slot, - doris::InPredicate* pred) { - if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { - // not a slot ref(column) - return false; - } - - std::vector<SlotId> slot_ids; - if (pred->get_child(0)->get_slot_ids(&slot_ids) != 1) { - // not a single column predicate - return false; - } - - if (slot_ids[0] != slot->id()) { - // predicate not related to current column - return false; - } - - if (pred->get_child(0)->type().type != slot->type().type) { - if (!ignore_cast(slot, pred->get_child(0))) { - // the type of predicate not match the slot's type - return false; - } - } - - VLOG_CRITICAL << slot->col_name() << " fixed_values add num: " << pred->hybrid_set()->size(); - - // if there are too many elements in InPredicate, exceed the limit, - // we will not push any condition of this column to storage engine. - // because too many conditions pushed down to storage engine may even - // slow down the query process. - // ATTN: This is just an experience value. You may need to try - // different thresholds to improve performance. 
- if (pred->hybrid_set()->size() > _max_pushdown_conditions_per_column) { - VLOG_NOTICE << "Predicate value num " << pred->hybrid_set()->size() << " exceed limit " - << _max_pushdown_conditions_per_column; - return false; - } - - return true; -} - -std::pair<bool, void*> VOlapScanNode::should_push_down_eq_predicate(doris::SlotDescriptor* slot, - doris::Expr* pred, int conj_idx, - int child_idx) { - auto result_pair = std::make_pair<bool, void*>(false, nullptr); - - // Do not get slot_ref of column, should not push_down to Storage Engine - if (Expr::type_without_cast(pred->get_child(child_idx)) != TExprNodeType::SLOT_REF) { - return result_pair; - } - - std::vector<SlotId> slot_ids; - if (pred->get_child(child_idx)->get_slot_ids(&slot_ids) != 1) { - // not a single column predicate - return result_pair; - } - - if (slot_ids[0] != slot->id()) { - // predicate not related to current column - return result_pair; - } - - if (pred->get_child(child_idx)->type().type != slot->type().type) { - if (!ignore_cast(slot, pred->get_child(child_idx))) { - // the type of predicate not match the slot's type - return result_pair; +template <bool IsFixed, PrimitiveType PrimitiveType, typename ChangeFixedValueRangeFunc> +Status VOlapScanNode::change_value_range(ColumnValueRange<PrimitiveType>& temp_range, void* value, + const ChangeFixedValueRangeFunc& func, std::string fn_name, + int slot_ref_child) { + if constexpr (PrimitiveType == TYPE_DATE) { + DateTimeValue date_value; + reinterpret_cast<VecDateTimeValue*>(value)->convert_vec_dt_to_dt(&date_value); + if constexpr (IsFixed) { + if (!date_value.check_loss_accuracy_cast_to_date()) { + func(temp_range, + reinterpret_cast<typename PrimitiveTypeTraits<PrimitiveType>::CppType*>( + &date_value)); + } + } else { + if (date_value.check_loss_accuracy_cast_to_date()) { + if (fn_name == "lt" || fn_name == "ge") { + ++date_value; + } + } + func(temp_range, to_olap_filter_type(fn_name, slot_ref_child), + reinterpret_cast<typename 
PrimitiveTypeTraits<PrimitiveType>::CppType*>( + &date_value)); } - } - - Expr* expr = pred->get_child(1 - child_idx); - if (!expr->is_constant()) { - // only handle constant value - return result_pair; - } - - // get value in result pair - result_pair = std::make_pair( - true, _conjunct_ctxs[conj_idx]->get_value(expr, nullptr, slot->type().precision, - slot->type().scale)); - - return result_pair; -} - -template <PrimitiveType primitive_type, typename ChangeFixedValueRangeFunc> -Status VOlapScanNode::change_fixed_value_range(ColumnValueRange<primitive_type>& temp_range, - void* value, const ChangeFixedValueRangeFunc& func) { - switch (primitive_type) { - case TYPE_DATE: { - DateTimeValue date_value = *reinterpret_cast<DateTimeValue*>(value); - // There is must return empty data in olap_scan_node, - // Because data value loss accuracy - if (!date_value.check_loss_accuracy_cast_to_date()) { + } else if constexpr (PrimitiveType == TYPE_DATETIME) { + DateTimeValue date_value; + reinterpret_cast<VecDateTimeValue*>(value)->convert_vec_dt_to_dt(&date_value); + if constexpr (IsFixed) { func(temp_range, - reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>( + reinterpret_cast<typename PrimitiveTypeTraits<PrimitiveType>::CppType*>( &date_value)); + } else { + func(temp_range, to_olap_filter_type(fn_name, slot_ref_child), + reinterpret_cast<typename PrimitiveTypeTraits<PrimitiveType>::CppType*>( + reinterpret_cast<char*>(&date_value))); } - break; - } - case TYPE_DECIMALV2: - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_DATETIME: - case TYPE_DATETIMEV2: - case TYPE_TINYINT: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - case TYPE_LARGEINT: - case TYPE_DECIMAL32: - case TYPE_DECIMAL64: - case TYPE_DECIMAL128: - case TYPE_STRING: { - func(temp_range, - reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>(value)); - break; - } - case TYPE_BOOLEAN: { - bool v = *reinterpret_cast<bool*>(value); - 
func(temp_range, - reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>(&v)); - break; - } - case TYPE_DATEV2: { + } else if constexpr (PrimitiveType == TYPE_DATEV2) { DateV2Value<DateTimeV2ValueType> datetimev2_value = *reinterpret_cast<DateV2Value<DateTimeV2ValueType>*>(value); - if (datetimev2_value.can_cast_to_date_without_loss_accuracy()) { - DateV2Value<DateV2ValueType> date_v2; + if constexpr (IsFixed) { + if (datetimev2_value.can_cast_to_date_without_loss_accuracy()) { + DateV2Value<DateV2ValueType> date_v2; + date_v2.set_date_uint32( + binary_cast<DateV2Value<DateTimeV2ValueType>, uint64_t>(datetimev2_value) >> + TIME_PART_LENGTH); + func(temp_range, &date_v2); + } + } else { + doris::vectorized::DateV2Value<DateV2ValueType> date_v2; date_v2.set_date_uint32( binary_cast<DateV2Value<DateTimeV2ValueType>, uint64_t>(datetimev2_value) >> TIME_PART_LENGTH); - if constexpr (primitive_type == PrimitiveType::TYPE_DATEV2) { - func(temp_range, &date_v2); - } else { - __builtin_unreachable(); + if (!datetimev2_value.can_cast_to_date_without_loss_accuracy()) { + if (fn_name == "lt" || fn_name == "ge") { + ++date_v2; + } } + func(temp_range, to_olap_filter_type(fn_name, slot_ref_child), &date_v2); + } + } else if constexpr ((PrimitiveType == TYPE_DECIMALV2) || (PrimitiveType == TYPE_CHAR) || + (PrimitiveType == TYPE_VARCHAR) || (PrimitiveType == TYPE_HLL) || + (PrimitiveType == TYPE_DATETIMEV2) || (PrimitiveType == TYPE_TINYINT) || + (PrimitiveType == TYPE_SMALLINT) || (PrimitiveType == TYPE_INT) || + (PrimitiveType == TYPE_BIGINT) || (PrimitiveType == TYPE_LARGEINT) || + (PrimitiveType == TYPE_DECIMAL32) || (PrimitiveType == TYPE_DECIMAL64) || + (PrimitiveType == TYPE_DECIMAL128) || (PrimitiveType == TYPE_STRING) || + (PrimitiveType == TYPE_BOOLEAN)) { + if constexpr (IsFixed) { Review Comment: `IsFixed`: it doesn't seem that this branch is ever entered. ########## be/src/vec/exec/volap_scan_node.cpp: ########## @@ -621,143 +621,71 @@ Status 
VOlapScanNode::_add_blocks(std::vector<Block*>& block) { return Status::OK(); } -void VOlapScanNode::eval_const_conjuncts() { - for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { - // if conjunct is constant, compute direct and set eos = true - if (_conjunct_ctxs[conj_idx]->root()->is_constant()) { - void* value = _conjunct_ctxs[conj_idx]->get_value(nullptr); - if (value == nullptr || *reinterpret_cast<bool*>(value) == false) { - _eos = true; - break; - } - } - } -} - Status VOlapScanNode::normalize_conjuncts() { std::vector<SlotDescriptor*> slots = _tuple_desc->slots(); for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) { switch (slots[slot_idx]->type().type) { - case TYPE_TINYINT: { - ColumnValueRange<TYPE_TINYINT> range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_SMALLINT: { - ColumnValueRange<TYPE_SMALLINT> range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_INT: { - ColumnValueRange<TYPE_INT> range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_BIGINT: { - ColumnValueRange<TYPE_BIGINT> range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_LARGEINT: { - ColumnValueRange<TYPE_LARGEINT> range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_CHAR: { - ColumnValueRange<TYPE_CHAR> range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - case TYPE_VARCHAR: { - ColumnValueRange<TYPE_VARCHAR> range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - case TYPE_HLL: { - ColumnValueRange<TYPE_HLL> range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - case TYPE_STRING: { - ColumnValueRange<TYPE_STRING> range(slots[slot_idx]->col_name()); - 
normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_DATE: { - ColumnValueRange<TYPE_DATE> range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - case TYPE_DATETIME: { - ColumnValueRange<TYPE_DATETIME> range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_DATEV2: { - ColumnValueRange<TYPE_DATEV2> range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_DATETIMEV2: { - ColumnValueRange<TYPE_DATETIMEV2> range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().precision, - slots[slot_idx]->type().scale); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_DECIMALV2: { - ColumnValueRange<TYPE_DECIMALV2> range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_DECIMAL32: { - ColumnValueRange<TYPE_DECIMAL32> range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().precision, - slots[slot_idx]->type().scale); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_DECIMAL64: { - ColumnValueRange<TYPE_DECIMAL64> range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().precision, - slots[slot_idx]->type().scale); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_DECIMAL128: { - ColumnValueRange<TYPE_DECIMAL128> range(slots[slot_idx]->col_name(), - slots[slot_idx]->type().precision, - slots[slot_idx]->type().scale); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_BOOLEAN: { - ColumnValueRange<TYPE_BOOLEAN> range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - +#define M(NAME) \ + case TYPE_##NAME: { \ + ColumnValueRange<TYPE_##NAME> range(slots[slot_idx]->col_name(), \ + slots[slot_idx]->type().precision, \ + slots[slot_idx]->type().scale); \ + _id_to_slot_column_value_range[slots[slot_idx]->id()] = \ + 
std::pair {slots[slot_idx], range}; \ + break; \ + } +#define APPLY_FOR_PRIMITIVE_TYPE(M) \ + M(TINYINT) \ + M(SMALLINT) \ + M(INT) \ + M(BIGINT) \ + M(LARGEINT) \ + M(CHAR) \ + M(DATE) \ + M(DATETIME) \ + M(DATEV2) \ + M(DATETIMEV2) \ + M(VARCHAR) \ + M(STRING) \ + M(HLL) \ + M(DECIMAL32) \ + M(DECIMAL64) \ + M(DECIMAL128) \ + M(DECIMALV2) \ + M(BOOLEAN) + APPLY_FOR_PRIMITIVE_TYPE(M) +#undef M default: { VLOG_CRITICAL << "Unsupported Normalize Slot [ColName=" << slots[slot_idx]->col_name() << "]"; break; } } } + if (_vconjunct_ctx_ptr) { + if ((*_vconjunct_ctx_ptr)->root()) { + VExpr* new_root = _normalize_predicate(_runtime_state, (*_vconjunct_ctx_ptr)->root()); + if (new_root) { + (*_vconjunct_ctx_ptr)->set_root(new_root); + } else { + (*(_vconjunct_ctx_ptr.get()))->mark_as_stale(); + _stale_vexpr_ctxs.push_back(std::move(_vconjunct_ctx_ptr)); + _vconjunct_ctx_ptr.reset(nullptr); + } + } + } + for (auto it = _id_to_slot_column_value_range.begin(); Review Comment: It would be better to use a range-based for loop: `for(auto it: _id_to_slot_column_value_range)` ########## be/src/vec/exec/volap_scan_node.cpp: ########## @@ -898,62 +765,23 @@ Status VOlapScanNode::build_key_ranges_and_filters() { Status VOlapScanNode::start_scan(RuntimeState* state) { RETURN_IF_CANCELLED(state); - VLOG_CRITICAL << "Eval Const Conjuncts"; Review Comment: Is it a good idea to remove those comments? 
########## be/src/vec/exec/volap_scan_node.cpp: ########## @@ -966,152 +794,83 @@ static bool ignore_cast(SlotDescriptor* slot, Expr* expr) { return false; } -bool VOlapScanNode::should_push_down_in_predicate(doris::SlotDescriptor* slot, - doris::InPredicate* pred) { - if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { - // not a slot ref(column) - return false; - } - - std::vector<SlotId> slot_ids; - if (pred->get_child(0)->get_slot_ids(&slot_ids) != 1) { - // not a single column predicate - return false; - } - - if (slot_ids[0] != slot->id()) { - // predicate not related to current column - return false; - } - - if (pred->get_child(0)->type().type != slot->type().type) { - if (!ignore_cast(slot, pred->get_child(0))) { - // the type of predicate not match the slot's type - return false; - } - } - - VLOG_CRITICAL << slot->col_name() << " fixed_values add num: " << pred->hybrid_set()->size(); - - // if there are too many elements in InPredicate, exceed the limit, - // we will not push any condition of this column to storage engine. - // because too many conditions pushed down to storage engine may even - // slow down the query process. - // ATTN: This is just an experience value. You may need to try - // different thresholds to improve performance. 
- if (pred->hybrid_set()->size() > _max_pushdown_conditions_per_column) { - VLOG_NOTICE << "Predicate value num " << pred->hybrid_set()->size() << " exceed limit " - << _max_pushdown_conditions_per_column; - return false; - } - - return true; -} - -std::pair<bool, void*> VOlapScanNode::should_push_down_eq_predicate(doris::SlotDescriptor* slot, - doris::Expr* pred, int conj_idx, - int child_idx) { - auto result_pair = std::make_pair<bool, void*>(false, nullptr); - - // Do not get slot_ref of column, should not push_down to Storage Engine - if (Expr::type_without_cast(pred->get_child(child_idx)) != TExprNodeType::SLOT_REF) { - return result_pair; - } - - std::vector<SlotId> slot_ids; - if (pred->get_child(child_idx)->get_slot_ids(&slot_ids) != 1) { - // not a single column predicate - return result_pair; - } - - if (slot_ids[0] != slot->id()) { - // predicate not related to current column - return result_pair; - } - - if (pred->get_child(child_idx)->type().type != slot->type().type) { - if (!ignore_cast(slot, pred->get_child(child_idx))) { - // the type of predicate not match the slot's type - return result_pair; +template <bool IsFixed, PrimitiveType PrimitiveType, typename ChangeFixedValueRangeFunc> +Status VOlapScanNode::change_value_range(ColumnValueRange<PrimitiveType>& temp_range, void* value, + const ChangeFixedValueRangeFunc& func, std::string fn_name, + int slot_ref_child) { + if constexpr (PrimitiveType == TYPE_DATE) { + DateTimeValue date_value; + reinterpret_cast<VecDateTimeValue*>(value)->convert_vec_dt_to_dt(&date_value); + if constexpr (IsFixed) { + if (!date_value.check_loss_accuracy_cast_to_date()) { + func(temp_range, + reinterpret_cast<typename PrimitiveTypeTraits<PrimitiveType>::CppType*>( + &date_value)); + } + } else { + if (date_value.check_loss_accuracy_cast_to_date()) { + if (fn_name == "lt" || fn_name == "ge") { + ++date_value; + } + } + func(temp_range, to_olap_filter_type(fn_name, slot_ref_child), + reinterpret_cast<typename 
PrimitiveTypeTraits<PrimitiveType>::CppType*>( + &date_value)); } - } - - Expr* expr = pred->get_child(1 - child_idx); - if (!expr->is_constant()) { - // only handle constant value - return result_pair; - } - - // get value in result pair - result_pair = std::make_pair( - true, _conjunct_ctxs[conj_idx]->get_value(expr, nullptr, slot->type().precision, - slot->type().scale)); - - return result_pair; -} - -template <PrimitiveType primitive_type, typename ChangeFixedValueRangeFunc> -Status VOlapScanNode::change_fixed_value_range(ColumnValueRange<primitive_type>& temp_range, - void* value, const ChangeFixedValueRangeFunc& func) { - switch (primitive_type) { - case TYPE_DATE: { - DateTimeValue date_value = *reinterpret_cast<DateTimeValue*>(value); - // There is must return empty data in olap_scan_node, - // Because data value loss accuracy - if (!date_value.check_loss_accuracy_cast_to_date()) { + } else if constexpr (PrimitiveType == TYPE_DATETIME) { + DateTimeValue date_value; + reinterpret_cast<VecDateTimeValue*>(value)->convert_vec_dt_to_dt(&date_value); + if constexpr (IsFixed) { func(temp_range, - reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>( + reinterpret_cast<typename PrimitiveTypeTraits<PrimitiveType>::CppType*>( &date_value)); + } else { + func(temp_range, to_olap_filter_type(fn_name, slot_ref_child), + reinterpret_cast<typename PrimitiveTypeTraits<PrimitiveType>::CppType*>( + reinterpret_cast<char*>(&date_value))); } - break; - } - case TYPE_DECIMALV2: - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_DATETIME: - case TYPE_DATETIMEV2: - case TYPE_TINYINT: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - case TYPE_LARGEINT: - case TYPE_DECIMAL32: - case TYPE_DECIMAL64: - case TYPE_DECIMAL128: - case TYPE_STRING: { - func(temp_range, - reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>(value)); - break; - } - case TYPE_BOOLEAN: { - bool v = *reinterpret_cast<bool*>(value); - 
func(temp_range, - reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>(&v)); - break; - } - case TYPE_DATEV2: { + } else if constexpr (PrimitiveType == TYPE_DATEV2) { DateV2Value<DateTimeV2ValueType> datetimev2_value = *reinterpret_cast<DateV2Value<DateTimeV2ValueType>*>(value); - if (datetimev2_value.can_cast_to_date_without_loss_accuracy()) { - DateV2Value<DateV2ValueType> date_v2; + if constexpr (IsFixed) { + if (datetimev2_value.can_cast_to_date_without_loss_accuracy()) { + DateV2Value<DateV2ValueType> date_v2; + date_v2.set_date_uint32( + binary_cast<DateV2Value<DateTimeV2ValueType>, uint64_t>(datetimev2_value) >> + TIME_PART_LENGTH); Review Comment: `>>` seems strange -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org