zenoyang commented on a change in pull request #8627: URL: https://github.com/apache/incubator-doris/pull/8627#discussion_r834951443
########## File path: be/src/vec/columns/column_dictionary.h ########## @@ -242,18 +213,73 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> { const StringRef* dict_array, size_t data_num, uint32_t dict_num) override { if (!is_dict_inited()) { - dict.reserve(dict_num); + _dict.reserve(dict_num); for (uint32_t i = 0; i < dict_num; ++i) { auto value = StringValue(dict_array[i].data, dict_array[i].size); - dict.insert_value(value); + _dict.insert_value(value); } _dict_inited = true; } - char* end_ptr = (char*)codes.get_end_ptr(); + char* end_ptr = (char*)_codes.get_end_ptr(); memcpy(end_ptr, data_array + start_index, data_num * sizeof(T)); end_ptr += data_num * sizeof(T); - codes.set_end_ptr(end_ptr); + _codes.set_end_ptr(end_ptr); + } + + void convert_dict_codes_if_necessary() override { + if (!is_dict_sorted()) { + _dict.sort(); + _dict_sorted = true; + } + + if (!is_dict_code_converted()) { + for (size_t i = 0; i < size(); ++i) { + _codes[i] = _dict.convert_code(_codes[i]); + } + _dict_code_converted = true; + } + } + + void set_predicate_dict_code_if_necessary(doris::ColumnPredicate* predicate) override { + switch (predicate->type()) { + case PredicateType::EQ: + case PredicateType::NE: { + // cast to EqualPredicate, just to get value, may not be EqualPredicate + auto* comp_pred = reinterpret_cast<doris::EqualPredicate<StringValue>*>(predicate); + auto pred_value = comp_pred->get_value(); + auto code = _dict.find_code(pred_value); + comp_pred->set_dict_code(code); + break; + } + case PredicateType::LT: + case PredicateType::LE: + case PredicateType::GT: + case PredicateType::GE: { + // cast to LessPredicate, just to get value, may not be LessPredicate + auto* comp_pred = reinterpret_cast<doris::LessPredicate<StringValue>*>(predicate); + auto pred_value = comp_pred->get_value(); + auto less = predicate->type() == PredicateType::LT || + predicate->type() == PredicateType::LE; + auto eq = predicate->type() == PredicateType::LE || + predicate->type() == PredicateType::GE; + auto code = _dict.find_bound_code(pred_value, less, eq); + comp_pred->set_dict_code(code); + break; + } + case PredicateType::InList: + case PredicateType::NotInList: { + auto* in_pred = reinterpret_cast<doris::InListPredicate<StringValue>*>(predicate); + auto pred_values = in_pred->get_values(); + auto code_set = _dict.find_codes(pred_values); + in_pred->set_dict_codes(code_set); + break; + } + default: + LOG(FATAL) << "PredicateType: " << static_cast<int>(predicate->type()) + << " not supported in ColumnDictionary"; + break; Review comment: ok ########## File path: be/src/olap/rowset/segment_v2/segment_iterator.cpp ########## @@ -830,23 +831,17 @@ void SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_ return; } - for (auto column_predicate : _short_cir_eval_predicate) { - auto column_id = column_predicate->column_id(); + for (auto predicate : _short_cir_eval_predicate) { + auto column_id = predicate->column_id(); auto& short_cir_column = _current_return_columns[column_id]; auto* col_ptr = short_cir_column.get(); - // todo(zeno) define convert_dict_codes_if_dictionary interface in IColumn - if (short_cir_column->is_nullable()) { - auto nullable_col = - reinterpret_cast<vectorized::ColumnNullable*>(short_cir_column.get()); - col_ptr = nullable_col->get_nested_column_ptr().get(); + // range comparison predicate needs to sort the dict and convert the encoding + if (predicate->type() == PredicateType::LT || predicate->type() == PredicateType::LE || + predicate->type() == PredicateType::GT || predicate->type() == PredicateType::GE) { + col_ptr->convert_dict_codes_if_necessary(); } - - if (col_ptr->is_column_dictionary() && column_predicate->is_range_comparison_predicate()) { - auto& dict_col = - reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(*col_ptr); - dict_col.convert_dict_codes(); - } - column_predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, selected_size_ptr); + col_ptr->set_predicate_dict_code_if_necessary(predicate); Review comment: It's a problem, but it can't be implemented in `SegmentIterator::init`, because when the init method, there is no dictionary. But I can add a judgment, only set once in the block load stage. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org