github-actions[bot] commented on code in PR #15605: URL: https://github.com/apache/doris/pull/15605#discussion_r1067755075
########## be/src/vec/exec/join/vjoin_node_base.cpp: ########## @@ -104,51 +104,85 @@ _join_block.columns() - 1, remove_nullable(_join_block.get_by_position(_join_block.columns() - 1).column)); } + _join_block_column_num = _join_block.columns(); } Status VJoinNodeBase::_build_output_block(Block* origin_block, Block* output_block) { auto is_mem_reuse = output_block->mem_reuse(); - MutableBlock mutable_block = - is_mem_reuse - ? MutableBlock(output_block) - : MutableBlock(VectorizedUtils::create_empty_columnswithtypename(row_desc())); auto rows = origin_block->rows(); - // TODO: After FE plan support same nullable of output expr and origin block and mutable column - // we should replace `insert_column_datas` by `insert_range_from` - - auto insert_column_datas = [](auto& to, const auto& from, size_t rows) { - if (to->is_nullable() && !from.is_nullable()) { - auto& null_column = reinterpret_cast<ColumnNullable&>(*to); - null_column.get_nested_column().insert_range_from(from, 0, rows); - null_column.get_null_map_column().get_data().resize_fill(rows, 0); - } else { - to->insert_range_from(from, 0, rows); - } - }; + auto origin_col_num = origin_block->columns(); if (rows != 0) { - auto& mutable_columns = mutable_block.mutable_columns(); + SCOPED_TIMER(_projection_timer); + Columns columns; + // origin block id --> output block id + // This mapping is used to help reuse the columns from output block to reduce the page fault + std::map<int, int> col_mapping; + Columns resusable_columns; + size_t output_size = row_desc().num_materialized_slots(); if (_output_expr_ctxs.empty()) { - DCHECK(mutable_columns.size() == row_desc().num_materialized_slots()); - for (int i = 0; i < mutable_columns.size(); ++i) { - insert_column_datas(mutable_columns[i], *origin_block->get_by_position(i).column, - rows); + for (int i = 0; i < output_size; ++i) { + columns.emplace_back(origin_block->get_by_position(i).column); + col_map.push_back(i); + col_mapping[i] = i; } } else { - DCHECK(mutable_columns.size() == row_desc().num_materialized_slots()); - SCOPED_TIMER(_projection_timer); - for (int i = 0; i < mutable_columns.size(); ++i) { - auto result_column_id = -1; + for (int i = 0; i < output_size; ++i) { + int result_column_id = -1; RETURN_IF_ERROR(_output_expr_ctxs[i]->execute(origin_block, &result_column_id)); auto column_ptr = origin_block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); - insert_column_datas(mutable_columns[i], *column_ptr, rows); + columns.emplace_back(column_ptr); + if (result_column_id < origin_col_num) { + col_mapping[result_column_id] = i; + } + } + } + // TODO: After FE plan support same nullable of output expr and origin block and mutable column + // we should replace `insert_column_datas` by `insert_range_from` + // Sometimes the origin block's column nullable property is not equal to row descriptor + // So that should change it here + ColumnsWithTypeAndName empty_columns_with_type = + VectorizedUtils::create_columns_with_type_and_name(row_desc()); + for (int i = 0; i < output_size; ++i) { + if (empty_columns_with_type[i].type->is_nullable() && !columns[i]->is_nullable()) { + columns[i] = vectorized::make_nullable(columns[i]); + } else if (!empty_columns_with_type[i].type->is_nullable() && + columns[i]->is_nullable()) { + LOG(FATAL) << "To column is not nullable, but from column is nullable"; + } + } + + if (is_mem_reuse) { + for (int i = 0; i < origin_col_num; ++i) { + if (col_mapping.find(i) == col_mapping.end()) { + // It means this column is not transferred to output block, it is reuseable + resusable_columns.emplace_back(origin_block->get_by_position(i).column); + } else { + // Move the column from output block + auto output_col_id = col_mapping[i]; + if (empty_columns_with_type[col_mapping[i]].type->is_nullable() && + !origin_block->get_by_position(i).column->is_nullable) { + resusable_columns.emplace_back( + output_block->get_by_position(i).column->get_nested_column_ptr()); Review Comment: warning: no member named 'get_nested_column_ptr' in 'doris::vectorized::IColumn' [clang-diagnostic-error] ```cpp output_block->get_by_position(i).column->get_nested_column_ptr()); ^ ``` ########## be/src/vec/exec/join/vjoin_node_base.cpp: ########## @@ -104,51 +104,85 @@ _join_block.columns() - 1, remove_nullable(_join_block.get_by_position(_join_block.columns() - 1).column)); } + _join_block_column_num = _join_block.columns(); } Status VJoinNodeBase::_build_output_block(Block* origin_block, Block* output_block) { auto is_mem_reuse = output_block->mem_reuse(); - MutableBlock mutable_block = - is_mem_reuse - ? MutableBlock(output_block) - : MutableBlock(VectorizedUtils::create_empty_columnswithtypename(row_desc())); auto rows = origin_block->rows(); - // TODO: After FE plan support same nullable of output expr and origin block and mutable column - // we should replace `insert_column_datas` by `insert_range_from` - - auto insert_column_datas = [](auto& to, const auto& from, size_t rows) { - if (to->is_nullable() && !from.is_nullable()) { - auto& null_column = reinterpret_cast<ColumnNullable&>(*to); - null_column.get_nested_column().insert_range_from(from, 0, rows); - null_column.get_null_map_column().get_data().resize_fill(rows, 0); - } else { - to->insert_range_from(from, 0, rows); - } - }; + auto origin_col_num = origin_block->columns(); if (rows != 0) { - auto& mutable_columns = mutable_block.mutable_columns(); + SCOPED_TIMER(_projection_timer); + Columns columns; + // origin block id --> output block id + // This mapping is used to help reuse the columns from output block to reduce the page fault + std::map<int, int> col_mapping; + Columns resusable_columns; + size_t output_size = row_desc().num_materialized_slots(); if (_output_expr_ctxs.empty()) { - DCHECK(mutable_columns.size() == row_desc().num_materialized_slots()); - for (int i = 0; i < mutable_columns.size(); ++i) { - insert_column_datas(mutable_columns[i], *origin_block->get_by_position(i).column, - rows); + for (int i = 0; i < output_size; ++i) { + columns.emplace_back(origin_block->get_by_position(i).column); + col_map.push_back(i); + col_mapping[i] = i; } } else { - DCHECK(mutable_columns.size() == row_desc().num_materialized_slots()); - SCOPED_TIMER(_projection_timer); - for (int i = 0; i < mutable_columns.size(); ++i) { - auto result_column_id = -1; + for (int i = 0; i < output_size; ++i) { + int result_column_id = -1; RETURN_IF_ERROR(_output_expr_ctxs[i]->execute(origin_block, &result_column_id)); auto column_ptr = origin_block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); - insert_column_datas(mutable_columns[i], *column_ptr, rows); + columns.emplace_back(column_ptr); + if (result_column_id < origin_col_num) { + col_mapping[result_column_id] = i; + } + } + } + // TODO: After FE plan support same nullable of output expr and origin block and mutable column + // we should replace `insert_column_datas` by `insert_range_from` + // Sometimes the origin block's column nullable property is not equal to row descriptor + // So that should change it here + ColumnsWithTypeAndName empty_columns_with_type = + VectorizedUtils::create_columns_with_type_and_name(row_desc()); + for (int i = 0; i < output_size; ++i) { + if (empty_columns_with_type[i].type->is_nullable() && !columns[i]->is_nullable()) { + columns[i] = vectorized::make_nullable(columns[i]); + } else if (!empty_columns_with_type[i].type->is_nullable() && + columns[i]->is_nullable()) { + LOG(FATAL) << "To column is not nullable, but from column is nullable"; + } + } + + if (is_mem_reuse) { + for (int i = 0; i < origin_col_num; ++i) { + if (col_mapping.find(i) == col_mapping.end()) { + // It means this column is not transferred to output block, it is reuseable + resusable_columns.emplace_back(origin_block->get_by_position(i).column); + } else { + // Move the column from output block + auto output_col_id = col_mapping[i]; + if (empty_columns_with_type[col_mapping[i]].type->is_nullable() && + !origin_block->get_by_position(i).column->is_nullable) { Review Comment: warning: reference to non-static member function must be called; did you mean to call it with no arguments? [clang-diagnostic-error] ```suggestion !origin_block->get_by_position(i).column->is_nullable()) { ``` ########## be/src/vec/exec/join/vnested_loop_join_node.cpp: ########## @@ -642,17 +642,17 @@ Status VNestedLoopJoinNode::pull(RuntimeState* state, vectorized::Block* block, : _matched_rows_done; { - Block tmp_block = _join_block; _add_tuple_is_null_column(&tmp_block); Review Comment: warning: use of undeclared identifier 'tmp_block' [clang-diagnostic-error] ```cpp _add_tuple_is_null_column(&tmp_block); ^ ``` ########## be/src/vec/exec/join/vjoin_node_base.cpp: ########## @@ -104,51 +104,85 @@ void VJoinNodeBase::_construct_mutable_join_block() { _join_block.columns() - 1, remove_nullable(_join_block.get_by_position(_join_block.columns() - 1).column)); } + _join_block_column_num = _join_block.columns(); } Status VJoinNodeBase::_build_output_block(Block* origin_block, Block* output_block) { auto is_mem_reuse = output_block->mem_reuse(); - MutableBlock mutable_block = - is_mem_reuse - ? MutableBlock(output_block) - : MutableBlock(VectorizedUtils::create_empty_columnswithtypename(row_desc())); auto rows = origin_block->rows(); - // TODO: After FE plan support same nullable of output expr and origin block and mutable column - // we should replace `insert_column_datas` by `insert_range_from` - - auto insert_column_datas = [](auto& to, const auto& from, size_t rows) { - if (to->is_nullable() && !from.is_nullable()) { - auto& null_column = reinterpret_cast<ColumnNullable&>(*to); - null_column.get_nested_column().insert_range_from(from, 0, rows); - null_column.get_null_map_column().get_data().resize_fill(rows, 0); - } else { - to->insert_range_from(from, 0, rows); - } - }; + auto origin_col_num = origin_block->columns(); if (rows != 0) { - auto& mutable_columns = mutable_block.mutable_columns(); + SCOPED_TIMER(_projection_timer); + Columns columns; + // origin block id --> output block id + // This mapping is used to help reuse the columns from output block to reduce the page fault + std::map<int, int> col_mapping; + Columns resusable_columns; + size_t output_size = row_desc().num_materialized_slots(); if (_output_expr_ctxs.empty()) { - DCHECK(mutable_columns.size() == row_desc().num_materialized_slots()); - for (int i = 0; i < mutable_columns.size(); ++i) { - insert_column_datas(mutable_columns[i], *origin_block->get_by_position(i).column, - rows); + for (int i = 0; i < output_size; ++i) { + columns.emplace_back(origin_block->get_by_position(i).column); + col_map.push_back(i); Review Comment: warning: use of undeclared identifier 'col_map' [clang-diagnostic-error] ```cpp col_map.push_back(i); ^ ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org