github-actions[bot] commented on code in PR #15605:
URL: https://github.com/apache/doris/pull/15605#discussion_r1067755075


##########
be/src/vec/exec/join/vjoin_node_base.cpp:
##########
@@ -104,51 +104,85 @@
                 _join_block.columns() - 1,
                 
remove_nullable(_join_block.get_by_position(_join_block.columns() - 1).column));
     }
+    _join_block_column_num = _join_block.columns();
 }
 
 Status VJoinNodeBase::_build_output_block(Block* origin_block, Block* 
output_block) {
     auto is_mem_reuse = output_block->mem_reuse();
-    MutableBlock mutable_block =
-            is_mem_reuse
-                    ? MutableBlock(output_block)
-                    : 
MutableBlock(VectorizedUtils::create_empty_columnswithtypename(row_desc()));
     auto rows = origin_block->rows();
-    // TODO: After FE plan support same nullable of output expr and origin 
block and mutable column
-    // we should replace `insert_column_datas` by `insert_range_from`
-
-    auto insert_column_datas = [](auto& to, const auto& from, size_t rows) {
-        if (to->is_nullable() && !from.is_nullable()) {
-            auto& null_column = reinterpret_cast<ColumnNullable&>(*to);
-            null_column.get_nested_column().insert_range_from(from, 0, rows);
-            null_column.get_null_map_column().get_data().resize_fill(rows, 0);
-        } else {
-            to->insert_range_from(from, 0, rows);
-        }
-    };
+    auto origin_col_num = origin_block->columns();
     if (rows != 0) {
-        auto& mutable_columns = mutable_block.mutable_columns();
+        SCOPED_TIMER(_projection_timer);
+        Columns columns;
+        // origin block id --> output block id
+        // This mapping is used to help reuse the columns from output block to 
reduce the page fault
+        std::map<int, int> col_mapping;
+        Columns resusable_columns;
+        size_t output_size = row_desc().num_materialized_slots();
         if (_output_expr_ctxs.empty()) {
-            DCHECK(mutable_columns.size() == 
row_desc().num_materialized_slots());
-            for (int i = 0; i < mutable_columns.size(); ++i) {
-                insert_column_datas(mutable_columns[i], 
*origin_block->get_by_position(i).column,
-                                    rows);
+            for (int i = 0; i < output_size; ++i) {
+                columns.emplace_back(origin_block->get_by_position(i).column);
+                col_map.push_back(i);
+                col_mapping[i] = i;
             }
         } else {
-            DCHECK(mutable_columns.size() == 
row_desc().num_materialized_slots());
-            SCOPED_TIMER(_projection_timer);
-            for (int i = 0; i < mutable_columns.size(); ++i) {
-                auto result_column_id = -1;
+            for (int i = 0; i < output_size; ++i) {
+                int result_column_id = -1;
                 RETURN_IF_ERROR(_output_expr_ctxs[i]->execute(origin_block, 
&result_column_id));
                 auto column_ptr = 
origin_block->get_by_position(result_column_id)
                                           
.column->convert_to_full_column_if_const();
-                insert_column_datas(mutable_columns[i], *column_ptr, rows);
+                columns.emplace_back(column_ptr);
+                if (result_column_id < origin_col_num) {
+                    col_mapping[result_column_id] = i;
+                }
+            }
+        }
+        // TODO: After FE plan support same nullable of output expr and origin 
block and mutable column
+        // we should replace `insert_column_datas` by `insert_range_from`
+        // Sometimes the origin block's column nullable property is not equal 
to row descriptor
+        // So that should change it here
+        ColumnsWithTypeAndName empty_columns_with_type =
+                VectorizedUtils::create_columns_with_type_and_name(row_desc());
+        for (int i = 0; i < output_size; ++i) {
+            if (empty_columns_with_type[i].type->is_nullable() && 
!columns[i]->is_nullable()) {
+                columns[i] = vectorized::make_nullable(columns[i]);
+            } else if (!empty_columns_with_type[i].type->is_nullable() &&
+                       columns[i]->is_nullable()) {
+                LOG(FATAL) << "To column is not nullable, but from column is 
nullable";
+            }
+        }
+
+        if (is_mem_reuse) {
+            for (int i = 0; i < origin_col_num; ++i) {
+                if (col_mapping.find(i) == col_mapping.end()) {
+                    // It means this column is not transferred to output 
block, it is reuseable
+                    
resusable_columns.emplace_back(origin_block->get_by_position(i).column);
+                } else {
+                    // Move the column from output block
+                    auto output_col_id = col_mapping[i];
+                    if 
(empty_columns_with_type[col_mapping[i]].type->is_nullable() &&
+                        !origin_block->get_by_position(i).column->is_nullable) 
{
+                        resusable_columns.emplace_back(
+                                
output_block->get_by_position(i).column->get_nested_column_ptr());

Review Comment:
   warning: no member named 'get_nested_column_ptr' in 
'doris::vectorized::IColumn' [clang-diagnostic-error]
   ```cpp
                                   
output_block->get_by_position(i).column->get_nested_column_ptr());
                                                                            ^
   ```
   



##########
be/src/vec/exec/join/vjoin_node_base.cpp:
##########
@@ -104,51 +104,85 @@
                 _join_block.columns() - 1,
                 
remove_nullable(_join_block.get_by_position(_join_block.columns() - 1).column));
     }
+    _join_block_column_num = _join_block.columns();
 }
 
 Status VJoinNodeBase::_build_output_block(Block* origin_block, Block* 
output_block) {
     auto is_mem_reuse = output_block->mem_reuse();
-    MutableBlock mutable_block =
-            is_mem_reuse
-                    ? MutableBlock(output_block)
-                    : 
MutableBlock(VectorizedUtils::create_empty_columnswithtypename(row_desc()));
     auto rows = origin_block->rows();
-    // TODO: After FE plan support same nullable of output expr and origin 
block and mutable column
-    // we should replace `insert_column_datas` by `insert_range_from`
-
-    auto insert_column_datas = [](auto& to, const auto& from, size_t rows) {
-        if (to->is_nullable() && !from.is_nullable()) {
-            auto& null_column = reinterpret_cast<ColumnNullable&>(*to);
-            null_column.get_nested_column().insert_range_from(from, 0, rows);
-            null_column.get_null_map_column().get_data().resize_fill(rows, 0);
-        } else {
-            to->insert_range_from(from, 0, rows);
-        }
-    };
+    auto origin_col_num = origin_block->columns();
     if (rows != 0) {
-        auto& mutable_columns = mutable_block.mutable_columns();
+        SCOPED_TIMER(_projection_timer);
+        Columns columns;
+        // origin block id --> output block id
+        // This mapping is used to help reuse the columns from output block to 
reduce the page fault
+        std::map<int, int> col_mapping;
+        Columns resusable_columns;
+        size_t output_size = row_desc().num_materialized_slots();
         if (_output_expr_ctxs.empty()) {
-            DCHECK(mutable_columns.size() == 
row_desc().num_materialized_slots());
-            for (int i = 0; i < mutable_columns.size(); ++i) {
-                insert_column_datas(mutable_columns[i], 
*origin_block->get_by_position(i).column,
-                                    rows);
+            for (int i = 0; i < output_size; ++i) {
+                columns.emplace_back(origin_block->get_by_position(i).column);
+                col_map.push_back(i);
+                col_mapping[i] = i;
             }
         } else {
-            DCHECK(mutable_columns.size() == 
row_desc().num_materialized_slots());
-            SCOPED_TIMER(_projection_timer);
-            for (int i = 0; i < mutable_columns.size(); ++i) {
-                auto result_column_id = -1;
+            for (int i = 0; i < output_size; ++i) {
+                int result_column_id = -1;
                 RETURN_IF_ERROR(_output_expr_ctxs[i]->execute(origin_block, 
&result_column_id));
                 auto column_ptr = 
origin_block->get_by_position(result_column_id)
                                           
.column->convert_to_full_column_if_const();
-                insert_column_datas(mutable_columns[i], *column_ptr, rows);
+                columns.emplace_back(column_ptr);
+                if (result_column_id < origin_col_num) {
+                    col_mapping[result_column_id] = i;
+                }
+            }
+        }
+        // TODO: After FE plan support same nullable of output expr and origin 
block and mutable column
+        // we should replace `insert_column_datas` by `insert_range_from`
+        // Sometimes the origin block's column nullable property is not equal 
to row descriptor
+        // So that should change it here
+        ColumnsWithTypeAndName empty_columns_with_type =
+                VectorizedUtils::create_columns_with_type_and_name(row_desc());
+        for (int i = 0; i < output_size; ++i) {
+            if (empty_columns_with_type[i].type->is_nullable() && 
!columns[i]->is_nullable()) {
+                columns[i] = vectorized::make_nullable(columns[i]);
+            } else if (!empty_columns_with_type[i].type->is_nullable() &&
+                       columns[i]->is_nullable()) {
+                LOG(FATAL) << "To column is not nullable, but from column is 
nullable";
+            }
+        }
+
+        if (is_mem_reuse) {
+            for (int i = 0; i < origin_col_num; ++i) {
+                if (col_mapping.find(i) == col_mapping.end()) {
+                    // It means this column is not transferred to output 
block, it is reuseable
+                    
resusable_columns.emplace_back(origin_block->get_by_position(i).column);
+                } else {
+                    // Move the column from output block
+                    auto output_col_id = col_mapping[i];
+                    if 
(empty_columns_with_type[col_mapping[i]].type->is_nullable() &&
+                        !origin_block->get_by_position(i).column->is_nullable) 
{

Review Comment:
   warning: reference to non-static member function must be called; did you 
mean to call it with no arguments? [clang-diagnostic-error]
   
   ```suggestion
                           
!origin_block->get_by_position(i).column->is_nullable()) {
   ```
   



##########
be/src/vec/exec/join/vnested_loop_join_node.cpp:
##########
@@ -642,17 +642,17 @@ Status VNestedLoopJoinNode::pull(RuntimeState* state, 
vectorized::Block* block,
                        : _matched_rows_done;
 
         {
-            Block tmp_block = _join_block;
             _add_tuple_is_null_column(&tmp_block);

Review Comment:
   warning: use of undeclared identifier 'tmp_block' [clang-diagnostic-error]
   ```cpp
               _add_tuple_is_null_column(&tmp_block);
                                          ^
   ```
   



##########
be/src/vec/exec/join/vjoin_node_base.cpp:
##########
@@ -104,51 +104,85 @@ void VJoinNodeBase::_construct_mutable_join_block() {
                 _join_block.columns() - 1,
                 
remove_nullable(_join_block.get_by_position(_join_block.columns() - 1).column));
     }
+    _join_block_column_num = _join_block.columns();
 }
 
 Status VJoinNodeBase::_build_output_block(Block* origin_block, Block* 
output_block) {
     auto is_mem_reuse = output_block->mem_reuse();
-    MutableBlock mutable_block =
-            is_mem_reuse
-                    ? MutableBlock(output_block)
-                    : 
MutableBlock(VectorizedUtils::create_empty_columnswithtypename(row_desc()));
     auto rows = origin_block->rows();
-    // TODO: After FE plan support same nullable of output expr and origin 
block and mutable column
-    // we should replace `insert_column_datas` by `insert_range_from`
-
-    auto insert_column_datas = [](auto& to, const auto& from, size_t rows) {
-        if (to->is_nullable() && !from.is_nullable()) {
-            auto& null_column = reinterpret_cast<ColumnNullable&>(*to);
-            null_column.get_nested_column().insert_range_from(from, 0, rows);
-            null_column.get_null_map_column().get_data().resize_fill(rows, 0);
-        } else {
-            to->insert_range_from(from, 0, rows);
-        }
-    };
+    auto origin_col_num = origin_block->columns();
     if (rows != 0) {
-        auto& mutable_columns = mutable_block.mutable_columns();
+        SCOPED_TIMER(_projection_timer);
+        Columns columns;
+        // origin block id --> output block id
+        // This mapping is used to help reuse the columns from output block to 
reduce the page fault
+        std::map<int, int> col_mapping;
+        Columns resusable_columns;
+        size_t output_size = row_desc().num_materialized_slots();
         if (_output_expr_ctxs.empty()) {
-            DCHECK(mutable_columns.size() == 
row_desc().num_materialized_slots());
-            for (int i = 0; i < mutable_columns.size(); ++i) {
-                insert_column_datas(mutable_columns[i], 
*origin_block->get_by_position(i).column,
-                                    rows);
+            for (int i = 0; i < output_size; ++i) {
+                columns.emplace_back(origin_block->get_by_position(i).column);
+                col_map.push_back(i);

Review Comment:
   warning: use of undeclared identifier 'col_map' [clang-diagnostic-error]
   ```cpp
                   col_map.push_back(i);
                   ^
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to