dataroaring commented on code in PR #39756:
URL: https://github.com/apache/doris/pull/39756#discussion_r1732865470


##########
be/src/olap/base_tablet.cpp:
##########
@@ -983,12 +994,111 @@ Status BaseTablet::generate_new_block_for_partial_update(
                     mutable_column->insert_default();
                 }
             } else {
-                mutable_column->insert_from(
-                        
*old_block.get_columns_with_type_and_name()[i].column.get(),
-                        read_index_old[idx]);
+                
mutable_column->insert_from(*old_block.get_by_position(i).column,
+                                            read_index_old[idx]);
+            }
+        }
+    }
+    output_block->set_columns(std::move(full_mutable_columns));
+    VLOG_DEBUG << "full block when publish: " << output_block->dump_data();
+    return Status::OK();
+}
+
+Status BaseTablet::generate_new_block_for_flexible_partial_update(
+        TabletSchemaSPtr rowset_schema, const PartialUpdateInfo* 
partial_update_info,
+        const FixedReadPlan& read_plan_ori, const FixedReadPlan& 
read_plan_update,
+        const std::map<RowsetId, RowsetSharedPtr>& rsid_to_rowset,
+        vectorized::Block* output_block) {
+    CHECK(output_block);
+
+    const auto& non_sort_key_cids = partial_update_info->missing_cids;
+    std::vector<uint32_t> all_cids(rowset_schema->num_columns());
+    std::iota(all_cids.begin(), all_cids.end(), 0);
+    auto old_block = rowset_schema->create_block_by_cids(non_sort_key_cids);
+    auto update_block = rowset_schema->create_block_by_cids(all_cids);
+
+    // rowid in the final block(start from 0, increase continuously) -> rowid 
to read in update_block
+    std::map<uint32_t, uint32_t> read_index_update;
+
+    // 1. read the current rowset first, if a row in the current rowset has 
delete sign mark
+    // we don't need to read values from old block for that row
+    RETURN_IF_ERROR(read_plan_update.read_columns_by_plan(*rowset_schema, 
all_cids, rsid_to_rowset,
+                                                          update_block, 
&read_index_update));
+    size_t update_rows = read_index_update.size();
+
+    // TODO(bobhan1): add the delete sign optimization here
+    // // if there is sequence column in the table, we need to read the 
sequence column,
+    // // otherwise it may cause the merge-on-read based compaction policy to 
produce incorrect results
+    // const auto* __restrict new_block_delete_signs =
+    //         rowset_schema->has_sequence_col()
+    //                 ? nullptr
+    //                 : get_delete_sign_column_data(update_block, 
update_rows);
+
+    // 2. read previous rowsets
+    // rowid in the final block(start from 0, increase, may not continuous 
because we skip to read some rows) -> rowid to read in old_block
+    std::map<uint32_t, uint32_t> read_index_old;
+    RETURN_IF_ERROR(read_plan_ori.read_columns_by_plan(*rowset_schema, 
non_sort_key_cids,
+                                                       rsid_to_rowset, 
old_block, &read_index_old));
+    size_t old_rows = read_index_old.size();
+    DCHECK(update_rows >= old_rows);
+    const auto* __restrict old_block_delete_signs =
+            get_delete_sign_column_data(old_block, old_rows);
+    DCHECK(old_block_delete_signs != nullptr);
+
+    // 3. build default value block
+    auto default_value_block = old_block.clone_empty();
+    RETURN_IF_ERROR(BaseTablet::generate_default_value_block(*rowset_schema, 
non_sort_key_cids,
+                                                             
partial_update_info->default_values,
+                                                             old_block, 
default_value_block));
+
+    // 4. build the final block
+    auto full_mutable_columns = output_block->mutate_columns();
+    DCHECK(rowset_schema->has_skip_bitmap_col());
+    auto skip_bitmap_col_idx = rowset_schema->skip_bitmap_col_idx();
+    const std::vector<BitmapValue>* skip_bitmaps =
+            &(assert_cast<const vectorized::ColumnBitmap*, 
TypeCheckOnRelease::DISABLE>(
+                      
update_block.get_by_position(skip_bitmap_col_idx).column->get_ptr().get())
+                      ->get_data());
+    for (std::size_t cid {0}; cid < rowset_schema->num_columns(); cid++) {
+        if (cid < rowset_schema->num_key_columns()) {
+            full_mutable_columns[cid] =
+                    
std::move(*update_block.get_by_position(cid).column).mutate();
+        } else {
+            const auto& rs_column = rowset_schema->column(cid);
+            auto col_uid = rs_column.unique_id();
+            auto& cur_col = full_mutable_columns[cid];
+            for (auto idx = 0; idx < update_rows; ++idx) {
+                if (skip_bitmaps->at(idx).contains(col_uid)) {
+                    if (old_block_delete_signs != nullptr &&
+                        old_block_delete_signs[read_index_old[idx]] != 0) {
+                        if (rs_column.has_default_value()) {
+                            const auto& src_column =
+                                    *default_value_block
+                                             .get_by_position(cid -
+                                                              
rowset_schema->num_key_columns())
+                                             .column;
+                            cur_col->insert_from(src_column, 0);

Review Comment:
   Too many indents. Please consider putting some of this code into a function.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to