dataroaring commented on code in PR #39756: URL: https://github.com/apache/doris/pull/39756#discussion_r1732865470
########## be/src/olap/base_tablet.cpp: ########## @@ -983,12 +994,111 @@ Status BaseTablet::generate_new_block_for_partial_update( mutable_column->insert_default(); } } else { - mutable_column->insert_from( - *old_block.get_columns_with_type_and_name()[i].column.get(), - read_index_old[idx]); + mutable_column->insert_from(*old_block.get_by_position(i).column, + read_index_old[idx]); + } + } + } + output_block->set_columns(std::move(full_mutable_columns)); + VLOG_DEBUG << "full block when publish: " << output_block->dump_data(); + return Status::OK(); +} + +Status BaseTablet::generate_new_block_for_flexible_partial_update( + TabletSchemaSPtr rowset_schema, const PartialUpdateInfo* partial_update_info, + const FixedReadPlan& read_plan_ori, const FixedReadPlan& read_plan_update, + const std::map<RowsetId, RowsetSharedPtr>& rsid_to_rowset, + vectorized::Block* output_block) { + CHECK(output_block); + + const auto& non_sort_key_cids = partial_update_info->missing_cids; + std::vector<uint32_t> all_cids(rowset_schema->num_columns()); + std::iota(all_cids.begin(), all_cids.end(), 0); + auto old_block = rowset_schema->create_block_by_cids(non_sort_key_cids); + auto update_block = rowset_schema->create_block_by_cids(all_cids); + + // rowid in the final block(start from 0, increase continuously) -> rowid to read in update_block + std::map<uint32_t, uint32_t> read_index_update; + + // 1. 
read the current rowset first, if a row in the current rowset has delete sign mark + // we don't need to read values from old block for that row + RETURN_IF_ERROR(read_plan_update.read_columns_by_plan(*rowset_schema, all_cids, rsid_to_rowset, + update_block, &read_index_update)); + size_t update_rows = read_index_update.size(); + + // TODO(bobhan1): add the delete sign optimazation here + // // if there is sequence column in the table, we need to read the sequence column, + // // otherwise it may cause the merge-on-read based compaction policy to produce incorrect results + // const auto* __restrict new_block_delete_signs = + // rowset_schema->has_sequence_col() + // ? nullptr + // : get_delete_sign_column_data(update_block, update_rows); + + // 2. read previous rowsets + // rowid in the final block(start from 0, increase, may not continuous becasue we skip to read some rows) -> rowid to read in old_block + std::map<uint32_t, uint32_t> read_index_old; + RETURN_IF_ERROR(read_plan_ori.read_columns_by_plan(*rowset_schema, non_sort_key_cids, + rsid_to_rowset, old_block, &read_index_old)); + size_t old_rows = read_index_old.size(); + DCHECK(update_rows >= old_rows); + const auto* __restrict old_block_delete_signs = + get_delete_sign_column_data(old_block, old_rows); + DCHECK(old_block_delete_signs != nullptr); + + // 3. build default value block + auto default_value_block = old_block.clone_empty(); + RETURN_IF_ERROR(BaseTablet::generate_default_value_block(*rowset_schema, non_sort_key_cids, + partial_update_info->default_values, + old_block, default_value_block)); + + // 4. 
build the final block + auto full_mutable_columns = output_block->mutate_columns(); + DCHECK(rowset_schema->has_skip_bitmap_col()); + auto skip_bitmap_col_idx = rowset_schema->skip_bitmap_col_idx(); + const std::vector<BitmapValue>* skip_bitmaps = + &(assert_cast<const vectorized::ColumnBitmap*, TypeCheckOnRelease::DISABLE>( + update_block.get_by_position(skip_bitmap_col_idx).column->get_ptr().get()) + ->get_data()); + for (std::size_t cid {0}; cid < rowset_schema->num_columns(); cid++) { + if (cid < rowset_schema->num_key_columns()) { + full_mutable_columns[cid] = + std::move(*update_block.get_by_position(cid).column).mutate(); + } else { + const auto& rs_column = rowset_schema->column(cid); + auto col_uid = rs_column.unique_id(); + auto& cur_col = full_mutable_columns[cid]; + for (auto idx = 0; idx < update_rows; ++idx) { + if (skip_bitmaps->at(idx).contains(col_uid)) { + if (old_block_delete_signs != nullptr && + old_block_delete_signs[read_index_old[idx]] != 0) { + if (rs_column.has_default_value()) { + const auto& src_column = + *default_value_block + .get_by_position(cid - + rowset_schema->num_key_columns()) + .column; + cur_col->insert_from(src_column, 0); Review Comment: Too many indents. Please consider putting some code into a function. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org