This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new b489cdf8408 [opt](merge-on-write) avoid to check delete bitmap while lookup rowkey in some situation to reduce CPU cost (#41480) (#41439) b489cdf8408 is described below commit b489cdf840856eb553973edde7fcd056d3ef92b1 Author: zhannngchen <48427519+zhannngc...@users.noreply.github.com> AuthorDate: Fri Oct 11 10:15:39 2024 +0800 [opt](merge-on-write) avoid to check delete bitmap while lookup rowkey in some situation to reduce CPU cost (#41480) (#41439) ## Proposed changes Issue Number: close #xxx cherry-pick #41480 --- be/src/olap/tablet.cpp | 27 +++++++++++++++++---------- be/src/olap/tablet.h | 3 ++- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index c1a94223eff..9f07f17ab8a 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -2822,7 +2822,7 @@ Status Tablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest_sch const std::vector<RowsetSharedPtr>& specified_rowsets, RowLocation* row_location, uint32_t version, std::vector<std::unique_ptr<SegmentCacheHandle>>& segment_caches, - RowsetSharedPtr* rowset, bool with_rowid) { + RowsetSharedPtr* rowset, bool with_rowid, bool is_partial_update) { SCOPED_BVAR_LATENCY(g_tablet_lookup_rowkey_latency); size_t seq_col_length = 0; // use the latest tablet schema to decide if the tablet has sequence column currently @@ -2839,6 +2839,8 @@ Status Tablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest_sch Slice(encoded_key.get_data(), encoded_key.get_size() - seq_col_length - rowid_length); RowLocation loc; + bool need_to_check_delete_bitmap = is_partial_update || with_seq_col; + for (size_t i = 0; i < specified_rowsets.size(); i++) { auto& rs = specified_rowsets[i]; auto& segments_key_bounds = rs->rowset_meta()->get_segments_key_bounds(); @@ -2877,15 +2879,19 @@ Status Tablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest_sch if (!s.ok() && !s.is<KEY_ALREADY_EXISTS>()) { return s; } - if (s.ok() && _tablet_meta->delete_bitmap().contains_agg_without_cache( - {loc.rowset_id, loc.segment_id, version}, loc.row_id)) { - // if has sequence col, we continue to compare the sequence_id of - // all rowsets, util we find an existing key. - if (schema->has_sequence_col()) { - continue; + if (s.ok() && need_to_check_delete_bitmap) { + // check if the key is already mark deleted + if (_tablet_meta->delete_bitmap().contains_agg_without_cache( + {loc.rowset_id, loc.segment_id, version}, loc.row_id)) { + // if has sequence col, we continue to compare the sequence_id of + // all rowsets, util we find an existing key. + if (with_seq_col) { + continue; + } + // The key is deleted, we need to break the loop and return + // KEY_NOT_FOUND. + break; } - // The key is deleted, we don't need to search for it any more. - break; } // `st` is either OK or KEY_ALREADY_EXISTS now. // for partial update, even if the key is already exists, we still need to @@ -3052,7 +3058,8 @@ Status Tablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset, RowsetSharedPtr rowset_find; auto st = lookup_row_key(key, rowset_schema.get(), true, specified_rowsets, &loc, - dummy_version.first - 1, segment_caches, &rowset_find); + dummy_version.first - 1, segment_caches, &rowset_find, false, + is_partial_update); bool expected_st = st.ok() || st.is<KEY_NOT_FOUND>() || st.is<KEY_ALREADY_EXISTS>(); // It's a defensive DCHECK, we need to exclude some common errors to avoid core-dump // while stress test diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index fb20498355f..fdfad28a0de 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -438,7 +438,8 @@ public: const std::vector<RowsetSharedPtr>& specified_rowsets, RowLocation* row_location, uint32_t version, std::vector<std::unique_ptr<SegmentCacheHandle>>& segment_caches, - RowsetSharedPtr* rowset = nullptr, bool with_rowid = true); + RowsetSharedPtr* rowset = nullptr, bool with_rowid = true, + bool is_partial_update = false); // Lookup a row with TupleDescriptor and fill Block Status lookup_row_data(const Slice& encoded_key, const RowLocation& row_location, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org