This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new b489cdf8408 [opt](merge-on-write) avoid to check delete bitmap while 
lookup rowkey in some situation to reduce CPU cost (#41480) (#41439)
b489cdf8408 is described below

commit b489cdf840856eb553973edde7fcd056d3ef92b1
Author: zhannngchen <48427519+zhannngc...@users.noreply.github.com>
AuthorDate: Fri Oct 11 10:15:39 2024 +0800

    [opt](merge-on-write) avoid to check delete bitmap while lookup rowkey in 
some situation to reduce CPU cost (#41480) (#41439)
    
    ## Proposed changes
    
    Issue Number: close #xxx
    
    cherry-pick #41480
---
 be/src/olap/tablet.cpp | 27 +++++++++++++++++----------
 be/src/olap/tablet.h   |  3 ++-
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index c1a94223eff..9f07f17ab8a 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2822,7 +2822,7 @@ Status Tablet::lookup_row_key(const Slice& encoded_key, 
TabletSchema* latest_sch
                               const std::vector<RowsetSharedPtr>& 
specified_rowsets,
                               RowLocation* row_location, uint32_t version,
                               
std::vector<std::unique_ptr<SegmentCacheHandle>>& segment_caches,
-                              RowsetSharedPtr* rowset, bool with_rowid) {
+                              RowsetSharedPtr* rowset, bool with_rowid, bool 
is_partial_update) {
     SCOPED_BVAR_LATENCY(g_tablet_lookup_rowkey_latency);
     size_t seq_col_length = 0;
     // use the latest tablet schema to decide if the tablet has sequence 
column currently
@@ -2839,6 +2839,8 @@ Status Tablet::lookup_row_key(const Slice& encoded_key, 
TabletSchema* latest_sch
             Slice(encoded_key.get_data(), encoded_key.get_size() - 
seq_col_length - rowid_length);
     RowLocation loc;
 
+    bool need_to_check_delete_bitmap = is_partial_update || with_seq_col;
+
     for (size_t i = 0; i < specified_rowsets.size(); i++) {
         auto& rs = specified_rowsets[i];
         auto& segments_key_bounds = 
rs->rowset_meta()->get_segments_key_bounds();
@@ -2877,15 +2879,19 @@ Status Tablet::lookup_row_key(const Slice& encoded_key, 
TabletSchema* latest_sch
             if (!s.ok() && !s.is<KEY_ALREADY_EXISTS>()) {
                 return s;
             }
-            if (s.ok() && 
_tablet_meta->delete_bitmap().contains_agg_without_cache(
-                                  {loc.rowset_id, loc.segment_id, version}, 
loc.row_id)) {
-                // if has sequence col, we continue to compare the sequence_id 
of
-                // all rowsets, util we find an existing key.
-                if (schema->has_sequence_col()) {
-                    continue;
+            if (s.ok() && need_to_check_delete_bitmap) {
+                // check if the key is already marked as deleted
+                if (_tablet_meta->delete_bitmap().contains_agg_without_cache(
+                            {loc.rowset_id, loc.segment_id, version}, 
loc.row_id)) {
+                    // if has sequence col, we continue to compare the 
sequence_id of
+                    // all rowsets, until we find an existing key.
+                    if (with_seq_col) {
+                        continue;
+                    }
+                    // The key is deleted, we need to break the loop and return
+                    // KEY_NOT_FOUND.
+                    break;
                 }
-                // The key is deleted, we don't need to search for it any more.
-                break;
             }
             // `st` is either OK or KEY_ALREADY_EXISTS now.
             // for partial update, even if the key is already exists, we still 
need to
@@ -3052,7 +3058,8 @@ Status Tablet::calc_segment_delete_bitmap(RowsetSharedPtr 
rowset,
 
             RowsetSharedPtr rowset_find;
             auto st = lookup_row_key(key, rowset_schema.get(), true, 
specified_rowsets, &loc,
-                                     dummy_version.first - 1, segment_caches, 
&rowset_find);
+                                     dummy_version.first - 1, segment_caches, 
&rowset_find, false,
+                                     is_partial_update);
             bool expected_st = st.ok() || st.is<KEY_NOT_FOUND>() || 
st.is<KEY_ALREADY_EXISTS>();
             // It's a defensive DCHECK, we need to exclude some common errors 
to avoid core-dump
             // while stress test
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index fb20498355f..fdfad28a0de 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -438,7 +438,8 @@ public:
                           const std::vector<RowsetSharedPtr>& 
specified_rowsets,
                           RowLocation* row_location, uint32_t version,
                           std::vector<std::unique_ptr<SegmentCacheHandle>>& 
segment_caches,
-                          RowsetSharedPtr* rowset = nullptr, bool with_rowid = 
true);
+                          RowsetSharedPtr* rowset = nullptr, bool with_rowid = 
true,
+                          bool is_partial_update = false);
 
     // Lookup a row with TupleDescriptor and fill Block
     Status lookup_row_data(const Slice& encoded_key, const RowLocation& 
row_location,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to