This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 24c5767bc2a branch-3.0: [Fix](compaction) Failed compaction tablets should sleep before being selected again #50672 (#51030)
24c5767bc2a is described below

commit 24c5767bc2aebf590bed599fee4ed73a8486d3f6
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue May 20 09:29:09 2025 +0800

    branch-3.0: [Fix](compaction) Failed compaction tablets should sleep before being selected again #50672 (#51030)
    
    Cherry-picked from #50672
    
    Co-authored-by: abmdocrt <[email protected]>
---
 be/src/cloud/cloud_cumulative_compaction.cpp     |  16 +-
 be/src/cloud/cloud_storage_engine.cpp            |  15 +-
 be/src/cloud/cloud_tablet_mgr.cpp                |   9 +-
 be/src/cloud/cloud_tablet_mgr.h                  |   3 +
 be/src/common/status.h                           |   1 +
 be/src/olap/cumulative_compaction.cpp            |   4 +-
 be/src/olap/tablet.cpp                           |   2 +-
 be/test/cloud/cloud_compaction_test.cpp          | 195 +++++++++++++++++++++++
 regression-test/plugins/plugin_compaction.groovy |   2 +-
 9 files changed, 228 insertions(+), 19 deletions(-)

diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp
index 5d6a519aa5a..d04155ab283 100644
--- a/be/src/cloud/cloud_cumulative_compaction.cpp
+++ b/be/src/cloud/cloud_cumulative_compaction.cpp
@@ -86,8 +86,8 @@ Status CloudCumulativeCompaction::prepare_compact() {
             // NOTICE: after that, the cumulative point may be larger than max version of this tablet, but it doesn't matter.
             update_cumulative_point();
             if (!config::enable_sleep_between_delete_cumu_compaction) {
-                st = Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>(
-                        "_last_delete_version.first not equal to -1");
+                st = Status::Error<CUMULATIVE_MEET_DELETE_VERSION>(
+                        "cumulative compaction meet delete version");
             }
         }
         return st;
@@ -154,7 +154,8 @@ Status CloudCumulativeCompaction::request_global_lock() {
             LOG_WARNING("failed to prepare cumu compaction")
                     .tag("job_id", _uuid)
                     .tag("msg", resp.status().msg());
-            return Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>("no suitable versions");
+            return Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>(
+                    "cumu no suitable versions: job tablet busy");
         } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION) {
             (static_cast<CloudTablet*>(_tablet.get()))->set_alter_version(resp.alter_version());
             std::stringstream ss;
@@ -483,7 +484,8 @@ Status CloudCumulativeCompaction::pick_rowsets_to_compact() {
                 });
     }
     if (candidate_rowsets.empty()) {
-        return Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>("no suitable versions");
+        return Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>(
+                "no suitable versions: candidate rowsets empty");
     }
     std::sort(candidate_rowsets.begin(), candidate_rowsets.end(), Rowset::comparator);
     if (auto st = check_version_continuity(candidate_rowsets); !st.ok()) {
@@ -511,12 +513,14 @@ Status CloudCumulativeCompaction::pick_rowsets_to_compact() {
                                  &_last_delete_version, &compaction_score);
 
     if (_input_rowsets.empty()) {
-        return Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>("no suitable versions");
+        return Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>(
+                "no suitable versions: input rowsets empty");
     } else if (_input_rowsets.size() == 1 &&
                !_input_rowsets.front()->rowset_meta()->is_segments_overlapping()) {
         VLOG_DEBUG << "there is only one rowset and not overlapping. tablet_id="
                    << _tablet->tablet_id() << ", version=" << _input_rowsets.front()->version();
-        return Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>("no suitable versions");
+        return Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>(
+                "no suitable versions: only one rowset and not overlapping");
     }
     return Status::OK();
 }
diff --git a/be/src/cloud/cloud_storage_engine.cpp b/be/src/cloud/cloud_storage_engine.cpp
index 668880be6df..f19bf4f4e4d 100644
--- a/be/src/cloud/cloud_storage_engine.cpp
+++ b/be/src/cloud/cloud_storage_engine.cpp
@@ -738,12 +738,14 @@ Status CloudStorageEngine::_submit_cumulative_compaction_task(const CloudTabletS
         long now = duration_cast<std::chrono::milliseconds>(
                            std::chrono::system_clock::now().time_since_epoch())
                            .count();
-        if (st.is<ErrorCode::CUMULATIVE_NO_SUITABLE_VERSION>() &&
-            st.msg() != "_last_delete_version.first not equal to -1") {
-            // Backoff strategy if no suitable version
-            tablet->last_cumu_no_suitable_version_ms = now;
+        if (!st.is<ErrorCode::CUMULATIVE_MEET_DELETE_VERSION>()) {
+            if (st.is<ErrorCode::CUMULATIVE_NO_SUITABLE_VERSION>()) {
+                // Backoff strategy if no suitable version
+                tablet->last_cumu_no_suitable_version_ms = now;
+            } else {
+                tablet->set_last_cumu_compaction_failure_time(now);
+            }
         }
-        tablet->set_last_cumu_compaction_failure_time(now);
         std::lock_guard lock(_compaction_mtx);
         _tablet_preparing_cumu_compaction.erase(tablet->tablet_id());
         return st;
@@ -831,10 +833,9 @@ Status CloudStorageEngine::_submit_cumulative_compaction_task(const CloudTabletS
                 if (_should_delay_large_task()) {
                     long now = duration_cast<milliseconds>(system_clock::now().time_since_epoch())
                                        .count();
+                    // sleep 5s for this tablet
                     tablet->set_last_cumu_compaction_failure_time(now);
                     erase_executing_cumu_compaction();
-                    // sleep 5s for this tablet
-                    tablet->last_cumu_no_suitable_version_ms = now;
                     LOG_WARNING(
                             "failed to do CloudCumulativeCompaction, cumu thread pool is "
                             "intensive, delay large task.")
diff --git a/be/src/cloud/cloud_tablet_mgr.cpp b/be/src/cloud/cloud_tablet_mgr.cpp
index deab00c7ccf..4fcf5fed2f3 100644
--- a/be/src/cloud/cloud_tablet_mgr.cpp
+++ b/be/src/cloud/cloud_tablet_mgr.cpp
@@ -331,11 +331,13 @@ Status CloudTabletMgr::get_topn_tablets_to_compact(
     auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
     auto skip = [now, compaction_type](CloudTablet* t) {
         if (compaction_type == CompactionType::BASE_COMPACTION) {
-            return now - t->last_base_compaction_success_time_ms < config::base_compaction_freeze_interval_s * 1000;
+            return now - t->last_base_compaction_success_time_ms < config::base_compaction_freeze_interval_s * 1000 ||
+                now - t->last_base_compaction_failure_time() < config::min_compaction_failure_interval_ms;
         }
         // If tablet has too many rowsets but not be compacted for a long time, compaction should be performed
         // regardless of whether there is a load job recently.
-        return now - t->last_cumu_no_suitable_version_ms < config::min_compaction_failure_interval_ms ||
+        return now - t->last_cumu_compaction_failure_time() < config::min_compaction_failure_interval_ms ||
+               now - t->last_cumu_no_suitable_version_ms < config::min_compaction_failure_interval_ms ||
                (now - t->last_load_time_ms > config::cu_compaction_freeze_interval_s * 1000
                && now - t->last_cumu_compaction_success_time_ms < config::cumu_compaction_interval_s * 1000
                && t->fetch_add_approximate_num_rowsets(0) < config::max_tablet_version_num / 2);
@@ -481,4 +483,7 @@ void CloudTabletMgr::get_topn_tablet_delete_bitmap_score(
               << max_base_rowset_delete_bitmap_score_tablet_id << ",tablets=[" << ss.str() << "]";
 }
 
+void CloudTabletMgr::put_tablet_for_UT(std::shared_ptr<CloudTablet> tablet) {
+    _tablet_map->put(tablet);
+}
 } // namespace doris
diff --git a/be/src/cloud/cloud_tablet_mgr.h b/be/src/cloud/cloud_tablet_mgr.h
index 1a6ec72c1f7..ab56586cd88 100644
--- a/be/src/cloud/cloud_tablet_mgr.h
+++ b/be/src/cloud/cloud_tablet_mgr.h
@@ -87,6 +87,9 @@ public:
     void get_topn_tablet_delete_bitmap_score(uint64_t* max_delete_bitmap_score,
                                              uint64_t* max_base_rowset_delete_bitmap_score);
 
+    // **ATTN: JUST FOR UT**
+    void put_tablet_for_UT(std::shared_ptr<CloudTablet> tablet);
+
 private:
     CloudStorageEngine& _engine;
 
diff --git a/be/src/common/status.h b/be/src/common/status.h
index d059f289402..d003645b258 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -245,6 +245,7 @@ namespace ErrorCode {
     E(CUMULATIVE_MISS_VERSION, -2006, true);                 \
     E(FULL_NO_SUITABLE_VERSION, -2008, false);               \
     E(FULL_MISS_VERSION, -2009, true);                       \
+    E(CUMULATIVE_MEET_DELETE_VERSION, -2010, false);         \
     E(META_INVALID_ARGUMENT, -3000, true);                   \
     E(META_OPEN_DB_ERROR, -3001, true);                      \
     E(META_KEY_NOT_FOUND, -3002, false);                     \
diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp
index bc71fdafbf6..18873f55dec 100644
--- a/be/src/olap/cumulative_compaction.cpp
+++ b/be/src/olap/cumulative_compaction.cpp
@@ -194,8 +194,8 @@ Status CumulativeCompaction::pick_rowsets_to_compact() {
                     .tag("tablet id:", tablet()->tablet_id())
                     .tag("after cumulative compaction, cumu point:",
                          tablet()->cumulative_layer_point());
-            return Status::Error<CUMULATIVE_NO_SUITABLE_VERSION>(
-                    "_last_delete_version.first not equal to -1");
+            return Status::Error<CUMULATIVE_MEET_DELETE_VERSION>(
+                    "cumulative compaction meet delete version");
         }
 
         // we did not meet any delete version. which means compaction_score is not enough to do cumulative compaction.
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index f3d1917e66a..2a0bb32396f 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -1715,7 +1715,7 @@ Status Tablet::prepare_compaction_and_calculate_permits(
             permits = 0;
             // if we meet a delete version, should increase the cumulative point to let base compaction handle the delete version.
             // no need to wait 5s.
-            if (!(res.msg() == "_last_delete_version.first not equal to -1") ||
+            if (!res.is<ErrorCode::CUMULATIVE_MEET_DELETE_VERSION>() ||
                 config::enable_sleep_between_delete_cumu_compaction) {
                 tablet->set_last_cumu_compaction_failure_time(UnixMillis());
             }
diff --git a/be/test/cloud/cloud_compaction_test.cpp b/be/test/cloud/cloud_compaction_test.cpp
new file mode 100644
index 00000000000..c8db6739084
--- /dev/null
+++ b/be/test/cloud/cloud_compaction_test.cpp
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gen_cpp/AgentService_types.h>
+#include <gen_cpp/olap_file.pb.h>
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "cloud/cloud_storage_engine.h"
+#include "cloud/cloud_tablet.h"
+#include "cloud/cloud_tablet_mgr.h"
+#include "gtest/gtest_pred_impl.h"
+#include "json2pb/json_to_pb.h"
+#include "olap/olap_common.h"
+#include "olap/rowset/rowset_factory.h"
+#include "olap/rowset/rowset_meta.h"
+#include "olap/tablet_meta.h"
+#include "util/uid_util.h"
+
+namespace doris {
+class TabletMap;
+
+class CloudCompactionTest : public testing::Test {
+    CloudCompactionTest() : _engine(CloudStorageEngine({})) {}
+    void SetUp() override {
+        config::compaction_promotion_size_mbytes = 1024;
+        config::compaction_promotion_ratio = 0.05;
+        config::compaction_promotion_min_size_mbytes = 64;
+        config::compaction_min_size_mbytes = 64;
+
+        _tablet_meta.reset(new TabletMeta(1, 2, 15673, 15674, 4, 5, TTabletSchema(), 6, {{7, 8}},
+                                          UniqueId(9, 10), TTabletType::TABLET_TYPE_DISK,
+                                          TCompressionType::LZ4F));
+
+        _json_rowset_meta = R"({
+            "rowset_id": 540081,
+            "tablet_id": 15673,
+            "txn_id": 4042,
+            "tablet_schema_hash": 567997577,
+            "rowset_type": "BETA_ROWSET",
+            "rowset_state": "VISIBLE",
+            "start_version": 2,
+            "end_version": 2,
+            "num_rows": 3929,
+            "total_disk_size": 41,
+            "data_disk_size": 41,
+            "index_disk_size": 235,
+            "empty": false,
+            "load_id": {
+                "hi": -5350970832824939812,
+                "lo": -6717994719194512122
+            },
+            "creation_time": 1553765670,
+            "num_segments": 3
+        })";
+    }
+    void TearDown() override {}
+
+    void init_rs_meta(RowsetMetaSharedPtr& pb1, int64_t start, int64_t end) {
+        RowsetMetaPB rowset_meta_pb;
+        json2pb::JsonToProtoMessage(_json_rowset_meta, &rowset_meta_pb);
+        rowset_meta_pb.set_start_version(start);
+        rowset_meta_pb.set_end_version(end);
+        rowset_meta_pb.set_creation_time(10000);
+
+        pb1->init_from_pb(rowset_meta_pb);
+        pb1->set_total_disk_size(41);
+        pb1->set_tablet_schema(_tablet_meta->tablet_schema());
+    }
+
+    void init_rs_meta_small_base(std::vector<RowsetMetaSharedPtr>* rs_metas) {
+        RowsetMetaSharedPtr ptr1(new RowsetMeta());
+        init_rs_meta(ptr1, 0, 0);
+        rs_metas->push_back(ptr1);
+
+        RowsetMetaSharedPtr ptr2(new RowsetMeta());
+        init_rs_meta(ptr2, 1, 1);
+        rs_metas->push_back(ptr2);
+
+        RowsetMetaSharedPtr ptr3(new RowsetMeta());
+        init_rs_meta(ptr3, 2, 2);
+        rs_metas->push_back(ptr3);
+
+        RowsetMetaSharedPtr ptr4(new RowsetMeta());
+        init_rs_meta(ptr4, 3, 3);
+        rs_metas->push_back(ptr4);
+
+        RowsetMetaSharedPtr ptr5(new RowsetMeta());
+        init_rs_meta(ptr5, 4, 4);
+        rs_metas->push_back(ptr5);
+    }
+
+protected:
+    std::string _json_rowset_meta;
+    TabletMetaSharedPtr _tablet_meta;
+
+public:
+    CloudStorageEngine _engine;
+};
+
+TEST_F(CloudCompactionTest, failure_base_compaction_tablet_sleep_test) {
+    auto filter_out = [](CloudTablet* t) { return false; };
+    CloudTabletMgr mgr(_engine);
+
+    std::vector<RowsetMetaSharedPtr> rs_metas;
+    init_rs_meta_small_base(&rs_metas);
+
+    CloudTabletSPtr tablet1 = std::make_shared<CloudTablet>(_engine, _tablet_meta);
+    for (auto& rs_meta : rs_metas) {
+        static_cast<void>(_tablet_meta->add_rs_meta(rs_meta));
+    }
+    tablet1->tablet_meta()->_tablet_id = 10000;
+    tablet1->set_last_base_compaction_failure_time(
+            duration_cast<std::chrono::milliseconds>(
+                    std::chrono::system_clock::now().time_since_epoch())
+                    .count() -
+            100000);
+    tablet1->set_last_base_compaction_failure_time(0);
+    tablet1->tablet_meta()->tablet_schema()->set_disable_auto_compaction(false);
+    tablet1->_approximate_num_rowsets = 10;
+    mgr.put_tablet_for_UT(tablet1);
+
+    int64_t max_score;
+    std::vector<std::shared_ptr<CloudTablet>> tablets {};
+    Status st = mgr.get_topn_tablets_to_compact(1, CompactionType::BASE_COMPACTION, filter_out,
+                                                &tablets, &max_score);
+    ASSERT_EQ(st, Status::OK());
+    ASSERT_EQ(tablets.size(), 1);
+
+    tablet1->set_last_base_compaction_failure_time(
+            duration_cast<std::chrono::milliseconds>(
+                    std::chrono::system_clock::now().time_since_epoch())
+                    .count());
+    st = mgr.get_topn_tablets_to_compact(1, CompactionType::BASE_COMPACTION, filter_out, &tablets,
+                                         &max_score);
+    ASSERT_EQ(st, Status::OK());
+    ASSERT_EQ(tablets.size(), 0);
+}
+
+TEST_F(CloudCompactionTest, failure_cumu_compaction_tablet_sleep_test) {
+    auto filter_out = [](CloudTablet* t) { return false; };
+    CloudTabletMgr mgr(_engine);
+
+    std::vector<RowsetMetaSharedPtr> rs_metas;
+    init_rs_meta_small_base(&rs_metas);
+
+    CloudTabletSPtr tablet1 = std::make_shared<CloudTablet>(_engine, _tablet_meta);
+    for (auto& rs_meta : rs_metas) {
+        static_cast<void>(_tablet_meta->add_rs_meta(rs_meta));
+    }
+    tablet1->tablet_meta()->_tablet_id = 10000;
+    tablet1->set_last_cumu_compaction_failure_time(
+            duration_cast<std::chrono::milliseconds>(
+                    std::chrono::system_clock::now().time_since_epoch())
+                    .count() -
+            100000);
+    tablet1->set_last_cumu_compaction_failure_time(0);
+    tablet1->tablet_meta()->tablet_schema()->set_disable_auto_compaction(false);
+    tablet1->_approximate_cumu_num_deltas = 10;
+    mgr.put_tablet_for_UT(tablet1);
+
+    int64_t max_score;
+    std::vector<std::shared_ptr<CloudTablet>> tablets {};
+    Status st = mgr.get_topn_tablets_to_compact(1, CompactionType::CUMULATIVE_COMPACTION,
+                                                filter_out, &tablets, &max_score);
+    ASSERT_EQ(st, Status::OK());
+    ASSERT_EQ(tablets.size(), 1);
+
+    tablet1->set_last_cumu_compaction_failure_time(
+            duration_cast<std::chrono::milliseconds>(
+                    std::chrono::system_clock::now().time_since_epoch())
+                    .count());
+    st = mgr.get_topn_tablets_to_compact(1, CompactionType::BASE_COMPACTION, filter_out, &tablets,
+                                         &max_score);
+    ASSERT_EQ(st, Status::OK());
+    ASSERT_EQ(tablets.size(), 0);
+}
+} // namespace doris
diff --git a/regression-test/plugins/plugin_compaction.groovy b/regression-test/plugins/plugin_compaction.groovy
index 45dd99a97a3..b187fe4b178 100644
--- a/regression-test/plugins/plugin_compaction.groovy
+++ b/regression-test/plugins/plugin_compaction.groovy
@@ -106,7 +106,7 @@ Suite.metaClass.trigger_and_wait_compaction = { String table_name, String compac
                 triggered_tablets.add(tablet) // compaction already in queue, treat it as successfully triggered
             } else if (!auto_compaction_disabled) {
                 // ignore the error if auto compaction enabled
-            } else if (status_lower.contains("e-2000")) {
+            } else if (status_lower.contains("e-2000") || status_lower.contains("e-2010")) {
                 // ignore this tablet compaction.
             } else if (ignored_errors.any { error -> status_lower.contains(error.toLowerCase()) }) {
                 // ignore this tablet compaction if the error is in the ignored_errors list


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to