This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 1976b85fa44 [improvement](statistics)Use min row count of all replicas as tablet/table row count. (#41894) (#41979)
1976b85fa44 is described below

commit 1976b85fa448cd74497941514bc062468301cb2e
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Wed Oct 16 21:03:15 2024 +0800

    [improvement](statistics)Use min row count of all replicas as tablet/table row count. (#41894) (#41979)
    
    backport: https://github.com/apache/doris/pull/41894
---
 .../main/java/org/apache/doris/catalog/Tablet.java | 17 ++++++++++
 .../org/apache/doris/catalog/TabletStatMgr.java    | 12 ++++++--
 .../apache/doris/statistics/OlapAnalysisTask.java  |  3 +-
 .../java/org/apache/doris/catalog/TabletTest.java  | 36 ++++++++++++++++++++++
 4 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
index a7240895029..baf21cbeebe 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
@@ -476,6 +476,23 @@ public class Tablet extends MetaObject implements Writable {
         return singleReplica ? 
Double.valueOf(s.average().orElse(0)).longValue() : s.sum();
     }
 
+    // Get the least row count among all valid replicas.
+    // The replica with the least row count is the most accurate one. Because it performs most compaction.
+    public long getMinReplicaRowCount(long version) {
+        long minRowCount = Long.MAX_VALUE;
+        long maxReplicaVersion = 0;
+        for (Replica r : replicas) {
+            if (r.isAlive()
+                    && r.checkVersionCatchUp(version, false)
+                    && (r.getVersion() > maxReplicaVersion
+                        || r.getVersion() == maxReplicaVersion && 
r.getRowCount() < minRowCount)) {
+                minRowCount = r.getRowCount();
+                maxReplicaVersion = r.getVersion();
+            }
+        }
+        return minRowCount == Long.MAX_VALUE ? 0 : minRowCount;
+    }
+
     /**
      * A replica is healthy only if
      * 1. the backend is available
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
index 00b3bcfca87..2cbb55dca19 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
@@ -115,14 +115,17 @@ public class TabletStatMgr extends MasterDaemon {
                             long indexRowCount = 0L;
                             boolean indexReported = true;
                             for (Tablet tablet : index.getTablets()) {
-                                long tabletRowCount = 0L;
+                                long tabletRowCount = Long.MAX_VALUE;
                                 boolean tabletReported = false;
                                 for (Replica replica : tablet.getReplicas()) {
                                     LOG.debug("Table {} replica {} current 
version {}, report version {}",
                                             olapTable.getName(), 
replica.getId(),
                                             replica.getVersion(), 
replica.getLastReportVersion());
+                                    // Replica with less row count is more accurate than the others
+                                    // when replicas' version are identical. Because less row count
+                                    // means this replica does more compaction than the others.
                                     if (replica.checkVersionCatchUp(version, 
false)
-                                            && replica.getRowCount() >= 
tabletRowCount) {
+                                            && replica.getRowCount() < 
tabletRowCount) {
                                         // 1. If replica version and reported replica version are all equal to
                                         // PARTITION_INIT_VERSION, set tabletReported to true, which indicates this
                                         // tablet is empty for sure when previous report.
@@ -139,6 +142,11 @@ public class TabletStatMgr extends MasterDaemon {
                                         tabletRowCount = replica.getRowCount();
                                     }
                                 }
+
+                                // When all BEs are down, avoid set Long.MAX_VALUE to index and table row count. Use 0.
+                                if (tabletRowCount == Long.MAX_VALUE) {
+                                    tabletRowCount = 0L;
+                                }
                                 indexRowCount += tabletRowCount;
                                 // Only when all tablets of this index are reported, we set indexReported to true.
                                 indexReported = indexReported && 
tabletReported;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 34fb339564a..94370659b34 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -284,7 +284,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
                 int seekTid = (int) ((i + seek) % ids.size());
                 long tabletId = ids.get(seekTid);
                 sampleTabletIds.add(tabletId);
-                actualSampledRowCount += 
materializedIndex.getTablet(tabletId).getRowCount(true);
+                actualSampledRowCount += materializedIndex.getTablet(tabletId)
+                        .getMinReplicaRowCount(p.getVisibleVersion());
                 if (actualSampledRowCount >= sampleRows && 
!forPartitionColumn) {
                     enough = true;
                     break;
diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
index 99769a6b525..665aaa078ed 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
@@ -213,4 +213,40 @@ public class TabletTest {
                 Pair.of(1L, false), Pair.of(2L, false), Pair.of(3L, false), 
Pair.of(4L, true)
         );
     }
+
+    @Test
+    public void testGetMinReplicaRowCount() {
+        Tablet t = new Tablet(1);
+        long row = t.getMinReplicaRowCount(1);
+        Assert.assertEquals(0, row);
+
+        Replica r1 = new Replica(1, 1, 10, 0, 0, 0, 100, ReplicaState.NORMAL, 
0, 10);
+        t.addReplica(r1);
+        row = t.getMinReplicaRowCount(10);
+        Assert.assertEquals(100, row);
+
+        row = t.getMinReplicaRowCount(11);
+        Assert.assertEquals(0, row);
+
+        Replica r2 = new Replica(2, 2, 10, 0, 0, 0, 110, ReplicaState.NORMAL, 
0, 10);
+        Replica r3 = new Replica(3, 3, 10, 0, 0, 0, 90, ReplicaState.NORMAL, 
0, 10);
+        t.addReplica(r2);
+        t.addReplica(r3);
+        row = t.getMinReplicaRowCount(11);
+        Assert.assertEquals(0, row);
+        row = t.getMinReplicaRowCount(9);
+        Assert.assertEquals(90, row);
+
+        r3.setBad(true);
+        row = t.getMinReplicaRowCount(9);
+        Assert.assertEquals(100, row);
+
+        r3.setBad(false);
+        row = t.getMinReplicaRowCount(9);
+        Assert.assertEquals(90, row);
+
+        r2.updateVersion(11);
+        row = t.getMinReplicaRowCount(9);
+        Assert.assertEquals(110, row);
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to