This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c9886c5c4c6 [improvement](statistics)Use min row count of all replicas 
as tablet/table row count. (#41894)
c9886c5c4c6 is described below

commit c9886c5c4c607cd45c9df2a83e48be9f288b8112
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Wed Oct 16 19:11:34 2024 +0800

    [improvement](statistics)Use min row count of all replicas as tablet/table 
row count. (#41894)
    
    Use the min row count of all replicas with the same version as the
    tablet/table row count. The replica with the least row count has performed
    more compaction than the others, so using it as the tablet row count is
    more accurate.
    Meanwhile, use the min row count as the tablet row count when choosing
    tablets during sample analyze.
---
 .../main/java/org/apache/doris/catalog/Tablet.java | 17 ++++++++++
 .../org/apache/doris/catalog/TabletStatMgr.java    | 11 +++++--
 .../apache/doris/statistics/OlapAnalysisTask.java  |  5 +--
 .../java/org/apache/doris/catalog/TabletTest.java  | 36 ++++++++++++++++++++++
 4 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
index 9fb44f07126..2dfdab1ad3d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
@@ -528,6 +528,23 @@ public class Tablet extends MetaObject {
         return singleReplica ? 
Double.valueOf(s.average().orElse(0)).longValue() : s.sum();
     }
 
+    // Get the least row count among all valid replicas.
+    // The replica with the least row count is the most accurate one. Because 
it performs most compaction.
+    public long getMinReplicaRowCount(long version) {
+        long minRowCount = Long.MAX_VALUE;
+        long maxReplicaVersion = 0;
+        for (Replica r : replicas) {
+            if (r.isAlive()
+                    && r.checkVersionCatchUp(version, false)
+                    && (r.getVersion() > maxReplicaVersion
+                        || r.getVersion() == maxReplicaVersion && 
r.getRowCount() < minRowCount)) {
+                minRowCount = r.getRowCount();
+                maxReplicaVersion = r.getVersion();
+            }
+        }
+        return minRowCount == Long.MAX_VALUE ? 0 : minRowCount;
+    }
+
     /**
      * A replica is healthy only if
      * 1. the backend is available
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
index 18e1bfd6526..aa46c362e38 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
@@ -134,15 +134,18 @@ public class TabletStatMgr extends MasterDaemon {
                                 Long tabletDataSize = 0L;
                                 Long tabletRemoteDataSize = 0L;
 
-                                Long tabletRowCount = 0L;
+                                Long tabletRowCount = Long.MAX_VALUE;
 
                                 boolean tabletReported = false;
                                 for (Replica replica : tablet.getReplicas()) {
                                     LOG.debug("Table {} replica {} current 
version {}, report version {}",
                                             olapTable.getName(), 
replica.getId(),
                                             replica.getVersion(), 
replica.getLastReportVersion());
+                                    // Replica with less row count is more 
accurate than the others
+                                    // when replicas' version are identical. 
Because less row count
+                                    // means this replica does more compaction 
than the others.
                                     if (replica.checkVersionCatchUp(version, 
false)
-                                            && replica.getRowCount() >= 
tabletRowCount) {
+                                            && replica.getRowCount() < 
tabletRowCount) {
                                         // 1. If replica version and reported 
replica version are all equal to
                                         // PARTITION_INIT_VERSION, set 
tabletReported to true, which indicates this
                                         // tablet is empty for sure when 
previous report.
@@ -173,6 +176,10 @@ public class TabletStatMgr extends MasterDaemon {
                                 tableDataSize += tabletDataSize;
                                 tableRemoteDataSize += tabletRemoteDataSize;
 
+                                // When all BEs are down, avoid set 
Long.MAX_VALUE to index and table row count. Use 0.
+                                if (tabletRowCount == Long.MAX_VALUE) {
+                                    tabletRowCount = 0L;
+                                }
                                 tableRowCount += tabletRowCount;
                                 indexRowCount += tabletRowCount;
                                 // Only when all tablets of this index are 
reported, we set indexReported to true.
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 99a29c601db..fe673ddd1ba 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -177,7 +177,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
         Map<String, String> params = buildSqlParams();
         StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
         String sql = stringSubstitutor.replace(BASIC_STATS_TEMPLATE);
-        ResultRow resultRow = null;
+        ResultRow resultRow;
         try (AutoCloseConnectContext r = 
StatisticsUtil.buildConnectContext(false)) {
             stmtExecutor = new StmtExecutor(r.connectContext, sql);
             resultRow = stmtExecutor.executeInternalQuery().get(0);
@@ -321,7 +321,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
                 int seekTid = (int) ((i + seek) % ids.size());
                 long tabletId = ids.get(seekTid);
                 sampleTabletIds.add(tabletId);
-                actualSampledRowCount += 
materializedIndex.getTablet(tabletId).getRowCount(true);
+                actualSampledRowCount += materializedIndex.getTablet(tabletId)
+                        .getMinReplicaRowCount(p.getVisibleVersion());
                 if (actualSampledRowCount >= sampleRows && 
!forPartitionColumn) {
                     enough = true;
                     break;
diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
index 413851800e3..14902f6fd94 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
@@ -215,4 +215,40 @@ public class TabletTest {
                 Pair.of(1L, false), Pair.of(2L, false), Pair.of(3L, false), 
Pair.of(4L, true)
         );
     }
+
+    @Test
+    public void testGetMinReplicaRowCount() {
+        Tablet t = new Tablet(1);
+        long row = t.getMinReplicaRowCount(1);
+        Assert.assertEquals(0, row);
+
+        Replica r1 = new Replica(1, 1, 10, 0, 0, 0, 100, ReplicaState.NORMAL, 
0, 10);
+        t.addReplica(r1);
+        row = t.getMinReplicaRowCount(10);
+        Assert.assertEquals(100, row);
+
+        row = t.getMinReplicaRowCount(11);
+        Assert.assertEquals(0, row);
+
+        Replica r2 = new Replica(2, 2, 10, 0, 0, 0, 110, ReplicaState.NORMAL, 
0, 10);
+        Replica r3 = new Replica(3, 3, 10, 0, 0, 0, 90, ReplicaState.NORMAL, 
0, 10);
+        t.addReplica(r2);
+        t.addReplica(r3);
+        row = t.getMinReplicaRowCount(11);
+        Assert.assertEquals(0, row);
+        row = t.getMinReplicaRowCount(9);
+        Assert.assertEquals(90, row);
+
+        r3.setBad(true);
+        row = t.getMinReplicaRowCount(9);
+        Assert.assertEquals(100, row);
+
+        r3.setBad(false);
+        row = t.getMinReplicaRowCount(9);
+        Assert.assertEquals(90, row);
+
+        r2.updateVersion(11);
+        row = t.getMinReplicaRowCount(9);
+        Assert.assertEquals(110, row);
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to