This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new c9886c5c4c6 [improvement](statistics)Use min row count of all replicas as tablet/table row count. (#41894) c9886c5c4c6 is described below commit c9886c5c4c607cd45c9df2a83e48be9f288b8112 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Wed Oct 16 19:11:34 2024 +0800 [improvement](statistics)Use min row count of all replicas as tablet/table row count. (#41894) Use the minimum row count among all replicas with the same version as the tablet/table row count. The replica with the least row count has performed more compaction operations than the others, so using its row count as the tablet row count is more accurate. Meanwhile, use this minimum row count as the tablet row count when choosing tablets during sample analyze. --- .../main/java/org/apache/doris/catalog/Tablet.java | 17 ++++++++++ .../org/apache/doris/catalog/TabletStatMgr.java | 11 +++++-- .../apache/doris/statistics/OlapAnalysisTask.java | 5 +-- .../java/org/apache/doris/catalog/TabletTest.java | 36 ++++++++++++++++++++++ 4 files changed, 65 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java index 9fb44f07126..2dfdab1ad3d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java @@ -528,6 +528,23 @@ public class Tablet extends MetaObject { return singleReplica ? Double.valueOf(s.average().orElse(0)).longValue() : s.sum(); } + // Get the least row count among all valid replicas. + // The replica with the least row count is the most accurate one. Because it performs most compaction. 
+ public long getMinReplicaRowCount(long version) { + long minRowCount = Long.MAX_VALUE; + long maxReplicaVersion = 0; + for (Replica r : replicas) { + if (r.isAlive() + && r.checkVersionCatchUp(version, false) + && (r.getVersion() > maxReplicaVersion + || r.getVersion() == maxReplicaVersion && r.getRowCount() < minRowCount)) { + minRowCount = r.getRowCount(); + maxReplicaVersion = r.getVersion(); + } + } + return minRowCount == Long.MAX_VALUE ? 0 : minRowCount; + } + /** * A replica is healthy only if * 1. the backend is available diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java index 18e1bfd6526..aa46c362e38 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java @@ -134,15 +134,18 @@ public class TabletStatMgr extends MasterDaemon { Long tabletDataSize = 0L; Long tabletRemoteDataSize = 0L; - Long tabletRowCount = 0L; + Long tabletRowCount = Long.MAX_VALUE; boolean tabletReported = false; for (Replica replica : tablet.getReplicas()) { LOG.debug("Table {} replica {} current version {}, report version {}", olapTable.getName(), replica.getId(), replica.getVersion(), replica.getLastReportVersion()); + // Replica with less row count is more accurate than the others + // when replicas' version are identical. Because less row count + // means this replica does more compaction than the others. if (replica.checkVersionCatchUp(version, false) - && replica.getRowCount() >= tabletRowCount) { + && replica.getRowCount() < tabletRowCount) { // 1. If replica version and reported replica version are all equal to // PARTITION_INIT_VERSION, set tabletReported to true, which indicates this // tablet is empty for sure when previous report. 
@@ -173,6 +176,10 @@ public class TabletStatMgr extends MasterDaemon { tableDataSize += tabletDataSize; tableRemoteDataSize += tabletRemoteDataSize; + // When all BEs are down, avoid set Long.MAX_VALUE to index and table row count. Use 0. + if (tabletRowCount == Long.MAX_VALUE) { + tabletRowCount = 0L; + } tableRowCount += tabletRowCount; indexRowCount += tabletRowCount; // Only when all tablets of this index are reported, we set indexReported to true. diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 99a29c601db..fe673ddd1ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -177,7 +177,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask { Map<String, String> params = buildSqlParams(); StringSubstitutor stringSubstitutor = new StringSubstitutor(params); String sql = stringSubstitutor.replace(BASIC_STATS_TEMPLATE); - ResultRow resultRow = null; + ResultRow resultRow; try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext(false)) { stmtExecutor = new StmtExecutor(r.connectContext, sql); resultRow = stmtExecutor.executeInternalQuery().get(0); @@ -321,7 +321,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask { int seekTid = (int) ((i + seek) % ids.size()); long tabletId = ids.get(seekTid); sampleTabletIds.add(tabletId); - actualSampledRowCount += materializedIndex.getTablet(tabletId).getRowCount(true); + actualSampledRowCount += materializedIndex.getTablet(tabletId) + .getMinReplicaRowCount(p.getVisibleVersion()); if (actualSampledRowCount >= sampleRows && !forPartitionColumn) { enough = true; break; diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java index 413851800e3..14902f6fd94 100644 --- 
a/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java @@ -215,4 +215,40 @@ public class TabletTest { Pair.of(1L, false), Pair.of(2L, false), Pair.of(3L, false), Pair.of(4L, true) ); } + + @Test + public void testGetMinReplicaRowCount() { + Tablet t = new Tablet(1); + long row = t.getMinReplicaRowCount(1); + Assert.assertEquals(0, row); + + Replica r1 = new Replica(1, 1, 10, 0, 0, 0, 100, ReplicaState.NORMAL, 0, 10); + t.addReplica(r1); + row = t.getMinReplicaRowCount(10); + Assert.assertEquals(100, row); + + row = t.getMinReplicaRowCount(11); + Assert.assertEquals(0, row); + + Replica r2 = new Replica(2, 2, 10, 0, 0, 0, 110, ReplicaState.NORMAL, 0, 10); + Replica r3 = new Replica(3, 3, 10, 0, 0, 0, 90, ReplicaState.NORMAL, 0, 10); + t.addReplica(r2); + t.addReplica(r3); + row = t.getMinReplicaRowCount(11); + Assert.assertEquals(0, row); + row = t.getMinReplicaRowCount(9); + Assert.assertEquals(90, row); + + r3.setBad(true); + row = t.getMinReplicaRowCount(9); + Assert.assertEquals(100, row); + + r3.setBad(false); + row = t.getMinReplicaRowCount(9); + Assert.assertEquals(90, row); + + r2.updateVersion(11); + row = t.getMinReplicaRowCount(9); + Assert.assertEquals(110, row); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org