This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 2b82bec4066 [fix](statistics)Add row count to AnalysisJob and update 
tableStats using this row count after analyze. (#38428)
2b82bec4066 is described below

commit 2b82bec4066d5950911a2237939b4acb6fd2f593
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Sat Jul 27 11:32:29 2024 +0800

    [fix](statistics)Add row count to AnalysisJob and update tableStats using 
this row count after analyze. (#38428)
    
    Add row count to AnalysisJob and update tableStats using this row count
    after analyze.
    Take row count into consideration in the stats health calculation.
---
 .../java/org/apache/doris/catalog/OlapTable.java   | 27 ++++++++-
 .../org/apache/doris/statistics/AnalysisInfo.java  |  7 ++-
 .../doris/statistics/AnalysisInfoBuilder.java      |  9 ++-
 .../apache/doris/statistics/AnalysisManager.java   |  2 +
 .../apache/doris/statistics/OlapAnalysisTask.java  | 12 +++-
 .../doris/statistics/StatisticsAutoCollector.java  |  2 +
 .../apache/doris/statistics/TableStatsMeta.java    |  2 +-
 .../doris/statistics/AnalysisManagerTest.java      | 70 ++++++++++------------
 .../doris/statistics/TableStatsMetaTest.java       | 10 +---
 .../suites/statistics/analyze_stats.groovy         | 28 +++++++++
 10 files changed, 113 insertions(+), 56 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index 9a66cbc68ae..b0169571dfc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -1300,12 +1300,33 @@ public class OlapTable extends Table implements 
MTMVRelatedTableIf {
                 .collect(Collectors.toSet())))) {
             return true;
         }
-        long rowCount = getRowCount();
-        if (rowCount > 0 && tblStats.rowCount == 0) {
+
+        // 1 Check row count.
+        long currentRowCount = getRowCount();
+        long lastAnalyzeRowCount = tblStats.rowCount;
+        // 1.1 Empty table -> non-empty table. Need analyze.
+        if (currentRowCount != 0 && lastAnalyzeRowCount == 0) {
+            return true;
+        }
+        // 1.2 Non-empty table -> empty table. Need analyze;
+        if (currentRowCount == 0 && lastAnalyzeRowCount != 0) {
+            return true;
+        }
+        // 1.3 Table is still empty. Not need to analyze. lastAnalyzeRowCount 
== 0 is always true here.
+        if (currentRowCount == 0) {
+            return false;
+        }
+        // 1.4 If row count changed more than the threshold, need analyze.
+        // lastAnalyzeRowCount == 0 is always false here.
+        double changeRate =
+                ((double) Math.abs(currentRowCount - lastAnalyzeRowCount) / 
lastAnalyzeRowCount) * 100.0;
+        if (changeRate > (100 - 
StatisticsUtil.getTableStatsHealthThreshold())) {
             return true;
         }
+
+        // 2. Check update rows.
         long updateRows = tblStats.updatedRows.get();
-        int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
+        int tblHealth = StatisticsUtil.getTableHealth(currentRowCount, 
updateRows);
         return tblHealth < StatisticsUtil.getTableStatsHealthThreshold();
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index c167db2228d..ab7f9935c72 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -190,6 +190,9 @@ public class AnalysisInfo implements Writable {
 
     @SerializedName("emptyJob")
     public final boolean emptyJob;
+
+    @SerializedName("rowCount")
+    public final long rowCount;
     /**
      *
      * Used to store the newest partition version of tbl when creating this 
job.
@@ -206,7 +209,8 @@ public class AnalysisInfo implements Writable {
             long lastExecTimeInMs, long timeCostInMs, AnalysisState state, 
ScheduleType scheduleType,
             boolean isExternalTableLevelTask, boolean partitionOnly, boolean 
samplingPartition,
             boolean isAllPartition, long partitionCount, CronExpression 
cronExpression, boolean forceFull,
-            boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean 
emptyJob, boolean userInject) {
+            boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean 
emptyJob, boolean userInject,
+            long rowCount) {
         this.jobId = jobId;
         this.taskId = taskId;
         this.taskIds = taskIds;
@@ -244,6 +248,7 @@ public class AnalysisInfo implements Writable {
         this.tblUpdateTime = tblUpdateTime;
         this.emptyJob = emptyJob;
         this.userInject = userInject;
+        this.rowCount = rowCount;
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
index 00cf9f7b1bc..6541027538a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
@@ -64,6 +64,7 @@ public class AnalysisInfoBuilder {
     private long tblUpdateTime;
     private boolean emptyJob;
     private boolean userInject;
+    private long rowCount;
 
     public AnalysisInfoBuilder() {
     }
@@ -103,6 +104,7 @@ public class AnalysisInfoBuilder {
         tblUpdateTime = info.tblUpdateTime;
         emptyJob = info.emptyJob;
         userInject = info.userInject;
+        rowCount = info.rowCount;
     }
 
     public AnalysisInfoBuilder setJobId(long jobId) {
@@ -275,12 +277,17 @@ public class AnalysisInfoBuilder {
         return this;
     }
 
+    public AnalysisInfoBuilder setRowCount(long rowCount) {
+        this.rowCount = rowCount;
+        return this;
+    }
+
     public AnalysisInfo build() {
         return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId, 
tblId, jobColumns, partitionNames,
                 colName, indexId, jobType, analysisMode, analysisMethod, 
analysisType, samplePercent,
                 sampleRows, maxBucketNum, periodTimeInMs, message, 
lastExecTimeInMs, timeCostInMs, state, scheduleType,
                 externalTableLevelTask, partitionOnly, samplingPartition, 
isAllPartition, partitionCount,
-                cronExpression, forceFull, usingSqlForPartitionColumn, 
tblUpdateTime, emptyJob, userInject);
+                cronExpression, forceFull, usingSqlForPartitionColumn, 
tblUpdateTime, emptyJob, userInject, rowCount);
     }
 
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 06f6ca331b3..71f5ce0fa87 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -354,6 +354,8 @@ public class AnalysisManager implements Writable {
         infoBuilder.setTblUpdateTime(table.getUpdateTime());
         infoBuilder.setEmptyJob(table instanceof OlapTable && 
table.getRowCount() == 0
                 && analysisMethod.equals(AnalysisMethod.SAMPLE));
+        long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0 
: table.getRowCount();
+        infoBuilder.setRowCount(rowCount);
         return infoBuilder.build();
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index dffba735fe9..c9b5edaff22 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -19,6 +19,7 @@ package org.apache.doris.statistics;
 
 import org.apache.doris.analysis.CreateMaterializedViewStmt;
 import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.KeysType;
 import org.apache.doris.catalog.MaterializedIndex;
 import org.apache.doris.catalog.MaterializedIndexMeta;
@@ -37,7 +38,6 @@ import org.apache.commons.text.StringSubstitutor;
 
 import java.security.SecureRandom;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -69,10 +69,16 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
             return;
         }
         List<Pair<String, String>> columnList = info.jobColumns;
-        if (StatisticsUtil.isEmptyTable(tbl, info.analysisMethod) || 
columnList == null || columnList.isEmpty()) {
+        if (columnList == null || columnList.isEmpty()) {
+            LOG.warn("Table {}.{}.{}, jobColumns is null or empty.", 
info.catalogId, info.dbId, info.tblId);
+            throw new RuntimeException();
+        }
+        if (StatisticsUtil.isEmptyTable(tbl, info.analysisMethod)) {
             StatsId statsId = new StatsId(concatColumnStatsId(), 
info.catalogId, info.dbId,
                     info.tblId, info.indexId, info.colName, null);
-            job.appendBuf(this, Arrays.asList(new ColStatsData(statsId)));
+            ColStatsData colStatsData = new ColStatsData(statsId);
+            
Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData);
+            job.appendBuf(this, Collections.singletonList(colStatsData));
             return;
         }
         if (tableSample != null) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index 9ca971845b7..4408a0d9255 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -181,6 +181,7 @@ public class StatisticsAutoCollector extends 
StatisticsCollector {
             List<AnalysisInfo> analysisInfos, TableIf table) {
         AnalysisMethod analysisMethod = table.getDataSize(true) >= 
StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
                 ? AnalysisMethod.SAMPLE : AnalysisMethod.FULL;
+        long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0 
: table.getRowCount();
         AnalysisInfo jobInfo = new AnalysisInfoBuilder()
                 .setJobId(Env.getCurrentEnv().getNextId())
                 .setCatalogId(db.getCatalog().getId())
@@ -200,6 +201,7 @@ public class StatisticsAutoCollector extends 
StatisticsCollector {
                 .setTblUpdateTime(table.getUpdateTime())
                 .setEmptyJob(table instanceof OlapTable && table.getRowCount() 
== 0
                     && analysisMethod.equals(AnalysisMethod.SAMPLE))
+                .setRowCount(rowCount)
                 .build();
         analysisInfos.add(jobInfo);
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
index 3b9b1e2bead..a9a580c8b40 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
@@ -141,7 +141,7 @@ public class TableStatsMeta implements Writable {
         jobType = analyzedJob.jobType;
         if (tableIf != null) {
             if (tableIf instanceof OlapTable) {
-                rowCount = analyzedJob.emptyJob ? 0 : tableIf.getRowCount();
+                rowCount = analyzedJob.rowCount;
             }
             if (analyzedJob.emptyJob) {
                 return;
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
index 674456b0b46..8a803bd2a30 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
@@ -34,12 +34,12 @@ import 
org.apache.doris.statistics.AnalysisInfo.ScheduleType;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.Lists;
 import mockit.Expectations;
 import mockit.Injectable;
 import mockit.Mock;
 import mockit.MockUp;
 import mockit.Mocked;
+import org.apache.hadoop.util.Lists;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
@@ -48,7 +48,6 @@ import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
 // CHECKSTYLE OFF
 public class AnalysisManagerTest {
@@ -265,63 +264,58 @@ public class AnalysisManagerTest {
     public void testReAnalyze() {
         new MockUp<OlapTable>() {
 
+            int count = 0;
+            int[] rowCount = new int[]{100, 200, 1, 0, 0, 100};
+
             final Column c = new Column("col1", PrimitiveType.INT);
             @Mock
-            public List<Column> getBaseSchema() {
-                return Lists.newArrayList(c);
+            public long getRowCount() {
+                return rowCount[count++];
             }
 
             @Mock
-            public List<Column> getColumns() { return Lists.newArrayList(c); }
-
-            @Mock
-            public List<Pair<String, String>> getColumnIndexPairs(Set<String> 
columns) {
-                List<Pair<String, String>> jobList = Lists.newArrayList();
-                jobList.add(Pair.of("1", "1"));
-                jobList.add(Pair.of("2", "2"));
-                jobList.add(Pair.of("3", "3"));
-                return jobList;
+            public List<Column> getBaseSchema() {
+                return org.apache.hadoop.util.Lists.newArrayList(c);
             }
-        };
-        OlapTable olapTable = new OlapTable();
-        List<Pair<String, String>> jobList = Lists.newArrayList();
-        jobList.add(Pair.of("1", "1"));
-        jobList.add(Pair.of("2", "2"));
-        TableStatsMeta stats0 = new TableStatsMeta(
-            0, new AnalysisInfoBuilder().setJobColumns(jobList)
-            .setColName("col1").build(), olapTable);
-        Assertions.assertTrue(olapTable.needReAnalyzeTable(stats0));
 
-        new MockUp<OlapTable>() {
-            int count = 0;
-            int[] rowCount = new int[]{100, 100, 200, 200, 1, 1};
-
-            @Mock
-            public long getRowCount() {
-                return rowCount[count++];
-            }
             @Mock
-            public List<Pair<String, String>> getColumnIndexPairs(Set<String> 
columns) {
-                List<Pair<String, String>> jobList = Lists.newArrayList();
-                return jobList;
+            public List<Column> getColumns() {
+                return Lists.newArrayList(c);
             }
+
         };
+        OlapTable olapTable = new OlapTable();
         TableStatsMeta stats1 = new TableStatsMeta(
                 50, new AnalysisInfoBuilder().setJobColumns(new ArrayList<>())
-                .setColName("col1").build(), olapTable);
-        stats1.updatedRows.addAndGet(50);
+                .setColName("col1").setRowCount(100).build(), olapTable);
+        stats1.updatedRows.addAndGet(70);
 
         Assertions.assertTrue(olapTable.needReAnalyzeTable(stats1));
         TableStatsMeta stats2 = new TableStatsMeta(
-                190, new AnalysisInfoBuilder()
-                .setJobColumns(new ArrayList<>()).setColName("col1").build(), 
olapTable);
+                190, new AnalysisInfoBuilder().setJobColumns(new ArrayList<>())
+                .setColName("col1").setRowCount(200).build(), olapTable);
         stats2.updatedRows.addAndGet(20);
         Assertions.assertFalse(olapTable.needReAnalyzeTable(stats2));
 
         TableStatsMeta stats3 = new TableStatsMeta(0, new AnalysisInfoBuilder()
-                .setJobColumns(new 
ArrayList<>()).setEmptyJob(true).setColName("col1").build(), olapTable);
+                .setEmptyJob(true).setColName("col1").setJobColumns(new 
ArrayList<>())
+                .setRowCount(0).build(), olapTable);
         Assertions.assertTrue(olapTable.needReAnalyzeTable(stats3));
 
+        TableStatsMeta stats4 = new TableStatsMeta(0, new AnalysisInfoBuilder()
+                .setEmptyJob(true).setColName("col1").setJobColumns(new 
ArrayList<>())
+                .setRowCount(1).build(), olapTable);
+        Assertions.assertTrue(olapTable.needReAnalyzeTable(stats4));
+
+        TableStatsMeta stats5 = new TableStatsMeta(0, new AnalysisInfoBuilder()
+                .setEmptyJob(true).setColName("col1").setJobColumns(new 
ArrayList<>())
+                .setRowCount(0).build(), olapTable);
+        Assertions.assertFalse(olapTable.needReAnalyzeTable(stats5));
+
+        TableStatsMeta stats6 = new TableStatsMeta(0, new AnalysisInfoBuilder()
+                .setEmptyJob(true).setColName("col1").setJobColumns(new 
ArrayList<>())
+                .setRowCount(30).build(), olapTable);
+        Assertions.assertTrue(olapTable.needReAnalyzeTable(stats6));
     }
 
     @Test
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java
index 94eab9e00cc..349f415a3c4 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java
@@ -19,8 +19,6 @@ package org.apache.doris.statistics;
 
 import org.apache.doris.catalog.OlapTable;
 
-import mockit.Mock;
-import mockit.MockUp;
 import mockit.Mocked;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
@@ -31,15 +29,9 @@ class TableStatsMetaTest {
 
     @Test
     void update(@Mocked OlapTable table) {
-        new MockUp<OlapTable>() {
-            @Mock
-            public long getRowCount() {
-                return 4;
-            }
-        };
         TableStatsMeta tableStatsMeta = new TableStatsMeta();
         AnalysisInfo jobInfo = new AnalysisInfoBuilder().setJobColumns(new 
ArrayList<>())
-                .setColName("col1").build();
+                .setColName("col1").setRowCount(4).build();
         tableStatsMeta.update(jobInfo, table);
         Assertions.assertEquals(4, tableStatsMeta.rowCount);
     }
diff --git a/regression-test/suites/statistics/analyze_stats.groovy 
b/regression-test/suites/statistics/analyze_stats.groovy
index db56b400ea9..5a6e753e0a3 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -2845,6 +2845,34 @@ PARTITION `p599` VALUES IN (599)
     assertEquals("521779.0", alter_result[0][5])
     assertEquals("7.142863009760572", alter_result[0][6])
 
+    // Test analyze after new empty partition created.
+    sql """CREATE TABLE `part` (
+          `id` INT NULL,
+          `colint` INT NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        PARTITION BY RANGE(`id`)
+        (PARTITION p1 VALUES [("-2147483648"), ("10000")),
+        PARTITION p2 VALUES [("10000"), ("20000")))
+        DISTRIBUTED BY HASH(`id`) BUCKETS 3 
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    sql """analyze table part with sync;"""
+    sql """Insert into part values (1, 1), (10001, 10001);"""
+    sql """analyze table part with sync;"""
+    sleep(1000)
+    sql """alter table part add partition p3 VALUES [("20000"), ("30000"));"""
+    sql """analyze table part with sync;"""
+    sql """analyze table part with sync;"""
+    def new_part_result = sql """show column stats part(id)"""
+    assertEquals("2.0", new_part_result[0][2])
+    new_part_result = sql """show column stats part(colint)"""
+    assertEquals("2.0", new_part_result[0][2])
+
 
     sql """DROP DATABASE IF EXISTS trigger"""
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to