This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f4c5ce260b4 [fix](statistics)Fix rowCount==0 while analyzing bug 
(#28969)
f4c5ce260b4 is described below

commit f4c5ce260b4ac4b3974bcd8ef7dcd059ecfdd78b
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Wed Dec 27 23:04:37 2023 +0800

    [fix](statistics)Fix rowCount==0 while analyzing bug (#28969)
    
    Sample analysis needs to get the row count by calling table.getRowCount(). 
The row count returned by this method is not updated in real time, which may 
cause the sample task to scan the whole table.
    This PR fixes that: it sets a flag that indicates the analyze job is for 
an empty table and skips scanning the table. Meanwhile, updatedRows is not 
reset in this case.
    
    Set hugeTableAutoAnalyzeIntervalInMillis = 0 because all default huge table 
sizes have been set to 0.
---
 docs/en/docs/query-acceleration/statistics.md                  |  4 ++--
 docs/zh-CN/docs/query-acceleration/statistics.md               |  4 ++--
 .../src/main/java/org/apache/doris/qe/SessionVariable.java     |  2 +-
 .../main/java/org/apache/doris/statistics/AnalysisInfo.java    |  7 ++++++-
 .../java/org/apache/doris/statistics/AnalysisInfoBuilder.java  | 10 ++++++++--
 .../main/java/org/apache/doris/statistics/AnalysisManager.java |  1 +
 .../java/org/apache/doris/statistics/OlapAnalysisTask.java     |  3 ++-
 .../java/org/apache/doris/statistics/StatisticConstants.java   |  2 +-
 .../org/apache/doris/statistics/StatisticsAutoCollector.java   |  1 +
 .../main/java/org/apache/doris/statistics/TableStatsMeta.java  |  2 +-
 .../apache/doris/statistics/StatisticsAutoCollectorTest.java   |  2 +-
 regression-test/suites/statistics/analyze_stats.groovy         |  2 +-
 12 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/docs/en/docs/query-acceleration/statistics.md 
b/docs/en/docs/query-acceleration/statistics.md
index c7a58277580..4cb0891172d 100644
--- a/docs/en/docs/query-acceleration/statistics.md
+++ b/docs/en/docs/query-acceleration/statistics.md
@@ -295,8 +295,8 @@ mysql> KILL ANALYZE 52357;
 |auto_analyze_end_time|End time for automatic statistics collection|23:59:59|
 |enable_auto_analyze|Enable automatic collection functionality|true|
 |huge_table_default_sample_rows|Sampling rows for large tables|4194304|
-|huge_table_lower_bound_size_in_bytes|Tables with size greater than this value 
will be automatically sampled during collection of statistics|5368709120|
-|huge_table_auto_analyze_interval_in_millis|Controls the minimum time interval 
for automatic ANALYZE on large tables. Tables with sizes greater than 
`huge_table_lower_bound_size_in_bytes * 5` will be ANALYZEed only once within 
this time interval.|43200000|
+|huge_table_lower_bound_size_in_bytes|Tables with size greater than this value 
will be automatically sampled during collection of statistics|0|
+|huge_table_auto_analyze_interval_in_millis|Controls the minimum time interval 
for automatic ANALYZE on large tables. Tables with sizes greater than 
`huge_table_lower_bound_size_in_bytes * 5` will be ANALYZEed only once within 
this time interval.|0|
 |table_stats_health_threshold|Ranges from 0 to 100. If data updates since the 
last statistics collection exceed `(100 - table_stats_health_threshold)%`, the 
table's statistics are considered outdated.|60|
 |analyze_timeout|Controls the timeout for synchronous ANALYZE in seconds|43200|
 |auto_analyze_table_width_threshold|Controls the maximum width of table that 
will be auto analyzed. Table with more columns than this value will not be auto 
analyzed.|70|
diff --git a/docs/zh-CN/docs/query-acceleration/statistics.md 
b/docs/zh-CN/docs/query-acceleration/statistics.md
index 20b535e357b..bff100fa98a 100644
--- a/docs/zh-CN/docs/query-acceleration/statistics.md
+++ b/docs/zh-CN/docs/query-acceleration/statistics.md
@@ -299,8 +299,8 @@ mysql> KILL ANALYZE 52357;
 |auto_analyze_end_time|自动统计信息收集结束时间|23:59:59|
 |enable_auto_analyze|开启自动收集功能|true|
 |huge_table_default_sample_rows|对大表的采样行数|4194304|
-|huge_table_lower_bound_size_in_bytes|大小超过该值的的表,在自动收集时将会自动通过采样收集统计信息|5368709120|
-|huge_table_auto_analyze_interval_in_millis|控制对大表的自动ANALYZE的最小时间间隔,在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes
 * 5的表仅ANALYZE一次|43200000|
+|huge_table_lower_bound_size_in_bytes|大小超过该值的的表,在自动收集时将会自动通过采样收集统计信息|0|
+|huge_table_auto_analyze_interval_in_millis|控制对大表的自动ANALYZE的最小时间间隔,在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes
 * 5的表仅ANALYZE一次|0|
 |table_stats_health_threshold|取值在0-100之间,当自上次统计信息收集操作之后,数据更新量达到 (100 - 
table_stats_health_threshold)% ,认为该表的统计信息已过时|60|
 |analyze_timeout|控制ANALYZE超时时间,单位为秒|43200|
 |auto_analyze_table_width_threshold|控制自动统计信息收集处理的最大表宽度,列数大于该值的表不会参与自动统计信息收集|70|
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 35545059901..c1ea2f29ff2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -1450,7 +1450,7 @@ public class SessionVariable implements Serializable, 
Writable {
                     "This controls the minimum time interval for automatic 
ANALYZE on large tables."
                             + "Within this interval,"
                             + "tables larger than 
huge_table_lower_bound_size_in_bytes are analyzed only once."})
-    public long hugeTableAutoAnalyzeIntervalInMillis = 
TimeUnit.HOURS.toMillis(12);
+    public long hugeTableAutoAnalyzeIntervalInMillis = 
TimeUnit.HOURS.toMillis(0);
 
     @VariableMgr.VarAttr(name = 
EXTERNAL_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS, flag = VariableMgr.GLOBAL,
             description = {"控制对外表的自动ANALYZE的最小时间间隔,在该时间间隔内的外表仅ANALYZE一次",
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index 65bb4a5dd95..aaff9e59927 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -188,6 +188,9 @@ public class AnalysisInfo implements Writable {
 
     @SerializedName("endTime")
     public long endTime;
+
+    @SerializedName("emptyJob")
+    public final boolean emptyJob;
     /**
      *
      * Used to store the newest partition version of tbl when creating this 
job.
@@ -202,7 +205,7 @@ public class AnalysisInfo implements Writable {
             long lastExecTimeInMs, long timeCostInMs, AnalysisState state, 
ScheduleType scheduleType,
             boolean isExternalTableLevelTask, boolean partitionOnly, boolean 
samplingPartition,
             boolean isAllPartition, long partitionCount, CronExpression 
cronExpression, boolean forceFull,
-            boolean usingSqlForPartitionColumn, long tblUpdateTime) {
+            boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean 
emptyJob) {
         this.jobId = jobId;
         this.taskId = taskId;
         this.taskIds = taskIds;
@@ -238,6 +241,7 @@ public class AnalysisInfo implements Writable {
         this.forceFull = forceFull;
         this.usingSqlForPartitionColumn = usingSqlForPartitionColumn;
         this.tblUpdateTime = tblUpdateTime;
+        this.emptyJob = emptyJob;
     }
 
     @Override
@@ -279,6 +283,7 @@ public class AnalysisInfo implements Writable {
         }
         sj.add("forceFull: " + forceFull);
         sj.add("usingSqlForPartitionColumn: " + usingSqlForPartitionColumn);
+        sj.add("emptyJob: " + emptyJob);
         return sj.toString();
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
index 204aba6d0f8..310b7816ecd 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
@@ -61,8 +61,8 @@ public class AnalysisInfoBuilder {
     private CronExpression cronExpression;
     private boolean forceFull;
     private boolean usingSqlForPartitionColumn;
-
     private long tblUpdateTime;
+    private boolean emptyJob;
 
     public AnalysisInfoBuilder() {
     }
@@ -100,6 +100,7 @@ public class AnalysisInfoBuilder {
         forceFull = info.forceFull;
         usingSqlForPartitionColumn = info.usingSqlForPartitionColumn;
         tblUpdateTime = info.tblUpdateTime;
+        emptyJob = info.emptyJob;
     }
 
     public AnalysisInfoBuilder setJobId(long jobId) {
@@ -262,12 +263,17 @@ public class AnalysisInfoBuilder {
         return this;
     }
 
+    public AnalysisInfoBuilder setEmptyJob(boolean emptyJob) {
+        this.emptyJob = emptyJob;
+        return this;
+    }
+
     public AnalysisInfo build() {
         return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId, 
tblId, colToPartitions, partitionNames,
                 colName, indexId, jobType, analysisMode, analysisMethod, 
analysisType, samplePercent,
                 sampleRows, maxBucketNum, periodTimeInMs, message, 
lastExecTimeInMs, timeCostInMs, state, scheduleType,
                 externalTableLevelTask, partitionOnly, samplingPartition, 
isAllPartition, partitionCount,
-                cronExpression, forceFull, usingSqlForPartitionColumn, 
tblUpdateTime);
+                cronExpression, forceFull, usingSqlForPartitionColumn, 
tblUpdateTime, emptyJob);
     }
 
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 0bf24e0c288..39ae191d45a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -519,6 +519,7 @@ public class AnalysisManager implements Writable {
         infoBuilder.setColToPartitions(colToPartitions);
         infoBuilder.setTaskIds(Lists.newArrayList());
         infoBuilder.setTblUpdateTime(table.getUpdateTime());
+        infoBuilder.setEmptyJob(table instanceof OlapTable && 
table.getRowCount() == 0);
         return infoBuilder.build();
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index e062e4eef85..81348c1f948 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -61,7 +61,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
 
     public void doExecute() throws Exception {
         Set<String> partitionNames = info.colToPartitions.get(info.colName);
-        if (partitionNames == null || partitionNames.isEmpty()) {
+        if ((info.emptyJob && 
info.analysisMethod.equals(AnalysisInfo.AnalysisMethod.SAMPLE))
+                || partitionNames == null || partitionNames.isEmpty()) {
             if (partitionNames == null) {
                 LOG.warn("Table {}.{}.{}, partitionNames for column {} is 
null. ColToPartitions:[{}]",
                         info.catalogId, info.dbId, info.tblId, info.colName, 
info.colToPartitions);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
index 3d6d2fe52aa..857a50e234c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
@@ -88,7 +88,7 @@ public class StatisticConstants {
     public static final long HUGE_TABLE_DEFAULT_SAMPLE_ROWS = 4194304;
     public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 0;
 
-    public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = 
TimeUnit.HOURS.toMillis(12);
+    public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = 
TimeUnit.HOURS.toMillis(0);
 
     public static final long EXTERNAL_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = 
TimeUnit.HOURS.toMillis(24);
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index ee50471175d..f799da56206 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -170,6 +170,7 @@ public class StatisticsAutoCollector extends 
StatisticsCollector {
                 .setLastExecTimeInMs(System.currentTimeMillis())
                 .setJobType(JobType.SYSTEM)
                 .setTblUpdateTime(table.getUpdateTime())
+                .setEmptyJob(table instanceof OlapTable && table.getRowCount() 
== 0)
                 .build();
         analysisInfos.add(jobInfo);
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
index f500ab09f0b..eb6672ffe18 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
@@ -149,7 +149,7 @@ public class TableStatsMeta implements Writable {
             if (tableIf instanceof OlapTable) {
                 rowCount = tableIf.getRowCount();
             }
-            if (analyzedJob.colToPartitions.keySet()
+            if (!analyzedJob.emptyJob && analyzedJob.colToPartitions.keySet()
                     .containsAll(tableIf.getBaseSchema().stream()
                             .filter(c -> 
!StatisticsUtil.isUnsupportedType(c.getType()))
                             
.map(Column::getName).collect(Collectors.toSet()))) {
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
index 0b4b2203d0d..87342202fb2 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
@@ -299,7 +299,7 @@ public class StatisticsAutoCollectorTest {
         // A very huge table has been updated recently, so we should skip it 
this time
         stats.updatedTime = System.currentTimeMillis() - 1000;
         StatisticsAutoCollector autoCollector = new StatisticsAutoCollector();
-        Assertions.assertTrue(autoCollector.skip(olapTable));
+        Assertions.assertFalse(autoCollector.skip(olapTable));
         // The update of this huge table is long time ago, so we shouldn't 
skip it this time
         stats.updatedTime = System.currentTimeMillis()
                 - StatisticsUtil.getHugeTableAutoAnalyzeIntervalInMillis() - 
10000;
diff --git a/regression-test/suites/statistics/analyze_stats.groovy 
b/regression-test/suites/statistics/analyze_stats.groovy
index e7e89f858fb..64967280ce9 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -1168,7 +1168,7 @@ PARTITION `p599` VALUES IN (599)
     sql """ INSERT INTO test_updated_rows SELECT * FROM test_updated_rows """
     sql """ANALYZE TABLE test_updated_rows WITH SYNC"""
     def cnt2 = sql """ SHOW TABLE STATS test_updated_rows """
-    assertEquals(Integer.valueOf(cnt2[0][0]), 0)
+    assertTrue(Integer.valueOf(cnt2[0][0]) == 0 || Integer.valueOf(cnt2[0][0]) 
== 8)
 
     // test analyze specific column
     sql """CREATE TABLE test_analyze_specific_column (col1 varchar(11451) not 
null, col2 int not null, col3 int not null)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to