This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 75b019e8160 [opt](statistics) create or update table stats after alter
column stats #29254 (#29721)
75b019e8160 is described below
commit 75b019e816055d7579199507801e58ece0168794
Author: Jibing-Li <[email protected]>
AuthorDate: Tue Jan 9 22:13:14 2024 +0800
[opt](statistics) create or update table stats after alter column stats
#29254 (#29721)
Create or update the table stats after column stats are altered.
Set a flag to disable auto analyze for the table after a user injects column
stats.
---
.../apache/doris/analysis/ShowTableStatsStmt.java | 2 ++
.../org/apache/doris/statistics/AnalysisInfo.java | 5 +++-
.../doris/statistics/AnalysisInfoBuilder.java | 9 ++++++-
.../apache/doris/statistics/AnalysisManager.java | 12 +++++++++
.../doris/statistics/StatisticsAutoCollector.java | 3 +++
.../doris/statistics/StatisticsRepository.java | 8 ++++++
.../apache/doris/statistics/TableStatsMeta.java | 7 +++++-
.../statistics/StatisticsAutoCollectorTest.java | 9 +++++++
.../suites/statistics/analyze_stats.groovy | 29 ++++++++++++++++++++++
9 files changed, 81 insertions(+), 3 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
index 284b6248b85..95d36867da2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
@@ -55,6 +55,7 @@ public class ShowTableStatsStmt extends ShowStmt {
.add("columns")
.add("trigger")
.add("new_partition")
+ .add("user_inject")
.build();
private final TableName tableName;
@@ -151,6 +152,7 @@ public class ShowTableStatsStmt extends ShowStmt {
row.add(tableStatistic.analyzeColumns().toString());
row.add(tableStatistic.jobType.toString());
row.add(String.valueOf(tableStatistic.newPartitionLoaded.get()));
+ row.add(String.valueOf(tableStatistic.userInjected));
result.add(row);
return new ShowResultSet(getMetaData(), result);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index aaff9e59927..97788174e69 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -198,6 +198,8 @@ public class AnalysisInfo implements Writable {
*/
public final long tblUpdateTime;
+ public final boolean userInject;
+
public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long
catalogId, long dbId, long tblId,
Map<String, Set<String>> colToPartitions, Set<String>
partitionNames, String colName, Long indexId,
JobType jobType, AnalysisMode analysisMode, AnalysisMethod
analysisMethod, AnalysisType analysisType,
@@ -205,7 +207,7 @@ public class AnalysisInfo implements Writable {
long lastExecTimeInMs, long timeCostInMs, AnalysisState state,
ScheduleType scheduleType,
boolean isExternalTableLevelTask, boolean partitionOnly, boolean
samplingPartition,
boolean isAllPartition, long partitionCount, CronExpression
cronExpression, boolean forceFull,
- boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean
emptyJob) {
+ boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean
emptyJob, boolean userInject) {
this.jobId = jobId;
this.taskId = taskId;
this.taskIds = taskIds;
@@ -242,6 +244,7 @@ public class AnalysisInfo implements Writable {
this.usingSqlForPartitionColumn = usingSqlForPartitionColumn;
this.tblUpdateTime = tblUpdateTime;
this.emptyJob = emptyJob;
+ this.userInject = userInject;
}
@Override
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
index 310b7816ecd..22f3d22b3ce 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
@@ -63,6 +63,7 @@ public class AnalysisInfoBuilder {
private boolean usingSqlForPartitionColumn;
private long tblUpdateTime;
private boolean emptyJob;
+ private boolean userInject;
public AnalysisInfoBuilder() {
}
@@ -101,6 +102,7 @@ public class AnalysisInfoBuilder {
usingSqlForPartitionColumn = info.usingSqlForPartitionColumn;
tblUpdateTime = info.tblUpdateTime;
emptyJob = info.emptyJob;
+ userInject = info.userInject;
}
public AnalysisInfoBuilder setJobId(long jobId) {
@@ -268,12 +270,17 @@ public class AnalysisInfoBuilder {
return this;
}
+ public AnalysisInfoBuilder setUserInject(boolean userInject) {
+ this.userInject = userInject;
+ return this;
+ }
+
public AnalysisInfo build() {
return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId,
tblId, colToPartitions, partitionNames,
colName, indexId, jobType, analysisMode, analysisMethod,
analysisType, samplePercent,
sampleRows, maxBucketNum, periodTimeInMs, message,
lastExecTimeInMs, timeCostInMs, state, scheduleType,
externalTableLevelTask, partitionOnly, samplingPartition,
isAllPartition, partitionCount,
- cronExpression, forceFull, usingSqlForPartitionColumn,
tblUpdateTime, emptyJob);
+ cronExpression, forceFull, usingSqlForPartitionColumn,
tblUpdateTime, emptyJob, userInject);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 8db50e667f6..f56e800e83d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -560,6 +560,17 @@ public class AnalysisManager implements Writable {
}
}
+ @VisibleForTesting
+ public void updateTableStatsForAlterStats(AnalysisInfo jobInfo, TableIf
tbl) {
+ TableStatsMeta tableStats = findTableStatsStatus(tbl.getId());
+ if (tableStats == null) {
+ updateTableStatsStatus(new TableStatsMeta(0, jobInfo, tbl));
+ } else {
+ tableStats.update(jobInfo, tbl);
+ logCreateTableStats(tableStats);
+ }
+ }
+
public List<AnalysisInfo> showAnalysisJob(ShowAnalyzeStmt stmt) {
return findShowAnalyzeResult(analysisJobInfoMap.values(), stmt);
}
@@ -654,6 +665,7 @@ public class AnalysisManager implements Writable {
}
tableStats.updatedTime = 0;
}
+ tableStats.userInjected = false;
logCreateTableStats(tableStats);
StatisticsRepository.dropStatistics(tblId, cols);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index cc6e7d88db4..f6cf79b0e5c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -158,6 +158,9 @@ public class StatisticsAutoCollector extends
StatisticsCollector {
if (tableStats == null || tableStats.newPartitionLoaded.get()) {
return false;
}
+ if (tableStats.userInjected) {
+ return true;
+ }
return System.currentTimeMillis()
- tableStats.updatedTime <
StatisticsUtil.getHugeTableAutoAnalyzeIntervalInMillis();
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index dc4f89c1e4d..6e88e46beb1 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -303,6 +303,14 @@ public class StatisticsRepository {
StatisticsUtil.execUpdate(INSERT_INTO_COLUMN_STATISTICS, params);
Env.getCurrentEnv().getStatisticsCache()
.updateColStatsCache(objects.table.getId(), -1, colName,
columnStatistic);
+ AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder()
+ .setTblUpdateTime(System.currentTimeMillis())
+ .setColName("")
+ .setColToPartitions(Maps.newHashMap())
+ .setUserInject(true)
+ .setJobType(AnalysisInfo.JobType.MANUAL)
+ .build();
+
Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo,
objects.table);
} else {
// update partition granularity statistics
for (Long partitionId : partitionIds) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
index 00878adcc44..926194a7258 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
@@ -72,6 +72,9 @@ public class TableStatsMeta implements Writable {
@SerializedName("newPartitionLoaded")
public AtomicBoolean newPartitionLoaded = new AtomicBoolean(false);
+ @SerializedName("userInjected")
+ public boolean userInjected;
+
@VisibleForTesting
public TableStatsMeta() {
tblId = 0;
@@ -130,13 +133,15 @@ public class TableStatsMeta implements Writable {
public void update(AnalysisInfo analyzedJob, TableIf tableIf) {
updatedTime = analyzedJob.tblUpdateTime;
+ userInjected = analyzedJob.userInject;
String colNameStr = analyzedJob.colName;
// colName field AnalyzeJob's format likes: "[col1, col2]", we need to
remove brackets here
// TODO: Refactor this later
if (analyzedJob.colName.startsWith("[") &&
analyzedJob.colName.endsWith("]")) {
colNameStr = colNameStr.substring(1, colNameStr.length() - 1);
}
- List<String> cols =
Arrays.stream(colNameStr.split(",")).map(String::trim).collect(Collectors.toList());
+ List<String> cols = Arrays.stream(colNameStr.split(","))
+ .map(String::trim).filter(s ->
!s.isEmpty()).collect(Collectors.toList());
for (String col : cols) {
ColStatsMeta colStatsMeta = colNameToColStatsMeta.get(col);
if (colStatsMeta == null) {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
index c4b2b08720f..6d95cc381b8 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
@@ -321,6 +321,15 @@ public class StatisticsAutoCollectorTest {
};
// can't find table stats meta, which means this table never get
analyzed, so we shouldn't skip it this time
Assertions.assertFalse(autoCollector.skip(olapTable));
+ new MockUp<AnalysisManager>() {
+
+ @Mock
+ public TableStatsMeta findTableStatsStatus(long tblId) {
+ return stats;
+ }
+ };
+ stats.userInjected = true;
+ Assertions.assertTrue(autoCollector.skip(olapTable));
// this is not olap table nor external table, so we should skip it
this time
Assertions.assertTrue(autoCollector.skip(anyOtherTable));
}
diff --git a/regression-test/suites/statistics/analyze_stats.groovy
b/regression-test/suites/statistics/analyze_stats.groovy
index 718f48bc72e..0c5479d6744 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -2654,6 +2654,35 @@ PARTITION `p599` VALUES IN (599)
}
sql """set forbid_unknown_col_stats=true"""
+ // Test alter
+ sql """
+ CREATE TABLE alter_test(
+ `id` int NOT NULL,
+ `name` VARCHAR(25) NOT NULL
+ )ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+ """
+ sql """ANALYZE TABLE alter_test WITH SYNC"""
+ def alter_result = sql """show table stats alter_test"""
+ assertEquals("false", alter_result[0][7])
+ sql """alter table alter_test modify column id set stats
('row_count'='2.0E7', 'ndv'='3927659.0', 'num_nulls'='0.0',
'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');"""
+ alter_result = sql """show table stats alter_test"""
+ assertEquals("true", alter_result[0][7])
+ sql """ANALYZE TABLE alter_test WITH SYNC"""
+ alter_result = sql """show table stats alter_test"""
+ assertEquals("false", alter_result[0][7])
+ sql """alter table alter_test modify column id set stats
('row_count'='2.0E7', 'ndv'='3927659.0', 'num_nulls'='0.0',
'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');"""
+ alter_result = sql """show table stats alter_test"""
+ assertEquals("true", alter_result[0][7])
+ sql """drop stats alter_test"""
+ alter_result = sql """show table stats alter_test"""
+ assertEquals("false", alter_result[0][7])
+
// Test trigger type.
sql """DROP DATABASE IF EXISTS trigger"""
sql """CREATE DATABASE IF NOT EXISTS trigger"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]