This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 2b82bec4066 [fix](statistics)Add row count to AnalysisJob and update tableStats using this row count after analyze. (#38428) 2b82bec4066 is described below commit 2b82bec4066d5950911a2237939b4acb6fd2f593 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Sat Jul 27 11:32:29 2024 +0800 [fix](statistics)Add row count to AnalysisJob and update tableStats using this row count after analyze. (#38428) Add row count to AnalysisJob and update tableStats using this row count after analyze. Take row count into consideration in stats health calculation. --- .../java/org/apache/doris/catalog/OlapTable.java | 27 ++++++++- .../org/apache/doris/statistics/AnalysisInfo.java | 7 ++- .../doris/statistics/AnalysisInfoBuilder.java | 9 ++- .../apache/doris/statistics/AnalysisManager.java | 2 + .../apache/doris/statistics/OlapAnalysisTask.java | 12 +++- .../doris/statistics/StatisticsAutoCollector.java | 2 + .../apache/doris/statistics/TableStatsMeta.java | 2 +- .../doris/statistics/AnalysisManagerTest.java | 70 ++++++++++------------ .../doris/statistics/TableStatsMetaTest.java | 10 +--- .../suites/statistics/analyze_stats.groovy | 28 +++++++++ 10 files changed, 113 insertions(+), 56 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 9a66cbc68ae..b0169571dfc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -1300,12 +1300,33 @@ public class OlapTable extends Table implements MTMVRelatedTableIf { .collect(Collectors.toSet())))) { return true; } - long rowCount = getRowCount(); - if (rowCount > 0 && tblStats.rowCount == 0) { + + // 1 Check row count. + long currentRowCount = getRowCount(); + long lastAnalyzeRowCount = tblStats.rowCount; + // 1.1 Empty table -> non-empty table. 
Need analyze. + if (currentRowCount != 0 && lastAnalyzeRowCount == 0) { + return true; + } + // 1.2 Non-empty table -> empty table. Need analyze; + if (currentRowCount == 0 && lastAnalyzeRowCount != 0) { + return true; + } + // 1.3 Table is still empty. Not need to analyze. lastAnalyzeRowCount == 0 is always true here. + if (currentRowCount == 0) { + return false; + } + // 1.4 If row count changed more than the threshold, need analyze. + // lastAnalyzeRowCount == 0 is always false here. + double changeRate = + ((double) Math.abs(currentRowCount - lastAnalyzeRowCount) / lastAnalyzeRowCount) * 100.0; + if (changeRate > (100 - StatisticsUtil.getTableStatsHealthThreshold())) { return true; } + + // 2. Check update rows. long updateRows = tblStats.updatedRows.get(); - int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows); + int tblHealth = StatisticsUtil.getTableHealth(currentRowCount, updateRows); return tblHealth < StatisticsUtil.getTableStatsHealthThreshold(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index c167db2228d..ab7f9935c72 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -190,6 +190,9 @@ public class AnalysisInfo implements Writable { @SerializedName("emptyJob") public final boolean emptyJob; + + @SerializedName("rowCount") + public final long rowCount; /** * * Used to store the newest partition version of tbl when creating this job. 
@@ -206,7 +209,8 @@ public class AnalysisInfo implements Writable { long lastExecTimeInMs, long timeCostInMs, AnalysisState state, ScheduleType scheduleType, boolean isExternalTableLevelTask, boolean partitionOnly, boolean samplingPartition, boolean isAllPartition, long partitionCount, CronExpression cronExpression, boolean forceFull, - boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob, boolean userInject) { + boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob, boolean userInject, + long rowCount) { this.jobId = jobId; this.taskId = taskId; this.taskIds = taskIds; @@ -244,6 +248,7 @@ public class AnalysisInfo implements Writable { this.tblUpdateTime = tblUpdateTime; this.emptyJob = emptyJob; this.userInject = userInject; + this.rowCount = rowCount; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java index 00cf9f7b1bc..6541027538a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java @@ -64,6 +64,7 @@ public class AnalysisInfoBuilder { private long tblUpdateTime; private boolean emptyJob; private boolean userInject; + private long rowCount; public AnalysisInfoBuilder() { } @@ -103,6 +104,7 @@ public class AnalysisInfoBuilder { tblUpdateTime = info.tblUpdateTime; emptyJob = info.emptyJob; userInject = info.userInject; + rowCount = info.rowCount; } public AnalysisInfoBuilder setJobId(long jobId) { @@ -275,12 +277,17 @@ public class AnalysisInfoBuilder { return this; } + public AnalysisInfoBuilder setRowCount(long rowCount) { + this.rowCount = rowCount; + return this; + } + public AnalysisInfo build() { return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId, tblId, jobColumns, partitionNames, colName, indexId, jobType, analysisMode, analysisMethod, analysisType, 
samplePercent, sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType, externalTableLevelTask, partitionOnly, samplingPartition, isAllPartition, partitionCount, - cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob, userInject); + cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob, userInject, rowCount); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 06f6ca331b3..71f5ce0fa87 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -354,6 +354,8 @@ public class AnalysisManager implements Writable { infoBuilder.setTblUpdateTime(table.getUpdateTime()); infoBuilder.setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0 && analysisMethod.equals(AnalysisMethod.SAMPLE)); + long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 
0 : table.getRowCount(); + infoBuilder.setRowCount(rowCount); return infoBuilder.build(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index dffba735fe9..c9b5edaff22 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -19,6 +19,7 @@ package org.apache.doris.statistics; import org.apache.doris.analysis.CreateMaterializedViewStmt; import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Env; import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.MaterializedIndexMeta; @@ -37,7 +38,6 @@ import org.apache.commons.text.StringSubstitutor; import java.security.SecureRandom; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -69,10 +69,16 @@ public class OlapAnalysisTask extends BaseAnalysisTask { return; } List<Pair<String, String>> columnList = info.jobColumns; - if (StatisticsUtil.isEmptyTable(tbl, info.analysisMethod) || columnList == null || columnList.isEmpty()) { + if (columnList == null || columnList.isEmpty()) { + LOG.warn("Table {}.{}.{}, jobColumns is null or empty.", info.catalogId, info.dbId, info.tblId); + throw new RuntimeException(); + } + if (StatisticsUtil.isEmptyTable(tbl, info.analysisMethod)) { StatsId statsId = new StatsId(concatColumnStatsId(), info.catalogId, info.dbId, info.tblId, info.indexId, info.colName, null); - job.appendBuf(this, Arrays.asList(new ColStatsData(statsId))); + ColStatsData colStatsData = new ColStatsData(statsId); + Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData); + job.appendBuf(this, Collections.singletonList(colStatsData)); return; } if (tableSample != null) { diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 9ca971845b7..4408a0d9255 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -181,6 +181,7 @@ public class StatisticsAutoCollector extends StatisticsCollector { List<AnalysisInfo> analysisInfos, TableIf table) { AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes() ? AnalysisMethod.SAMPLE : AnalysisMethod.FULL; + long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0 : table.getRowCount(); AnalysisInfo jobInfo = new AnalysisInfoBuilder() .setJobId(Env.getCurrentEnv().getNextId()) .setCatalogId(db.getCatalog().getId()) @@ -200,6 +201,7 @@ public class StatisticsAutoCollector extends StatisticsCollector { .setTblUpdateTime(table.getUpdateTime()) .setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0 && analysisMethod.equals(AnalysisMethod.SAMPLE)) + .setRowCount(rowCount) .build(); analysisInfos.add(jobInfo); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 3b9b1e2bead..a9a580c8b40 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -141,7 +141,7 @@ public class TableStatsMeta implements Writable { jobType = analyzedJob.jobType; if (tableIf != null) { if (tableIf instanceof OlapTable) { - rowCount = analyzedJob.emptyJob ? 
0 : tableIf.getRowCount(); + rowCount = analyzedJob.rowCount; } if (analyzedJob.emptyJob) { return; diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index 674456b0b46..8a803bd2a30 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -34,12 +34,12 @@ import org.apache.doris.statistics.AnalysisInfo.ScheduleType; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; import mockit.Expectations; import mockit.Injectable; import mockit.Mock; import mockit.MockUp; import mockit.Mocked; +import org.apache.hadoop.util.Lists; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -48,7 +48,6 @@ import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; // CHECKSTYLE OFF public class AnalysisManagerTest { @@ -265,63 +264,58 @@ public class AnalysisManagerTest { public void testReAnalyze() { new MockUp<OlapTable>() { + int count = 0; + int[] rowCount = new int[]{100, 200, 1, 0, 0, 100}; + final Column c = new Column("col1", PrimitiveType.INT); @Mock - public List<Column> getBaseSchema() { - return Lists.newArrayList(c); + public long getRowCount() { + return rowCount[count++]; } @Mock - public List<Column> getColumns() { return Lists.newArrayList(c); } - - @Mock - public List<Pair<String, String>> getColumnIndexPairs(Set<String> columns) { - List<Pair<String, String>> jobList = Lists.newArrayList(); - jobList.add(Pair.of("1", "1")); - jobList.add(Pair.of("2", "2")); - jobList.add(Pair.of("3", "3")); - return jobList; + public List<Column> getBaseSchema() { + return org.apache.hadoop.util.Lists.newArrayList(c); } - }; - OlapTable olapTable = new 
OlapTable(); - List<Pair<String, String>> jobList = Lists.newArrayList(); - jobList.add(Pair.of("1", "1")); - jobList.add(Pair.of("2", "2")); - TableStatsMeta stats0 = new TableStatsMeta( - 0, new AnalysisInfoBuilder().setJobColumns(jobList) - .setColName("col1").build(), olapTable); - Assertions.assertTrue(olapTable.needReAnalyzeTable(stats0)); - new MockUp<OlapTable>() { - int count = 0; - int[] rowCount = new int[]{100, 100, 200, 200, 1, 1}; - - @Mock - public long getRowCount() { - return rowCount[count++]; - } @Mock - public List<Pair<String, String>> getColumnIndexPairs(Set<String> columns) { - List<Pair<String, String>> jobList = Lists.newArrayList(); - return jobList; + public List<Column> getColumns() { + return Lists.newArrayList(c); } + }; + OlapTable olapTable = new OlapTable(); TableStatsMeta stats1 = new TableStatsMeta( 50, new AnalysisInfoBuilder().setJobColumns(new ArrayList<>()) - .setColName("col1").build(), olapTable); - stats1.updatedRows.addAndGet(50); + .setColName("col1").setRowCount(100).build(), olapTable); + stats1.updatedRows.addAndGet(70); Assertions.assertTrue(olapTable.needReAnalyzeTable(stats1)); TableStatsMeta stats2 = new TableStatsMeta( - 190, new AnalysisInfoBuilder() - .setJobColumns(new ArrayList<>()).setColName("col1").build(), olapTable); + 190, new AnalysisInfoBuilder().setJobColumns(new ArrayList<>()) + .setColName("col1").setRowCount(200).build(), olapTable); stats2.updatedRows.addAndGet(20); Assertions.assertFalse(olapTable.needReAnalyzeTable(stats2)); TableStatsMeta stats3 = new TableStatsMeta(0, new AnalysisInfoBuilder() - .setJobColumns(new ArrayList<>()).setEmptyJob(true).setColName("col1").build(), olapTable); + .setEmptyJob(true).setColName("col1").setJobColumns(new ArrayList<>()) + .setRowCount(0).build(), olapTable); Assertions.assertTrue(olapTable.needReAnalyzeTable(stats3)); + TableStatsMeta stats4 = new TableStatsMeta(0, new AnalysisInfoBuilder() + .setEmptyJob(true).setColName("col1").setJobColumns(new 
ArrayList<>()) + .setRowCount(1).build(), olapTable); + Assertions.assertTrue(olapTable.needReAnalyzeTable(stats4)); + + TableStatsMeta stats5 = new TableStatsMeta(0, new AnalysisInfoBuilder() + .setEmptyJob(true).setColName("col1").setJobColumns(new ArrayList<>()) + .setRowCount(0).build(), olapTable); + Assertions.assertFalse(olapTable.needReAnalyzeTable(stats5)); + + TableStatsMeta stats6 = new TableStatsMeta(0, new AnalysisInfoBuilder() + .setEmptyJob(true).setColName("col1").setJobColumns(new ArrayList<>()) + .setRowCount(30).build(), olapTable); + Assertions.assertTrue(olapTable.needReAnalyzeTable(stats6)); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java index 94eab9e00cc..349f415a3c4 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java @@ -19,8 +19,6 @@ package org.apache.doris.statistics; import org.apache.doris.catalog.OlapTable; -import mockit.Mock; -import mockit.MockUp; import mockit.Mocked; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -31,15 +29,9 @@ class TableStatsMetaTest { @Test void update(@Mocked OlapTable table) { - new MockUp<OlapTable>() { - @Mock - public long getRowCount() { - return 4; - } - }; TableStatsMeta tableStatsMeta = new TableStatsMeta(); AnalysisInfo jobInfo = new AnalysisInfoBuilder().setJobColumns(new ArrayList<>()) - .setColName("col1").build(); + .setColName("col1").setRowCount(4).build(); tableStatsMeta.update(jobInfo, table); Assertions.assertEquals(4, tableStatsMeta.rowCount); } diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index db56b400ea9..5a6e753e0a3 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ 
b/regression-test/suites/statistics/analyze_stats.groovy @@ -2845,6 +2845,34 @@ PARTITION `p599` VALUES IN (599) assertEquals("521779.0", alter_result[0][5]) assertEquals("7.142863009760572", alter_result[0][6]) + // Test analyze after new empty partition created. + sql """CREATE TABLE `part` ( + `id` INT NULL, + `colint` INT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + PARTITION BY RANGE(`id`) + (PARTITION p1 VALUES [("-2147483648"), ("10000")), + PARTITION p2 VALUES [("10000"), ("20000"))) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql """analyze table part with sync;""" + sql """Insert into part values (1, 1), (10001, 10001);""" + sql """analyze table part with sync;""" + sleep(1000) + sql """alter table part add partition p3 VALUES [("20000"), ("30000"));""" + sql """analyze table part with sync;""" + sql """analyze table part with sync;""" + def new_part_result = sql """show column stats part(id)""" + assertEquals("2.0", new_part_result[0][2]) + new_part_result = sql """show column stats part(colint)""" + assertEquals("2.0", new_part_result[0][2]) + sql """DROP DATABASE IF EXISTS trigger""" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org