This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 90a65501f77 [fix](statistics)Remove old partition stats in the next analyze after insert overwrite. (#38175) 90a65501f77 is described below commit 90a65501f7720c1246046b6d1729efb1266cc636 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Mon Jul 22 12:05:45 2024 +0800 [fix](statistics)Remove old partition stats in the next analyze after insert overwrite. (#38175) Remove old partition stats in the next analyze after insert overwrite. Insert overwrite replaces the old partitions with new ones, so all partitions are new and the old partition stats should be removed. --- .../apache/doris/statistics/OlapAnalysisTask.java | 10 +- .../doris/statistics/util/StatisticsUtil.java | 5 +- .../suites/statistics/test_partition_stats.groovy | 109 +++++++++++++++++++++ 3 files changed, 120 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 5b6c87a02ef..5f4d812c265 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -210,9 +210,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask { @Override protected void deleteNotExistPartitionStats(AnalysisInfo jobInfo) throws DdlException { TableStatsMeta tableStats = Env.getServingEnv().getAnalysisManager().findTableStatsStatus(tbl.getId()); - // When a partition was dropped, newPartitionLoaded will set to true. - // So we don't need to check dropped partition if newPartitionLoaded is false. 
- if (tableStats == null || !tableStats.partitionChanged.get()) { + if (tableStats == null) { return; } OlapTable table = (OlapTable) tbl; @@ -222,6 +220,12 @@ public class OlapAnalysisTask extends BaseAnalysisTask { || columnStats.partitionUpdateRows.isEmpty()) { return; } + // When a partition was dropped, partitionChanged will be set to true. + // So we don't need to check dropped partition if partitionChanged is false. + if (!tableStats.partitionChanged.get() + && columnStats.partitionUpdateRows.size() == table.getPartitions().size()) { + return; + } Set<Long> expiredPartition = Sets.newHashSet(); String columnCondition = "AND col_id = " + StatisticsUtil.quote(col.getName()); for (long partId : columnStats.partitionUpdateRows.keySet()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index e13b7a3df68..76da12f20c8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -1058,11 +1058,14 @@ public class StatisticsUtil { if (!StatisticsUtil.enablePartitionAnalyze() || !table.isPartitionedTable()) { return false; } - Collection<Partition> partitions = table.getPartitions(); + if (tableStatsStatus.partitionChanged != null && tableStatsStatus.partitionChanged.get()) { + return true; + } ConcurrentMap<Long, Long> partitionUpdateRows = columnStatsMeta.partitionUpdateRows; if (partitionUpdateRows == null) { return true; } + Collection<Partition> partitions = table.getPartitions(); // New partition added or old partition deleted. 
if (partitions.size() != partitionUpdateRows.size()) { return true; diff --git a/regression-test/suites/statistics/test_partition_stats.groovy b/regression-test/suites/statistics/test_partition_stats.groovy index 9ff80621b2a..16cedb9889d 100644 --- a/regression-test/suites/statistics/test_partition_stats.groovy +++ b/regression-test/suites/statistics/test_partition_stats.groovy @@ -1000,6 +1000,115 @@ suite("test_partition_stats") { result = sql """show column stats part9 partition(*)""" assertEquals(9, result.size()) + // Test insert overwrite. + sql """ + create table part10( + k int null, + v int null + ) + duplicate key (k) + PARTITION BY RANGE(`k`) + ( + PARTITION p1 VALUES [("0"), ("2")), + PARTITION p2 VALUES [("2"), ("4")) + ) + distributed BY hash(k) buckets 3 + properties("replication_num" = "1"); + """ + sql """analyze table part10 with sync""" + result = sql """show column stats part10 (k)""" + assertEquals(1, result.size()) + assertEquals("k", result[0][0]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("0.0", result[0][6]) + assertEquals("N/A", result[0][7]) + assertEquals("N/A", result[0][8]) + assertEquals("FULL", result[0][9]) + + result = sql """show column stats part10 (v)""" + assertEquals(1, result.size()) + assertEquals("v", result[0][0]) + assertEquals("0.0", result[0][2]) + assertEquals("0.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("0.0", result[0][5]) + assertEquals("0.0", result[0][6]) + assertEquals("N/A", result[0][7]) + assertEquals("N/A", result[0][8]) + assertEquals("FULL", result[0][9]) + + result = sql """show column stats part10 (k) partition(*)""" + assertEquals(2, result.size()) + result = sql """show column stats part10 (v) partition(*)""" + assertEquals(2, result.size()) + + sql """insert into part10 values (1, 2), (3, 6)""" + sql """analyze table part10 with sync""" + result = sql """show 
column stats part10 (k)""" + assertEquals(1, result.size()) + assertEquals("k", result[0][0]) + assertEquals("2.0", result[0][2]) + assertEquals("2.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("8.0", result[0][5]) + assertEquals("4.0", result[0][6]) + assertEquals("1", result[0][7]) + assertEquals("3", result[0][8]) + assertEquals("FULL", result[0][9]) + + result = sql """show column stats part10 (v)""" + assertEquals(1, result.size()) + assertEquals("v", result[0][0]) + assertEquals("2.0", result[0][2]) + assertEquals("2.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("8.0", result[0][5]) + assertEquals("4.0", result[0][6]) + assertEquals("2", result[0][7]) + assertEquals("6", result[0][8]) + assertEquals("FULL", result[0][9]) + + result = sql """show column stats part10 (k) partition(*)""" + assertEquals(2, result.size()) + result = sql """show column stats part10 (v) partition(*)""" + assertEquals(2, result.size()) + + sql """INSERT OVERWRITE table part10 VALUES (0, 100), (2, 200);""" + sql """analyze table part10 with sync""" + result = sql """show column stats part10 (k)""" + assertEquals(1, result.size()) + assertEquals("k", result[0][0]) + assertEquals("2.0", result[0][2]) + assertEquals("2.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("8.0", result[0][5]) + assertEquals("4.0", result[0][6]) + assertEquals("0", result[0][7]) + assertEquals("2", result[0][8]) + assertEquals("FULL", result[0][9]) + + result = sql """show column stats part10 (v)""" + assertEquals(1, result.size()) + assertEquals("v", result[0][0]) + assertEquals("2.0", result[0][2]) + assertEquals("2.0", result[0][3]) + assertEquals("0.0", result[0][4]) + assertEquals("8.0", result[0][5]) + assertEquals("4.0", result[0][6]) + assertEquals("100", result[0][7]) + assertEquals("200", result[0][8]) + assertEquals("FULL", result[0][9]) + + result = sql """show column stats part10 (k) partition(*)""" + assertEquals(2, result.size()) 
+ result = sql """show column stats part10 (v) partition(*)""" + assertEquals(2, result.size()) + result = sql """show column stats part10 partition(*)""" + assertEquals(4, result.size()) + sql """drop database test_partition_stats""" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org