This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 90a65501f77 [fix](statistics)Remove old partition stats in the next 
analyze after insert overwrite. (#38175)
90a65501f77 is described below

commit 90a65501f7720c1246046b6d1729efb1266cc636
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Mon Jul 22 12:05:45 2024 +0800

    [fix](statistics)Remove old partition stats in the next analyze after 
insert overwrite. (#38175)
    
    Remove old partition stats in the next analyze after insert overwrite.
    Insert overwrite replaces the old partitions with new ones, so all
    partitions are new and the stats of the old partitions should be
    removed.
---
 .../apache/doris/statistics/OlapAnalysisTask.java  |  10 +-
 .../doris/statistics/util/StatisticsUtil.java      |   5 +-
 .../suites/statistics/test_partition_stats.groovy  | 109 +++++++++++++++++++++
 3 files changed, 120 insertions(+), 4 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 5b6c87a02ef..5f4d812c265 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -210,9 +210,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
     @Override
     protected void deleteNotExistPartitionStats(AnalysisInfo jobInfo) throws 
DdlException {
         TableStatsMeta tableStats = 
Env.getServingEnv().getAnalysisManager().findTableStatsStatus(tbl.getId());
-        // When a partition was dropped, newPartitionLoaded will set to true.
-        // So we don't need to check dropped partition if newPartitionLoaded 
is false.
-        if (tableStats == null || !tableStats.partitionChanged.get()) {
+        if (tableStats == null) {
             return;
         }
         OlapTable table = (OlapTable) tbl;
@@ -222,6 +220,12 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
                 || columnStats.partitionUpdateRows.isEmpty()) {
             return;
         }
+        // When a partition was dropped, partitionChanged will be set to true.
+        // So we don't need to check dropped partition if partitionChanged is 
false.
+        if (!tableStats.partitionChanged.get()
+                && columnStats.partitionUpdateRows.size() == 
table.getPartitions().size()) {
+            return;
+        }
         Set<Long> expiredPartition = Sets.newHashSet();
         String columnCondition = "AND col_id = " + 
StatisticsUtil.quote(col.getName());
         for (long partId : columnStats.partitionUpdateRows.keySet()) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index e13b7a3df68..76da12f20c8 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -1058,11 +1058,14 @@ public class StatisticsUtil {
         if (!StatisticsUtil.enablePartitionAnalyze() || 
!table.isPartitionedTable()) {
             return false;
         }
-        Collection<Partition> partitions = table.getPartitions();
+        if (tableStatsStatus.partitionChanged != null && 
tableStatsStatus.partitionChanged.get()) {
+            return true;
+        }
         ConcurrentMap<Long, Long> partitionUpdateRows = 
columnStatsMeta.partitionUpdateRows;
         if (partitionUpdateRows == null) {
             return true;
         }
+        Collection<Partition> partitions = table.getPartitions();
         // New partition added or old partition deleted.
         if (partitions.size() != partitionUpdateRows.size()) {
             return true;
diff --git a/regression-test/suites/statistics/test_partition_stats.groovy 
b/regression-test/suites/statistics/test_partition_stats.groovy
index 9ff80621b2a..16cedb9889d 100644
--- a/regression-test/suites/statistics/test_partition_stats.groovy
+++ b/regression-test/suites/statistics/test_partition_stats.groovy
@@ -1000,6 +1000,115 @@ suite("test_partition_stats") {
     result = sql """show column stats part9 partition(*)"""
     assertEquals(9, result.size())
 
+    // Test insert overwrite.
+    sql """
+        create table part10(
+            k int null,
+            v int null
+        )
+        duplicate key (k)
+        PARTITION BY RANGE(`k`)
+        (
+            PARTITION p1 VALUES [("0"), ("2")),
+            PARTITION p2 VALUES [("2"), ("4"))
+        )
+        distributed BY hash(k) buckets 3
+        properties("replication_num" = "1");
+    """
+    sql """analyze table part10 with sync"""
+    result = sql """show column stats part10 (k)"""
+    assertEquals(1, result.size())
+    assertEquals("k", result[0][0])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("0.0", result[0][6])
+    assertEquals("N/A", result[0][7])
+    assertEquals("N/A", result[0][8])
+    assertEquals("FULL", result[0][9])
+
+    result = sql """show column stats part10 (v)"""
+    assertEquals(1, result.size())
+    assertEquals("v", result[0][0])
+    assertEquals("0.0", result[0][2])
+    assertEquals("0.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("0.0", result[0][5])
+    assertEquals("0.0", result[0][6])
+    assertEquals("N/A", result[0][7])
+    assertEquals("N/A", result[0][8])
+    assertEquals("FULL", result[0][9])
+
+    result = sql """show column stats part10 (k) partition(*)"""
+    assertEquals(2, result.size())
+    result = sql """show column stats part10 (v) partition(*)"""
+    assertEquals(2, result.size())
+
+    sql """insert into part10 values (1, 2), (3, 6)"""
+    sql """analyze table part10 with sync"""
+    result = sql """show column stats part10 (k)"""
+    assertEquals(1, result.size())
+    assertEquals("k", result[0][0])
+    assertEquals("2.0", result[0][2])
+    assertEquals("2.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("8.0", result[0][5])
+    assertEquals("4.0", result[0][6])
+    assertEquals("1", result[0][7])
+    assertEquals("3", result[0][8])
+    assertEquals("FULL", result[0][9])
+
+    result = sql """show column stats part10 (v)"""
+    assertEquals(1, result.size())
+    assertEquals("v", result[0][0])
+    assertEquals("2.0", result[0][2])
+    assertEquals("2.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("8.0", result[0][5])
+    assertEquals("4.0", result[0][6])
+    assertEquals("2", result[0][7])
+    assertEquals("6", result[0][8])
+    assertEquals("FULL", result[0][9])
+
+    result = sql """show column stats part10 (k) partition(*)"""
+    assertEquals(2, result.size())
+    result = sql """show column stats part10 (v) partition(*)"""
+    assertEquals(2, result.size())
+
+    sql """INSERT OVERWRITE table part10 VALUES (0, 100), (2, 200);"""
+    sql """analyze table part10 with sync"""
+    result = sql """show column stats part10 (k)"""
+    assertEquals(1, result.size())
+    assertEquals("k", result[0][0])
+    assertEquals("2.0", result[0][2])
+    assertEquals("2.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("8.0", result[0][5])
+    assertEquals("4.0", result[0][6])
+    assertEquals("0", result[0][7])
+    assertEquals("2", result[0][8])
+    assertEquals("FULL", result[0][9])
+
+    result = sql """show column stats part10 (v)"""
+    assertEquals(1, result.size())
+    assertEquals("v", result[0][0])
+    assertEquals("2.0", result[0][2])
+    assertEquals("2.0", result[0][3])
+    assertEquals("0.0", result[0][4])
+    assertEquals("8.0", result[0][5])
+    assertEquals("4.0", result[0][6])
+    assertEquals("100", result[0][7])
+    assertEquals("200", result[0][8])
+    assertEquals("FULL", result[0][9])
+
+    result = sql """show column stats part10 (k) partition(*)"""
+    assertEquals(2, result.size())
+    result = sql """show column stats part10 (v) partition(*)"""
+    assertEquals(2, result.size())
+    result = sql """show column stats part10 partition(*)"""
+    assertEquals(4, result.size())
+
     sql """drop database test_partition_stats"""
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to