This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit db9725bc5cedb0f1672e835dfdfe037ef78fb8ef
Author: Jibing-Li <[email protected]>
AuthorDate: Mon May 27 12:17:33 2024 +0800

    Support drop partition statistics. (#35374)
---
 fe/fe-core/src/main/cup/sql_parser.cup             |  4 +-
 .../org/apache/doris/analysis/DropStatsStmt.java   |  8 +--
 .../apache/doris/statistics/AnalysisManager.java   | 16 +++---
 .../apache/doris/statistics/BaseAnalysisTask.java  |  6 --
 .../doris/statistics/InvalidateStatsTarget.java    |  8 +--
 .../doris/statistics/StatisticsRepository.java     | 66 +++++++++++++---------
 .../doris/statistics/HMSAnalysisTaskTest.java      | 20 -------
 .../doris/statistics/OlapAnalysisTaskTest.java     | 23 --------
 8 files changed, 53 insertions(+), 98 deletions(-)

diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup
index 087047b36c3..ff03f7958c2 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -3265,9 +3265,9 @@ drop_stmt ::=
         RESULT = new DropPolicyStmt(PolicyTypeEnum.STORAGE, ifExists, policyName, null, null, null);
     :}
     /* statistics */
-    | KW_DROP KW_STATS table_name:tbl opt_col_list:cols
+    | KW_DROP KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames
     {:
-        RESULT = new DropStatsStmt(tbl, cols);
+        RESULT = new DropStatsStmt(tbl, cols, partitionNames);
     :}
     | KW_DROP KW_EXPIRED KW_STATS
     {:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java
index de17116d739..480dff2d94c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java
@@ -17,7 +17,6 @@
 
 package org.apache.doris.analysis;
 
-import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.TableIf;
@@ -35,7 +34,6 @@ import org.apache.commons.lang3.StringUtils;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
-import java.util.stream.Collectors;
 
 /**
  * Manually drop statistics for tables or partitions.
@@ -51,6 +49,7 @@ public class DropStatsStmt extends DdlStmt {
 
     private final TableName tableName;
     private Set<String> columnNames;
+    private PartitionNames partitionNames;
     // Flag to drop external table row count in table_statistics.
     private boolean dropTableRowCount;
     private boolean isAllColumns;
@@ -63,12 +62,14 @@ public class DropStatsStmt extends DdlStmt {
         this.dropExpired = dropExpired;
         this.tableName = null;
         this.columnNames = null;
+        this.partitionNames = null;
         this.dropTableRowCount = false;
     }
 
     public DropStatsStmt(TableName tableName,
-            List<String> columnNames) {
+            List<String> columnNames, PartitionNames partitionNames) {
         this.tableName = tableName;
+        this.partitionNames = partitionNames;
         if (columnNames != null) {
             this.columnNames = new HashSet<>(columnNames);
             this.dropTableRowCount = false;
@@ -122,7 +123,6 @@ public class DropStatsStmt extends DdlStmt {
             }
         } else {
             isAllColumns = true;
-            columnNames = table.getSchemaAllIndexes(false).stream().map(Column::getName).collect(Collectors.toSet());
         }
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index a77a68618d5..eb10fb09c7b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -655,8 +655,8 @@ public class AnalysisManager implements Writable {
         }
         invalidateLocalStats(catalogId, dbId, tblId, cols, tableStats);
         // Drop stats ddl is master only operation.
-        invalidateRemoteStats(catalogId, dbId, tblId, cols, dropStatsStmt.isAllColumns());
-        StatisticsRepository.dropStatisticsByColNames(catalogId, dbId, tblId, cols);
+        invalidateRemoteStats(catalogId, dbId, tblId, cols);
+        StatisticsRepository.dropStatistics(catalogId, dbId, tblId, cols, null);
     }
 
     public void dropStats(TableIf table) throws DdlException {
@@ -667,11 +667,10 @@ public class AnalysisManager implements Writable {
         long catalogId = table.getDatabase().getCatalog().getId();
         long dbId = table.getDatabase().getId();
         long tableId = table.getId();
-        Set<String> cols = table.getSchemaAllIndexes(false).stream().map(Column::getName).collect(Collectors.toSet());
-        invalidateLocalStats(catalogId, dbId, tableId, cols, tableStats);
+        invalidateLocalStats(catalogId, dbId, tableId, null, tableStats);
         // Drop stats ddl is master only operation.
-        invalidateRemoteStats(catalogId, dbId, tableId, cols, true);
-        StatisticsRepository.dropStatisticsByColNames(catalogId, dbId, table.getId(), cols);
+        invalidateRemoteStats(catalogId, dbId, tableId, null);
+        StatisticsRepository.dropStatistics(catalogId, dbId, table.getId(), null, null);
     }
 
     public void invalidateLocalStats(long catalogId, long dbId, long tableId,
@@ -712,9 +711,8 @@ public class AnalysisManager implements Writable {
         tableStats.rowCount = table.getRowCount();
     }
 
-    public void invalidateRemoteStats(long catalogId, long dbId, long tableId,
-                                      Set<String> columns, boolean isAllColumns) {
-        InvalidateStatsTarget target = new InvalidateStatsTarget(catalogId, dbId, tableId, columns, isAllColumns);
+    public void invalidateRemoteStats(long catalogId, long dbId, long tableId, Set<String> columns) {
+        InvalidateStatsTarget target = new InvalidateStatsTarget(catalogId, dbId, tableId, columns);
         TInvalidateFollowerStatsCacheRequest request = new TInvalidateFollowerStatsCacheRequest();
         request.key = GsonUtils.GSON.toJson(target);
         StatisticsCache statisticsCache = Env.getCurrentEnv().getStatisticsCache();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index ef77701bfac..c1bb1e574cf 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -31,9 +31,7 @@ import org.apache.doris.qe.AuditLogHelper;
 import org.apache.doris.qe.AutoCloseConnectContext;
 import org.apache.doris.qe.QueryState;
 import org.apache.doris.qe.StmtExecutor;
-import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
 import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
-import org.apache.doris.statistics.AnalysisInfo.JobType;
 import org.apache.doris.statistics.util.DBObjects;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
@@ -331,10 +329,6 @@ public abstract class BaseAnalysisTask {
             return new TableSample(true, (long) info.samplePercent);
         } else if (info.sampleRows > 0) {
             return new TableSample(false, info.sampleRows);
-        } else if (info.jobType.equals(JobType.SYSTEM) && info.analysisMethod == AnalysisMethod.FULL
-                && tbl.getDataSize(true) > StatisticsUtil.getHugeTableLowerBoundSizeInBytes()) {
-            // If user doesn't specify sample percent/rows, use auto sample and update sample rows in analysis info.
-            return new TableSample(false, StatisticsUtil.getHugeTableSampleRows());
         } else {
             return null;
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/InvalidateStatsTarget.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/InvalidateStatsTarget.java
index e49048f8946..0108edd8228 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/InvalidateStatsTarget.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/InvalidateStatsTarget.java
@@ -35,14 +35,10 @@ public class InvalidateStatsTarget {
     @SerializedName("columns")
     public final Set<String> columns;
 
-    public InvalidateStatsTarget(long catalogId, long dbId, long tableId, Set<String> columns, boolean isAllColumns) {
+    public InvalidateStatsTarget(long catalogId, long dbId, long tableId, Set<String> columns) {
         this.catalogId = catalogId;
         this.dbId = dbId;
         this.tableId = tableId;
-        if (isAllColumns) {
-            this.columns = null;
-        } else {
-            this.columns = columns;
-        }
+        this.columns = columns;
     }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index dee9cb042fe..b6ea5bf3947 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -81,8 +81,19 @@ public class StatisticsRepository {
             + FULL_QUALIFIED_COLUMN_STATISTICS_NAME + " VALUES('${id}', ${catalogId}, ${dbId}, ${tblId}, '${idxId}',"
             + "'${colId}', ${partId}, ${count}, ${ndv}, ${nullCount}, ${min}, ${max}, ${dataSize}, NOW())";
 
-    private static final String DELETE_TABLE_STATISTICS_TEMPLATE = "DELETE FROM " + FeConstants.INTERNAL_DB_NAME
-            + "." + "${tblName}" + " WHERE ${condition} AND `catalog_id` = '${catalogId}' AND `db_id` = '${dbId}'";
+    private static final String DELETE_TABLE_STATISTICS_BY_COLUMN_TEMPLATE = "DELETE FROM "
+            + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.TABLE_STATISTIC_TBL_NAME
+            + " WHERE `catalog_id` = '${catalogId}' AND `db_id` = '${dbId}' AND `tbl_id` = '${tblId}'"
+            + "${columnCondition}";
+
+    private static final String DELETE_TABLE_STATISTICS_ALL_COLUMN_TEMPLATE = "DELETE FROM "
+            + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.TABLE_STATISTIC_TBL_NAME
+            + " WHERE `catalog_id` = '${catalogId}' AND `db_id` = '${dbId}' AND `tbl_id` = '${tblId}'";
+
+    private static final String DELETE_PARTITION_STATISTICS_TEMPLATE = "DELETE FROM "
+            + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.PARTITION_STATISTIC_TBL_NAME
+            + " WHERE `catalog_id` = '${catalogId}' AND `db_id` = '${dbId}' AND `tbl_id` = '${tblId}'"
+            + " ${columnCondition} ${partitionCondition}";
 
     private static final String FETCH_RECENT_STATS_UPDATED_COL =
             "SELECT * FROM "
@@ -194,24 +205,18 @@ public class StatisticsRepository {
         return stringJoiner.toString();
     }
 
-    public static void dropStatisticsByPartIds(long ctlId, long dbId, long tblId, Set<String> partIds)
-            throws DdlException {
-        dropStatisticsByPartId(ctlId, dbId, tblId, partIds, StatisticConstants.TABLE_STATISTIC_TBL_NAME);
-    }
-
-    public static void dropStatisticsByColNames(
-            long ctlId, long dbId, long tblId, Set<String> colNames) throws DdlException {
+    public static void dropStatistics(
+            long ctlId, long dbId, long tblId, Set<String> colNames, Set<String> partNames) throws DdlException {
         if (colNames == null) {
-            return;
+            executeDropStatisticsAllColumnSql(ctlId, dbId, tblId);
+        } else {
+            dropStatisticsByColName(ctlId, dbId, tblId, colNames);
         }
-        dropStatisticsByColName(ctlId, dbId, tblId, colNames, StatisticConstants.TABLE_STATISTIC_TBL_NAME);
     }
 
-    private static void dropStatisticsByColName(
-            long ctlId, long dbId, long tblId, Set<String> colNames, String statsTblName)
+    private static void dropStatisticsByColName(long ctlId, long dbId, long tblId, Set<String> colNames)
             throws DdlException {
         Map<String, String> params = new HashMap<>();
-        params.put("tblName", statsTblName);
         Iterator<String> iterator = colNames.iterator();
         int columnCount = 0;
         StringBuilder inPredicate = new StringBuilder();
@@ -222,43 +227,48 @@ public class StatisticsRepository {
             inPredicate.append(",");
             columnCount++;
             if (columnCount == Config.max_allowed_in_element_num_of_delete) {
-                executeDropSql(inPredicate, ctlId, dbId, tblId, params);
+                executeDropStatisticsByColumnSql(inPredicate, ctlId, dbId, tblId, params);
                 columnCount = 0;
                 inPredicate.setLength(0);
             }
         }
         if (inPredicate.length() > 0) {
-            executeDropSql(inPredicate, ctlId, dbId, tblId, params);
+            executeDropStatisticsByColumnSql(inPredicate, ctlId, dbId, tblId, params);
         }
     }
 
-    private static void executeDropSql(
+    private static void executeDropStatisticsByColumnSql(
             StringBuilder inPredicate, long ctlId, long dbId, long tblId, Map<String, String> params)
             throws DdlException {
+        generateCtlDbIdParams(ctlId, dbId, params);
+        params.put("tblId", String.valueOf(tblId));
         if (inPredicate.length() > 0) {
             inPredicate.delete(inPredicate.length() - 1, inPredicate.length());
         }
-        String predicate = String.format("tbl_id = '%s' AND %s IN (%s)", tblId, "col_id", inPredicate);
-        params.put("condition", predicate);
-        generateCtlDbIdParams(ctlId, dbId, params);
+        String predicate = String.format("AND %s IN (%s)", "col_id", inPredicate);
+        params.put("columnCondition", predicate);
+        params.put("partitionCondition", "");
         try {
-            StatisticsUtil.execUpdate(new StringSubstitutor(params).replace(DELETE_TABLE_STATISTICS_TEMPLATE));
+            StatisticsUtil.execUpdate(
+                new StringSubstitutor(params).replace(DELETE_TABLE_STATISTICS_BY_COLUMN_TEMPLATE));
+            StatisticsUtil.execUpdate(
+                new StringSubstitutor(params).replace(DELETE_PARTITION_STATISTICS_TEMPLATE));
         } catch (Exception e) {
             throw new DdlException(e.getMessage(), e);
         }
     }
 
-    private static void dropStatisticsByPartId(
-            long ctlId, long dbId, long tblId, Set<String> partIds, String statsTblName) throws DdlException {
+    private static void executeDropStatisticsAllColumnSql(long ctlId, long dbId, long tblId) throws DdlException {
         Map<String, String> params = new HashMap<>();
-        String right = StatisticsUtil.joinElementsToString(partIds, ",");
-        String inPredicate = String.format(" part_id IN (%s)", right);
-        params.put("tblName", statsTblName);
-        params.put("condition", inPredicate);
         generateCtlDbIdParams(ctlId, dbId, params);
         params.put("tblId", String.valueOf(tblId));
+        params.put("columnCondition", "");
+        params.put("partitionCondition", "");
         try {
-            StatisticsUtil.execUpdate(new StringSubstitutor(params).replace(DELETE_TABLE_STATISTICS_TEMPLATE));
+            StatisticsUtil.execUpdate(
+                new StringSubstitutor(params).replace(DELETE_TABLE_STATISTICS_ALL_COLUMN_TEMPLATE));
+            StatisticsUtil.execUpdate(
+                new StringSubstitutor(params).replace(DELETE_PARTITION_STATISTICS_TEMPLATE));
         } catch (Exception e) {
             throw new DdlException(e.getMessage(), e);
         }
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
index 2a47f659b7d..3a9cf558cc5 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
@@ -67,26 +67,6 @@ public class HMSAnalysisTaskTest {
         Assertions.assertFalse(task.needLimit(2L * 1024 * 1024 * 1024, 5.0));
     }
 
-    @Test
-    public void testAutoSampleHugeTable(@Mocked HMSExternalTable tableIf)
-            throws Exception {
-        new MockUp<HMSExternalTable>() {
-            @Mock
-            public long getDataSize(boolean singleReplica) {
-                return 6L * 1024 * 1024 * 1024;
-            }
-        };
-        HMSAnalysisTask task = new HMSAnalysisTask();
-        task.tbl = tableIf;
-        AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder();
-        analysisInfoBuilder.setJobType(AnalysisInfo.JobType.SYSTEM);
-        analysisInfoBuilder.setAnalysisMethod(AnalysisInfo.AnalysisMethod.FULL);
-        task.info = analysisInfoBuilder.build();
-        TableSample tableSample = task.getTableSample();
-        Assertions.assertFalse(tableSample.isPercent());
-        Assertions.assertEquals(StatisticsUtil.getHugeTableSampleRows(), tableSample.getSampleValue());
-    }
-
     @Test
     public void testAutoSampleSmallTable(@Mocked HMSExternalTable tableIf)
             throws Exception {
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
index 71109f27c5f..7b4ce3dfca5 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
@@ -71,29 +71,6 @@ public class OlapAnalysisTaskTest {
         Assertions.assertFalse(tableSample.isPercent());
     }
 
-    // test auto big table
-    @Test
-    public void testSample2(@Mocked OlapTable tbl) {
-        new MockUp<OlapTable>() {
-
-            @Mock
-            public long getDataSize(boolean singleReplica) {
-                return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() + 1;
-            }
-        };
-
-        AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder()
-                .setAnalysisMethod(AnalysisMethod.FULL);
-        analysisInfoBuilder.setJobType(JobType.SYSTEM);
-        OlapAnalysisTask olapAnalysisTask = new OlapAnalysisTask();
-        olapAnalysisTask.info = analysisInfoBuilder.build();
-        olapAnalysisTask.tbl = tbl;
-        TableSample tableSample = olapAnalysisTask.getTableSample();
-        Assertions.assertNotNull(tableSample);
-        Assertions.assertEquals(StatisticsUtil.getHugeTableSampleRows(), tableSample.getSampleValue());
-
-    }
-
     // test auto small table
     @Test
     public void testSample3(@Mocked OlapTable tbl) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to