This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit db9725bc5cedb0f1672e835dfdfe037ef78fb8ef Author: Jibing-Li <[email protected]> AuthorDate: Mon May 27 12:17:33 2024 +0800 Support drop partition statistics. (#35374) --- fe/fe-core/src/main/cup/sql_parser.cup | 4 +- .../org/apache/doris/analysis/DropStatsStmt.java | 8 +-- .../apache/doris/statistics/AnalysisManager.java | 16 +++--- .../apache/doris/statistics/BaseAnalysisTask.java | 6 -- .../doris/statistics/InvalidateStatsTarget.java | 8 +-- .../doris/statistics/StatisticsRepository.java | 66 +++++++++++++--------- .../doris/statistics/HMSAnalysisTaskTest.java | 20 ------- .../doris/statistics/OlapAnalysisTaskTest.java | 23 -------- 8 files changed, 53 insertions(+), 98 deletions(-) diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 087047b36c3..ff03f7958c2 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -3265,9 +3265,9 @@ drop_stmt ::= RESULT = new DropPolicyStmt(PolicyTypeEnum.STORAGE, ifExists, policyName, null, null, null); :} /* statistics */ - | KW_DROP KW_STATS table_name:tbl opt_col_list:cols + | KW_DROP KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames {: - RESULT = new DropStatsStmt(tbl, cols); + RESULT = new DropStatsStmt(tbl, cols, partitionNames); :} | KW_DROP KW_EXPIRED KW_STATS {: diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java index de17116d739..480dff2d94c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java @@ -17,7 +17,6 @@ package org.apache.doris.analysis; -import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.TableIf; @@ -35,7 +34,6 @@ import org.apache.commons.lang3.StringUtils; import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; /** * Manually drop statistics for tables or partitions. @@ -51,6 +49,7 @@ public class DropStatsStmt extends DdlStmt { private final TableName tableName; private Set<String> columnNames; + private PartitionNames partitionNames; // Flag to drop external table row count in table_statistics. private boolean dropTableRowCount; private boolean isAllColumns; @@ -63,12 +62,14 @@ public class DropStatsStmt extends DdlStmt { this.dropExpired = dropExpired; this.tableName = null; this.columnNames = null; + this.partitionNames = null; this.dropTableRowCount = false; } public DropStatsStmt(TableName tableName, - List<String> columnNames) { + List<String> columnNames, PartitionNames partitionNames) { this.tableName = tableName; + this.partitionNames = partitionNames; if (columnNames != null) { this.columnNames = new HashSet<>(columnNames); this.dropTableRowCount = false; @@ -122,7 +123,6 @@ public class DropStatsStmt extends DdlStmt { } } else { isAllColumns = true; - columnNames = table.getSchemaAllIndexes(false).stream().map(Column::getName).collect(Collectors.toSet()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index a77a68618d5..eb10fb09c7b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -655,8 +655,8 @@ public class AnalysisManager implements Writable { } invalidateLocalStats(catalogId, dbId, tblId, cols, tableStats); // Drop stats ddl is master only operation. - invalidateRemoteStats(catalogId, dbId, tblId, cols, dropStatsStmt.isAllColumns()); - StatisticsRepository.dropStatisticsByColNames(catalogId, dbId, tblId, cols); + invalidateRemoteStats(catalogId, dbId, tblId, cols); + StatisticsRepository.dropStatistics(catalogId, dbId, tblId, cols, null); } public void dropStats(TableIf table) throws DdlException { @@ -667,11 +667,10 @@ public class AnalysisManager implements Writable { long catalogId = table.getDatabase().getCatalog().getId(); long dbId = table.getDatabase().getId(); long tableId = table.getId(); - Set<String> cols = table.getSchemaAllIndexes(false).stream().map(Column::getName).collect(Collectors.toSet()); - invalidateLocalStats(catalogId, dbId, tableId, cols, tableStats); + invalidateLocalStats(catalogId, dbId, tableId, null, tableStats); // Drop stats ddl is master only operation. - invalidateRemoteStats(catalogId, dbId, tableId, cols, true); - StatisticsRepository.dropStatisticsByColNames(catalogId, dbId, table.getId(), cols); + invalidateRemoteStats(catalogId, dbId, tableId, null); + StatisticsRepository.dropStatistics(catalogId, dbId, table.getId(), null, null); } public void invalidateLocalStats(long catalogId, long dbId, long tableId, @@ -712,9 +711,8 @@ public class AnalysisManager implements Writable { tableStats.rowCount = table.getRowCount(); } - public void invalidateRemoteStats(long catalogId, long dbId, long tableId, - Set<String> columns, boolean isAllColumns) { - InvalidateStatsTarget target = new InvalidateStatsTarget(catalogId, dbId, tableId, columns, isAllColumns); + public void invalidateRemoteStats(long catalogId, long dbId, long tableId, Set<String> columns) { + InvalidateStatsTarget target = new InvalidateStatsTarget(catalogId, dbId, tableId, columns); TInvalidateFollowerStatsCacheRequest request = new TInvalidateFollowerStatsCacheRequest(); request.key = GsonUtils.GSON.toJson(target); StatisticsCache statisticsCache = Env.getCurrentEnv().getStatisticsCache(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index ef77701bfac..c1bb1e574cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -31,9 +31,7 @@ import org.apache.doris.qe.AuditLogHelper; import org.apache.doris.qe.AutoCloseConnectContext; import org.apache.doris.qe.QueryState; import org.apache.doris.qe.StmtExecutor; -import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import org.apache.doris.statistics.AnalysisInfo.AnalysisType; -import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.util.DBObjects; import org.apache.doris.statistics.util.StatisticsUtil; @@ -331,10 +329,6 @@ public abstract class BaseAnalysisTask { return new TableSample(true, (long) info.samplePercent); } else if (info.sampleRows > 0) { return new TableSample(false, info.sampleRows); - } else if (info.jobType.equals(JobType.SYSTEM) && info.analysisMethod == AnalysisMethod.FULL - && tbl.getDataSize(true) > StatisticsUtil.getHugeTableLowerBoundSizeInBytes()) { - // If user doesn't specify sample percent/rows, use auto sample and update sample rows in analysis info. - return new TableSample(false, StatisticsUtil.getHugeTableSampleRows()); } else { return null; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/InvalidateStatsTarget.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/InvalidateStatsTarget.java index e49048f8946..0108edd8228 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/InvalidateStatsTarget.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/InvalidateStatsTarget.java @@ -35,14 +35,10 @@ public class InvalidateStatsTarget { @SerializedName("columns") public final Set<String> columns; - public InvalidateStatsTarget(long catalogId, long dbId, long tableId, Set<String> columns, boolean isAllColumns) { + public InvalidateStatsTarget(long catalogId, long dbId, long tableId, Set<String> columns) { this.catalogId = catalogId; this.dbId = dbId; this.tableId = tableId; - if (isAllColumns) { - this.columns = null; - } else { - this.columns = columns; - } + this.columns = columns; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index dee9cb042fe..b6ea5bf3947 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -81,8 +81,19 @@ public class StatisticsRepository { + FULL_QUALIFIED_COLUMN_STATISTICS_NAME + " VALUES('${id}', ${catalogId}, ${dbId}, ${tblId}, '${idxId}'," + "'${colId}', ${partId}, ${count}, ${ndv}, ${nullCount}, ${min}, ${max}, ${dataSize}, NOW())"; - private static final String DELETE_TABLE_STATISTICS_TEMPLATE = "DELETE FROM " + FeConstants.INTERNAL_DB_NAME - + "." + "${tblName}" + " WHERE ${condition} AND `catalog_id` = '${catalogId}' AND `db_id` = '${dbId}'"; + private static final String DELETE_TABLE_STATISTICS_BY_COLUMN_TEMPLATE = "DELETE FROM " + + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.TABLE_STATISTIC_TBL_NAME + + " WHERE `catalog_id` = '${catalogId}' AND `db_id` = '${dbId}' AND `tbl_id` = '${tblId}'" + + "${columnCondition}"; + + private static final String DELETE_TABLE_STATISTICS_ALL_COLUMN_TEMPLATE = "DELETE FROM " + + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.TABLE_STATISTIC_TBL_NAME + + " WHERE `catalog_id` = '${catalogId}' AND `db_id` = '${dbId}' AND `tbl_id` = '${tblId}'"; + + private static final String DELETE_PARTITION_STATISTICS_TEMPLATE = "DELETE FROM " + + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.PARTITION_STATISTIC_TBL_NAME + + " WHERE `catalog_id` = '${catalogId}' AND `db_id` = '${dbId}' AND `tbl_id` = '${tblId}'" + + " ${columnCondition} ${partitionCondition}"; private static final String FETCH_RECENT_STATS_UPDATED_COL = "SELECT * FROM " @@ -194,24 +205,18 @@ public class StatisticsRepository { return stringJoiner.toString(); } - public static void dropStatisticsByPartIds(long ctlId, long dbId, long tblId, Set<String> partIds) - throws DdlException { - dropStatisticsByPartId(ctlId, dbId, tblId, partIds, StatisticConstants.TABLE_STATISTIC_TBL_NAME); - } - - public static void dropStatisticsByColNames( - long ctlId, long dbId, long tblId, Set<String> colNames) throws DdlException { + public static void dropStatistics( + long ctlId, long dbId, long tblId, Set<String> colNames, Set<String> partNames) throws DdlException { if (colNames == null) { - return; + executeDropStatisticsAllColumnSql(ctlId, dbId, tblId); + } else { + dropStatisticsByColName(ctlId, dbId, tblId, colNames); } - dropStatisticsByColName(ctlId, dbId, tblId, colNames, StatisticConstants.TABLE_STATISTIC_TBL_NAME); } - private static void dropStatisticsByColName( - long ctlId, long dbId, long tblId, Set<String> colNames, String statsTblName) + private static void dropStatisticsByColName(long ctlId, long dbId, long tblId, Set<String> colNames) throws DdlException { Map<String, String> params = new HashMap<>(); - params.put("tblName", statsTblName); Iterator<String> iterator = colNames.iterator(); int columnCount = 0; StringBuilder inPredicate = new StringBuilder(); @@ -222,43 +227,48 @@ public class StatisticsRepository { inPredicate.append(","); columnCount++; if (columnCount == Config.max_allowed_in_element_num_of_delete) { - executeDropSql(inPredicate, ctlId, dbId, tblId, params); + executeDropStatisticsByColumnSql(inPredicate, ctlId, dbId, tblId, params); columnCount = 0; inPredicate.setLength(0); } } if (inPredicate.length() > 0) { - executeDropSql(inPredicate, ctlId, dbId, tblId, params); + executeDropStatisticsByColumnSql(inPredicate, ctlId, dbId, tblId, params); } } - private static void executeDropSql( + private static void executeDropStatisticsByColumnSql( StringBuilder inPredicate, long ctlId, long dbId, long tblId, Map<String, String> params) throws DdlException { + generateCtlDbIdParams(ctlId, dbId, params); + params.put("tblId", String.valueOf(tblId)); if (inPredicate.length() > 0) { inPredicate.delete(inPredicate.length() - 1, inPredicate.length()); } - String predicate = String.format("tbl_id = '%s' AND %s IN (%s)", tblId, "col_id", inPredicate); - params.put("condition", predicate); - generateCtlDbIdParams(ctlId, dbId, params); + String predicate = String.format("AND %s IN (%s)", "col_id", inPredicate); + params.put("columnCondition", predicate); + params.put("partitionCondition", ""); try { - StatisticsUtil.execUpdate(new StringSubstitutor(params).replace(DELETE_TABLE_STATISTICS_TEMPLATE)); + StatisticsUtil.execUpdate( + new StringSubstitutor(params).replace(DELETE_TABLE_STATISTICS_BY_COLUMN_TEMPLATE)); + StatisticsUtil.execUpdate( + new StringSubstitutor(params).replace(DELETE_PARTITION_STATISTICS_TEMPLATE)); } catch (Exception e) { throw new DdlException(e.getMessage(), e); } } - private static void dropStatisticsByPartId( - long ctlId, long dbId, long tblId, Set<String> partIds, String statsTblName) throws DdlException { + private static void executeDropStatisticsAllColumnSql(long ctlId, long dbId, long tblId) throws DdlException { Map<String, String> params = new HashMap<>(); - String right = StatisticsUtil.joinElementsToString(partIds, ","); - String inPredicate = String.format(" part_id IN (%s)", right); - params.put("tblName", statsTblName); - params.put("condition", inPredicate); generateCtlDbIdParams(ctlId, dbId, params); params.put("tblId", String.valueOf(tblId)); + params.put("columnCondition", ""); + params.put("partitionCondition", ""); try { - StatisticsUtil.execUpdate(new StringSubstitutor(params).replace(DELETE_TABLE_STATISTICS_TEMPLATE)); + StatisticsUtil.execUpdate( + new StringSubstitutor(params).replace(DELETE_TABLE_STATISTICS_ALL_COLUMN_TEMPLATE)); + StatisticsUtil.execUpdate( + new StringSubstitutor(params).replace(DELETE_PARTITION_STATISTICS_TEMPLATE)); } catch (Exception e) { throw new DdlException(e.getMessage(), e); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java index 2a47f659b7d..3a9cf558cc5 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java @@ -67,26 +67,6 @@ public class HMSAnalysisTaskTest { Assertions.assertFalse(task.needLimit(2L * 1024 * 1024 * 1024, 5.0)); } - @Test - public void testAutoSampleHugeTable(@Mocked HMSExternalTable tableIf) - throws Exception { - new MockUp<HMSExternalTable>() { - @Mock - public long getDataSize(boolean singleReplica) { - return 6L * 1024 * 1024 * 1024; - } - }; - HMSAnalysisTask task = new HMSAnalysisTask(); - task.tbl = tableIf; - AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder(); - analysisInfoBuilder.setJobType(AnalysisInfo.JobType.SYSTEM); - analysisInfoBuilder.setAnalysisMethod(AnalysisInfo.AnalysisMethod.FULL); - task.info = analysisInfoBuilder.build(); - TableSample tableSample = task.getTableSample(); - Assertions.assertFalse(tableSample.isPercent()); - Assertions.assertEquals(StatisticsUtil.getHugeTableSampleRows(), tableSample.getSampleValue()); - } - @Test public void testAutoSampleSmallTable(@Mocked HMSExternalTable tableIf) throws Exception { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java index 71109f27c5f..7b4ce3dfca5 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java @@ -71,29 +71,6 @@ public class OlapAnalysisTaskTest { Assertions.assertFalse(tableSample.isPercent()); } - // test auto big table - @Test - public void testSample2(@Mocked OlapTable tbl) { - new MockUp<OlapTable>() { - - @Mock - public long getDataSize(boolean singleReplica) { - return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() + 1; - } - }; - - AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder() - .setAnalysisMethod(AnalysisMethod.FULL); - analysisInfoBuilder.setJobType(JobType.SYSTEM); - OlapAnalysisTask olapAnalysisTask = new OlapAnalysisTask(); - olapAnalysisTask.info = analysisInfoBuilder.build(); - olapAnalysisTask.tbl = tbl; - TableSample tableSample = olapAnalysisTask.getTableSample(); - Assertions.assertNotNull(tableSample); - Assertions.assertEquals(StatisticsUtil.getHugeTableSampleRows(), tableSample.getSampleValue()); - - } - // test auto small table @Test public void testSample3(@Mocked OlapTable tbl) { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
