This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 4ba219f0470 [feature](statistics)Remove not exist partition stats while analyzing. (#35814) 4ba219f0470 is described below commit 4ba219f0470877ca57d61763271a58afa76ebcec Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Tue Jun 4 22:11:20 2024 +0800 [feature](statistics)Remove not exist partition stats while analyzing. (#35814) ## Proposed changes Remove the statistics of partitions that no longer exist while analyzing. <!--Describe your changes.--> When doing a full analyze, we need to drop the statistics of partitions that no longer exist before analyzing, because a user may drop a partition at any time. If we don't drop the expired partition stats, we may get wrong table-level stats when merging all partition stats. --- .../apache/doris/analysis/ShowTableStatsStmt.java | 2 +- .../org/apache/doris/catalog/InternalSchema.java | 4 +- .../doris/catalog/InternalSchemaInitializer.java | 2 +- .../apache/doris/datasource/InternalCatalog.java | 11 ++++ .../apache/doris/statistics/AnalysisManager.java | 8 +-- .../apache/doris/statistics/BaseAnalysisTask.java | 15 +++-- .../doris/statistics/ExternalAnalysisTask.java | 5 ++ .../apache/doris/statistics/HMSAnalysisTask.java | 21 +++++++ .../org/apache/doris/statistics/HistogramTask.java | 5 ++ .../apache/doris/statistics/OlapAnalysisTask.java | 42 +++++++++++++ .../doris/statistics/StatisticsAutoCollector.java | 2 +- .../doris/statistics/StatisticsRepository.java | 27 ++++++-- .../apache/doris/statistics/TableStatsMeta.java | 6 +- .../doris/statistics/util/StatisticsUtil.java | 2 +- .../doris/statistics/util/StatisticsUtilTest.java | 14 ++--- .../suites/statistics/test_partition_stats.groovy | 72 ++++++++++++++++++++++ 16 files changed, 209 insertions(+), 29 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index 3b647c8e9fa..9d025695c7b 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -177,7 +177,7 @@ public class ShowTableStatsStmt extends ShowStmt { row.add(formattedDateTime); row.add(tableStatistic.analyzeColumns().toString()); row.add(tableStatistic.jobType.toString()); - row.add(String.valueOf(tableStatistic.newPartitionLoaded.get())); + row.add(String.valueOf(tableStatistic.partitionChanged.get())); row.add(String.valueOf(tableStatistic.userInjected)); result.add(row); return new ShowResultSet(getMetaData(), result); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchema.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchema.java index 74c2897bdc4..a2618db2ff4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchema.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchema.java @@ -78,7 +78,9 @@ public class InternalSchema { ColumnNullableType.NOT_NULLABLE)); PARTITION_STATS_SCHEMA.add(new ColumnDef("idx_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN), ColumnNullableType.NOT_NULLABLE)); - PARTITION_STATS_SCHEMA.add(new ColumnDef("part_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN), + PARTITION_STATS_SCHEMA.add(new ColumnDef("part_name", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN), + ColumnNullableType.NOT_NULLABLE)); + PARTITION_STATS_SCHEMA.add(new ColumnDef("part_id", TypeDef.create(PrimitiveType.BIGINT), ColumnNullableType.NOT_NULLABLE)); PARTITION_STATS_SCHEMA.add(new ColumnDef("col_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN), ColumnNullableType.NOT_NULLABLE)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java index af9451d9de9..727cf89f339 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java @@ -169,7 +169,7 @@ public class InternalSchemaInitializer extends Thread { Lists.newArrayList("id", "catalog_id", "db_id", "tbl_id", "idx_id", "col_id", "part_id"))); Env.getCurrentEnv().getInternalCatalog().createTable( buildStatisticsTblStmt(StatisticConstants.PARTITION_STATISTIC_TBL_NAME, - Lists.newArrayList("catalog_id", "db_id", "tbl_id", "idx_id", "part_id", "col_id"))); + Lists.newArrayList("catalog_id", "db_id", "tbl_id", "idx_id", "part_name", "part_id", "col_id"))); // audit table Env.getCurrentEnv().getInternalCatalog().createTable(buildAuditTblStmt()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index b52d09e42c5..dcad188d946 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -155,6 +155,7 @@ import org.apache.doris.persist.ReplicaPersistInfo; import org.apache.doris.persist.TruncateTableInfo; import org.apache.doris.qe.ConnectContext; import org.apache.doris.resource.Tag; +import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.system.Backend; import org.apache.doris.system.SystemInfoService; import org.apache.doris.task.AgentBatchTask; @@ -1901,6 +1902,11 @@ public class InternalCatalog implements CatalogIf<Database> { // but in order to avoid bugs affecting the original logic, all exceptions are caught LOG.warn("produceEvent failed: ", t); } + // Set new partition loaded flag for statistics. This will trigger auto analyzing to update dropped partition. 
+ TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(olapTable.getId()); + if (tableStats != null && tableStats.partitionChanged != null) { + tableStats.partitionChanged.set(true); + } // log DropPartitionInfo info = new DropPartitionInfo(db.getId(), olapTable.getId(), partitionName, isTempPartition, clause.isForceDrop(), recycleTime, version, versionTime); @@ -1924,6 +1930,11 @@ public class InternalCatalog implements CatalogIf<Database> { } } olapTable.updateVisibleVersionAndTime(info.getVersion(), info.getVersionTime()); + // Replay set new partition loaded flag to true for auto analyze. + TableStatsMeta stats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(olapTable.getId()); + if (stats != null && stats.partitionChanged != null) { + stats.partitionChanged.set(true); + } } finally { olapTable.writeUnlock(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 542efc224a5..4b9c7129be6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -657,7 +657,7 @@ public class AnalysisManager implements Writable { invalidateLocalStats(catalogId, dbId, tblId, cols, tableStats); // Drop stats ddl is master only operation. invalidateRemoteStats(catalogId, dbId, tblId, cols); - StatisticsRepository.dropStatistics(catalogId, dbId, tblId, cols, null); + StatisticsRepository.dropStatistics(catalogId, dbId, tblId, cols); } public void dropStats(TableIf table) throws DdlException { @@ -671,7 +671,7 @@ public class AnalysisManager implements Writable { invalidateLocalStats(catalogId, dbId, tableId, null, tableStats); // Drop stats ddl is master only operation. 
invalidateRemoteStats(catalogId, dbId, tableId, null); - StatisticsRepository.dropStatistics(catalogId, dbId, table.getId(), null, null); + StatisticsRepository.dropStatistics(catalogId, dbId, table.getId(), null); } public void invalidateLocalStats(long catalogId, long dbId, long tableId, @@ -1054,7 +1054,7 @@ public class AnalysisManager implements Writable { for (long tableId : tableIds) { TableStatsMeta statsStatus = idToTblStats.get(tableId); if (statsStatus != null) { - statsStatus.newPartitionLoaded.set(true); + statsStatus.partitionChanged.set(true); } } logNewPartitionLoadedEvent(new NewPartitionLoadedEvent(tableIds)); @@ -1189,7 +1189,7 @@ public class AnalysisManager implements Writable { for (long tableId : event.getTableIds()) { TableStatsMeta statsStatus = idToTblStats.get(tableId); if (statsStatus != null) { - statsStatus.newPartitionLoaded.set(true); + statsStatus.partitionChanged.set(true); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index c1bb1e574cf..466d1ff6947 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -25,6 +25,7 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.Type; +import org.apache.doris.common.DdlException; import org.apache.doris.common.util.DebugUtil; import org.apache.doris.datasource.CatalogIf; import org.apache.doris.qe.AuditLogHelper; @@ -162,6 +163,7 @@ public abstract class BaseAnalysisTask { + "${dbId} AS `db_id`, " + "${tblId} AS `tbl_id`, " + "${idxId} AS `idx_id`, " + + "${partName} AS `part_name`, " + "${partId} AS `part_id`, " + "'${colId}' AS `col_id`, " + "COUNT(1) AS `row_count`, " @@ -346,11 +348,13 @@ public abstract class BaseAnalysisTask { } /** - * 1. 
Get stats of each partition - * 2. insert partition in batch - * 3. calculate column stats based on partition stats + * 1. Remove not exist partition stats + * 2. Get stats of each partition + * 3. insert partition in batch + * 4. calculate column stats based on partition stats */ protected void doPartitionTable() throws Exception { + deleteNotExistPartitionStats(); Map<String, String> params = buildSqlParams(); params.put("dataSizeFunction", getDataSizeFunction(col, false)); Set<String> partitionNames = tbl.getPartitionNames(); @@ -359,6 +363,7 @@ public abstract class BaseAnalysisTask { TableStatsMeta tableStatsStatus = Env.getServingEnv().getAnalysisManager().findTableStatsStatus(tbl.getId()); for (String part : partitionNames) { Partition partition = tbl.getPartition(part); + params.put("partId", partition == null ? "-1" : String.valueOf(partition.getId())); // Skip partitions that not changed after last analyze. // External table getPartition always return null. So external table doesn't skip any partitions. 
if (partition != null && tableStatsStatus != null && tableStatsStatus.partitionUpdateRows != null) { @@ -375,7 +380,7 @@ public abstract class BaseAnalysisTask { } } } - params.put("partId", "'" + StatisticsUtil.escapeColumnName(part) + "'"); + params.put("partName", "'" + StatisticsUtil.escapeColumnName(part) + "'"); params.put("partitionInfo", getPartitionInfo(part)); StringSubstitutor stringSubstitutor = new StringSubstitutor(params); sqls.add(stringSubstitutor.replace(PARTITION_ANALYZE_TEMPLATE)); @@ -400,6 +405,8 @@ public abstract class BaseAnalysisTask { runQuery(stringSubstitutor.replace(MERGE_PARTITION_TEMPLATE)); } + protected abstract void deleteNotExistPartitionStats() throws DdlException; + protected String getPartitionInfo(String partitionName) { return ""; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java index 8452781cd60..1f3e8782923 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java @@ -17,6 +17,7 @@ package org.apache.doris.statistics; +import org.apache.doris.common.DdlException; import org.apache.doris.common.FeConstants; import org.apache.doris.common.NotImplementedException; import org.apache.doris.datasource.ExternalTable; @@ -51,6 +52,10 @@ public class ExternalAnalysisTask extends BaseAnalysisTask { } } + @Override + protected void deleteNotExistPartitionStats() throws DdlException { + } + protected void doFull() throws Exception { StringBuilder sb = new StringBuilder(); Map<String, String> params = buildSqlParams(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java index e368d4b5051..61a29ef03a5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java 
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java @@ -18,7 +18,10 @@ package org.apache.doris.statistics; import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.DdlException; import org.apache.doris.common.Pair; import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.hive.HMSExternalTable; @@ -273,6 +276,24 @@ public class HMSAnalysisTask extends ExternalAnalysisTask { } } + @Override + protected void deleteNotExistPartitionStats() throws DdlException { + TableStatsMeta tableStats = Env.getServingEnv().getAnalysisManager().findTableStatsStatus(tbl.getId()); + if (tableStats == null) { + return; + } + OlapTable table = (OlapTable) tbl; + String indexName = info.indexId == -1 ? table.getName() : table.getIndexNameById(info.indexId); + ColStatsMeta columnStats = tableStats.findColumnStatsMeta(indexName, info.colName); + if (columnStats == null) { + return; + } + // For external table, simply remove all partition stats for the given column and re-analyze it again. 
+ String columnCondition = "AND col_id = " + StatisticsUtil.quote(col.getName()); + StatisticsRepository.dropPartitionsColumnStatistics(info.catalogId, info.dbId, info.tblId, + columnCondition, ""); + } + @Override protected String getPartitionInfo(String partitionName) { // partitionName is like "date=20230101" for one level partition diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java index 00c92325e19..c97747e7d67 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java @@ -18,6 +18,7 @@ package org.apache.doris.statistics; import org.apache.doris.catalog.Env; +import org.apache.doris.common.DdlException; import org.apache.doris.common.FeConstants; import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import org.apache.doris.statistics.util.StatisticsUtil; @@ -73,6 +74,10 @@ public class HistogramTask extends BaseAnalysisTask { tbl.getDatabase().getCatalog().getId(), tbl.getDatabase().getId(), tbl.getId(), -1, col.getName()); } + @Override + protected void deleteNotExistPartitionStats() throws DdlException { + } + private String getSampleRateFunction() { if (info.analysisMethod == AnalysisMethod.FULL) { return "0"; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index edda97a1eed..5f09893ba1f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -19,11 +19,14 @@ package org.apache.doris.statistics; import org.apache.doris.analysis.CreateMaterializedViewStmt; import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Env; import org.apache.doris.catalog.KeysType; import 
org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.MaterializedIndexMeta; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; +import org.apache.doris.common.Config; +import org.apache.doris.common.DdlException; import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; import org.apache.doris.common.util.DebugUtil; @@ -33,6 +36,8 @@ import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import com.google.common.collect.Sets; import org.apache.commons.text.StringSubstitutor; import java.security.SecureRandom; @@ -201,6 +206,43 @@ public class OlapAnalysisTask extends BaseAnalysisTask { } } + @Override + protected void deleteNotExistPartitionStats() throws DdlException { + TableStatsMeta tableStats = Env.getServingEnv().getAnalysisManager().findTableStatsStatus(tbl.getId()); + // When a partition was dropped, newPartitionLoaded will set to true. + // So we don't need to check dropped partition if newPartitionLoaded is false. + if (tableStats == null || !tableStats.partitionChanged.get()) { + return; + } + OlapTable table = (OlapTable) tbl; + String indexName = info.indexId == -1 ? 
table.getName() : table.getIndexNameById(info.indexId); + ColStatsMeta columnStats = tableStats.findColumnStatsMeta(indexName, info.colName); + if (columnStats == null || columnStats.partitionUpdateRows == null + || columnStats.partitionUpdateRows.isEmpty()) { + return; + } + Set<Long> expiredPartition = Sets.newHashSet(); + String columnCondition = "AND col_id = " + StatisticsUtil.quote(col.getName()); + for (long partId : columnStats.partitionUpdateRows.keySet()) { + Partition partition = table.getPartition(partId); + if (partition == null) { + columnStats.partitionUpdateRows.remove(partId); + expiredPartition.add(partId); + if (expiredPartition.size() == Config.max_allowed_in_element_num_of_delete) { + String partitionCondition = " AND part_id in (" + Joiner.on(", ").join(expiredPartition) + ")"; + StatisticsRepository.dropPartitionsColumnStatistics(info.catalogId, info.dbId, info.tblId, + columnCondition, partitionCondition); + expiredPartition.clear(); + } + } + } + if (expiredPartition.size() > 0) { + String partitionCondition = " AND part_id in (" + Joiner.on(", ").join(expiredPartition) + ")"; + StatisticsRepository.dropPartitionsColumnStatistics(info.catalogId, info.dbId, info.tblId, + columnCondition, partitionCondition); + } + } + @Override protected String getPartitionInfo(String partitionName) { return "partition " + partitionName; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index ee86379d7a8..ba2262ea547 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -153,7 +153,7 @@ public class StatisticsAutoCollector extends MasterDaemon { } AnalysisManager manager = Env.getServingEnv().getAnalysisManager(); TableStatsMeta tableStatsStatus = manager.findTableStatsStatus(table.getId()); - if 
(tableStatsStatus != null && tableStatsStatus.newPartitionLoaded.get()) { + if (tableStatsStatus != null && tableStatsStatus.partitionChanged.get()) { OlapTable olapTable = (OlapTable) table; columns.addAll(olapTable.getColumnIndexPairs(olapTable.getPartitionColumnNames())); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 752067d4808..02b22504d3d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -69,13 +69,13 @@ public class StatisticsRepository { + "`col_id`, `count`, hll_to_base64(`ndv`) as ndv, `null_count`, `min`, `max`, `data_size_in_bytes`, " + "`update_time` FROM " + FULL_QUALIFIED_PARTITION_STATISTICS_NAME + " WHERE `catalog_id` = '${catalogId}' AND `db_id` = '${dbId}' AND `tbl_id` = ${tableId}" - + " AND `idx_id` = '${indexId}' AND `part_id` = '${partId}' AND `col_id` = '${columnId}'"; + + " AND `idx_id` = '${indexId}' AND `part_name` = '${partName}' AND `col_id` = '${columnId}'"; - private static final String FETCH_PARTITIONS_STATISTIC_TEMPLATE = "SELECT col_id, part_id, idx_id, count, " + private static final String FETCH_PARTITIONS_STATISTIC_TEMPLATE = "SELECT col_id, part_name, idx_id, count, " + "hll_cardinality(ndv) as ndv, null_count, min, max, data_size_in_bytes, update_time FROM " + FULL_QUALIFIED_PARTITION_STATISTICS_NAME + " WHERE `catalog_id` = '${catalogId}' AND `db_id` = '${dbId}' AND `tbl_id` = ${tableId}" - + " AND `part_id` in (${partitionInfo}) AND `col_id` in (${columnInfo})"; + + " AND `part_name` in (${partitionInfo}) AND `col_id` in (${columnInfo})"; private static final String FETCH_COLUMN_HISTOGRAM_TEMPLATE = "SELECT * FROM " + FULL_QUALIFIED_COLUMN_HISTOGRAM_NAME @@ -113,7 +113,7 @@ public class StatisticsRepository { + "LIMIT ${limit} OFFSET ${offset}"; private static 
final String FETCH_PARTITION_STATS_FULL_NAME = - "SELECT \"\" as id, catalog_id, db_id, tbl_id, idx_id, col_id, part_id FROM " + "SELECT \"\" as id, catalog_id, db_id, tbl_id, idx_id, col_id, part_name FROM " + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.PARTITION_STATISTIC_TBL_NAME + " ORDER BY update_time " + "LIMIT ${limit} OFFSET ${offset}"; @@ -206,7 +206,7 @@ public class StatisticsRepository { } public static void dropStatistics( - long ctlId, long dbId, long tblId, Set<String> colNames, Set<String> partNames) throws DdlException { + long ctlId, long dbId, long tblId, Set<String> colNames) throws DdlException { if (colNames == null) { executeDropStatisticsAllColumnSql(ctlId, dbId, tblId); } else { @@ -214,6 +214,21 @@ public class StatisticsRepository { } } + public static void dropPartitionsColumnStatistics(long ctlId, long dbId, long tblId, + String columnCondition, String partitionCondition) throws DdlException { + Map<String, String> params = new HashMap<>(); + generateCtlDbIdParams(ctlId, dbId, params); + params.put("tblId", String.valueOf(tblId)); + params.put("columnCondition", columnCondition); + params.put("partitionCondition", partitionCondition); + try { + StatisticsUtil.execUpdate( + new StringSubstitutor(params).replace(DELETE_PARTITION_STATISTICS_TEMPLATE)); + } catch (Exception e) { + throw new DdlException(e.getMessage(), e); + } + } + private static void dropStatisticsByColName(long ctlId, long dbId, long tblId, Set<String> colNames) throws DdlException { Map<String, String> params = new HashMap<>(); @@ -389,7 +404,7 @@ public class StatisticsRepository { generateCtlDbIdParams(ctlId, dbId, params); params.put("tableId", String.valueOf(tableId)); params.put("indexId", String.valueOf(idxId)); - params.put("partId", partName); + params.put("partName", partName); params.put("columnId", colName); return StatisticsUtil.execStatisticQuery(new StringSubstitutor(params) .replace(FETCH_PARTITION_STATISTIC_TEMPLATE)); diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index cd1d010f262..7c2a01c7e11 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -74,7 +74,7 @@ public class TableStatsMeta implements Writable { public JobType jobType; @SerializedName("newPartitionLoaded") - public AtomicBoolean newPartitionLoaded = new AtomicBoolean(false); + public AtomicBoolean partitionChanged = new AtomicBoolean(false); @SerializedName("userInjected") public boolean userInjected; @@ -163,14 +163,14 @@ public class TableStatsMeta implements Writable { tableIf.getSchemaAllIndexes(false).stream() .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) .map(Column::getName).collect(Collectors.toSet())))) { - newPartitionLoaded.set(false); + partitionChanged.set(false); userInjected = false; } else if (tableIf instanceof OlapTable) { PartitionInfo partitionInfo = ((OlapTable) tableIf).getPartitionInfo(); if (partitionInfo != null && analyzedJob.jobColumns .containsAll(tableIf.getColumnIndexPairs(partitionInfo.getPartitionColumns().stream() .map(Column::getName).collect(Collectors.toSet())))) { - newPartitionLoaded.set(false); + partitionChanged.set(false); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index b908ecf2019..1c6457e9521 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -990,7 +990,7 @@ public class StatisticsUtil { if (table instanceof OlapTable) { OlapTable olapTable = (OlapTable) table; // 0. Check new partition first time loaded flag. 
- if (olapTable.isPartitionColumn(column.second) && tableStatsStatus.newPartitionLoaded.get()) { + if (olapTable.isPartitionColumn(column.second) && tableStatsStatus.partitionChanged.get()) { return true; } // 1. Check row count. diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java index 49e7751f264..ebe86ce9d61 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java @@ -221,7 +221,7 @@ class StatisticsUtilTest { return true; } }; - tableMeta.newPartitionLoaded.set(true); + tableMeta.partitionChanged.set(true); Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); // Test empty table to non-empty table. @@ -231,7 +231,7 @@ class StatisticsUtilTest { return 100; } }; - tableMeta.newPartitionLoaded.set(false); + tableMeta.partitionChanged.set(false); Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); // Test non-empty table to empty table. @@ -247,7 +247,7 @@ class StatisticsUtilTest { return new ColStatsMeta(0, null, null, null, 0, 100, 0, null); } }; - tableMeta.newPartitionLoaded.set(false); + tableMeta.partitionChanged.set(false); Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); // Test table still empty. @@ -257,7 +257,7 @@ class StatisticsUtilTest { return new ColStatsMeta(0, null, null, null, 0, 0, 0, null); } }; - tableMeta.newPartitionLoaded.set(false); + tableMeta.partitionChanged.set(false); Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); // Test row count changed more than threshold. 
@@ -273,7 +273,7 @@ class StatisticsUtilTest { return new ColStatsMeta(0, null, null, null, 0, 500, 0, null); } }; - tableMeta.newPartitionLoaded.set(false); + tableMeta.partitionChanged.set(false); Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); // Test update rows changed more than threshold. @@ -289,12 +289,12 @@ class StatisticsUtilTest { return new ColStatsMeta(0, null, null, null, 0, 100, 80, null); } }; - tableMeta.newPartitionLoaded.set(false); + tableMeta.partitionChanged.set(false); tableMeta.updatedRows.set(200); Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); // Test update rows changed less than threshold - tableMeta.newPartitionLoaded.set(false); + tableMeta.partitionChanged.set(false); tableMeta.updatedRows.set(100); Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); diff --git a/regression-test/suites/statistics/test_partition_stats.groovy b/regression-test/suites/statistics/test_partition_stats.groovy index c07a8ce8461..ae780c3e221 100644 --- a/regression-test/suites/statistics/test_partition_stats.groovy +++ b/regression-test/suites/statistics/test_partition_stats.groovy @@ -145,6 +145,7 @@ suite("test_partition_stats") { sql """Insert into part1 values (1, 1, 1, 1, 1, 1, 1.1, 1.1, 1.1), (2, 2, 2, 2, 2, 2, 2.2, 2.2, 2.2), (3, 3, 3, 3, 3, 3, 3.3, 3.3, 3.3),(4, 4, 4, 4, 4, 4, 4.4, 4.4, 4.4),(5, 5, 5, 5, 5, 5, 5.5, 5.5, 5.5),(6, 6, 6, 6, 6, 6, 6.6, 6.6, 6.6),(10001, 10001, 10001, 10001, 10001, 10001, 10001.10001, 10001.10001, 10001.10001),(10002, 10002, 10002, 10002, 10002, 10002, 10002.10002, 10002.10002, 10002.10002),(10003, 10003, 10003, 10003, 10003, 10003, 10003.10003, 10003.10003, 10003.10003),(100 [...] sql """analyze table part1 with sync;""" + // Test drop expired stats. 
def dbId def tblIdPart def tblIdPart1 @@ -181,5 +182,76 @@ suite("test_partition_stats") { result = sql """select * from internal.__internal_schema.partition_statistics where tbl_id = ${tblIdPart1}""" assertEquals(0, result.size()) + // Test analyze table after drop partition + sql """drop database if exists test_partition_stats""" + sql """create database test_partition_stats""" + sql """use test_partition_stats""" + sql """CREATE TABLE `part` ( + `id` INT NULL, + `colint` INT NULL, + `coltinyint` tinyint NULL, + `colsmallint` smallINT NULL, + `colbigint` bigINT NULL, + `collargeint` largeINT NULL, + `colfloat` float NULL, + `coldouble` double NULL, + `coldecimal` decimal(27, 9) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + PARTITION BY RANGE(`id`) + ( + PARTITION p1 VALUES [("-2147483648"), ("10000")), + PARTITION p2 VALUES [("10000"), ("20000")), + PARTITION p3 VALUES [("20000"), ("30000")) + ) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ) + """ + sql """analyze table part with sync;""" + sql """Insert into part values (1, 1, 1, 1, 1, 1, 1.1, 1.1, 1.1), (2, 2, 2, 2, 2, 2, 2.2, 2.2, 2.2), (3, 3, 3, 3, 3, 3, 3.3, 3.3, 3.3),(4, 4, 4, 4, 4, 4, 4.4, 4.4, 4.4),(5, 5, 5, 5, 5, 5, 5.5, 5.5, 5.5),(6, 6, 6, 6, 6, 6, 6.6, 6.6, 6.6),(10001, 10001, 10001, 10001, 10001, 10001, 10001.10001, 10001.10001, 10001.10001),(10002, 10002, 10002, 10002, 10002, 10002, 10002.10002, 10002.10002, 10002.10002),(10003, 10003, 10003, 10003, 10003, 10003, 10003.10003, 10003.10003, 10003.10003),(1000 [...] 
+ sql """analyze table part with sync;""" + result = sql """show column stats part""" + assertEquals(9, result.size()) + assertEquals("18.0", result[0][2]) + assertEquals("18.0", result[1][2]) + assertEquals("18.0", result[2][2]) + assertEquals("18.0", result[3][2]) + assertEquals("18.0", result[4][2]) + assertEquals("18.0", result[5][2]) + assertEquals("18.0", result[6][2]) + assertEquals("18.0", result[7][2]) + assertEquals("18.0", result[8][2]) + result = sql """show column stats part partition(*)""" + assertEquals(27, result.size()) + sql """alter table part drop partition p3""" + result = sql """show table stats part""" + assertEquals("true", result[0][6]) + sql """analyze table part with sync;""" + result = sql """show table stats part""" + assertEquals("false", result[0][6]) + result = sql """show column stats part partition(*)""" + assertEquals(18, result.size()) + result = sql """show column stats part partition(p3)""" + assertEquals(0, result.size()) + result = sql """show column stats part partition(p1)""" + assertEquals(9, result.size()) + result = sql """show column stats part partition(p2)""" + assertEquals(9, result.size()) + result = sql """show column stats part""" + assertEquals(9, result.size()) + assertEquals("12.0", result[0][2]) + assertEquals("12.0", result[1][2]) + assertEquals("12.0", result[2][2]) + assertEquals("12.0", result[3][2]) + assertEquals("12.0", result[4][2]) + assertEquals("12.0", result[5][2]) + assertEquals("12.0", result[6][2]) + assertEquals("12.0", result[7][2]) + assertEquals("12.0", result[8][2]) + + sql """drop database test_partition_stats""" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org