This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a1a8ee8320 [enchancement](stats) Inject partition statistics #21543
a1a8ee8320 is described below

commit a1a8ee83208978a7c40d11c4503440af4c2531e2
Author: ElvinWei <zhengte....@outlook.com>
AuthorDate: Mon Jul 10 15:06:25 2023 +0800

    [enchancement](stats) Inject partition statistics #21543
    
    Cost estimation can be more accurate if partition statistics are 
available. However, for big data sets such as 1 TB, it is not practical to actually import the data.
    
    So now we want to extend this by injecting partition statistics.
    
    Syntax:
    
    ALTER TABLE table_name MODIFY COLUMN column_name SET STATS ('stat_name' = 
'stat_value', ...)
      [ PARTITION (partition_name) ];
    Explanation:
    
    - Table_name: The table whose statistics are modified. It can be in 
db_name.table_name form.
    - Column_name: The target column. Must be a column that exists in 
table_name. Statistics can only be modified one column at a time.
    
    - Stat_name and stat_value: The corresponding stat name and the value 
of the stat info. Multiple stats are comma separated. Statistics that can be 
modified include row_count, ndv, num_nulls, min_value, max_value, and data_size.
    
    - Partition_name: specifies the target partition. Must be a partition 
existing in table_name. Multiple partitions are separated by commas.
---
 docs/en/docs/query-acceleration/statistics.md      |  3 +-
 docs/zh-CN/docs/query-acceleration/statistics.md   |  3 +-
 fe/fe-core/src/main/cup/sql_parser.cup             |  2 +-
 .../doris/analysis/AlterColumnStatsStmt.java       | 50 ++++++++++++++++++----
 .../doris/statistics/StatisticsRepository.java     | 23 ++++++++--
 5 files changed, 66 insertions(+), 15 deletions(-)

diff --git a/docs/en/docs/query-acceleration/statistics.md 
b/docs/en/docs/query-acceleration/statistics.md
index 6fb9de9c99..2ab48ebc57 100644
--- a/docs/en/docs/query-acceleration/statistics.md
+++ b/docs/en/docs/query-acceleration/statistics.md
@@ -789,7 +789,7 @@ Buckets description:
 Users can modify the statistics information through statements `ALTER`, and 
modify the corresponding statistics information of the column according to the 
provided parameters.
 
 ```SQL
-ALTER TABLE table_name MODIFY COLUMN column_name SET STATS ('stat_name' = 
'stat_value', ...);
+ALTER TABLE table_name MODIFY COLUMN column_name SET STATS ('stat_name' = 
'stat_value', ...) [ PARTITION (partition_name) ];
 ```
 
 Explanation:
@@ -797,6 +797,7 @@ Explanation:
 - Table_name: The table to which the statistics are dropped. It can be a 
`db_name.table_name` form.
 - Column_name: Specified target column. `table_name` Must be a column that 
exists in. Statistics can only be modified one column at a time.
 - Stat _ name and stat _ value: The corresponding stat name and the value of 
the stat info. Multiple stats are comma separated. Statistics that can be 
modified include `row_count`, `ndv`, `num_nulls` `min_value` `max_value`, and 
`data_size`.
+- Partition_name: specifies the target partition. Must be a partition existing 
in `table_name`. Multiple partitions are separated by commas.
 
 Example:
 
diff --git a/docs/zh-CN/docs/query-acceleration/statistics.md 
b/docs/zh-CN/docs/query-acceleration/statistics.md
index 3e64b08bd5..9be196e79b 100644
--- a/docs/zh-CN/docs/query-acceleration/statistics.md
+++ b/docs/zh-CN/docs/query-acceleration/statistics.md
@@ -852,7 +852,7 @@ Buckets 说明:
 ⽤户通过 `ALTER` 语句修改统计信息,根据提供的参数,修改列相应的统计信息。
 
 ```SQL
-ALTER TABLE table_name MODIFY COLUMN column_name SET STATS ('stat_name' = 
'stat_value', ...);
+ALTER TABLE table_name MODIFY COLUMN column_name SET STATS ('stat_name' = 
'stat_value', ...) [ PARTITION (partition_name) ];
 ```
 
 其中:
@@ -860,6 +860,7 @@ ALTER TABLE table_name MODIFY COLUMN column_name SET STATS 
('stat_name' = 'stat_
 - table_name: 删除统计信息的目标表。可以是 `db_name.table_name` 形式。
 - column_name: 指定的目标列,必须是 `table_name` 中存在的列,每次只能修改一列的统计信息。
 - stat_name 和 stat_value: 相应的统计信息名称和统计信息信息的值,多个统计信息逗号分隔。可以修改的统计信息包括 
`row_count`, `ndv`, `num_nulls`, `min_value`, `max_value`, `data_size`。
+- partition_name: 指定的目标分区。必须是 `table_name` 中存在的分区,多个分区使用逗号分割。
 
 示例:
 
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup 
b/fe/fe-core/src/main/cup/sql_parser.cup
index fbf319fd1b..20d722c6a2 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -1339,7 +1339,7 @@ alter_stmt ::=
     | KW_ALTER KW_TABLE table_name:tbl KW_MODIFY KW_COLUMN ident:columnName
       KW_SET KW_STATS LPAREN key_value_map:map RPAREN 
opt_partition_names:partitionNames
     {:
-        RESULT = new AlterColumnStatsStmt(tbl, columnName, map);
+        RESULT = new AlterColumnStatsStmt(tbl, columnName, map, 
partitionNames);
     :}
     | KW_ALTER KW_TABLE table_name:tbl KW_SET LPAREN key_value_map:properties 
RPAREN
     {:
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
index 3e16a380e4..0e7892dcd1 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
@@ -19,6 +19,10 @@ package org.apache.doris.analysis;
 
 import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.Partition;
+import org.apache.doris.catalog.PartitionType;
+import org.apache.doris.catalog.Table;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.Config;
@@ -53,6 +57,9 @@ import java.util.Optional;
  * e.g.
  *   ALTER TABLE stats_test.example_tbl MODIFY COLUMN age
  *   SET STATS ('row_count'='6001215');
+ *
+ * Note: partition stats injection is mainly convenient for test cost 
estimation,
+ * and can be removed after the related functions are completed.
  */
 public class AlterColumnStatsStmt extends DdlStmt {
 
@@ -70,15 +77,17 @@ public class AlterColumnStatsStmt extends DdlStmt {
     private final TableName tableName;
     private final String columnName;
     private final Map<String, String> properties;
+    private final PartitionNames optPartitionNames;
 
-    private final List<String> partitionNames = Lists.newArrayList();
+    private final List<Long> partitionIds = Lists.newArrayList();
     private final Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
 
     public AlterColumnStatsStmt(TableName tableName, String columnName,
-            Map<String, String> properties) {
+            Map<String, String> properties, PartitionNames optPartitionNames) {
         this.tableName = tableName;
         this.columnName = columnName;
         this.properties = properties == null ? Collections.emptyMap() : 
properties;
+        this.optPartitionNames = optPartitionNames;
     }
 
     public TableName getTableName() {
@@ -89,8 +98,8 @@ public class AlterColumnStatsStmt extends DdlStmt {
         return columnName;
     }
 
-    public List<String> getPartitionNames() {
-        return partitionNames;
+    public List<Long> getPartitionIds() {
+        return partitionIds;
     }
 
     public Map<StatsType, String> getStatsTypeToValue() {
@@ -109,7 +118,7 @@ public class AlterColumnStatsStmt extends DdlStmt {
         tableName.analyze(analyzer);
 
         // check partition & column
-        checkColumnNames();
+        checkPartitionAndColumn();
 
         // check properties
         Optional<StatsType> optional = 
properties.keySet().stream().map(StatsType::fromString)
@@ -134,14 +143,36 @@ public class AlterColumnStatsStmt extends DdlStmt {
         });
     }
 
-    private void checkColumnNames() throws AnalysisException {
+    private void checkPartitionAndColumn() throws AnalysisException {
         CatalogIf catalog = 
analyzer.getEnv().getCatalogMgr().getCatalog(tableName.getCtl());
         DatabaseIf db = catalog.getDbOrAnalysisException(tableName.getDb());
         TableIf table = db.getTableOrAnalysisException(tableName.getTbl());
-        if (table.getColumn(columnName) == null) {
+
+        if (table.getType() != Table.TableType.OLAP) {
+            throw new AnalysisException("Only OLAP table statistics are 
supported");
+        }
+
+        OlapTable olapTable = (OlapTable) table;
+        if (olapTable.getColumn(columnName) == null) {
             
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
                     columnName, FeNameFormat.getColumnNameRegex());
         }
+
+        if (optPartitionNames != null) {
+            if 
(olapTable.getPartitionInfo().getType().equals(PartitionType.UNPARTITIONED)) {
+                throw new AnalysisException("Not a partitioned table: " + 
olapTable.getName());
+            }
+
+            optPartitionNames.analyze(analyzer);
+            List<String> partitionNames = 
optPartitionNames.getPartitionNames();
+            for (String partitionName : partitionNames) {
+                Partition partition = olapTable.getPartition(partitionName);
+                if (partition == null) {
+                    throw new AnalysisException("Partition does not exist: " + 
partitionName);
+                }
+                partitionIds.add(partition.getId());
+            }
+        }
     }
 
     @Override
@@ -156,7 +187,10 @@ public class AlterColumnStatsStmt extends DdlStmt {
         sb.append(new PrintableMap<>(properties,
                 " = ", true, false));
         sb.append(")");
-
+        if (optPartitionNames != null) {
+            sb.append(" ");
+            sb.append(optPartitionNames.toSql());
+        }
         return sb.toString();
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 70c495f29b..53ea8e7fd2 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -271,6 +271,7 @@ public class StatisticsRepository {
 
     public static void alterColumnStatistics(AlterColumnStatsStmt 
alterColumnStatsStmt) throws Exception {
         TableName tableName = alterColumnStatsStmt.getTableName();
+        List<Long> partitionIds = alterColumnStatsStmt.getPartitionIds();
         DBObjects objects = 
StatisticsUtil.convertTableNameToObjects(tableName);
         String rowCount = alterColumnStatsStmt.getValue(StatsType.ROW_COUNT);
         String ndv = alterColumnStatsStmt.getValue(StatsType.NDV);
@@ -312,16 +313,30 @@ public class StatisticsRepository {
         params.put("idxId", "-1");
         params.put("tblId", String.valueOf(objects.table.getId()));
         params.put("colId", String.valueOf(colName));
-        params.put("partId", "NULL");
         params.put("count", String.valueOf(columnStatistic.count));
         params.put("ndv", String.valueOf(columnStatistic.ndv));
         params.put("nullCount", String.valueOf(columnStatistic.numNulls));
         params.put("min", min == null ? "NULL" : min);
         params.put("max", max == null ? "NULL" : max);
         params.put("dataSize", String.valueOf(columnStatistic.dataSize));
-        StatisticsUtil.execUpdate(INSERT_INTO_COLUMN_STATISTICS, params);
-        Env.getCurrentEnv().getStatisticsCache()
-                .updateColStatsCache(objects.table.getId(), -1, colName, 
builder.build());
+
+        if (partitionIds.isEmpty()) {
+            // update table granularity statistics
+            params.put("partId", "NULL");
+            StatisticsUtil.execUpdate(INSERT_INTO_COLUMN_STATISTICS, params);
+            Env.getCurrentEnv().getStatisticsCache()
+                    .updateColStatsCache(objects.table.getId(), -1, colName, 
builder.build());
+        } else {
+            // update partition granularity statistics
+            for (Long partitionId : partitionIds) {
+                HashMap<String, String> partParams = Maps.newHashMap(params);
+                partParams.put("partId", String.valueOf(partitionId));
+                StatisticsUtil.execUpdate(INSERT_INTO_COLUMN_STATISTICS, 
partParams);
+                // TODO cache partition granular statistics
+                // Env.getCurrentEnv().getStatisticsCache()
+                //         .updateColStatsCache(partitionId, -1, colName, 
builder.build());
+            }
+        }
     }
 
     public static List<ResultRow> fetchRecentStatsUpdatedCol() {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to