This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 4f01c03a188 Improve show column stats performance. (#31298) (#31458)
4f01c03a188 is described below

commit 4f01c03a1889810257e9e380dc5667ed9598fe0d
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Tue Feb 27 19:47:56 2024 +0800

    Improve show column stats performance. (#31298) (#31458)
---
 .../apache/doris/analysis/ShowColumnStatsStmt.java |  4 +++
 .../java/org/apache/doris/qe/ShowExecutor.java     | 32 +++++++++++++++--
 .../doris/statistics/StatisticsRepository.java     | 13 +++++++
 .../suites/statistics/test_analyze_mv.groovy       | 42 ++++++++++++++++++++--
 4 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
index be1a54eb8d2..a8d0284c138 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
@@ -179,4 +179,8 @@ public class ShowColumnStatsStmt extends ShowStmt {
     public boolean isCached() {
         return cached;
     }
+
+    public boolean isAllColumns() {
+        return columnNames == null;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index 1c05fd19027..ff9a41af076 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -200,6 +200,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.statistics.AnalysisInfo;
 import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.Histogram;
+import org.apache.doris.statistics.ResultRow;
 import org.apache.doris.statistics.StatisticsRepository;
 import org.apache.doris.statistics.TableStatsMeta;
 import org.apache.doris.statistics.query.QueryStatsUtil;
@@ -2487,7 +2488,35 @@ public class ShowExecutor {
         Set<String> columnNames = showColumnStatsStmt.getColumnNames();
         PartitionNames partitionNames = 
showColumnStatsStmt.getPartitionNames();
         boolean showCache = showColumnStatsStmt.isCached();
+        boolean isAllColumns = showColumnStatsStmt.isAllColumns();
+        if (isAllColumns && !showCache && partitionNames == null) {
+            getStatsForAllColumns(columnStatistics, tableIf);
+        } else {
+            getStatsForSpecifiedColumns(columnStatistics, columnNames, 
tableIf, showCache, tableName, partitionNames);
+        }
+        resultSet = showColumnStatsStmt.constructResultSet(columnStatistics);
+    }
+
+    private void getStatsForAllColumns(List<Pair<Pair<String, String>, 
ColumnStatistic>> columnStatistics,
+                                       TableIf tableIf) throws 
AnalysisException {
+        List<ResultRow> resultRows = 
StatisticsRepository.queryColumnStatisticsForTable(tableIf.getId());
+        for (ResultRow row : resultRows) {
+            String indexName = "N/A";
+            long indexId = Long.parseLong(row.get(4));
+            if (indexId != -1) {
+                indexName = ((OlapTable) tableIf).getIndexNameById(indexId);
+                if (indexName == null) {
+                    continue;
+                }
+            }
+            columnStatistics.add(Pair.of(Pair.of(row.get(5), indexName), 
ColumnStatistic.fromResultRow(row)));
+        }
+    }
 
+    private void getStatsForSpecifiedColumns(List<Pair<Pair<String, String>, 
ColumnStatistic>> columnStatistics,
+                                             Set<String> columnNames, TableIf 
tableIf, boolean showCache,
+                                             TableName tableName, 
PartitionNames partitionNames)
+            throws AnalysisException {
         for (String colName : columnNames) {
             // Olap base index use -1 as index id.
             List<Long> indexIds = Lists.newArrayList();
@@ -2518,13 +2547,12 @@ public class ShowExecutor {
                 } else {
                     String finalIndexName = indexName;
                     
columnStatistics.addAll(StatisticsRepository.queryColumnStatisticsByPartitions(tableName,
-                            colName, 
showColumnStatsStmt.getPartitionNames().getPartitionNames())
+                            colName, partitionNames.getPartitionNames())
                             .stream().map(s -> Pair.of(Pair.of(colName, 
finalIndexName), s))
                             .collect(Collectors.toList()));
                 }
             }
         }
-        resultSet = showColumnStatsStmt.constructResultSet(columnStatistics);
     }
 
     public void handleShowColumnHist() {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 3bf5b9fc019..75b827ecd9d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -104,6 +104,11 @@ public class StatisticsRepository {
             + " ${inPredicate}"
             + " AND part_id IS NOT NULL";
 
+    private static final String FETCH_TABLE_STATISTICS = "SELECT * FROM "
+            + FeConstants.INTERNAL_DB_NAME + "." + 
StatisticConstants.STATISTIC_TBL_NAME
+            + " WHERE tbl_id = ${tblId}"
+            + " AND part_id IS NULL";
+
     public static ColumnStatistic queryColumnStatisticsByName(long tableId, 
long indexId, String colName) {
         ResultRow resultRow = queryColumnStatisticById(tableId, indexId, 
colName);
         if (resultRow == null) {
@@ -128,6 +133,14 @@ public class StatisticsRepository {
                 Collectors.toList());
     }
 
+    public static List<ResultRow> queryColumnStatisticsForTable(long tableId)
+            throws AnalysisException {
+        Map<String, String> params = new HashMap<>();
+        params.put("tblId", String.valueOf(tableId));
+        List<ResultRow> rows = 
StatisticsUtil.executeQuery(FETCH_TABLE_STATISTICS, params);
+        return rows == null ? Collections.emptyList() : rows;
+    }
+
     public static ResultRow queryColumnStatisticById(long tblId, long indexId, 
String colName) {
         return queryColumnStatisticById(tblId, indexId, colName, false);
     }
diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy b/regression-test/suites/statistics/test_analyze_mv.groovy
index 65571c65bf4..d5611cd985b 100644
--- a/regression-test/suites/statistics/test_analyze_mv.groovy
+++ b/regression-test/suites/statistics/test_analyze_mv.groovy
@@ -74,6 +74,25 @@ suite("test_analyze_mv") {
         }
     }
 
+    def verify_column_stats = { all_column_result, one_column_result ->
+        logger.info("all column result: " + all_column_result)
+        logger.info("one column result: " + one_column_result)
+        boolean found = false;
+        for (int i = 0; i < all_column_result.size(); i++) {
+            if (all_column_result[i][0] == one_column_result[0] && 
all_column_result[i][1] == one_column_result[1]) {
+                assertEquals(all_column_result[i][2], one_column_result[2])
+                assertEquals(all_column_result[i][3], one_column_result[3])
+                assertEquals(all_column_result[i][4], one_column_result[4])
+                assertEquals(all_column_result[i][5], one_column_result[5])
+                assertEquals(all_column_result[i][6], one_column_result[6])
+                assertEquals(all_column_result[i][7], one_column_result[7])
+                assertEquals(all_column_result[i][8], one_column_result[8])
+                found = true;
+            }
+        }
+        assertTrue(found)
+    }
+
     sql """drop database if exists test_analyze_mv"""
     sql """create database test_analyze_mv"""
     sql """use test_analyze_mv"""
@@ -102,10 +121,13 @@ suite("test_analyze_mv") {
 
     sql """analyze table mvTestDup with sync;"""
 
-    def result_sample = sql """show column stats mvTestDup"""
-    assertEquals(12, result_sample.size())
+    // Compare show whole table column stats result with show single column.
+    def result_all = sql """show column stats mvTestDup"""
+    assertEquals(12, result_all.size())
+    def result_all_cached = sql """show column cached stats mvTestDup"""
+    assertEquals(12, result_all_cached.size())
 
-    result_sample = sql """show column stats mvTestDup(key1)"""
+    def result_sample = sql """show column stats mvTestDup(key1)"""
     assertEquals(1, result_sample.size())
     assertEquals("key1", result_sample[0][0])
     assertEquals("N/A", result_sample[0][1])
@@ -114,6 +136,8 @@ suite("test_analyze_mv") {
     assertEquals("1", result_sample[0][7])
     assertEquals("1001", result_sample[0][8])
     assertEquals("FULL", result_sample[0][9])
+    verify_column_stats(result_all, result_sample[0])
+    verify_column_stats(result_all_cached, result_sample[0])
 
     result_sample = sql """show column stats mvTestDup(value1)"""
     assertEquals(1, result_sample.size())
@@ -124,6 +148,8 @@ suite("test_analyze_mv") {
     assertEquals("3", result_sample[0][7])
     assertEquals("3001", result_sample[0][8])
     assertEquals("FULL", result_sample[0][9])
+    verify_column_stats(result_all, result_sample[0])
+    verify_column_stats(result_all_cached, result_sample[0])
 
     result_sample = sql """show column stats mvTestDup(mv_key1)"""
     assertEquals(2, result_sample.size())
@@ -138,6 +164,10 @@ suite("test_analyze_mv") {
     assertEquals("1", result_sample[0][7])
     assertEquals("1001", result_sample[0][8])
     assertEquals("FULL", result_sample[0][9])
+    verify_column_stats(result_all, result_sample[0])
+    verify_column_stats(result_all_cached, result_sample[0])
+    verify_column_stats(result_all, result_sample[1])
+    verify_column_stats(result_all_cached, result_sample[1])
 
     result_sample = sql """show column stats 
mvTestDup(`mva_SUM__CAST(``value1`` AS BIGINT)`)"""
     assertEquals(1, result_sample.size())
@@ -148,6 +178,8 @@ suite("test_analyze_mv") {
     assertEquals("6", result_sample[0][7])
     assertEquals("3001", result_sample[0][8])
     assertEquals("FULL", result_sample[0][9])
+    verify_column_stats(result_all, result_sample[0])
+    verify_column_stats(result_all_cached, result_sample[0])
 
     result_sample = sql """show column stats 
mvTestDup(`mva_MAX__``value2```)"""
     assertEquals(1, result_sample.size())
@@ -158,6 +190,8 @@ suite("test_analyze_mv") {
     assertEquals("4", result_sample[0][7])
     assertEquals("4001", result_sample[0][8])
     assertEquals("FULL", result_sample[0][9])
+    verify_column_stats(result_all, result_sample[0])
+    verify_column_stats(result_all_cached, result_sample[0])
 
     result_sample = sql """show column stats 
mvTestDup(`mva_MIN__``value3```)"""
     assertEquals(1, result_sample.size())
@@ -168,6 +202,8 @@ suite("test_analyze_mv") {
     assertEquals("5", result_sample[0][7])
     assertEquals("5001", result_sample[0][8])
     assertEquals("FULL", result_sample[0][9])
+    verify_column_stats(result_all, result_sample[0])
+    verify_column_stats(result_all_cached, result_sample[0])
 
 
     sql """CREATE TABLE mvTestAgg (


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to