This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 4f01c03a188 Improve show column stats performance. (#31298) (#31458) 4f01c03a188 is described below commit 4f01c03a1889810257e9e380dc5667ed9598fe0d Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Tue Feb 27 19:47:56 2024 +0800 Improve show column stats performance. (#31298) (#31458) --- .../apache/doris/analysis/ShowColumnStatsStmt.java | 4 +++ .../java/org/apache/doris/qe/ShowExecutor.java | 32 +++++++++++++++-- .../doris/statistics/StatisticsRepository.java | 13 +++++++ .../suites/statistics/test_analyze_mv.groovy | 42 ++++++++++++++++++++-- 4 files changed, 86 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java index be1a54eb8d2..a8d0284c138 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java @@ -179,4 +179,8 @@ public class ShowColumnStatsStmt extends ShowStmt { public boolean isCached() { return cached; } + + public boolean isAllColumns() { + return columnNames == null; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 1c05fd19027..ff9a41af076 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -200,6 +200,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.Histogram; +import org.apache.doris.statistics.ResultRow; import org.apache.doris.statistics.StatisticsRepository; import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.statistics.query.QueryStatsUtil; @@ -2487,7 +2488,35 @@ public class ShowExecutor { Set<String> columnNames = showColumnStatsStmt.getColumnNames(); PartitionNames partitionNames = showColumnStatsStmt.getPartitionNames(); boolean showCache = showColumnStatsStmt.isCached(); + boolean isAllColumns = showColumnStatsStmt.isAllColumns(); + if (isAllColumns && !showCache && partitionNames == null) { + getStatsForAllColumns(columnStatistics, tableIf); + } else { + getStatsForSpecifiedColumns(columnStatistics, columnNames, tableIf, showCache, tableName, partitionNames); + } + resultSet = showColumnStatsStmt.constructResultSet(columnStatistics); + } + + private void getStatsForAllColumns(List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics, + TableIf tableIf) throws AnalysisException { + List<ResultRow> resultRows = StatisticsRepository.queryColumnStatisticsForTable(tableIf.getId()); + for (ResultRow row : resultRows) { + String indexName = "N/A"; + long indexId = Long.parseLong(row.get(4)); + if (indexId != -1) { + indexName = ((OlapTable) tableIf).getIndexNameById(indexId); + if (indexName == null) { + continue; + } + } + columnStatistics.add(Pair.of(Pair.of(row.get(5), indexName), ColumnStatistic.fromResultRow(row))); + } + } + private void getStatsForSpecifiedColumns(List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics, + Set<String> columnNames, TableIf tableIf, boolean showCache, + TableName tableName, PartitionNames partitionNames) + throws AnalysisException { for (String colName : columnNames) { // Olap base index use -1 as index id. List<Long> indexIds = Lists.newArrayList(); @@ -2518,13 +2547,12 @@ public class ShowExecutor { } else { String finalIndexName = indexName; columnStatistics.addAll(StatisticsRepository.queryColumnStatisticsByPartitions(tableName, - colName, showColumnStatsStmt.getPartitionNames().getPartitionNames()) + colName, partitionNames.getPartitionNames()) .stream().map(s -> Pair.of(Pair.of(colName, finalIndexName), s)) .collect(Collectors.toList())); } } } - resultSet = showColumnStatsStmt.constructResultSet(columnStatistics); } public void handleShowColumnHist() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 3bf5b9fc019..75b827ecd9d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -104,6 +104,11 @@ public class StatisticsRepository { + " ${inPredicate}" + " AND part_id IS NOT NULL"; + private static final String FETCH_TABLE_STATISTICS = "SELECT * FROM " + + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.STATISTIC_TBL_NAME + + " WHERE tbl_id = ${tblId}" + + " AND part_id IS NULL"; + public static ColumnStatistic queryColumnStatisticsByName(long tableId, long indexId, String colName) { ResultRow resultRow = queryColumnStatisticById(tableId, indexId, colName); if (resultRow == null) { @@ -128,6 +133,14 @@ public class StatisticsRepository { Collectors.toList()); } + public static List<ResultRow> queryColumnStatisticsForTable(long tableId) + throws AnalysisException { + Map<String, String> params = new HashMap<>(); + params.put("tblId", String.valueOf(tableId)); + List<ResultRow> rows = StatisticsUtil.executeQuery(FETCH_TABLE_STATISTICS, params); + return rows == null ? Collections.emptyList() : rows; + } + public static ResultRow queryColumnStatisticById(long tblId, long indexId, String colName) { return queryColumnStatisticById(tblId, indexId, colName, false); } diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy b/regression-test/suites/statistics/test_analyze_mv.groovy index 65571c65bf4..d5611cd985b 100644 --- a/regression-test/suites/statistics/test_analyze_mv.groovy +++ b/regression-test/suites/statistics/test_analyze_mv.groovy @@ -74,6 +74,25 @@ suite("test_analyze_mv") { } } + def verify_column_stats = { all_column_result, one_column_result -> + logger.info("all column result: " + all_column_result) + logger.info("one column result: " + one_column_result) + boolean found = false; + for (int i = 0; i < all_column_result.size(); i++) { + if (all_column_result[i][0] == one_column_result[0] && all_column_result[i][1] == one_column_result[1]) { + assertEquals(all_column_result[i][2], one_column_result[2]) + assertEquals(all_column_result[i][3], one_column_result[3]) + assertEquals(all_column_result[i][4], one_column_result[4]) + assertEquals(all_column_result[i][5], one_column_result[5]) + assertEquals(all_column_result[i][6], one_column_result[6]) + assertEquals(all_column_result[i][7], one_column_result[7]) + assertEquals(all_column_result[i][8], one_column_result[8]) + found = true; + } + } + assertTrue(found) + } + sql """drop database if exists test_analyze_mv""" sql """create database test_analyze_mv""" sql """use test_analyze_mv""" @@ -102,10 +121,13 @@ suite("test_analyze_mv") { sql """analyze table mvTestDup with sync;""" - def result_sample = sql """show column stats mvTestDup""" - assertEquals(12, result_sample.size()) + // Compare show whole table column stats result with show single column. + def result_all = sql """show column stats mvTestDup""" + assertEquals(12, result_all.size()) + def result_all_cached = sql """show column cached stats mvTestDup""" + assertEquals(12, result_all_cached.size()) - result_sample = sql """show column stats mvTestDup(key1)""" + def result_sample = sql """show column stats mvTestDup(key1)""" assertEquals(1, result_sample.size()) assertEquals("key1", result_sample[0][0]) assertEquals("N/A", result_sample[0][1]) @@ -114,6 +136,8 @@ suite("test_analyze_mv") { assertEquals("1", result_sample[0][7]) assertEquals("1001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + verify_column_stats(result_all, result_sample[0]) + verify_column_stats(result_all_cached, result_sample[0]) result_sample = sql """show column stats mvTestDup(value1)""" assertEquals(1, result_sample.size()) @@ -124,6 +148,8 @@ suite("test_analyze_mv") { assertEquals("3", result_sample[0][7]) assertEquals("3001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + verify_column_stats(result_all, result_sample[0]) + verify_column_stats(result_all_cached, result_sample[0]) result_sample = sql """show column stats mvTestDup(mv_key1)""" assertEquals(2, result_sample.size()) @@ -138,6 +164,10 @@ suite("test_analyze_mv") { assertEquals("1", result_sample[0][7]) assertEquals("1001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + verify_column_stats(result_all, result_sample[0]) + verify_column_stats(result_all_cached, result_sample[0]) + verify_column_stats(result_all, result_sample[1]) + verify_column_stats(result_all_cached, result_sample[1]) result_sample = sql """show column stats mvTestDup(`mva_SUM__CAST(``value1`` AS BIGINT)`)""" assertEquals(1, result_sample.size()) @@ -148,6 +178,8 @@ suite("test_analyze_mv") { assertEquals("6", result_sample[0][7]) assertEquals("3001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + verify_column_stats(result_all, result_sample[0]) + verify_column_stats(result_all_cached, result_sample[0]) result_sample = sql """show column stats mvTestDup(`mva_MAX__``value2```)""" assertEquals(1, result_sample.size()) @@ -158,6 +190,8 @@ suite("test_analyze_mv") { assertEquals("4", result_sample[0][7]) assertEquals("4001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + verify_column_stats(result_all, result_sample[0]) + verify_column_stats(result_all_cached, result_sample[0]) result_sample = sql """show column stats mvTestDup(`mva_MIN__``value3```)""" assertEquals(1, result_sample.size()) @@ -168,6 +202,8 @@ suite("test_analyze_mv") { assertEquals("5", result_sample[0][7]) assertEquals("5001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + verify_column_stats(result_all, result_sample[0]) + verify_column_stats(result_all_cached, result_sample[0]) sql """CREATE TABLE mvTestAgg ( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org