This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new c78e04ac31a [improvement](statistics)Support get index row count and table delta rows. (#38520) c78e04ac31a is described below commit c78e04ac31a04ae389e812dea3e96469e3146ed2 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Wed Jul 31 10:39:21 2024 +0800 [improvement](statistics)Support get index row count and table delta rows. (#38520) backport: https://github.com/apache/doris/pull/38492 --- fe/fe-core/src/main/cup/sql_parser.cup | 7 ++- .../doris/analysis/AlterColumnStatsStmt.java | 4 ++ .../apache/doris/analysis/ShowTableStatsStmt.java | 72 ++++++++++++++++------ .../org/apache/doris/statistics/AnalysisInfo.java | 8 +++ .../apache/doris/statistics/BaseAnalysisTask.java | 9 +++ .../doris/statistics/StatisticsRepository.java | 13 +++- .../apache/doris/statistics/TableStatsMeta.java | 32 +++++++++- .../suites/statistics/test_analyze_mv.groovy | 39 ++++++++++++ 8 files changed, 161 insertions(+), 23 deletions(-) diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index f5b5ead9cc6..90534059cf9 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -4362,7 +4362,12 @@ show_param ::= /* show table stats */ | KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames {: - RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached); + RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached, null); + :} + /* show index stats */ + | KW_INDEX KW_STATS table_name:tbl ident:id + {: + RESULT = new ShowTableStatsStmt(tbl, null, false, id); :} /* show column stats */ | KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java index 9e76b065921..9aebeb59b82 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java @@ -134,6 +134,10 @@ public class AlterColumnStatsStmt extends DdlStmt { throw new AnalysisException(optional.get() + " is invalid statistics"); } + if (!properties.containsKey(StatsType.ROW_COUNT.getValue())) { + throw new AnalysisException("Set column stats must set row_count. e.g. 'row_count'='5'"); + } + // get statsTypeToValue properties.forEach((key, value) -> { StatsType statsType = StatsType.fromString(key); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index 8d8cd32c454..fc0860dfd13 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -20,6 +20,7 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.TableIf; @@ -58,17 +59,25 @@ public class ShowTableStatsStmt extends ShowStmt { .add("user_inject") .build(); - private final TableName tableName; + private static final ImmutableList<String> INDEX_TITLE_NAMES = + new ImmutableList.Builder<String>() + .add("table_name") + .add("index_name") + .add("row_count") + .build(); + private final TableName tableName; private final PartitionNames partitionNames; private final boolean cached; + private final String indexName; private TableIf table; - public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached) { + public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached, String indexName) { this.tableName = tableName; this.partitionNames = partitionNames; this.cached = cached; + this.indexName = indexName; } public TableName getTableName() { @@ -117,7 +126,13 @@ public class ShowTableStatsStmt extends ShowStmt { public ShowResultSetMetaData getMetaData() { ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder(); - for (String title : TITLE_NAMES) { + ImmutableList<String> titles; + if (indexName != null) { + titles = INDEX_TITLE_NAMES; + } else { + titles = TITLE_NAMES; + } + for (String title : titles) { builder.addColumn(new Column(title, ScalarType.createVarchar(30))); } return builder.build(); @@ -127,15 +142,29 @@ public class ShowTableStatsStmt extends ShowStmt { return table; } - public long getPartitionId() { - if (partitionNames == null) { - return 0; + public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) { + if (indexName != null) { + return constructIndexResultSet(tableStatistic); } - String partitionName = partitionNames.getPartitionNames().get(0); - return table.getPartition(partitionName).getId(); + return constructTableResultSet(tableStatistic); } - public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) { + public ShowResultSet constructResultSet(long rowCount) { + List<List<String>> result = Lists.newArrayList(); + List<String> row = Lists.newArrayList(); + row.add(""); + row.add(""); + row.add(String.valueOf(rowCount)); + row.add(""); + row.add(""); + row.add(""); + row.add(""); + row.add(""); + result.add(row); + return new ShowResultSet(getMetaData(), result); + } + + public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic) { DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); if (tableStatistic == null) { return new ShowResultSet(getMetaData(), new ArrayList<>()); @@ -147,7 +176,7 @@ public class ShowTableStatsStmt extends ShowStmt { row.add(String.valueOf(tableStatistic.rowCount)); LocalDateTime dateTime = LocalDateTime.ofInstant(Instant.ofEpochMilli(tableStatistic.updatedTime), - java.time.ZoneId.systemDefault()); + java.time.ZoneId.systemDefault()); String formattedDateTime = dateTime.format(formatter); row.add(formattedDateTime); row.add(tableStatistic.analyzeColumns().toString()); @@ -158,17 +187,24 @@ public class ShowTableStatsStmt extends ShowStmt { return new ShowResultSet(getMetaData(), result); } - public ShowResultSet constructResultSet(long rowCount) { + public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) { List<List<String>> result = Lists.newArrayList(); + if (!(table instanceof OlapTable)) { + return new ShowResultSet(getMetaData(), result); + } + OlapTable olapTable = (OlapTable) table; + Long indexId = olapTable.getIndexIdByName(indexName); + if (indexId == null) { + throw new RuntimeException(String.format("Index %s not exist.", indexName)); + } + long rowCount = tableStatistic.getRowCount(olapTable.getIndexIdByName(indexName)); + if (rowCount == -1) { + return new ShowResultSet(getMetaData(), result); + } List<String> row = Lists.newArrayList(); - row.add(""); - row.add(""); + row.add(table.getName()); + row.add(indexName); row.add(String.valueOf(rowCount)); - row.add(""); - row.add(""); - row.add(""); - row.add(""); - row.add(""); result.add(row); return new ShowResultSet(getMetaData(), result); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index ab7f9935c72..125b23bce7b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -38,6 +38,8 @@ import java.text.ParseException; import java.util.List; import java.util.Set; import java.util.StringJoiner; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; public class AnalysisInfo implements Writable { @@ -202,6 +204,8 @@ public class AnalysisInfo implements Writable { public final boolean userInject; + public final ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>(); + public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long catalogId, long dbId, long tblId, List<Pair<String, String>> jobColumns, Set<String> partitionNames, String colName, Long indexId, JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType, @@ -350,4 +354,8 @@ public class AnalysisInfo implements Writable { public TableIf getTable() { return StatisticsUtil.findTable(catalogId, dbId, tblId); } + + public void addIndexRowCount(long indexId, long rowCount) { + indexesRowCount.put(indexId, rowCount); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index e04a7274f69..85a2fd0de3f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -21,6 +21,7 @@ import org.apache.doris.analysis.TableSample; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.util.DebugUtil; import org.apache.doris.datasource.CatalogIf; @@ -333,6 +334,14 @@ public abstract class BaseAnalysisTask { try (AutoCloseConnectContext a = StatisticsUtil.buildConnectContext()) { stmtExecutor = new StmtExecutor(a.connectContext, sql); ColStatsData colStatsData = new ColStatsData(stmtExecutor.executeInternalQuery().get(0)); + // Update index row count after analyze. + if (this instanceof OlapAnalysisTask) { + AnalysisInfo jobInfo = Env.getCurrentEnv().getAnalysisManager().findJobInfo(job.getJobInfo().jobId); + // For sync job, get jobInfo from job.jobInfo. + jobInfo = jobInfo == null ? job.jobInfo : jobInfo; + long indexId = info.indexId == -1 ? ((OlapTable) tbl).getBaseIndexId() : info.indexId; + jobInfo.addIndexRowCount(indexId, colStatsData.count); + } Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData); queryId = DebugUtil.printId(stmtExecutor.getContext().queryId()); job.appendBuf(this, Collections.singletonList(colStatsData)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 87ce90c5300..4d2aede413b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -21,6 +21,7 @@ import org.apache.doris.analysis.AlterColumnStatsStmt; import org.apache.doris.analysis.TableName; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; @@ -278,12 +279,13 @@ public class StatisticsRepository { String max = alterColumnStatsStmt.getValue(StatsType.MAX_VALUE); String dataSize = alterColumnStatsStmt.getValue(StatsType.DATA_SIZE); long indexId = alterColumnStatsStmt.getIndexId(); + if (rowCount == null) { + throw new RuntimeException("Row count is null."); + } ColumnStatisticBuilder builder = new ColumnStatisticBuilder(); String colName = alterColumnStatsStmt.getColumnName(); Column column = objects.table.getColumn(colName); - if (rowCount != null) { - builder.setCount(Double.parseDouble(rowCount)); - } + builder.setCount(Double.parseDouble(rowCount)); if (ndv != null) { double dNdv = Double.parseDouble(ndv); builder.setNdv(dNdv); @@ -338,9 +340,14 @@ public class StatisticsRepository { .setTblUpdateTime(System.currentTimeMillis()) .setColName("") .setJobColumns(Lists.newArrayList()) + .setRowCount((long) Double.parseDouble(rowCount)) .setUserInject(true) .setJobType(AnalysisInfo.JobType.MANUAL) .build(); + if (objects.table instanceof OlapTable) { + indexId = indexId == -1 ? ((OlapTable) objects.table).getBaseIndexId() : indexId; + mockedJobInfo.addIndexRowCount(indexId, (long) Double.parseDouble(rowCount)); + } Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo, objects.table); } else { // update partition granularity statistics diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index a9a580c8b40..bb2f19ac25c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -24,6 +24,7 @@ import org.apache.doris.catalog.TableIf; import org.apache.doris.common.Pair; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; +import org.apache.doris.persist.gson.GsonPostProcessable; import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.statistics.AnalysisInfo.JobType; @@ -33,6 +34,8 @@ import com.google.gson.annotations.SerializedName; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.Iterator; +import java.util.List; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -40,7 +43,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; -public class TableStatsMeta implements Writable { +public class TableStatsMeta implements Writable, GsonPostProcessable { @SerializedName("tblId") public final long tblId; @@ -77,6 +80,9 @@ public class TableStatsMeta implements Writable { @SerializedName("userInjected") public boolean userInjected; + @SerializedName("irc") + public ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>(); + @VisibleForTesting public TableStatsMeta() { tblId = 0; @@ -142,6 +148,8 @@ public class TableStatsMeta implements Writable { if (tableIf != null) { if (tableIf instanceof OlapTable) { rowCount = analyzedJob.rowCount; + indexesRowCount.putAll(analyzedJob.indexesRowCount); + clearStaleIndexRowCount((OlapTable) tableIf); } if (analyzedJob.emptyJob) { return; @@ -166,4 +174,26 @@ public class TableStatsMeta implements Writable { public void convertDeprecatedColStatsToNewVersion() { deprecatedColNameToColStatsMeta = null; } + + @Override + public void gsonPostProcess() throws IOException { + if (indexesRowCount == null) { + indexesRowCount = new ConcurrentHashMap<>(); + } + } + + public long getRowCount(long indexId) { + return indexesRowCount.getOrDefault(indexId, -1L); + } + + private void clearStaleIndexRowCount(OlapTable table) { + Iterator<Long> iterator = indexesRowCount.keySet().iterator(); + List<Long> indexIds = table.getIndexIds(); + while (iterator.hasNext()) { + long key = iterator.next(); + if (indexIds.contains(key)) { + iterator.remove(); + } + } + } } diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy b/regression-test/suites/statistics/test_analyze_mv.groovy index 9128ee2b263..fa9b0701b1a 100644 --- a/regression-test/suites/statistics/test_analyze_mv.groovy +++ b/regression-test/suites/statistics/test_analyze_mv.groovy @@ -133,6 +133,28 @@ suite("test_analyze_mv") { sql """analyze table mvTestDup with sync;""" + // Test show index row count + def result_row = sql """show index stats mvTestDup mvTestDup""" + assertEquals(1, result_row.size()) + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mvTestDup", result_row[0][1]) + assertEquals("6", result_row[0][2]) + result_row = sql """show index stats mvTestDup mv1""" + assertEquals(1, result_row.size()) + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mv1", result_row[0][1]) + assertEquals("6", result_row[0][2]) + result_row = sql """show index stats mvTestDup mv2""" + assertEquals(1, result_row.size()) + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mv2", result_row[0][1]) + assertEquals("6", result_row[0][2]) + result_row = sql """show index stats mvTestDup mv3""" + assertEquals(1, result_row.size()) + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mv3", result_row[0][1]) + assertEquals("4", result_row[0][2]) + // Compare show whole table column stats result with show single column. def result_all = sql """show column stats mvTestDup""" assertEquals(12, result_all.size()) @@ -410,6 +432,23 @@ suite("test_analyze_mv") { assertEquals("4001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + // Test alter table index row count. + sql """alter table mvTestDup modify column `value2` set stats ('row_count'='1.5E8', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');""" + result_row = sql """show index stats mvTestDup mvTestDup;""" + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mvTestDup", result_row[0][1]) + assertEquals("150000000", result_row[0][2]) + sql """alter table mvTestDup index mv1 modify column `mv_key1` set stats ('row_count'='3443', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');""" + result_row = sql """show index stats mvTestDup mv1;""" + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mv1", result_row[0][1]) + assertEquals("3443", result_row[0][2]) + sql """alter table mvTestDup index mv3 modify column `mva_MAX__``value2``` set stats ('row_count'='234234', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');""" + result_row = sql """show index stats mvTestDup mv3;""" + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mv3", result_row[0][1]) + assertEquals("234234", result_row[0][2]) + sql """drop stats mvTestDup""" result_sample = sql """show column stats mvTestDup""" assertEquals(0, result_sample.size()) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org