This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 08453f5a06a [improvement](statistics)Support get index row count and table delta rows. (#38516) 08453f5a06a is described below commit 08453f5a06a436e275b65050fccb37b96a7ff4e2 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Wed Jul 31 10:38:58 2024 +0800 [improvement](statistics)Support get index row count and table delta rows. (#38516) backport: https://github.com/apache/doris/pull/38492 --- fe/fe-core/src/main/cup/sql_parser.cup | 7 ++- .../doris/analysis/AlterColumnStatsStmt.java | 4 ++ .../apache/doris/analysis/ShowTableStatsStmt.java | 72 ++++++++++++++++------ .../org/apache/doris/statistics/AnalysisInfo.java | 8 +++ .../apache/doris/statistics/BaseAnalysisTask.java | 9 +++ .../doris/statistics/StatisticsRepository.java | 13 +++- .../apache/doris/statistics/TableStatsMeta.java | 31 +++++++++- .../suites/statistics/test_analyze_mv.groovy | 39 ++++++++++++ 8 files changed, 160 insertions(+), 23 deletions(-) diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index c0f1e9c2ba5..c4aa95a1904 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -4166,7 +4166,12 @@ show_param ::= /* show table stats */ | KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames {: - RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached); + RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached, null); + :} + /* show index stats */ + | KW_INDEX KW_STATS table_name:tbl ident:id + {: + RESULT = new ShowTableStatsStmt(tbl, null, false, id); :} /* show column stats */ | KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java index 
ef4357dc285..8df7c217ad2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java @@ -134,6 +134,10 @@ public class AlterColumnStatsStmt extends DdlStmt { throw new AnalysisException(optional.get() + " is invalid statistics"); } + if (!properties.containsKey(StatsType.ROW_COUNT.getValue())) { + throw new AnalysisException("Set column stats must set row_count. e.g. 'row_count'='5'"); + } + // get statsTypeToValue properties.forEach((key, value) -> { StatsType statsType = StatsType.fromString(key); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index 95d36867da2..2328581238a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -20,6 +20,7 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.TableIf; @@ -58,17 +59,25 @@ public class ShowTableStatsStmt extends ShowStmt { .add("user_inject") .build(); - private final TableName tableName; + private static final ImmutableList<String> INDEX_TITLE_NAMES = + new ImmutableList.Builder<String>() + .add("table_name") + .add("index_name") + .add("row_count") + .build(); + private final TableName tableName; private final PartitionNames partitionNames; private final boolean cached; + private final String indexName; private TableIf table; - public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached) { + public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean 
cached, String indexName) { this.tableName = tableName; this.partitionNames = partitionNames; this.cached = cached; + this.indexName = indexName; } public TableName getTableName() { @@ -116,7 +125,13 @@ public class ShowTableStatsStmt extends ShowStmt { public ShowResultSetMetaData getMetaData() { ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder(); - for (String title : TITLE_NAMES) { + ImmutableList<String> titles; + if (indexName != null) { + titles = INDEX_TITLE_NAMES; + } else { + titles = TITLE_NAMES; + } + for (String title : titles) { builder.addColumn(new Column(title, ScalarType.createVarchar(30))); } return builder.build(); @@ -126,15 +141,29 @@ public class ShowTableStatsStmt extends ShowStmt { return table; } - public long getPartitionId() { - if (partitionNames == null) { - return 0; + public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) { + if (indexName != null) { + return constructIndexResultSet(tableStatistic); } - String partitionName = partitionNames.getPartitionNames().get(0); - return table.getPartition(partitionName).getId(); + return constructTableResultSet(tableStatistic); } - public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) { + public ShowResultSet constructResultSet(long rowCount) { + List<List<String>> result = Lists.newArrayList(); + List<String> row = Lists.newArrayList(); + row.add(""); + row.add(""); + row.add(String.valueOf(rowCount)); + row.add(""); + row.add(""); + row.add(""); + row.add(""); + row.add(""); + result.add(row); + return new ShowResultSet(getMetaData(), result); + } + + public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic) { DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); if (tableStatistic == null) { return new ShowResultSet(getMetaData(), new ArrayList<>()); @@ -146,7 +175,7 @@ public class ShowTableStatsStmt extends ShowStmt { row.add(String.valueOf(tableStatistic.rowCount)); LocalDateTime dateTime = 
LocalDateTime.ofInstant(Instant.ofEpochMilli(tableStatistic.updatedTime), - java.time.ZoneId.systemDefault()); + java.time.ZoneId.systemDefault()); String formattedDateTime = dateTime.format(formatter); row.add(formattedDateTime); row.add(tableStatistic.analyzeColumns().toString()); @@ -157,17 +186,24 @@ public class ShowTableStatsStmt extends ShowStmt { return new ShowResultSet(getMetaData(), result); } - public ShowResultSet constructResultSet(long rowCount) { + public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) { List<List<String>> result = Lists.newArrayList(); + if (!(table instanceof OlapTable)) { + return new ShowResultSet(getMetaData(), result); + } + OlapTable olapTable = (OlapTable) table; + Long indexId = olapTable.getIndexIdByName(indexName); + if (indexId == null) { + throw new RuntimeException(String.format("Index %s not exist.", indexName)); + } + long rowCount = tableStatistic.getRowCount(olapTable.getIndexIdByName(indexName)); + if (rowCount == -1) { + return new ShowResultSet(getMetaData(), result); + } List<String> row = Lists.newArrayList(); - row.add(""); - row.add(""); + row.add(table.getName()); + row.add(indexName); row.add(String.valueOf(rowCount)); - row.add(""); - row.add(""); - row.add(""); - row.add(""); - row.add(""); result.add(row); return new ShowResultSet(getMetaData(), result); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index 461528b151f..7a722fde576 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -38,6 +38,8 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.StringJoiner; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; public class AnalysisInfo implements Writable { @@ -202,6 +204,8 @@ public class 
AnalysisInfo implements Writable { public final boolean userInject; + public final ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>(); + public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long catalogId, long dbId, long tblId, Map<String, Set<String>> colToPartitions, Set<String> partitionNames, String colName, Long indexId, JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType, @@ -350,4 +354,8 @@ public class AnalysisInfo implements Writable { public TableIf getTable() { return StatisticsUtil.findTable(catalogId, dbId, tblId); } + + public void addIndexRowCount(long indexId, long rowCount) { + indexesRowCount.put(indexId, rowCount); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index e04a7274f69..85a2fd0de3f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -21,6 +21,7 @@ import org.apache.doris.analysis.TableSample; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.util.DebugUtil; import org.apache.doris.datasource.CatalogIf; @@ -333,6 +334,14 @@ public abstract class BaseAnalysisTask { try (AutoCloseConnectContext a = StatisticsUtil.buildConnectContext()) { stmtExecutor = new StmtExecutor(a.connectContext, sql); ColStatsData colStatsData = new ColStatsData(stmtExecutor.executeInternalQuery().get(0)); + // Update index row count after analyze. + if (this instanceof OlapAnalysisTask) { + AnalysisInfo jobInfo = Env.getCurrentEnv().getAnalysisManager().findJobInfo(job.getJobInfo().jobId); + // For sync job, get jobInfo from job.jobInfo. 
+ jobInfo = jobInfo == null ? job.jobInfo : jobInfo; + long indexId = info.indexId == -1 ? ((OlapTable) tbl).getBaseIndexId() : info.indexId; + jobInfo.addIndexRowCount(indexId, colStatsData.count); + } Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData); queryId = DebugUtil.printId(stmtExecutor.getContext().queryId()); job.appendBuf(this, Collections.singletonList(colStatsData)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index c1aacaba140..0e0ab5e621b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -21,6 +21,7 @@ import org.apache.doris.analysis.AlterColumnStatsStmt; import org.apache.doris.analysis.TableName; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; @@ -263,12 +264,13 @@ public class StatisticsRepository { String max = alterColumnStatsStmt.getValue(StatsType.MAX_VALUE); String dataSize = alterColumnStatsStmt.getValue(StatsType.DATA_SIZE); long indexId = alterColumnStatsStmt.getIndexId(); + if (rowCount == null) { + throw new RuntimeException("Row count is null."); + } ColumnStatisticBuilder builder = new ColumnStatisticBuilder(); String colName = alterColumnStatsStmt.getColumnName(); Column column = objects.table.getColumn(colName); - if (rowCount != null) { - builder.setCount(Double.parseDouble(rowCount)); - } + builder.setCount(Double.parseDouble(rowCount)); if (ndv != null) { double dNdv = Double.parseDouble(ndv); builder.setNdv(dNdv); @@ -323,9 +325,14 @@ public class StatisticsRepository { .setTblUpdateTime(System.currentTimeMillis()) .setColName("") 
.setColToPartitions(Maps.newHashMap()) + .setRowCount((long) Double.parseDouble(rowCount)) .setUserInject(true) .setJobType(AnalysisInfo.JobType.MANUAL) .build(); + if (objects.table instanceof OlapTable) { + indexId = indexId == -1 ? ((OlapTable) objects.table).getBaseIndexId() : indexId; + mockedJobInfo.addIndexRowCount(indexId, (long) Double.parseDouble(rowCount)); + } Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo, objects.table); } else { // update partition granularity statistics diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 4a37106cb6d..07580df3607 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -23,6 +23,7 @@ import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; +import org.apache.doris.persist.gson.GsonPostProcessable; import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.util.StatisticsUtil; @@ -34,6 +35,7 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.Arrays; +import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -42,7 +44,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; -public class TableStatsMeta implements Writable { +public class TableStatsMeta implements Writable, GsonPostProcessable { @SerializedName("tblId") public final long tblId; @@ -75,6 +77,9 @@ public class TableStatsMeta implements Writable { @SerializedName("userInjected") public boolean userInjected; + 
@SerializedName("irc") + public ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>(); + @VisibleForTesting public TableStatsMeta() { tblId = 0; @@ -158,6 +163,8 @@ public class TableStatsMeta implements Writable { if (tableIf != null) { if (tableIf instanceof OlapTable) { rowCount = analyzedJob.rowCount; + indexesRowCount.putAll(analyzedJob.indexesRowCount); + clearStaleIndexRowCount((OlapTable) tableIf); } if (!analyzedJob.emptyJob && analyzedJob.colToPartitions.keySet() .containsAll(tableIf.getBaseSchema().stream() @@ -176,4 +183,26 @@ public class TableStatsMeta implements Writable { } } } + + @Override + public void gsonPostProcess() throws IOException { + if (indexesRowCount == null) { + indexesRowCount = new ConcurrentHashMap<>(); + } + } + + public long getRowCount(long indexId) { + return indexesRowCount.getOrDefault(indexId, -1L); + } + + private void clearStaleIndexRowCount(OlapTable table) { + Iterator<Long> iterator = indexesRowCount.keySet().iterator(); + List<Long> indexIds = table.getIndexIds(); + while (iterator.hasNext()) { + long key = iterator.next(); + if (!indexIds.contains(key)) { + iterator.remove(); + } + } + } } diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy b/regression-test/suites/statistics/test_analyze_mv.groovy index f5dcabb7a62..07fe0b2591c 100644 --- a/regression-test/suites/statistics/test_analyze_mv.groovy +++ b/regression-test/suites/statistics/test_analyze_mv.groovy @@ -136,6 +136,28 @@ suite("test_analyze_mv") { sql """analyze table mvTestDup with sync;""" + // Test show index row count + def result_row = sql """show index stats mvTestDup mvTestDup""" + assertEquals(1, result_row.size()) + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mvTestDup", result_row[0][1]) + assertEquals("6", result_row[0][2]) + result_row = sql """show index stats mvTestDup mv1""" + assertEquals(1, result_row.size()) + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mv1", result_row[0][1]) 
+ assertEquals("6", result_row[0][2]) + result_row = sql """show index stats mvTestDup mv2""" + assertEquals(1, result_row.size()) + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mv2", result_row[0][1]) + assertEquals("6", result_row[0][2]) + result_row = sql """show index stats mvTestDup mv3""" + assertEquals(1, result_row.size()) + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mv3", result_row[0][1]) + assertEquals("4", result_row[0][2]) + // Compare show whole table column stats result with show single column. def result_all = sql """show column stats mvTestDup""" assertEquals(12, result_all.size()) @@ -417,6 +439,23 @@ suite("test_analyze_mv") { assertEquals("4001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + // Test alter table index row count. + sql """alter table mvTestDup modify column `value2` set stats ('row_count'='1.5E8', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');""" + result_row = sql """show index stats mvTestDup mvTestDup;""" + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mvTestDup", result_row[0][1]) + assertEquals("150000000", result_row[0][2]) + sql """alter table mvTestDup index mv1 modify column `mv_key1` set stats ('row_count'='3443', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');""" + result_row = sql """show index stats mvTestDup mv1;""" + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mv1", result_row[0][1]) + assertEquals("3443", result_row[0][2]) + sql """alter table mvTestDup index mv3 modify column `mva_MAX__``value2``` set stats ('row_count'='234234', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');""" + result_row = sql """show index stats mvTestDup mv3;""" + assertEquals("mvTestDup", result_row[0][0]) + assertEquals("mv3", result_row[0][1]) + assertEquals("234234", result_row[0][2]) + sql """drop stats mvTestDup""" result_sample = 
sql """show column stats mvTestDup""" assertEquals(0, result_sample.size()) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org