This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 3b760fafdd0cdc8c887330bdf175b2367b5ac111 Author: Lei Zhang <27994433+swjtu-zhang...@users.noreply.github.com> AuthorDate: Thu Feb 8 12:06:01 2024 +0800 [feat](fe) Show db total size and recycle bin size with `ShowDataStmt` (#30663) --- .../sql-reference/Show-Statements/SHOW-DATA.md | 28 ++++++- .../sql-reference/Show-Statements/SHOW-DATA.md | 28 ++++++- .../org/apache/doris/analysis/ShowDataStmt.java | 88 ++++++++++++++++++++++ .../apache/doris/catalog/CatalogRecycleBin.java | 39 ++++++++++ .../java/org/apache/doris/catalog/Database.java | 20 +++-- .../suites/show_p0/test_show_data.groovy | 31 ++++++++ 6 files changed, 223 insertions(+), 11 deletions(-) diff --git a/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md index d0999576ffa..f720d9da4ad 100644 --- a/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md +++ b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md @@ -56,9 +56,31 @@ illustrate: ### Example -1. Display the data volume, replica number, aggregate data volume and aggregate replica number of each table in the default db. +1. Display the data size and RecycleBin size of each database by default. + + ``` + SHOW DATA; + ``` + + ``` + +-------+-----------------------------------+--------+------------+-------------+-------------------+ + | DbId | DbName | Size | RemoteSize | RecycleSize | RecycleRemoteSize | + +-------+-----------------------------------+--------+------------+-------------+-------------------+ + | 21009 | db1 | 0 | 0 | 0 | 0 | + | 22011 | regression_test_inverted_index_p0 | 72764 | 0 | 0 | 0 | + | 0 | information_schema | 0 | 0 | 0 | 0 | + | 22010 | regression_test | 0 | 0 | 0 | 0 | + | 1 | mysql | 0 | 0 | 0 | 0 | + | 22017 | regression_test_show_p0 | 0 | 0 | 0 | 0 | + | 10002 | __internal_schema | 46182 | 0 | 0 | 0 | + | Total | NULL | 118946 | 0 | 0 | 0 | + +-------+-----------------------------------+--------+------------+-------------+-------------------+ + ``` + +2. Display the data volume, replica number, aggregate data volume and aggregate replica number of each table in a database. ```sql + USE db1; SHOW DATA; ```` @@ -74,7 +96,7 @@ illustrate: +-----------+-------------+--------------+ ```` -2. Display the subdivided data volume, the number of replicas and the number of statistical rows of the specified table under the specified db +3. Display the subdivided data volume, the number of replicas and the number of statistical rows of the specified table under the specified db ```sql SHOW DATA FROM example_db.test; @@ -91,7 +113,7 @@ illustrate: +-----------+-----------+-----------+--------------+----------+ ```` -3. It can be combined and sorted according to the amount of data, the number of copies, the number of statistical rows, etc. +4. It can be combined and sorted according to the amount of data, the number of copies, the number of statistical rows, etc. ```sql SHOW DATA ORDER BY ReplicaCount desc,Size asc; diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md index 2acce978774..ce851227bb8 100644 --- a/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA.md @@ -56,9 +56,31 @@ SHOW DATA [FROM [db_name.]table_name] [ORDER BY ...]; ### Example -1. 展示默认 db 的各个 table 的数据量,副本数量,汇总数据量和汇总副本数量。 +1. 默认展示各个db的汇总数据量,RecycleBin中的数据量 + + ``` + SHOW DATA; + ``` + + ``` + +-------+-----------------------------------+--------+------------+-------------+-------------------+ + | DbId | DbName | Size | RemoteSize | RecycleSize | RecycleRemoteSize | + +-------+-----------------------------------+--------+------------+-------------+-------------------+ + | 21009 | db1 | 0 | 0 | 0 | 0 | + | 22011 | regression_test_inverted_index_p0 | 72764 | 0 | 0 | 0 | + | 0 | information_schema | 0 | 0 | 0 | 0 | + | 22010 | regression_test | 0 | 0 | 0 | 0 | + | 1 | mysql | 0 | 0 | 0 | 0 | + | 22017 | regression_test_show_p0 | 0 | 0 | 0 | 0 | + | 10002 | __internal_schema | 46182 | 0 | 0 | 0 | + | Total | NULL | 118946 | 0 | 0 | 0 | + +-------+-----------------------------------+--------+------------+-------------+-------------------+ + ``` + +2. 展示特定 db 的各个 table 的数据量,副本数量,汇总数据量和汇总副本数量。 ```sql + USE db1; SHOW DATA; ``` @@ -74,7 +96,7 @@ SHOW DATA [FROM [db_name.]table_name] [ORDER BY ...]; +-----------+-------------+--------------+ ``` -2. 展示指定 db 的下指定表的细分数据量、副本数量和统计行数 +3. 展示指定 db 的下指定表的细分数据量、副本数量和统计行数 ```sql SHOW DATA FROM example_db.test; @@ -91,7 +113,7 @@ SHOW DATA [FROM [db_name.]table_name] [ORDER BY ...]; +-----------+-----------+-----------+--------------+----------+ ``` -3. 可以按照数据量、副本数量、统计行数等进行组合排序 +4. 可以按照数据量、副本数量、统计行数等进行组合排序 ```sql SHOW DATA ORDER BY ReplicaCount desc,Size asc; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataStmt.java index ddaf27fa539..f84784e5826 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataStmt.java @@ -19,6 +19,7 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.OlapTable; @@ -38,6 +39,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.ShowResultSetMetaData; +import com.google.common.base.Strings; import com.google.common.collect.ComparisonChain; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; @@ -54,6 +56,16 @@ import java.util.TreeMap; import java.util.TreeSet; public class ShowDataStmt extends ShowStmt { + private static final ShowResultSetMetaData SHOW_DATABASE_DATA_META_DATA = + ShowResultSetMetaData.builder() + .addColumn(new Column("DbId", ScalarType.createVarchar(20))) + .addColumn(new Column("DbName", ScalarType.createVarchar(20))) + .addColumn(new Column("Size", ScalarType.createVarchar(30))) + .addColumn(new Column("RemoteSize", ScalarType.createVarchar(30))) + .addColumn(new Column("RecycleSize", ScalarType.createVarchar(30))) + .addColumn(new Column("RecycleRemoteSize", ScalarType.createVarchar(30))) + .build(); + private static final ShowResultSetMetaData SHOW_TABLE_DATA_META_DATA = ShowResultSetMetaData.builder() .addColumn(new Column("TableName", ScalarType.createVarchar(20))) @@ -71,6 +83,11 @@ public class ShowDataStmt extends ShowStmt { .addColumn(new Column("RowCount", ScalarType.createVarchar(20))) .addColumn(new Column("RemoteSize", ScalarType.createVarchar(30))) .build(); + + public static final ImmutableList<String> SHOW_DATABASE_DATA_META_DATA_ORIGIN = + new ImmutableList.Builder<String>().add("DbId").add("DbName").add("Size") + .add("RemoteSize").add("RecycleSize").add("RecycleRemoteSize").build(); + public static final ImmutableList<String> SHOW_TABLE_DATA_META_DATA_ORIGIN = new ImmutableList.Builder<String>().add("TableName").add("Size").add("ReplicaCount") .add("RemoteSize").build(); @@ -97,6 +114,10 @@ public class ShowDataStmt extends ShowStmt { public void analyze(Analyzer analyzer) throws UserException { super.analyze(analyzer); dbName = analyzer.getDefaultDb(); + if (Strings.isNullOrEmpty(dbName)) { + getAllDbStats(); + return; + } if (tableName != null) { tableName.analyze(analyzer); // disallow external catalog @@ -374,6 +395,9 @@ public class ShowDataStmt extends ShowStmt { @Override public ShowResultSetMetaData getMetaData() { + if (Strings.isNullOrEmpty(dbName)) { + return SHOW_DATABASE_DATA_META_DATA; + } if (tableName != null) { return SHOW_INDEX_DATA_META_DATA; } else { @@ -408,4 +432,68 @@ public class ShowDataStmt extends ShowStmt { public String toString() { return toSql(); } + + private void getAllDbStats() { + List<String> dbNames = Env.getCurrentInternalCatalog().getDbNames(); + if (dbNames == null || dbNames.isEmpty()) { + return; + } + long totalSize = 0; + long totalRemoteSize = 0; + long totalRecycleSize = 0; + long totalRecycleRemoteSize = 0; + Map<Long, Pair<Long, Long>> dbToRecycleSize = Env.getCurrentRecycleBin().getDbToRecycleSize(); + // show all database datasize + for (String dbName : dbNames) { + DatabaseIf db = Env.getCurrentInternalCatalog().getDbNullable(dbName); + if (db == null) { + continue; + } + List<String> dbInfo = new ArrayList<>(); + db.readLock(); + try { + dbInfo.add(String.valueOf(db.getId())); + dbInfo.add(dbName); + Pair<Long, Long> usedSize = ((Database) db).getUsedDataSize(); + dbInfo.add(String.valueOf(usedSize.first)); + dbInfo.add(String.valueOf(usedSize.second)); + totalSize += usedSize.first; + totalRemoteSize += usedSize.second; + } finally { + db.readUnlock(); + } + + Pair<Long, Long> recycleSize = dbToRecycleSize.getOrDefault(db.getId(), Pair.of(0L, 0L)); + dbInfo.add(String.valueOf(recycleSize.first)); + dbInfo.add(String.valueOf(recycleSize.second)); + totalRecycleSize += recycleSize.first; + totalRecycleRemoteSize += recycleSize.second; + dbToRecycleSize.remove(db.getId()); + totalRows.add(dbInfo); + } + + // Append left database in recycle bin + for (Map.Entry<Long, Pair<Long, Long>> entry : dbToRecycleSize.entrySet()) { + List<String> dbInfo = new ArrayList<>(); + dbInfo.add(String.valueOf(entry.getKey())); + dbInfo.add("NULL"); + dbInfo.add("0"); + dbInfo.add("0"); + dbInfo.add(String.valueOf(entry.getValue().first)); + dbInfo.add(String.valueOf(entry.getValue().second)); + totalRecycleSize += entry.getValue().first; + totalRecycleRemoteSize += entry.getValue().second; + totalRows.add(dbInfo); + } + + // calc total size + List<String> dbInfo = new ArrayList<>(); + dbInfo.add("Total"); + dbInfo.add("NULL"); + dbInfo.add(String.valueOf(totalSize)); + dbInfo.add(String.valueOf(totalRemoteSize)); + dbInfo.add(String.valueOf(totalRecycleSize)); + dbInfo.add(String.valueOf(totalRecycleRemoteSize)); + totalRows.add(dbInfo); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java index 50cc2679251..d803cde648a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java @@ -1100,6 +1100,45 @@ public class CatalogRecycleBin extends MasterDaemon implements Writable { return Stream.of(dbInfos, tableInfos, partitionInfos).flatMap(Collection::stream).collect(Collectors.toList()); } + public synchronized Map<Long, Pair<Long, Long>> getDbToRecycleSize() { + Map<Long, Pair<Long, Long>> dbToRecycleSize = new HashMap<>(); + for (Map.Entry<Long, RecycleTableInfo> entry : idToTable.entrySet()) { + RecycleTableInfo tableInfo = entry.getValue(); + Table table = tableInfo.getTable(); + if (!(table instanceof OlapTable)) { + continue; + } + long dataSize = table.getDataSize(false); + long remoteDataSize = ((OlapTable) table).getRemoteDataSize(); + dbToRecycleSize.compute(tableInfo.getDbId(), (k, v) -> { + if (v == null) { + return Pair.of(dataSize, remoteDataSize); + } else { + v.first += dataSize; + v.second += remoteDataSize; + return v; + } + }); + } + + for (Map.Entry<Long, RecyclePartitionInfo> entry : idToPartition.entrySet()) { + RecyclePartitionInfo partitionInfo = entry.getValue(); + Partition partition = partitionInfo.getPartition(); + long dataSize = partition.getDataSize(false); + long remoteDataSize = partition.getRemoteDataSize(); + dbToRecycleSize.compute(partitionInfo.getDbId(), (k, v) -> { + if (v == null) { + return Pair.of(dataSize, remoteDataSize); + } else { + v.first += dataSize; + v.second += remoteDataSize; + return v; + } + }); + } + return dbToRecycleSize; + } + // Need to add "synchronized", because when calling /dump api to dump image, // this class is not protected by any lock, will throw ConcurrentModificationException. @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java index 3975fe8b388..05ba8fac115 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java @@ -272,10 +272,19 @@ public class Database extends MetaObject implements Writable, DatabaseIf<Table> } public long getUsedDataQuotaWithLock() { - long usedDataQuota = 0; + return getUsedDataSize().first; + } + + public Pair<Long, Long> getUsedDataSize() { + long usedDataSize = 0; + long usedRemoteDataSize = 0; + List<Table> tables = new ArrayList<>(); readLock(); - List<Table> tables = new ArrayList<>(this.idToTable.values()); - readUnlock(); + try { + tables.addAll(this.idToTable.values()); + } finally { + readUnlock(); + } for (Table table : tables) { if (table.getType() != TableType.OLAP) { @@ -285,12 +294,13 @@ public class Database extends MetaObject implements Writable, DatabaseIf<Table> OlapTable olapTable = (OlapTable) table; olapTable.readLock(); try { - usedDataQuota = usedDataQuota + olapTable.getDataSize(); + usedDataSize = usedDataSize + olapTable.getDataSize(); + usedRemoteDataSize = usedRemoteDataSize + olapTable.getRemoteDataSize(); } finally { olapTable.readUnlock(); } } - return usedDataQuota; + return Pair.of(usedDataSize, usedRemoteDataSize); } public long getReplicaCountWithLock() { diff --git a/regression-test/suites/show_p0/test_show_data.groovy b/regression-test/suites/show_p0/test_show_data.groovy new file mode 100644 index 00000000000..10714a542d4 --- /dev/null +++ b/regression-test/suites/show_p0/test_show_data.groovy @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_show_data") { + def result = sql """show data;""" + logger.info("show data result:${result}"); + assertTrue(result.size() > 0); + + def jdbcUrlWithoutDbStr = (context.config.jdbcUrl).split(context.config.defaultDb) + logger.info("jdbcUrlWithoutDbStr:${jdbcUrlWithoutDbStr}"); + def result2 = connect(context.config.jdbcUser, password = context.config.jdbcPassword, url = jdbcUrlWithoutDbStr[0]) { + sql """show data;""" + } + + assertTrue(result2.size() > 0); + assertTrue(result2[0].size() == 6); +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org