This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 4403e3012c1 Refactor get row count related interface, add row count cache for external table. (#31276) 4403e3012c1 is described below commit 4403e3012c1b79282cfcc59d1b1c6b5ffd995b58 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Fri Feb 23 13:02:13 2024 +0800 Refactor get row count related interface, add row count cache for external table. (#31276) --- .../main/java/org/apache/doris/common/Config.java | 4 + .../java/org/apache/doris/catalog/OlapTable.java | 7 +- .../main/java/org/apache/doris/catalog/Table.java | 29 ++---- .../java/org/apache/doris/catalog/TableIf.java | 9 +- .../doris/datasource/ExternalMetaCacheMgr.java | 10 +- .../doris/datasource/ExternalRowCountCache.java | 112 +++++++++++++++++++++ .../org/apache/doris/datasource/ExternalTable.java | 15 +-- .../doris/datasource/hive/HMSExternalTable.java | 72 ++----------- .../doris/datasource/jdbc/JdbcExternalTable.java | 26 ----- .../doris/nereids/stats/StatsCalculator.java | 2 +- .../java/org/apache/doris/qe/ShowExecutor.java | 8 +- .../apache/doris/service/FrontendServiceImpl.java | 2 +- .../apache/doris/statistics/AnalysisManager.java | 5 +- ...CacheLoader.java => BasicAsyncCacheLoader.java} | 17 ++-- .../statistics/ColumnStatisticsCacheLoader.java | 2 +- .../apache/doris/statistics/HMSAnalysisTask.java | 4 +- .../doris/statistics/HistogramCacheLoader.java | 2 +- .../doris/statistics/OlapScanStatsDerive.java | 2 +- .../doris/statistics/StatisticConstants.java | 1 - .../doris/statistics/StatisticsCacheKey.java | 1 + .../doris/statistics/util/StatisticsUtil.java | 9 +- 21 files changed, 180 insertions(+), 159 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 86ed91d3c08..21e4f7ddcd5 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1902,6 +1902,10 @@ public class Config extends ConfigBase { "Max cache number of remote file system."}) public static long max_remote_file_system_cache_num = 100; + @ConfField(mutable = false, masterOnly = false, description = {"外表行数缓存最大数量", + "Max cache number of external table row count"}) + public static long max_external_table_row_count_cache_num = 100000; + /** * Max cache loader thread-pool size. * Max thread pool size for loading external meta cache diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index b6ba7bb81c6..da25bbafe5b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -1280,7 +1280,7 @@ public class OlapTable extends Table implements MTMVRelatedTableIf { } @Override - public long getRowCount() { + public long fetchRowCount() { long rowCount = 0; for (Map.Entry<Long, Partition> entry : idToPartition.entrySet()) { rowCount += entry.getValue().getBaseIndex().getRowCount(); @@ -1297,11 +1297,6 @@ public class OlapTable extends Table implements MTMVRelatedTableIf { return rowCount; } - @Override - public long getCacheRowCount() { - return getRowCount(); - } - @Override public long getAvgRowLength() { long rowCount = 0; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java index d08b5e8aa1c..825e55ee09e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java @@ -394,11 +394,7 @@ public abstract class Table extends MetaObject implements Writable, TableIf { } public long getRowCount() { - return 0; - } - - public long getCacheRowCount() { - return getRowCount(); + return fetchRowCount(); } public long getAvgRowLength() { @@ -605,24 +601,6 @@ public abstract class Table extends MetaObject implements Writable, TableIf { throw new NotImplementedException("createAnalysisTask not implemented"); } - /** - * for NOT-ANALYZED Olap table, return estimated row count, - * for other table, return 1 - * @return estimated row count - */ - public long estimatedRowCount() { - long cardinality = 0; - if (this instanceof OlapTable) { - OlapTable table = (OlapTable) this; - for (long selectedPartitionId : table.getPartitionIds()) { - final Partition partition = table.getPartition(selectedPartitionId); - final MaterializedIndex baseIndex = partition.getBaseIndex(); - cardinality += baseIndex.getRowCount(); - } - } - return Math.max(cardinality, 1); - } - @Override public DatabaseIf getDatabase() { return Env.getCurrentInternalCatalog().getDbNullable(qualifiedDbName); @@ -649,4 +627,9 @@ public abstract class Table extends MetaObject implements Writable, TableIf { public List<Long> getChunkSizes() { throw new NotImplementedException("getChunkSized not implemented"); } + + @Override + public long fetchRowCount() { + return 0; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java index 29c7d6b83e2..fd7f5d53880 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java @@ -135,9 +135,7 @@ public interface TableIf { long getRowCount(); - // Get the exact number of rows in the internal table; - // Get the number of cached rows or estimated rows in the external table, if not, return -1. - long getCacheRowCount(); + long fetchRowCount(); long getDataLength(); @@ -151,7 +149,10 @@ public interface TableIf { BaseAnalysisTask createAnalysisTask(AnalysisInfo info); - long estimatedRowCount(); + // For empty table, nereids require getting 1 as row count. This is a wrap function for nereids to call getRowCount. + default long getRowCountForNereids() { + return Math.max(getRowCount(), 1); + } DatabaseIf getDatabase(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java index fd25be82ac2..dc48f1b30e9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java @@ -44,7 +44,8 @@ import java.util.concurrent.ExecutorService; /** * Cache meta of external catalog * 1. Meta for hive meta store, mainly for partition. - * 2. Table Schema cahce. + * 2. Table Schema cache. + * 3. Row count cache. */ public class ExternalMetaCacheMgr { private static final Logger LOG = LogManager.getLogger(ExternalMetaCacheMgr.class); @@ -58,6 +59,8 @@ public class ExternalMetaCacheMgr { private ExecutorService executor; // all catalogs could share the same fsCache. private FileSystemCache fsCache; + // all external table row count cache. + private ExternalRowCountCache rowCountCache; private final IcebergMetadataCacheMgr icebergMetadataCacheMgr; private final MaxComputeMetadataCacheMgr maxComputeMetadataCacheMgr; @@ -68,6 +71,7 @@ public class ExternalMetaCacheMgr { "ExternalMetaCacheMgr", 120, true); hudiPartitionMgr = HudiPartitionMgr.get(executor); fsCache = new FileSystemCache(executor); + rowCountCache = new ExternalRowCountCache(executor); icebergMetadataCacheMgr = new IcebergMetadataCacheMgr(); maxComputeMetadataCacheMgr = new MaxComputeMetadataCacheMgr(); } @@ -114,6 +118,10 @@ public class ExternalMetaCacheMgr { return fsCache; } + public ExternalRowCountCache getRowCountCache() { + return rowCountCache; + } + public void removeCache(long catalogId) { if (cacheMap.remove(catalogId) != null) { LOG.info("remove hive metastore cache for catalog {}", catalogId); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java new file mode 100644 index 00000000000..1441efa9bf5 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java @@ -0,0 +1,112 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource; + +import org.apache.doris.catalog.TableIf; +import org.apache.doris.common.Config; +import org.apache.doris.statistics.BasicAsyncCacheLoader; +import org.apache.doris.statistics.util.StatisticsUtil; + +import com.github.benmanes.caffeine.cache.AsyncLoadingCache; +import com.github.benmanes.caffeine.cache.Caffeine; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.time.Duration; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; + +public class ExternalRowCountCache { + + private static final Logger LOG = LogManager.getLogger(ExternalRowCountCache.class); + private final AsyncLoadingCache<RowCountKey, Optional<Long>> rowCountCache; + + public ExternalRowCountCache(ExecutorService executor) { + rowCountCache = Caffeine.newBuilder() + .maximumSize(Config.max_external_table_row_count_cache_num) + .expireAfterWrite(Duration.ofMinutes(Config.external_cache_expire_time_minutes_after_access)) + .executor(executor) + .buildAsync(new RowCountCacheLoader()); + } + + public static class RowCountKey { + private final long catalogId; + private final long dbId; + private final long tableId; + + public RowCountKey(long catalogId, long dbId, long tableId) { + this.catalogId = catalogId; + this.dbId = dbId; + this.tableId = tableId; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof RowCountKey)) { + return false; + } + return ((RowCountKey) obj).tableId == this.tableId; + } + + @Override + public int hashCode() { + return (int) tableId; + } + } + + public static class RowCountCacheLoader extends BasicAsyncCacheLoader<RowCountKey, Optional<Long>> { + + @Override + protected Optional<Long> doLoad(RowCountKey rowCountKey) { + try { + TableIf table = StatisticsUtil.findTable(rowCountKey.catalogId, rowCountKey.dbId, rowCountKey.tableId); + return Optional.of(table.fetchRowCount()); + } catch (Exception e) { + LOG.warn("Failed to get table with catalogId {}, dbId {}, tableId {}", rowCountKey.catalogId, + rowCountKey.dbId, rowCountKey.tableId); + return Optional.empty(); + } + } + } + + /** + * Get cached row count for the given table. Return 0 if cached not loaded or table not exists. + * Cached will be loaded async. + * @param catalogId + * @param dbId + * @param tableId + * @return Cached row count or 0 if not exist + */ + public long getCachedRowCount(long catalogId, long dbId, long tableId) { + RowCountKey key = new RowCountKey(catalogId, dbId, tableId); + try { + CompletableFuture<Optional<Long>> f = rowCountCache.get(key); + if (f.isDone()) { + return f.get().orElse(0L); + } + } catch (Exception e) { + LOG.warn("Unexpected exception while returning row count", e); + } + return 0; + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java index 8f7fada5f61..d756e803a23 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java @@ -297,10 +297,16 @@ public class ExternalTable implements TableIf, Writable, GsonPostProcessable { @Override public long getRowCount() { - return 0; + // All external table should get external row count from cache. + return Env.getCurrentEnv().getExtMetaCacheMgr().getRowCountCache().getCachedRowCount(catalog.getId(), dbId, id); } - public long getCacheRowCount() { + @Override + /** + * Default return 0. Subclass need to implement this interface. + * This is called by ExternalRowCountCache to load row count cache. + */ + public long fetchRowCount() { return 0; } @@ -351,11 +357,6 @@ public class ExternalTable implements TableIf, Writable, GsonPostProcessable { throw new NotImplementedException("createAnalysisTask not implemented"); } - @Override - public long estimatedRowCount() { - return 1; - } - @Override public DatabaseIf getDatabase() { return catalog.getDbNullable(dbName); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 6da6073a4a2..0e11267829c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -40,7 +40,6 @@ import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; import org.apache.doris.statistics.HMSAnalysisTask; import org.apache.doris.statistics.StatsType; -import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.thrift.THiveTable; import org.apache.doris.thrift.TTableDescriptor; @@ -146,9 +145,6 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI private DLAType dlaType = DLAType.UNKNOWN; - // No as precise as row count in TableStats, but better than none. - private long estimatedRowCount = -1; - // record the event update time when enable hms event listener protected volatile long eventUpdateTime; @@ -196,7 +192,6 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI } } objectCreated = true; - estimatedRowCount = getRowCountFromExternalSource(true); } } @@ -319,24 +314,11 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI return 0; } - @Override - public long getRowCount() { - makeSureInitialized(); - long rowCount = getRowCountFromExternalSource(false); - if (rowCount == -1) { - if (LOG.isDebugEnabled()) { - LOG.debug("Will estimate row count from file list."); - } - rowCount = StatisticsUtil.getRowCountFromFileList(this); - } - return rowCount; - } - - private long getRowCountFromExternalSource(boolean isInit) { + private long getRowCountFromExternalSource() { long rowCount; switch (dlaType) { case HIVE: - rowCount = StatisticsUtil.getHiveRowCount(this, isInit); + rowCount = StatisticsUtil.getHiveRowCount(this); break; case ICEBERG: rowCount = StatisticsUtil.getIcebergRowCount(this); @@ -514,47 +496,16 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI } @Override - public long getCacheRowCount() { - //Cached accurate information - TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(id); - if (tableStats != null) { - long rowCount = tableStats.rowCount; - if (LOG.isDebugEnabled()) { - LOG.debug("Estimated row count for db {} table {} is {}.", dbName, name, rowCount); - } - return rowCount; - } - - //estimated information - if (estimatedRowCount != -1) { - return estimatedRowCount; - } - return -1; - } - - @Override - public long estimatedRowCount() { - try { - TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(id); - if (tableStats != null) { - long rowCount = tableStats.rowCount; - if (LOG.isDebugEnabled()) { - LOG.debug("Estimated row count for db {} table {} is {}.", dbName, name, rowCount); - } - return rowCount; - } - - if (estimatedRowCount != -1) { - return estimatedRowCount; - } - // Cache the estimated row count in this structure - // though the table never get analyzed, since the row estimation might be expensive caused by RPC. - estimatedRowCount = getRowCount(); - return estimatedRowCount; - } catch (Exception e) { - LOG.warn("Fail to get row count for table {}", name, e); + public long fetchRowCount() { + makeSureInitialized(); + // Get row count from hive metastore property. + long rowCount = getRowCountFromExternalSource(); + // Only hive table supports estimate row count by listing file. + if (rowCount == -1 && dlaType.equals(DLAType.HIVE)) { + LOG.debug("Will estimate row count from file list."); + rowCount = StatisticsUtil.getRowCountFromFileList(this); } - return 1; + return rowCount; } private void initPartitionColumns(List<Column> schema) { @@ -766,7 +717,6 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI @Override public void gsonPostProcess() throws IOException { super.gsonPostProcess(); - estimatedRowCount = -1; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalTable.java index a6199ba8c8f..64fd25525e5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalTable.java @@ -18,13 +18,11 @@ package org.apache.doris.datasource.jdbc; import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.Env; import org.apache.doris.catalog.JdbcTable; import org.apache.doris.datasource.ExternalTable; import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; import org.apache.doris.statistics.JdbcAnalysisTask; -import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.thrift.TTableDescriptor; import org.apache.logging.log4j.LogManager; @@ -111,28 +109,4 @@ public class JdbcExternalTable extends ExternalTable { makeSureInitialized(); return new JdbcAnalysisTask(info); } - - @Override - public long getRowCount() { - makeSureInitialized(); - TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(id); - if (tableStats != null) { - long rowCount = tableStats.rowCount; - if (LOG.isDebugEnabled()) { - LOG.debug("Estimated row count for db {} table {} is {}.", dbName, name, rowCount); - } - return rowCount; - } - return 1; - } - - @Override - public long getCacheRowCount() { - return getRowCount(); - } - - @Override - public long estimatedRowCount() { - return getRowCount(); - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 5027cceab63..ad16d3e8f50 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -649,7 +649,7 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { .map(s -> (SlotReference) s).collect(Collectors.toSet()); Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>(); TableIf table = catalogRelation.getTable(); - double rowCount = catalogRelation.getTable().estimatedRowCount(); + double rowCount = catalogRelation.getTable().getRowCountForNereids(); boolean hasUnknownCol = false; long idxId = -1; if (catalogRelation instanceof OlapScan) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 62e9f4ac5a5..f0617f8bdf4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -920,11 +920,7 @@ public class ShowExecutor { // Row_format row.add(null); // Rows - // Use estimatedRowCount(), not getRowCount(). - // because estimatedRowCount() is an async call, it will not block, and it will call getRowCount() - // finally. So that for some table(especially external table), - // we can get the row count without blocking. - row.add(String.valueOf(table.estimatedRowCount())); + row.add(String.valueOf(table.getRowCount())); // Avg_row_length row.add(String.valueOf(table.getAvgRowLength())); // Data_length @@ -2540,7 +2536,7 @@ public class ShowExecutor { tableStats == null means it's not analyzed, in this case show the estimated row count. */ if (tableStats == null && tableIf instanceof HMSExternalTable) { - resultSet = showTableStatsStmt.constructResultSet(tableIf.estimatedRowCount()); + resultSet = showTableStatsStmt.constructResultSet(tableIf.getRowCount()); } else { resultSet = showTableStatsStmt.constructResultSet(tableStats); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index cb5e53947e7..cf7533cbc70 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -609,7 +609,7 @@ public class FrontendServiceImpl implements FrontendService.Iface { status.setUpdateTime(table.getUpdateTime() / 1000); status.setCheckTime(lastCheckTime / 1000); status.setCollation("utf-8"); - status.setRows(table.getCacheRowCount()); + status.setRows(table.getRowCount()); status.setDataLength(table.getDataLength()); status.setAvgRowLength(table.getAvgRowLength()); tablesResult.add(status); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index cd31acaa0b7..eac50b40757 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -550,8 +550,7 @@ public class AnalysisManager implements Writable { @VisibleForTesting public void updateTableStats(AnalysisInfo jobInfo) { - TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogId, - jobInfo.dbId, jobInfo.tblId); + TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId); // External Table only update table stats when all tasks finished. // Because it needs to get the row count from the result of row count task. if (tbl instanceof ExternalTable && !jobInfo.state.equals(AnalysisState.FINISHED)) { @@ -559,7 +558,7 @@ public class AnalysisManager implements Writable { } TableStatsMeta tableStats = findTableStatsStatus(tbl.getId()); if (tableStats == null) { - updateTableStatsStatus(new TableStatsMeta(jobInfo.emptyJob ? 0 : tbl.estimatedRowCount(), jobInfo, tbl)); + updateTableStatsStatus(new TableStatsMeta(jobInfo.emptyJob ? 0 : tbl.getRowCount(), jobInfo, tbl)); } else { tableStats.update(jobInfo, tbl); logCreateTableStats(tableStats); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BasicAsyncCacheLoader.java similarity index 80% rename from fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java rename to fe/fe-core/src/main/java/org/apache/doris/statistics/BasicAsyncCacheLoader.java index c212851a284..e7e488a6e7f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BasicAsyncCacheLoader.java @@ -28,15 +28,15 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; -public abstract class StatisticsCacheLoader<V> implements AsyncCacheLoader<StatisticsCacheKey, V> { +public abstract class BasicAsyncCacheLoader<K, V> implements AsyncCacheLoader<K, V> { - private static final Logger LOG = LogManager.getLogger(StatisticsCacheLoader.class); + private static final Logger LOG = LogManager.getLogger(BasicAsyncCacheLoader.class); - private final Map<StatisticsCacheKey, CompletableFutureWithCreateTime<V>> inProgressing = new HashMap<>(); + private final Map<K, CompletableFutureWithCreateTime<V>> inProgressing = new HashMap<>(); @Override public @NonNull CompletableFuture<V> asyncLoad( - @NonNull StatisticsCacheKey key, + @NonNull K key, @NonNull Executor executor) { CompletableFutureWithCreateTime<V> cfWrapper = inProgressing.get(key); if (cfWrapper != null) { @@ -48,8 +48,7 @@ public abstract class StatisticsCacheLoader<V> implements AsyncCacheLoader<Stati return doLoad(key); } finally { long endTime = System.currentTimeMillis(); - LOG.info("Query BE for column stats:{}-{} end time:{} cost time:{}", key.tableId, key.colName, - endTime, endTime - startTime); + LOG.info("Load statistic cache [{}] cost time ms:{}", key, endTime - startTime); removeFromIProgressing(key); } }, executor); @@ -58,7 +57,7 @@ public abstract class StatisticsCacheLoader<V> implements AsyncCacheLoader<Stati return future; } - protected abstract V doLoad(StatisticsCacheKey k); + protected abstract V doLoad(K k); private static class CompletableFutureWithCreateTime<V> extends CompletableFuture<V> { @@ -76,13 +75,13 @@ public abstract class StatisticsCacheLoader<V> implements AsyncCacheLoader<Stati } } - private void putIntoIProgressing(StatisticsCacheKey k, CompletableFutureWithCreateTime<V> v) { + private void putIntoIProgressing(K k, CompletableFutureWithCreateTime<V> v) { synchronized (inProgressing) { inProgressing.put(k, v); } } - private void removeFromIProgressing(StatisticsCacheKey k) { + private void removeFromIProgressing(K k) { synchronized (inProgressing) { inProgressing.remove(k); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java index 24c08c8b755..e33cff3107a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java @@ -30,7 +30,7 @@ import java.util.Optional; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.ThreadPoolExecutor.DiscardOldestPolicy; -public class ColumnStatisticsCacheLoader extends StatisticsCacheLoader<Optional<ColumnStatistic>> { +public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader<StatisticsCacheKey, Optional<ColumnStatistic>> { private static final Logger LOG = LogManager.getLogger(ColumnStatisticsCacheLoader.class); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java index 549ab069ace..597acfdfddb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java @@ -110,7 +110,7 @@ public class HMSAnalysisTask extends ExternalAnalysisTask { // Estimate the row count. This value is inaccurate if the table stats is empty. TableStatsMeta tableStatsStatus = Env.getCurrentEnv().getAnalysisManager() .findTableStatsStatus(hmsExternalTable.getId()); - long count = tableStatsStatus == null ? hmsExternalTable.estimatedRowCount() : tableStatsStatus.rowCount; + long count = tableStatsStatus == null ? hmsExternalTable.getRowCount() : tableStatsStatus.rowCount; dataSize = dataSize * count / partitionNames.size(); numNulls = numNulls * count / partitionNames.size(); int ndv = ndvPartValues.size(); @@ -131,7 +131,7 @@ public class HMSAnalysisTask extends ExternalAnalysisTask { private void getHmsColumnStats() throws Exception { TableStatsMeta tableStatsStatus = Env.getCurrentEnv().getAnalysisManager() .findTableStatsStatus(hmsExternalTable.getId()); - long count = tableStatsStatus == null ? hmsExternalTable.estimatedRowCount() : tableStatsStatus.rowCount; + long count = tableStatsStatus == null ? hmsExternalTable.getRowCount() : tableStatsStatus.rowCount; Map<String, String> params = buildStatsParams("NULL"); Map<StatsType, String> statsParams = new HashMap<>(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java index d9928f2a639..bf606364a23 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java @@ -31,7 +31,7 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.CompletionException; -public class HistogramCacheLoader extends StatisticsCacheLoader<Optional<Histogram>> { +public class HistogramCacheLoader extends BasicAsyncCacheLoader<StatisticsCacheKey, Optional<Histogram>> { private static final Logger LOG = LogManager.getLogger(HistogramCacheLoader.class); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java index b65678d1859..7ac4b95d484 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java @@ -59,7 +59,7 @@ public class OlapScanStatsDerive extends BaseStatsDerive { Map<Id, ColumnStatistic> columnStatisticMap = new HashMap<>(); Table table = scanNode.getOlapTable(); - double rowCount = table.estimatedRowCount(); + double rowCount = table.getRowCountForNereids(); for (Map.Entry<Id, String> entry : slotIdToTableIdAndColumnName.entrySet()) { String colName = entry.getValue(); // TODO. Get index id for materialized view. diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index f2f53fa7457..74c7bd7c9db 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -39,7 +39,6 @@ public class StatisticConstants { public static final int ID_LEN = 4096; public static final int STATISTICS_CACHE_REFRESH_INTERVAL = 24 * 2; - /** * Bucket count fot column_statistics and analysis_job table. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java index 055dd128bef..fa924ab9284 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java @@ -73,6 +73,7 @@ public class StatisticsCacheKey { @Override public String toString() { StringJoiner sj = new StringJoiner(DELIMITER); + sj.add("ColumnStats"); sj.add(String.valueOf(tableId)); sj.add(String.valueOf(idxId)); sj.add(colName); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 0ff1488f464..8688447dcb9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -553,10 +553,9 @@ public class StatisticsUtil { * First get it from remote table parameters. If not found, estimate it : totalSize/estimatedRowSize * * @param table Hive HMSExternalTable to estimate row count. - * @param isInit Flag to indicate if this is called during init. To avoid recursively get schema. * @return estimated row count */ - public static long getHiveRowCount(HMSExternalTable table, boolean isInit) { + public static long getHiveRowCount(HMSExternalTable table) { Map<String, String> parameters = table.getRemoteTable().getParameters(); if (parameters == null) { return -1; @@ -569,7 +568,7 @@ public class StatisticsUtil { return rows; } } - if (!parameters.containsKey(TOTAL_SIZE) || isInit) { + if (!parameters.containsKey(TOTAL_SIZE)) { return -1; } // Table parameters doesn't contain row count but contain total size. Estimate row count : totalSize/rowSize @@ -579,7 +578,7 @@ public class StatisticsUtil { estimatedRowSize += column.getDataType().getSlotSize(); } if (estimatedRowSize == 0) { - return 1; + return -1; } return totalSize / estimatedRowSize; } @@ -657,7 +656,7 @@ public class StatisticsUtil { estimatedRowSize += column.getDataType().getSlotSize(); } if (estimatedRowSize == 0) { - return 1; + return 0; } if (samplePartitionSize < totalPartitionSize) { totalSize = totalSize * totalPartitionSize / samplePartitionSize; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org