This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 4636086952f Refactor get row count related interface, add row count cache for external table. (#31276) (#31596) 4636086952f is described below commit 4636086952fc381b7c7ef12df96f3548e7b3c06f Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Thu Feb 29 18:28:11 2024 +0800 Refactor get row count related interface, add row count cache for external table. (#31276) (#31596) --- .../main/java/org/apache/doris/common/Config.java | 4 + .../java/org/apache/doris/catalog/OlapTable.java | 2 +- .../main/java/org/apache/doris/catalog/Table.java | 25 ++--- .../java/org/apache/doris/catalog/TableIf.java | 7 +- .../doris/catalog/external/ExternalTable.java | 15 ++- .../doris/catalog/external/HMSExternalTable.java | 62 +++++------- .../doris/catalog/external/JdbcExternalTable.java | 19 ---- .../doris/datasource/ExternalMetaCacheMgr.java | 10 +- .../doris/datasource/ExternalRowCountCache.java | 112 +++++++++++++++++++++ .../doris/nereids/stats/StatsCalculator.java | 2 +- .../java/org/apache/doris/qe/ShowExecutor.java | 8 +- .../apache/doris/statistics/AnalysisManager.java | 5 +- ...CacheLoader.java => BasicAsyncCacheLoader.java} | 8 +- .../statistics/ColumnStatisticsCacheLoader.java | 2 +- .../apache/doris/statistics/HMSAnalysisTask.java | 8 +- .../doris/statistics/HistogramCacheLoader.java | 2 +- .../doris/statistics/OlapScanStatsDerive.java | 2 +- .../doris/statistics/StatisticConstants.java | 1 - .../doris/statistics/StatisticsCacheKey.java | 1 + .../doris/statistics/util/StatisticsUtil.java | 9 +- 20 files changed, 194 insertions(+), 110 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index ff61b17ff32..d73cf043ffc 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1798,6 +1798,10 @@ public class Config extends ConfigBase { "Max cache number of remote file system."}) public static long max_remote_file_system_cache_num = 100; + @ConfField(mutable = false, masterOnly = false, description = {"外表行数缓存最大数量", + "Max cache number of external table row count"}) + public static long max_external_table_row_count_cache_num = 100000; + /** * Max cache loader thread-pool size. * Max thread pool size for loading external meta cache diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 3536a0a7fec..090b0e543c5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -1196,7 +1196,7 @@ public class OlapTable extends Table { } @Override - public long getRowCount() { + public long fetchRowCount() { long rowCount = 0; for (Map.Entry<Long, Partition> entry : idToPartition.entrySet()) { rowCount += entry.getValue().getBaseIndex().getRowCount(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java index d033a58b238..6ee10997baf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java @@ -339,7 +339,7 @@ public abstract class Table extends MetaObject implements Writable, TableIf { } public long getRowCount() { - return 0; + return fetchRowCount(); } public long getAvgRowLength() { @@ -539,24 +539,6 @@ public abstract class Table extends MetaObject implements Writable, TableIf { throw new NotImplementedException("createAnalysisTask not implemented"); } - /** - * for NOT-ANALYZED Olap table, return estimated row count, - * for other table, return 1 - * @return estimated row count - */ - public long estimatedRowCount() { - long cardinality = 0; - if (this instanceof OlapTable) { - OlapTable table = (OlapTable) this; - for (long selectedPartitionId : table.getPartitionIds()) { - final Partition partition = table.getPartition(selectedPartitionId); - final MaterializedIndex baseIndex = partition.getBaseIndex(); - cardinality += baseIndex.getRowCount(); - } - } - return Math.max(cardinality, 1); - } - @Override public DatabaseIf getDatabase() { return Env.getCurrentInternalCatalog().getDbNullable(qualifiedDbName); @@ -583,4 +565,9 @@ public abstract class Table extends MetaObject implements Writable, TableIf { public List<Long> getChunkSizes() { throw new NotImplementedException("getChunkSized not implemented"); } + + @Override + public long fetchRowCount() { + return 0; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java index c1639768c56..c1bcf5b2179 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java @@ -125,6 +125,8 @@ public interface TableIf { long getRowCount(); + long fetchRowCount(); + long getDataLength(); long getAvgRowLength(); @@ -137,7 +139,10 @@ public interface TableIf { BaseAnalysisTask createAnalysisTask(AnalysisInfo info); - long estimatedRowCount(); + // For empty table, nereids require getting 1 as row count. This is a wrap function for nereids to call getRowCount. + default long getRowCountForNereids() { + return Math.max(getRowCount(), 1); + } DatabaseIf getDatabase(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java index d9f6cf426a7..4abd66abd54 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java @@ -284,6 +284,16 @@ public class ExternalTable implements TableIf, Writable, GsonPostProcessable { @Override public long getRowCount() { + // All external table should get external row count from cache. + return Env.getCurrentEnv().getExtMetaCacheMgr().getRowCountCache().getCachedRowCount(catalog.getId(), dbId, id); + } + + @Override + /** + * Default return 0. Subclass need to implement this interface. + * This is called by ExternalRowCountCache to load row count cache. + */ + public long fetchRowCount() { return 0; } @@ -334,11 +344,6 @@ public class ExternalTable implements TableIf, Writable, GsonPostProcessable { throw new NotImplementedException("createAnalysisTask not implemented"); } - @Override - public long estimatedRowCount() { - return 1; - } - @Override public DatabaseIf getDatabase() { return catalog.getDbNullable(dbName); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java index 961dc6240d6..4baee416a9a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java @@ -34,7 +34,6 @@ import org.apache.doris.statistics.BaseAnalysisTask; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; import org.apache.doris.statistics.HMSAnalysisTask; -import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.thrift.THiveTable; import org.apache.doris.thrift.TTableDescriptor; @@ -116,9 +115,6 @@ public class HMSExternalTable extends ExternalTable { protected DLAType dlaType = DLAType.UNKNOWN; - // No as precise as row count in TableStats, but better than none. - private long estimatedRowCount = -1; - // record the event update time when enable hms event listener protected volatile long eventUpdateTime; @@ -170,7 +166,6 @@ public class HMSExternalTable extends ExternalTable { } } objectCreated = true; - estimatedRowCount = getRowCountFromExternalSource(true); } } @@ -306,22 +301,11 @@ public class HMSExternalTable extends ExternalTable { return 0; } - @Override - public long getRowCount() { - makeSureInitialized(); - long rowCount = getRowCountFromExternalSource(false); - if (rowCount == -1) { - LOG.debug("Will estimate row count from file list."); - rowCount = StatisticsUtil.getRowCountFromFileList(this); - } - return rowCount; - } - - private long getRowCountFromExternalSource(boolean isInit) { + private long getRowCountFromExternalSource() { long rowCount; switch (dlaType) { case HIVE: - rowCount = StatisticsUtil.getHiveRowCount(this, isInit); + rowCount = StatisticsUtil.getHiveRowCount(this); break; case ICEBERG: rowCount = StatisticsUtil.getIcebergRowCount(this); @@ -478,27 +462,30 @@ public class HMSExternalTable extends ExternalTable { return tmpSchema; } - @Override - public long estimatedRowCount() { - try { - TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(id); - if (tableStats != null) { - long rowCount = tableStats.rowCount; - LOG.debug("Estimated row count for db {} table {} is {}.", dbName, name, rowCount); - return rowCount; - } + private List<Column> getHiveSchema() { + List<Column> columns; + List<FieldSchema> schema = ((HMSExternalCatalog) catalog).getClient().getSchema(dbName, name); + List<Column> tmpSchema = Lists.newArrayListWithCapacity(schema.size()); + for (FieldSchema field : schema) { + tmpSchema.add(new Column(field.getName().toLowerCase(Locale.ROOT), + HiveMetaStoreClientHelper.hiveTypeToDorisType(field.getType()), true, null, + true, field.getComment(), true, -1)); + } + columns = tmpSchema; + return columns; + } - if (estimatedRowCount != -1) { - return estimatedRowCount; - } - // Cache the estimated row count in this structure - // though the table never get analyzed, since the row estimation might be expensive caused by RPC. - estimatedRowCount = getRowCount(); - return estimatedRowCount; - } catch (Exception e) { - LOG.warn("Fail to get row count for table {}", name, e); + @Override + public long fetchRowCount() { + makeSureInitialized(); + // Get row count from hive metastore property. + long rowCount = getRowCountFromExternalSource(); + // Only hive table supports estimate row count by listing file. + if (rowCount == -1 && dlaType.equals(DLAType.HIVE)) { + LOG.debug("Will estimate row count from file list."); + rowCount = StatisticsUtil.getRowCountFromFileList(this); } - return 1; + return rowCount; } private List<Column> getIcebergSchema(List<FieldSchema> hmsSchema) { @@ -689,7 +676,6 @@ public class HMSExternalTable extends ExternalTable { @Override public void gsonPostProcess() throws IOException { super.gsonPostProcess(); - estimatedRowCount = -1; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/JdbcExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/JdbcExternalTable.java index 05023591bfd..66bce9b1b67 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/JdbcExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/JdbcExternalTable.java @@ -18,13 +18,11 @@ package org.apache.doris.catalog.external; import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.Env; import org.apache.doris.catalog.JdbcTable; import org.apache.doris.datasource.jdbc.JdbcExternalCatalog; import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; import org.apache.doris.statistics.JdbcAnalysisTask; -import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.thrift.TTableDescriptor; import org.apache.logging.log4j.LogManager; @@ -113,21 +111,4 @@ public class JdbcExternalTable extends ExternalTable { makeSureInitialized(); return new JdbcAnalysisTask(info); } - - @Override - public long getRowCount() { - makeSureInitialized(); - TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(id); - if (tableStats != null) { - long rowCount = tableStats.rowCount; - LOG.debug("Estimated row count for db {} table {} is {}.", dbName, name, rowCount); - return rowCount; - } - return 1; - } - - @Override - public long estimatedRowCount() { - return getRowCount(); - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java index 57aff50c621..1492392ee67 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java @@ -41,7 +41,8 @@ import java.util.concurrent.ExecutorService; /** * Cache meta of external catalog * 1. Meta for hive meta store, mainly for partition. - * 2. Table Schema cahce. + * 2. Table Schema cache. + * 3. Row count cache. */ public class ExternalMetaCacheMgr { private static final Logger LOG = LogManager.getLogger(ExternalMetaCacheMgr.class); @@ -55,6 +56,8 @@ public class ExternalMetaCacheMgr { private ExecutorService executor; // all catalogs could share the same fsCache. private FileSystemCache fsCache; + // all external table row count cache. + private ExternalRowCountCache rowCountCache; private final IcebergMetadataCacheMgr icebergMetadataCacheMgr; private final MaxComputeMetadataCacheMgr maxComputeMetadataCacheMgr; @@ -65,6 +68,7 @@ public class ExternalMetaCacheMgr { "ExternalMetaCacheMgr", 120, true); hudiPartitionMgr = HudiPartitionMgr.get(executor); fsCache = new FileSystemCache(executor); + rowCountCache = new ExternalRowCountCache(executor); icebergMetadataCacheMgr = new IcebergMetadataCacheMgr(); maxComputeMetadataCacheMgr = new MaxComputeMetadataCacheMgr(); } @@ -111,6 +115,10 @@ public class ExternalMetaCacheMgr { return fsCache; } + public ExternalRowCountCache getRowCountCache() { + return rowCountCache; + } + public void removeCache(long catalogId) { if (cacheMap.remove(catalogId) != null) { LOG.info("remove hive metastore cache for catalog {}", catalogId); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java new file mode 100644 index 00000000000..1441efa9bf5 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java @@ -0,0 +1,112 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource; + +import org.apache.doris.catalog.TableIf; +import org.apache.doris.common.Config; +import org.apache.doris.statistics.BasicAsyncCacheLoader; +import org.apache.doris.statistics.util.StatisticsUtil; + +import com.github.benmanes.caffeine.cache.AsyncLoadingCache; +import com.github.benmanes.caffeine.cache.Caffeine; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.time.Duration; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; + +public class ExternalRowCountCache { + + private static final Logger LOG = LogManager.getLogger(ExternalRowCountCache.class); + private final AsyncLoadingCache<RowCountKey, Optional<Long>> rowCountCache; + + public ExternalRowCountCache(ExecutorService executor) { + rowCountCache = Caffeine.newBuilder() + .maximumSize(Config.max_external_table_row_count_cache_num) + .expireAfterWrite(Duration.ofMinutes(Config.external_cache_expire_time_minutes_after_access)) + .executor(executor) + .buildAsync(new RowCountCacheLoader()); + } + + public static class RowCountKey { + private final long catalogId; + private final long dbId; + private final long tableId; + + public RowCountKey(long catalogId, long dbId, long tableId) { + this.catalogId = catalogId; + this.dbId = dbId; + this.tableId = tableId; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof RowCountKey)) { + return false; + } + return ((RowCountKey) obj).tableId == this.tableId; + } + + @Override + public int hashCode() { + return (int) tableId; + } + } + + public static class RowCountCacheLoader extends BasicAsyncCacheLoader<RowCountKey, Optional<Long>> { + + @Override + protected Optional<Long> doLoad(RowCountKey rowCountKey) { + try { + TableIf table = StatisticsUtil.findTable(rowCountKey.catalogId, rowCountKey.dbId, rowCountKey.tableId); + return Optional.of(table.fetchRowCount()); + } catch (Exception e) { + LOG.warn("Failed to get table with catalogId {}, dbId {}, tableId {}", rowCountKey.catalogId, + rowCountKey.dbId, rowCountKey.tableId); + return Optional.empty(); + } + } + } + + /** + * Get cached row count for the given table. Return 0 if cached not loaded or table not exists. + * Cached will be loaded async. + * @param catalogId + * @param dbId + * @param tableId + * @return Cached row count or 0 if not exist + */ + public long getCachedRowCount(long catalogId, long dbId, long tableId) { + RowCountKey key = new RowCountKey(catalogId, dbId, tableId); + try { + CompletableFuture<Optional<Long>> f = rowCountCache.get(key); + if (f.isDone()) { + return f.get().orElse(0L); + } + } catch (Exception e) { + LOG.warn("Unexpected exception while returning row count", e); + } + return 0; + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 82a67109452..ce35b301589 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -624,7 +624,7 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { .map(s -> (SlotReference) s).collect(Collectors.toSet()); Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>(); TableIf table = catalogRelation.getTable(); - double rowCount = catalogRelation.getTable().estimatedRowCount(); + double rowCount = catalogRelation.getTable().getRowCountForNereids(); boolean hasUnknownCol = false; long idxId = -1; if (catalogRelation instanceof OlapScan) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index ff9a41af076..d898185cf85 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -924,11 +924,7 @@ public class ShowExecutor { // Row_format row.add(null); // Rows - // Use estimatedRowCount(), not getRowCount(). - // because estimatedRowCount() is an async call, it will not block, and it will call getRowCount() - // finally. So that for some table(especially external table), - // we can get the row count without blocking. - row.add(String.valueOf(table.estimatedRowCount())); + row.add(String.valueOf(table.getRowCount())); // Avg_row_length row.add(String.valueOf(table.getAvgRowLength())); // Data_length @@ -2474,7 +2470,7 @@ public class ShowExecutor { tableStats == null means it's not analyzed, in this case show the estimated row count. */ if (tableStats == null && tableIf instanceof HMSExternalTable) { - resultSet = showTableStatsStmt.constructResultSet(tableIf.estimatedRowCount()); + resultSet = showTableStatsStmt.constructResultSet(tableIf.getRowCount()); } else { resultSet = showTableStatsStmt.constructResultSet(tableStats); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index d7cb5e6613f..b1275678050 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -553,8 +553,7 @@ public class AnalysisManager implements Writable { @VisibleForTesting public void updateTableStats(AnalysisInfo jobInfo) { - TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogId, - jobInfo.dbId, jobInfo.tblId); + TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId); // External Table only update table stats when all tasks finished. // Because it needs to get the row count from the result of row count task. if (tbl instanceof ExternalTable && !jobInfo.state.equals(AnalysisState.FINISHED)) { @@ -562,7 +561,7 @@ public class AnalysisManager implements Writable { } TableStatsMeta tableStats = findTableStatsStatus(tbl.getId()); if (tableStats == null) { - updateTableStatsStatus(new TableStatsMeta(jobInfo.emptyJob ? 0 : tbl.estimatedRowCount(), jobInfo, tbl)); + updateTableStatsStatus(new TableStatsMeta(jobInfo.emptyJob ? 0 : tbl.getRowCount(), jobInfo, tbl)); } else { tableStats.update(jobInfo, tbl); logCreateTableStats(tableStats); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BasicAsyncCacheLoader.java similarity index 85% rename from fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java rename to fe/fe-core/src/main/java/org/apache/doris/statistics/BasicAsyncCacheLoader.java index 8b49c57f1bb..ac5896bb06c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BasicAsyncCacheLoader.java @@ -25,13 +25,13 @@ import org.checkerframework.checker.nullness.qual.NonNull; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; -public abstract class StatisticsCacheLoader<V> implements AsyncCacheLoader<StatisticsCacheKey, V> { +public abstract class BasicAsyncCacheLoader<K, V> implements AsyncCacheLoader<K, V> { - private static final Logger LOG = LogManager.getLogger(StatisticsCacheLoader.class); + private static final Logger LOG = LogManager.getLogger(BasicAsyncCacheLoader.class); @Override public @NonNull CompletableFuture<V> asyncLoad( - @NonNull StatisticsCacheKey key, + @NonNull K key, @NonNull Executor executor) { CompletableFuture<V> future = CompletableFuture.supplyAsync(() -> { long startTime = System.currentTimeMillis(); @@ -45,5 +45,5 @@ public abstract class StatisticsCacheLoader<V> implements AsyncCacheLoader<Stati return future; } - protected abstract V doLoad(StatisticsCacheKey k); + protected abstract V doLoad(K k); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java index bc5fc4c10c0..91006cc4953 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java @@ -27,7 +27,7 @@ import org.apache.logging.log4j.Logger; import java.util.List; import java.util.Optional; -public class ColumnStatisticsCacheLoader extends StatisticsCacheLoader<Optional<ColumnStatistic>> { +public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader<StatisticsCacheKey, Optional<ColumnStatistic>> { private static final Logger LOG = LogManager.getLogger(ColumnStatisticsCacheLoader.class); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java index c4175a570db..1fe827420c9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java @@ -190,9 +190,11 @@ public class HMSAnalysisTask extends BaseAnalysisTask { } } } - // Estimate the row count. This value is inaccurate if the table stats is empty. - TableStatsMeta tableStatsStatus = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(table.getId()); - long count = tableStatsStatus == null ? table.estimatedRowCount() : tableStatsStatus.rowCount; + // Estimate the row count. This value is inaccurate. + long count = table.getRowCount(); + if (count == 0) { + count = table.fetchRowCount(); + } dataSize = dataSize * count / partitionNames.size(); numNulls = numNulls * count / partitionNames.size(); int ndv = ndvPartValues.size(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java index d9928f2a639..bf606364a23 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java @@ -31,7 +31,7 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.CompletionException; -public class HistogramCacheLoader extends StatisticsCacheLoader<Optional<Histogram>> { +public class HistogramCacheLoader extends BasicAsyncCacheLoader<StatisticsCacheKey, Optional<Histogram>> { private static final Logger LOG = LogManager.getLogger(HistogramCacheLoader.class); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java index b65678d1859..7ac4b95d484 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java @@ -59,7 +59,7 @@ public class OlapScanStatsDerive extends BaseStatsDerive { Map<Id, ColumnStatistic> columnStatisticMap = new HashMap<>(); Table table = scanNode.getOlapTable(); - double rowCount = table.estimatedRowCount(); + double rowCount = table.getRowCountForNereids(); for (Map.Entry<Id, String> entry : slotIdToTableIdAndColumnName.entrySet()) { String colName = entry.getValue(); // TODO. Get index id for materialized view. diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index 95c8c890863..593ae475810 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -39,7 +39,6 @@ public class StatisticConstants { public static final int ID_LEN = 4096; public static final int STATISTICS_CACHE_REFRESH_INTERVAL = 24 * 2; - /** * Bucket count fot column_statistics and analysis_job table. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java index 055dd128bef..fa924ab9284 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java @@ -73,6 +73,7 @@ public class StatisticsCacheKey { @Override public String toString() { StringJoiner sj = new StringJoiner(DELIMITER); + sj.add("ColumnStats"); sj.add(String.valueOf(tableId)); sj.add(String.valueOf(idxId)); sj.add(colName); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 0d1d6d9b75c..7752cfcc0e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -549,10 +549,9 @@ public class StatisticsUtil { * First get it from remote table parameters. If not found, estimate it : totalSize/estimatedRowSize * * @param table Hive HMSExternalTable to estimate row count. - * @param isInit Flag to indicate if this is called during init. To avoid recursively get schema. * @return estimated row count */ - public static long getHiveRowCount(HMSExternalTable table, boolean isInit) { + public static long getHiveRowCount(HMSExternalTable table) { Map<String, String> parameters = table.getRemoteTable().getParameters(); if (parameters == null) { return -1; @@ -565,7 +564,7 @@ public class StatisticsUtil { return rows; } } - if (!parameters.containsKey(TOTAL_SIZE) || isInit) { + if (!parameters.containsKey(TOTAL_SIZE)) { return -1; } // Table parameters doesn't contain row count but contain total size. Estimate row count : totalSize/rowSize @@ -575,7 +574,7 @@ public class StatisticsUtil { estimatedRowSize += column.getDataType().getSlotSize(); } if (estimatedRowSize == 0) { - return 1; + return -1; } return totalSize / estimatedRowSize; } @@ -653,7 +652,7 @@ public class StatisticsUtil { estimatedRowSize += column.getDataType().getSlotSize(); } if (estimatedRowSize == 0) { - return 1; + return 0; } if (samplePartitionSize < totalPartitionSize) { totalSize = totalSize * totalPartitionSize / samplePartitionSize; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org