This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 4636086952f Refactor get row count related interface, add row count 
cache for external table. (#31276) (#31596)
4636086952f is described below

commit 4636086952fc381b7c7ef12df96f3548e7b3c06f
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Thu Feb 29 18:28:11 2024 +0800

    Refactor get row count related interface, add row count cache for external 
table. (#31276) (#31596)
---
 .../main/java/org/apache/doris/common/Config.java  |   4 +
 .../java/org/apache/doris/catalog/OlapTable.java   |   2 +-
 .../main/java/org/apache/doris/catalog/Table.java  |  25 ++---
 .../java/org/apache/doris/catalog/TableIf.java     |   7 +-
 .../doris/catalog/external/ExternalTable.java      |  15 ++-
 .../doris/catalog/external/HMSExternalTable.java   |  62 +++++-------
 .../doris/catalog/external/JdbcExternalTable.java  |  19 ----
 .../doris/datasource/ExternalMetaCacheMgr.java     |  10 +-
 .../doris/datasource/ExternalRowCountCache.java    | 112 +++++++++++++++++++++
 .../doris/nereids/stats/StatsCalculator.java       |   2 +-
 .../java/org/apache/doris/qe/ShowExecutor.java     |   8 +-
 .../apache/doris/statistics/AnalysisManager.java   |   5 +-
 ...CacheLoader.java => BasicAsyncCacheLoader.java} |   8 +-
 .../statistics/ColumnStatisticsCacheLoader.java    |   2 +-
 .../apache/doris/statistics/HMSAnalysisTask.java   |   8 +-
 .../doris/statistics/HistogramCacheLoader.java     |   2 +-
 .../doris/statistics/OlapScanStatsDerive.java      |   2 +-
 .../doris/statistics/StatisticConstants.java       |   1 -
 .../doris/statistics/StatisticsCacheKey.java       |   1 +
 .../doris/statistics/util/StatisticsUtil.java      |   9 +-
 20 files changed, 194 insertions(+), 110 deletions(-)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java 
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index ff61b17ff32..d73cf043ffc 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -1798,6 +1798,10 @@ public class Config extends ConfigBase {
         "Max cache number of remote file system."})
     public static long max_remote_file_system_cache_num = 100;
 
+    @ConfField(mutable = false, masterOnly = false, description = 
{"外表行数缓存最大数量",
+        "Max cache number of external table row count"})
+    public static long max_external_table_row_count_cache_num = 100000;
+
     /**
      * Max cache loader thread-pool size.
      * Max thread pool size for loading external meta cache
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index 3536a0a7fec..090b0e543c5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -1196,7 +1196,7 @@ public class OlapTable extends Table {
     }
 
     @Override
-    public long getRowCount() {
+    public long fetchRowCount() {
         long rowCount = 0;
         for (Map.Entry<Long, Partition> entry : idToPartition.entrySet()) {
             rowCount += entry.getValue().getBaseIndex().getRowCount();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
index d033a58b238..6ee10997baf 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
@@ -339,7 +339,7 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf {
     }
 
     public long getRowCount() {
-        return 0;
+        return fetchRowCount();
     }
 
     public long getAvgRowLength() {
@@ -539,24 +539,6 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf {
         throw new NotImplementedException("createAnalysisTask not 
implemented");
     }
 
-    /**
-     * for NOT-ANALYZED Olap table, return estimated row count,
-     * for other table, return 1
-     * @return estimated row count
-     */
-    public long estimatedRowCount() {
-        long cardinality = 0;
-        if (this instanceof OlapTable) {
-            OlapTable table = (OlapTable) this;
-            for (long selectedPartitionId : table.getPartitionIds()) {
-                final Partition partition = 
table.getPartition(selectedPartitionId);
-                final MaterializedIndex baseIndex = partition.getBaseIndex();
-                cardinality += baseIndex.getRowCount();
-            }
-        }
-        return Math.max(cardinality, 1);
-    }
-
     @Override
     public DatabaseIf getDatabase() {
         return Env.getCurrentInternalCatalog().getDbNullable(qualifiedDbName);
@@ -583,4 +565,9 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf {
     public List<Long> getChunkSizes() {
         throw new NotImplementedException("getChunkSized not implemented");
     }
+
+    @Override
+    public long fetchRowCount() {
+        return 0;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
index c1639768c56..c1bcf5b2179 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
@@ -125,6 +125,8 @@ public interface TableIf {
 
     long getRowCount();
 
+    long fetchRowCount();
+
     long getDataLength();
 
     long getAvgRowLength();
@@ -137,7 +139,10 @@ public interface TableIf {
 
     BaseAnalysisTask createAnalysisTask(AnalysisInfo info);
 
-    long estimatedRowCount();
+    // Nereids requires a row count of at least 1, even for an empty table. This is a wrapper 
function for Nereids to call getRowCount.
+    default long getRowCountForNereids() {
+        return Math.max(getRowCount(), 1);
+    }
 
     DatabaseIf getDatabase();
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java
index d9f6cf426a7..4abd66abd54 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java
@@ -284,6 +284,16 @@ public class ExternalTable implements TableIf, Writable, 
GsonPostProcessable {
 
     @Override
     public long getRowCount() {
+        // All external tables should get their row count from the cache.
+        return 
Env.getCurrentEnv().getExtMetaCacheMgr().getRowCountCache().getCachedRowCount(catalog.getId(),
 dbId, id);
+    }
+
+    @Override
+    /**
+     * Returns 0 by default. Subclasses need to override this method.
+     * This is called by ExternalRowCountCache to load the row count cache.
+     */
+    public long fetchRowCount() {
         return 0;
     }
 
@@ -334,11 +344,6 @@ public class ExternalTable implements TableIf, Writable, 
GsonPostProcessable {
         throw new NotImplementedException("createAnalysisTask not 
implemented");
     }
 
-    @Override
-    public long estimatedRowCount() {
-        return 1;
-    }
-
     @Override
     public DatabaseIf getDatabase() {
         return catalog.getDbNullable(dbName);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index 961dc6240d6..4baee416a9a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -34,7 +34,6 @@ import org.apache.doris.statistics.BaseAnalysisTask;
 import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.ColumnStatisticBuilder;
 import org.apache.doris.statistics.HMSAnalysisTask;
-import org.apache.doris.statistics.TableStatsMeta;
 import org.apache.doris.statistics.util.StatisticsUtil;
 import org.apache.doris.thrift.THiveTable;
 import org.apache.doris.thrift.TTableDescriptor;
@@ -116,9 +115,6 @@ public class HMSExternalTable extends ExternalTable {
 
     protected DLAType dlaType = DLAType.UNKNOWN;
 
-    // No as precise as row count in TableStats, but better than none.
-    private long estimatedRowCount = -1;
-
     // record the event update time when enable hms event listener
     protected volatile long eventUpdateTime;
 
@@ -170,7 +166,6 @@ public class HMSExternalTable extends ExternalTable {
                 }
             }
             objectCreated = true;
-            estimatedRowCount = getRowCountFromExternalSource(true);
         }
     }
 
@@ -306,22 +301,11 @@ public class HMSExternalTable extends ExternalTable {
         return 0;
     }
 
-    @Override
-    public long getRowCount() {
-        makeSureInitialized();
-        long rowCount = getRowCountFromExternalSource(false);
-        if (rowCount == -1) {
-            LOG.debug("Will estimate row count from file list.");
-            rowCount = StatisticsUtil.getRowCountFromFileList(this);
-        }
-        return rowCount;
-    }
-
-    private long getRowCountFromExternalSource(boolean isInit) {
+    private long getRowCountFromExternalSource() {
         long rowCount;
         switch (dlaType) {
             case HIVE:
-                rowCount = StatisticsUtil.getHiveRowCount(this, isInit);
+                rowCount = StatisticsUtil.getHiveRowCount(this);
                 break;
             case ICEBERG:
                 rowCount = StatisticsUtil.getIcebergRowCount(this);
@@ -478,27 +462,30 @@ public class HMSExternalTable extends ExternalTable {
         return tmpSchema;
     }
 
-    @Override
-    public long estimatedRowCount() {
-        try {
-            TableStatsMeta tableStats = 
Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(id);
-            if (tableStats != null) {
-                long rowCount = tableStats.rowCount;
-                LOG.debug("Estimated row count for db {} table {} is {}.", 
dbName, name, rowCount);
-                return rowCount;
-            }
+    private List<Column> getHiveSchema() {
+        List<Column> columns;
+        List<FieldSchema> schema = ((HMSExternalCatalog) 
catalog).getClient().getSchema(dbName, name);
+        List<Column> tmpSchema = Lists.newArrayListWithCapacity(schema.size());
+        for (FieldSchema field : schema) {
+            tmpSchema.add(new Column(field.getName().toLowerCase(Locale.ROOT),
+                    
HiveMetaStoreClientHelper.hiveTypeToDorisType(field.getType()), true, null,
+                    true, field.getComment(), true, -1));
+        }
+        columns = tmpSchema;
+        return columns;
+    }
 
-            if (estimatedRowCount != -1) {
-                return estimatedRowCount;
-            }
-            // Cache the estimated row count in this structure
-            // though the table never get analyzed, since the row estimation 
might be expensive caused by RPC.
-            estimatedRowCount = getRowCount();
-            return estimatedRowCount;
-        } catch (Exception e) {
-            LOG.warn("Fail to get row count for table {}", name, e);
+    @Override
+    public long fetchRowCount() {
+        makeSureInitialized();
+        // Get row count from hive metastore property.
+        long rowCount = getRowCountFromExternalSource();
+        // Only hive tables support estimating the row count by listing files.
+        if (rowCount == -1 && dlaType.equals(DLAType.HIVE)) {
+            LOG.debug("Will estimate row count from file list.");
+            rowCount = StatisticsUtil.getRowCountFromFileList(this);
         }
-        return 1;
+        return rowCount;
     }
 
     private List<Column> getIcebergSchema(List<FieldSchema> hmsSchema) {
@@ -689,7 +676,6 @@ public class HMSExternalTable extends ExternalTable {
     @Override
     public void gsonPostProcess() throws IOException {
         super.gsonPostProcess();
-        estimatedRowCount = -1;
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/JdbcExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/JdbcExternalTable.java
index 05023591bfd..66bce9b1b67 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/JdbcExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/JdbcExternalTable.java
@@ -18,13 +18,11 @@
 package org.apache.doris.catalog.external;
 
 import org.apache.doris.catalog.Column;
-import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.JdbcTable;
 import org.apache.doris.datasource.jdbc.JdbcExternalCatalog;
 import org.apache.doris.statistics.AnalysisInfo;
 import org.apache.doris.statistics.BaseAnalysisTask;
 import org.apache.doris.statistics.JdbcAnalysisTask;
-import org.apache.doris.statistics.TableStatsMeta;
 import org.apache.doris.thrift.TTableDescriptor;
 
 import org.apache.logging.log4j.LogManager;
@@ -113,21 +111,4 @@ public class JdbcExternalTable extends ExternalTable {
         makeSureInitialized();
         return new JdbcAnalysisTask(info);
     }
-
-    @Override
-    public long getRowCount() {
-        makeSureInitialized();
-        TableStatsMeta tableStats = 
Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(id);
-        if (tableStats != null) {
-            long rowCount = tableStats.rowCount;
-            LOG.debug("Estimated row count for db {} table {} is {}.", dbName, 
name, rowCount);
-            return rowCount;
-        }
-        return 1;
-    }
-
-    @Override
-    public long estimatedRowCount() {
-        return getRowCount();
-    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java
index 57aff50c621..1492392ee67 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java
@@ -41,7 +41,8 @@ import java.util.concurrent.ExecutorService;
 /**
  * Cache meta of external catalog
  * 1. Meta for hive meta store, mainly for partition.
- * 2. Table Schema cahce.
+ * 2. Table Schema cache.
+ * 3. Row count cache.
  */
 public class ExternalMetaCacheMgr {
     private static final Logger LOG = 
LogManager.getLogger(ExternalMetaCacheMgr.class);
@@ -55,6 +56,8 @@ public class ExternalMetaCacheMgr {
     private ExecutorService executor;
     // all catalogs could share the same fsCache.
     private FileSystemCache fsCache;
+    // all external table row count cache.
+    private ExternalRowCountCache rowCountCache;
     private final IcebergMetadataCacheMgr icebergMetadataCacheMgr;
     private final MaxComputeMetadataCacheMgr maxComputeMetadataCacheMgr;
 
@@ -65,6 +68,7 @@ public class ExternalMetaCacheMgr {
                 "ExternalMetaCacheMgr", 120, true);
         hudiPartitionMgr = HudiPartitionMgr.get(executor);
         fsCache = new FileSystemCache(executor);
+        rowCountCache = new ExternalRowCountCache(executor);
         icebergMetadataCacheMgr = new IcebergMetadataCacheMgr();
         maxComputeMetadataCacheMgr = new MaxComputeMetadataCacheMgr();
     }
@@ -111,6 +115,10 @@ public class ExternalMetaCacheMgr {
         return fsCache;
     }
 
+    public ExternalRowCountCache getRowCountCache() {
+        return rowCountCache;
+    }
+
     public void removeCache(long catalogId) {
         if (cacheMap.remove(catalogId) != null) {
             LOG.info("remove hive metastore cache for catalog {}", catalogId);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
new file mode 100644
index 00000000000..1441efa9bf5
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource;
+
+import org.apache.doris.catalog.TableIf;
+import org.apache.doris.common.Config;
+import org.apache.doris.statistics.BasicAsyncCacheLoader;
+import org.apache.doris.statistics.util.StatisticsUtil;
+
+import com.github.benmanes.caffeine.cache.AsyncLoadingCache;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.time.Duration;
+import java.util.Optional;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutorService;
+
+public class ExternalRowCountCache {
+
+    private static final Logger LOG = 
LogManager.getLogger(ExternalRowCountCache.class);
+    private final AsyncLoadingCache<RowCountKey, Optional<Long>> rowCountCache;
+
+    public ExternalRowCountCache(ExecutorService executor) {
+        rowCountCache = Caffeine.newBuilder()
+                .maximumSize(Config.max_external_table_row_count_cache_num)
+                
.expireAfterWrite(Duration.ofMinutes(Config.external_cache_expire_time_minutes_after_access))
+                .executor(executor)
+                .buildAsync(new RowCountCacheLoader());
+    }
+
+    public static class RowCountKey {
+        private final long catalogId;
+        private final long dbId;
+        private final long tableId;
+
+        public RowCountKey(long catalogId, long dbId, long tableId) {
+            this.catalogId = catalogId;
+            this.dbId = dbId;
+            this.tableId = tableId;
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            if (this == obj) {
+                return true;
+            }
+            if (!(obj instanceof RowCountKey)) {
+                return false;
+            }
+            return ((RowCountKey) obj).tableId == this.tableId;
+        }
+
+        @Override
+        public int hashCode() {
+            return (int) tableId;
+        }
+    }
+
+    public static class RowCountCacheLoader extends 
BasicAsyncCacheLoader<RowCountKey, Optional<Long>> {
+
+        @Override
+        protected Optional<Long> doLoad(RowCountKey rowCountKey) {
+            try {
+                TableIf table = 
StatisticsUtil.findTable(rowCountKey.catalogId, rowCountKey.dbId, 
rowCountKey.tableId);
+                return Optional.of(table.fetchRowCount());
+            } catch (Exception e) {
+                LOG.warn("Failed to get table with catalogId {}, dbId {}, 
tableId {}", rowCountKey.catalogId,
+                        rowCountKey.dbId, rowCountKey.tableId);
+                return Optional.empty();
+            }
+        }
+    }
+
+    /**
+     * Get the cached row count for the given table. Return 0 if the cache entry is not loaded yet 
or the table does not exist.
+     * The cache entry is loaded asynchronously.
+     * @param catalogId
+     * @param dbId
+     * @param tableId
+     * @return Cached row count or 0 if not exist
+     */
+    public long getCachedRowCount(long catalogId, long dbId, long tableId) {
+        RowCountKey key = new RowCountKey(catalogId, dbId, tableId);
+        try {
+            CompletableFuture<Optional<Long>> f = rowCountCache.get(key);
+            if (f.isDone()) {
+                return f.get().orElse(0L);
+            }
+        } catch (Exception e) {
+            LOG.warn("Unexpected exception while returning row count", e);
+        }
+        return 0;
+    }
+
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 82a67109452..ce35b301589 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -624,7 +624,7 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                 .map(s -> (SlotReference) s).collect(Collectors.toSet());
         Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>();
         TableIf table = catalogRelation.getTable();
-        double rowCount = catalogRelation.getTable().estimatedRowCount();
+        double rowCount = catalogRelation.getTable().getRowCountForNereids();
         boolean hasUnknownCol = false;
         long idxId = -1;
         if (catalogRelation instanceof OlapScan) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index ff9a41af076..d898185cf85 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -924,11 +924,7 @@ public class ShowExecutor {
                 // Row_format
                 row.add(null);
                 // Rows
-                // Use estimatedRowCount(), not getRowCount().
-                // because estimatedRowCount() is an async call, it will not 
block, and it will call getRowCount()
-                // finally. So that for some table(especially external table),
-                // we can get the row count without blocking.
-                row.add(String.valueOf(table.estimatedRowCount()));
+                row.add(String.valueOf(table.getRowCount()));
                 // Avg_row_length
                 row.add(String.valueOf(table.getAvgRowLength()));
                 // Data_length
@@ -2474,7 +2470,7 @@ public class ShowExecutor {
            tableStats == null means it's not analyzed, in this case show the 
estimated row count.
          */
         if (tableStats == null && tableIf instanceof HMSExternalTable) {
-            resultSet = 
showTableStatsStmt.constructResultSet(tableIf.estimatedRowCount());
+            resultSet = 
showTableStatsStmt.constructResultSet(tableIf.getRowCount());
         } else {
             resultSet = showTableStatsStmt.constructResultSet(tableStats);
         }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index d7cb5e6613f..b1275678050 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -553,8 +553,7 @@ public class AnalysisManager implements Writable {
 
     @VisibleForTesting
     public void updateTableStats(AnalysisInfo jobInfo) {
-        TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogId,
-                jobInfo.dbId, jobInfo.tblId);
+        TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogId, 
jobInfo.dbId, jobInfo.tblId);
         // External Table only update table stats when all tasks finished.
         // Because it needs to get the row count from the result of row count 
task.
         if (tbl instanceof ExternalTable && 
!jobInfo.state.equals(AnalysisState.FINISHED)) {
@@ -562,7 +561,7 @@ public class AnalysisManager implements Writable {
         }
         TableStatsMeta tableStats = findTableStatsStatus(tbl.getId());
         if (tableStats == null) {
-            updateTableStatsStatus(new TableStatsMeta(jobInfo.emptyJob ? 0 : 
tbl.estimatedRowCount(), jobInfo, tbl));
+            updateTableStatsStatus(new TableStatsMeta(jobInfo.emptyJob ? 0 : 
tbl.getRowCount(), jobInfo, tbl));
         } else {
             tableStats.update(jobInfo, tbl);
             logCreateTableStats(tableStats);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BasicAsyncCacheLoader.java
similarity index 85%
rename from 
fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java
rename to 
fe/fe-core/src/main/java/org/apache/doris/statistics/BasicAsyncCacheLoader.java
index 8b49c57f1bb..ac5896bb06c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BasicAsyncCacheLoader.java
@@ -25,13 +25,13 @@ import org.checkerframework.checker.nullness.qual.NonNull;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.Executor;
 
-public abstract class StatisticsCacheLoader<V> implements 
AsyncCacheLoader<StatisticsCacheKey, V> {
+public abstract class BasicAsyncCacheLoader<K, V> implements 
AsyncCacheLoader<K, V> {
 
-    private static final Logger LOG = 
LogManager.getLogger(StatisticsCacheLoader.class);
+    private static final Logger LOG = 
LogManager.getLogger(BasicAsyncCacheLoader.class);
 
     @Override
     public @NonNull CompletableFuture<V> asyncLoad(
-            @NonNull StatisticsCacheKey key,
+            @NonNull K key,
             @NonNull Executor executor) {
         CompletableFuture<V> future = CompletableFuture.supplyAsync(() -> {
             long startTime = System.currentTimeMillis();
@@ -45,5 +45,5 @@ public abstract class StatisticsCacheLoader<V> implements 
AsyncCacheLoader<Stati
         return future;
     }
 
-    protected abstract V doLoad(StatisticsCacheKey k);
+    protected abstract V doLoad(K k);
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
index bc5fc4c10c0..91006cc4953 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
@@ -27,7 +27,7 @@ import org.apache.logging.log4j.Logger;
 import java.util.List;
 import java.util.Optional;
 
-public class ColumnStatisticsCacheLoader extends 
StatisticsCacheLoader<Optional<ColumnStatistic>> {
+public class ColumnStatisticsCacheLoader extends 
BasicAsyncCacheLoader<StatisticsCacheKey, Optional<ColumnStatistic>> {
 
     private static final Logger LOG = 
LogManager.getLogger(ColumnStatisticsCacheLoader.class);
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index c4175a570db..1fe827420c9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -190,9 +190,11 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
                 }
             }
         }
-        // Estimate the row count. This value is inaccurate if the table stats 
is empty.
-        TableStatsMeta tableStatsStatus = 
Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(table.getId());
-        long count = tableStatsStatus == null ? table.estimatedRowCount() : 
tableStatsStatus.rowCount;
+        // Estimate the row count. This value is inaccurate.
+        long count = table.getRowCount();
+        if (count == 0) {
+            count = table.fetchRowCount();
+        }
         dataSize = dataSize * count / partitionNames.size();
         numNulls = numNulls * count / partitionNames.size();
         int ndv = ndvPartValues.size();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java
index d9928f2a639..bf606364a23 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java
@@ -31,7 +31,7 @@ import java.util.Map;
 import java.util.Optional;
 import java.util.concurrent.CompletionException;
 
-public class HistogramCacheLoader extends 
StatisticsCacheLoader<Optional<Histogram>> {
+public class HistogramCacheLoader extends 
BasicAsyncCacheLoader<StatisticsCacheKey, Optional<Histogram>> {
 
     private static final Logger LOG = 
LogManager.getLogger(HistogramCacheLoader.class);
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
index b65678d1859..7ac4b95d484 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
@@ -59,7 +59,7 @@ public class OlapScanStatsDerive extends BaseStatsDerive {
 
         Map<Id, ColumnStatistic> columnStatisticMap = new HashMap<>();
         Table table = scanNode.getOlapTable();
-        double rowCount = table.estimatedRowCount();
+        double rowCount = table.getRowCountForNereids();
         for (Map.Entry<Id, String> entry : 
slotIdToTableIdAndColumnName.entrySet()) {
             String colName = entry.getValue();
             // TODO. Get index id for materialized view.
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
index 95c8c890863..593ae475810 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
@@ -39,7 +39,6 @@ public class StatisticConstants {
     public static final int ID_LEN = 4096;
 
     public static final int STATISTICS_CACHE_REFRESH_INTERVAL = 24 * 2;
-
     /**
      * Bucket count fot column_statistics and analysis_job table.
      */
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java
index 055dd128bef..fa924ab9284 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java
@@ -73,6 +73,7 @@ public class StatisticsCacheKey {
     @Override
     public String toString() {
         StringJoiner sj = new StringJoiner(DELIMITER);
+        sj.add("ColumnStats");
         sj.add(String.valueOf(tableId));
         sj.add(String.valueOf(idxId));
         sj.add(colName);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 0d1d6d9b75c..7752cfcc0e8 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -549,10 +549,9 @@ public class StatisticsUtil {
      * First get it from remote table parameters. If not found, estimate it : 
totalSize/estimatedRowSize
      *
      * @param table Hive HMSExternalTable to estimate row count.
-     * @param isInit Flag to indicate if this is called during init. To avoid 
recursively get schema.
      * @return estimated row count
      */
-    public static long getHiveRowCount(HMSExternalTable table, boolean isInit) 
{
+    public static long getHiveRowCount(HMSExternalTable table) {
         Map<String, String> parameters = 
table.getRemoteTable().getParameters();
         if (parameters == null) {
             return -1;
@@ -565,7 +564,7 @@ public class StatisticsUtil {
                 return rows;
             }
         }
-        if (!parameters.containsKey(TOTAL_SIZE) || isInit) {
+        if (!parameters.containsKey(TOTAL_SIZE)) {
             return -1;
         }
         // Table parameters doesn't contain row count but contain total size. 
Estimate row count : totalSize/rowSize
@@ -575,7 +574,7 @@ public class StatisticsUtil {
             estimatedRowSize += column.getDataType().getSlotSize();
         }
         if (estimatedRowSize == 0) {
-            return 1;
+            return -1;
         }
         return totalSize / estimatedRowSize;
     }
@@ -653,7 +652,7 @@ public class StatisticsUtil {
             estimatedRowSize += column.getDataType().getSlotSize();
         }
         if (estimatedRowSize == 0) {
-            return 1;
+            return 0;
         }
         if (samplePartitionSize < totalPartitionSize) {
             totalSize = totalSize * totalPartitionSize / samplePartitionSize;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to