This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 4403e3012c1 Refactor get row count related interface, add row count 
cache for external table. (#31276)
4403e3012c1 is described below

commit 4403e3012c1b79282cfcc59d1b1c6b5ffd995b58
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Fri Feb 23 13:02:13 2024 +0800

    Refactor get row count related interface, add row count cache for external 
table. (#31276)
---
 .../main/java/org/apache/doris/common/Config.java  |   4 +
 .../java/org/apache/doris/catalog/OlapTable.java   |   7 +-
 .../main/java/org/apache/doris/catalog/Table.java  |  29 ++----
 .../java/org/apache/doris/catalog/TableIf.java     |   9 +-
 .../doris/datasource/ExternalMetaCacheMgr.java     |  10 +-
 .../doris/datasource/ExternalRowCountCache.java    | 112 +++++++++++++++++++++
 .../org/apache/doris/datasource/ExternalTable.java |  15 +--
 .../doris/datasource/hive/HMSExternalTable.java    |  72 ++-----------
 .../doris/datasource/jdbc/JdbcExternalTable.java   |  26 -----
 .../doris/nereids/stats/StatsCalculator.java       |   2 +-
 .../java/org/apache/doris/qe/ShowExecutor.java     |   8 +-
 .../apache/doris/service/FrontendServiceImpl.java  |   2 +-
 .../apache/doris/statistics/AnalysisManager.java   |   5 +-
 ...CacheLoader.java => BasicAsyncCacheLoader.java} |  17 ++--
 .../statistics/ColumnStatisticsCacheLoader.java    |   2 +-
 .../apache/doris/statistics/HMSAnalysisTask.java   |   4 +-
 .../doris/statistics/HistogramCacheLoader.java     |   2 +-
 .../doris/statistics/OlapScanStatsDerive.java      |   2 +-
 .../doris/statistics/StatisticConstants.java       |   1 -
 .../doris/statistics/StatisticsCacheKey.java       |   1 +
 .../doris/statistics/util/StatisticsUtil.java      |   9 +-
 21 files changed, 180 insertions(+), 159 deletions(-)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java 
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 86ed91d3c08..21e4f7ddcd5 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -1902,6 +1902,10 @@ public class Config extends ConfigBase {
         "Max cache number of remote file system."})
     public static long max_remote_file_system_cache_num = 100;
 
+    @ConfField(mutable = false, masterOnly = false, description = 
{"外表行数缓存最大数量",
+        "Max cache number of external table row count"})
+    public static long max_external_table_row_count_cache_num = 100000;
+
     /**
      * Max cache loader thread-pool size.
      * Max thread pool size for loading external meta cache
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index b6ba7bb81c6..da25bbafe5b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -1280,7 +1280,7 @@ public class OlapTable extends Table implements 
MTMVRelatedTableIf {
     }
 
     @Override
-    public long getRowCount() {
+    public long fetchRowCount() {
         long rowCount = 0;
         for (Map.Entry<Long, Partition> entry : idToPartition.entrySet()) {
             rowCount += entry.getValue().getBaseIndex().getRowCount();
@@ -1297,11 +1297,6 @@ public class OlapTable extends Table implements 
MTMVRelatedTableIf {
         return rowCount;
     }
 
-    @Override
-    public long getCacheRowCount() {
-        return getRowCount();
-    }
-
     @Override
     public long getAvgRowLength() {
         long rowCount = 0;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
index d08b5e8aa1c..825e55ee09e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
@@ -394,11 +394,7 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf {
     }
 
     public long getRowCount() {
-        return 0;
-    }
-
-    public long getCacheRowCount() {
-        return getRowCount();
+        return fetchRowCount();
     }
 
     public long getAvgRowLength() {
@@ -605,24 +601,6 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf {
         throw new NotImplementedException("createAnalysisTask not 
implemented");
     }
 
-    /**
-     * for NOT-ANALYZED Olap table, return estimated row count,
-     * for other table, return 1
-     * @return estimated row count
-     */
-    public long estimatedRowCount() {
-        long cardinality = 0;
-        if (this instanceof OlapTable) {
-            OlapTable table = (OlapTable) this;
-            for (long selectedPartitionId : table.getPartitionIds()) {
-                final Partition partition = 
table.getPartition(selectedPartitionId);
-                final MaterializedIndex baseIndex = partition.getBaseIndex();
-                cardinality += baseIndex.getRowCount();
-            }
-        }
-        return Math.max(cardinality, 1);
-    }
-
     @Override
     public DatabaseIf getDatabase() {
         return Env.getCurrentInternalCatalog().getDbNullable(qualifiedDbName);
@@ -649,4 +627,9 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf {
     public List<Long> getChunkSizes() {
         throw new NotImplementedException("getChunkSized not implemented");
     }
+
+    @Override
+    public long fetchRowCount() {
+        return 0;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
index 29c7d6b83e2..fd7f5d53880 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
@@ -135,9 +135,7 @@ public interface TableIf {
 
     long getRowCount();
 
-    // Get the exact number of rows in the internal table;
-    // Get the number of cached rows or estimated rows in the external table, 
if not, return -1.
-    long getCacheRowCount();
+    long fetchRowCount();
 
     long getDataLength();
 
@@ -151,7 +149,10 @@ public interface TableIf {
 
     BaseAnalysisTask createAnalysisTask(AnalysisInfo info);
 
-    long estimatedRowCount();
+    // For empty table, nereids require getting 1 as row count. This is a wrap 
function for nereids to call getRowCount.
+    default long getRowCountForNereids() {
+        return Math.max(getRowCount(), 1);
+    }
 
     DatabaseIf getDatabase();
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java
index fd25be82ac2..dc48f1b30e9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java
@@ -44,7 +44,8 @@ import java.util.concurrent.ExecutorService;
 /**
  * Cache meta of external catalog
  * 1. Meta for hive meta store, mainly for partition.
- * 2. Table Schema cahce.
+ * 2. Table Schema cache.
+ * 3. Row count cache.
  */
 public class ExternalMetaCacheMgr {
     private static final Logger LOG = 
LogManager.getLogger(ExternalMetaCacheMgr.class);
@@ -58,6 +59,8 @@ public class ExternalMetaCacheMgr {
     private ExecutorService executor;
     // all catalogs could share the same fsCache.
     private FileSystemCache fsCache;
+    // all external table row count cache.
+    private ExternalRowCountCache rowCountCache;
     private final IcebergMetadataCacheMgr icebergMetadataCacheMgr;
     private final MaxComputeMetadataCacheMgr maxComputeMetadataCacheMgr;
 
@@ -68,6 +71,7 @@ public class ExternalMetaCacheMgr {
                 "ExternalMetaCacheMgr", 120, true);
         hudiPartitionMgr = HudiPartitionMgr.get(executor);
         fsCache = new FileSystemCache(executor);
+        rowCountCache = new ExternalRowCountCache(executor);
         icebergMetadataCacheMgr = new IcebergMetadataCacheMgr();
         maxComputeMetadataCacheMgr = new MaxComputeMetadataCacheMgr();
     }
@@ -114,6 +118,10 @@ public class ExternalMetaCacheMgr {
         return fsCache;
     }
 
+    public ExternalRowCountCache getRowCountCache() {
+        return rowCountCache;
+    }
+
     public void removeCache(long catalogId) {
         if (cacheMap.remove(catalogId) != null) {
             LOG.info("remove hive metastore cache for catalog {}", catalogId);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
new file mode 100644
index 00000000000..1441efa9bf5
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalRowCountCache.java
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource;
+
+import org.apache.doris.catalog.TableIf;
+import org.apache.doris.common.Config;
+import org.apache.doris.statistics.BasicAsyncCacheLoader;
+import org.apache.doris.statistics.util.StatisticsUtil;
+
+import com.github.benmanes.caffeine.cache.AsyncLoadingCache;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.time.Duration;
+import java.util.Optional;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutorService;
+
+public class ExternalRowCountCache {
+
+    private static final Logger LOG = 
LogManager.getLogger(ExternalRowCountCache.class);
+    private final AsyncLoadingCache<RowCountKey, Optional<Long>> rowCountCache;
+
+    public ExternalRowCountCache(ExecutorService executor) {
+        rowCountCache = Caffeine.newBuilder()
+                .maximumSize(Config.max_external_table_row_count_cache_num)
+                
.expireAfterWrite(Duration.ofMinutes(Config.external_cache_expire_time_minutes_after_access))
+                .executor(executor)
+                .buildAsync(new RowCountCacheLoader());
+    }
+
+    public static class RowCountKey {
+        private final long catalogId;
+        private final long dbId;
+        private final long tableId;
+
+        public RowCountKey(long catalogId, long dbId, long tableId) {
+            this.catalogId = catalogId;
+            this.dbId = dbId;
+            this.tableId = tableId;
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            if (this == obj) {
+                return true;
+            }
+            if (!(obj instanceof RowCountKey)) {
+                return false;
+            }
+            return ((RowCountKey) obj).tableId == this.tableId;
+        }
+
+        @Override
+        public int hashCode() {
+            return (int) tableId;
+        }
+    }
+
+    public static class RowCountCacheLoader extends 
BasicAsyncCacheLoader<RowCountKey, Optional<Long>> {
+
+        @Override
+        protected Optional<Long> doLoad(RowCountKey rowCountKey) {
+            try {
+                TableIf table = 
StatisticsUtil.findTable(rowCountKey.catalogId, rowCountKey.dbId, 
rowCountKey.tableId);
+                return Optional.of(table.fetchRowCount());
+            } catch (Exception e) {
+                LOG.warn("Failed to get table with catalogId {}, dbId {}, 
tableId {}", rowCountKey.catalogId,
+                        rowCountKey.dbId, rowCountKey.tableId);
+                return Optional.empty();
+            }
+        }
+    }
+
+    /**
+     * Get cached row count for the given table. Return 0 if cached not loaded 
or table not exists.
+     * Cached will be loaded async.
+     * @param catalogId
+     * @param dbId
+     * @param tableId
+     * @return Cached row count or 0 if not exist
+     */
+    public long getCachedRowCount(long catalogId, long dbId, long tableId) {
+        RowCountKey key = new RowCountKey(catalogId, dbId, tableId);
+        try {
+            CompletableFuture<Optional<Long>> f = rowCountCache.get(key);
+            if (f.isDone()) {
+                return f.get().orElse(0L);
+            }
+        } catch (Exception e) {
+            LOG.warn("Unexpected exception while returning row count", e);
+        }
+        return 0;
+    }
+
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
index 8f7fada5f61..d756e803a23 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
@@ -297,10 +297,16 @@ public class ExternalTable implements TableIf, Writable, 
GsonPostProcessable {
 
     @Override
     public long getRowCount() {
-        return 0;
+        // All external table should get external row count from cache.
+        return 
Env.getCurrentEnv().getExtMetaCacheMgr().getRowCountCache().getCachedRowCount(catalog.getId(),
 dbId, id);
     }
 
-    public long getCacheRowCount() {
+    @Override
+    /**
+     * Default return 0. Subclass need to implement this interface.
+     * This is called by ExternalRowCountCache to load row count cache.
+     */
+    public long fetchRowCount() {
         return 0;
     }
 
@@ -351,11 +357,6 @@ public class ExternalTable implements TableIf, Writable, 
GsonPostProcessable {
         throw new NotImplementedException("createAnalysisTask not 
implemented");
     }
 
-    @Override
-    public long estimatedRowCount() {
-        return 1;
-    }
-
     @Override
     public DatabaseIf getDatabase() {
         return catalog.getDbNullable(dbName);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 6da6073a4a2..0e11267829c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -40,7 +40,6 @@ import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.ColumnStatisticBuilder;
 import org.apache.doris.statistics.HMSAnalysisTask;
 import org.apache.doris.statistics.StatsType;
-import org.apache.doris.statistics.TableStatsMeta;
 import org.apache.doris.statistics.util.StatisticsUtil;
 import org.apache.doris.thrift.THiveTable;
 import org.apache.doris.thrift.TTableDescriptor;
@@ -146,9 +145,6 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
 
     private DLAType dlaType = DLAType.UNKNOWN;
 
-    // No as precise as row count in TableStats, but better than none.
-    private long estimatedRowCount = -1;
-
     // record the event update time when enable hms event listener
     protected volatile long eventUpdateTime;
 
@@ -196,7 +192,6 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
                 }
             }
             objectCreated = true;
-            estimatedRowCount = getRowCountFromExternalSource(true);
         }
     }
 
@@ -319,24 +314,11 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
         return 0;
     }
 
-    @Override
-    public long getRowCount() {
-        makeSureInitialized();
-        long rowCount = getRowCountFromExternalSource(false);
-        if (rowCount == -1) {
-            if (LOG.isDebugEnabled()) {
-                LOG.debug("Will estimate row count from file list.");
-            }
-            rowCount = StatisticsUtil.getRowCountFromFileList(this);
-        }
-        return rowCount;
-    }
-
-    private long getRowCountFromExternalSource(boolean isInit) {
+    private long getRowCountFromExternalSource() {
         long rowCount;
         switch (dlaType) {
             case HIVE:
-                rowCount = StatisticsUtil.getHiveRowCount(this, isInit);
+                rowCount = StatisticsUtil.getHiveRowCount(this);
                 break;
             case ICEBERG:
                 rowCount = StatisticsUtil.getIcebergRowCount(this);
@@ -514,47 +496,16 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
     }
 
     @Override
-    public long getCacheRowCount() {
-        //Cached accurate information
-        TableStatsMeta tableStats = 
Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(id);
-        if (tableStats != null) {
-            long rowCount = tableStats.rowCount;
-            if (LOG.isDebugEnabled()) {
-                LOG.debug("Estimated row count for db {} table {} is {}.", 
dbName, name, rowCount);
-            }
-            return rowCount;
-        }
-
-        //estimated information
-        if (estimatedRowCount != -1) {
-            return estimatedRowCount;
-        }
-        return -1;
-    }
-
-    @Override
-    public long estimatedRowCount() {
-        try {
-            TableStatsMeta tableStats = 
Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(id);
-            if (tableStats != null) {
-                long rowCount = tableStats.rowCount;
-                if (LOG.isDebugEnabled()) {
-                    LOG.debug("Estimated row count for db {} table {} is {}.", 
dbName, name, rowCount);
-                }
-                return rowCount;
-            }
-
-            if (estimatedRowCount != -1) {
-                return estimatedRowCount;
-            }
-            // Cache the estimated row count in this structure
-            // though the table never get analyzed, since the row estimation 
might be expensive caused by RPC.
-            estimatedRowCount = getRowCount();
-            return estimatedRowCount;
-        } catch (Exception e) {
-            LOG.warn("Fail to get row count for table {}", name, e);
+    public long fetchRowCount() {
+        makeSureInitialized();
+        // Get row count from hive metastore property.
+        long rowCount = getRowCountFromExternalSource();
+        // Only hive table supports estimate row count by listing file.
+        if (rowCount == -1 && dlaType.equals(DLAType.HIVE)) {
+            LOG.debug("Will estimate row count from file list.");
+            rowCount = StatisticsUtil.getRowCountFromFileList(this);
         }
-        return 1;
+        return rowCount;
     }
 
     private void initPartitionColumns(List<Column> schema) {
@@ -766,7 +717,6 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
     @Override
     public void gsonPostProcess() throws IOException {
         super.gsonPostProcess();
-        estimatedRowCount = -1;
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalTable.java
index a6199ba8c8f..64fd25525e5 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalTable.java
@@ -18,13 +18,11 @@
 package org.apache.doris.datasource.jdbc;
 
 import org.apache.doris.catalog.Column;
-import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.JdbcTable;
 import org.apache.doris.datasource.ExternalTable;
 import org.apache.doris.statistics.AnalysisInfo;
 import org.apache.doris.statistics.BaseAnalysisTask;
 import org.apache.doris.statistics.JdbcAnalysisTask;
-import org.apache.doris.statistics.TableStatsMeta;
 import org.apache.doris.thrift.TTableDescriptor;
 
 import org.apache.logging.log4j.LogManager;
@@ -111,28 +109,4 @@ public class JdbcExternalTable extends ExternalTable {
         makeSureInitialized();
         return new JdbcAnalysisTask(info);
     }
-
-    @Override
-    public long getRowCount() {
-        makeSureInitialized();
-        TableStatsMeta tableStats = 
Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(id);
-        if (tableStats != null) {
-            long rowCount = tableStats.rowCount;
-            if (LOG.isDebugEnabled()) {
-                LOG.debug("Estimated row count for db {} table {} is {}.", 
dbName, name, rowCount);
-            }
-            return rowCount;
-        }
-        return 1;
-    }
-
-    @Override
-    public long getCacheRowCount() {
-        return getRowCount();
-    }
-
-    @Override
-    public long estimatedRowCount() {
-        return getRowCount();
-    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 5027cceab63..ad16d3e8f50 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -649,7 +649,7 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                 .map(s -> (SlotReference) s).collect(Collectors.toSet());
         Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>();
         TableIf table = catalogRelation.getTable();
-        double rowCount = catalogRelation.getTable().estimatedRowCount();
+        double rowCount = catalogRelation.getTable().getRowCountForNereids();
         boolean hasUnknownCol = false;
         long idxId = -1;
         if (catalogRelation instanceof OlapScan) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index 62e9f4ac5a5..f0617f8bdf4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -920,11 +920,7 @@ public class ShowExecutor {
                 // Row_format
                 row.add(null);
                 // Rows
-                // Use estimatedRowCount(), not getRowCount().
-                // because estimatedRowCount() is an async call, it will not 
block, and it will call getRowCount()
-                // finally. So that for some table(especially external table),
-                // we can get the row count without blocking.
-                row.add(String.valueOf(table.estimatedRowCount()));
+                row.add(String.valueOf(table.getRowCount()));
                 // Avg_row_length
                 row.add(String.valueOf(table.getAvgRowLength()));
                 // Data_length
@@ -2540,7 +2536,7 @@ public class ShowExecutor {
            tableStats == null means it's not analyzed, in this case show the 
estimated row count.
          */
         if (tableStats == null && tableIf instanceof HMSExternalTable) {
-            resultSet = 
showTableStatsStmt.constructResultSet(tableIf.estimatedRowCount());
+            resultSet = 
showTableStatsStmt.constructResultSet(tableIf.getRowCount());
         } else {
             resultSet = showTableStatsStmt.constructResultSet(tableStats);
         }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java 
b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
index cb5e53947e7..cf7533cbc70 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
@@ -609,7 +609,7 @@ public class FrontendServiceImpl implements 
FrontendService.Iface {
                             status.setUpdateTime(table.getUpdateTime() / 1000);
                             status.setCheckTime(lastCheckTime / 1000);
                             status.setCollation("utf-8");
-                            status.setRows(table.getCacheRowCount());
+                            status.setRows(table.getRowCount());
                             status.setDataLength(table.getDataLength());
                             status.setAvgRowLength(table.getAvgRowLength());
                             tablesResult.add(status);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index cd31acaa0b7..eac50b40757 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -550,8 +550,7 @@ public class AnalysisManager implements Writable {
 
     @VisibleForTesting
     public void updateTableStats(AnalysisInfo jobInfo) {
-        TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogId,
-                jobInfo.dbId, jobInfo.tblId);
+        TableIf tbl = StatisticsUtil.findTable(jobInfo.catalogId, 
jobInfo.dbId, jobInfo.tblId);
         // External Table only update table stats when all tasks finished.
         // Because it needs to get the row count from the result of row count 
task.
         if (tbl instanceof ExternalTable && 
!jobInfo.state.equals(AnalysisState.FINISHED)) {
@@ -559,7 +558,7 @@ public class AnalysisManager implements Writable {
         }
         TableStatsMeta tableStats = findTableStatsStatus(tbl.getId());
         if (tableStats == null) {
-            updateTableStatsStatus(new TableStatsMeta(jobInfo.emptyJob ? 0 : 
tbl.estimatedRowCount(), jobInfo, tbl));
+            updateTableStatsStatus(new TableStatsMeta(jobInfo.emptyJob ? 0 : 
tbl.getRowCount(), jobInfo, tbl));
         } else {
             tableStats.update(jobInfo, tbl);
             logCreateTableStats(tableStats);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BasicAsyncCacheLoader.java
similarity index 80%
rename from 
fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java
rename to 
fe/fe-core/src/main/java/org/apache/doris/statistics/BasicAsyncCacheLoader.java
index c212851a284..e7e488a6e7f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BasicAsyncCacheLoader.java
@@ -28,15 +28,15 @@ import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.Executor;
 import java.util.concurrent.TimeUnit;
 
-public abstract class StatisticsCacheLoader<V> implements 
AsyncCacheLoader<StatisticsCacheKey, V> {
+public abstract class BasicAsyncCacheLoader<K, V> implements 
AsyncCacheLoader<K, V> {
 
-    private static final Logger LOG = 
LogManager.getLogger(StatisticsCacheLoader.class);
+    private static final Logger LOG = 
LogManager.getLogger(BasicAsyncCacheLoader.class);
 
-    private final Map<StatisticsCacheKey, CompletableFutureWithCreateTime<V>> 
inProgressing = new HashMap<>();
+    private final Map<K, CompletableFutureWithCreateTime<V>> inProgressing = 
new HashMap<>();
 
     @Override
     public @NonNull CompletableFuture<V> asyncLoad(
-            @NonNull StatisticsCacheKey key,
+            @NonNull K key,
             @NonNull Executor executor) {
         CompletableFutureWithCreateTime<V> cfWrapper = inProgressing.get(key);
         if (cfWrapper != null) {
@@ -48,8 +48,7 @@ public abstract class StatisticsCacheLoader<V> implements 
AsyncCacheLoader<Stati
                 return doLoad(key);
             } finally {
                 long endTime = System.currentTimeMillis();
-                LOG.info("Query BE for column stats:{}-{} end time:{} cost 
time:{}", key.tableId, key.colName,
-                        endTime, endTime - startTime);
+                LOG.info("Load statistic cache [{}] cost time ms:{}", key, 
endTime - startTime);
                 removeFromIProgressing(key);
             }
         }, executor);
@@ -58,7 +57,7 @@ public abstract class StatisticsCacheLoader<V> implements 
AsyncCacheLoader<Stati
         return future;
     }
 
-    protected abstract V doLoad(StatisticsCacheKey k);
+    protected abstract V doLoad(K k);
 
     private static class CompletableFutureWithCreateTime<V> extends 
CompletableFuture<V> {
 
@@ -76,13 +75,13 @@ public abstract class StatisticsCacheLoader<V> implements 
AsyncCacheLoader<Stati
         }
     }
 
-    private void putIntoIProgressing(StatisticsCacheKey k, 
CompletableFutureWithCreateTime<V> v) {
+    private void putIntoIProgressing(K k, CompletableFutureWithCreateTime<V> 
v) {
         synchronized (inProgressing) {
             inProgressing.put(k, v);
         }
     }
 
-    private void removeFromIProgressing(StatisticsCacheKey k) {
+    private void removeFromIProgressing(K k) {
         synchronized (inProgressing) {
             inProgressing.remove(k);
         }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
index 24c08c8b755..e33cff3107a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
@@ -30,7 +30,7 @@ import java.util.Optional;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.ThreadPoolExecutor.DiscardOldestPolicy;
 
-public class ColumnStatisticsCacheLoader extends 
StatisticsCacheLoader<Optional<ColumnStatistic>> {
+public class ColumnStatisticsCacheLoader extends 
BasicAsyncCacheLoader<StatisticsCacheKey, Optional<ColumnStatistic>> {
 
     private static final Logger LOG = 
LogManager.getLogger(ColumnStatisticsCacheLoader.class);
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index 549ab069ace..597acfdfddb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -110,7 +110,7 @@ public class HMSAnalysisTask extends ExternalAnalysisTask {
         // Estimate the row count. This value is inaccurate if the table stats 
is empty.
         TableStatsMeta tableStatsStatus = 
Env.getCurrentEnv().getAnalysisManager()
                 .findTableStatsStatus(hmsExternalTable.getId());
-        long count = tableStatsStatus == null ? 
hmsExternalTable.estimatedRowCount() : tableStatsStatus.rowCount;
+        long count = tableStatsStatus == null ? hmsExternalTable.getRowCount() 
: tableStatsStatus.rowCount;
         dataSize = dataSize * count / partitionNames.size();
         numNulls = numNulls * count / partitionNames.size();
         int ndv = ndvPartValues.size();
@@ -131,7 +131,7 @@ public class HMSAnalysisTask extends ExternalAnalysisTask {
     private void getHmsColumnStats() throws Exception {
         TableStatsMeta tableStatsStatus = 
Env.getCurrentEnv().getAnalysisManager()
                 .findTableStatsStatus(hmsExternalTable.getId());
-        long count = tableStatsStatus == null ? 
hmsExternalTable.estimatedRowCount() : tableStatsStatus.rowCount;
+        long count = tableStatsStatus == null ? hmsExternalTable.getRowCount() 
: tableStatsStatus.rowCount;
 
         Map<String, String> params = buildStatsParams("NULL");
         Map<StatsType, String> statsParams = new HashMap<>();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java
index d9928f2a639..bf606364a23 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramCacheLoader.java
@@ -31,7 +31,7 @@ import java.util.Map;
 import java.util.Optional;
 import java.util.concurrent.CompletionException;
 
-public class HistogramCacheLoader extends 
StatisticsCacheLoader<Optional<Histogram>> {
+public class HistogramCacheLoader extends 
BasicAsyncCacheLoader<StatisticsCacheKey, Optional<Histogram>> {
 
     private static final Logger LOG = 
LogManager.getLogger(HistogramCacheLoader.class);
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
index b65678d1859..7ac4b95d484 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
@@ -59,7 +59,7 @@ public class OlapScanStatsDerive extends BaseStatsDerive {
 
         Map<Id, ColumnStatistic> columnStatisticMap = new HashMap<>();
         Table table = scanNode.getOlapTable();
-        double rowCount = table.estimatedRowCount();
+        double rowCount = table.getRowCountForNereids();
         for (Map.Entry<Id, String> entry : 
slotIdToTableIdAndColumnName.entrySet()) {
             String colName = entry.getValue();
             // TODO. Get index id for materialized view.
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
index f2f53fa7457..74c7bd7c9db 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
@@ -39,7 +39,6 @@ public class StatisticConstants {
     public static final int ID_LEN = 4096;
 
     public static final int STATISTICS_CACHE_REFRESH_INTERVAL = 24 * 2;
-
     /**
      * Bucket count fot column_statistics and analysis_job table.
      */
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java
index 055dd128bef..fa924ab9284 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheKey.java
@@ -73,6 +73,7 @@ public class StatisticsCacheKey {
     @Override
     public String toString() {
         StringJoiner sj = new StringJoiner(DELIMITER);
+        sj.add("ColumnStats");
         sj.add(String.valueOf(tableId));
         sj.add(String.valueOf(idxId));
         sj.add(colName);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 0ff1488f464..8688447dcb9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -553,10 +553,9 @@ public class StatisticsUtil {
      * First get it from remote table parameters. If not found, estimate it : 
totalSize/estimatedRowSize
      *
      * @param table Hive HMSExternalTable to estimate row count.
-     * @param isInit Flag to indicate if this is called during init. To avoid 
recursively get schema.
      * @return estimated row count
      */
-    public static long getHiveRowCount(HMSExternalTable table, boolean isInit) 
{
+    public static long getHiveRowCount(HMSExternalTable table) {
         Map<String, String> parameters = 
table.getRemoteTable().getParameters();
         if (parameters == null) {
             return -1;
@@ -569,7 +568,7 @@ public class StatisticsUtil {
                 return rows;
             }
         }
-        if (!parameters.containsKey(TOTAL_SIZE) || isInit) {
+        if (!parameters.containsKey(TOTAL_SIZE)) {
             return -1;
         }
         // Table parameters doesn't contain row count but contain total size. 
Estimate row count : totalSize/rowSize
@@ -579,7 +578,7 @@ public class StatisticsUtil {
             estimatedRowSize += column.getDataType().getSlotSize();
         }
         if (estimatedRowSize == 0) {
-            return 1;
+            return -1;
         }
         return totalSize / estimatedRowSize;
     }
@@ -657,7 +656,7 @@ public class StatisticsUtil {
             estimatedRowSize += column.getDataType().getSlotSize();
         }
         if (estimatedRowSize == 0) {
-            return 1;
+            return 0;
         }
         if (samplePartitionSize < totalPartitionSize) {
             totalSize = totalSize * totalPartitionSize / samplePartitionSize;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to