(impala) 02/04: IMPALA-14014: Fix COMPUTE STATS with TABLESAMPLE clause

michaelsmith Tue, 10 Jun 2025 15:47:11 -0700

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


commit 4640a72a81e49c45fb7629950befdd89edf82381
Author: Zoltan Borok-Nagy <[email protected]>
AuthorDate: Fri May 9 14:41:06 2025 +0200

    IMPALA-14014: Fix COMPUTE STATS with TABLESAMPLE clause
    
    Since IMPALA-13737, COMPUTE STATS with a TABLESAMPLE clause has done a
    full scan on Iceberg tables. Before this patch, ComputeStatsStmt used
    FeFsTable.Utils.getFilesSample(), which only works correctly on FS
    tables that have their file descriptors loaded. Since IMPALA-13737,
    the internal FS table of an Iceberg table no longer has file
    descriptor information, so FeFsTable.Utils.getFilesSample() returned
    an empty map, which turned off table sampling for COMPUTE STATS.
    
    We did not have proper testing for COMPUTE STATS with table sampling,
    and therefore we did not catch the regression.
    
    This patch adds proper table sampling logic for Iceberg tables that
    can be used for COMPUTE STATS. The algorithm previously found in
    IcebergScanNode.getFilesSample() has been moved to
    FeIcebergTable.Utils.getFilesSample().
    
    Testing
     * added e2e tests
    
    Change-Id: Ie59d5fc1374ab69209a74f2488bcb9a7d510b782
    Reviewed-on: http://gerrit.cloudera.org:8080/22873
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../apache/impala/analysis/ComputeStatsStmt.java   |  16 +-
 .../java/org/apache/impala/analysis/TableRef.java  |   2 +-
 .../java/org/apache/impala/catalog/FeFsTable.java  |   6 +
 .../org/apache/impala/catalog/FeIcebergTable.java  | 101 ++++++++-
 .../impala/catalog/IcebergContentFileStore.java    |   6 +
 .../org/apache/impala/planner/HdfsScanNode.java    |   4 +-
 .../apache/impala/planner/IcebergDeleteNode.java   |  27 ++-
 .../org/apache/impala/planner/IcebergScanNode.java |  42 +---
 .../org/apache/impala/analysis/AnalyzeDDLTest.java |  12 +-
 .../apache/impala/analysis/AnalyzeStmtsTest.java   |  16 +-
 .../org/apache/impala/planner/PlannerTest.java     |  11 +-
 .../functional/functional_schema_template.sql      |   9 +-
 .../PlannerTest/iceberg-merge-insert-only.test     |  10 +-
 .../PlannerTest/iceberg-v2-tables-resources.test   | 128 +++++------
 .../queries/PlannerTest/tablesample-iceberg.test   | 206 ++++++++++++++++++
 .../queries/PlannerTest/tablesample.test           | 148 -------------
 .../iceberg-v2-compute-stats-table-sampling.test   | 234 +++++++++++++++++++++
 tests/query_test/test_iceberg.py                   |   8 +
 18 files changed, 693 insertions(+), 293 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java 
b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
index bd2dd195a..952126a07 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
@@ -148,7 +148,7 @@ public class ComputeStatsStmt extends StatementBase 
implements SingleTableStmt {
   protected FeTable table_;
 
   // Effective sampling percent based on the total number of bytes in the 
files sample.
-  // Set to -1 for non-HDFS tables or if TABLESAMPLE was not specified.
+  // Set to -1 for non-FS tables or if TABLESAMPLE was not specified.
   // We run the regular COMPUTE STATS for 0.0 and 1.0 where sampling has no 
benefit.
   protected double effectiveSamplePerc_ = -1;
 
@@ -805,10 +805,10 @@ public class ComputeStatsStmt extends StatementBase 
implements SingleTableStmt {
   private String analyzeTableSampleClause(Analyzer analyzer) throws 
AnalysisException {
     if (sampleParams_ == null) return "";
     if (!(table_ instanceof FeFsTable)) {
-      throw new AnalysisException("TABLESAMPLE is only supported on HDFS 
tables.");
+      throw new AnalysisException("TABLESAMPLE is only supported on file-based 
tables.");
     }
-    FeFsTable hdfsTable = (FeFsTable) table_;
-    if (!FeFsTable.Utils.isStatsExtrapolationEnabled(hdfsTable)) {
+    FeFsTable feFsTable = (FeFsTable) table_;
+    if (!FeFsTable.Utils.isStatsExtrapolationEnabled(feFsTable)) {
       throw new AnalysisException(String.format(
           "COMPUTE STATS TABLESAMPLE requires stats extrapolation which is 
disabled.\n" +
           "Stats extrapolation can be enabled service-wide with %s=true or by 
altering " +
@@ -827,17 +827,15 @@ public class ComputeStatsStmt extends StatementBase 
implements SingleTableStmt {
     // Compute the sample of files and set 'sampleFileBytes_'.
     long minSampleBytes = 
analyzer.getQueryOptions().compute_stats_min_sample_size;
     long samplePerc = sampleParams_.getPercentBytes();
-    // TODO(todd): can we avoid loading all the partitions for this?
-    Collection<? extends FeFsPartition> partitions = 
hdfsTable.loadAllPartitions();
-    Map<Long, List<FileDescriptor>> sample = FeFsTable.Utils.getFilesSample(
-        hdfsTable, partitions, samplePerc, minSampleBytes, sampleSeed);
+    Map<Long, List<FileDescriptor>> sample = feFsTable.getFilesSample(
+        samplePerc, minSampleBytes, sampleSeed);
     long sampleFileBytes = 0;
     for (List<FileDescriptor> fds: sample.values()) {
       for (FileDescriptor fd: fds) sampleFileBytes += fd.getFileLength();
     }
 
     // Compute effective sampling percent.
-    long totalFileBytes = ((FeFsTable)table_).getTotalHdfsBytes();
+    long totalFileBytes = feFsTable.getTotalHdfsBytes();
     if (totalFileBytes > 0) {
       effectiveSamplePerc_ = (double) sampleFileBytes / (double) 
totalFileBytes;
     } else {
diff --git a/fe/src/main/java/org/apache/impala/analysis/TableRef.java 
b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
index a4091928b..c372d93c2 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TableRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
@@ -482,7 +482,7 @@ public class TableRef extends StmtNode {
     if (!(this instanceof BaseTableRef)
         || !(resolvedPath_.destTable() instanceof FeFsTable)) {
       throw new AnalysisException(
-          "TABLESAMPLE is only supported on HDFS tables: " + getUniqueAlias());
+          "TABLESAMPLE is only supported on file-based tables: " + 
getUniqueAlias());
     }
   }
 
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java 
b/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java
index a3f3d0a92..034b3439d 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java
@@ -573,6 +573,12 @@ public interface FeFsTable extends FeTable {
     return result;
   }
 
+  default Map<Long, List<FileDescriptor>> getFilesSample(
+      long percentBytes, long minSampleBytes, long randomSeed) {
+    return Utils.getFilesSample(this, loadAllPartitions(), percentBytes, 
minSampleBytes,
+        randomSeed);
+  }
+
   /**
    * Utility functions for operating on FeFsTable. When we move fully to Java 
8,
    * these can become default methods of the interface.
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java 
b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
index 4366a2355..51d68ad5b 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
@@ -30,6 +30,7 @@ import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Random;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.stream.Collectors;
@@ -236,7 +237,7 @@ public interface FeIcebergTable extends FeFsTable {
 
   @Override
   default long getTotalHdfsBytes() {
-    return getFeFsTable().getTotalHdfsBytes();
+    return getTTableStats().getTotal_file_bytes();
   }
 
   @Override
@@ -328,6 +329,48 @@ public interface FeIcebergTable extends FeFsTable {
     return false;
   }
 
+  @Override /* FeFsTable */
+  default Map<Long, List<FileDescriptor>> getFilesSample(
+      long percentBytes, long minSampleBytes, long randomSeed) {
+    // There will be two separate IcebergScanNodes for data files without 
delete, and for
+    // data files with deletes, which means they will be sampled 
independently. Let's also
+    // sample them separately here.
+    Map<Long, List<FileDescriptor>> dataFilesWithoutDeletesSample = 
Utils.getFilesSample(
+        this, getContentFileStore().getDataFilesWithoutDeletes(), false,
+        percentBytes, minSampleBytes, randomSeed);
+    Map<Long, List<FileDescriptor>> dataFilesWithDeletesSample = 
Utils.getFilesSample(
+        this, getContentFileStore().getDataFilesWithDeletes(), false,
+        percentBytes, minSampleBytes, randomSeed);
+
+    Map<Long, List<FileDescriptor>> mergedResult = new HashMap<>();
+    mergedResult.putAll(dataFilesWithoutDeletesSample);
+    for (Map.Entry<Long, List<FileDescriptor>> entry :
+        dataFilesWithDeletesSample.entrySet()) {
+      List<FileDescriptor> fds = mergedResult.get(entry.getKey());
+      if (fds != null) {
+        fds.addAll(entry.getValue());
+      } else {
+        mergedResult.put(entry.getKey(), entry.getValue());
+      }
+    }
+
+    // There is no need to add the delete files if there are no data files.
+    if (mergedResult.isEmpty()) return mergedResult;
+
+    // We should have only a single element in the map as there is only a 
single
+    // partition in the table.
+    Preconditions.checkState(mergedResult.size() == 1);
+
+    // We don't sample delete files (for correctness), let's add all of them to
+    // the merged result.
+    for (Map.Entry<Long, List<FileDescriptor>> entry : 
mergedResult.entrySet()) {
+      for (FileDescriptor fd : getContentFileStore().getAllDeleteFiles()) {
+        entry.getValue().add(fd);
+      }
+    }
+    return mergedResult;
+  }
+
   THdfsTable transformToTHdfsTable(boolean updatePartitionFlag, 
ThriftObjectType type);
 
   /**
@@ -701,6 +744,62 @@ public interface FeIcebergTable extends FeFsTable {
       return fileDescMap;
     }
 
+    /**
+     * Return a sample of data files (choosing from 'fileDescs') according to 
the
+     * parameters.
+     * @filesAreSorted if true then the file descriptors are already sorted
+     * @percentBytes percent of the total number of bytes we want to sample at 
least.
+     * @minSampleBytes minimum number of bytes need to be selected.
+     * @randomSeed random seed for repeatable sampling.
+     * The algorithm is based on FeFsTable.Utils.getFilesSample()
+     */
+    public static Map<Long, List<FileDescriptor>> getFilesSample(
+        FeIcebergTable iceTbl, Iterable<? extends FileDescriptor> fileDescs,
+        boolean filesAreSorted,
+        long percentBytes, long minSampleBytes, long randomSeed) {
+      Preconditions.checkState(percentBytes >= 0 && percentBytes <= 100);
+      Preconditions.checkState(minSampleBytes >= 0);
+
+      // Ensure a consistent ordering of files for repeatable runs.
+      List<FileDescriptor> orderedFds = Lists.newArrayList(fileDescs);
+      if (!filesAreSorted) {
+        Collections.sort(orderedFds);
+      }
+
+      List<FeFsPartition> partitions = new ArrayList<>(
+          iceTbl.getFeFsTable().loadAllPartitions());
+      Preconditions.checkState(partitions.size() == 1);
+      FeFsPartition part = partitions.get(0);
+
+      long totalBytes = 0;
+      for (FileDescriptor fd : orderedFds) {
+        totalBytes += fd.getFileLength();
+      }
+
+      int numFilesRemaining = orderedFds.size();
+      double fracPercentBytes = (double) percentBytes / 100;
+      long targetBytes = (long) Math.round(totalBytes * fracPercentBytes);
+      targetBytes = Math.max(targetBytes, minSampleBytes);
+
+      // Randomly select files until targetBytes has been reached or all files 
have been
+      // selected.
+      Random rnd = new Random(randomSeed);
+      long selectedBytes = 0;
+      List<FileDescriptor> sampleFiles = Lists.newArrayList();
+      while (selectedBytes < targetBytes && numFilesRemaining > 0) {
+        int selectedIdx = rnd.nextInt(numFilesRemaining);
+        FileDescriptor fd = orderedFds.get(selectedIdx);
+        sampleFiles.add(fd);
+        selectedBytes += fd.getFileLength();
+        // Avoid selecting the same file multiple times.
+        orderedFds.set(selectedIdx, orderedFds.get(numFilesRemaining - 1));
+        --numFilesRemaining;
+      }
+      Map<Long, List<FileDescriptor>> result = new HashMap<>();
+      result.put(part.getId(), sampleFiles);
+      return result;
+    }
+
     /**
      * Get FileDescriptor by data file location
      */
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java 
b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java
index c857897b6..d9a69c1e2 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java
@@ -275,6 +275,12 @@ public class IcebergContentFileStore {
         dataFilesWithDeletes_.getList());
   }
 
+  public Iterable<IcebergFileDescriptor> getAllDeleteFiles() {
+    return Iterables.concat(
+        positionDeleteFiles_.getList(),
+        equalityDeleteFiles_.getList());
+  }
+
   public boolean hasAvro() { return hasAvro_; }
   public boolean hasOrc() { return hasOrc_; }
   public boolean hasParquet() { return hasParquet_; }
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java 
b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index 446add178..ed47bd1f6 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -223,7 +223,7 @@ public class HdfsScanNode extends ScanNode {
   // An estimate of the width of a row when the information is not available.
   private double DEFAULT_ROW_WIDTH_ESTIMATE = 1.0;
 
-  private final FeFsTable tbl_;
+  protected final FeFsTable tbl_;
 
   // List of partitions to be scanned. Partitions have been pruned.
   protected final List<FeFsPartition> partitions_;
@@ -2720,6 +2720,8 @@ public class HdfsScanNode extends ScanNode {
     return super.isTableMissingTableStats();
   }
 
+  public TableSampleClause getSampleParams() { return sampleParams_; }
+
   @Override
   public boolean hasCorruptTableStats() { return hasCorruptTableStats_; }
 
diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergDeleteNode.java 
b/fe/src/main/java/org/apache/impala/planner/IcebergDeleteNode.java
index ab38561b2..198630b3e 100644
--- a/fe/src/main/java/org/apache/impala/planner/IcebergDeleteNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/IcebergDeleteNode.java
@@ -26,6 +26,7 @@ import org.apache.impala.analysis.BinaryPredicate;
 import org.apache.impala.analysis.Expr;
 import org.apache.impala.analysis.ExprSubstitutionMap;
 import org.apache.impala.analysis.JoinOperator;
+import org.apache.impala.analysis.TableSampleClause;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.Pair;
@@ -94,14 +95,26 @@ public class IcebergDeleteNode extends JoinNode {
     // Also assume that the left side's selectivity applies to the delete 
records as well.
     // Please note that left side's cardinality already takes the selectivity 
into
     // account (i.e. no need to do leftSelectivity * leftCard).
-    long leftCardWithSelectivity = getChild(0).cardinality_;
+    PlanNode leftChild = getChild(0);
+    Preconditions.checkState(leftChild instanceof HdfsScanNode);
+    HdfsScanNode leftScanChild = (HdfsScanNode) leftChild;
+    TableSampleClause leftSampleParams = leftScanChild.getSampleParams();
+    long leftCardWithSelectivity = leftScanChild.cardinality_;
     long rightCard = getChild(1).cardinality_;
-    // Both sides should have non-zero cardinalities.
-    Preconditions.checkState(leftCardWithSelectivity > 0);
-    Preconditions.checkState(rightCard > 0);
-    double leftSelectivity = getChild(0).computeSelectivity();
-    long rightCardWithSelectivity = (long)(leftSelectivity * rightCard);
-    cardinality_ = Math.max(1, leftCardWithSelectivity - 
rightCardWithSelectivity);
+    // Both sides should have non-negative cardinalities.
+    Preconditions.checkState(leftCardWithSelectivity >= 0);
+    Preconditions.checkState(rightCard >= 0);
+    // The delete records on the right might refer to data records that are 
filtered
+    // out by predicates or table sampling. Let's incorporate this into our 
cardinality
+    // estimation.
+    double leftSelectivity = leftScanChild.computeSelectivity();
+    long effectiveRightCardinality = (long)(leftSelectivity * rightCard);
+    double leftSampling = leftSampleParams == null ?
+        1.0 : leftSampleParams.getPercentBytes() / 100.0;
+    Preconditions.checkState(leftSampling >= 0);
+    Preconditions.checkState(leftSampling <= 1.0);
+    effectiveRightCardinality = (long)(effectiveRightCardinality * 
leftSampling);
+    cardinality_ = Math.max(1, leftCardWithSelectivity - 
effectiveRightCardinality);
   }
 
   @Override
diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java 
b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
index b59d51519..0c1223521 100644
--- a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
@@ -204,50 +204,12 @@ public class IcebergScanNode extends HdfsScanNode {
 
   /**
    * Returns a sample of file descriptors associated to this scan node.
-   * The algorithm is based on FeFsTable.Utils.getFilesSample()
    */
   @Override
   protected Map<Long, List<FileDescriptor>> getFilesSample(
       long percentBytes, long minSampleBytes, long randomSeed) {
-    Preconditions.checkState(percentBytes >= 0 && percentBytes <= 100);
-    Preconditions.checkState(minSampleBytes >= 0);
-
-    // Ensure a consistent ordering of files for repeatable runs.
-    List<FileDescriptor> orderedFds = Lists.newArrayList(fileDescs_);
-    if (!filesAreSorted_) {
-      Collections.sort(orderedFds);
-    }
-
-    Preconditions.checkState(partitions_.size() == 1);
-    FeFsPartition part = partitions_.get(0);
-
-    long totalBytes = 0;
-    for (FileDescriptor fd : orderedFds) {
-      totalBytes += fd.getFileLength();
-    }
-
-    int numFilesRemaining = orderedFds.size();
-    double fracPercentBytes = (double) percentBytes / 100;
-    long targetBytes = (long) Math.round(totalBytes * fracPercentBytes);
-    targetBytes = Math.max(targetBytes, minSampleBytes);
-
-    // Randomly select files until targetBytes has been reached or all files 
have been
-    // selected.
-    Random rnd = new Random(randomSeed);
-    long selectedBytes = 0;
-    List<FileDescriptor> sampleFiles = Lists.newArrayList();
-    while (selectedBytes < targetBytes && numFilesRemaining > 0) {
-      int selectedIdx = rnd.nextInt(numFilesRemaining);
-      FileDescriptor fd = orderedFds.get(selectedIdx);
-      sampleFiles.add(fd);
-      selectedBytes += fd.getFileLength();
-      // Avoid selecting the same file multiple times.
-      orderedFds.set(selectedIdx, orderedFds.get(numFilesRemaining - 1));
-      --numFilesRemaining;
-    }
-    Map<Long, List<FileDescriptor>> result = new HashMap<>();
-    result.put(part.getId(), sampleFiles);
-    return result;
+    return FeIcebergTable.Utils.getFilesSample((FeIcebergTable) tbl_, 
fileDescs_,
+        filesAreSorted_, percentBytes, minSampleBytes, randomSeed);
   }
 
   @Override
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java 
b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index 47a64bcc5..9cd1eee25 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -1940,12 +1940,12 @@ public class AnalyzeDDLTest extends FrontendTestBase {
           "Invalid percent of bytes value '101'. " +
           "The percent of bytes to sample must be between 0 and 100.");
       AnalysisError("compute stats functional_kudu.alltypes tablesample system 
(1)",
-          "TABLESAMPLE is only supported on HDFS tables.");
+          "TABLESAMPLE is only supported on file-based tables.");
       AnalysisError("compute stats functional_hbase.alltypes tablesample 
system (2)",
-          "TABLESAMPLE is only supported on HDFS tables.");
+          "TABLESAMPLE is only supported on file-based tables.");
       AnalysisError(
           "compute stats functional.alltypes_datasource tablesample system 
(3)",
-          "TABLESAMPLE is only supported on HDFS tables.");
+          "TABLESAMPLE is only supported on file-based tables.");
 
       // Test file formats with columns whitelist.
       gflags.setEnable_stats_extrapolation(true);
@@ -1957,12 +1957,12 @@ public class AnalyzeDDLTest extends FrontendTestBase {
           "Invalid percent of bytes value '101'. " +
           "The percent of bytes to sample must be between 0 and 100.");
       AnalysisError("compute stats functional_kudu.alltypes tablesample system 
(1)",
-          "TABLESAMPLE is only supported on HDFS tables.");
+          "TABLESAMPLE is only supported on file-based tables.");
       AnalysisError("compute stats functional_hbase.alltypes tablesample 
system (2)",
-          "TABLESAMPLE is only supported on HDFS tables.");
+          "TABLESAMPLE is only supported on file-based tables.");
       AnalysisError(
           "compute stats functional.alltypes_datasource tablesample system 
(3)",
-          "TABLESAMPLE is only supported on HDFS tables.");
+          "TABLESAMPLE is only supported on file-based tables.");
 
       // Test different COMPUTE_STATS_MIN_SAMPLE_BYTES.
       TQueryOptions queryOpts = new TQueryOptions();
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java 
b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
index abb1e71bc..87fafd003 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
@@ -393,27 +393,27 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
 
     // Only applicable to HDFS base table refs.
     AnalysisError("select * from functional_kudu.alltypes tablesample system 
(10)",
-        "TABLESAMPLE is only supported on HDFS tables: 
functional_kudu.alltypes");
+        "TABLESAMPLE is only supported on file-based tables: 
functional_kudu.alltypes");
     AnalysisError("select * from functional_hbase.alltypes tablesample system 
(10)",
-        "TABLESAMPLE is only supported on HDFS tables: 
functional_hbase.alltypes");
+        "TABLESAMPLE is only supported on file-based tables: 
functional_hbase.alltypes");
     AnalysisError("select * from functional.alltypes_datasource tablesample 
system (10)",
-        "TABLESAMPLE is only supported on HDFS tables: " +
+        "TABLESAMPLE is only supported on file-based tables: " +
         "functional.alltypes_datasource");
     AnalysisError("select * from (select * from functional.alltypes) v " +
         "tablesample system (10)",
-        "TABLESAMPLE is only supported on HDFS tables: v");
+        "TABLESAMPLE is only supported on file-based tables: v");
     AnalysisError("with v as (select * from functional.alltypes) " +
         "select * from v tablesample system (10)",
-        "TABLESAMPLE is only supported on HDFS tables: v");
+        "TABLESAMPLE is only supported on file-based tables: v");
     AnalysisError("select * from functional.alltypes_view tablesample system 
(10)",
-        "TABLESAMPLE is only supported on HDFS tables: 
functional.alltypes_view");
+        "TABLESAMPLE is only supported on file-based tables: 
functional.alltypes_view");
     AnalysisError("select * from functional.allcomplextypes.int_array_col " +
         "tablesample system (10)",
-        "TABLESAMPLE is only supported on HDFS tables: " +
+        "TABLESAMPLE is only supported on file-based tables: " +
         "functional.allcomplextypes.int_array_col");
     AnalysisError("select * from functional.allcomplextypes a, a.int_array_col 
" +
         "tablesample system (10)",
-        "TABLESAMPLE is only supported on HDFS tables: a.int_array_col");
+        "TABLESAMPLE is only supported on file-based tables: a.int_array_col");
   }
 
   /**
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java 
b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index a4a54a44e..258040600 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -1000,9 +1000,18 @@ public class PlannerTest extends PlannerTestBase {
   public void testTableSample() {
     TQueryOptions options = defaultQueryOptions();
     runPlannerTestFile("tablesample", options,
+        ImmutableSet.of(PlannerTestOption.EXTENDED_EXPLAIN,
+            
PlannerTestOption.DO_NOT_VALIDATE_ROWCOUNT_ESTIMATION_FOR_PARTITIONS));
+  }
+
+  @Test
+  public void testTableSampleIceberg() {
+    TQueryOptions options = defaultQueryOptions();
+    runPlannerTestFile("tablesample-iceberg", options,
         ImmutableSet.of(PlannerTestOption.EXTENDED_EXPLAIN,
             
PlannerTestOption.DO_NOT_VALIDATE_ROWCOUNT_ESTIMATION_FOR_PARTITIONS,
-            PlannerTestOption.VALIDATE_ICEBERG_SNAPSHOT_IDS));
+            PlannerTestOption.VALIDATE_ICEBERG_SNAPSHOT_IDS,
+            PlannerTestOption.VALIDATE_CARDINALITY));
   }
 
   @Test
diff --git a/testdata/datasets/functional/functional_schema_template.sql 
b/testdata/datasets/functional/functional_schema_template.sql
index 2db189abc..d002e934a 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -3213,7 +3213,8 @@ iceberg_partitioned
 CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name}
 STORED AS ICEBERG
 LOCATION '/test-warehouse/iceberg_test/iceberg_partitioned'
-TBLPROPERTIES('write.format.default'='parquet', 
'iceberg.catalog'='hadoop.tables');
+TBLPROPERTIES('write.format.default'='parquet', 
'iceberg.catalog'='hadoop.tables',
+    'impala.enable.stats.extrapolation'='true');
 ---- DEPENDENT_LOAD
 `hadoop fs -mkdir -p /test-warehouse/iceberg_test && \
 hadoop fs -put -f 
${IMPALA_HOME}/testdata/data/iceberg_test/iceberg_partitioned 
/test-warehouse/iceberg_test/
@@ -3226,7 +3227,8 @@ iceberg_non_partitioned
 CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name}
 STORED AS ICEBERG
 LOCATION '/test-warehouse/iceberg_test/iceberg_non_partitioned'
-TBLPROPERTIES('write.format.default'='parquet', 
'iceberg.catalog'='hadoop.tables');
+TBLPROPERTIES('write.format.default'='parquet', 
'iceberg.catalog'='hadoop.tables',
+    'impala.enable.stats.extrapolation'='true');
 ---- DEPENDENT_LOAD
 `hadoop fs -mkdir -p /test-warehouse/iceberg_test && \
 hadoop fs -put -f 
${IMPALA_HOME}/testdata/data/iceberg_test/iceberg_non_partitioned 
/test-warehouse/iceberg_test/
@@ -3627,6 +3629,7 @@ STORED AS ICEBERG
 TBLPROPERTIES('write.format.default'='parquet', 
'iceberg.catalog'='hadoop.catalog',
               
'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog',
               
'iceberg.table_identifier'='ice.iceberg_v2_delete_equality_partitioned',
+              'impala.enable.stats.extrapolation'='true',
               'format-version'='2');
 ---- DEPENDENT_LOAD
 `hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \
@@ -3764,6 +3767,7 @@ STORED AS ICEBERG
 TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
               
'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog',
               
'iceberg.table_identifier'='ice.iceberg_v2_positional_not_all_data_files_have_delete_files',
+              'impala.enable.stats.extrapolation'='true',
               'format-version'='2');
 ---- DEPENDENT_LOAD
 `hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \
@@ -3779,6 +3783,7 @@ STORED AS ICEBERG
 TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
               
'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog',
               
'iceberg.table_identifier'='ice.iceberg_v2_positional_not_all_data_files_have_delete_files_orc',
+              'impala.enable.stats.extrapolation'='true',
               'format-version'='2', 'write.format.default'='orc');
 ---- DEPENDENT_LOAD
 `hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-merge-insert-only.test
 
b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-merge-insert-only.test
index 06f528f52..5ff3e2297 100644
--- 
a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-merge-insert-only.test
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-merge-insert-only.test
@@ -42,7 +42,7 @@ WRITE TO HDFS [functional_parquet.iceberg_v2_no_deletes, 
OVERWRITE=false]
 |     stored statistics:
 |       table: rows=20 size=22.90KB
 |       columns: unavailable
-|     extrapolated-rows=disabled max-scan-range-rows=1
+|     extrapolated-rows=unavailable max-scan-range-rows=1
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=1 row-size=44B cardinality=20
 |     in pipelines: 01(GETNEXT)
@@ -103,7 +103,7 @@ WRITE TO HDFS [functional_parquet.iceberg_v2_no_deletes, 
OVERWRITE=false]
 |     stored statistics:
 |       table: rows=20 size=22.90KB
 |       columns: unavailable
-|     extrapolated-rows=disabled max-scan-range-rows=1
+|     extrapolated-rows=unavailable max-scan-range-rows=1
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=1 row-size=44B cardinality=20
 |     in pipelines: 01(GETNEXT)
@@ -180,7 +180,7 @@ WRITE TO HDFS [functional_parquet.iceberg_v2_no_deletes, 
OVERWRITE=false]
 |     stored statistics:
 |       table: rows=20 size=22.90KB
 |       columns: unavailable
-|     extrapolated-rows=disabled max-scan-range-rows=1
+|     extrapolated-rows=unavailable max-scan-range-rows=1
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=1 row-size=44B cardinality=20
 |     in pipelines: 01(GETNEXT)
@@ -249,7 +249,7 @@ WRITE TO HDFS [functional_parquet.iceberg_v2_no_deletes, 
OVERWRITE=false]
 |     stored statistics:
 |       table: rows=20 size=22.90KB
 |       columns: unavailable
-|     extrapolated-rows=disabled max-scan-range-rows=1
+|     extrapolated-rows=unavailable max-scan-range-rows=1
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=1 row-size=44B cardinality=20
 |     in pipelines: 01(GETNEXT)
@@ -346,4 +346,4 @@ WRITE TO HDFS 
[functional_parquet.iceberg_partition_transforms_zorder, OVERWRITE
    mem-estimate=96.00MB mem-reservation=48.00KB thread-reservation=1
    tuple-ids=0 row-size=52B cardinality=1
    in pipelines: 00(GETNEXT)
-====
\ No newline at end of file
+====
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-resources.test
 
b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-resources.test
index 3d8951f5e..d8011b34c 100644
--- 
a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-resources.test
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-resources.test
@@ -475,7 +475,7 @@ PLAN-ROOT SINK
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=1 row-size=267B cardinality=4
 |  |     in pipelines: 01(GETNEXT)
@@ -486,7 +486,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=0 row-size=36B cardinality=6
 |     in pipelines: 00(GETNEXT)
@@ -497,7 +497,7 @@ PLAN-ROOT SINK
    stored statistics:
      table: rows=10 size=7.77KB
      columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=36B cardinality=4
    in pipelines: 03(GETNEXT)
@@ -542,7 +542,7 @@ Per-Host Resources: mem-estimate=64.17MB 
mem-reservation=32.76KB thread-reservat
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=1 row-size=267B cardinality=4
 |  |     in pipelines: 01(GETNEXT)
@@ -553,7 +553,7 @@ Per-Host Resources: mem-estimate=64.17MB 
mem-reservation=32.76KB thread-reservat
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=0 row-size=36B cardinality=6
 |     in pipelines: 00(GETNEXT)
@@ -564,7 +564,7 @@ Per-Host Resources: mem-estimate=64.17MB 
mem-reservation=32.76KB thread-reservat
    stored statistics:
      table: rows=10 size=7.77KB
      columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=36B cardinality=4
    in pipelines: 03(GETNEXT)
@@ -595,7 +595,7 @@ PLAN-ROOT SINK
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=1 row-size=267B cardinality=4
 |  |     in pipelines: 01(GETNEXT)
@@ -606,7 +606,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=0 row-size=36B cardinality=6
 |     in pipelines: 00(GETNEXT)
@@ -617,7 +617,7 @@ PLAN-ROOT SINK
    stored statistics:
      table: rows=10 size=7.77KB
      columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=36B cardinality=4
    in pipelines: 03(GETNEXT)
@@ -660,7 +660,7 @@ Per-Host Resources: mem-estimate=64.17MB 
mem-reservation=32.76KB thread-reservat
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=1 row-size=267B cardinality=4
 |  |     in pipelines: 01(GETNEXT)
@@ -671,7 +671,7 @@ Per-Host Resources: mem-estimate=64.17MB 
mem-reservation=32.76KB thread-reservat
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=0 row-size=36B cardinality=6
 |     in pipelines: 00(GETNEXT)
@@ -682,7 +682,7 @@ Per-Host Resources: mem-estimate=64.17MB 
mem-reservation=32.76KB thread-reservat
    stored statistics:
      table: rows=10 size=7.77KB
      columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=36B cardinality=4
    in pipelines: 03(GETNEXT)
@@ -739,7 +739,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=1 size=2.63KB
 |       columns: all
-|     extrapolated-rows=disabled max-scan-range-rows=1
+|     extrapolated-rows=unavailable max-scan-range-rows=1
 |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |     tuple-ids=2 row-size=267B cardinality=1
 |     in pipelines: 01(GETNEXT)
@@ -750,7 +750,7 @@ PLAN-ROOT SINK
    stored statistics:
      table: rows=10 size=7.77KB
      columns: all
-   extrapolated-rows=disabled max-scan-range-rows=10
+   extrapolated-rows=unavailable max-scan-range-rows=10
    mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=3
    in pipelines: 00(GETNEXT)
@@ -799,7 +799,7 @@ Per-Host Resources: mem-estimate=32.02MB 
mem-reservation=16.76KB thread-reservat
 |     stored statistics:
 |       table: rows=1 size=2.63KB
 |       columns: all
-|     extrapolated-rows=disabled max-scan-range-rows=1
+|     extrapolated-rows=unavailable max-scan-range-rows=1
 |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |     tuple-ids=2 row-size=267B cardinality=1
 |     in pipelines: 01(GETNEXT)
@@ -810,7 +810,7 @@ Per-Host Resources: mem-estimate=32.02MB 
mem-reservation=16.76KB thread-reservat
    stored statistics:
      table: rows=10 size=7.77KB
      columns: all
-   extrapolated-rows=disabled max-scan-range-rows=10
+   extrapolated-rows=unavailable max-scan-range-rows=10
    mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=3
    in pipelines: 00(GETNEXT)
@@ -841,7 +841,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=1 size=2.63KB
 |       columns: all
-|     extrapolated-rows=disabled max-scan-range-rows=1
+|     extrapolated-rows=unavailable max-scan-range-rows=1
 |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |     tuple-ids=2 row-size=267B cardinality=1
 |     in pipelines: 01(GETNEXT)
@@ -852,7 +852,7 @@ PLAN-ROOT SINK
    stored statistics:
      table: rows=10 size=7.77KB
      columns: all
-   extrapolated-rows=disabled max-scan-range-rows=10
+   extrapolated-rows=unavailable max-scan-range-rows=10
    mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=3
    in pipelines: 00(GETNEXT)
@@ -901,7 +901,7 @@ Per-Host Resources: mem-estimate=32.02MB 
mem-reservation=16.76KB thread-reservat
 |     stored statistics:
 |       table: rows=1 size=2.63KB
 |       columns: all
-|     extrapolated-rows=disabled max-scan-range-rows=1
+|     extrapolated-rows=unavailable max-scan-range-rows=1
 |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |     tuple-ids=2 row-size=267B cardinality=1
 |     in pipelines: 01(GETNEXT)
@@ -912,7 +912,7 @@ Per-Host Resources: mem-estimate=32.02MB 
mem-reservation=16.76KB thread-reservat
    stored statistics:
      table: rows=10 size=7.77KB
      columns: all
-   extrapolated-rows=disabled max-scan-range-rows=10
+   extrapolated-rows=unavailable max-scan-range-rows=10
    mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=3
    in pipelines: 00(GETNEXT)
@@ -943,7 +943,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=4 size=5.33KB
 |       columns: all
-|     extrapolated-rows=disabled max-scan-range-rows=2
+|     extrapolated-rows=unavailable max-scan-range-rows=2
 |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |     tuple-ids=2 row-size=267B cardinality=4
 |     in pipelines: 01(GETNEXT)
@@ -954,7 +954,7 @@ PLAN-ROOT SINK
    stored statistics:
      table: rows=10 size=7.77KB
      columns: all
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=6
    in pipelines: 00(GETNEXT)
@@ -1003,7 +1003,7 @@ Per-Host Resources: mem-estimate=32.02MB 
mem-reservation=16.76KB thread-reservat
 |     stored statistics:
 |       table: rows=4 size=5.33KB
 |       columns: all
-|     extrapolated-rows=disabled max-scan-range-rows=2
+|     extrapolated-rows=unavailable max-scan-range-rows=2
 |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |     tuple-ids=2 row-size=267B cardinality=4
 |     in pipelines: 01(GETNEXT)
@@ -1014,7 +1014,7 @@ Per-Host Resources: mem-estimate=32.02MB 
mem-reservation=16.76KB thread-reservat
    stored statistics:
      table: rows=10 size=7.77KB
      columns: all
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=6
    in pipelines: 00(GETNEXT)
@@ -1248,7 +1248,7 @@ PLAN-ROOT SINK
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=1 row-size=267B cardinality=4
 |  |     in pipelines: 01(GETNEXT)
@@ -1260,7 +1260,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     parquet statistics predicates: i > CAST(2 AS INT)
 |     parquet dictionary predicates: i > CAST(2 AS INT)
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
@@ -1274,7 +1274,7 @@ PLAN-ROOT SINK
    stored statistics:
      table: rows=10 size=7.77KB
      columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    parquet statistics predicates: i > CAST(2 AS INT)
    parquet dictionary predicates: i > CAST(2 AS INT)
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
@@ -1319,7 +1319,7 @@ Per-Host Resources: mem-estimate=64.17MB 
mem-reservation=32.76KB thread-reservat
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=1 row-size=267B cardinality=4
 |  |     in pipelines: 01(GETNEXT)
@@ -1331,7 +1331,7 @@ Per-Host Resources: mem-estimate=64.17MB 
mem-reservation=32.76KB thread-reservat
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     parquet statistics predicates: i > CAST(2 AS INT)
 |     parquet dictionary predicates: i > CAST(2 AS INT)
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
@@ -1345,7 +1345,7 @@ Per-Host Resources: mem-estimate=64.17MB 
mem-reservation=32.76KB thread-reservat
    stored statistics:
      table: rows=10 size=7.77KB
      columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    parquet statistics predicates: i > CAST(2 AS INT)
    parquet dictionary predicates: i > CAST(2 AS INT)
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
@@ -1374,7 +1374,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns: unavailable
-|     extrapolated-rows=disabled max-scan-range-rows=10
+|     extrapolated-rows=unavailable max-scan-range-rows=10
 |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |     tuple-ids=3 row-size=16B cardinality=3
 |     in pipelines: 06(GETNEXT)
@@ -1403,7 +1403,7 @@ PLAN-ROOT SINK
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=5 row-size=267B cardinality=4
 |  |     in pipelines: 01(GETNEXT)
@@ -1414,7 +1414,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=0 row-size=36B cardinality=6
 |     in pipelines: 00(GETNEXT)
@@ -1425,7 +1425,7 @@ PLAN-ROOT SINK
    stored statistics:
      table: rows=10 size=7.77KB
      columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=36B cardinality=4
    in pipelines: 03(GETNEXT)
@@ -1462,7 +1462,7 @@ Per-Host Resources: mem-estimate=130.02MB 
mem-reservation=35.94MB thread-reserva
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns: unavailable
-|     extrapolated-rows=disabled max-scan-range-rows=10
+|     extrapolated-rows=unavailable max-scan-range-rows=10
 |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |     tuple-ids=3 row-size=16B cardinality=3
 |     in pipelines: 06(GETNEXT)
@@ -1511,7 +1511,7 @@ Per-Host Resources: mem-estimate=192.17MB 
mem-reservation=34.03MB thread-reserva
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=5 row-size=267B cardinality=4
 |  |     in pipelines: 01(GETNEXT)
@@ -1522,7 +1522,7 @@ Per-Host Resources: mem-estimate=192.17MB 
mem-reservation=34.03MB thread-reserva
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=0 row-size=36B cardinality=6
 |     in pipelines: 00(GETNEXT)
@@ -1533,7 +1533,7 @@ Per-Host Resources: mem-estimate=192.17MB 
mem-reservation=34.03MB thread-reserva
    stored statistics:
      table: rows=10 size=7.77KB
      columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=36B cardinality=4
    in pipelines: 03(GETNEXT)
@@ -1565,7 +1565,7 @@ PLAN-ROOT SINK
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=2 row-size=267B cardinality=4
 |  |     in pipelines: 01(GETNEXT)
@@ -1577,7 +1577,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     parquet dictionary predicates: CAST(i AS BIGINT) + CAST(1000 AS BIGINT) 
> CAST(1003 AS BIGINT)
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=0 row-size=36B cardinality=1
@@ -1590,7 +1590,7 @@ PLAN-ROOT SINK
    stored statistics:
      table: rows=10 size=7.77KB
      columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    parquet dictionary predicates: CAST(i AS BIGINT) + CAST(1000 AS BIGINT) > 
CAST(1003 AS BIGINT)
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=36B cardinality=1
@@ -1634,7 +1634,7 @@ Per-Host Resources: mem-estimate=64.17MB 
mem-reservation=32.76KB thread-reservat
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=2 row-size=267B cardinality=4
 |  |     in pipelines: 01(GETNEXT)
@@ -1646,7 +1646,7 @@ Per-Host Resources: mem-estimate=64.17MB 
mem-reservation=32.76KB thread-reservat
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     parquet dictionary predicates: CAST(i AS BIGINT) + CAST(1000 AS BIGINT) 
> CAST(1003 AS BIGINT)
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=0 row-size=36B cardinality=1
@@ -1659,7 +1659,7 @@ Per-Host Resources: mem-estimate=64.17MB 
mem-reservation=32.76KB thread-reservat
    stored statistics:
      table: rows=10 size=7.77KB
      columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    parquet dictionary predicates: CAST(i AS BIGINT) + CAST(1000 AS BIGINT) > 
CAST(1003 AS BIGINT)
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=36B cardinality=1
@@ -1751,7 +1751,7 @@ PLAN-ROOT SINK
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=4 row-size=267B cardinality=4
 |  |     in pipelines: 01(GETNEXT)
@@ -1763,7 +1763,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=0 row-size=36B cardinality=6
 |     in pipelines: 00(GETNEXT)
@@ -1775,7 +1775,7 @@ PLAN-ROOT SINK
    stored statistics:
      table: rows=10 size=7.77KB
      columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=36B cardinality=4
    in pipelines: 03(GETNEXT)
@@ -1903,7 +1903,7 @@ Per-Host Resources: mem-estimate=67.11MB 
mem-reservation=2.97MB thread-reservati
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=4 row-size=267B cardinality=4
 |  |     in pipelines: 01(GETNEXT)
@@ -1915,7 +1915,7 @@ Per-Host Resources: mem-estimate=67.11MB 
mem-reservation=2.97MB thread-reservati
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=0 row-size=36B cardinality=6
 |     in pipelines: 00(GETNEXT)
@@ -1927,7 +1927,7 @@ Per-Host Resources: mem-estimate=67.11MB 
mem-reservation=2.97MB thread-reservati
    stored statistics:
      table: rows=10 size=7.77KB
      columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=36B cardinality=4
    in pipelines: 03(GETNEXT)
@@ -2237,7 +2237,7 @@ PLAN-ROOT SINK
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=10 row-size=267B cardinality=4
 |  |     in pipelines: 06(GETNEXT)
@@ -2248,7 +2248,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns: all
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |     tuple-ids=4 row-size=20B cardinality=6
 |     in pipelines: 05(GETNEXT)
@@ -2282,7 +2282,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=4 size=5.33KB
 |       columns: all
-|     extrapolated-rows=disabled max-scan-range-rows=2
+|     extrapolated-rows=unavailable max-scan-range-rows=2
 |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |     tuple-ids=7 row-size=267B cardinality=4
 |     in pipelines: 01(GETNEXT)
@@ -2293,7 +2293,7 @@ PLAN-ROOT SINK
    stored statistics:
      table: rows=10 size=7.77KB
      columns: all
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=6
    in pipelines: 00(GETNEXT)
@@ -2354,7 +2354,7 @@ PLAN-ROOT SINK
 |  |     stored statistics:
 |  |       table: rows=4 size=5.33KB
 |  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
 |  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |  |     tuple-ids=10 row-size=267B cardinality=4
 |  |     in pipelines: 06(GETNEXT)
@@ -2365,7 +2365,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=10 size=7.77KB
 |       columns: all
-|     extrapolated-rows=disabled max-scan-range-rows=5
+|     extrapolated-rows=unavailable max-scan-range-rows=5
 |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |     tuple-ids=4 row-size=20B cardinality=6
 |     in pipelines: 05(GETNEXT)
@@ -2426,7 +2426,7 @@ Per-Host Resources: mem-estimate=32.02MB 
mem-reservation=16.76KB thread-reservat
 |     stored statistics:
 |       table: rows=4 size=5.33KB
 |       columns: all
-|     extrapolated-rows=disabled max-scan-range-rows=2
+|     extrapolated-rows=unavailable max-scan-range-rows=2
 |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
 |     tuple-ids=7 row-size=267B cardinality=4
 |     in pipelines: 01(GETNEXT)
@@ -2437,7 +2437,7 @@ Per-Host Resources: mem-estimate=32.02MB 
mem-reservation=16.76KB thread-reservat
    stored statistics:
      table: rows=10 size=7.77KB
      columns: all
-   extrapolated-rows=disabled max-scan-range-rows=5
+   extrapolated-rows=unavailable max-scan-range-rows=5
    mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=6
    in pipelines: 00(GETNEXT)
@@ -2872,7 +2872,7 @@ PLAN-ROOT SINK
 |  |     stored statistics:
 |  |       table: rows=3 size=1.33KB
 |  |       columns missing stats: s, d
-|  |     extrapolated-rows=disabled max-scan-range-rows=1
+|  |     extrapolated-rows=unavailable max-scan-range-rows=1
 |  |     mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
 |  |     tuple-ids=2 row-size=24B cardinality=3
 |  |     in pipelines: 01(GETNEXT)
@@ -2883,7 +2883,7 @@ PLAN-ROOT SINK
 |     stored statistics:
 |       table: rows=8 size=4.81KB
 |       columns missing stats: i, s, d
-|     extrapolated-rows=disabled max-scan-range-rows=4
+|     extrapolated-rows=unavailable max-scan-range-rows=4
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=0 row-size=28B cardinality=5
 |     in pipelines: 00(GETNEXT)
@@ -2894,7 +2894,7 @@ PLAN-ROOT SINK
    stored statistics:
      table: rows=8 size=4.81KB
      columns missing stats: i, s, d
-   extrapolated-rows=disabled max-scan-range-rows=4
+   extrapolated-rows=unavailable max-scan-range-rows=4
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=28B cardinality=3
    in pipelines: 03(GETNEXT)
@@ -2938,7 +2938,7 @@ Per-Host Resources: mem-estimate=64.12MB 
mem-reservation=1.94MB thread-reservati
 |  |     stored statistics:
 |  |       table: rows=3 size=1.33KB
 |  |       columns missing stats: s, d
-|  |     extrapolated-rows=disabled max-scan-range-rows=1
+|  |     extrapolated-rows=unavailable max-scan-range-rows=1
 |  |     mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
 |  |     tuple-ids=2 row-size=24B cardinality=3
 |  |     in pipelines: 01(GETNEXT)
@@ -2956,7 +2956,7 @@ Per-Host Resources: mem-estimate=64.12MB 
mem-reservation=1.94MB thread-reservati
 |     stored statistics:
 |       table: rows=8 size=4.81KB
 |       columns missing stats: i, s, d
-|     extrapolated-rows=disabled max-scan-range-rows=4
+|     extrapolated-rows=unavailable max-scan-range-rows=4
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
 |     tuple-ids=0 row-size=28B cardinality=5
 |     in pipelines: 00(GETNEXT)
@@ -2967,7 +2967,7 @@ Per-Host Resources: mem-estimate=64.12MB 
mem-reservation=1.94MB thread-reservati
    stored statistics:
      table: rows=8 size=4.81KB
      columns missing stats: i, s, d
-   extrapolated-rows=disabled max-scan-range-rows=4
+   extrapolated-rows=unavailable max-scan-range-rows=4
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=28B cardinality=3
    in pipelines: 03(GETNEXT)
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample-iceberg.test
 
b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample-iceberg.test
new file mode 100644
index 000000000..30f84cf2d
--- /dev/null
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample-iceberg.test
@@ -0,0 +1,206 @@
+# Sampling Iceberg tables.
+select * from functional_parquet.iceberg_non_partitioned tablesample 
system(10) repeatable(1234)
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB 
thread-reservation=2
+PLAN-ROOT SINK
+|  output exprs: functional_parquet.iceberg_non_partitioned.id, 
functional_parquet.iceberg_non_partitioned.user, 
functional_parquet.iceberg_non_partitioned.action, 
functional_parquet.iceberg_non_partitioned.event_time
+|  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
+|
+00:SCAN HDFS [functional_parquet.iceberg_non_partitioned]
+   HDFS partitions=1/1 files=3 size=3.41KB
+   Iceberg snapshot id: 93996984692289973
+   stored statistics:
+     table: rows=20 size=22.90KB
+     columns: unavailable
+   extrapolated-rows=unavailable max-scan-range-rows=6
+   mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
+   tuple-ids=0 row-size=44B cardinality=3
+   in pipelines: 00(GETNEXT)
+====
+# Sampling Iceberg tables. Count(*) is optimized.
+select count(*) from functional_parquet.iceberg_non_partitioned tablesample 
system(10) repeatable(1234)
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=1.02MB mem-reservation=8.00KB 
thread-reservation=2
+PLAN-ROOT SINK
+|  output exprs: count(*)
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|
+01:AGGREGATE [FINALIZE]
+|  output: sum_init_zero(functional_parquet.iceberg_non_partitioned.stats: 
num_rows)
+|  mem-estimate=16.00KB mem-reservation=0B spill-buffer=2.00MB 
thread-reservation=0
+|  tuple-ids=1 row-size=8B cardinality=1
+|  in pipelines: 01(GETNEXT), 00(OPEN)
+|
+00:SCAN HDFS [functional_parquet.iceberg_non_partitioned]
+   HDFS partitions=1/1 files=3 size=3.41KB
+   Iceberg snapshot id: 93996984692289973
+   stored statistics:
+     table: rows=20 size=22.90KB
+     columns: all
+   extrapolated-rows=unavailable max-scan-range-rows=6
+   mem-estimate=1.00MB mem-reservation=8.00KB thread-reservation=1
+   tuple-ids=0 row-size=8B cardinality=20
+   in pipelines: 00(GETNEXT)
+====
+# Sampling partitioned Iceberg tables.
+select * from functional_parquet.iceberg_partitioned tablesample system(50) 
repeatable(1234)
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB 
thread-reservation=2
+PLAN-ROOT SINK
+|  output exprs: functional_parquet.iceberg_partitioned.id, 
functional_parquet.iceberg_partitioned.user, 
functional_parquet.iceberg_partitioned.action, 
functional_parquet.iceberg_partitioned.event_time
+|  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
+|
+00:SCAN HDFS [functional_parquet.iceberg_partitioned]
+   HDFS partitions=1/1 files=10 size=11.46KB
+   Iceberg snapshot id: 8270633197658268308
+   stored statistics:
+     table: rows=20 size=22.90KB
+     columns: unavailable
+   extrapolated-rows=unavailable max-scan-range-rows=2
+   mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
+   tuple-ids=0 row-size=44B cardinality=10
+   in pipelines: 00(GETNEXT)
+====
+# Sampling Iceberg tables with predicates. Predicate pushdown to Iceberg happens
+# before sampling (similarly to static partition pruning).
+select * from functional_parquet.iceberg_partitioned tablesample system(50) 
repeatable(1234)
+where action = 'click' and id > 0
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB 
thread-reservation=2
+PLAN-ROOT SINK
+|  output exprs: functional_parquet.iceberg_partitioned.id, 
functional_parquet.iceberg_partitioned.user, 
functional_parquet.iceberg_partitioned.action, 
functional_parquet.iceberg_partitioned.event_time
+|  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
+|
+00:SCAN HDFS [functional_parquet.iceberg_partitioned]
+   HDFS partitions=1/1 files=4 size=4.57KB
+   predicates: id > CAST(0 AS INT)
+   Iceberg snapshot id: 8270633197658268308
+   skipped Iceberg predicates: action = 'click'
+   stored statistics:
+     table: rows=20 size=22.90KB
+     columns: unavailable
+   extrapolated-rows=unavailable max-scan-range-rows=5
+   parquet statistics predicates: id > CAST(0 AS INT)
+   parquet dictionary predicates: id > CAST(0 AS INT)
+   mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
+   tuple-ids=0 row-size=44B cardinality=1
+   in pipelines: 00(GETNEXT)
+====
+# Sampling Iceberg V2 tables. Delete files are not sampled, only the data files. So we
+# don't return rows that are deleted.
+select * from 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files
+tablesample system(10) repeatable(1234)
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=100.00MB mem-reservation=4.05MB 
thread-reservation=3
+PLAN-ROOT SINK
+|  output exprs: 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,
 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
+|  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
+|
+04:UNION
+|  pass-through-operands: all
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|  tuple-ids=0 row-size=36B cardinality=4
+|  in pipelines: 03(GETNEXT), 00(GETNEXT)
+|
+|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
+|  |  equality predicates: 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position
 = 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,
 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name
 = 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path
+|  |  mem-estimate=566B mem-reservation=566B thread-reservation=0
+|  |  tuple-ids=0 row-size=36B cardinality=3
+|  |  in pipelines: 00(GETNEXT), 01(OPEN)
+|  |
+|  |--01:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01
 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
+|  |     HDFS partitions=1/1 files=2 size=5.33KB
+|  |     Iceberg snapshot id: 1497619269847778439
+|  |     stored statistics:
+|  |       table: rows=4 size=5.33KB
+|  |       columns: all
+|  |     extrapolated-rows=unavailable max-scan-range-rows=2
+|  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
+|  |     tuple-ids=1 row-size=267B cardinality=4
+|  |     in pipelines: 01(GETNEXT)
+|  |
+|  00:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
+|     HDFS partitions=1/1 files=1 size=625B
+|     Iceberg snapshot id: 1497619269847778439
+|     stored statistics:
+|       table: rows=10 size=7.77KB
+|       columns missing stats: i, s
+|     extrapolated-rows=unavailable max-scan-range-rows=10
+|     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
+|     tuple-ids=0 row-size=36B cardinality=3
+|     in pipelines: 00(GETNEXT)
+|
+03:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
+   HDFS partitions=1/1 files=1 size=620B
+   Iceberg snapshot id: 1497619269847778439
+   stored statistics:
+     table: rows=10 size=7.77KB
+     columns missing stats: i, s
+   extrapolated-rows=unavailable max-scan-range-rows=10
+   mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
+   tuple-ids=0 row-size=36B cardinality=1
+   in pipelines: 03(GETNEXT)
+====
+# Cardinality of DELETE EVENTS ICEBERG DELETE should take the sampling percentage into account.
+# Delete records cardinality: 3
+# Sampling percentage: 35%
+# Effective delete records count: 3 * 0.35 = 1
+# DELETE EVENTS ICEBERG DELETE cardinality = 3 (Left SCAN node cardinality) - 1 (Effective delete records count) = 2
+select * from functional_parquet.iceberg_v2_positional_update_all_rows 
tablesample system(35) repeatable(1234);
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=100.00MB mem-reservation=4.05MB 
thread-reservation=3
+PLAN-ROOT SINK
+|  output exprs: functional_parquet.iceberg_v2_positional_update_all_rows.i, 
functional_parquet.iceberg_v2_positional_update_all_rows.s
+|  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
+|
+04:UNION
+|  pass-through-operands: all
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|  tuple-ids=0 row-size=36B cardinality=5
+|  in pipelines: 03(GETNEXT), 00(GETNEXT)
+|
+|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
+|  |  equality predicates: 
functional_parquet.iceberg_v2_positional_update_all_rows.file__position = 
functional_parquet.iceberg_v2_positional_update_all_rows-position-delete.pos, 
functional_parquet.iceberg_v2_positional_update_all_rows.input__file__name = 
functional_parquet.iceberg_v2_positional_update_all_rows-position-delete.file_path
+|  |  mem-estimate=764B mem-reservation=764B thread-reservation=0
+|  |  tuple-ids=0 row-size=36B cardinality=2
+|  |  in pipelines: 00(GETNEXT), 01(OPEN)
+|  |
+|  |--01:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_update_all_rows-POSITION-DELETE-01 
functional_parquet.iceberg_v2_positional_update_all_rows-position-delete]
+|  |     HDFS partitions=1/1 files=1 size=2.60KB
+|  |     Iceberg snapshot id: 3877007445826010687
+|  |     stored statistics:
+|  |       table: rows=3 size=2.60KB
+|  |       columns: all
+|  |     extrapolated-rows=disabled max-scan-range-rows=3
+|  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
+|  |     tuple-ids=1 row-size=246B cardinality=3
+|  |     in pipelines: 01(GETNEXT)
+|  |
+|  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
+|     HDFS partitions=1/1 files=1 size=625B
+|     Iceberg snapshot id: 3877007445826010687
+|     stored statistics:
+|       table: rows=6 size=3.82KB
+|       columns missing stats: i, s
+|     extrapolated-rows=disabled max-scan-range-rows=6
+|     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
+|     tuple-ids=0 row-size=36B cardinality=3
+|     in pipelines: 00(GETNEXT)
+|
+03:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
+   HDFS partitions=1/1 files=1 size=625B
+   Iceberg snapshot id: 3877007445826010687
+   stored statistics:
+     table: rows=6 size=3.82KB
+     columns missing stats: i, s
+   extrapolated-rows=disabled max-scan-range-rows=6
+   mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
+   tuple-ids=0 row-size=36B cardinality=3
+   in pipelines: 03(GETNEXT)
+====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test
index 6c3b986f5..66d999eeb 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test
@@ -247,151 +247,3 @@ PLAN-ROOT SINK
    tuple-ids=0 row-size=4B cardinality=730
    in pipelines: 00(GETNEXT)
 ====
-# Sampling Iceberg tables.
-select * from functional_parquet.iceberg_non_partitioned tablesample 
system(10) repeatable(1234)
----- PLAN
-F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-|  Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB 
thread-reservation=2
-PLAN-ROOT SINK
-|  output exprs: functional_parquet.iceberg_non_partitioned.id, 
functional_parquet.iceberg_non_partitioned.user, 
functional_parquet.iceberg_non_partitioned.action, 
functional_parquet.iceberg_non_partitioned.event_time
-|  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
-|
-00:SCAN HDFS [functional_parquet.iceberg_non_partitioned]
-   HDFS partitions=1/1 files=3 size=3.41KB
-   Iceberg snapshot id: 93996984692289973
-   stored statistics:
-     table: rows=20 size=22.90KB
-     columns: unavailable
-   extrapolated-rows=disabled max-scan-range-rows=6
-   mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
-   tuple-ids=0 row-size=44B cardinality=3
-   in pipelines: 00(GETNEXT)
-====
-# Sampling Iceberg tables. Count(*) is optimized.
-select count(*) from functional_parquet.iceberg_non_partitioned tablesample 
system(10) repeatable(1234)
----- PLAN
-F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-|  Per-Host Resources: mem-estimate=1.02MB mem-reservation=8.00KB 
thread-reservation=2
-PLAN-ROOT SINK
-|  output exprs: count(*)
-|  mem-estimate=0B mem-reservation=0B thread-reservation=0
-|
-01:AGGREGATE [FINALIZE]
-|  output: sum_init_zero(functional_parquet.iceberg_non_partitioned.stats: 
num_rows)
-|  mem-estimate=16.00KB mem-reservation=0B spill-buffer=2.00MB 
thread-reservation=0
-|  tuple-ids=1 row-size=8B cardinality=1
-|  in pipelines: 01(GETNEXT), 00(OPEN)
-|
-00:SCAN HDFS [functional_parquet.iceberg_non_partitioned]
-   HDFS partitions=1/1 files=3 size=3.41KB
-   Iceberg snapshot id: 93996984692289973
-   stored statistics:
-     table: rows=20 size=22.90KB
-     columns: all
-   extrapolated-rows=disabled max-scan-range-rows=6
-   mem-estimate=1.00MB mem-reservation=8.00KB thread-reservation=1
-   tuple-ids=0 row-size=8B cardinality=20
-   in pipelines: 00(GETNEXT)
-====
-# Sampling partitioned Iceberg tables.
-select * from functional_parquet.iceberg_partitioned tablesample system(50) 
repeatable(1234)
----- PLAN
-F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-|  Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB 
thread-reservation=2
-PLAN-ROOT SINK
-|  output exprs: functional_parquet.iceberg_partitioned.id, 
functional_parquet.iceberg_partitioned.user, 
functional_parquet.iceberg_partitioned.action, 
functional_parquet.iceberg_partitioned.event_time
-|  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
-|
-00:SCAN HDFS [functional_parquet.iceberg_partitioned]
-   HDFS partitions=1/1 files=10 size=11.46KB
-   Iceberg snapshot id: 8270633197658268308
-   stored statistics:
-     table: rows=20 size=22.90KB
-     columns: unavailable
-   extrapolated-rows=disabled max-scan-range-rows=2
-   mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
-   tuple-ids=0 row-size=44B cardinality=10
-   in pipelines: 00(GETNEXT)
-====
-# Sampling Iceberg tables with predicates. Predicate pushdown to Iceberg happens
-# before sampling (similarly to static partition pruning).
-select * from functional_parquet.iceberg_partitioned tablesample system(50) 
repeatable(1234)
-where action = 'click' and id > 0
----- PLAN
-F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-|  Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB 
thread-reservation=2
-PLAN-ROOT SINK
-|  output exprs: functional_parquet.iceberg_partitioned.id, 
functional_parquet.iceberg_partitioned.user, 
functional_parquet.iceberg_partitioned.action, 
functional_parquet.iceberg_partitioned.event_time
-|  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
-|
-00:SCAN HDFS [functional_parquet.iceberg_partitioned]
-   HDFS partitions=1/1 files=4 size=4.57KB
-   predicates: id > CAST(0 AS INT)
-   Iceberg snapshot id: 8270633197658268308
-   skipped Iceberg predicates: action = 'click'
-   stored statistics:
-     table: rows=20 size=22.90KB
-     columns: unavailable
-   extrapolated-rows=disabled max-scan-range-rows=5
-   parquet statistics predicates: id > CAST(0 AS INT)
-   parquet dictionary predicates: id > CAST(0 AS INT)
-   mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
-   tuple-ids=0 row-size=44B cardinality=1
-   in pipelines: 00(GETNEXT)
-====
-# Sampling Iceberg V2 tables. Delete files are not sampled, only the data files. So we
-# don't return rows that are deleted.
-select * from 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files
-tablesample system(10) repeatable(1234)
----- PLAN
-F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-|  Per-Host Resources: mem-estimate=100.00MB mem-reservation=4.05MB 
thread-reservation=3
-PLAN-ROOT SINK
-|  output exprs: 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,
 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-|  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
-|
-04:UNION
-|  pass-through-operands: all
-|  mem-estimate=0B mem-reservation=0B thread-reservation=0
-|  tuple-ids=0 row-size=36B cardinality=2
-|  in pipelines: 03(GETNEXT), 00(GETNEXT)
-|
-|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  |  equality predicates: 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position
 = 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,
 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name
 = 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path
-|  |  mem-estimate=566B mem-reservation=566B thread-reservation=0
-|  |  tuple-ids=0 row-size=36B cardinality=1
-|  |  in pipelines: 00(GETNEXT), 01(OPEN)
-|  |
-|  |--01:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01
 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
-|  |     HDFS partitions=1/1 files=2 size=5.33KB
-|  |     Iceberg snapshot id: 1497619269847778439
-|  |     stored statistics:
-|  |       table: rows=4 size=5.33KB
-|  |       columns: all
-|  |     extrapolated-rows=disabled max-scan-range-rows=2
-|  |     mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
-|  |     tuple-ids=1 row-size=267B cardinality=4
-|  |     in pipelines: 01(GETNEXT)
-|  |
-|  00:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
-|     HDFS partitions=1/1 files=1 size=625B
-|     Iceberg snapshot id: 1497619269847778439
-|     stored statistics:
-|       table: rows=10 size=7.77KB
-|       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=10
-|     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
-|     tuple-ids=0 row-size=36B cardinality=3
-|     in pipelines: 00(GETNEXT)
-|
-03:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
-   HDFS partitions=1/1 files=1 size=620B
-   Iceberg snapshot id: 1497619269847778439
-   stored statistics:
-     table: rows=10 size=7.77KB
-     columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=10
-   mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
-   tuple-ids=0 row-size=36B cardinality=1
-   in pipelines: 03(GETNEXT)
-====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-compute-stats-table-sampling.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-compute-stats-table-sampling.test
new file mode 100644
index 000000000..19e81836a
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-compute-stats-table-sampling.test
@@ -0,0 +1,234 @@
+====
+---- QUERY
+DROP STATS iceberg_non_partitioned;
+COMPUTE STATS iceberg_non_partitioned tablesample system(10) repeatable(1234);
+---- RESULTS
+'Updated 1 partition(s) and 4 column(s).'
+---- TYPES
+STRING
+====
+---- QUERY
+SHOW TABLE STATS iceberg_non_partitioned;
+---- LABELS
+#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental 
stats, Location, EC Policy
+---- RESULTS: VERIFY_IS_EQUAL
+20,20,'22.90KB','NOT CACHED','NOT 
CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/iceberg_non_partitioned','$ERASURECODE_POLICY'
+---- TYPES
+BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING
+====
+---- QUERY
+SHOW COLUMN STATS iceberg_non_partitioned;
+---- RESULTS
+'id','INT',3,0,4,4,-1,-1
+'user','STRING',2,0,4,4,-1,-1
+'action','STRING',2,0,5,4.333333492279053,-1,-1
+'event_time','TIMESTAMP',2,0,16,16,-1,-1
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
+====
+---- QUERY
+DROP STATS iceberg_non_partitioned;
+COMPUTE STATS iceberg_non_partitioned tablesample system(10) repeatable(1111);
+---- RESULTS
+'Updated 1 partition(s) and 4 column(s).'
+---- TYPES
+STRING
+====
+---- QUERY
+SHOW TABLE STATS iceberg_non_partitioned;
+---- LABELS
+#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental 
stats, Location, EC Policy
+---- RESULTS: VERIFY_IS_EQUAL
+20,20,'22.90KB','NOT CACHED','NOT 
CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/iceberg_non_partitioned','$ERASURECODE_POLICY'
+---- TYPES
+BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING
+====
+---- QUERY
+SHOW COLUMN STATS iceberg_non_partitioned;
+---- RESULTS
+'id','INT',2,0,4,4,-1,-1
+'user','STRING',2,0,4,4,-1,-1
+'action','STRING',2,0,8,6.5,-1,-1
+'event_time','TIMESTAMP',2,0,16,16,-1,-1
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
+====
+---- QUERY
+DROP STATS iceberg_partitioned;
+COMPUTE STATS iceberg_partitioned tablesample system(10) repeatable(1111);
+---- RESULTS
+'Updated 1 partition(s) and 4 column(s).'
+---- TYPES
+STRING
+====
+---- QUERY
+SHOW TABLE STATS iceberg_partitioned;
+---- LABELS
+#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental 
stats, Location, EC Policy
+---- RESULTS: VERIFY_IS_EQUAL
+20,20,'22.90KB','NOT CACHED','NOT 
CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/iceberg_partitioned','$ERASURECODE_POLICY'
+---- TYPES
+BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING
+====
+---- QUERY
+SHOW COLUMN STATS iceberg_partitioned;
+---- RESULTS
+'id','INT',3,0,4,4,-1,-1
+'user','STRING',3,0,4,4,-1,-1
+'action','STRING',3,0,8,5.666666507720947,-1,-1
+'event_time','TIMESTAMP',3,0,16,16,-1,-1
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
+====
+---- QUERY
+DROP STATS iceberg_v2_delete_equality_partitioned;
+COMPUTE STATS iceberg_v2_delete_equality_partitioned tablesample system(10) 
repeatable(1111);
+---- RESULTS
+'Updated 1 partition(s) and 3 column(s).'
+---- TYPES
+STRING
+====
+---- QUERY
+SHOW TABLE STATS iceberg_v2_delete_equality_partitioned;
+---- LABELS
+#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental 
stats, Location, EC Policy
+---- RESULTS: VERIFY_IS_EQUAL
+3,6,'4.81KB','NOT CACHED','NOT 
CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_partitioned','$ERASURECODE_POLICY'
+---- TYPES
+BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING
+====
+---- QUERY
+SHOW COLUMN STATS iceberg_v2_delete_equality_partitioned;
+---- RESULTS
+'i','INT',2,0,4,4,-1,-1
+'s','STRING',2,0,4,4,-1,-1
+'d','DATE',1,0,4,4,-1,-1
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
+====
+---- QUERY
+DROP STATS iceberg_v2_delete_equality_partitioned;
+COMPUTE STATS iceberg_v2_delete_equality_partitioned tablesample system(10) 
repeatable(1111);
+---- RESULTS
+'Updated 1 partition(s) and 3 column(s).'
+---- TYPES
+STRING
+====
+---- QUERY
+SHOW TABLE STATS iceberg_v2_delete_equality_partitioned;
+---- LABELS
+#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental 
stats, Location, EC Policy
+---- RESULTS: VERIFY_IS_EQUAL
+3,6,'4.81KB','NOT CACHED','NOT 
CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_partitioned','$ERASURECODE_POLICY'
+---- TYPES
+BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING
+====
+---- QUERY
+SHOW COLUMN STATS iceberg_v2_delete_equality_partitioned;
+---- RESULTS
+'i','INT',2,0,4,4,-1,-1
+'s','STRING',2,0,4,4,-1,-1
+'d','DATE',1,0,4,4,-1,-1
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
+====
+---- QUERY
+DROP STATS iceberg_v2_positional_not_all_data_files_have_delete_files;
+COMPUTE STATS iceberg_v2_positional_not_all_data_files_have_delete_files 
tablesample system(30) repeatable(1111);
+---- RESULTS
+'Updated 1 partition(s) and 2 column(s).'
+---- TYPES
+STRING
+====
+---- QUERY
+SHOW TABLE STATS iceberg_v2_positional_not_all_data_files_have_delete_files;
+---- LABELS
+#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental 
stats, Location, EC Policy
+---- RESULTS: VERIFY_IS_EQUAL
+1,6,'7.77KB','NOT CACHED','NOT 
CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_positional_not_all_data_files_have_delete_files','$ERASURECODE_POLICY'
+---- TYPES
+BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING
+====
+---- QUERY
+SHOW COLUMN STATS iceberg_v2_positional_not_all_data_files_have_delete_files;
+---- RESULTS
+'i','INT',1,0,4,4,-1,-1
+'s','STRING',1,0,1,1,-1,-1
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
+====
+---- QUERY
+DROP STATS iceberg_v2_positional_not_all_data_files_have_delete_files_orc;
+COMPUTE STATS iceberg_v2_positional_not_all_data_files_have_delete_files_orc 
tablesample system(30) repeatable(1111);
+---- RESULTS
+'Updated 1 partition(s) and 2 column(s).'
+---- TYPES
+STRING
+====
+---- QUERY
+SHOW TABLE STATS 
iceberg_v2_positional_not_all_data_files_have_delete_files_orc;
+---- LABELS
+#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental 
stats, Location, EC Policy
+---- RESULTS: VERIFY_IS_EQUAL
+1,6,'3.97KB','NOT CACHED','NOT 
CACHED','ORC','false','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_positional_not_all_data_files_have_delete_files_orc','$ERASURECODE_POLICY'
+---- TYPES
+BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING
+====
+---- QUERY
+SHOW COLUMN STATS 
iceberg_v2_positional_not_all_data_files_have_delete_files_orc;
+---- RESULTS
+'i','INT',1,0,4,4,-1,-1
+'s','STRING',1,0,1,1,-1,-1
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
+====
+---- QUERY
+DROP STATS iceberg_v2_positional_not_all_data_files_have_delete_files;
+COMPUTE STATS iceberg_v2_positional_not_all_data_files_have_delete_files 
tablesample system(50) repeatable(1111);
+---- RESULTS
+'Updated 1 partition(s) and 2 column(s).'
+---- TYPES
+STRING
+====
+---- QUERY
+SHOW TABLE STATS iceberg_v2_positional_not_all_data_files_have_delete_files;
+---- LABELS
+#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental 
stats, Location, EC Policy
+---- RESULTS: VERIFY_IS_EQUAL
+4,6,'7.77KB','NOT CACHED','NOT 
CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_positional_not_all_data_files_have_delete_files','$ERASURECODE_POLICY'
+---- TYPES
+BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING
+====
+---- QUERY
+SHOW COLUMN STATS iceberg_v2_positional_not_all_data_files_have_delete_files;
+---- RESULTS
+'i','INT',4,0,4,4,-1,-1
+'s','STRING',4,0,1,1,-1,-1
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
+====
+---- QUERY
+DROP STATS iceberg_v2_positional_not_all_data_files_have_delete_files_orc;
+COMPUTE STATS iceberg_v2_positional_not_all_data_files_have_delete_files_orc 
tablesample system(50) repeatable(1111);
+---- RESULTS
+'Updated 1 partition(s) and 2 column(s).'
+---- TYPES
+STRING
+====
+---- QUERY
+SHOW TABLE STATS 
iceberg_v2_positional_not_all_data_files_have_delete_files_orc;
+---- LABELS
+#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental 
stats, Location, EC Policy
+---- RESULTS: VERIFY_IS_EQUAL
+4,6,'3.97KB','NOT CACHED','NOT 
CACHED','ORC','false','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_positional_not_all_data_files_have_delete_files_orc','$ERASURECODE_POLICY'
+---- TYPES
+BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING
+====
+---- QUERY
+SHOW COLUMN STATS 
iceberg_v2_positional_not_all_data_files_have_delete_files_orc;
+---- RESULTS
+'i','INT',4,0,4,4,-1,-1
+'s','STRING',4,0,1,1,-1,-1
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
+====
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 141808c67..8515fb910 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -1545,6 +1545,14 @@ class TestIcebergV2Table(IcebergTestSuite):
     self.run_test_case('QueryTest/iceberg-v2-read-position-deletes-stats', vector)
     self.run_test_case('QueryTest/iceberg-v2-read-position-deletes-orc-stats', vector)
 
+  @SkipIfDockerizedCluster.internal_hostname
+  @SkipIf.hardcoded_uris
+  @pytest.mark.execute_serially
+  def test_compute_stats_table_sampling(self, vector):
+    """Tests COMPUTE STATS with table sampling."""
+    vector.get_value('exec_option')['COMPUTE_STATS_MIN_SAMPLE_SIZE'] = 0
+    self.run_test_case('QueryTest/iceberg-v2-compute-stats-table-sampling', vector)
+
   @SkipIfFS.hive
   def test_read_mixed_format_position_deletes(self, vector, unique_database):
     self.run_test_case('QueryTest/iceberg-mixed-format-position-deletes',

(impala) 02/04: IMPALA-14014: Fix COMPUTE STATS with TABLESAMPLE clause

Reply via email to