This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 4640a72a81e49c45fb7629950befdd89edf82381 Author: Zoltan Borok-Nagy <[email protected]> AuthorDate: Fri May 9 14:41:06 2025 +0200 IMPALA-14014: Fix COMPUTE STATS with TABLESAMPLE clause COMPUTE STATS with TABLESAMPLE clause did a full scan on Iceberg tables since IMPALA-13737, because before this patch ComputeStatsStmt used FeFsTable.Utils.getFilesSample(), which only works correctly on FS tables that have the file descriptors loaded. Since IMPALA-13737 the internal FS table of an Iceberg table doesn't have file descriptor information; therefore FeFsTable.Utils.getFilesSample() returned an empty map, which turned off table sampling for COMPUTE STATS. We did not have proper testing for COMPUTE STATS with table sampling, so we did not catch the regression. This patch adds proper table sampling logic for Iceberg tables that can be used for COMPUTE STATS. The algorithm previously found in IcebergScanNode.getFilesSample() has been moved to FeIcebergTable.Utils.getFilesSample(). Testing * added e2e tests Change-Id: Ie59d5fc1374ab69209a74f2488bcb9a7d510b782 Reviewed-on: http://gerrit.cloudera.org:8080/22873 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../apache/impala/analysis/ComputeStatsStmt.java | 16 +- .../java/org/apache/impala/analysis/TableRef.java | 2 +- .../java/org/apache/impala/catalog/FeFsTable.java | 6 + .../org/apache/impala/catalog/FeIcebergTable.java | 101 ++++++++- .../impala/catalog/IcebergContentFileStore.java | 6 + .../org/apache/impala/planner/HdfsScanNode.java | 4 +- .../apache/impala/planner/IcebergDeleteNode.java | 27 ++- .../org/apache/impala/planner/IcebergScanNode.java | 42 +--- .../org/apache/impala/analysis/AnalyzeDDLTest.java | 12 +- .../apache/impala/analysis/AnalyzeStmtsTest.java | 16 +- .../org/apache/impala/planner/PlannerTest.java | 11 +- .../functional/functional_schema_template.sql | 9 +- .../PlannerTest/iceberg-merge-insert-only.test | 10 +- 
.../PlannerTest/iceberg-v2-tables-resources.test | 128 +++++------ .../queries/PlannerTest/tablesample-iceberg.test | 206 ++++++++++++++++++ .../queries/PlannerTest/tablesample.test | 148 ------------- .../iceberg-v2-compute-stats-table-sampling.test | 234 +++++++++++++++++++++ tests/query_test/test_iceberg.py | 8 + 18 files changed, 693 insertions(+), 293 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java index bd2dd195a..952126a07 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java @@ -148,7 +148,7 @@ public class ComputeStatsStmt extends StatementBase implements SingleTableStmt { protected FeTable table_; // Effective sampling percent based on the total number of bytes in the files sample. - // Set to -1 for non-HDFS tables or if TABLESAMPLE was not specified. + // Set to -1 for non-FS tables or if TABLESAMPLE was not specified. // We run the regular COMPUTE STATS for 0.0 and 1.0 where sampling has no benefit. 
protected double effectiveSamplePerc_ = -1; @@ -805,10 +805,10 @@ public class ComputeStatsStmt extends StatementBase implements SingleTableStmt { private String analyzeTableSampleClause(Analyzer analyzer) throws AnalysisException { if (sampleParams_ == null) return ""; if (!(table_ instanceof FeFsTable)) { - throw new AnalysisException("TABLESAMPLE is only supported on HDFS tables."); + throw new AnalysisException("TABLESAMPLE is only supported on file-based tables."); } - FeFsTable hdfsTable = (FeFsTable) table_; - if (!FeFsTable.Utils.isStatsExtrapolationEnabled(hdfsTable)) { + FeFsTable feFsTable = (FeFsTable) table_; + if (!FeFsTable.Utils.isStatsExtrapolationEnabled(feFsTable)) { throw new AnalysisException(String.format( "COMPUTE STATS TABLESAMPLE requires stats extrapolation which is disabled.\n" + "Stats extrapolation can be enabled service-wide with %s=true or by altering " + @@ -827,17 +827,15 @@ public class ComputeStatsStmt extends StatementBase implements SingleTableStmt { // Compute the sample of files and set 'sampleFileBytes_'. long minSampleBytes = analyzer.getQueryOptions().compute_stats_min_sample_size; long samplePerc = sampleParams_.getPercentBytes(); - // TODO(todd): can we avoid loading all the partitions for this? - Collection<? extends FeFsPartition> partitions = hdfsTable.loadAllPartitions(); - Map<Long, List<FileDescriptor>> sample = FeFsTable.Utils.getFilesSample( - hdfsTable, partitions, samplePerc, minSampleBytes, sampleSeed); + Map<Long, List<FileDescriptor>> sample = feFsTable.getFilesSample( + samplePerc, minSampleBytes, sampleSeed); long sampleFileBytes = 0; for (List<FileDescriptor> fds: sample.values()) { for (FileDescriptor fd: fds) sampleFileBytes += fd.getFileLength(); } // Compute effective sampling percent. 
- long totalFileBytes = ((FeFsTable)table_).getTotalHdfsBytes(); + long totalFileBytes = feFsTable.getTotalHdfsBytes(); if (totalFileBytes > 0) { effectiveSamplePerc_ = (double) sampleFileBytes / (double) totalFileBytes; } else { diff --git a/fe/src/main/java/org/apache/impala/analysis/TableRef.java b/fe/src/main/java/org/apache/impala/analysis/TableRef.java index a4091928b..c372d93c2 100644 --- a/fe/src/main/java/org/apache/impala/analysis/TableRef.java +++ b/fe/src/main/java/org/apache/impala/analysis/TableRef.java @@ -482,7 +482,7 @@ public class TableRef extends StmtNode { if (!(this instanceof BaseTableRef) || !(resolvedPath_.destTable() instanceof FeFsTable)) { throw new AnalysisException( - "TABLESAMPLE is only supported on HDFS tables: " + getUniqueAlias()); + "TABLESAMPLE is only supported on file-based tables: " + getUniqueAlias()); } } diff --git a/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java b/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java index a3f3d0a92..034b3439d 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java @@ -573,6 +573,12 @@ public interface FeFsTable extends FeTable { return result; } + default Map<Long, List<FileDescriptor>> getFilesSample( + long percentBytes, long minSampleBytes, long randomSeed) { + return Utils.getFilesSample(this, loadAllPartitions(), percentBytes, minSampleBytes, + randomSeed); + } + /** * Utility functions for operating on FeFsTable. When we move fully to Java 8, * these can become default methods of the interface. 
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java index 4366a2355..51d68ad5b 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java @@ -30,6 +30,7 @@ import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Random; import java.util.Set; import java.util.TreeMap; import java.util.stream.Collectors; @@ -236,7 +237,7 @@ public interface FeIcebergTable extends FeFsTable { @Override default long getTotalHdfsBytes() { - return getFeFsTable().getTotalHdfsBytes(); + return getTTableStats().getTotal_file_bytes(); } @Override @@ -328,6 +329,48 @@ public interface FeIcebergTable extends FeFsTable { return false; } + @Override /* FeFsTable */ + default Map<Long, List<FileDescriptor>> getFilesSample( + long percentBytes, long minSampleBytes, long randomSeed) { + // There will be two separate IcebergScanNodes for data files without delete, and for + // data files with deletes, which means they will be sampled independently. Let's also + // sample them separately here. 
+ Map<Long, List<FileDescriptor>> dataFilesWithoutDeletesSample = Utils.getFilesSample( + this, getContentFileStore().getDataFilesWithoutDeletes(), false, + percentBytes, minSampleBytes, randomSeed); + Map<Long, List<FileDescriptor>> dataFilesWithDeletesSample = Utils.getFilesSample( + this, getContentFileStore().getDataFilesWithDeletes(), false, + percentBytes, minSampleBytes, randomSeed); + + Map<Long, List<FileDescriptor>> mergedResult = new HashMap<>(); + mergedResult.putAll(dataFilesWithoutDeletesSample); + for (Map.Entry<Long, List<FileDescriptor>> entry : + dataFilesWithDeletesSample.entrySet()) { + List<FileDescriptor> fds = mergedResult.get(entry.getKey()); + if (fds != null) { + fds.addAll(entry.getValue()); + } else { + mergedResult.put(entry.getKey(), entry.getValue()); + } + } + + // There is no need to add the delete files if there are no data files. + if (mergedResult.isEmpty()) return mergedResult; + + // We should have only a single element in the map as there is only a single + // partition in the table. + Preconditions.checkState(mergedResult.size() == 1); + + // We don't sample delete files (for correctness), let's add all of them to + // the merged result. + for (Map.Entry<Long, List<FileDescriptor>> entry : mergedResult.entrySet()) { + for (FileDescriptor fd : getContentFileStore().getAllDeleteFiles()) { + entry.getValue().add(fd); + } + } + return mergedResult; + } + THdfsTable transformToTHdfsTable(boolean updatePartitionFlag, ThriftObjectType type); /** @@ -701,6 +744,62 @@ public interface FeIcebergTable extends FeFsTable { return fileDescMap; } + /** + * Return a sample of data files (choosing from 'fileDescs') according to the + * parameters. + * @filesAreSorted if true then the file descriptors are already sorted + * @percentBytes percent of the total number of bytes we want to sample at least. + * @minSampleBytes minimum number of bytes need to be selected. + * @randomSeed random seed for repeatable sampling. 
+ * The algorithm is based on FeFsTable.Utils.getFilesSample() + */ + public static Map<Long, List<FileDescriptor>> getFilesSample( + FeIcebergTable iceTbl, Iterable<? extends FileDescriptor> fileDescs, + boolean filesAreSorted, + long percentBytes, long minSampleBytes, long randomSeed) { + Preconditions.checkState(percentBytes >= 0 && percentBytes <= 100); + Preconditions.checkState(minSampleBytes >= 0); + + // Ensure a consistent ordering of files for repeatable runs. + List<FileDescriptor> orderedFds = Lists.newArrayList(fileDescs); + if (!filesAreSorted) { + Collections.sort(orderedFds); + } + + List<FeFsPartition> partitions = new ArrayList<>( + iceTbl.getFeFsTable().loadAllPartitions()); + Preconditions.checkState(partitions.size() == 1); + FeFsPartition part = partitions.get(0); + + long totalBytes = 0; + for (FileDescriptor fd : orderedFds) { + totalBytes += fd.getFileLength(); + } + + int numFilesRemaining = orderedFds.size(); + double fracPercentBytes = (double) percentBytes / 100; + long targetBytes = (long) Math.round(totalBytes * fracPercentBytes); + targetBytes = Math.max(targetBytes, minSampleBytes); + + // Randomly select files until targetBytes has been reached or all files have been + // selected. + Random rnd = new Random(randomSeed); + long selectedBytes = 0; + List<FileDescriptor> sampleFiles = Lists.newArrayList(); + while (selectedBytes < targetBytes && numFilesRemaining > 0) { + int selectedIdx = rnd.nextInt(numFilesRemaining); + FileDescriptor fd = orderedFds.get(selectedIdx); + sampleFiles.add(fd); + selectedBytes += fd.getFileLength(); + // Avoid selecting the same file multiple times. 
+ orderedFds.set(selectedIdx, orderedFds.get(numFilesRemaining - 1)); + --numFilesRemaining; + } + Map<Long, List<FileDescriptor>> result = new HashMap<>(); + result.put(part.getId(), sampleFiles); + return result; + } + /** * Get FileDescriptor by data file location */ diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java index c857897b6..d9a69c1e2 100644 --- a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java +++ b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java @@ -275,6 +275,12 @@ public class IcebergContentFileStore { dataFilesWithDeletes_.getList()); } + public Iterable<IcebergFileDescriptor> getAllDeleteFiles() { + return Iterables.concat( + positionDeleteFiles_.getList(), + equalityDeleteFiles_.getList()); + } + public boolean hasAvro() { return hasAvro_; } public boolean hasOrc() { return hasOrc_; } public boolean hasParquet() { return hasParquet_; } diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java index 446add178..ed47bd1f6 100644 --- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java @@ -223,7 +223,7 @@ public class HdfsScanNode extends ScanNode { // An estimate of the width of a row when the information is not available. private double DEFAULT_ROW_WIDTH_ESTIMATE = 1.0; - private final FeFsTable tbl_; + protected final FeFsTable tbl_; // List of partitions to be scanned. Partitions have been pruned. 
protected final List<FeFsPartition> partitions_; @@ -2720,6 +2720,8 @@ public class HdfsScanNode extends ScanNode { return super.isTableMissingTableStats(); } + public TableSampleClause getSampleParams() { return sampleParams_; } + @Override public boolean hasCorruptTableStats() { return hasCorruptTableStats_; } diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergDeleteNode.java b/fe/src/main/java/org/apache/impala/planner/IcebergDeleteNode.java index ab38561b2..198630b3e 100644 --- a/fe/src/main/java/org/apache/impala/planner/IcebergDeleteNode.java +++ b/fe/src/main/java/org/apache/impala/planner/IcebergDeleteNode.java @@ -26,6 +26,7 @@ import org.apache.impala.analysis.BinaryPredicate; import org.apache.impala.analysis.Expr; import org.apache.impala.analysis.ExprSubstitutionMap; import org.apache.impala.analysis.JoinOperator; +import org.apache.impala.analysis.TableSampleClause; import org.apache.impala.catalog.Type; import org.apache.impala.common.ImpalaException; import org.apache.impala.common.Pair; @@ -94,14 +95,26 @@ public class IcebergDeleteNode extends JoinNode { // Also assume that the left side's selectivity applies to the delete records as well. // Please note that left side's cardinality already takes the selectivity into // account (i.e. no need to do leftSelectivity * leftCard). - long leftCardWithSelectivity = getChild(0).cardinality_; + PlanNode leftChild = getChild(0); + Preconditions.checkState(leftChild instanceof HdfsScanNode); + HdfsScanNode leftScanChild = (HdfsScanNode) leftChild; + TableSampleClause leftSampleParams = leftScanChild.getSampleParams(); + long leftCardWithSelectivity = leftScanChild.cardinality_; long rightCard = getChild(1).cardinality_; - // Both sides should have non-zero cardinalities. 
- Preconditions.checkState(leftCardWithSelectivity > 0); - Preconditions.checkState(rightCard > 0); - double leftSelectivity = getChild(0).computeSelectivity(); - long rightCardWithSelectivity = (long)(leftSelectivity * rightCard); - cardinality_ = Math.max(1, leftCardWithSelectivity - rightCardWithSelectivity); + // Both sides should have non-negative cardinalities. + Preconditions.checkState(leftCardWithSelectivity >= 0); + Preconditions.checkState(rightCard >= 0); + // The delete records on the right might refer to data records that are filtered + // out by predicates or table sampling. Let's incorporate this into our cardinality + // estimation. + double leftSelectivity = leftScanChild.computeSelectivity(); + long effectiveRightCardinality = (long)(leftSelectivity * rightCard); + double leftSampling = leftSampleParams == null ? + 1.0 : leftSampleParams.getPercentBytes() / 100.0; + Preconditions.checkState(leftSampling >= 0); + Preconditions.checkState(leftSampling <= 1.0); + effectiveRightCardinality = (long)(effectiveRightCardinality * leftSampling); + cardinality_ = Math.max(1, leftCardWithSelectivity - effectiveRightCardinality); } @Override diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java index b59d51519..0c1223521 100644 --- a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java @@ -204,50 +204,12 @@ public class IcebergScanNode extends HdfsScanNode { /** * Returns a sample of file descriptors associated to this scan node. 
- * The algorithm is based on FeFsTable.Utils.getFilesSample() */ @Override protected Map<Long, List<FileDescriptor>> getFilesSample( long percentBytes, long minSampleBytes, long randomSeed) { - Preconditions.checkState(percentBytes >= 0 && percentBytes <= 100); - Preconditions.checkState(minSampleBytes >= 0); - - // Ensure a consistent ordering of files for repeatable runs. - List<FileDescriptor> orderedFds = Lists.newArrayList(fileDescs_); - if (!filesAreSorted_) { - Collections.sort(orderedFds); - } - - Preconditions.checkState(partitions_.size() == 1); - FeFsPartition part = partitions_.get(0); - - long totalBytes = 0; - for (FileDescriptor fd : orderedFds) { - totalBytes += fd.getFileLength(); - } - - int numFilesRemaining = orderedFds.size(); - double fracPercentBytes = (double) percentBytes / 100; - long targetBytes = (long) Math.round(totalBytes * fracPercentBytes); - targetBytes = Math.max(targetBytes, minSampleBytes); - - // Randomly select files until targetBytes has been reached or all files have been - // selected. - Random rnd = new Random(randomSeed); - long selectedBytes = 0; - List<FileDescriptor> sampleFiles = Lists.newArrayList(); - while (selectedBytes < targetBytes && numFilesRemaining > 0) { - int selectedIdx = rnd.nextInt(numFilesRemaining); - FileDescriptor fd = orderedFds.get(selectedIdx); - sampleFiles.add(fd); - selectedBytes += fd.getFileLength(); - // Avoid selecting the same file multiple times. 
- orderedFds.set(selectedIdx, orderedFds.get(numFilesRemaining - 1)); - --numFilesRemaining; - } - Map<Long, List<FileDescriptor>> result = new HashMap<>(); - result.put(part.getId(), sampleFiles); - return result; + return FeIcebergTable.Utils.getFilesSample((FeIcebergTable) tbl_, fileDescs_, + filesAreSorted_, percentBytes, minSampleBytes, randomSeed); } @Override diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java index 47a64bcc5..9cd1eee25 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java @@ -1940,12 +1940,12 @@ public class AnalyzeDDLTest extends FrontendTestBase { "Invalid percent of bytes value '101'. " + "The percent of bytes to sample must be between 0 and 100."); AnalysisError("compute stats functional_kudu.alltypes tablesample system (1)", - "TABLESAMPLE is only supported on HDFS tables."); + "TABLESAMPLE is only supported on file-based tables."); AnalysisError("compute stats functional_hbase.alltypes tablesample system (2)", - "TABLESAMPLE is only supported on HDFS tables."); + "TABLESAMPLE is only supported on file-based tables."); AnalysisError( "compute stats functional.alltypes_datasource tablesample system (3)", - "TABLESAMPLE is only supported on HDFS tables."); + "TABLESAMPLE is only supported on file-based tables."); // Test file formats with columns whitelist. gflags.setEnable_stats_extrapolation(true); @@ -1957,12 +1957,12 @@ public class AnalyzeDDLTest extends FrontendTestBase { "Invalid percent of bytes value '101'. 
" + "The percent of bytes to sample must be between 0 and 100."); AnalysisError("compute stats functional_kudu.alltypes tablesample system (1)", - "TABLESAMPLE is only supported on HDFS tables."); + "TABLESAMPLE is only supported on file-based tables."); AnalysisError("compute stats functional_hbase.alltypes tablesample system (2)", - "TABLESAMPLE is only supported on HDFS tables."); + "TABLESAMPLE is only supported on file-based tables."); AnalysisError( "compute stats functional.alltypes_datasource tablesample system (3)", - "TABLESAMPLE is only supported on HDFS tables."); + "TABLESAMPLE is only supported on file-based tables."); // Test different COMPUTE_STATS_MIN_SAMPLE_BYTES. TQueryOptions queryOpts = new TQueryOptions(); diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java index abb1e71bc..87fafd003 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java @@ -393,27 +393,27 @@ public class AnalyzeStmtsTest extends AnalyzerTest { // Only applicable to HDFS base table refs. 
AnalysisError("select * from functional_kudu.alltypes tablesample system (10)", - "TABLESAMPLE is only supported on HDFS tables: functional_kudu.alltypes"); + "TABLESAMPLE is only supported on file-based tables: functional_kudu.alltypes"); AnalysisError("select * from functional_hbase.alltypes tablesample system (10)", - "TABLESAMPLE is only supported on HDFS tables: functional_hbase.alltypes"); + "TABLESAMPLE is only supported on file-based tables: functional_hbase.alltypes"); AnalysisError("select * from functional.alltypes_datasource tablesample system (10)", - "TABLESAMPLE is only supported on HDFS tables: " + + "TABLESAMPLE is only supported on file-based tables: " + "functional.alltypes_datasource"); AnalysisError("select * from (select * from functional.alltypes) v " + "tablesample system (10)", - "TABLESAMPLE is only supported on HDFS tables: v"); + "TABLESAMPLE is only supported on file-based tables: v"); AnalysisError("with v as (select * from functional.alltypes) " + "select * from v tablesample system (10)", - "TABLESAMPLE is only supported on HDFS tables: v"); + "TABLESAMPLE is only supported on file-based tables: v"); AnalysisError("select * from functional.alltypes_view tablesample system (10)", - "TABLESAMPLE is only supported on HDFS tables: functional.alltypes_view"); + "TABLESAMPLE is only supported on file-based tables: functional.alltypes_view"); AnalysisError("select * from functional.allcomplextypes.int_array_col " + "tablesample system (10)", - "TABLESAMPLE is only supported on HDFS tables: " + + "TABLESAMPLE is only supported on file-based tables: " + "functional.allcomplextypes.int_array_col"); AnalysisError("select * from functional.allcomplextypes a, a.int_array_col " + "tablesample system (10)", - "TABLESAMPLE is only supported on HDFS tables: a.int_array_col"); + "TABLESAMPLE is only supported on file-based tables: a.int_array_col"); } /** diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java 
b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java index a4a54a44e..258040600 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java @@ -1000,9 +1000,18 @@ public class PlannerTest extends PlannerTestBase { public void testTableSample() { TQueryOptions options = defaultQueryOptions(); runPlannerTestFile("tablesample", options, + ImmutableSet.of(PlannerTestOption.EXTENDED_EXPLAIN, + PlannerTestOption.DO_NOT_VALIDATE_ROWCOUNT_ESTIMATION_FOR_PARTITIONS)); + } + + @Test + public void testTableSampleIceberg() { + TQueryOptions options = defaultQueryOptions(); + runPlannerTestFile("tablesample-iceberg", options, ImmutableSet.of(PlannerTestOption.EXTENDED_EXPLAIN, PlannerTestOption.DO_NOT_VALIDATE_ROWCOUNT_ESTIMATION_FOR_PARTITIONS, - PlannerTestOption.VALIDATE_ICEBERG_SNAPSHOT_IDS)); + PlannerTestOption.VALIDATE_ICEBERG_SNAPSHOT_IDS, + PlannerTestOption.VALIDATE_CARDINALITY)); } @Test diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index 2db189abc..d002e934a 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -3213,7 +3213,8 @@ iceberg_partitioned CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} STORED AS ICEBERG LOCATION '/test-warehouse/iceberg_test/iceberg_partitioned' -TBLPROPERTIES('write.format.default'='parquet', 'iceberg.catalog'='hadoop.tables'); +TBLPROPERTIES('write.format.default'='parquet', 'iceberg.catalog'='hadoop.tables', + 'impala.enable.stats.extrapolation'='true'); ---- DEPENDENT_LOAD `hadoop fs -mkdir -p /test-warehouse/iceberg_test && \ hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/iceberg_partitioned /test-warehouse/iceberg_test/ @@ -3226,7 +3227,8 @@ iceberg_non_partitioned CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} 
STORED AS ICEBERG LOCATION '/test-warehouse/iceberg_test/iceberg_non_partitioned' -TBLPROPERTIES('write.format.default'='parquet', 'iceberg.catalog'='hadoop.tables'); +TBLPROPERTIES('write.format.default'='parquet', 'iceberg.catalog'='hadoop.tables', + 'impala.enable.stats.extrapolation'='true'); ---- DEPENDENT_LOAD `hadoop fs -mkdir -p /test-warehouse/iceberg_test && \ hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/iceberg_non_partitioned /test-warehouse/iceberg_test/ @@ -3627,6 +3629,7 @@ STORED AS ICEBERG TBLPROPERTIES('write.format.default'='parquet', 'iceberg.catalog'='hadoop.catalog', 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog', 'iceberg.table_identifier'='ice.iceberg_v2_delete_equality_partitioned', + 'impala.enable.stats.extrapolation'='true', 'format-version'='2'); ---- DEPENDENT_LOAD `hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \ @@ -3764,6 +3767,7 @@ STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog', 'iceberg.table_identifier'='ice.iceberg_v2_positional_not_all_data_files_have_delete_files', + 'impala.enable.stats.extrapolation'='true', 'format-version'='2'); ---- DEPENDENT_LOAD `hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \ @@ -3779,6 +3783,7 @@ STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog', 'iceberg.table_identifier'='ice.iceberg_v2_positional_not_all_data_files_have_delete_files_orc', + 'impala.enable.stats.extrapolation'='true', 'format-version'='2', 'write.format.default'='orc'); ---- DEPENDENT_LOAD `hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \ diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-merge-insert-only.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-merge-insert-only.test index 
06f528f52..5ff3e2297 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-merge-insert-only.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-merge-insert-only.test @@ -42,7 +42,7 @@ WRITE TO HDFS [functional_parquet.iceberg_v2_no_deletes, OVERWRITE=false] | stored statistics: | table: rows=20 size=22.90KB | columns: unavailable -| extrapolated-rows=disabled max-scan-range-rows=1 +| extrapolated-rows=unavailable max-scan-range-rows=1 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=1 row-size=44B cardinality=20 | in pipelines: 01(GETNEXT) @@ -103,7 +103,7 @@ WRITE TO HDFS [functional_parquet.iceberg_v2_no_deletes, OVERWRITE=false] | stored statistics: | table: rows=20 size=22.90KB | columns: unavailable -| extrapolated-rows=disabled max-scan-range-rows=1 +| extrapolated-rows=unavailable max-scan-range-rows=1 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=1 row-size=44B cardinality=20 | in pipelines: 01(GETNEXT) @@ -180,7 +180,7 @@ WRITE TO HDFS [functional_parquet.iceberg_v2_no_deletes, OVERWRITE=false] | stored statistics: | table: rows=20 size=22.90KB | columns: unavailable -| extrapolated-rows=disabled max-scan-range-rows=1 +| extrapolated-rows=unavailable max-scan-range-rows=1 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=1 row-size=44B cardinality=20 | in pipelines: 01(GETNEXT) @@ -249,7 +249,7 @@ WRITE TO HDFS [functional_parquet.iceberg_v2_no_deletes, OVERWRITE=false] | stored statistics: | table: rows=20 size=22.90KB | columns: unavailable -| extrapolated-rows=disabled max-scan-range-rows=1 +| extrapolated-rows=unavailable max-scan-range-rows=1 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=1 row-size=44B cardinality=20 | in pipelines: 01(GETNEXT) @@ -346,4 +346,4 @@ WRITE TO HDFS [functional_parquet.iceberg_partition_transforms_zorder, OVERWRITE mem-estimate=96.00MB 
mem-reservation=48.00KB thread-reservation=1 tuple-ids=0 row-size=52B cardinality=1 in pipelines: 00(GETNEXT) -==== \ No newline at end of file +==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-resources.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-resources.test index 3d8951f5e..d8011b34c 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-resources.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-resources.test @@ -475,7 +475,7 @@ PLAN-ROOT SINK | | stored statistics: | | table: rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | tuple-ids=1 row-size=267B cardinality=4 | | in pipelines: 01(GETNEXT) @@ -486,7 +486,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=10 size=7.77KB | columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=0 row-size=36B cardinality=6 | in pipelines: 00(GETNEXT) @@ -497,7 +497,7 @@ PLAN-ROOT SINK stored statistics: table: rows=10 size=7.77KB columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 tuple-ids=0 row-size=36B cardinality=4 in pipelines: 03(GETNEXT) @@ -542,7 +542,7 @@ Per-Host Resources: mem-estimate=64.17MB mem-reservation=32.76KB thread-reservat | | stored statistics: | | table: rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | 
tuple-ids=1 row-size=267B cardinality=4 | | in pipelines: 01(GETNEXT) @@ -553,7 +553,7 @@ Per-Host Resources: mem-estimate=64.17MB mem-reservation=32.76KB thread-reservat | stored statistics: | table: rows=10 size=7.77KB | columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=0 row-size=36B cardinality=6 | in pipelines: 00(GETNEXT) @@ -564,7 +564,7 @@ Per-Host Resources: mem-estimate=64.17MB mem-reservation=32.76KB thread-reservat stored statistics: table: rows=10 size=7.77KB columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 tuple-ids=0 row-size=36B cardinality=4 in pipelines: 03(GETNEXT) @@ -595,7 +595,7 @@ PLAN-ROOT SINK | | stored statistics: | | table: rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | tuple-ids=1 row-size=267B cardinality=4 | | in pipelines: 01(GETNEXT) @@ -606,7 +606,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=10 size=7.77KB | columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=0 row-size=36B cardinality=6 | in pipelines: 00(GETNEXT) @@ -617,7 +617,7 @@ PLAN-ROOT SINK stored statistics: table: rows=10 size=7.77KB columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 tuple-ids=0 row-size=36B cardinality=4 in pipelines: 03(GETNEXT) @@ -660,7 +660,7 @@ Per-Host 
Resources: mem-estimate=64.17MB mem-reservation=32.76KB thread-reservat | | stored statistics: | | table: rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | tuple-ids=1 row-size=267B cardinality=4 | | in pipelines: 01(GETNEXT) @@ -671,7 +671,7 @@ Per-Host Resources: mem-estimate=64.17MB mem-reservation=32.76KB thread-reservat | stored statistics: | table: rows=10 size=7.77KB | columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=0 row-size=36B cardinality=6 | in pipelines: 00(GETNEXT) @@ -682,7 +682,7 @@ Per-Host Resources: mem-estimate=64.17MB mem-reservation=32.76KB thread-reservat stored statistics: table: rows=10 size=7.77KB columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 tuple-ids=0 row-size=36B cardinality=4 in pipelines: 03(GETNEXT) @@ -739,7 +739,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=1 size=2.63KB | columns: all -| extrapolated-rows=disabled max-scan-range-rows=1 +| extrapolated-rows=unavailable max-scan-range-rows=1 | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | tuple-ids=2 row-size=267B cardinality=1 | in pipelines: 01(GETNEXT) @@ -750,7 +750,7 @@ PLAN-ROOT SINK stored statistics: table: rows=10 size=7.77KB columns: all - extrapolated-rows=disabled max-scan-range-rows=10 + extrapolated-rows=unavailable max-scan-range-rows=10 mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 tuple-ids=0 row-size=20B cardinality=3 in pipelines: 00(GETNEXT) @@ -799,7 +799,7 @@ Per-Host Resources: mem-estimate=32.02MB mem-reservation=16.76KB 
thread-reservat | stored statistics: | table: rows=1 size=2.63KB | columns: all -| extrapolated-rows=disabled max-scan-range-rows=1 +| extrapolated-rows=unavailable max-scan-range-rows=1 | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | tuple-ids=2 row-size=267B cardinality=1 | in pipelines: 01(GETNEXT) @@ -810,7 +810,7 @@ Per-Host Resources: mem-estimate=32.02MB mem-reservation=16.76KB thread-reservat stored statistics: table: rows=10 size=7.77KB columns: all - extrapolated-rows=disabled max-scan-range-rows=10 + extrapolated-rows=unavailable max-scan-range-rows=10 mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 tuple-ids=0 row-size=20B cardinality=3 in pipelines: 00(GETNEXT) @@ -841,7 +841,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=1 size=2.63KB | columns: all -| extrapolated-rows=disabled max-scan-range-rows=1 +| extrapolated-rows=unavailable max-scan-range-rows=1 | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | tuple-ids=2 row-size=267B cardinality=1 | in pipelines: 01(GETNEXT) @@ -852,7 +852,7 @@ PLAN-ROOT SINK stored statistics: table: rows=10 size=7.77KB columns: all - extrapolated-rows=disabled max-scan-range-rows=10 + extrapolated-rows=unavailable max-scan-range-rows=10 mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 tuple-ids=0 row-size=20B cardinality=3 in pipelines: 00(GETNEXT) @@ -901,7 +901,7 @@ Per-Host Resources: mem-estimate=32.02MB mem-reservation=16.76KB thread-reservat | stored statistics: | table: rows=1 size=2.63KB | columns: all -| extrapolated-rows=disabled max-scan-range-rows=1 +| extrapolated-rows=unavailable max-scan-range-rows=1 | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | tuple-ids=2 row-size=267B cardinality=1 | in pipelines: 01(GETNEXT) @@ -912,7 +912,7 @@ Per-Host Resources: mem-estimate=32.02MB mem-reservation=16.76KB thread-reservat stored statistics: table: rows=10 size=7.77KB columns: all - extrapolated-rows=disabled 
max-scan-range-rows=10 + extrapolated-rows=unavailable max-scan-range-rows=10 mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 tuple-ids=0 row-size=20B cardinality=3 in pipelines: 00(GETNEXT) @@ -943,7 +943,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=4 size=5.33KB | columns: all -| extrapolated-rows=disabled max-scan-range-rows=2 +| extrapolated-rows=unavailable max-scan-range-rows=2 | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | tuple-ids=2 row-size=267B cardinality=4 | in pipelines: 01(GETNEXT) @@ -954,7 +954,7 @@ PLAN-ROOT SINK stored statistics: table: rows=10 size=7.77KB columns: all - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 tuple-ids=0 row-size=20B cardinality=6 in pipelines: 00(GETNEXT) @@ -1003,7 +1003,7 @@ Per-Host Resources: mem-estimate=32.02MB mem-reservation=16.76KB thread-reservat | stored statistics: | table: rows=4 size=5.33KB | columns: all -| extrapolated-rows=disabled max-scan-range-rows=2 +| extrapolated-rows=unavailable max-scan-range-rows=2 | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | tuple-ids=2 row-size=267B cardinality=4 | in pipelines: 01(GETNEXT) @@ -1014,7 +1014,7 @@ Per-Host Resources: mem-estimate=32.02MB mem-reservation=16.76KB thread-reservat stored statistics: table: rows=10 size=7.77KB columns: all - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 tuple-ids=0 row-size=20B cardinality=6 in pipelines: 00(GETNEXT) @@ -1248,7 +1248,7 @@ PLAN-ROOT SINK | | stored statistics: | | table: rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | tuple-ids=1 
row-size=267B cardinality=4 | | in pipelines: 01(GETNEXT) @@ -1260,7 +1260,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=10 size=7.77KB | columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | parquet statistics predicates: i > CAST(2 AS INT) | parquet dictionary predicates: i > CAST(2 AS INT) | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 @@ -1274,7 +1274,7 @@ PLAN-ROOT SINK stored statistics: table: rows=10 size=7.77KB columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 parquet statistics predicates: i > CAST(2 AS INT) parquet dictionary predicates: i > CAST(2 AS INT) mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 @@ -1319,7 +1319,7 @@ Per-Host Resources: mem-estimate=64.17MB mem-reservation=32.76KB thread-reservat | | stored statistics: | | table: rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | tuple-ids=1 row-size=267B cardinality=4 | | in pipelines: 01(GETNEXT) @@ -1331,7 +1331,7 @@ Per-Host Resources: mem-estimate=64.17MB mem-reservation=32.76KB thread-reservat | stored statistics: | table: rows=10 size=7.77KB | columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | parquet statistics predicates: i > CAST(2 AS INT) | parquet dictionary predicates: i > CAST(2 AS INT) | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 @@ -1345,7 +1345,7 @@ Per-Host Resources: mem-estimate=64.17MB mem-reservation=32.76KB thread-reservat stored statistics: table: rows=10 size=7.77KB columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable 
max-scan-range-rows=5 parquet statistics predicates: i > CAST(2 AS INT) parquet dictionary predicates: i > CAST(2 AS INT) mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 @@ -1374,7 +1374,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=10 size=7.77KB | columns: unavailable -| extrapolated-rows=disabled max-scan-range-rows=10 +| extrapolated-rows=unavailable max-scan-range-rows=10 | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | tuple-ids=3 row-size=16B cardinality=3 | in pipelines: 06(GETNEXT) @@ -1403,7 +1403,7 @@ PLAN-ROOT SINK | | stored statistics: | | table: rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | tuple-ids=5 row-size=267B cardinality=4 | | in pipelines: 01(GETNEXT) @@ -1414,7 +1414,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=10 size=7.77KB | columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=0 row-size=36B cardinality=6 | in pipelines: 00(GETNEXT) @@ -1425,7 +1425,7 @@ PLAN-ROOT SINK stored statistics: table: rows=10 size=7.77KB columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 tuple-ids=0 row-size=36B cardinality=4 in pipelines: 03(GETNEXT) @@ -1462,7 +1462,7 @@ Per-Host Resources: mem-estimate=130.02MB mem-reservation=35.94MB thread-reserva | stored statistics: | table: rows=10 size=7.77KB | columns: unavailable -| extrapolated-rows=disabled max-scan-range-rows=10 +| extrapolated-rows=unavailable max-scan-range-rows=10 | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | tuple-ids=3 row-size=16B 
cardinality=3 | in pipelines: 06(GETNEXT) @@ -1511,7 +1511,7 @@ Per-Host Resources: mem-estimate=192.17MB mem-reservation=34.03MB thread-reserva | | stored statistics: | | table: rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | tuple-ids=5 row-size=267B cardinality=4 | | in pipelines: 01(GETNEXT) @@ -1522,7 +1522,7 @@ Per-Host Resources: mem-estimate=192.17MB mem-reservation=34.03MB thread-reserva | stored statistics: | table: rows=10 size=7.77KB | columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=0 row-size=36B cardinality=6 | in pipelines: 00(GETNEXT) @@ -1533,7 +1533,7 @@ Per-Host Resources: mem-estimate=192.17MB mem-reservation=34.03MB thread-reserva stored statistics: table: rows=10 size=7.77KB columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 tuple-ids=0 row-size=36B cardinality=4 in pipelines: 03(GETNEXT) @@ -1565,7 +1565,7 @@ PLAN-ROOT SINK | | stored statistics: | | table: rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | tuple-ids=2 row-size=267B cardinality=4 | | in pipelines: 01(GETNEXT) @@ -1577,7 +1577,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=10 size=7.77KB | columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | parquet dictionary predicates: CAST(i AS BIGINT) + CAST(1000 AS BIGINT) > CAST(1003 AS BIGINT) | 
mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=0 row-size=36B cardinality=1 @@ -1590,7 +1590,7 @@ PLAN-ROOT SINK stored statistics: table: rows=10 size=7.77KB columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 parquet dictionary predicates: CAST(i AS BIGINT) + CAST(1000 AS BIGINT) > CAST(1003 AS BIGINT) mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 tuple-ids=0 row-size=36B cardinality=1 @@ -1634,7 +1634,7 @@ Per-Host Resources: mem-estimate=64.17MB mem-reservation=32.76KB thread-reservat | | stored statistics: | | table: rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | tuple-ids=2 row-size=267B cardinality=4 | | in pipelines: 01(GETNEXT) @@ -1646,7 +1646,7 @@ Per-Host Resources: mem-estimate=64.17MB mem-reservation=32.76KB thread-reservat | stored statistics: | table: rows=10 size=7.77KB | columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | parquet dictionary predicates: CAST(i AS BIGINT) + CAST(1000 AS BIGINT) > CAST(1003 AS BIGINT) | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=0 row-size=36B cardinality=1 @@ -1659,7 +1659,7 @@ Per-Host Resources: mem-estimate=64.17MB mem-reservation=32.76KB thread-reservat stored statistics: table: rows=10 size=7.77KB columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 parquet dictionary predicates: CAST(i AS BIGINT) + CAST(1000 AS BIGINT) > CAST(1003 AS BIGINT) mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 tuple-ids=0 row-size=36B cardinality=1 @@ -1751,7 +1751,7 @@ PLAN-ROOT SINK | | stored statistics: | | table: 
rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | tuple-ids=4 row-size=267B cardinality=4 | | in pipelines: 01(GETNEXT) @@ -1763,7 +1763,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=10 size=7.77KB | columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=0 row-size=36B cardinality=6 | in pipelines: 00(GETNEXT) @@ -1775,7 +1775,7 @@ PLAN-ROOT SINK stored statistics: table: rows=10 size=7.77KB columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 tuple-ids=0 row-size=36B cardinality=4 in pipelines: 03(GETNEXT) @@ -1903,7 +1903,7 @@ Per-Host Resources: mem-estimate=67.11MB mem-reservation=2.97MB thread-reservati | | stored statistics: | | table: rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | tuple-ids=4 row-size=267B cardinality=4 | | in pipelines: 01(GETNEXT) @@ -1915,7 +1915,7 @@ Per-Host Resources: mem-estimate=67.11MB mem-reservation=2.97MB thread-reservati | stored statistics: | table: rows=10 size=7.77KB | columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=0 row-size=36B cardinality=6 | in pipelines: 00(GETNEXT) @@ -1927,7 +1927,7 @@ Per-Host Resources: mem-estimate=67.11MB mem-reservation=2.97MB thread-reservati stored statistics: table: rows=10 
size=7.77KB columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 tuple-ids=0 row-size=36B cardinality=4 in pipelines: 03(GETNEXT) @@ -2237,7 +2237,7 @@ PLAN-ROOT SINK | | stored statistics: | | table: rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | tuple-ids=10 row-size=267B cardinality=4 | | in pipelines: 06(GETNEXT) @@ -2248,7 +2248,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=10 size=7.77KB | columns: all -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | tuple-ids=4 row-size=20B cardinality=6 | in pipelines: 05(GETNEXT) @@ -2282,7 +2282,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=4 size=5.33KB | columns: all -| extrapolated-rows=disabled max-scan-range-rows=2 +| extrapolated-rows=unavailable max-scan-range-rows=2 | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | tuple-ids=7 row-size=267B cardinality=4 | in pipelines: 01(GETNEXT) @@ -2293,7 +2293,7 @@ PLAN-ROOT SINK stored statistics: table: rows=10 size=7.77KB columns: all - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 tuple-ids=0 row-size=20B cardinality=6 in pipelines: 00(GETNEXT) @@ -2354,7 +2354,7 @@ PLAN-ROOT SINK | | stored statistics: | | table: rows=4 size=5.33KB | | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 +| | extrapolated-rows=unavailable max-scan-range-rows=2 | | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | | tuple-ids=10 row-size=267B cardinality=4 | | 
in pipelines: 06(GETNEXT) @@ -2365,7 +2365,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=10 size=7.77KB | columns: all -| extrapolated-rows=disabled max-scan-range-rows=5 +| extrapolated-rows=unavailable max-scan-range-rows=5 | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | tuple-ids=4 row-size=20B cardinality=6 | in pipelines: 05(GETNEXT) @@ -2426,7 +2426,7 @@ Per-Host Resources: mem-estimate=32.02MB mem-reservation=16.76KB thread-reservat | stored statistics: | table: rows=4 size=5.33KB | columns: all -| extrapolated-rows=disabled max-scan-range-rows=2 +| extrapolated-rows=unavailable max-scan-range-rows=2 | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 | tuple-ids=7 row-size=267B cardinality=4 | in pipelines: 01(GETNEXT) @@ -2437,7 +2437,7 @@ Per-Host Resources: mem-estimate=32.02MB mem-reservation=16.76KB thread-reservat stored statistics: table: rows=10 size=7.77KB columns: all - extrapolated-rows=disabled max-scan-range-rows=5 + extrapolated-rows=unavailable max-scan-range-rows=5 mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 tuple-ids=0 row-size=20B cardinality=6 in pipelines: 00(GETNEXT) @@ -2872,7 +2872,7 @@ PLAN-ROOT SINK | | stored statistics: | | table: rows=3 size=1.33KB | | columns missing stats: s, d -| | extrapolated-rows=disabled max-scan-range-rows=1 +| | extrapolated-rows=unavailable max-scan-range-rows=1 | | mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1 | | tuple-ids=2 row-size=24B cardinality=3 | | in pipelines: 01(GETNEXT) @@ -2883,7 +2883,7 @@ PLAN-ROOT SINK | stored statistics: | table: rows=8 size=4.81KB | columns missing stats: i, s, d -| extrapolated-rows=disabled max-scan-range-rows=4 +| extrapolated-rows=unavailable max-scan-range-rows=4 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=0 row-size=28B cardinality=5 | in pipelines: 00(GETNEXT) @@ -2894,7 +2894,7 @@ PLAN-ROOT SINK stored statistics: table: rows=8 
size=4.81KB columns missing stats: i, s, d - extrapolated-rows=disabled max-scan-range-rows=4 + extrapolated-rows=unavailable max-scan-range-rows=4 mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 tuple-ids=0 row-size=28B cardinality=3 in pipelines: 03(GETNEXT) @@ -2938,7 +2938,7 @@ Per-Host Resources: mem-estimate=64.12MB mem-reservation=1.94MB thread-reservati | | stored statistics: | | table: rows=3 size=1.33KB | | columns missing stats: s, d -| | extrapolated-rows=disabled max-scan-range-rows=1 +| | extrapolated-rows=unavailable max-scan-range-rows=1 | | mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1 | | tuple-ids=2 row-size=24B cardinality=3 | | in pipelines: 01(GETNEXT) @@ -2956,7 +2956,7 @@ Per-Host Resources: mem-estimate=64.12MB mem-reservation=1.94MB thread-reservati | stored statistics: | table: rows=8 size=4.81KB | columns missing stats: i, s, d -| extrapolated-rows=disabled max-scan-range-rows=4 +| extrapolated-rows=unavailable max-scan-range-rows=4 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 | tuple-ids=0 row-size=28B cardinality=5 | in pipelines: 00(GETNEXT) @@ -2967,7 +2967,7 @@ Per-Host Resources: mem-estimate=64.12MB mem-reservation=1.94MB thread-reservati stored statistics: table: rows=8 size=4.81KB columns missing stats: i, s, d - extrapolated-rows=disabled max-scan-range-rows=4 + extrapolated-rows=unavailable max-scan-range-rows=4 mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 tuple-ids=0 row-size=28B cardinality=3 in pipelines: 03(GETNEXT) diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample-iceberg.test b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample-iceberg.test new file mode 100644 index 000000000..30f84cf2d --- /dev/null +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample-iceberg.test @@ -0,0 +1,206 @@ +# Sampling Iceberg tables. 
+select * from functional_parquet.iceberg_non_partitioned tablesample system(10) repeatable(1234) +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB thread-reservation=2 +PLAN-ROOT SINK +| output exprs: functional_parquet.iceberg_non_partitioned.id, functional_parquet.iceberg_non_partitioned.user, functional_parquet.iceberg_non_partitioned.action, functional_parquet.iceberg_non_partitioned.event_time +| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 +| +00:SCAN HDFS [functional_parquet.iceberg_non_partitioned] + HDFS partitions=1/1 files=3 size=3.41KB + Iceberg snapshot id: 93996984692289973 + stored statistics: + table: rows=20 size=22.90KB + columns: unavailable + extrapolated-rows=unavailable max-scan-range-rows=6 + mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 + tuple-ids=0 row-size=44B cardinality=3 + in pipelines: 00(GETNEXT) +==== +# Sampling Iceberg tables. Count(*) is optimized. 
+select count(*) from functional_parquet.iceberg_non_partitioned tablesample system(10) repeatable(1234) +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=1.02MB mem-reservation=8.00KB thread-reservation=2 +PLAN-ROOT SINK +| output exprs: count(*) +| mem-estimate=0B mem-reservation=0B thread-reservation=0 +| +01:AGGREGATE [FINALIZE] +| output: sum_init_zero(functional_parquet.iceberg_non_partitioned.stats: num_rows) +| mem-estimate=16.00KB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0 +| tuple-ids=1 row-size=8B cardinality=1 +| in pipelines: 01(GETNEXT), 00(OPEN) +| +00:SCAN HDFS [functional_parquet.iceberg_non_partitioned] + HDFS partitions=1/1 files=3 size=3.41KB + Iceberg snapshot id: 93996984692289973 + stored statistics: + table: rows=20 size=22.90KB + columns: all + extrapolated-rows=unavailable max-scan-range-rows=6 + mem-estimate=1.00MB mem-reservation=8.00KB thread-reservation=1 + tuple-ids=0 row-size=8B cardinality=20 + in pipelines: 00(GETNEXT) +==== +# Sampling partitioned Iceberg tables. 
+select * from functional_parquet.iceberg_partitioned tablesample system(50) repeatable(1234) +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB thread-reservation=2 +PLAN-ROOT SINK +| output exprs: functional_parquet.iceberg_partitioned.id, functional_parquet.iceberg_partitioned.user, functional_parquet.iceberg_partitioned.action, functional_parquet.iceberg_partitioned.event_time +| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 +| +00:SCAN HDFS [functional_parquet.iceberg_partitioned] + HDFS partitions=1/1 files=10 size=11.46KB + Iceberg snapshot id: 8270633197658268308 + stored statistics: + table: rows=20 size=22.90KB + columns: unavailable + extrapolated-rows=unavailable max-scan-range-rows=2 + mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 + tuple-ids=0 row-size=44B cardinality=10 + in pipelines: 00(GETNEXT) +==== +# Sampling Iceberg tables with predicates. Predicate pushdown to Iceberg happens +# before sampling (similarly to static partition pruning). 
+select * from functional_parquet.iceberg_partitioned tablesample system(50) repeatable(1234) +where action = 'click' and id > 0 +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB thread-reservation=2 +PLAN-ROOT SINK +| output exprs: functional_parquet.iceberg_partitioned.id, functional_parquet.iceberg_partitioned.user, functional_parquet.iceberg_partitioned.action, functional_parquet.iceberg_partitioned.event_time +| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 +| +00:SCAN HDFS [functional_parquet.iceberg_partitioned] + HDFS partitions=1/1 files=4 size=4.57KB + predicates: id > CAST(0 AS INT) + Iceberg snapshot id: 8270633197658268308 + skipped Iceberg predicates: action = 'click' + stored statistics: + table: rows=20 size=22.90KB + columns: unavailable + extrapolated-rows=unavailable max-scan-range-rows=5 + parquet statistics predicates: id > CAST(0 AS INT) + parquet dictionary predicates: id > CAST(0 AS INT) + mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 + tuple-ids=0 row-size=44B cardinality=1 + in pipelines: 00(GETNEXT) +==== +# Sampling Iceberg V2 tables. Delete files are not sampled, only the data files. So we +# don't return rows that are deleted. 
+select * from functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files +tablesample system(10) repeatable(1234) +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=100.00MB mem-reservation=4.05MB thread-reservation=3 +PLAN-ROOT SINK +| output exprs: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s +| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 +| +04:UNION +| pass-through-operands: all +| mem-estimate=0B mem-reservation=0B thread-reservation=0 +| tuple-ids=0 row-size=36B cardinality=4 +| in pipelines: 03(GETNEXT), 00(GETNEXT) +| +|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] +| | equality predicates: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position = functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name = functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path +| | mem-estimate=566B mem-reservation=566B thread-reservation=0 +| | tuple-ids=0 row-size=36B cardinality=3 +| | in pipelines: 00(GETNEXT), 01(OPEN) +| | +| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB +| | Iceberg snapshot id: 1497619269847778439 +| | stored statistics: +| | table: rows=4 size=5.33KB +| | columns: all +| | extrapolated-rows=unavailable max-scan-range-rows=2 +| | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 +| | tuple-ids=1 row-size=267B cardinality=4 +| | in pipelines: 01(GETNEXT) +| | +| 00:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=1 size=625B +| Iceberg snapshot id: 1497619269847778439 +| stored statistics: +| table: rows=10 size=7.77KB +| columns missing stats: i, s +| extrapolated-rows=unavailable max-scan-range-rows=10 +| mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 +| tuple-ids=0 row-size=36B cardinality=3 +| in pipelines: 00(GETNEXT) +| +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=1 size=620B + Iceberg snapshot id: 1497619269847778439 + stored statistics: + table: rows=10 size=7.77KB + columns missing stats: i, s + extrapolated-rows=unavailable max-scan-range-rows=10 + mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 + tuple-ids=0 row-size=36B cardinality=1 + in pipelines: 03(GETNEXT) +==== +# Cardinality of DELETE EVENTS ICEBERG DELETE should take the sampling percentage into account. +# Delete records cardinality: 3 +# Sampling percentage: 35% +# Effective delete records count: 3 * 0.35 = 1 +# DELETE EVENTS ICEBERG DELETE cardinality = 3 (Left SCAN node cardinality) - 1 (Effective delete records count) = 2 +select * from functional_parquet.iceberg_v2_positional_update_all_rows tablesample system(35) repeatable(1234); +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=100.00MB mem-reservation=4.05MB thread-reservation=3 +PLAN-ROOT SINK +| output exprs: functional_parquet.iceberg_v2_positional_update_all_rows.i, functional_parquet.iceberg_v2_positional_update_all_rows.s +| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 +| +04:UNION +| pass-through-operands: all +| mem-estimate=0B mem-reservation=0B thread-reservation=0 +| tuple-ids=0 row-size=36B cardinality=5 +| in pipelines: 03(GETNEXT), 00(GETNEXT) +| +|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] +| | equality 
predicates: functional_parquet.iceberg_v2_positional_update_all_rows.file__position = functional_parquet.iceberg_v2_positional_update_all_rows-position-delete.pos, functional_parquet.iceberg_v2_positional_update_all_rows.input__file__name = functional_parquet.iceberg_v2_positional_update_all_rows-position-delete.file_path +| | mem-estimate=764B mem-reservation=764B thread-reservation=0 +| | tuple-ids=0 row-size=36B cardinality=2 +| | in pipelines: 00(GETNEXT), 01(OPEN) +| | +| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_update_all_rows-position-delete] +| | HDFS partitions=1/1 files=1 size=2.60KB +| | Iceberg snapshot id: 3877007445826010687 +| | stored statistics: +| | table: rows=3 size=2.60KB +| | columns: all +| | extrapolated-rows=disabled max-scan-range-rows=3 +| | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 +| | tuple-ids=1 row-size=246B cardinality=3 +| | in pipelines: 01(GETNEXT) +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows] +| HDFS partitions=1/1 files=1 size=625B +| Iceberg snapshot id: 3877007445826010687 +| stored statistics: +| table: rows=6 size=3.82KB +| columns missing stats: i, s +| extrapolated-rows=disabled max-scan-range-rows=6 +| mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 +| tuple-ids=0 row-size=36B cardinality=3 +| in pipelines: 00(GETNEXT) +| +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows] + HDFS partitions=1/1 files=1 size=625B + Iceberg snapshot id: 3877007445826010687 + stored statistics: + table: rows=6 size=3.82KB + columns missing stats: i, s + extrapolated-rows=disabled max-scan-range-rows=6 + mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 + tuple-ids=0 row-size=36B cardinality=3 + in pipelines: 03(GETNEXT) +==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test 
b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test index 6c3b986f5..66d999eeb 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test @@ -247,151 +247,3 @@ PLAN-ROOT SINK tuple-ids=0 row-size=4B cardinality=730 in pipelines: 00(GETNEXT) ==== -# Sampling Iceberg tables. -select * from functional_parquet.iceberg_non_partitioned tablesample system(10) repeatable(1234) ----- PLAN -F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB thread-reservation=2 -PLAN-ROOT SINK -| output exprs: functional_parquet.iceberg_non_partitioned.id, functional_parquet.iceberg_non_partitioned.user, functional_parquet.iceberg_non_partitioned.action, functional_parquet.iceberg_non_partitioned.event_time -| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 -| -00:SCAN HDFS [functional_parquet.iceberg_non_partitioned] - HDFS partitions=1/1 files=3 size=3.41KB - Iceberg snapshot id: 93996984692289973 - stored statistics: - table: rows=20 size=22.90KB - columns: unavailable - extrapolated-rows=disabled max-scan-range-rows=6 - mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 - tuple-ids=0 row-size=44B cardinality=3 - in pipelines: 00(GETNEXT) -==== -# Sampling Iceberg tables. Count(*) is optimized. 
-select count(*) from functional_parquet.iceberg_non_partitioned tablesample system(10) repeatable(1234) ----- PLAN -F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=1.02MB mem-reservation=8.00KB thread-reservation=2 -PLAN-ROOT SINK -| output exprs: count(*) -| mem-estimate=0B mem-reservation=0B thread-reservation=0 -| -01:AGGREGATE [FINALIZE] -| output: sum_init_zero(functional_parquet.iceberg_non_partitioned.stats: num_rows) -| mem-estimate=16.00KB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0 -| tuple-ids=1 row-size=8B cardinality=1 -| in pipelines: 01(GETNEXT), 00(OPEN) -| -00:SCAN HDFS [functional_parquet.iceberg_non_partitioned] - HDFS partitions=1/1 files=3 size=3.41KB - Iceberg snapshot id: 93996984692289973 - stored statistics: - table: rows=20 size=22.90KB - columns: all - extrapolated-rows=disabled max-scan-range-rows=6 - mem-estimate=1.00MB mem-reservation=8.00KB thread-reservation=1 - tuple-ids=0 row-size=8B cardinality=20 - in pipelines: 00(GETNEXT) -==== -# Sampling partitioned Iceberg tables. 
-select * from functional_parquet.iceberg_partitioned tablesample system(50) repeatable(1234) ----- PLAN -F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB thread-reservation=2 -PLAN-ROOT SINK -| output exprs: functional_parquet.iceberg_partitioned.id, functional_parquet.iceberg_partitioned.user, functional_parquet.iceberg_partitioned.action, functional_parquet.iceberg_partitioned.event_time -| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 -| -00:SCAN HDFS [functional_parquet.iceberg_partitioned] - HDFS partitions=1/1 files=10 size=11.46KB - Iceberg snapshot id: 8270633197658268308 - stored statistics: - table: rows=20 size=22.90KB - columns: unavailable - extrapolated-rows=disabled max-scan-range-rows=2 - mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 - tuple-ids=0 row-size=44B cardinality=10 - in pipelines: 00(GETNEXT) -==== -# Sampling Iceberg tables with predicates. Predicate pushdown to Iceberg happens -# before sampling (similarly to static partition pruning). 
-select * from functional_parquet.iceberg_partitioned tablesample system(50) repeatable(1234) -where action = 'click' and id > 0 ----- PLAN -F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB thread-reservation=2 -PLAN-ROOT SINK -| output exprs: functional_parquet.iceberg_partitioned.id, functional_parquet.iceberg_partitioned.user, functional_parquet.iceberg_partitioned.action, functional_parquet.iceberg_partitioned.event_time -| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 -| -00:SCAN HDFS [functional_parquet.iceberg_partitioned] - HDFS partitions=1/1 files=4 size=4.57KB - predicates: id > CAST(0 AS INT) - Iceberg snapshot id: 8270633197658268308 - skipped Iceberg predicates: action = 'click' - stored statistics: - table: rows=20 size=22.90KB - columns: unavailable - extrapolated-rows=disabled max-scan-range-rows=5 - parquet statistics predicates: id > CAST(0 AS INT) - parquet dictionary predicates: id > CAST(0 AS INT) - mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 - tuple-ids=0 row-size=44B cardinality=1 - in pipelines: 00(GETNEXT) -==== -# Sampling Iceberg V2 tables. Delete files are not sampled, only the data files. So we -# don't return rows that are deleted. 
-select * from functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files -tablesample system(10) repeatable(1234) ----- PLAN -F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 -| Per-Host Resources: mem-estimate=100.00MB mem-reservation=4.05MB thread-reservation=3 -PLAN-ROOT SINK -| output exprs: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s -| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 -| -04:UNION -| pass-through-operands: all -| mem-estimate=0B mem-reservation=0B thread-reservation=0 -| tuple-ids=0 row-size=36B cardinality=2 -| in pipelines: 03(GETNEXT), 00(GETNEXT) -| -|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] -| | equality predicates: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position = functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name = functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path -| | mem-estimate=566B mem-reservation=566B thread-reservation=0 -| | tuple-ids=0 row-size=36B cardinality=1 -| | in pipelines: 00(GETNEXT), 01(OPEN) -| | -| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=2 size=5.33KB -| | Iceberg snapshot id: 1497619269847778439 -| | stored statistics: -| | table: rows=4 size=5.33KB -| | columns: all -| | extrapolated-rows=disabled max-scan-range-rows=2 -| | mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 -| | tuple-ids=1 row-size=267B cardinality=4 -| | in pipelines: 01(GETNEXT) -| | -| 00:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=1 size=625B -| Iceberg snapshot id: 1497619269847778439 -| stored statistics: -| table: rows=10 size=7.77KB -| columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=10 -| mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 -| tuple-ids=0 row-size=36B cardinality=3 -| in pipelines: 00(GETNEXT) -| -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] - HDFS partitions=1/1 files=1 size=620B - Iceberg snapshot id: 1497619269847778439 - stored statistics: - table: rows=10 size=7.77KB - columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=10 - mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 - tuple-ids=0 row-size=36B cardinality=1 - in pipelines: 03(GETNEXT) -==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-compute-stats-table-sampling.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-compute-stats-table-sampling.test new file mode 100644 index 000000000..19e81836a --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-compute-stats-table-sampling.test @@ -0,0 +1,234 @@ +==== +---- QUERY +DROP STATS iceberg_non_partitioned; +COMPUTE STATS iceberg_non_partitioned tablesample system(10) repeatable(1234); +---- RESULTS +'Updated 1 partition(s) and 4 column(s).' 
+---- TYPES +STRING +==== +---- QUERY +SHOW TABLE STATS iceberg_non_partitioned; +---- LABELS +#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental stats, Location, EC Policy +---- RESULTS: VERIFY_IS_EQUAL +20,20,'22.90KB','NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/iceberg_non_partitioned','$ERASURECODE_POLICY' +---- TYPES +BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING +==== +---- QUERY +SHOW COLUMN STATS iceberg_non_partitioned; +---- RESULTS +'id','INT',3,0,4,4,-1,-1 +'user','STRING',2,0,4,4,-1,-1 +'action','STRING',2,0,5,4.333333492279053,-1,-1 +'event_time','TIMESTAMP',2,0,16,16,-1,-1 +---- TYPES +STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT +==== +---- QUERY +DROP STATS iceberg_non_partitioned; +COMPUTE STATS iceberg_non_partitioned tablesample system(10) repeatable(1111); +---- RESULTS +'Updated 1 partition(s) and 4 column(s).' +---- TYPES +STRING +==== +---- QUERY +SHOW TABLE STATS iceberg_non_partitioned; +---- LABELS +#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental stats, Location, EC Policy +---- RESULTS: VERIFY_IS_EQUAL +20,20,'22.90KB','NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/iceberg_non_partitioned','$ERASURECODE_POLICY' +---- TYPES +BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING +==== +---- QUERY +SHOW COLUMN STATS iceberg_non_partitioned; +---- RESULTS +'id','INT',2,0,4,4,-1,-1 +'user','STRING',2,0,4,4,-1,-1 +'action','STRING',2,0,8,6.5,-1,-1 +'event_time','TIMESTAMP',2,0,16,16,-1,-1 +---- TYPES +STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT +==== +---- QUERY +DROP STATS iceberg_partitioned; +COMPUTE STATS iceberg_partitioned tablesample system(10) repeatable(1111); +---- RESULTS +'Updated 1 partition(s) and 4 column(s).' 
+---- TYPES +STRING +==== +---- QUERY +SHOW TABLE STATS iceberg_partitioned; +---- LABELS +#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental stats, Location, EC Policy +---- RESULTS: VERIFY_IS_EQUAL +20,20,'22.90KB','NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/iceberg_partitioned','$ERASURECODE_POLICY' +---- TYPES +BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING +==== +---- QUERY +SHOW COLUMN STATS iceberg_partitioned; +---- RESULTS +'id','INT',3,0,4,4,-1,-1 +'user','STRING',3,0,4,4,-1,-1 +'action','STRING',3,0,8,5.666666507720947,-1,-1 +'event_time','TIMESTAMP',3,0,16,16,-1,-1 +---- TYPES +STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT +==== +---- QUERY +DROP STATS iceberg_v2_delete_equality_partitioned; +COMPUTE STATS iceberg_v2_delete_equality_partitioned tablesample system(10) repeatable(1111); +---- RESULTS +'Updated 1 partition(s) and 3 column(s).' +---- TYPES +STRING +==== +---- QUERY +SHOW TABLE STATS iceberg_v2_delete_equality_partitioned; +---- LABELS +#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental stats, Location, EC Policy +---- RESULTS: VERIFY_IS_EQUAL +3,6,'4.81KB','NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_partitioned','$ERASURECODE_POLICY' +---- TYPES +BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING +==== +---- QUERY +SHOW COLUMN STATS iceberg_v2_delete_equality_partitioned; +---- RESULTS +'i','INT',2,0,4,4,-1,-1 +'s','STRING',2,0,4,4,-1,-1 +'d','DATE',1,0,4,4,-1,-1 +---- TYPES +STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT +==== +---- QUERY +DROP STATS iceberg_v2_delete_equality_partitioned; +COMPUTE STATS iceberg_v2_delete_equality_partitioned tablesample system(10) repeatable(1111); +---- RESULTS +'Updated 1 partition(s) and 3 column(s).' 
+---- TYPES +STRING +==== +---- QUERY +SHOW TABLE STATS iceberg_v2_delete_equality_partitioned; +---- LABELS +#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental stats, Location, EC Policy +---- RESULTS: VERIFY_IS_EQUAL +3,6,'4.81KB','NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_partitioned','$ERASURECODE_POLICY' +---- TYPES +BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING +==== +---- QUERY +SHOW COLUMN STATS iceberg_v2_delete_equality_partitioned; +---- RESULTS +'i','INT',2,0,4,4,-1,-1 +'s','STRING',2,0,4,4,-1,-1 +'d','DATE',1,0,4,4,-1,-1 +---- TYPES +STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT +==== +---- QUERY +DROP STATS iceberg_v2_positional_not_all_data_files_have_delete_files; +COMPUTE STATS iceberg_v2_positional_not_all_data_files_have_delete_files tablesample system(30) repeatable(1111); +---- RESULTS +'Updated 1 partition(s) and 2 column(s).' 
+---- TYPES +STRING +==== +---- QUERY +SHOW TABLE STATS iceberg_v2_positional_not_all_data_files_have_delete_files; +---- LABELS +#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental stats, Location, EC Policy +---- RESULTS: VERIFY_IS_EQUAL +1,6,'7.77KB','NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_positional_not_all_data_files_have_delete_files','$ERASURECODE_POLICY' +---- TYPES +BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING +==== +---- QUERY +SHOW COLUMN STATS iceberg_v2_positional_not_all_data_files_have_delete_files; +---- RESULTS +'i','INT',1,0,4,4,-1,-1 +'s','STRING',1,0,1,1,-1,-1 +---- TYPES +STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT +==== +---- QUERY +DROP STATS iceberg_v2_positional_not_all_data_files_have_delete_files_orc; +COMPUTE STATS iceberg_v2_positional_not_all_data_files_have_delete_files_orc tablesample system(30) repeatable(1111); +---- RESULTS +'Updated 1 partition(s) and 2 column(s).' 
+---- TYPES +STRING +==== +---- QUERY +SHOW TABLE STATS iceberg_v2_positional_not_all_data_files_have_delete_files_orc; +---- LABELS +#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental stats, Location, EC Policy +---- RESULTS: VERIFY_IS_EQUAL +1,6,'3.97KB','NOT CACHED','NOT CACHED','ORC','false','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_positional_not_all_data_files_have_delete_files_orc','$ERASURECODE_POLICY' +---- TYPES +BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING +==== +---- QUERY +SHOW COLUMN STATS iceberg_v2_positional_not_all_data_files_have_delete_files_orc; +---- RESULTS +'i','INT',1,0,4,4,-1,-1 +'s','STRING',1,0,1,1,-1,-1 +---- TYPES +STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT +==== +---- QUERY +DROP STATS iceberg_v2_positional_not_all_data_files_have_delete_files; +COMPUTE STATS iceberg_v2_positional_not_all_data_files_have_delete_files tablesample system(50) repeatable(1111); +---- RESULTS +'Updated 1 partition(s) and 2 column(s).' 
+---- TYPES +STRING +==== +---- QUERY +SHOW TABLE STATS iceberg_v2_positional_not_all_data_files_have_delete_files; +---- LABELS +#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental stats, Location, EC Policy +---- RESULTS: VERIFY_IS_EQUAL +4,6,'7.77KB','NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_positional_not_all_data_files_have_delete_files','$ERASURECODE_POLICY' +---- TYPES +BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING +==== +---- QUERY +SHOW COLUMN STATS iceberg_v2_positional_not_all_data_files_have_delete_files; +---- RESULTS +'i','INT',4,0,4,4,-1,-1 +'s','STRING',4,0,1,1,-1,-1 +---- TYPES +STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT +==== +---- QUERY +DROP STATS iceberg_v2_positional_not_all_data_files_have_delete_files_orc; +COMPUTE STATS iceberg_v2_positional_not_all_data_files_have_delete_files_orc tablesample system(50) repeatable(1111); +---- RESULTS +'Updated 1 partition(s) and 2 column(s).' 
+---- TYPES +STRING +==== +---- QUERY +SHOW TABLE STATS iceberg_v2_positional_not_all_data_files_have_delete_files_orc; +---- LABELS +#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental stats, Location, EC Policy +---- RESULTS: VERIFY_IS_EQUAL +4,6,'3.97KB','NOT CACHED','NOT CACHED','ORC','false','$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_positional_not_all_data_files_have_delete_files_orc','$ERASURECODE_POLICY' +---- TYPES +BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING +==== +---- QUERY +SHOW COLUMN STATS iceberg_v2_positional_not_all_data_files_have_delete_files_orc; +---- RESULTS +'i','INT',4,0,4,4,-1,-1 +'s','STRING',4,0,1,1,-1,-1 +---- TYPES +STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT +==== diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py index 141808c67..8515fb910 100644 --- a/tests/query_test/test_iceberg.py +++ b/tests/query_test/test_iceberg.py @@ -1545,6 +1545,14 @@ class TestIcebergV2Table(IcebergTestSuite): self.run_test_case('QueryTest/iceberg-v2-read-position-deletes-stats', vector) self.run_test_case('QueryTest/iceberg-v2-read-position-deletes-orc-stats', vector) + @SkipIfDockerizedCluster.internal_hostname + @SkipIf.hardcoded_uris + @pytest.mark.execute_serially + def test_compute_stats_table_sampling(self, vector): + """Tests COMPUTE STATS with table sampling.""" + vector.get_value('exec_option')['COMPUTE_STATS_MIN_SAMPLE_SIZE'] = 0 + self.run_test_case('QueryTest/iceberg-v2-compute-stats-table-sampling', vector) + @SkipIfFS.hive def test_read_mixed_format_position_deletes(self, vector, unique_database): self.run_test_case('QueryTest/iceberg-mixed-format-position-deletes',
