This is an automated email from the ASF dual-hosted git repository.

xy720 pushed a commit to branch fix/iceberg-infer-format-light
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 7e53c657bb974ad9a750b4f6c9a3a5a6c42e64d7
Author: lambxu <[email protected]>
AuthorDate: Tue Jun 23 23:24:03 2026 +0800

    save
---
 .../doris/datasource/iceberg/IcebergUtils.java     | 32 +++++++++++++++++-----
 .../doris/datasource/iceberg/IcebergUtilsTest.java | 30 ++++++++++++++++++++
 2 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index ea36d755cfd..5376e4ecb4b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -79,9 +79,11 @@ import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.exception.ExceptionUtils;
 import org.apache.iceberg.BaseTable;
 import org.apache.iceberg.CatalogProperties;
+import org.apache.iceberg.DataFile;
 import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.FileScanTask;
 import org.apache.iceberg.ManifestFile;
+import org.apache.iceberg.ManifestFiles;
 import org.apache.iceberg.MetadataColumns;
 import org.apache.iceberg.MetadataTableType;
 import org.apache.iceberg.MetadataTableUtils;
@@ -106,6 +108,7 @@ import org.apache.iceberg.expressions.Projections;
 import org.apache.iceberg.expressions.Unbound;
 import org.apache.iceberg.hive.HiveCatalog;
 import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.io.FileIO;
 import org.apache.iceberg.types.Type.TypeID;
 import org.apache.iceberg.types.TypeUtil;
 import org.apache.iceberg.types.Types;
@@ -133,6 +136,7 @@ import java.time.format.DateTimeFormatter;
 import java.time.temporal.ChronoField;
 import java.time.temporal.TemporalAccessor;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.LinkedHashSet;
@@ -1184,16 +1188,30 @@ public class IcebergUtils {
     }
 
     private static String inferFileFormatFromDataFiles(Table icebergTable) {
-        if (icebergTable.currentSnapshot() == null) {
+        Snapshot snapshot = icebergTable.currentSnapshot();
+        if (snapshot == null) {
             LOG.info("Iceberg table {} has no snapshot, defaulting to {}", 
icebergTable.name(), PARQUET_NAME);
             return PARQUET_NAME;
         }
-        try (CloseableIterable<FileScanTask> files = 
icebergTable.newScan().planFiles()) {
-            java.util.Iterator<FileScanTask> it = files.iterator();
-            if (it.hasNext()) {
-                String format = it.next().file().format().name().toLowerCase();
-                LOG.info("Iceberg table {} inferred file format {} from data 
files", icebergTable.name(), format);
-                return format;
+        FileIO io = icebergTable.io();
+        try {
+            for (ManifestFile manifest : snapshot.dataManifests(io)) {
+                // Read the `file_format` field from a single data manifest entry.
+                // To avoid many synchronous IO hanging FE planning.
+                if (manifest.addedFilesCount() != null && 
manifest.existingFilesCount() != null
+                        && manifest.addedFilesCount() == 0 && 
manifest.existingFilesCount() == 0) {
+                    continue;
+                }
+                try (CloseableIterable<DataFile> entries = 
ManifestFiles.read(manifest, io)
+                        
.select(Collections.singletonList(DataFile.FILE_FORMAT.name()))) {
+                    java.util.Iterator<DataFile> it = entries.iterator();
+                    if (it.hasNext()) {
+                        String format = 
it.next().format().name().toLowerCase();
+                        LOG.info("Iceberg table {} inferred file format {} 
from manifest {}",
+                                icebergTable.name(), format, manifest.path());
+                        return format;
+                    }
+                }
             }
         } catch (Exception e) {
             LOG.warn("Failed to infer file format from data files for table 
{}, defaulting to {}",
diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java
index 5b25da419cc..af0eb0ff498 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java
@@ -25,6 +25,7 @@ import org.apache.doris.common.UserException;
 import org.apache.doris.datasource.iceberg.source.IcebergTableQueryInfo;
 
 import com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.GenericPartitionFieldSummary;
 import org.apache.iceberg.HistoryEntry;
 import org.apache.iceberg.ManifestContent;
@@ -711,4 +712,33 @@ public class IcebergUtilsTest {
         Assert.assertEquals(expectSchemaId, queryInfo.getSchemaId());
         Assert.assertEquals(expectRef, queryInfo.getRef());
     }
+
+    @Test
+    public void testGetFileFormatHonorsWriteFormatProperty() {
+        Table table = Mockito.mock(Table.class);
+        
Mockito.when(table.properties()).thenReturn(ImmutableMap.of(IcebergUtils.WRITE_FORMAT,
 "ORC"));
+        Assert.assertEquals(FileFormat.ORC, IcebergUtils.getFileFormat(table));
+        // No snapshot lookup should be needed when properties are sufficient.
+        Mockito.verify(table, Mockito.never()).currentSnapshot();
+    }
+
+    @Test
+    public void testGetFileFormatHonorsDefaultFormatProperty() {
+        Table table = Mockito.mock(Table.class);
+        Mockito.when(table.properties()).thenReturn(
+                ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, 
"parquet"));
+        Assert.assertEquals(FileFormat.PARQUET, 
IcebergUtils.getFileFormat(table));
+        Mockito.verify(table, Mockito.never()).currentSnapshot();
+    }
+
+    @Test
+    public void 
testGetFileFormatFallsBackToParquetWhenNoSnapshotAndNoProperties() {
+        // Migrated tables without write-format / write.format.default and no snapshot
+        // must not throw and must default to parquet.
+        Table table = Mockito.mock(Table.class);
+        Mockito.when(table.properties()).thenReturn(ImmutableMap.of());
+        Mockito.when(table.currentSnapshot()).thenReturn(null);
+        Mockito.when(table.name()).thenReturn("test_no_snapshot_table");
+        Assert.assertEquals(FileFormat.PARQUET, 
IcebergUtils.getFileFormat(table));
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to