zhangbutao commented on code in PR #34775: URL: https://github.com/apache/doris/pull/34775#discussion_r1598155907
########## fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java: ########## @@ -212,60 +212,73 @@ private List<Split> doGetSplits() throws UserException { HashSet<String> partitionPathSet = new HashSet<>(); boolean isPartitionedTable = icebergTable.spec().isPartitioned(); + TPushAggOp aggOp = getPushDownAggNoGroupingOp(); + boolean canPushCount = aggOp.equals(TPushAggOp.COUNT) && getCountFromSnapshot() > 0; + CloseableIterable<FileScanTask> fileScanTasks = TableScanUtil.splitFiles(scan.planFiles(), splitSize); try (CloseableIterable<CombinedScanTask> combinedScanTasks = TableScanUtil.planTasks(fileScanTasks, splitSize, 1, 0)) { - combinedScanTasks.forEach(taskGrp -> taskGrp.files().forEach(splitTask -> { - String dataFilePath = normalizeLocation(splitTask.file().path().toString()); - - List<String> partitionValues = new ArrayList<>(); - if (isPartitionedTable) { - StructLike structLike = splitTask.file().partition(); - List<PartitionField> fields = splitTask.spec().fields(); - Types.StructType structType = icebergTable.schema().asStruct(); - - // set partitionValue for this IcebergSplit - for (int i = 0; i < structLike.size(); i++) { - Object obj = structLike.get(i, Object.class); - String value = String.valueOf(obj); - PartitionField partitionField = fields.get(i); - if (partitionField.transform().isIdentity()) { - Type type = structType.fieldType(partitionField.name()); - if (type != null && type.typeId().equals(Type.TypeID.DATE)) { - // iceberg use integer to store date, - // we need transform it to string - value = DateTimeUtil.daysToIsoDate((Integer) obj); + for (CombinedScanTask taskGrp : combinedScanTasks) { + for (FileScanTask splitTask : taskGrp.files()) { + String dataFilePath = normalizeLocation(splitTask.file().path().toString()); + + List<String> partitionValues = new ArrayList<>(); + if (isPartitionedTable) { + StructLike structLike = splitTask.file().partition(); + List<PartitionField> fields = 
splitTask.spec().fields(); + Types.StructType structType = icebergTable.schema().asStruct(); + + // set partitionValue for this IcebergSplit + for (int i = 0; i < structLike.size(); i++) { + Object obj = structLike.get(i, Object.class); + String value = String.valueOf(obj); + PartitionField partitionField = fields.get(i); + if (partitionField.transform().isIdentity()) { + Type type = structType.fieldType(partitionField.name()); + if (type != null && type.typeId().equals(Type.TypeID.DATE)) { + // iceberg use integer to store date, + // we need transform it to string + value = DateTimeUtil.daysToIsoDate((Integer) obj); + } } + partitionValues.add(value); } - partitionValues.add(value); + + // Counts the number of partitions read + partitionPathSet.add(structLike.toString()); + } + LocationPath locationPath = new LocationPath(dataFilePath, source.getCatalog().getProperties()); + Path finalDataFilePath = locationPath.toStorageLocation(); + IcebergSplit split = new IcebergSplit( + finalDataFilePath, + splitTask.start(), + splitTask.length(), + splitTask.file().fileSizeInBytes(), + new String[0], + formatVersion, + source.getCatalog().getProperties(), + partitionValues); + if (formatVersion >= MIN_DELETE_FILE_SUPPORT_VERSION) { + split.setDeleteFileFilters(getDeleteFileFilters(splitTask)); } + split.setTableFormatType(TableFormatType.ICEBERG); + splits.add(split); - // Counts the number of partitions read - partitionPathSet.add(structLike.toString()); + // End loop early as one split is enough if the statement can push down count + if (canPushCount) { + break; Review Comment: This is what I want to do. End the entire loop early to avoid a lot of useless splits if the statement can push down count. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org